From 51d59a64eed6c2cd2d2f991f44ffbe21eb33c733 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Thu, 14 Mar 2024 13:27:34 -0700 Subject: [PATCH 01/85] vhost: dirty log should be per backend type There could be a mix of both vhost-user and vhost-kernel clients in the same QEMU process, where separate vhost loggers for the specific vhost type have to be used. Make the vhost logger per backend type, and have them properly reference counted. Suggested-by: Michael S. Tsirkin Signed-off-by: Si-Wei Liu Message-Id: <1710448055-11709-1-git-send-email-si-wei.liu@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 4acd77e890..a1e8b79e1a 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -43,8 +43,8 @@ do { } while (0) #endif -static struct vhost_log *vhost_log; -static struct vhost_log *vhost_log_shm; +static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX]; +static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX]; /* Memslots used by backends that support private memslots (without an fd). */ static unsigned int used_memslots; @@ -287,6 +287,10 @@ static int vhost_set_backend_type(struct vhost_dev *dev, r = -1; } + if (r == 0) { + assert(dev->vhost_ops->backend_type == backend_type); + } + return r; } @@ -319,16 +323,22 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) return log; } -static struct vhost_log *vhost_log_get(uint64_t size, bool share) +static struct vhost_log *vhost_log_get(VhostBackendType backend_type, + uint64_t size, bool share) { - struct vhost_log *log = share ? vhost_log_shm : vhost_log; + struct vhost_log *log; + + assert(backend_type > VHOST_BACKEND_TYPE_NONE); + assert(backend_type < VHOST_BACKEND_TYPE_MAX); + + log = share ? 
vhost_log_shm[backend_type] : vhost_log[backend_type]; if (!log || log->size != size) { log = vhost_log_alloc(size, share); if (share) { - vhost_log_shm = log; + vhost_log_shm[backend_type] = log; } else { - vhost_log = log; + vhost_log[backend_type] = log; } } else { ++log->refcnt; @@ -340,11 +350,20 @@ static struct vhost_log *vhost_log_get(uint64_t size, bool share) static void vhost_log_put(struct vhost_dev *dev, bool sync) { struct vhost_log *log = dev->log; + VhostBackendType backend_type; if (!log) { return; } + assert(dev->vhost_ops); + backend_type = dev->vhost_ops->backend_type; + + if (backend_type == VHOST_BACKEND_TYPE_NONE || + backend_type >= VHOST_BACKEND_TYPE_MAX) { + return; + } + --log->refcnt; if (log->refcnt == 0) { /* Sync only the range covered by the old log */ @@ -352,13 +371,13 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync) vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); } - if (vhost_log == log) { + if (vhost_log[backend_type] == log) { g_free(log->log); - vhost_log = NULL; - } else if (vhost_log_shm == log) { + vhost_log[backend_type] = NULL; + } else if (vhost_log_shm[backend_type] == log) { qemu_memfd_free(log->log, log->size * sizeof(*(log->log)), log->fd); - vhost_log_shm = NULL; + vhost_log_shm[backend_type] = NULL; } g_free(log); @@ -376,7 +395,8 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev) static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) { - struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev)); + struct vhost_log *log = vhost_log_get(dev->vhost_ops->backend_type, + size, vhost_dev_log_is_shared(dev)); uint64_t log_base = (uintptr_t)log->log; int r; @@ -2044,7 +2064,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) uint64_t log_base; hdev->log_size = vhost_get_log_size(hdev); - hdev->log = vhost_log_get(hdev->log_size, + hdev->log = vhost_log_get(hdev->vhost_ops->backend_type, + hdev->log_size, 
vhost_dev_log_is_shared(hdev)); log_base = (uintptr_t)hdev->log->log; r = hdev->vhost_ops->vhost_set_log_base(hdev, From c5cd7e5f230afb56891e3826fbb60f9e2b6c086a Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Thu, 14 Mar 2024 13:27:35 -0700 Subject: [PATCH 02/85] vhost: Perform memory section dirty scans once per iteration On setups with one or more virtio-net devices with vhost on, dirty tracking iteration increases cost the bigger the number amount of queues are set up e.g. on idle guests migration the following is observed with virtio-net with vhost=on: 48 queues -> 78.11% [.] vhost_dev_sync_region.isra.13 8 queues -> 40.50% [.] vhost_dev_sync_region.isra.13 1 queue -> 6.89% [.] vhost_dev_sync_region.isra.13 2 devices, 1 queue -> 18.60% [.] vhost_dev_sync_region.isra.14 With high memory rates the symptom is lack of convergence as soon as it has a vhost device with a sufficiently high number of queues, the sufficient number of vhost devices. On every migration iteration (every 100msecs) it will redundantly query the *shared log* the number of queues configured with vhost that exist in the guest. For the virtqueue data, this is necessary, but not for the memory sections which are the same. So essentially we end up scanning the dirty log too often. To fix that, select a vhost device responsible for scanning the log with regards to memory sections dirty tracking. It is selected when we enable the logger (during migration) and cleared when we disable the logger. If the vhost logger device goes away for some reason, the logger will be re-selected from the rest of vhost devices. After making mem-section logger a singleton instance, constant cost of 7%-9% (like the 1 queue report) will be seen, no matter how many queues or how many vhost devices are configured: 48 queues -> 8.71% [.] vhost_dev_sync_region.isra.13 2 devices, 8 queues -> 7.97% [.] 
vhost_dev_sync_region.isra.14 Co-developed-by: Joao Martins Signed-off-by: Joao Martins Signed-off-by: Si-Wei Liu Message-Id: <1710448055-11709-2-git-send-email-si-wei.liu@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- hw/virtio/vhost.c | 67 +++++++++++++++++++++++++++++++++++---- include/hw/virtio/vhost.h | 1 + 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index a1e8b79e1a..06fc71746e 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -45,6 +45,7 @@ static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX]; static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX]; +static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX]; /* Memslots used by backends that support private memslots (without an fd). */ static unsigned int used_memslots; @@ -149,6 +150,47 @@ bool vhost_dev_has_iommu(struct vhost_dev *dev) } } +static inline bool vhost_dev_should_log(struct vhost_dev *dev) +{ + assert(dev->vhost_ops); + assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE); + assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX); + + return dev == QLIST_FIRST(&vhost_log_devs[dev->vhost_ops->backend_type]); +} + +static inline void vhost_dev_elect_mem_logger(struct vhost_dev *hdev, bool add) +{ + VhostBackendType backend_type; + + assert(hdev->vhost_ops); + + backend_type = hdev->vhost_ops->backend_type; + assert(backend_type > VHOST_BACKEND_TYPE_NONE); + assert(backend_type < VHOST_BACKEND_TYPE_MAX); + + if (add && !QLIST_IS_INSERTED(hdev, logdev_entry)) { + if (QLIST_EMPTY(&vhost_log_devs[backend_type])) { + QLIST_INSERT_HEAD(&vhost_log_devs[backend_type], + hdev, logdev_entry); + } else { + /* + * The first vhost_device in the list is selected as the shared + * logger to scan memory sections. Put new entry next to the head + * to avoid inadvertent change to the underlying logger device. 
+ * This is done in order to get better cache locality and to avoid + * performance churn on the hot path for log scanning. Even when + * new devices come and go quickly, it wouldn't end up changing + * the active leading logger device at all. + */ + QLIST_INSERT_AFTER(QLIST_FIRST(&vhost_log_devs[backend_type]), + hdev, logdev_entry); + } + } else if (!add && QLIST_IS_INSERTED(hdev, logdev_entry)) { + QLIST_REMOVE(hdev, logdev_entry); + } +} + static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, MemoryRegionSection *section, hwaddr first, @@ -166,12 +208,14 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, start_addr = MAX(first, start_addr); end_addr = MIN(last, end_addr); - for (i = 0; i < dev->mem->nregions; ++i) { - struct vhost_memory_region *reg = dev->mem->regions + i; - vhost_dev_sync_region(dev, section, start_addr, end_addr, - reg->guest_phys_addr, - range_get_last(reg->guest_phys_addr, - reg->memory_size)); + if (vhost_dev_should_log(dev)) { + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + vhost_dev_sync_region(dev, section, start_addr, end_addr, + reg->guest_phys_addr, + range_get_last(reg->guest_phys_addr, + reg->memory_size)); + } } for (i = 0; i < dev->nvqs; ++i) { struct vhost_virtqueue *vq = dev->vqs + i; @@ -383,6 +427,7 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync) g_free(log); } + vhost_dev_elect_mem_logger(dev, false); dev->log = NULL; dev->log_size = 0; } @@ -998,6 +1043,15 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log) goto err_vq; } } + + /* + * At log start we select our vhost_device logger that will scan the + * memory sections and skip for the others. This is possible because + * the log is shared amongst all vhost devices for a given type of + * backend. 
+ */ + vhost_dev_elect_mem_logger(dev, enable_log); + return 0; err_vq: for (; i >= 0; --i) { @@ -2075,6 +2129,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) VHOST_OPS_DEBUG(r, "vhost_set_log_base failed"); goto fail_log; } + vhost_dev_elect_mem_logger(hdev, true); } if (vrings) { r = vhost_dev_set_vring_enable(hdev, true); diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 02477788df..d75faf46e9 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -129,6 +129,7 @@ struct vhost_dev { void *opaque; struct vhost_log *log; QLIST_ENTRY(vhost_dev) entry; + QLIST_ENTRY(vhost_dev) logdev_entry; QLIST_HEAD(, vhost_iommu) iommu_list; IOMMUNotifier n; const VhostDevConfigOps *config_ops; From 9d5a807c4cb56837f11be9a9250f854fab951290 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 22 Mar 2024 10:23:15 +0100 Subject: [PATCH 03/85] vhost-vdpa: check vhost_vdpa_set_vring_ready() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vhost_vdpa_set_vring_ready() could already fail, but if Linux's patch [1] will be merged, it may fail with more chance if userspace does not activate virtqueues before DRIVER_OK when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not negotiated. So better check its return value anyway. [1] https://lore.kernel.org/virtualization/20240206145154.118044-1-sgarzare@redhat.com/T/#u Acked-by: Eugenio Pérez Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Message-Id: <20240322092315.31885-1-sgarzare@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- net/vhost-vdpa.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 85e73dd6a7..eda714d1a4 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -399,7 +399,10 @@ static int vhost_vdpa_net_data_load(NetClientState *nc) } for (int i = 0; i < v->dev->nvqs; ++i) { - vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index); + int ret = vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index); + if (ret < 0) { + return ret; + } } return 0; } @@ -1238,7 +1241,10 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - vhost_vdpa_set_vring_ready(v, v->dev->vq_index); + r = vhost_vdpa_set_vring_ready(v, v->dev->vq_index); + if (unlikely(r < 0)) { + return r; + } if (v->shadow_vqs_enabled) { n = VIRTIO_NET(v->dev->vdev); @@ -1277,7 +1283,10 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) } for (int i = 0; i < v->dev->vq_index; ++i) { - vhost_vdpa_set_vring_ready(v, i); + r = vhost_vdpa_set_vring_ready(v, i); + if (unlikely(r < 0)) { + return r; + } } return 0; From cf39b82860b63589460d8797dd70ae3c1647ccca Mon Sep 17 00:00:00 2001 From: Jonah Palmer Date: Fri, 15 Mar 2024 12:55:52 -0400 Subject: [PATCH 04/85] virtio/virtio-pci: Handle extra notification data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support to virtio-pci devices for handling the extra data sent from the driver to the device when the VIRTIO_F_NOTIFICATION_DATA transport feature has been negotiated. The extra data that's passed to the virtio-pci device when this feature is enabled varies depending on the device's virtqueue layout. 
In a split virtqueue layout, this data includes: - upper 16 bits: shadow_avail_idx - lower 16 bits: virtqueue index In a packed virtqueue layout, this data includes: - upper 16 bits: 1-bit wrap counter & 15-bit shadow_avail_idx - lower 16 bits: virtqueue index Signed-off-by: Jonah Palmer Message-Id: <20240315165557.26942-2-jonah.palmer@oracle.com> Reviewed-by: Eugenio Pérez Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 12 +++++++++--- hw/virtio/virtio.c | 18 ++++++++++++++++++ include/hw/virtio/virtio.h | 2 ++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index b1d02f4b3d..cffc7efcae 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -384,7 +384,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) { VirtIOPCIProxy *proxy = opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - uint16_t vector; + uint16_t vector, vq_idx; hwaddr pa; switch (addr) { @@ -408,8 +408,14 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) vdev->queue_sel = val; break; case VIRTIO_PCI_QUEUE_NOTIFY: - if (val < VIRTIO_QUEUE_MAX) { - virtio_queue_notify(vdev, val); + vq_idx = val; + if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) { + if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) { + VirtQueue *vq = virtio_get_queue(vdev, vq_idx); + + virtio_queue_set_shadow_avail_idx(vq, val >> 16); + } + virtio_queue_notify(vdev, vq_idx); } break; case VIRTIO_PCI_STATUS: diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 893a072c9d..f7c99e3a96 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2264,6 +2264,24 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) } } +void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t shadow_avail_idx) +{ + if (!vq->vring.desc) { + return; + } + + /* + * 16-bit data for packed VQs include 1-bit wrap 
counter and + * 15-bit shadow_avail_idx. + */ + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + vq->shadow_avail_wrap_counter = (shadow_avail_idx >> 15) & 0x1; + vq->shadow_avail_idx = shadow_avail_idx & 0x7FFF; + } else { + vq->shadow_avail_idx = shadow_avail_idx; + } +} + static void virtio_queue_notify_vq(VirtQueue *vq) { if (vq->vring.desc && vq->handle_output) { diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 7d5ffdc145..1451926a13 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -307,6 +307,8 @@ int virtio_queue_ready(VirtQueue *vq); int virtio_queue_empty(VirtQueue *vq); +void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t idx); + /* Host binding interface. */ uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr); From 78378f450a723eed34156259ca2861a0c5ca77cf Mon Sep 17 00:00:00 2001 From: Jonah Palmer Date: Fri, 15 Mar 2024 12:55:53 -0400 Subject: [PATCH 05/85] virtio: Prevent creation of device using notification-data with ioeventfd Prevent the realization of a virtio device that attempts to use the VIRTIO_F_NOTIFICATION_DATA transport feature without disabling ioeventfd. Due to ioeventfd not being able to carry the extra data associated with this feature, having both enabled is a functional mismatch and therefore Qemu should not continue the device's realization process. Although the device does not yet know if the feature will be successfully negotiated, many devices using this feature wont actually work without this extra data and would fail FEATURES_OK anyway. If ioeventfd is able to work with the extra notification data in the future, this compatibility check can be removed. Signed-off-by: Jonah Palmer Message-Id: <20240315165557.26942-3-jonah.palmer@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index f7c99e3a96..28cd406e16 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2980,6 +2980,20 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return ret; } +static void virtio_device_check_notification_compatibility(VirtIODevice *vdev, + Error **errp) +{ + VirtioBusState *bus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); + DeviceState *proxy = DEVICE(BUS(bus)->parent); + + if (virtio_host_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA) && + k->ioeventfd_enabled(proxy)) { + error_setg(errp, + "notification_data=on without ioeventfd=off is not supported"); + } +} + size_t virtio_get_config_size(const VirtIOConfigSizeParams *params, uint64_t host_features) { @@ -3740,6 +3754,14 @@ static void virtio_device_realize(DeviceState *dev, Error **errp) } } + /* Devices should not use both ioeventfd and notification data feature */ + virtio_device_check_notification_compatibility(vdev, &err); + if (err != NULL) { + error_propagate(errp, err); + vdc->unrealize(dev); + return; + } + virtio_bus_device_plugged(vdev, &err); if (err != NULL) { error_propagate(errp, err); From 54869366be60af2eb52cffaedad73ba1f4247e15 Mon Sep 17 00:00:00 2001 From: Jonah Palmer Date: Fri, 15 Mar 2024 12:55:54 -0400 Subject: [PATCH 06/85] virtio-mmio: Handle extra notification data Add support to virtio-mmio devices for handling the extra data sent from the driver to the device when the VIRTIO_F_NOTIFICATION_DATA transport feature has been negotiated. The extra data that's passed to the virtio-mmio device when this feature is enabled varies depending on the device's virtqueue layout. The data passed to the virtio-mmio device is in the same format as the data passed to virtio-pci devices. 
Signed-off-by: Jonah Palmer Message-Id: <20240315165557.26942-4-jonah.palmer@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-mmio.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 22f9fbcf5a..320428ac0d 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -248,6 +248,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, { VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + uint16_t vq_idx; trace_virtio_mmio_write_offset(offset, value); @@ -407,8 +408,14 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, } break; case VIRTIO_MMIO_QUEUE_NOTIFY: - if (value < VIRTIO_QUEUE_MAX) { - virtio_queue_notify(vdev, value); + vq_idx = value; + if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) { + if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) { + VirtQueue *vq = virtio_get_queue(vdev, vq_idx); + + virtio_queue_set_shadow_avail_idx(vq, (value >> 16) & 0xFFFF); + } + virtio_queue_notify(vdev, vq_idx); } break; case VIRTIO_MMIO_INTERRUPT_ACK: From 594b543a4a75d08f47e5ea92c96a89502a3eab72 Mon Sep 17 00:00:00 2001 From: Jonah Palmer Date: Fri, 15 Mar 2024 12:55:55 -0400 Subject: [PATCH 07/85] virtio-ccw: Handle extra notification data Add support to virtio-ccw devices for handling the extra data sent from the driver to the device when the VIRTIO_F_NOTIFICATION_DATA transport feature has been negotiated. The extra data that's passed to the virtio-ccw device when this feature is enabled varies depending on the device's virtqueue layout. That data passed to the virtio-ccw device is in the same format as the data passed to virtio-pci devices. Signed-off-by: Jonah Palmer Message-Id: <20240315165557.26942-5-jonah.palmer@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/s390x/s390-virtio-ccw.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index cd063f8b64..8cd912f20e 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -126,9 +126,11 @@ static void subsystem_reset(void) static int virtio_ccw_hcall_notify(const uint64_t *args) { uint64_t subch_id = args[0]; - uint64_t queue = args[1]; + uint64_t data = args[1]; SubchDev *sch; + VirtIODevice *vdev; int cssid, ssid, schid, m; + uint16_t vq_idx = data; if (ioinst_disassemble_sch_ident(subch_id, &m, &cssid, &ssid, &schid)) { return -EINVAL; @@ -137,12 +139,19 @@ static int virtio_ccw_hcall_notify(const uint64_t *args) if (!sch || !css_subch_visible(sch)) { return -EINVAL; } - if (queue >= VIRTIO_QUEUE_MAX) { + + vdev = virtio_ccw_get_vdev(sch); + if (vq_idx >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, vq_idx)) { return -EINVAL; } - virtio_queue_notify(virtio_ccw_get_vdev(sch), queue); - return 0; + if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) { + virtio_queue_set_shadow_avail_idx(virtio_get_queue(vdev, vq_idx), + (data >> 16) & 0xFFFF); + } + + virtio_queue_notify(vdev, vq_idx); + return 0; } static int virtio_ccw_hcall_early_printk(const uint64_t *args) From b937fa896321fb7b6d7f2205edb5490e0e5d6c69 Mon Sep 17 00:00:00 2001 From: Jonah Palmer Date: Fri, 15 Mar 2024 12:55:56 -0400 Subject: [PATCH 08/85] vhost/vhost-user: Add VIRTIO_F_NOTIFICATION_DATA to vhost feature bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the VIRTIO_F_NOTIFICATION_DATA feature across a variety of vhost devices. The inclusion of VIRTIO_F_NOTIFICATION_DATA in the feature bits arrays for these devices ensures that the backend is capable of offering and providing support for this feature, and that it can be disabled if the backend does not support it. 
Tested-by: Lei Yang Reviewed-by: Eugenio Pérez Signed-off-by: Jonah Palmer Message-Id: <20240315165557.26942-6-jonah.palmer@oracle.com> Acked-by: Srujana Challa Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/vhost-user-blk.c | 1 + hw/net/vhost_net.c | 2 ++ hw/scsi/vhost-scsi.c | 1 + hw/scsi/vhost-user-scsi.c | 1 + hw/virtio/vhost-user-fs.c | 2 +- hw/virtio/vhost-user-vsock.c | 1 + net/vhost-vdpa.c | 1 + 7 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 9e6bbc6950..bc2677dbef 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -51,6 +51,7 @@ static const int user_feature_bits[] = { VIRTIO_F_RING_PACKED, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_RESET, + VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index fd1a93701a..18898afe81 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -48,6 +48,7 @@ static const int kernel_feature_bits[] = { VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, VIRTIO_F_RING_RESET, + VIRTIO_F_NOTIFICATION_DATA, VIRTIO_NET_F_HASH_REPORT, VHOST_INVALID_FEATURE_BIT }; @@ -55,6 +56,7 @@ static const int kernel_feature_bits[] = { /* Features supported by others. 
*/ static const int user_feature_bits[] = { VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_NOTIFICATION_DATA, VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX, diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index ae26bc19a4..3d5fe0994d 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -38,6 +38,7 @@ static const int kernel_feature_bits[] = { VIRTIO_RING_F_EVENT_IDX, VIRTIO_SCSI_F_HOTPLUG, VIRTIO_F_RING_RESET, + VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index a63b1f4948..0b050805a8 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -36,6 +36,7 @@ static const int user_feature_bits[] = { VIRTIO_RING_F_EVENT_IDX, VIRTIO_SCSI_F_HOTPLUG, VIRTIO_F_RING_RESET, + VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index cca2cd41be..ae48cc1c96 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -33,7 +33,7 @@ static const int user_feature_bits[] = { VIRTIO_F_RING_PACKED, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_RESET, - + VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index 9431b9792c..802b44a07d 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -21,6 +21,7 @@ static const int user_feature_bits[] = { VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index eda714d1a4..daa38428c5 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -62,6 +62,7 @@ const int vdpa_feature_bits[] = { VIRTIO_F_RING_PACKED, VIRTIO_F_RING_RESET, VIRTIO_F_VERSION_1, + VIRTIO_F_NOTIFICATION_DATA, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, VIRTIO_NET_F_CTRL_MAC_ADDR, From 5093bee0fa8a6c9712c96653da3a79bc37a4e45d Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Christian=20P=C3=B6tzsch?= Date: Fri, 26 Apr 2024 10:33:13 +0200 Subject: [PATCH 09/85] Fix vhost user assertion when sending more than one fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the client sends more than one region this assert triggers. The reason is that two fd's are 8 bytes and VHOST_MEMORY_BASELINE_NREGIONS is exactly 8. The assert is wrong because it should not test for the size of the fd array, but for the numbers of regions. Signed-off-by: Christian Pötzsch Message-Id: <20240426083313.3081272-1-christian.poetzsch@kernkonzept.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- subprojects/libvhost-user/libvhost-user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index a879149fef..8adb277d54 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -568,7 +568,7 @@ vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { fd_size = cmsg->cmsg_len - CMSG_LEN(0); vmsg->fd_num = fd_size / sizeof(int); - assert(fd_size < VHOST_MEMORY_BASELINE_NREGIONS); + assert(vmsg->fd_num <= VHOST_MEMORY_BASELINE_NREGIONS); memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); break; } From a0eebd790ca4f90fc1e3662cb38542ccc21963bf Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Mon, 29 Apr 2024 13:33:34 +0200 Subject: [PATCH 10/85] vhost-vsock: add VIRTIO_F_RING_PACKED to feature_bits Not having VIRTIO_F_RING_PACKED in feature_bits[] is a problem when the vhost-vsock device does not offer the feature bit VIRTIO_F_RING_PACKED but the in QEMU device is configured to try to use the packed layout (the virtio property "packed" is on). 
As of today, the Linux kernel vhost-vsock device does not support the packed queue layout (as vhost does not support packed), and does not offer VIRTIO_F_RING_PACKED. Thus when for example a vhost-vsock-ccw is used with packed=on, VIRTIO_F_RING_PACKED ends up being negotiated, despite the fact that the device does not actually support it, and one gets to keep the pieces. Fixes: 74b3e46630 ("virtio: add property to enable packed virtqueue") Reported-by: Marc Hartmayer Signed-off-by: Halil Pasic Message-Id: <20240429113334.2454197-1-pasic@linux.ibm.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vsock-common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index 12ea87d7a7..fd88df2560 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -22,6 +22,7 @@ const int feature_bits[] = { VIRTIO_VSOCK_F_SEQPACKET, VIRTIO_F_RING_RESET, + VIRTIO_F_RING_PACKED, VHOST_INVALID_FEATURE_BIT }; From 33abfea239592a706e98269b01c0096249612ea4 Mon Sep 17 00:00:00 2001 From: Wafer Date: Fri, 10 May 2024 15:27:53 +0800 Subject: [PATCH 11/85] hw/virtio: Fix obtain the buffer id from the last descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The virtio-1.3 specification writes: 2.8.6 Next Flag: Descriptor Chaining Buffer ID is included in the last descriptor in the list. If the feature (_F_INDIRECT_DESC) has been negotiated, install only one descriptor in the virtqueue. Therefore the buffer id should be obtained from the first descriptor. In descriptor chaining scenarios, the buffer id should be obtained from the last descriptor. Fixes: 86044b24e8 ("virtio: basic packed virtqueue support") Signed-off-by: Wafer Reviewed-by: Jason Wang Reviewed-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20240510072753.26158-2-wafer@jaguarmicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 28cd406e16..3678ec2f88 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1745,6 +1745,11 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) &indirect_desc_cache); } while (rc == VIRTQUEUE_READ_DESC_MORE); + if (desc_cache != &indirect_desc_cache) { + /* Buffer ID is included in the last descriptor in the list. */ + id = desc.id; + } + /* Now copy what we have collected and mapped */ elem = virtqueue_alloc_element(sz, out_num, in_num); for (i = 0; i < out_num; i++) { From 84b58169e40f5c7428db6f0b229e01213068aa21 Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Wed, 15 May 2024 15:35:25 +0800 Subject: [PATCH 12/85] virtio-pci: only reset pm state during resetting Fix bug introduced by 27ce0f3afc9dd ("fix Power Management Control Register for PCI Express virtio devices"). After this change, observe that QEMU may erroneously clear the power status of the device, or may erroneously clear non writable registers, such as NO_SOFT_RESET, etc. Only state of PM_CTRL is writable. Only when flag VIRTIO_PCI_FLAG_INIT_PM is set, need to reset state. Fixes: 27ce0f3afc9dd ("fix Power Management Control Register for PCI Express virtio devices") Signed-off-by: Jiqian Chen Message-Id: <20240515073526.17297-2-Jiqian.Chen@amd.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio-pci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index cffc7efcae..7d62e92365 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2306,10 +2306,16 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type) virtio_pci_reset(qdev); if (pci_is_express(dev)) { + VirtIOPCIProxy *proxy = VIRTIO_PCI(dev); + pcie_cap_deverr_reset(dev); pcie_cap_lnkctl_reset(dev); - pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0); + if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) { + pci_word_test_and_clear_mask( + dev->config + dev->exp.pm_cap + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK); + } } } From 80c8a26de5f1b7d67d4594957c0d82a0c47626be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 15 May 2024 14:52:37 +0400 Subject: [PATCH 13/85] vhost-user-gpu: fix import of DMABUF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using vhost-user-gpu with GL, qemu -display gtk doesn't show output and prints: qemu: eglCreateImageKHR failed Since commit 9ac06df8b ("virtio-gpu-udmabuf: correct naming of QemuDmaBuf size properties"), egl_dmabuf_import_texture() uses backing_{width,height} for the texture dimension. Fixes: 9ac06df8b ("virtio-gpu-udmabuf: correct naming of QemuDmaBuf size properties") Signed-off-by: Marc-André Lureau Message-Id: <20240515105237.1074116-1-marcandre.lureau@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/display/vhost-user-gpu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index e4b398d26c..63c64ddde6 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -281,8 +281,9 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg) modifier = m2->modifier; } - dmabuf = qemu_dmabuf_new(m->fd_width, m->fd_height, - m->fd_stride, 0, 0, 0, 0, + dmabuf = qemu_dmabuf_new(m->width, m->height, + m->fd_stride, 0, 0, + m->fd_width, m->fd_height, m->fd_drm_fourcc, modifier, fd, false, m->fd_flags & VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP); From 9569fe0aacbe6c7752935c9ede427ca1e8aafe51 Mon Sep 17 00:00:00 2001 From: Li Feng Date: Thu, 16 May 2024 10:57:45 +0800 Subject: [PATCH 14/85] Revert "vhost-user: fix lost reconnect" This reverts commit f02a4b8e6431598612466f76aac64ab492849abf. Since the current patch cannot completely fix the lost reconnect problem, there is a scenario that is not considered: - When the virtio-blk driver is removed from the guest os, s->connected has no chance to be set to false, resulting in subsequent reconnection not being executed. The next patch will completely fix this issue with a better approach. Signed-off-by: Li Feng Message-Id: <20240516025753.130171-2-fengli@smartx.com> Reviewed-by: Raphael Norwitz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/block/vhost-user-blk.c | 2 +- hw/scsi/vhost-user-scsi.c | 3 +-- hw/virtio/vhost-user-base.c | 2 +- hw/virtio/vhost-user.c | 10 ++-------- include/hw/virtio/vhost-user.h | 3 +-- 5 files changed, 6 insertions(+), 14 deletions(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index bc2677dbef..15cc24d017 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -385,7 +385,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) case CHR_EVENT_CLOSED: /* defer close until later to avoid circular close */ vhost_user_async_close(dev, &s->chardev, &s->dev, - vhost_user_blk_disconnect, vhost_user_blk_event); + vhost_user_blk_disconnect); break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 0b050805a8..421cd654f8 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -215,8 +215,7 @@ static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event) case CHR_EVENT_CLOSED: /* defer close until later to avoid circular close */ vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev, - vhost_user_scsi_disconnect, - vhost_user_scsi_event); + vhost_user_scsi_disconnect); break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c index a83167191e..4b54255682 100644 --- a/hw/virtio/vhost-user-base.c +++ b/hw/virtio/vhost-user-base.c @@ -254,7 +254,7 @@ static void vub_event(void *opaque, QEMUChrEvent event) case CHR_EVENT_CLOSED: /* defer close until later to avoid circular close */ vhost_user_async_close(dev, &vub->chardev, &vub->vhost_dev, - vub_disconnect, vub_event); + vub_disconnect); break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index cdf9af4a4b..c929097e87 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -2776,7 +2776,6 @@ typedef struct { DeviceState *dev; CharBackend 
*cd; struct vhost_dev *vhost; - IOEventHandler *event_cb; } VhostAsyncCallback; static void vhost_user_async_close_bh(void *opaque) @@ -2791,10 +2790,7 @@ static void vhost_user_async_close_bh(void *opaque) */ if (vhost->vdev) { data->cb(data->dev); - } else if (data->event_cb) { - qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb, - NULL, data->dev, NULL, true); - } + } g_free(data); } @@ -2806,8 +2802,7 @@ static void vhost_user_async_close_bh(void *opaque) */ void vhost_user_async_close(DeviceState *d, CharBackend *chardev, struct vhost_dev *vhost, - vu_async_close_fn cb, - IOEventHandler *event_cb) + vu_async_close_fn cb) { if (!runstate_check(RUN_STATE_SHUTDOWN)) { /* @@ -2823,7 +2818,6 @@ void vhost_user_async_close(DeviceState *d, data->dev = d; data->cd = chardev; data->vhost = vhost; - data->event_cb = event_cb; /* Disable any further notifications on the chardev */ qemu_chr_fe_set_handlers(chardev, diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index d7c09ffd34..324cd8663a 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -108,7 +108,6 @@ typedef void (*vu_async_close_fn)(DeviceState *cb); void vhost_user_async_close(DeviceState *d, CharBackend *chardev, struct vhost_dev *vhost, - vu_async_close_fn cb, - IOEventHandler *event_cb); + vu_async_close_fn cb); #endif From 6eaf0e612b415877d1c411b95bed2ecb53b546bb Mon Sep 17 00:00:00 2001 From: Li Feng Date: Thu, 16 May 2024 10:57:46 +0800 Subject: [PATCH 15/85] vhost-user: fix lost reconnect again When vhost-user is reconnecting to the backend and fails at get_features in vhost_dev_init(), the reconnect fails and is never retriggered. The reason is: when vhost-user fails at get_features, vhost_dev_cleanup is called immediately. vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'. The reconnect path is: vhost_user_blk_event vhost_user_async_close(..
vhost_user_blk_disconnect ..) qemu_chr_fe_set_handlers <----- clear the notifier callback schedule vhost_user_async_close_bh The vhost->vdev is null, so the vhost_user_blk_disconnect will not be called, then the event fd callback will not be reinstalled. We need to ensure that even if vhost_dev_init initialization fails, the event handler still needs to be reinstalled when s->connected is false. All vhost-user devices have this issue, including vhost-user-blk/scsi. Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling") Signed-off-by: Li Feng Message-Id: <20240516025753.130171-3-fengli@smartx.com> Reviewed-by: Raphael Norwitz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/vhost-user-blk.c | 3 ++- hw/scsi/vhost-user-scsi.c | 3 ++- hw/virtio/vhost-user-base.c | 3 ++- hw/virtio/vhost-user.c | 10 +--------- 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 15cc24d017..fdbc30b9ce 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -354,7 +354,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev) VHostUserBlk *s = VHOST_USER_BLK(vdev); if (!s->connected) { - return; + goto done; } s->connected = false; @@ -362,6 +362,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev) vhost_dev_cleanup(&s->dev); +done: /* Re-instate the event handler for new connections */ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, NULL, dev, NULL, true); diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 421cd654f8..cc91ade525 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -182,7 +182,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev) VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); if (!s->connected) { - return; + goto done; } s->connected = false; @@ -190,6 +190,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev) vhost_dev_cleanup(&vsc->dev); +done: 
/* Re-instate the event handler for new connections */ qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, vhost_user_scsi_event, NULL, dev, NULL, true); diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c index 4b54255682..11e72b1e3b 100644 --- a/hw/virtio/vhost-user-base.c +++ b/hw/virtio/vhost-user-base.c @@ -225,13 +225,14 @@ static void vub_disconnect(DeviceState *dev) VHostUserBase *vub = VHOST_USER_BASE(vdev); if (!vub->connected) { - return; + goto done; } vub->connected = false; vub_stop(vdev); vhost_dev_cleanup(&vub->vhost_dev); +done: /* Re-instate the event handler for new connections */ qemu_chr_fe_set_handlers(&vub->chardev, NULL, NULL, vub_event, diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index c929097e87..c407ea8939 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -2781,16 +2781,8 @@ typedef struct { static void vhost_user_async_close_bh(void *opaque) { VhostAsyncCallback *data = opaque; - struct vhost_dev *vhost = data->vhost; - /* - * If the vhost_dev has been cleared in the meantime there is - * nothing left to do as some other path has completed the - * cleanup. - */ - if (vhost->vdev) { - data->cb(data->dev); - } + data->cb(data->dev); g_free(data); } From 05b70ceba033759d44c6d3d9b24118cd9fc9d616 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Thu, 23 May 2024 10:44:41 -0700 Subject: [PATCH 16/85] hw/cxl/mailbox: change CCI cmd set structure to be a member, not a reference This allows devices to have fully customized CCIs, along with complex devices where wrapper devices can override or add additional CCI commands without having to replicate full command structures or pollute a base device with every command that might ever be used. Signed-off-by: Gregory Price Signed-off-by: Jonathan Cameron Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-2-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 19 +++++++++++++++---- include/hw/cxl/cxl_device.h | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index e5eb97cb91..2c9f50f0f9 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1447,10 +1447,21 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max) bg_timercb, cci); } +static void cxl_copy_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmds)[256]) +{ + for (int set = 0; set < 256; set++) { + for (int cmd = 0; cmd < 256; cmd++) { + if (cxl_cmds[set][cmd].handler) { + cci->cxl_cmd_set[set][cmd] = cxl_cmds[set][cmd]; + } + } + } +} + void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf, DeviceState *d, size_t payload_max) { - cci->cxl_cmd_set = cxl_cmd_set_sw; + cxl_copy_cci_commands(cci, cxl_cmd_set_sw); cci->d = d; cci->intf = intf; cxl_init_cci(cci, payload_max); @@ -1458,7 +1469,7 @@ void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf, void cxl_initialize_mailbox_t3(CXLCCI *cci, DeviceState *d, size_t payload_max) { - cci->cxl_cmd_set = cxl_cmd_set; + cxl_copy_cci_commands(cci, cxl_cmd_set); cci->d = d; /* No separation for PCI MB as protocol handled in PCI device */ @@ -1476,7 +1487,7 @@ static const struct cxl_cmd cxl_cmd_set_t3_ld[256][256] = { void cxl_initialize_t3_ld_cci(CXLCCI *cci, DeviceState *d, DeviceState *intf, size_t payload_max) { - cci->cxl_cmd_set = cxl_cmd_set_t3_ld; + cxl_copy_cci_commands(cci, cxl_cmd_set_t3_ld); cci->d = d; cci->intf = intf; cxl_init_cci(cci, payload_max); @@ -1496,7 +1507,7 @@ void cxl_initialize_t3_fm_owned_ld_mctpcci(CXLCCI *cci, DeviceState *d, DeviceState *intf, size_t payload_max) { - cci->cxl_cmd_set = cxl_cmd_set_t3_fm_owned_ld_mctp; + cxl_copy_cci_commands(cci, cxl_cmd_set_t3_fm_owned_ld_mctp); cci->d = d; cci->intf = intf; cxl_init_cci(cci, payload_max); diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 
279b276bda..ccc4611875 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -164,7 +164,7 @@ typedef struct CXLEventLog { } CXLEventLog; typedef struct CXLCCI { - const struct cxl_cmd (*cxl_cmd_set)[256]; + struct cxl_cmd cxl_cmd_set[256][256]; struct cel_log { uint16_t opcode; uint16_t effect; From 67adb7979b6c1151a906d99f19fccf1e789316b1 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Thu, 23 May 2024 10:44:42 -0700 Subject: [PATCH 17/85] hw/cxl/mailbox: interface to add CCI commands to an existing CCI This enables wrapper devices to customize the base device's CCI (for example, with custom commands outside the specification) without the need to change the base device. This also enables the base device to dispatch those commands without requiring additional driver support. Heavily edited by Jonathan Cameron to increase code reuse Signed-off-by: Gregory Price Signed-off-by: Jonathan Cameron Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-3-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 19 +++++++++++++++++-- include/hw/cxl/cxl_device.h | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 2c9f50f0f9..2a64c58e2f 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1424,9 +1424,9 @@ static void bg_timercb(void *opaque) } } -void cxl_init_cci(CXLCCI *cci, size_t payload_max) +static void cxl_rebuild_cel(CXLCCI *cci) { - cci->payload_max = payload_max; + cci->cel_size = 0; /* Reset for a fresh build */ for (int set = 0; set < 256; set++) { for (int cmd = 0; cmd < 256; cmd++) { if (cci->cxl_cmd_set[set][cmd].handler) { @@ -1440,6 +1440,13 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max) } } } +} + +void cxl_init_cci(CXLCCI *cci, size_t payload_max) +{ + cci->payload_max = payload_max; + cxl_rebuild_cel(cci); + cci->bg.complete_pct = 0; cci->bg.starttime = 0; cci->bg.runtime = 0; @@ -1458,6 +1465,14 @@ static void cxl_copy_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmds)[ } } +void cxl_add_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmd_set)[256], + size_t payload_max) +{ + cci->payload_max = MAX(payload_max, cci->payload_max); + cxl_copy_cci_commands(cci, cxl_cmd_set); + cxl_rebuild_cel(cci); +} + void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf, DeviceState *d, size_t payload_max) { diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index ccc4611875..a5f8e25020 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -301,6 +301,8 @@ void cxl_initialize_mailbox_t3(CXLCCI *cci, DeviceState *d, size_t payload_max); void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf, DeviceState *d, size_t payload_max); void cxl_init_cci(CXLCCI *cci, size_t payload_max); +void cxl_add_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmd_set)[256], + size_t payload_max); int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t 
cmd, size_t len_in, uint8_t *pl_in, size_t *len_out, uint8_t *pl_out, From 7a21e5dedbbcec11ebab7a53186085f09a53f9e7 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:43 -0700 Subject: [PATCH 18/85] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command Based on CXL spec r3.1 Table 8-127 (Identify Memory Device Output Payload), dynamic capacity event log size should be part of output of the Identify command. Add dc_event_log_size to the output payload for the host to get the info. Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-4-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 2a64c58e2f..626acc1d0d 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -21,6 +21,7 @@ #include "sysemu/hostmem.h" #define CXL_CAPACITY_MULTIPLIER (256 * MiB) +#define CXL_DC_EVENT_LOG_SIZE 8 /* * How to add a new command, example. The command set FOO, with cmd BAR. 
@@ -780,8 +781,9 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd, uint16_t inject_poison_limit; uint8_t poison_caps; uint8_t qos_telemetry_caps; + uint16_t dc_event_log_size; } QEMU_PACKED *id; - QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43); + QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45); CXLType3Dev *ct3d = CXL_TYPE3(cci->d); CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate; @@ -807,6 +809,7 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd, st24_le_p(id->poison_list_max_mer, 256); /* No limit - so limited by main poison record limit */ stw_le_p(&id->inject_poison_limit, 0); + stw_le_p(&id->dc_event_log_size, CXL_DC_EVENT_LOG_SIZE); *len_out = sizeof(*id); return CXL_MBOX_SUCCESS; From 0f0f140b100392fd938eb6933752155ea68b26a8 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:44 -0700 Subject: [PATCH 19/85] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support Per cxl spec r3.1, add dynamic capacity (DC) region representative based on Table 8-165 and extend the cxl type3 device definition to include DC region information. Also, based on info in 8.2.9.9.9.1, add 'Get Dynamic Capacity Configuration' mailbox support. Note: we store region decode length as byte-wise length on the device, which should be divided by 256 * MiB before being returned to the host for "Get Dynamic Capacity Configuration" mailbox command per specification. Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-5-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 96 +++++++++++++++++++++++++++++++++++++ include/hw/cxl/cxl_device.h | 16 +++++++ 2 files changed, 112 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 626acc1d0d..bede28e3c8 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -22,6 +22,8 @@ #define CXL_CAPACITY_MULTIPLIER (256 * MiB) #define CXL_DC_EVENT_LOG_SIZE 8 +#define CXL_NUM_EXTENTS_SUPPORTED 512 +#define CXL_NUM_TAGS_SUPPORTED 0 /* * How to add a new command, example. The command set FOO, with cmd BAR. @@ -80,6 +82,8 @@ enum { #define GET_POISON_LIST 0x0 #define INJECT_POISON 0x1 #define CLEAR_POISON 0x2 + DCD_CONFIG = 0x48, + #define GET_DC_CONFIG 0x0 PHYSICAL_SWITCH = 0x51, #define IDENTIFY_SWITCH_DEVICE 0x0 #define GET_PHYSICAL_PORT_STATE 0x1 @@ -1238,6 +1242,88 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* + * CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration + * (Opcode: 4800h) + */ +static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + struct { + uint8_t region_cnt; + uint8_t start_rid; + } QEMU_PACKED *in = (void *)payload_in; + struct { + uint8_t num_regions; + uint8_t regions_returned; + uint8_t rsvd1[6]; + struct { + uint64_t base; + uint64_t decode_len; + uint64_t region_len; + uint64_t block_size; + uint32_t dsmadhandle; + uint8_t flags; + uint8_t rsvd2[3]; + } QEMU_PACKED records[]; + } QEMU_PACKED *out = (void *)payload_out; + struct { + uint32_t num_extents_supported; + uint32_t num_extents_available; + uint32_t num_tags_supported; + uint32_t num_tags_available; + } QEMU_PACKED *extra_out; + uint16_t record_count; + uint16_t i; + uint16_t out_pl_len; + uint8_t start_rid; + + start_rid = in->start_rid; + if (start_rid >= ct3d->dc.num_regions) { + return 
CXL_MBOX_INVALID_INPUT; + } + + record_count = MIN(ct3d->dc.num_regions - in->start_rid, in->region_cnt); + + out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); + extra_out = (void *)(payload_out + out_pl_len); + out_pl_len += sizeof(*extra_out); + assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE); + + out->num_regions = ct3d->dc.num_regions; + out->regions_returned = record_count; + for (i = 0; i < record_count; i++) { + stq_le_p(&out->records[i].base, + ct3d->dc.regions[start_rid + i].base); + stq_le_p(&out->records[i].decode_len, + ct3d->dc.regions[start_rid + i].decode_len / + CXL_CAPACITY_MULTIPLIER); + stq_le_p(&out->records[i].region_len, + ct3d->dc.regions[start_rid + i].len); + stq_le_p(&out->records[i].block_size, + ct3d->dc.regions[start_rid + i].block_size); + stl_le_p(&out->records[i].dsmadhandle, + ct3d->dc.regions[start_rid + i].dsmadhandle); + out->records[i].flags = ct3d->dc.regions[start_rid + i].flags; + } + /* + * TODO: Assign values once extents and tags are introduced + * to use. 
+ */ + stl_le_p(&extra_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED); + stl_le_p(&extra_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED); + stl_le_p(&extra_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED); + stl_le_p(&extra_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED); + + *len_out = out_pl_len; + return CXL_MBOX_SUCCESS; +} + #define IMMEDIATE_CONFIG_CHANGE (1 << 1) #define IMMEDIATE_DATA_CHANGE (1 << 2) #define IMMEDIATE_POLICY_CHANGE (1 << 3) @@ -1282,6 +1368,11 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = { cmd_media_clear_poison, 72, 0 }, }; +static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = { + [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG", + cmd_dcd_get_dyn_cap_config, 2, 0 }, +}; + static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { [INFOSTAT][IS_IDENTIFY] = { "IDENTIFY", cmd_infostat_identify, 0, 0 }, [INFOSTAT][BACKGROUND_OPERATION_STATUS] = { "BACKGROUND_OPERATION_STATUS", @@ -1487,7 +1578,12 @@ void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf, void cxl_initialize_mailbox_t3(CXLCCI *cci, DeviceState *d, size_t payload_max) { + CXLType3Dev *ct3d = CXL_TYPE3(d); + cxl_copy_cci_commands(cci, cxl_cmd_set); + if (ct3d->dc.num_regions) { + cxl_copy_cci_commands(cci, cxl_cmd_set_dcd); + } cci->d = d; /* No separation for PCI MB as protocol handled in PCI device */ diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index a5f8e25020..e839370266 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -422,6 +422,17 @@ typedef struct CXLPoison { typedef QLIST_HEAD(, CXLPoison) CXLPoisonList; #define CXL_POISON_LIST_LIMIT 256 +#define DCD_MAX_NUM_REGION 8 + +typedef struct CXLDCRegion { + uint64_t base; /* aligned to 256*MiB */ + uint64_t decode_len; /* aligned to 256*MiB */ + uint64_t len; + uint64_t block_size; + uint32_t dsmadhandle; + uint8_t flags; +} CXLDCRegion; + struct CXLType3Dev { /* Private */ PCIDevice parent_obj; @@ -454,6 +465,11 @@ struct 
CXLType3Dev { unsigned int poison_list_cnt; bool poison_list_overflowed; uint64_t poison_list_overflow_ts; + + struct dynamic_capacity { + uint8_t num_regions; /* 0-8 regions */ + CXLDCRegion regions[DCD_MAX_NUM_REGION]; + } dc; }; #define TYPE_CXL_TYPE3 "cxl-type3" From 25851080772387ae33d6ee94250b3a31bf719e5c Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:45 -0700 Subject: [PATCH 20/85] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices Rename mem_size as static_mem_size for type3 memdev to cover static RAM and pmem capacity, preparing for the introduction of dynamic capacity to support dynamic capacity devices. Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-6-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 4 ++-- hw/mem/cxl_type3.c | 8 ++++---- include/hw/cxl/cxl_device.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index bede28e3c8..b592473587 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -803,7 +803,7 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd, snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0); stq_le_p(&id->total_capacity, - cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER); + cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER); stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER); stq_le_p(&id->volatile_capacity, @@ -1179,7 +1179,7 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd, struct clear_poison_pl *in = (void *)payload_in; dpa = ldq_le_p(&in->dpa); - if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) { + if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) { return CXL_MBOX_INVALID_PA; } diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 
3e42490b6c..7194c8f902 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -608,7 +608,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) } address_space_init(&ct3d->hostvmem_as, vmr, v_name); ct3d->cxl_dstate.vmem_size = memory_region_size(vmr); - ct3d->cxl_dstate.mem_size += memory_region_size(vmr); + ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr); g_free(v_name); } @@ -631,7 +631,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) } address_space_init(&ct3d->hostpmem_as, pmr, p_name); ct3d->cxl_dstate.pmem_size = memory_region_size(pmr); - ct3d->cxl_dstate.mem_size += memory_region_size(pmr); + ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr); g_free(p_name); } @@ -837,7 +837,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d, return -EINVAL; } - if (*dpa_offset > ct3d->cxl_dstate.mem_size) { + if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) { return -EINVAL; } @@ -1010,7 +1010,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data) return false; } - if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) { + if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) { return false; } diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index e839370266..f7f56b44e3 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -234,7 +234,7 @@ typedef struct cxl_device_state { } timestamp; /* memory region size, HDM */ - uint64_t mem_size; + uint64_t static_mem_size; uint64_t pmem_size; uint64_t vmem_size; From f4fd91af3ae3b80c795ff5f3e0c1001b14ceb761 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:46 -0700 Subject: [PATCH 21/85] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices With the change, when setting up memory for type3 memory device, we can create DC regions. 
A property 'num-dc-regions' is added to ct3_props to allow users to pass the number of DC regions to create. To make it easier, other region parameters like region base, length, and block size are hard coded. If needed, these parameters can be added easily. With the change, we can create DC regions with proper kernel side support like below: region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region) echo $region > /sys/bus/cxl/devices/decoder0.0/create_dc_region echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity echo 1 > /sys/bus/cxl/devices/$region/interleave_ways echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode echo 0x40000000 >/sys/bus/cxl/devices/decoder2.0/dpa_size echo 0x40000000 > /sys/bus/cxl/devices/$region/size echo "decoder2.0" > /sys/bus/cxl/devices/$region/target0 echo 1 > /sys/bus/cxl/devices/$region/commit echo $region > /sys/bus/cxl/drivers/cxl_region/bind Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-7-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Li Zhijian --- hw/mem/cxl_type3.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 7194c8f902..06c6f9bb78 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -30,6 +30,7 @@ #include "hw/pci/msix.h" #define DWORD_BYTE 4 +#define CXL_CAPACITY_MULTIPLIER (256 * MiB) /* Default CDAT entries for a memory region */ enum { @@ -567,6 +568,50 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, } } +/* + * TODO: dc region configuration will be updated once host backend and address + * space support is added for DCD. 
+ */ +static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) +{ + int i; + uint64_t region_base = 0; + uint64_t region_len = 2 * GiB; + uint64_t decode_len = 2 * GiB; + uint64_t blk_size = 2 * MiB; + CXLDCRegion *region; + MemoryRegion *mr; + + if (ct3d->hostvmem) { + mr = host_memory_backend_get_memory(ct3d->hostvmem); + region_base += memory_region_size(mr); + } + if (ct3d->hostpmem) { + mr = host_memory_backend_get_memory(ct3d->hostpmem); + region_base += memory_region_size(mr); + } + if (region_base % CXL_CAPACITY_MULTIPLIER != 0) { + error_setg(errp, "DC region base not aligned to 0x%lx", + CXL_CAPACITY_MULTIPLIER); + return false; + } + + for (i = 0, region = &ct3d->dc.regions[0]; + i < ct3d->dc.num_regions; + i++, region++, region_base += region_len) { + *region = (CXLDCRegion) { + .base = region_base, + .decode_len = decode_len, + .len = region_len, + .block_size = blk_size, + /* dsmad_handle set when creating CDAT table entries */ + .flags = 0, + }; + } + + return true; +} + static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) { DeviceState *ds = DEVICE(ct3d); @@ -635,6 +680,13 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) g_free(p_name); } + if (ct3d->dc.num_regions > 0) { + if (!cxl_create_dc_regions(ct3d, errp)) { + error_append_hint(errp, "setup DC regions failed"); + return false; + } + } + return true; } @@ -930,6 +982,7 @@ static Property ct3_props[] = { HostMemoryBackend *), DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL), DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename), + DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0), DEFINE_PROP_END_OF_LIST(), }; From 69e4fb569dc1602bfeef5b8c58de5f40cd5d756e Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:47 -0700 Subject: [PATCH 22/85] hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr size instead of mr as argument The function ct3_build_cdat_entries_for_mr only uses size of the passed memory 
region argument, refactor the function definition to make the passed arguments more specific. Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-8-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/cxl_type3.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 06c6f9bb78..51be50ce87 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -44,7 +44,7 @@ enum { }; static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, - int dsmad_handle, MemoryRegion *mr, + int dsmad_handle, uint64_t size, bool is_pmem, uint64_t dpa_base) { CDATDsmas *dsmas; @@ -63,7 +63,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, .DSMADhandle = dsmad_handle, .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0, .DPA_base = dpa_base, - .DPA_length = memory_region_size(mr), + .DPA_length = size, }; /* For now, no memory side cache, plausiblish numbers */ @@ -132,7 +132,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, */ .EFI_memory_type_attr = is_pmem ? 
2 : 1, .DPA_offset = 0, - .DPA_length = memory_region_size(mr), + .DPA_length = size, }; /* Header always at start of structure */ @@ -149,6 +149,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) g_autofree CDATSubHeader **table = NULL; CXLType3Dev *ct3d = priv; MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL; + uint64_t vmr_size = 0, pmr_size = 0; int dsmad_handle = 0; int cur_ent = 0; int len = 0; @@ -163,6 +164,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) return -EINVAL; } len += CT3_CDAT_NUM_ENTRIES; + vmr_size = memory_region_size(volatile_mr); } if (ct3d->hostpmem) { @@ -171,21 +173,22 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) return -EINVAL; } len += CT3_CDAT_NUM_ENTRIES; + pmr_size = memory_region_size(nonvolatile_mr); } table = g_malloc0(len * sizeof(*table)); /* Now fill them in */ if (volatile_mr) { - ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr, + ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size, false, 0); cur_ent = CT3_CDAT_NUM_ENTRIES; } if (nonvolatile_mr) { - uint64_t base = volatile_mr ? memory_region_size(volatile_mr) : 0; + uint64_t base = vmr_size; ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++, - nonvolatile_mr, true, base); + pmr_size, true, base); cur_ent += CT3_CDAT_NUM_ENTRIES; } assert(len == cur_ent); From 90de94612bb568117e038c6ce9edd35d17d239f9 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:48 -0700 Subject: [PATCH 23/85] hw/mem/cxl_type3: Add host backend and address space handling for DC regions Add (file/memory backed) host backend for DCD. All the dynamic capacity regions will share a single, large enough host backend. Set up address space for DC regions to support read/write operations to dynamic capacity for DCD. With the change, the following support is added: 1. 
Add a new property to type3 device "volatile-dc-memdev" to point to host memory backend for dynamic capacity. Currently, all DC regions share one host backend; 2. Add address space for dynamic capacity for read/write support; 3. Create cdat entries for each dynamic capacity region. Reviewed-by: Gregory Price Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-9-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 16 +++- hw/mem/cxl_type3.c | 177 +++++++++++++++++++++++++++++------- include/hw/cxl/cxl_device.h | 8 ++ 3 files changed, 164 insertions(+), 37 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index b592473587..6ad227f112 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -622,7 +622,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd, size_t *len_out, CXLCCI *cci) { - CXLDeviceState *cxl_dstate = &CXL_TYPE3(cci->d)->cxl_dstate; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate; struct { uint8_t slots_supported; uint8_t slot_info; @@ -636,7 +637,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd, QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50); if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) || - (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) { + (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) || + (ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) { return CXL_MBOX_INTERNAL_ERROR; } @@ -793,7 +795,8 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd, CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate; if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) || - (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) { + (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) || + (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) { return
CXL_MBOX_INTERNAL_ERROR; } @@ -835,9 +838,11 @@ static CXLRetCode cmd_ccls_get_partition_info(const struct cxl_cmd *cmd, uint64_t next_pmem; } QEMU_PACKED *part_info = (void *)payload_out; QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20); + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) || - (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) { + (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) || + (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) { return CXL_MBOX_INTERNAL_ERROR; } @@ -1179,7 +1184,8 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd, struct clear_poison_pl *in = (void *)payload_in; dpa = ldq_le_p(&in->dpa); - if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) { + if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size + + ct3d->dc.total_capacity) { return CXL_MBOX_INVALID_PA; } diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 51be50ce87..658570aa1a 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -45,7 +45,8 @@ enum { static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, int dsmad_handle, uint64_t size, - bool is_pmem, uint64_t dpa_base) + bool is_pmem, bool is_dynamic, + uint64_t dpa_base) { CDATDsmas *dsmas; CDATDslbis *dslbis0; @@ -61,7 +62,8 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, .length = sizeof(*dsmas), }, .DSMADhandle = dsmad_handle, - .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0, + .flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) | + (is_dynamic ? 
CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0), .DPA_base = dpa_base, .DPA_length = size, }; @@ -149,12 +151,13 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) g_autofree CDATSubHeader **table = NULL; CXLType3Dev *ct3d = priv; MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL; + MemoryRegion *dc_mr = NULL; uint64_t vmr_size = 0, pmr_size = 0; int dsmad_handle = 0; int cur_ent = 0; int len = 0; - if (!ct3d->hostpmem && !ct3d->hostvmem) { + if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) { return 0; } @@ -176,21 +179,54 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) pmr_size = memory_region_size(nonvolatile_mr); } + if (ct3d->dc.num_regions) { + if (!ct3d->dc.host_dc) { + return -EINVAL; + } + dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc); + if (!dc_mr) { + return -EINVAL; + } + len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions; + } + table = g_malloc0(len * sizeof(*table)); /* Now fill them in */ if (volatile_mr) { ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size, - false, 0); + false, false, 0); cur_ent = CT3_CDAT_NUM_ENTRIES; } if (nonvolatile_mr) { uint64_t base = vmr_size; ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++, - pmr_size, true, base); + pmr_size, true, false, base); cur_ent += CT3_CDAT_NUM_ENTRIES; } + + if (dc_mr) { + int i; + uint64_t region_base = vmr_size + pmr_size; + + /* + * We assume the dynamic capacity to be volatile for now. + * Non-volatile dynamic capacity will be added if needed in the + * future. 
+ */ + for (i = 0; i < ct3d->dc.num_regions; i++) { + ct3_build_cdat_entries_for_mr(&(table[cur_ent]), + dsmad_handle++, + ct3d->dc.regions[i].len, + false, true, region_base); + ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1; + + cur_ent += CT3_CDAT_NUM_ENTRIES; + region_base += ct3d->dc.regions[i].len; + } + } + assert(len == cur_ent); *cdat_table = g_steal_pointer(&table); @@ -301,10 +337,17 @@ static void build_dvsecs(CXLType3Dev *ct3d) range2_size_lo = (2 << 5) | (2 << 2) | 0x3 | (ct3d->hostpmem->size & 0xF0000000); } - } else { + } else if (ct3d->hostpmem) { range1_size_hi = ct3d->hostpmem->size >> 32; range1_size_lo = (2 << 5) | (2 << 2) | 0x3 | (ct3d->hostpmem->size & 0xF0000000); + } else { + /* + * For DCD with no static memory, set memory active, memory class bits. + * No range is set. + */ + range1_size_hi = 0; + range1_size_lo = (2 << 5) | (2 << 2) | 0x3; } dvsec = (uint8_t *)&(CXLDVSECDevice){ @@ -579,11 +622,29 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) { int i; uint64_t region_base = 0; - uint64_t region_len = 2 * GiB; - uint64_t decode_len = 2 * GiB; + uint64_t region_len; + uint64_t decode_len; uint64_t blk_size = 2 * MiB; CXLDCRegion *region; MemoryRegion *mr; + uint64_t dc_size; + + mr = host_memory_backend_get_memory(ct3d->dc.host_dc); + dc_size = memory_region_size(mr); + region_len = DIV_ROUND_UP(dc_size, ct3d->dc.num_regions); + + if (dc_size % (ct3d->dc.num_regions * CXL_CAPACITY_MULTIPLIER) != 0) { + error_setg(errp, + "backend size is not multiple of region len: 0x%" PRIx64, + region_len); + return false; + } + if (region_len % CXL_CAPACITY_MULTIPLIER != 0) { + error_setg(errp, "DC region size is unaligned to 0x%" PRIx64, + CXL_CAPACITY_MULTIPLIER); + return false; + } + decode_len = region_len; if (ct3d->hostvmem) { mr = host_memory_backend_get_memory(ct3d->hostvmem); @@ -594,7 +655,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) region_base += memory_region_size(mr); } if 
(region_base % CXL_CAPACITY_MULTIPLIER != 0) { - error_setg(errp, "DC region base not aligned to 0x%lx", + error_setg(errp, "DC region base not aligned to 0x%" PRIx64, CXL_CAPACITY_MULTIPLIER); return false; } @@ -610,6 +671,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) /* dsmad_handle set when creating CDAT table entries */ .flags = 0, }; + ct3d->dc.total_capacity += region->len; } return true; @@ -619,7 +681,8 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) { DeviceState *ds = DEVICE(ct3d); - if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) { + if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem + && !ct3d->dc.num_regions) { error_setg(errp, "at least one memdev property must be set"); return false; } else if (ct3d->hostmem && ct3d->hostpmem) { @@ -683,7 +746,37 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) g_free(p_name); } + ct3d->dc.total_capacity = 0; if (ct3d->dc.num_regions > 0) { + MemoryRegion *dc_mr; + char *dc_name; + + if (!ct3d->dc.host_dc) { + error_setg(errp, "dynamic capacity must have a backing device"); + return false; + } + + dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc); + if (!dc_mr) { + error_setg(errp, "dynamic capacity must have a backing device"); + return false; + } + + /* + * Set DC regions as volatile for now, non-volatile support can + * be added in the future if needed. 
+ */ + memory_region_set_nonvolatile(dc_mr, false); + memory_region_set_enabled(dc_mr, true); + host_memory_backend_set_mapped(ct3d->dc.host_dc, true); + if (ds->id) { + dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id); + } else { + dc_name = g_strdup("cxl-dcd-dpa-dc-space"); + } + address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name); + g_free(dc_name); + if (!cxl_create_dc_regions(ct3d, errp)) { error_append_hint(errp, "setup DC regions failed"); return false; @@ -779,6 +872,9 @@ err_release_cdat: err_free_special_ops: g_free(regs->special_ops); err_address_space_free: + if (ct3d->dc.host_dc) { + address_space_destroy(&ct3d->dc.host_dc_as); + } if (ct3d->hostpmem) { address_space_destroy(&ct3d->hostpmem_as); } @@ -797,6 +893,9 @@ static void ct3_exit(PCIDevice *pci_dev) pcie_aer_exit(pci_dev); cxl_doe_cdat_release(cxl_cstate); g_free(regs->special_ops); + if (ct3d->dc.host_dc) { + address_space_destroy(&ct3d->dc.host_dc_as); + } if (ct3d->hostpmem) { address_space_destroy(&ct3d->hostpmem_as); } @@ -875,16 +974,23 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d, AddressSpace **as, uint64_t *dpa_offset) { - MemoryRegion *vmr = NULL, *pmr = NULL; + MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL; + uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0; if (ct3d->hostvmem) { vmr = host_memory_backend_get_memory(ct3d->hostvmem); + vmr_size = memory_region_size(vmr); } if (ct3d->hostpmem) { pmr = host_memory_backend_get_memory(ct3d->hostpmem); + pmr_size = memory_region_size(pmr); + } + if (ct3d->dc.host_dc) { + dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc); + dc_size = memory_region_size(dc_mr); } - if (!vmr && !pmr) { + if (!vmr && !pmr && !dc_mr) { return -ENODEV; } @@ -892,19 +998,18 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d, return -EINVAL; } - if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) { + if (*dpa_offset >= vmr_size + pmr_size + dc_size) { return -EINVAL; } - if (vmr) { - if (*dpa_offset < 
memory_region_size(vmr)) { - *as = &ct3d->hostvmem_as; - } else { - *as = &ct3d->hostpmem_as; - *dpa_offset -= memory_region_size(vmr); - } - } else { + if (*dpa_offset < vmr_size) { + *as = &ct3d->hostvmem_as; + } else if (*dpa_offset < vmr_size + pmr_size) { *as = &ct3d->hostpmem_as; + *dpa_offset -= vmr_size; + } else { + *as = &ct3d->dc.host_dc_as; + *dpa_offset -= (vmr_size + pmr_size); } return 0; @@ -986,6 +1091,8 @@ static Property ct3_props[] = { DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL), DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename), DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0), + DEFINE_PROP_LINK("volatile-dc-memdev", CXLType3Dev, dc.host_dc, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), DEFINE_PROP_END_OF_LIST(), }; @@ -1052,33 +1159,39 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size, static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data) { - MemoryRegion *vmr = NULL, *pmr = NULL; + MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL; AddressSpace *as; + uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0; if (ct3d->hostvmem) { vmr = host_memory_backend_get_memory(ct3d->hostvmem); + vmr_size = memory_region_size(vmr); } if (ct3d->hostpmem) { pmr = host_memory_backend_get_memory(ct3d->hostpmem); + pmr_size = memory_region_size(pmr); } + if (ct3d->dc.host_dc) { + dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc); + dc_size = memory_region_size(dc_mr); + } - if (!vmr && !pmr) { + if (!vmr && !pmr && !dc_mr) { return false; } - if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) { + if (dpa_offset + CXL_CACHE_LINE_SIZE > vmr_size + pmr_size + dc_size) { return false; } - if (vmr) { - if (dpa_offset < memory_region_size(vmr)) { - as = &ct3d->hostvmem_as; - } else { - as = &ct3d->hostpmem_as; - dpa_offset -= memory_region_size(vmr); - } - } else { + if (dpa_offset < vmr_size) { + as = &ct3d->hostvmem_as; + } else if (dpa_offset < 
vmr_size + pmr_size) { as = &ct3d->hostpmem_as; + dpa_offset -= vmr_size; + } else { + as = &ct3d->dc.host_dc_as; + dpa_offset -= (vmr_size + pmr_size); } address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data, diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index f7f56b44e3..c2c3df0d2a 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -467,6 +467,14 @@ struct CXLType3Dev { uint64_t poison_list_overflow_ts; struct dynamic_capacity { + HostMemoryBackend *host_dc; + AddressSpace host_dc_as; + /* + * total_capacity is equivalent to the dynamic capacity + * memory region size. + */ + uint64_t total_capacity; /* 256M aligned */ + uint8_t num_regions; /* 0-8 regions */ CXLDCRegion regions[DCD_MAX_NUM_REGION]; } dc; From 1c9221f19e62e448a9ca71a2d5c8a369102a0c38 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:49 -0700 Subject: [PATCH 24/85] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support Add dynamic capacity extent list representation to the definition of CXLType3Dev and implement get DC extent list mailbox command per CXL spec r3.1 section 8.2.9.9.9.2. Tested-by: Svetly Todorov Reviewed-by: Jonathan Cameron Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-10-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 73 ++++++++++++++++++++++++++++++++++++- hw/mem/cxl_type3.c | 1 + include/hw/cxl/cxl_device.h | 22 +++++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 6ad227f112..7872d2f3e6 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -84,6 +84,7 @@ enum { #define CLEAR_POISON 0x2 DCD_CONFIG = 0x48, #define GET_DC_CONFIG 0x0 + #define GET_DYN_CAP_EXT_LIST 0x1 PHYSICAL_SWITCH = 0x51, #define IDENTIFY_SWITCH_DEVICE 0x0 #define GET_PHYSICAL_PORT_STATE 0x1 @@ -1322,7 +1323,8 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd, * to use. */ stl_le_p(&extra_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED); - stl_le_p(&extra_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED); + stl_le_p(&extra_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED - + ct3d->dc.total_extent_count); stl_le_p(&extra_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED); stl_le_p(&extra_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED); @@ -1330,6 +1332,72 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* + * CXL r3.1 section 8.2.9.9.9.2: + * Get Dynamic Capacity Extent List (Opcode 4801h) + */ +static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + struct { + uint32_t extent_cnt; + uint32_t start_extent_id; + } QEMU_PACKED *in = (void *)payload_in; + struct { + uint32_t count; + uint32_t total_extents; + uint32_t generation_num; + uint8_t rsvd[4]; + CXLDCExtentRaw records[]; + } QEMU_PACKED *out = (void *)payload_out; + uint32_t start_extent_id = in->start_extent_id; + CXLDCExtentList *extent_list = &ct3d->dc.extents; + uint16_t record_count = 0, i = 0, record_done = 0; + uint16_t out_pl_len, size; + CXLDCExtent 
*ent; + + if (start_extent_id > ct3d->dc.total_extent_count) { + return CXL_MBOX_INVALID_INPUT; + } + + record_count = MIN(in->extent_cnt, + ct3d->dc.total_extent_count - start_extent_id); + size = CXL_MAILBOX_MAX_PAYLOAD_SIZE - sizeof(*out); + record_count = MIN(record_count, size / sizeof(out->records[0])); + out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); + + stl_le_p(&out->count, record_count); + stl_le_p(&out->total_extents, ct3d->dc.total_extent_count); + stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq); + + if (record_count > 0) { + CXLDCExtentRaw *out_rec = &out->records[record_done]; + + QTAILQ_FOREACH(ent, extent_list, node) { + if (i++ < start_extent_id) { + continue; + } + stq_le_p(&out_rec->start_dpa, ent->start_dpa); + stq_le_p(&out_rec->len, ent->len); + memcpy(&out_rec->tag, ent->tag, 0x10); + stw_le_p(&out_rec->shared_seq, ent->shared_seq); + + record_done++; + if (record_done == record_count) { + break; + } + } + } + + *len_out = out_pl_len; + return CXL_MBOX_SUCCESS; +} + #define IMMEDIATE_CONFIG_CHANGE (1 << 1) #define IMMEDIATE_DATA_CHANGE (1 << 2) #define IMMEDIATE_POLICY_CHANGE (1 << 3) @@ -1377,6 +1445,9 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = { static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = { [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG", cmd_dcd_get_dyn_cap_config, 2, 0 }, + [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = { + "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list, + 8, 0 }, }; static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 658570aa1a..2075846b1b 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -673,6 +673,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) }; ct3d->dc.total_capacity += region->len; } + QTAILQ_INIT(&ct3d->dc.extents); return true; } diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index c2c3df0d2a..6aec6ac983 100644 --- 
a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -424,6 +424,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList; #define DCD_MAX_NUM_REGION 8 +typedef struct CXLDCExtentRaw { + uint64_t start_dpa; + uint64_t len; + uint8_t tag[0x10]; + uint16_t shared_seq; + uint8_t rsvd[0x6]; +} QEMU_PACKED CXLDCExtentRaw; + +typedef struct CXLDCExtent { + uint64_t start_dpa; + uint64_t len; + uint8_t tag[0x10]; + uint16_t shared_seq; + uint8_t rsvd[0x6]; + + QTAILQ_ENTRY(CXLDCExtent) node; +} CXLDCExtent; +typedef QTAILQ_HEAD(, CXLDCExtent) CXLDCExtentList; + typedef struct CXLDCRegion { uint64_t base; /* aligned to 256*MiB */ uint64_t decode_len; /* aligned to 256*MiB */ @@ -474,6 +493,9 @@ struct CXLType3Dev { * memory region size. */ uint64_t total_capacity; /* 256M aligned */ + CXLDCExtentList extents; + uint32_t total_extent_count; + uint32_t ext_list_gen_seq; uint8_t num_regions; /* 0-8 regions */ CXLDCRegion regions[DCD_MAX_NUM_REGION]; From 16fd1b1216a2895a7995345ad6630151954c43a3 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:50 -0700 Subject: [PATCH 25/85] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response Per CXL spec 3.1, two mailbox commands are implemented: Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4. To process the above two commands, we use a two-pass approach. Pass 1: Check whether the input payload is valid or not; if not, skip Pass 2 and return mailbox process error. Pass 2: Do the real work--add or release extents, respectively. Tested-by: Svetly Todorov Reviewed-by: Gregory Price Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-11-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 394 ++++++++++++++++++++++++++++++++++++ hw/mem/cxl_type3.c | 11 + include/hw/cxl/cxl_device.h | 4 + 3 files changed, 409 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 7872d2f3e6..e322407fb3 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -19,6 +19,7 @@ #include "qemu/units.h" #include "qemu/uuid.h" #include "sysemu/hostmem.h" +#include "qemu/range.h" #define CXL_CAPACITY_MULTIPLIER (256 * MiB) #define CXL_DC_EVENT_LOG_SIZE 8 @@ -85,6 +86,8 @@ enum { DCD_CONFIG = 0x48, #define GET_DC_CONFIG 0x0 #define GET_DYN_CAP_EXT_LIST 0x1 + #define ADD_DYN_CAP_RSP 0x2 + #define RELEASE_DYN_CAP 0x3 PHYSICAL_SWITCH = 0x51, #define IDENTIFY_SWITCH_DEVICE 0x0 #define GET_PHYSICAL_PORT_STATE 0x1 @@ -1398,6 +1401,391 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* + * Check whether any bit between addr[nr, nr+size) is set, + * return true if any bit is set, otherwise return false + */ +static bool test_any_bits_set(const unsigned long *addr, unsigned long nr, + unsigned long size) +{ + unsigned long res = find_next_bit(addr, size + nr, nr); + + return res < nr + size; +} + +CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len) +{ + int i; + CXLDCRegion *region = &ct3d->dc.regions[0]; + + if (dpa < region->base || + dpa >= region->base + ct3d->dc.total_capacity) { + return NULL; + } + + /* + * CXL r3.1 section 9.13.3: Dynamic Capacity Device (DCD) + * + * Regions are used in increasing-DPA order, with Region 0 being used for + * the lowest DPA of Dynamic Capacity and Region 7 for the highest DPA. + * So check from the last region to find where the dpa belongs. Extents that + * cross multiple regions are not allowed. 
+ */ + for (i = ct3d->dc.num_regions - 1; i >= 0; i--) { + region = &ct3d->dc.regions[i]; + if (dpa >= region->base) { + if (dpa + len > region->base + region->len) { + return NULL; + } + return region; + } + } + + return NULL; +} + +static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list, + uint64_t dpa, + uint64_t len, + uint8_t *tag, + uint16_t shared_seq) +{ + CXLDCExtent *extent; + + extent = g_new0(CXLDCExtent, 1); + extent->start_dpa = dpa; + extent->len = len; + if (tag) { + memcpy(extent->tag, tag, 0x10); + } + extent->shared_seq = shared_seq; + + QTAILQ_INSERT_TAIL(list, extent, node); +} + +void cxl_remove_extent_from_extent_list(CXLDCExtentList *list, + CXLDCExtent *extent) +{ + QTAILQ_REMOVE(list, extent, node); + g_free(extent); +} + +/* + * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload + * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload + */ +typedef struct CXLUpdateDCExtentListInPl { + uint32_t num_entries_updated; + uint8_t flags; + uint8_t rsvd[3]; + /* CXL r3.1 Table 8-169: Updated Extent */ + struct { + uint64_t start_dpa; + uint64_t len; + uint8_t rsvd[8]; + } QEMU_PACKED updated_entries[]; +} QEMU_PACKED CXLUpdateDCExtentListInPl; + +/* + * For the extents in the extent list to operate, check whether they are valid + * 1. The extent should be in the range of a valid DC region; + * 2. The extent should not cross multiple regions; + * 3. The start DPA and the length of the extent should align with the block + * size of the region; + * 4. The address range of multiple extents in the list should not overlap. 
+ */ +static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d, + const CXLUpdateDCExtentListInPl *in) +{ + uint64_t min_block_size = UINT64_MAX; + CXLDCRegion *region; + CXLDCRegion *lastregion = &ct3d->dc.regions[ct3d->dc.num_regions - 1]; + g_autofree unsigned long *blk_bitmap = NULL; + uint64_t dpa, len; + uint32_t i; + + for (i = 0; i < ct3d->dc.num_regions; i++) { + region = &ct3d->dc.regions[i]; + min_block_size = MIN(min_block_size, region->block_size); + } + + blk_bitmap = bitmap_new((lastregion->base + lastregion->len - + ct3d->dc.regions[0].base) / min_block_size); + + for (i = 0; i < in->num_entries_updated; i++) { + dpa = in->updated_entries[i].start_dpa; + len = in->updated_entries[i].len; + + region = cxl_find_dc_region(ct3d, dpa, len); + if (!region) { + return CXL_MBOX_INVALID_PA; + } + + dpa -= ct3d->dc.regions[0].base; + if (dpa % region->block_size || len % region->block_size) { + return CXL_MBOX_INVALID_EXTENT_LIST; + } + /* the dpa range already covered by some other extents in the list */ + if (test_any_bits_set(blk_bitmap, dpa / min_block_size, + len / min_block_size)) { + return CXL_MBOX_INVALID_EXTENT_LIST; + } + bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size); + } + + return CXL_MBOX_SUCCESS; +} + +static CXLRetCode cxl_dcd_add_dyn_cap_rsp_dry_run(CXLType3Dev *ct3d, + const CXLUpdateDCExtentListInPl *in) +{ + uint32_t i; + CXLDCExtent *ent; + uint64_t dpa, len; + Range range1, range2; + + for (i = 0; i < in->num_entries_updated; i++) { + dpa = in->updated_entries[i].start_dpa; + len = in->updated_entries[i].len; + + range_init_nofail(&range1, dpa, len); + + /* + * TODO: once the pending extent list is added, check against + * the list will be added here. 
+ */ + + /* to-be-added range should not overlap with range already accepted */ + QTAILQ_FOREACH(ent, &ct3d->dc.extents, node) { + range_init_nofail(&range2, ent->start_dpa, ent->len); + if (range_overlaps_range(&range1, &range2)) { + return CXL_MBOX_INVALID_PA; + } + } + } + return CXL_MBOX_SUCCESS; +} + +/* + * CXL r3.1 section 8.2.9.9.9.3: Add Dynamic Capacity Response (Opcode 4802h) + * An extent is added to the extent list and becomes usable only after the + * response is processed successfully. + */ +static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLUpdateDCExtentListInPl *in = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDCExtentList *extent_list = &ct3d->dc.extents; + uint32_t i; + uint64_t dpa, len; + CXLRetCode ret; + + if (in->num_entries_updated == 0) { + /* + * TODO: once the pending list is introduced, extents in the beginning + * will get wiped out. + */ + return CXL_MBOX_SUCCESS; + } + + /* Adding extents causes exceeding device's extent tracking ability. */ + if (in->num_entries_updated + ct3d->dc.total_extent_count > + CXL_NUM_EXTENTS_SUPPORTED) { + return CXL_MBOX_RESOURCES_EXHAUSTED; + } + + ret = cxl_detect_malformed_extent_list(ct3d, in); + if (ret != CXL_MBOX_SUCCESS) { + return ret; + } + + ret = cxl_dcd_add_dyn_cap_rsp_dry_run(ct3d, in); + if (ret != CXL_MBOX_SUCCESS) { + return ret; + } + + for (i = 0; i < in->num_entries_updated; i++) { + dpa = in->updated_entries[i].start_dpa; + len = in->updated_entries[i].len; + + cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0); + ct3d->dc.total_extent_count += 1; + /* + * TODO: we will add a pending extent list based on event log record + * and process the list accordingly here. 
+ */ + } + + return CXL_MBOX_SUCCESS; +} + +/* + * Copy extent list from src to dst + * Return value: number of extents copied + */ +static uint32_t copy_extent_list(CXLDCExtentList *dst, + const CXLDCExtentList *src) +{ + uint32_t cnt = 0; + CXLDCExtent *ent; + + if (!dst || !src) { + return 0; + } + + QTAILQ_FOREACH(ent, src, node) { + cxl_insert_extent_to_extent_list(dst, ent->start_dpa, ent->len, + ent->tag, ent->shared_seq); + cnt++; + } + return cnt; +} + +static CXLRetCode cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d, + const CXLUpdateDCExtentListInPl *in, CXLDCExtentList *updated_list, + uint32_t *updated_list_size) +{ + CXLDCExtent *ent, *ent_next; + uint64_t dpa, len; + uint32_t i; + int cnt_delta = 0; + CXLRetCode ret = CXL_MBOX_SUCCESS; + + QTAILQ_INIT(updated_list); + copy_extent_list(updated_list, &ct3d->dc.extents); + + for (i = 0; i < in->num_entries_updated; i++) { + Range range; + + dpa = in->updated_entries[i].start_dpa; + len = in->updated_entries[i].len; + + while (len > 0) { + QTAILQ_FOREACH(ent, updated_list, node) { + range_init_nofail(&range, ent->start_dpa, ent->len); + + if (range_contains(&range, dpa)) { + uint64_t len1, len2 = 0, len_done = 0; + uint64_t ent_start_dpa = ent->start_dpa; + uint64_t ent_len = ent->len; + + len1 = dpa - ent->start_dpa; + /* Found the extent or the subset of an existing extent */ + if (range_contains(&range, dpa + len - 1)) { + len2 = ent_start_dpa + ent_len - dpa - len; + } else { + /* + * TODO: we reject the attempt to remove an extent + * that overlaps with multiple extents in the device + * for now. We will allow it once superset release + * support is added. 
+ */ + ret = CXL_MBOX_INVALID_PA; + goto free_and_exit; + } + len_done = ent_len - len1 - len2; + + cxl_remove_extent_from_extent_list(updated_list, ent); + cnt_delta--; + + if (len1) { + cxl_insert_extent_to_extent_list(updated_list, + ent_start_dpa, + len1, NULL, 0); + cnt_delta++; + } + if (len2) { + cxl_insert_extent_to_extent_list(updated_list, + dpa + len, + len2, NULL, 0); + cnt_delta++; + } + + if (cnt_delta + ct3d->dc.total_extent_count > + CXL_NUM_EXTENTS_SUPPORTED) { + ret = CXL_MBOX_RESOURCES_EXHAUSTED; + goto free_and_exit; + } + + len -= len_done; + /* len == 0 here until superset release is added */ + break; + } + } + if (len) { + ret = CXL_MBOX_INVALID_PA; + goto free_and_exit; + } + } + } +free_and_exit: + if (ret != CXL_MBOX_SUCCESS) { + QTAILQ_FOREACH_SAFE(ent, updated_list, node, ent_next) { + cxl_remove_extent_from_extent_list(updated_list, ent); + } + *updated_list_size = 0; + } else { + *updated_list_size = ct3d->dc.total_extent_count + cnt_delta; + } + + return ret; +} + +/* + * CXL r3.1 section 8.2.9.9.9.4: Release Dynamic Capacity (Opcode 4803h) + */ +static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLUpdateDCExtentListInPl *in = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDCExtentList updated_list; + CXLDCExtent *ent, *ent_next; + uint32_t updated_list_size; + CXLRetCode ret; + + if (in->num_entries_updated == 0) { + return CXL_MBOX_INVALID_INPUT; + } + + ret = cxl_detect_malformed_extent_list(ct3d, in); + if (ret != CXL_MBOX_SUCCESS) { + return ret; + } + + ret = cxl_dc_extent_release_dry_run(ct3d, in, &updated_list, + &updated_list_size); + if (ret != CXL_MBOX_SUCCESS) { + return ret; + } + + /* + * If the dry run release passes, the returned updated_list will + * be the updated extent list and we just need to clear the extents + * in the accepted list and copy extents in the updated_list to 
accepted + * list and update the extent count; + */ + QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) { + cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent); + } + copy_extent_list(&ct3d->dc.extents, &updated_list); + QTAILQ_FOREACH_SAFE(ent, &updated_list, node, ent_next) { + cxl_remove_extent_from_extent_list(&updated_list, ent); + } + ct3d->dc.total_extent_count = updated_list_size; + + return CXL_MBOX_SUCCESS; +} + #define IMMEDIATE_CONFIG_CHANGE (1 << 1) #define IMMEDIATE_DATA_CHANGE (1 << 2) #define IMMEDIATE_POLICY_CHANGE (1 << 3) @@ -1448,6 +1836,12 @@ static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = { [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = { "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list, 8, 0 }, + [DCD_CONFIG][ADD_DYN_CAP_RSP] = { + "DCD_ADD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp, + ~0, IMMEDIATE_DATA_CHANGE }, + [DCD_CONFIG][RELEASE_DYN_CAP] = { + "DCD_RELEASE_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap, + ~0, IMMEDIATE_DATA_CHANGE }, }; static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 2075846b1b..db5191b3b7 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -678,6 +678,15 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) return true; } +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d) +{ + CXLDCExtent *ent, *ent_next; + + QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) { + cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent); + } +} + static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) { DeviceState *ds = DEVICE(ct3d); @@ -874,6 +883,7 @@ err_free_special_ops: g_free(regs->special_ops); err_address_space_free: if (ct3d->dc.host_dc) { + cxl_destroy_dc_regions(ct3d); address_space_destroy(&ct3d->dc.host_dc_as); } if (ct3d->hostpmem) { @@ -895,6 +905,7 @@ static void ct3_exit(PCIDevice *pci_dev) cxl_doe_cdat_release(cxl_cstate); g_free(regs->special_ops); if 
(ct3d->dc.host_dc) { + cxl_destroy_dc_regions(ct3d); address_space_destroy(&ct3d->dc.host_dc_as); } if (ct3d->hostpmem) { diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 6aec6ac983..df3511e91b 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -551,4 +551,8 @@ void cxl_event_irq_assert(CXLType3Dev *ct3d); void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d); +CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len); + +void cxl_remove_extent_from_extent_list(CXLDCExtentList *list, + CXLDCExtent *extent); #endif From d0b9b28a5b9f1e3d22b508f4f05d903a4b443e38 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:51 -0700 Subject: [PATCH 26/85] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents To simulate FM functionalities for initiating Dynamic Capacity Add (Opcode 5604h) and Dynamic Capacity Release (Opcode 5605h) as in CXL spec r3.1 7.6.7.6.5 and 7.6.7.6.6, we implemented two QMP interfaces to issue add/release dynamic capacity extents requests. With the change, we allow to release an extent only when its DPA range is contained by a single accepted extent in the device. That is to say, extent superset release is not supported yet. 1. Add dynamic capacity extents: For example, the command to add two continuous extents (each 128MiB long) to region 0 (starting at DPA offset 0) looks like below: { "execute": "qmp_capabilities" } { "execute": "cxl-add-dynamic-capacity", "arguments": { "path": "/machine/peripheral/cxl-dcd0", "host-id": 0, "selection-policy": "prescriptive", "region": 0, "extents": [ { "offset": 0, "len": 134217728 }, { "offset": 134217728, "len": 134217728 } ] } } 2. 
Release dynamic capacity extents: For example, the command to release an extent of size 128MiB from region 0 (DPA offset 128MiB) looks like below: { "execute": "cxl-release-dynamic-capacity", "arguments": { "path": "/machine/peripheral/cxl-dcd0", "host-id": 0, "removal-policy":"prescriptive", "region": 0, "extents": [ { "offset": 134217728, "len": 134217728 } ] } } Tested-by: Svetly Todorov Reviewed-by: Gregory Price Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-12-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 62 ++++++-- hw/mem/cxl_type3.c | 306 +++++++++++++++++++++++++++++++++++- hw/mem/cxl_type3_stubs.c | 25 +++ include/hw/cxl/cxl_device.h | 22 +++ include/hw/cxl/cxl_events.h | 18 +++ qapi/cxl.json | 143 +++++++++++++++++ 6 files changed, 563 insertions(+), 13 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index e322407fb3..64387f34ce 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1405,7 +1405,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd, * Check whether any bit between addr[nr, nr+size) is set, * return true if any bit is set, otherwise return false */ -static bool test_any_bits_set(const unsigned long *addr, unsigned long nr, +bool test_any_bits_set(const unsigned long *addr, unsigned long nr, unsigned long size) { unsigned long res = find_next_bit(addr, size + nr, nr); @@ -1444,7 +1444,7 @@ CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len) return NULL; } -static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list, +void cxl_insert_extent_to_extent_list(CXLDCExtentList *list, uint64_t dpa, uint64_t len, uint8_t *tag, @@ -1470,6 +1470,44 @@ void cxl_remove_extent_from_extent_list(CXLDCExtentList *list, g_free(extent); } +/* + * Add a new extent to the extent "group" if group exists; + * otherwise, create a new group + * Return value: the 
extent group where the extent is inserted. + */ +CXLDCExtentGroup *cxl_insert_extent_to_extent_group(CXLDCExtentGroup *group, + uint64_t dpa, + uint64_t len, + uint8_t *tag, + uint16_t shared_seq) +{ + if (!group) { + group = g_new0(CXLDCExtentGroup, 1); + QTAILQ_INIT(&group->list); + } + cxl_insert_extent_to_extent_list(&group->list, dpa, len, + tag, shared_seq); + return group; +} + +void cxl_extent_group_list_insert_tail(CXLDCExtentGroupList *list, + CXLDCExtentGroup *group) +{ + QTAILQ_INSERT_TAIL(list, group, node); +} + +void cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list) +{ + CXLDCExtent *ent, *ent_next; + CXLDCExtentGroup *group = QTAILQ_FIRST(list); + + QTAILQ_REMOVE(list, group, node); + QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) { + cxl_remove_extent_from_extent_list(&group->list, ent); + } + g_free(group); +} + /* * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload @@ -1541,6 +1579,7 @@ static CXLRetCode cxl_dcd_add_dyn_cap_rsp_dry_run(CXLType3Dev *ct3d, { uint32_t i; CXLDCExtent *ent; + CXLDCExtentGroup *ext_group; uint64_t dpa, len; Range range1, range2; @@ -1551,9 +1590,13 @@ static CXLRetCode cxl_dcd_add_dyn_cap_rsp_dry_run(CXLType3Dev *ct3d, range_init_nofail(&range1, dpa, len); /* - * TODO: once the pending extent list is added, check against - * the list will be added here. 
+ * The host-accepted DPA range must be contained by the first extent + * group in the pending list */ + ext_group = QTAILQ_FIRST(&ct3d->dc.extents_pending); + if (!cxl_extents_contains_dpa_range(&ext_group->list, dpa, len)) { + return CXL_MBOX_INVALID_PA; + } /* to-be-added range should not overlap with range already accepted */ QTAILQ_FOREACH(ent, &ct3d->dc.extents, node) { @@ -1586,10 +1629,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, CXLRetCode ret; if (in->num_entries_updated == 0) { - /* - * TODO: once the pending list is introduced, extents in the beginning - * will get wiped out. - */ + cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); return CXL_MBOX_SUCCESS; } @@ -1615,11 +1655,9 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0); ct3d->dc.total_extent_count += 1; - /* - * TODO: we will add a pending extent list based on event log record - * and process the list accordingly here. 
- */ } + /* Remove the first extent group in the pending list */ + cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); return CXL_MBOX_SUCCESS; } diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index db5191b3b7..f53bcca6d3 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -674,6 +674,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) ct3d->dc.total_capacity += region->len; } QTAILQ_INIT(&ct3d->dc.extents); + QTAILQ_INIT(&ct3d->dc.extents_pending); return true; } @@ -681,10 +682,19 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) static void cxl_destroy_dc_regions(CXLType3Dev *ct3d) { CXLDCExtent *ent, *ent_next; + CXLDCExtentGroup *group, *group_next; QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) { cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent); } + + QTAILQ_FOREACH_SAFE(group, &ct3d->dc.extents_pending, node, group_next) { + QTAILQ_REMOVE(&ct3d->dc.extents_pending, group, node); + QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) { + cxl_remove_extent_from_extent_list(&group->list, ent); + } + g_free(group); + } } static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) @@ -1449,7 +1459,6 @@ static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log) return CXL_EVENT_TYPE_FAIL; case CXL_EVENT_LOG_FATAL: return CXL_EVENT_TYPE_FATAL; -/* DCD not yet supported */ default: return -EINVAL; } @@ -1700,6 +1709,301 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, } } +/* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */ +static const QemuUUID dynamic_capacity_uuid = { + .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f, + 0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a), +}; + +typedef enum CXLDCEventType { + DC_EVENT_ADD_CAPACITY = 0x0, + DC_EVENT_RELEASE_CAPACITY = 0x1, + DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2, + DC_EVENT_REGION_CONFIG_UPDATED = 0x3, + DC_EVENT_ADD_CAPACITY_RSP = 0x4, + DC_EVENT_CAPACITY_RELEASED = 0x5, +} CXLDCEventType; + +/* + * 
Check whether the range [dpa, dpa + len - 1] has overlaps with extents in + * the list. + * Return value: return true if has overlaps; otherwise, return false + */ +static bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list, + uint64_t dpa, uint64_t len) +{ + CXLDCExtent *ent; + Range range1, range2; + + if (!list) { + return false; + } + + range_init_nofail(&range1, dpa, len); + QTAILQ_FOREACH(ent, list, node) { + range_init_nofail(&range2, ent->start_dpa, ent->len); + if (range_overlaps_range(&range1, &range2)) { + return true; + } + } + return false; +} + +/* + * Check whether the range [dpa, dpa + len - 1] is contained by extents in + * the list. + * Will check multiple extents containment once superset release is added. + * Return value: return true if range is contained; otherwise, return false + */ +bool cxl_extents_contains_dpa_range(CXLDCExtentList *list, + uint64_t dpa, uint64_t len) +{ + CXLDCExtent *ent; + Range range1, range2; + + if (!list) { + return false; + } + + range_init_nofail(&range1, dpa, len); + QTAILQ_FOREACH(ent, list, node) { + range_init_nofail(&range2, ent->start_dpa, ent->len); + if (range_contains_range(&range2, &range1)) { + return true; + } + } + return false; +} + +static bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list, + uint64_t dpa, uint64_t len) +{ + CXLDCExtentGroup *group; + + if (!list) { + return false; + } + + QTAILQ_FOREACH(group, list, node) { + if (cxl_extents_overlaps_dpa_range(&group->list, dpa, len)) { + return true; + } + } + return false; +} + +/* + * The main function to process dynamic capacity event with extent list. + * Currently DC extents add/release requests are processed. 
+ */ +static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path, + uint16_t hid, CXLDCEventType type, uint8_t rid, + CXLDynamicCapacityExtentList *records, Error **errp) +{ + Object *obj; + CXLEventDynamicCapacity dCap = {}; + CXLEventRecordHdr *hdr = &dCap.hdr; + CXLType3Dev *dcd; + uint8_t flags = 1 << CXL_EVENT_TYPE_INFO; + uint32_t num_extents = 0; + CXLDynamicCapacityExtentList *list; + CXLDCExtentGroup *group = NULL; + g_autofree CXLDCExtentRaw *extents = NULL; + uint8_t enc_log = CXL_EVENT_TYPE_DYNAMIC_CAP; + uint64_t dpa, offset, len, block_size; + g_autofree unsigned long *blk_bitmap = NULL; + int i; + + obj = object_resolve_path_type(path, TYPE_CXL_TYPE3, NULL); + if (!obj) { + error_setg(errp, "Unable to resolve CXL type 3 device"); + return; + } + + dcd = CXL_TYPE3(obj); + if (!dcd->dc.num_regions) { + error_setg(errp, "No dynamic capacity support from the device"); + return; + } + + + if (rid >= dcd->dc.num_regions) { + error_setg(errp, "region id is too large"); + return; + } + block_size = dcd->dc.regions[rid].block_size; + blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size); + + /* Sanity check and count the extents */ + list = records; + while (list) { + offset = list->value->offset; + len = list->value->len; + dpa = offset + dcd->dc.regions[rid].base; + + if (len == 0) { + error_setg(errp, "extent with 0 length is not allowed"); + return; + } + + if (offset % block_size || len % block_size) { + error_setg(errp, "dpa or len is not aligned to region block size"); + return; + } + + if (offset + len > dcd->dc.regions[rid].len) { + error_setg(errp, "extent range is beyond the region end"); + return; + } + + /* No duplicate or overlapped extents are allowed */ + if (test_any_bits_set(blk_bitmap, offset / block_size, + len / block_size)) { + error_setg(errp, "duplicate or overlapped extents are detected"); + return; + } + bitmap_set(blk_bitmap, offset / block_size, len / block_size); + + if (type == DC_EVENT_RELEASE_CAPACITY) 
{ + if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending, + dpa, len)) { + error_setg(errp, + "cannot release extent with pending DPA range"); + return; + } + if (!cxl_extents_contains_dpa_range(&dcd->dc.extents, dpa, len)) { + error_setg(errp, + "cannot release extent with non-existing DPA range"); + return; + } + } else if (type == DC_EVENT_ADD_CAPACITY) { + if (cxl_extents_overlaps_dpa_range(&dcd->dc.extents, dpa, len)) { + error_setg(errp, + "cannot add DPA already accessible to the same LD"); + return; + } + if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending, + dpa, len)) { + error_setg(errp, + "cannot add DPA again while still pending"); + return; + } + } + list = list->next; + num_extents++; + } + + /* Create extent list for event being passed to host */ + i = 0; + list = records; + extents = g_new0(CXLDCExtentRaw, num_extents); + while (list) { + offset = list->value->offset; + len = list->value->len; + dpa = dcd->dc.regions[rid].base + offset; + + extents[i].start_dpa = dpa; + extents[i].len = len; + memset(extents[i].tag, 0, 0x10); + extents[i].shared_seq = 0; + if (type == DC_EVENT_ADD_CAPACITY) { + group = cxl_insert_extent_to_extent_group(group, + extents[i].start_dpa, + extents[i].len, + extents[i].tag, + extents[i].shared_seq); + } + + list = list->next; + i++; + } + if (group) { + cxl_extent_group_list_insert_tail(&dcd->dc.extents_pending, group); + } + + /* + * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record + * + * All Dynamic Capacity event records shall set the Event Record Severity + * field in the Common Event Record Format to Informational Event. All + * Dynamic Capacity related events shall be logged in the Dynamic Capacity + * Event Log. 
+ */ + cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap), + cxl_device_get_timestamp(&dcd->cxl_dstate)); + + dCap.type = type; + /* FIXME: for now, validity flag is cleared */ + dCap.validity_flags = 0; + stw_le_p(&dCap.host_id, hid); + /* only valid for DC_REGION_CONFIG_UPDATED event */ + dCap.updated_region_id = 0; + dCap.flags = 0; + for (i = 0; i < num_extents; i++) { + memcpy(&dCap.dynamic_capacity_extent, &extents[i], + sizeof(CXLDCExtentRaw)); + + if (i < num_extents - 1) { + /* Set "More" flag */ + dCap.flags |= BIT(0); + } + + if (cxl_event_insert(&dcd->cxl_dstate, enc_log, + (CXLEventRecordRaw *)&dCap)) { + cxl_event_irq_assert(dcd); + } + } +} + +void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id, + CXLExtSelPolicy sel_policy, uint8_t region, + const char *tag, + CXLDynamicCapacityExtentList *extents, + Error **errp) +{ + switch (sel_policy) { + case CXL_EXT_SEL_POLICY_PRESCRIPTIVE: + qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id, + DC_EVENT_ADD_CAPACITY, + region, extents, errp); + return; + default: + error_setg(errp, "Selection policy not supported"); + return; + } +} + +void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id, + CXLExtRemovalPolicy removal_policy, + bool has_forced_removal, + bool forced_removal, + bool has_sanitize_on_release, + bool sanitize_on_release, + uint8_t region, + const char *tag, + CXLDynamicCapacityExtentList *extents, + Error **errp) +{ + CXLDCEventType type = DC_EVENT_RELEASE_CAPACITY; + + if (has_forced_removal && forced_removal) { + /* TODO: enable forced removal in the future */ + type = DC_EVENT_FORCED_RELEASE_CAPACITY; + error_setg(errp, "Forced removal not supported yet"); + return; + } + + switch (removal_policy) { + case CXL_EXT_REMOVAL_POLICY_PRESCRIPTIVE: + qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id, type, + region, extents, errp); + return; + default: + error_setg(errp, "Removal policy not supported"); + return; + } +} + 
static void ct3_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c index 3e1851e32b..45419bbefe 100644 --- a/hw/mem/cxl_type3_stubs.c +++ b/hw/mem/cxl_type3_stubs.c @@ -67,3 +67,28 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type, { error_setg(errp, "CXL Type 3 support is not compiled in"); } + +void qmp_cxl_add_dynamic_capacity(const char *path, + uint16_t host_id, + CXLExtSelPolicy sel_policy, + uint8_t region, + const char *tag, + CXLDynamicCapacityExtentList *extents, + Error **errp) +{ + error_setg(errp, "CXL Type 3 support is not compiled in"); +} + +void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id, + CXLExtRemovalPolicy removal_policy, + bool has_forced_removal, + bool forced_removal, + bool has_sanitize_on_release, + bool sanitize_on_release, + uint8_t region, + const char *tag, + CXLDynamicCapacityExtentList *extents, + Error **errp) +{ + error_setg(errp, "CXL Type 3 support is not compiled in"); +} diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index df3511e91b..c69ff6b5de 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -443,6 +443,12 @@ typedef struct CXLDCExtent { } CXLDCExtent; typedef QTAILQ_HEAD(, CXLDCExtent) CXLDCExtentList; +typedef struct CXLDCExtentGroup { + CXLDCExtentList list; + QTAILQ_ENTRY(CXLDCExtentGroup) node; +} CXLDCExtentGroup; +typedef QTAILQ_HEAD(, CXLDCExtentGroup) CXLDCExtentGroupList; + typedef struct CXLDCRegion { uint64_t base; /* aligned to 256*MiB */ uint64_t decode_len; /* aligned to 256*MiB */ @@ -494,6 +500,7 @@ struct CXLType3Dev { */ uint64_t total_capacity; /* 256M aligned */ CXLDCExtentList extents; + CXLDCExtentGroupList extents_pending; uint32_t total_extent_count; uint32_t ext_list_gen_seq; @@ -555,4 +562,19 @@ CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len); void 
cxl_remove_extent_from_extent_list(CXLDCExtentList *list, CXLDCExtent *extent); +void cxl_insert_extent_to_extent_list(CXLDCExtentList *list, uint64_t dpa, + uint64_t len, uint8_t *tag, + uint16_t shared_seq); +bool test_any_bits_set(const unsigned long *addr, unsigned long nr, + unsigned long size); +bool cxl_extents_contains_dpa_range(CXLDCExtentList *list, + uint64_t dpa, uint64_t len); +CXLDCExtentGroup *cxl_insert_extent_to_extent_group(CXLDCExtentGroup *group, + uint64_t dpa, + uint64_t len, + uint8_t *tag, + uint16_t shared_seq); +void cxl_extent_group_list_insert_tail(CXLDCExtentGroupList *list, + CXLDCExtentGroup *group); +void cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list); #endif diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h index 5170b8dbf8..38cadaa0f3 100644 --- a/include/hw/cxl/cxl_events.h +++ b/include/hw/cxl/cxl_events.h @@ -166,4 +166,22 @@ typedef struct CXLEventMemoryModule { uint8_t reserved[0x3d]; } QEMU_PACKED CXLEventMemoryModule; +/* + * CXL r3.1 section Table 8-50: Dynamic Capacity Event Record + * All fields little endian. + */ +typedef struct CXLEventDynamicCapacity { + CXLEventRecordHdr hdr; + uint8_t type; + uint8_t validity_flags; + uint16_t host_id; + uint8_t updated_region_id; + uint8_t flags; + uint8_t reserved2[2]; + uint8_t dynamic_capacity_extent[0x28]; /* defined in cxl_device.h */ + uint8_t reserved[0x18]; + uint32_t extents_avail; + uint32_t tags_avail; +} QEMU_PACKED CXLEventDynamicCapacity; + #endif /* CXL_EVENTS_H */ diff --git a/qapi/cxl.json b/qapi/cxl.json index 4281726dec..57d9f82014 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -361,3 +361,146 @@ ## {'command': 'cxl-inject-correctable-error', 'data': {'path': 'str', 'type': 'CxlCorErrorType'}} + +## +# @CXLDynamicCapacityExtent: +# +# A single dynamic capacity extent +# +# @offset: The offset (in bytes) to the start of the region +# where the extent belongs to. +# +# @len: The length of the extent in bytes. 
+# +# Since: 9.1 +## +{ 'struct': 'CXLDynamicCapacityExtent', + 'data': { + 'offset':'uint64', + 'len': 'uint64' + } +} + +## +# @CXLExtSelPolicy: +# +# The policy to use for selecting which extents comprise the added +# capacity, as defined in cxl spec r3.1 Table 7-70. +# +# @free: 0h = Free +# +# @contiguous: 1h = Contiguous +# +# @prescriptive: 2h = Prescriptive +# +# @enable-shared-access: 3h = Enable Shared Access +# +# Since: 9.1 +## +{ 'enum': 'CXLExtSelPolicy', + 'data': ['free', + 'contiguous', + 'prescriptive', + 'enable-shared-access'] +} + +## +# @cxl-add-dynamic-capacity: +# +# Command to initiate to add dynamic capacity extents to a host. It +# simulates operations defined in cxl spec r3.1 7.6.7.6.5. +# +# @path: CXL DCD canonical QOM path. +# +# @host-id: The "Host ID" field as defined in cxl spec r3.1 +# Table 7-70. +# +# @selection-policy: The "Selection Policy" bits as defined in +# cxl spec r3.1 Table 7-70. It specifies the policy to use for +# selecting which extents comprise the added capacity. +# +# @region: The "Region Number" field as defined in cxl spec r3.1 +# Table 7-70. The dynamic capacity region where the capacity +# is being added. Valid range is from 0-7. +# +# @tag: The "Tag" field as defined in cxl spec r3.1 Table 7-70. +# +# @extents: The "Extent List" field as defined in cxl spec r3.1 +# Table 7-70. +# +# Since: 9.1 +## +{ 'command': 'cxl-add-dynamic-capacity', + 'data': { 'path': 'str', + 'host-id': 'uint16', + 'selection-policy': 'CXLExtSelPolicy', + 'region': 'uint8', + '*tag': 'str', + 'extents': [ 'CXLDynamicCapacityExtent' ] + } +} + +## +# @CXLExtRemovalPolicy: +# +# The policy to use for selecting which extents comprise the released +# capacity, defined in the "Flags" field in cxl spec r3.1 Table 7-71. +# +# @tag-based: value = 0h. Extents are selected by the device based +# on tag, with no requirement for contiguous extents. +# +# @prescriptive: value = 1h.
Extent list of capacity to release is +# included in the request payload. +# +# Since: 9.1 +## +{ 'enum': 'CXLExtRemovalPolicy', + 'data': ['tag-based', + 'prescriptive'] +} + +## +# @cxl-release-dynamic-capacity: +# +# Command to initiate to release dynamic capacity extents from a +# host. It simulates operations defined in cxl spec r3.1 7.6.7.6.6. +# +# @path: CXL DCD canonical QOM path. +# +# @host-id: The "Host ID" field as defined in cxl spec r3.1 +# Table 7-71. +# +# @removal-policy: Bit[3:0] of the "Flags" field as defined in cxl +# spec r3.1 Table 7-71. +# +# @forced-removal: Bit[4] of the "Flags" field in cxl spec r3.1 +# Table 7-71. When set, device does not wait for a Release +# Dynamic Capacity command from the host. Host immediately +# loses access to released capacity. +# +# @sanitize-on-release: Bit[5] of the "Flags" field in cxl spec r3.1 +# Table 7-71. When set, device should sanitize all released +# capacity as a result of this request. +# +# @region: The "Region Number" field as defined in cxl spec r3.1 +# Table 7-71. The dynamic capacity region where the capacity +# is being released. Valid range is from 0-7. +# +# @tag: The "Tag" field as defined in cxl spec r3.1 Table 7-71. +# +# @extents: The "Extent List" field as defined in cxl spec r3.1 +# Table 7-71. +# +# Since: 9.1 +## +{ 'command': 'cxl-release-dynamic-capacity', + 'data': { 'path': 'str', + 'host-id': 'uint16', + 'removal-policy': 'CXLExtRemovalPolicy', + '*forced-removal': 'bool', + '*sanitize-on-release': 'bool', + 'region': 'uint8', + '*tag': 'str', + 'extents': [ 'CXLDynamicCapacityExtent' ] + } +} From e4180db4e63b904183374c6e7ec07f66aa0decde Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:52 -0700 Subject: [PATCH 27/85] hw/mem/cxl_type3: Add DPA range validation for accesses to DC regions All DPA ranges in the DC regions are invalid to access until an extent covering the range has been successfully accepted by the host.
A bitmap is added to each region to record whether a DC block in the region has been backed by a DC extent. Each bit in the bitmap represents a DC block. When a DC extent is accepted, all the bits representing the blocks in the extent are set, which will be cleared when the extent is released. Tested-by: Svetly Todorov Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-13-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 3 ++ hw/mem/cxl_type3.c | 76 +++++++++++++++++++++++++++++++++++++ include/hw/cxl/cxl_device.h | 7 ++++ 3 files changed, 86 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 64387f34ce..c4852112fe 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1655,6 +1655,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0); ct3d->dc.total_extent_count += 1; + ct3_set_region_block_backed(ct3d, dpa, len); } /* Remove the first extent group in the pending list */ cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); @@ -1813,10 +1814,12 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd, * list and update the extent count; */ QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) { + ct3_clear_region_block_backed(ct3d, ent->start_dpa, ent->len); cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent); } copy_extent_list(&ct3d->dc.extents, &updated_list); QTAILQ_FOREACH_SAFE(ent, &updated_list, node, ent_next) { + ct3_set_region_block_backed(ct3d, ent->start_dpa, ent->len); cxl_remove_extent_from_extent_list(&updated_list, ent); } ct3d->dc.total_extent_count = updated_list_size; diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index f53bcca6d3..0d18259ec0 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -672,6 +672,7 @@ static bool 
cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) .flags = 0, }; ct3d->dc.total_capacity += region->len; + region->blk_bitmap = bitmap_new(region->len / region->block_size); } QTAILQ_INIT(&ct3d->dc.extents); QTAILQ_INIT(&ct3d->dc.extents_pending); @@ -683,6 +684,8 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d) { CXLDCExtent *ent, *ent_next; CXLDCExtentGroup *group, *group_next; + int i; + CXLDCRegion *region; QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) { cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent); @@ -695,6 +698,11 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d) } g_free(group); } + + for (i = 0; i < ct3d->dc.num_regions; i++) { + region = &ct3d->dc.regions[i]; + g_free(region->blk_bitmap); + } } static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) @@ -926,6 +934,70 @@ static void ct3_exit(PCIDevice *pci_dev) } } +/* + * Mark the DPA range [dpa, dpa + len - 1] to be backed and accessible. This + * happens when a DC extent is added and accepted by the host. + */ +void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, + uint64_t len) +{ + CXLDCRegion *region; + + region = cxl_find_dc_region(ct3d, dpa, len); + if (!region) { + return; + } + + bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size, + len / region->block_size); +} + +/* + * Check whether the DPA range [dpa, dpa + len - 1] is backed with DC extents. + * Used when validating read/write to dc regions + */ +bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, + uint64_t len) +{ + CXLDCRegion *region; + uint64_t nbits; + long nr; + + region = cxl_find_dc_region(ct3d, dpa, len); + if (!region) { + return false; + } + + nr = (dpa - region->base) / region->block_size; + nbits = DIV_ROUND_UP(len, region->block_size); + /* + * if bits between [dpa, dpa + len) are all 1s, meaning the DPA range is + * backed with DC extents, return true; else return false.
+ */ + return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + nbits; +} + +/* + * Mark the DPA range [dpa, dpa + len - 1] to be unbacked and inaccessible. + * This happens when a dc extent is released by the host. + */ +void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, + uint64_t len) +{ + CXLDCRegion *region; + uint64_t nbits; + long nr; + + region = cxl_find_dc_region(ct3d, dpa, len); + if (!region) { + return; + } + + nr = (dpa - region->base) / region->block_size; + nbits = len / region->block_size; + bitmap_clear(region->blk_bitmap, nr, nbits); +} + static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa) { int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; @@ -1030,6 +1102,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d, *as = &ct3d->hostpmem_as; *dpa_offset -= vmr_size; } else { + if (!ct3_test_region_block_backed(ct3d, *dpa_offset, size)) { + return -ENODEV; + } + *as = &ct3d->dc.host_dc_as; *dpa_offset -= (vmr_size + pmr_size); } diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index c69ff6b5de..0a4fcb2800 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -456,6 +456,7 @@ typedef struct CXLDCRegion { uint64_t block_size; uint32_t dsmadhandle; uint8_t flags; + unsigned long *blk_bitmap; } CXLDCRegion; struct CXLType3Dev { @@ -577,4 +578,10 @@ CXLDCExtentGroup *cxl_insert_extent_to_extent_group(CXLDCExtentGroup *group, void cxl_extent_group_list_insert_tail(CXLDCExtentGroupList *list, CXLDCExtentGroup *group); void cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list); +void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, + uint64_t len); +void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, + uint64_t len); +bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, + uint64_t len); #endif From 3083f018b59fd35b9ee993715694f967c49afeb1 Mon Sep 17 00:00:00 2001 From: Fan Ni
Date: Thu, 23 May 2024 10:44:53 -0700 Subject: [PATCH 28/85] hw/cxl/cxl-mailbox-utils: Add superset extent release mailbox support With the change, we extend the extent release mailbox command processing to allow more flexible release. As long as the DPA range of the extent to release is covered by accepted extent(s) in the device, the release can be performed. Tested-by: Svetly Todorov Reviewed-by: Gregory Price Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-14-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index c4852112fe..74eeb6fde7 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1704,6 +1704,13 @@ static CXLRetCode cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d, dpa = in->updated_entries[i].start_dpa; len = in->updated_entries[i].len; + /* Check if the DPA range is not fully backed with valid extents */ + if (!ct3_test_region_block_backed(ct3d, dpa, len)) { + ret = CXL_MBOX_INVALID_PA; + goto free_and_exit; + } + + /* After this point, extent overflow is the only possible error */ while (len > 0) { QTAILQ_FOREACH(ent, updated_list, node) { range_init_nofail(&range, ent->start_dpa, ent->len); @@ -1718,14 +1725,7 @@ static CXLRetCode cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d, if (range_contains(&range, dpa + len - 1)) { len2 = ent_start_dpa + ent_len - dpa - len; } else { - /* - * TODO: we reject the attempt to remove an extent - * that overlaps with multiple extents in the device - * for now. We will allow it once superset release - * support is added.
- */ - ret = CXL_MBOX_INVALID_PA; - goto free_and_exit; + dpa = ent_start_dpa + ent_len; } len_done = ent_len - len1 - len2; @@ -1752,14 +1752,9 @@ static CXLRetCode cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d, } len -= len_done; - /* len == 0 here until superset release is added */ break; } } - if (len) { - ret = CXL_MBOX_INVALID_PA; - goto free_and_exit; - } } } free_and_exit: From c51dca04281f9be6eacdad8fc8f9c7ddc87dcf3c Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 23 May 2024 10:44:54 -0700 Subject: [PATCH 29/85] hw/mem/cxl_type3: Allow to release extent superset in QMP interface Before the change, the QMP interface used for add/release DC extents only allows to release an extent whose DPA range is contained by a single accepted extent in the device. With the change, we relax the constraints. As long as the DPA range of the extent is covered by accepted extents, we allow the release. Tested-by: Svetly Todorov Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Signed-off-by: Fan Ni Message-Id: <20240523174651.1089554-15-nifan.cxl@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/mem/cxl_type3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 0d18259ec0..5d4a1276be 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -1947,7 +1947,7 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path, "cannot release extent with pending DPA range"); return; } - if (!cxl_extents_contains_dpa_range(&dcd->dc.extents, dpa, len)) { + if (!ct3_test_region_block_backed(dcd, dpa, len)) { error_setg(errp, "cannot release extent with non-existing DPA range"); return; From c5614ee3f2775534871914c02be4b5a61b71ed40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 27 May 2024 08:27:48 +0200 Subject: [PATCH 30/85] linux-headers: update to 6.10-rc1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Thomas Weißschuh Message-Id: <20240527-pvpanic-shutdown-v8-2-5a28ec02558b@t-8ch.de> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- include/standard-headers/linux/ethtool.h | 55 ++++++++ include/standard-headers/linux/pci_regs.h | 6 + include/standard-headers/linux/virtio_bt.h | 1 - include/standard-headers/linux/virtio_mem.h | 2 + include/standard-headers/linux/virtio_net.h | 143 ++++++++++++++++++++ include/standard-headers/misc/pvpanic.h | 7 +- linux-headers/asm-generic/unistd.h | 5 +- linux-headers/asm-mips/unistd_n32.h | 1 + linux-headers/asm-mips/unistd_n64.h | 1 + linux-headers/asm-mips/unistd_o32.h | 1 + linux-headers/asm-powerpc/unistd_32.h | 1 + linux-headers/asm-powerpc/unistd_64.h | 1 + linux-headers/asm-s390/unistd_32.h | 1 + linux-headers/asm-s390/unistd_64.h | 1 + linux-headers/asm-x86/unistd_32.h | 1 + linux-headers/asm-x86/unistd_64.h | 1 + linux-headers/asm-x86/unistd_x32.h | 2 + linux-headers/linux/kvm.h | 4 +- linux-headers/linux/stddef.h | 8 ++ 19 files changed, 236 insertions(+), 6 deletions(-) diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 01503784d2..b0b4b68410 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -752,6 +752,61 @@ enum ethtool_module_power_mode { ETHTOOL_MODULE_POWER_MODE_HIGH, }; +/** + * enum ethtool_pse_types - Types of PSE controller. + * @ETHTOOL_PSE_UNKNOWN: Type of PSE controller is unknown + * @ETHTOOL_PSE_PODL: PSE controller which support PoDL + * @ETHTOOL_PSE_C33: PSE controller which support Clause 33 (PoE) + */ +enum ethtool_pse_types { + ETHTOOL_PSE_UNKNOWN = 1 << 0, + ETHTOOL_PSE_PODL = 1 << 1, + ETHTOOL_PSE_C33 = 1 << 2, +}; + +/** + * enum ethtool_c33_pse_admin_state - operational state of the PoDL PSE + * functions. 
IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState + * @ETHTOOL_C33_PSE_ADMIN_STATE_UNKNOWN: state of PSE functions is unknown + * @ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED: PSE functions are disabled + * @ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED: PSE functions are enabled + */ +enum ethtool_c33_pse_admin_state { + ETHTOOL_C33_PSE_ADMIN_STATE_UNKNOWN = 1, + ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED, + ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED, +}; + +/** + * enum ethtool_c33_pse_pw_d_status - power detection status of the PSE. + * IEEE 802.3-2022 30.9.1.1.3 aPoDLPSEPowerDetectionStatus: + * @ETHTOOL_C33_PSE_PW_D_STATUS_UNKNOWN: PSE status is unknown + * @ETHTOOL_C33_PSE_PW_D_STATUS_DISABLED: The enumeration "disabled" + * indicates that the PSE State diagram is in the state DISABLED. + * @ETHTOOL_C33_PSE_PW_D_STATUS_SEARCHING: The enumeration "searching" + * indicates the PSE State diagram is in a state other than those + * listed. + * @ETHTOOL_C33_PSE_PW_D_STATUS_DELIVERING: The enumeration + * "deliveringPower" indicates that the PSE State diagram is in the + * state POWER_ON. + * @ETHTOOL_C33_PSE_PW_D_STATUS_TEST: The enumeration "test" indicates that + * the PSE State diagram is in the state TEST_MODE. + * @ETHTOOL_C33_PSE_PW_D_STATUS_FAULT: The enumeration "fault" indicates that + * the PSE State diagram is in the state TEST_ERROR. + * @ETHTOOL_C33_PSE_PW_D_STATUS_OTHERFAULT: The enumeration "otherFault" + * indicates that the PSE State diagram is in the state IDLE due to + * the variable error_condition = true. + */ +enum ethtool_c33_pse_pw_d_status { + ETHTOOL_C33_PSE_PW_D_STATUS_UNKNOWN = 1, + ETHTOOL_C33_PSE_PW_D_STATUS_DISABLED, + ETHTOOL_C33_PSE_PW_D_STATUS_SEARCHING, + ETHTOOL_C33_PSE_PW_D_STATUS_DELIVERING, + ETHTOOL_C33_PSE_PW_D_STATUS_TEST, + ETHTOOL_C33_PSE_PW_D_STATUS_FAULT, + ETHTOOL_C33_PSE_PW_D_STATUS_OTHERFAULT, +}; + /** * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE * functions. 
IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index a39193213f..94c00996e6 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -1144,8 +1144,14 @@ #define PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH 0x0003ffff #define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX 0x000000ff +#define PCI_DOE_DATA_OBJECT_DISC_REQ_3_VER 0x0000ff00 #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL 0x00ff0000 #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX 0xff000000 +/* Compute Express Link (CXL r3.1, sec 8.1.5) */ +#define PCI_DVSEC_CXL_PORT 3 +#define PCI_DVSEC_CXL_PORT_CTL 0x0c +#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 + #endif /* LINUX_PCI_REGS_H */ diff --git a/include/standard-headers/linux/virtio_bt.h b/include/standard-headers/linux/virtio_bt.h index a11ecc3f92..6f0dee7e32 100644 --- a/include/standard-headers/linux/virtio_bt.h +++ b/include/standard-headers/linux/virtio_bt.h @@ -13,7 +13,6 @@ enum virtio_bt_config_type { VIRTIO_BT_CONFIG_TYPE_PRIMARY = 0, - VIRTIO_BT_CONFIG_TYPE_AMP = 1, }; enum virtio_bt_config_vendor { diff --git a/include/standard-headers/linux/virtio_mem.h b/include/standard-headers/linux/virtio_mem.h index 18c74c527c..6bfa41bd8b 100644 --- a/include/standard-headers/linux/virtio_mem.h +++ b/include/standard-headers/linux/virtio_mem.h @@ -90,6 +90,8 @@ #define VIRTIO_MEM_F_ACPI_PXM 0 /* unplugged memory must not be accessed */ #define VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE 1 +/* plugged memory will remain plugged when suspending+resuming */ +#define VIRTIO_MEM_F_PERSISTENT_SUSPEND 2 /* --- virtio-mem: guest -> host requests --- */ diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h index 0f88417742..fc594fe5fc 100644 --- a/include/standard-headers/linux/virtio_net.h +++ 
b/include/standard-headers/linux/virtio_net.h @@ -56,6 +56,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_DEVICE_STATS 50 /* Device can provide device-level statistics. */ #define VIRTIO_NET_F_VQ_NOTF_COAL 52 /* Device supports virtqueue notification coalescing */ #define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ #define VIRTIO_NET_F_GUEST_USO4 54 /* Guest can handle USOv4 in. */ @@ -406,4 +407,146 @@ struct virtio_net_ctrl_coal_vq { struct virtio_net_ctrl_coal coal; }; +/* + * Device Statistics + */ +#define VIRTIO_NET_CTRL_STATS 8 +#define VIRTIO_NET_CTRL_STATS_QUERY 0 +#define VIRTIO_NET_CTRL_STATS_GET 1 + +struct virtio_net_stats_capabilities { + +#define VIRTIO_NET_STATS_TYPE_CVQ (1ULL << 32) + +#define VIRTIO_NET_STATS_TYPE_RX_BASIC (1ULL << 0) +#define VIRTIO_NET_STATS_TYPE_RX_CSUM (1ULL << 1) +#define VIRTIO_NET_STATS_TYPE_RX_GSO (1ULL << 2) +#define VIRTIO_NET_STATS_TYPE_RX_SPEED (1ULL << 3) + +#define VIRTIO_NET_STATS_TYPE_TX_BASIC (1ULL << 16) +#define VIRTIO_NET_STATS_TYPE_TX_CSUM (1ULL << 17) +#define VIRTIO_NET_STATS_TYPE_TX_GSO (1ULL << 18) +#define VIRTIO_NET_STATS_TYPE_TX_SPEED (1ULL << 19) + + uint64_t supported_stats_types[1]; +}; + +struct virtio_net_ctrl_queue_stats { + struct { + uint16_t vq_index; + uint16_t reserved[3]; + uint64_t types_bitmap[1]; + } stats[1]; +}; + +struct virtio_net_stats_reply_hdr { +#define VIRTIO_NET_STATS_TYPE_REPLY_CVQ 32 + +#define VIRTIO_NET_STATS_TYPE_REPLY_RX_BASIC 0 +#define VIRTIO_NET_STATS_TYPE_REPLY_RX_CSUM 1 +#define VIRTIO_NET_STATS_TYPE_REPLY_RX_GSO 2 +#define VIRTIO_NET_STATS_TYPE_REPLY_RX_SPEED 3 + +#define VIRTIO_NET_STATS_TYPE_REPLY_TX_BASIC 16 +#define VIRTIO_NET_STATS_TYPE_REPLY_TX_CSUM 17 +#define VIRTIO_NET_STATS_TYPE_REPLY_TX_GSO 18 +#define VIRTIO_NET_STATS_TYPE_REPLY_TX_SPEED 19 + uint8_t type; + uint8_t reserved; + uint16_t vq_index; + uint16_t 
reserved1; + uint16_t size; +}; + +struct virtio_net_stats_cvq { + struct virtio_net_stats_reply_hdr hdr; + + uint64_t command_num; + uint64_t ok_num; +}; + +struct virtio_net_stats_rx_basic { + struct virtio_net_stats_reply_hdr hdr; + + uint64_t rx_notifications; + + uint64_t rx_packets; + uint64_t rx_bytes; + + uint64_t rx_interrupts; + + uint64_t rx_drops; + uint64_t rx_drop_overruns; +}; + +struct virtio_net_stats_tx_basic { + struct virtio_net_stats_reply_hdr hdr; + + uint64_t tx_notifications; + + uint64_t tx_packets; + uint64_t tx_bytes; + + uint64_t tx_interrupts; + + uint64_t tx_drops; + uint64_t tx_drop_malformed; +}; + +struct virtio_net_stats_rx_csum { + struct virtio_net_stats_reply_hdr hdr; + + uint64_t rx_csum_valid; + uint64_t rx_needs_csum; + uint64_t rx_csum_none; + uint64_t rx_csum_bad; +}; + +struct virtio_net_stats_tx_csum { + struct virtio_net_stats_reply_hdr hdr; + + uint64_t tx_csum_none; + uint64_t tx_needs_csum; +}; + +struct virtio_net_stats_rx_gso { + struct virtio_net_stats_reply_hdr hdr; + + uint64_t rx_gso_packets; + uint64_t rx_gso_bytes; + uint64_t rx_gso_packets_coalesced; + uint64_t rx_gso_bytes_coalesced; +}; + +struct virtio_net_stats_tx_gso { + struct virtio_net_stats_reply_hdr hdr; + + uint64_t tx_gso_packets; + uint64_t tx_gso_bytes; + uint64_t tx_gso_segments; + uint64_t tx_gso_segments_bytes; + uint64_t tx_gso_packets_noseg; + uint64_t tx_gso_bytes_noseg; +}; + +struct virtio_net_stats_rx_speed { + struct virtio_net_stats_reply_hdr hdr; + + /* rx_{packets,bytes}_allowance_exceeded are too long. So rename to + * short name. + */ + uint64_t rx_ratelimit_packets; + uint64_t rx_ratelimit_bytes; +}; + +struct virtio_net_stats_tx_speed { + struct virtio_net_stats_reply_hdr hdr; + + /* tx_{packets,bytes}_allowance_exceeded are too long. So rename to + * short name. 
+ */ + uint64_t tx_ratelimit_packets; + uint64_t tx_ratelimit_bytes; +}; + #endif /* _LINUX_VIRTIO_NET_H */ diff --git a/include/standard-headers/misc/pvpanic.h b/include/standard-headers/misc/pvpanic.h index 54b7485390..b115094431 100644 --- a/include/standard-headers/misc/pvpanic.h +++ b/include/standard-headers/misc/pvpanic.h @@ -3,7 +3,10 @@ #ifndef __PVPANIC_H__ #define __PVPANIC_H__ -#define PVPANIC_PANICKED (1 << 0) -#define PVPANIC_CRASH_LOADED (1 << 1) +#include "standard-headers/linux/const.h" + +#define PVPANIC_PANICKED _BITUL(0) +#define PVPANIC_CRASH_LOADED _BITUL(1) +#define PVPANIC_SHUTDOWN _BITUL(2) #endif /* __PVPANIC_H__ */ diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h index 75f00965ab..d983c48a3b 100644 --- a/linux-headers/asm-generic/unistd.h +++ b/linux-headers/asm-generic/unistd.h @@ -842,8 +842,11 @@ __SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) #define __NR_lsm_list_modules 461 __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) +#define __NR_mseal 462 +__SYSCALL(__NR_mseal, sys_mseal) + #undef __NR_syscalls -#define __NR_syscalls 462 +#define __NR_syscalls 463 /* * 32 bit systems traditionally used different diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h index ce2e050a9b..fc93b3be30 100644 --- a/linux-headers/asm-mips/unistd_n32.h +++ b/linux-headers/asm-mips/unistd_n32.h @@ -390,5 +390,6 @@ #define __NR_lsm_get_self_attr (__NR_Linux + 459) #define __NR_lsm_set_self_attr (__NR_Linux + 460) #define __NR_lsm_list_modules (__NR_Linux + 461) +#define __NR_mseal (__NR_Linux + 462) #endif /* _ASM_UNISTD_N32_H */ diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h index 5bfb3733ff..e72a3eb2c9 100644 --- a/linux-headers/asm-mips/unistd_n64.h +++ b/linux-headers/asm-mips/unistd_n64.h @@ -366,5 +366,6 @@ #define __NR_lsm_get_self_attr (__NR_Linux + 459) #define __NR_lsm_set_self_attr (__NR_Linux + 460) #define 
__NR_lsm_list_modules (__NR_Linux + 461) +#define __NR_mseal (__NR_Linux + 462) #endif /* _ASM_UNISTD_N64_H */ diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h index 02eaecd020..b86eb0786c 100644 --- a/linux-headers/asm-mips/unistd_o32.h +++ b/linux-headers/asm-mips/unistd_o32.h @@ -436,5 +436,6 @@ #define __NR_lsm_get_self_attr (__NR_Linux + 459) #define __NR_lsm_set_self_attr (__NR_Linux + 460) #define __NR_lsm_list_modules (__NR_Linux + 461) +#define __NR_mseal (__NR_Linux + 462) #endif /* _ASM_UNISTD_O32_H */ diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h index bbab08d6ec..28627b6546 100644 --- a/linux-headers/asm-powerpc/unistd_32.h +++ b/linux-headers/asm-powerpc/unistd_32.h @@ -443,6 +443,7 @@ #define __NR_lsm_get_self_attr 459 #define __NR_lsm_set_self_attr 460 #define __NR_lsm_list_modules 461 +#define __NR_mseal 462 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h index af34cde70f..1fc42a8300 100644 --- a/linux-headers/asm-powerpc/unistd_64.h +++ b/linux-headers/asm-powerpc/unistd_64.h @@ -415,6 +415,7 @@ #define __NR_lsm_get_self_attr 459 #define __NR_lsm_set_self_attr 460 #define __NR_lsm_list_modules 461 +#define __NR_mseal 462 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h index a3ece69d82..7706c21b87 100644 --- a/linux-headers/asm-s390/unistd_32.h +++ b/linux-headers/asm-s390/unistd_32.h @@ -434,5 +434,6 @@ #define __NR_lsm_get_self_attr 459 #define __NR_lsm_set_self_attr 460 #define __NR_lsm_list_modules 461 +#define __NR_mseal 462 #endif /* _ASM_S390_UNISTD_32_H */ diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h index 8c5fd93495..62082d592d 100644 --- a/linux-headers/asm-s390/unistd_64.h +++ b/linux-headers/asm-s390/unistd_64.h @@ -382,5 +382,6 @@ #define __NR_lsm_get_self_attr 459 #define 
__NR_lsm_set_self_attr 460 #define __NR_lsm_list_modules 461 +#define __NR_mseal 462 #endif /* _ASM_S390_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index 5c9c329e93..fb7b8b169b 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -452,6 +452,7 @@ #define __NR_lsm_get_self_attr 459 #define __NR_lsm_set_self_attr 460 #define __NR_lsm_list_modules 461 +#define __NR_mseal 462 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index d9aab7ae87..da439afee1 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -374,6 +374,7 @@ #define __NR_lsm_get_self_attr 459 #define __NR_lsm_set_self_attr 460 #define __NR_lsm_list_modules 461 +#define __NR_mseal 462 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 63cdd1ee43..4fcb607c72 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -318,6 +318,7 @@ #define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450) #define __NR_cachestat (__X32_SYSCALL_BIT + 451) #define __NR_fchmodat2 (__X32_SYSCALL_BIT + 452) +#define __NR_map_shadow_stack (__X32_SYSCALL_BIT + 453) #define __NR_futex_wake (__X32_SYSCALL_BIT + 454) #define __NR_futex_wait (__X32_SYSCALL_BIT + 455) #define __NR_futex_requeue (__X32_SYSCALL_BIT + 456) @@ -326,6 +327,7 @@ #define __NR_lsm_get_self_attr (__X32_SYSCALL_BIT + 459) #define __NR_lsm_set_self_attr (__X32_SYSCALL_BIT + 460) #define __NR_lsm_list_modules (__X32_SYSCALL_BIT + 461) +#define __NR_mseal (__X32_SYSCALL_BIT + 462) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 038731cdef..c93876ca0b 100644 --- a/linux-headers/linux/kvm.h +++ 
b/linux-headers/linux/kvm.h @@ -1217,9 +1217,9 @@ struct kvm_vfio_spapr_tce { /* Available with KVM_CAP_SPAPR_RESIZE_HPT */ #define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt) #define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt) -/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */ +/* Available with KVM_CAP_PPC_MMU_RADIX or KVM_CAP_PPC_MMU_HASH_V3 */ #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) -/* Available with KVM_CAP_PPC_RADIX_MMU */ +/* Available with KVM_CAP_PPC_MMU_RADIX */ #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) /* Available with KVM_CAP_PPC_GET_CPU_CHAR */ #define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h index bf9749dd14..96aa341942 100644 --- a/linux-headers/linux/stddef.h +++ b/linux-headers/linux/stddef.h @@ -55,4 +55,12 @@ #define __counted_by(m) #endif +#ifndef __counted_by_le +#define __counted_by_le(m) +#endif + +#ifndef __counted_by_be +#define __counted_by_be(m) +#endif + #endif /* _LINUX_STDDEF_H */ From 9b13640da3f94c0fbacbae6d23bd91febfa44588 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 27 May 2024 08:27:49 +0200 Subject: [PATCH 31/85] hw/misc/pvpanic: centralize definition of supported events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The different components of pvpanic duplicate the list of supported events. Move it to the shared header file to minimize changes when new events are added. MST: tweak: keep header included in pvpanic.c to avoid header dependency, rebase. Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Thomas Weißschuh Message-Id: <20240527-pvpanic-shutdown-v8-3-5a28ec02558b@t-8ch.de> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/misc/pvpanic-isa.c | 3 +-- hw/misc/pvpanic-pci.c | 2 +- hw/misc/pvpanic.c | 2 +- include/hw/misc/pvpanic.h | 4 ++++ 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c index b4f84c4110..9a923b7869 100644 --- a/hw/misc/pvpanic-isa.c +++ b/hw/misc/pvpanic-isa.c @@ -21,7 +21,6 @@ #include "hw/misc/pvpanic.h" #include "qom/object.h" #include "hw/isa/isa.h" -#include "standard-headers/misc/pvpanic.h" #include "hw/acpi/acpi_aml_interface.h" OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE) @@ -102,7 +101,7 @@ static void build_pvpanic_isa_aml(AcpiDevAmlIf *adev, Aml *scope) static Property pvpanic_isa_properties[] = { DEFINE_PROP_UINT16(PVPANIC_IOPORT_PROP, PVPanicISAState, ioport, 0x505), DEFINE_PROP_UINT8("events", PVPanicISAState, pvpanic.events, - PVPANIC_PANICKED | PVPANIC_CRASH_LOADED), + PVPANIC_EVENTS), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c index 4d44a881da..106d03ccd6 100644 --- a/hw/misc/pvpanic-pci.c +++ b/hw/misc/pvpanic-pci.c @@ -55,7 +55,7 @@ static void pvpanic_pci_realizefn(PCIDevice *dev, Error **errp) static Property pvpanic_pci_properties[] = { DEFINE_PROP_UINT8("events", PVPanicPCIState, pvpanic.events, - PVPANIC_PANICKED | PVPANIC_CRASH_LOADED), + PVPANIC_EVENTS), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c index 80289ecf5f..4b2307d2c2 100644 --- a/hw/misc/pvpanic.c +++ b/hw/misc/pvpanic.c @@ -27,7 +27,7 @@ static void handle_event(int event) { static bool logged; - if (event & ~(PVPANIC_PANICKED | PVPANIC_CRASH_LOADED) && !logged) { + if (event & ~PVPANIC_EVENTS && !logged) { qemu_log_mask(LOG_GUEST_ERROR, "pvpanic: unknown event %#x.\n", event); logged = true; } diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h index fab94165d0..1e5b20e4ed 100644 --- a/include/hw/misc/pvpanic.h +++ b/include/hw/misc/pvpanic.h @@ -18,6 +18,10 @@ #include "exec/memory.h" #include 
"qom/object.h" +#include "standard-headers/misc/pvpanic.h" + +#define PVPANIC_EVENTS (PVPANIC_PANICKED | PVPANIC_CRASH_LOADED) + #define TYPE_PVPANIC_ISA_DEVICE "pvpanic" #define TYPE_PVPANIC_PCI_DEVICE "pvpanic-pci" From 462dc749c110fe8e41ae0fb554b9bc2f2671e973 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 27 May 2024 08:27:50 +0200 Subject: [PATCH 32/85] tests/qtest/pvpanic: use centralized definition of supported events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid the necessity to update all tests when new events are added to the device. Acked-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Thomas Weißschuh Message-Id: <20240527-pvpanic-shutdown-v8-4-5a28ec02558b@t-8ch.de> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/pvpanic-pci-test.c | 5 +++-- tests/qtest/pvpanic-test.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/qtest/pvpanic-pci-test.c b/tests/qtest/pvpanic-pci-test.c index 2c05b376ba..b372caf41d 100644 --- a/tests/qtest/pvpanic-pci-test.c +++ b/tests/qtest/pvpanic-pci-test.c @@ -16,6 +16,7 @@ #include "qapi/qmp/qdict.h" #include "libqos/pci.h" #include "libqos/pci-pc.h" +#include "hw/misc/pvpanic.h" #include "hw/pci/pci_regs.h" static void test_panic_nopause(void) @@ -34,7 +35,7 @@ static void test_panic_nopause(void) bar = qpci_iomap(dev, 0, NULL); qpci_memread(dev, bar, 0, &val, sizeof(val)); - g_assert_cmpuint(val, ==, 3); + g_assert_cmpuint(val, ==, PVPANIC_EVENTS); val = 1; qpci_memwrite(dev, bar, 0, &val, sizeof(val)); @@ -67,7 +68,7 @@ static void test_panic(void) bar = qpci_iomap(dev, 0, NULL); qpci_memread(dev, bar, 0, &val, sizeof(val)); - g_assert_cmpuint(val, ==, 3); + g_assert_cmpuint(val, ==, PVPANIC_EVENTS); val = 1; qpci_memwrite(dev, bar, 0, &val, sizeof(val)); diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c index 78f1cf8186..ccc603472f 100644 --- 
a/tests/qtest/pvpanic-test.c +++ b/tests/qtest/pvpanic-test.c @@ -10,6 +10,7 @@ #include "qemu/osdep.h" #include "libqtest.h" #include "qapi/qmp/qdict.h" +#include "hw/misc/pvpanic.h" static void test_panic_nopause(void) { @@ -20,7 +21,7 @@ static void test_panic_nopause(void) qts = qtest_init("-device pvpanic -action panic=none"); val = qtest_inb(qts, 0x505); - g_assert_cmpuint(val, ==, 3); + g_assert_cmpuint(val, ==, PVPANIC_EVENTS); qtest_outb(qts, 0x505, 0x1); @@ -43,7 +44,7 @@ static void test_panic(void) qts = qtest_init("-device pvpanic -action panic=pause"); val = qtest_inb(qts, 0x505); - g_assert_cmpuint(val, ==, 3); + g_assert_cmpuint(val, ==, PVPANIC_EVENTS); qtest_outb(qts, 0x505, 0x1); From 6269086b0179e3d70750672174ed7fbd29ac7eaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 27 May 2024 08:27:51 +0200 Subject: [PATCH 33/85] hw/misc/pvpanic: add support for normal shutdowns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shutdown requests are normally hardware dependent. By extending pvpanic to also handle shutdown requests, guests can submit such requests with an easily implementable and cross-platform mechanism. Acked-by: Cornelia Huck Signed-off-by: Thomas Weißschuh Message-Id: <20240527-pvpanic-shutdown-v8-5-5a28ec02558b@t-8ch.de> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/misc/pvpanic.c | 5 +++++ include/hw/misc/pvpanic.h | 4 +++- include/sysemu/runstate.h | 1 + system/runstate.c | 5 +++++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c index 4b2307d2c2..3b893340c0 100644 --- a/hw/misc/pvpanic.c +++ b/hw/misc/pvpanic.c @@ -41,6 +41,11 @@ static void handle_event(int event) qemu_system_guest_crashloaded(NULL); return; } + + if (event & PVPANIC_SHUTDOWN) { + qemu_system_guest_pvshutdown(); + return; + } } /* return supported events on read */ diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h index 1e5b20e4ed..9a71a5ad0d 100644 --- a/include/hw/misc/pvpanic.h +++ b/include/hw/misc/pvpanic.h @@ -20,7 +20,9 @@ #include "standard-headers/misc/pvpanic.h" -#define PVPANIC_EVENTS (PVPANIC_PANICKED | PVPANIC_CRASH_LOADED) +#define PVPANIC_EVENTS (PVPANIC_PANICKED | \ + PVPANIC_CRASH_LOADED | \ + PVPANIC_SHUTDOWN) #define TYPE_PVPANIC_ISA_DEVICE "pvpanic" #define TYPE_PVPANIC_PCI_DEVICE "pvpanic-pci" diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h index 0117d243c4..e210a37abf 100644 --- a/include/sysemu/runstate.h +++ b/include/sysemu/runstate.h @@ -104,6 +104,7 @@ void qemu_system_killed(int signal, pid_t pid); void qemu_system_reset(ShutdownCause reason); void qemu_system_guest_panicked(GuestPanicInformation *info); void qemu_system_guest_crashloaded(GuestPanicInformation *info); +void qemu_system_guest_pvshutdown(void); bool qemu_system_dump_in_progress(void); #endif diff --git a/system/runstate.c b/system/runstate.c index ec32e270cb..fc49fd3e61 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -584,6 +584,11 @@ void qemu_system_guest_crashloaded(GuestPanicInformation *info) qapi_free_GuestPanicInformation(info); } +void qemu_system_guest_pvshutdown(void) +{ + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); +} + void qemu_system_reset_request(ShutdownCause reason) { if (reboot_action == REBOOT_ACTION_SHUTDOWN && From 
8db1f7be788b23f8eca189fe4546298ed387e9cb Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Mon, 27 May 2024 08:27:52 +0200 Subject: [PATCH 34/85] pvpanic: Emit GUEST_PVSHUTDOWN QMP event on pvpanic shutdown signal Emit a QMP event on receiving a PVPANIC_SHUTDOWN event. Even though a typical SHUTDOWN event will be sent, it will be indistinguishable from a shutdown originating from other cases (e.g. KVM exit due to KVM_SYSTEM_EVENT_SHUTDOWN) that also issue the guest-shutdown cause. A management layer application can detect the new GUEST_PVSHUTDOWN event to determine if the guest is using the pvpanic interface to request shutdowns. Signed-off-by: Alejandro Jimenez Message-Id: <20240527-pvpanic-shutdown-v8-6-5a28ec02558b@t-8ch.de> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qapi/run-state.json | 14 ++++++++++++++ system/runstate.c | 1 + 2 files changed, 15 insertions(+) diff --git a/qapi/run-state.json b/qapi/run-state.json index f8773f23b2..5ac0fec852 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -462,6 +462,20 @@ { 'event': 'GUEST_CRASHLOADED', 'data': { 'action': 'GuestPanicAction', '*info': 'GuestPanicInformation' } } +## +# @GUEST_PVSHUTDOWN: +# +# Emitted when guest submits a shutdown request via pvpanic interface +# +# Since: 9.1 +# +# Example: +# +# <- { "event": "GUEST_PVSHUTDOWN", +# "timestamp": { "seconds": 1648245259, "microseconds": 893771 } } +## +{ 'event': 'GUEST_PVSHUTDOWN' } + ## # @GuestPanicAction: # diff --git a/system/runstate.c b/system/runstate.c index fc49fd3e61..c833316f6d 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -586,6 +586,7 @@ void qemu_system_guest_crashloaded(GuestPanicInformation *info) void qemu_system_guest_pvshutdown(void) { + qapi_event_send_guest_pvshutdown(); qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } From b279c3c88da3e8a301e4436fcdf233c0838ed4bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 27 May 2024 08:27:53 
+0200 Subject: [PATCH 35/85] tests/qtest/pvpanic: add tests for pvshutdown event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validate that a shutdown via the pvpanic device emits the correct QMP events. Signed-off-by: Thomas Weißschuh Reviewed-by: Thomas Huth Message-Id: <20240527-pvpanic-shutdown-v8-7-5a28ec02558b@t-8ch.de> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/pvpanic-pci-test.c | 39 ++++++++++++++++++++++++++++++++++ tests/qtest/pvpanic-test.c | 29 +++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/tests/qtest/pvpanic-pci-test.c b/tests/qtest/pvpanic-pci-test.c index b372caf41d..dc021c2fdf 100644 --- a/tests/qtest/pvpanic-pci-test.c +++ b/tests/qtest/pvpanic-pci-test.c @@ -85,11 +85,50 @@ static void test_panic(void) qtest_quit(qts); } +static void test_pvshutdown(void) +{ + uint8_t val; + QDict *response, *data; + QTestState *qts; + QPCIBus *pcibus; + QPCIDevice *dev; + QPCIBar bar; + + qts = qtest_init("-device pvpanic-pci,addr=04.0"); + pcibus = qpci_new_pc(qts, NULL); + dev = qpci_device_find(pcibus, QPCI_DEVFN(0x4, 0x0)); + qpci_device_enable(dev); + bar = qpci_iomap(dev, 0, NULL); + + qpci_memread(dev, bar, 0, &val, sizeof(val)); + g_assert_cmpuint(val, ==, PVPANIC_EVENTS); + + val = PVPANIC_SHUTDOWN; + qpci_memwrite(dev, bar, 0, &val, sizeof(val)); + + response = qtest_qmp_eventwait_ref(qts, "GUEST_PVSHUTDOWN"); + qobject_unref(response); + + response = qtest_qmp_eventwait_ref(qts, "SHUTDOWN"); + g_assert(qdict_haskey(response, "data")); + data = qdict_get_qdict(response, "data"); + g_assert(qdict_haskey(data, "guest")); + g_assert(qdict_get_bool(data, "guest")); + g_assert(qdict_haskey(data, "reason")); + g_assert_cmpstr(qdict_get_str(data, "reason"), ==, "guest-shutdown"); + qobject_unref(response); + + g_free(dev); + qpci_free_pc(pcibus); + qtest_quit(qts); +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); 
qtest_add_func("/pvpanic-pci/panic", test_panic); qtest_add_func("/pvpanic-pci/panic-nopause", test_panic_nopause); + qtest_add_func("/pvpanic-pci/pvshutdown", test_pvshutdown); return g_test_run(); } diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c index ccc603472f..d49d2ba931 100644 --- a/tests/qtest/pvpanic-test.c +++ b/tests/qtest/pvpanic-test.c @@ -58,11 +58,40 @@ static void test_panic(void) qtest_quit(qts); } +static void test_pvshutdown(void) +{ + uint8_t val; + QDict *response, *data; + QTestState *qts; + + qts = qtest_init("-device pvpanic"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, PVPANIC_EVENTS); + + qtest_outb(qts, 0x505, PVPANIC_SHUTDOWN); + + response = qtest_qmp_eventwait_ref(qts, "GUEST_PVSHUTDOWN"); + qobject_unref(response); + + response = qtest_qmp_eventwait_ref(qts, "SHUTDOWN"); + g_assert(qdict_haskey(response, "data")); + data = qdict_get_qdict(response, "data"); + g_assert(qdict_haskey(data, "guest")); + g_assert(qdict_get_bool(data, "guest")); + g_assert(qdict_haskey(data, "reason")); + g_assert_cmpstr(qdict_get_str(data, "reason"), ==, "guest-shutdown"); + qobject_unref(response); + + qtest_quit(qts); +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); qtest_add_func("/pvpanic/panic", test_panic); qtest_add_func("/pvpanic/panic-nopause", test_panic_nopause); + qtest_add_func("/pvpanic/pvshutdown", test_pvshutdown); return g_test_run(); } From 0c0cc13d319cf7b876f327fa1c5cc1866ad868cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 27 May 2024 08:27:54 +0200 Subject: [PATCH 36/85] Revert "docs/specs/pvpanic: mark shutdown event as not implemented" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The missing functionality has been implemented now. This reverts commit e739d1935c461d0668057e9dbba9d06f728d29ec. 
Signed-off-by: Thomas Weißschuh Message-Id: <20240527-pvpanic-shutdown-v8-8-5a28ec02558b@t-8ch.de> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/specs/pvpanic.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/specs/pvpanic.rst b/docs/specs/pvpanic.rst index b0f27860ec..61a80480ed 100644 --- a/docs/specs/pvpanic.rst +++ b/docs/specs/pvpanic.rst @@ -29,7 +29,7 @@ bit 1 a guest panic has happened and will be handled by the guest; the host should record it or report it, but should not affect the execution of the guest. -bit 2 (to be implemented) +bit 2 a regular guest shutdown has happened and should be processed by the host PCI Interface From a113d041e8d0b152d72a7c2bf47dd09aabf9ade2 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Tue, 28 May 2024 16:48:15 +0800 Subject: [PATCH 37/85] virtio-pci: Fix the failure process in kvm_virtio_pci_vector_use_one() kvm_virtio_pci_vector_use_one() only uses the irqfd/vector of its own queue. Therefore, the failure handling under its undo label is incorrect. To fix this, remove the label and return the error directly. Fixes: f9a09ca3ea ("vhost: add support for configure interrupt") Cc: qemu-stable@nongnu.org Signed-off-by: Cindy Lu Message-Id: <20240528084840.194538-1-lulu@redhat.com> Reviewed-by: Peter Maydell Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 7d62e92365..5941f1a94d 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -898,7 +898,7 @@ static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) } ret = kvm_virtio_pci_vq_vector_use(proxy, vector); if (ret < 0) { - goto undo; + return ret; } /* * If guest supports masking, set up irqfd now. 
@@ -908,25 +908,11 @@ static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); if (ret < 0) { kvm_virtio_pci_vq_vector_release(proxy, vector); - goto undo; + return ret; } } return 0; -undo: - - vector = virtio_queue_vector(vdev, queue_no); - if (vector >= msix_nr_vectors_allocated(dev)) { - return ret; - } - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { - ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); - if (ret < 0) { - return ret; - } - kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - return ret; } static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) { From e6c9c9e7f46a9ecaf1d90a68595915d65cd9d72d Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 31 May 2024 11:22:05 -0500 Subject: [PATCH 38/85] hw/cxl: Fix read from bogus memory Peter and coverity report: We've passed '&data' to address_space_write(), which means "read from the address on the stack where the function argument 'data' lives", so instead of writing 64 bytes of data to the guest , we'll write 64 bytes which start with a host pointer value and then continue with whatever happens to be on the host stack after that. Indeed the intention was to write 64 bytes of data at the address given. Fix the parameter to address_space_write(). Reported-by: Peter Maydell Link: https://lore.kernel.org/all/CAFEAcA-u4sytGwTKsb__Y+_+0O2-WwARntm3x8WNhvL1WfHOBg@mail.gmail.com/ Fixes: 6bda41a69bdc ("hw/cxl: Add clear poison mailbox command support.") Cc: Jonathan Cameron Signed-off-by: Ira Weiny Message-Id: <20240531-fix-poison-set-cacheline-v1-1-e3bc7e8f1158@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Jonathan Cameron --- hw/mem/cxl_type3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 5d4a1276be..3274e5dcbb 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -1292,7 +1292,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data) dpa_offset -= (vmr_size + pmr_size); } - address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data, + address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, data, CXL_CACHE_LINE_SIZE); return true; } From 5d98e18823af6d5230fca8098a7ee966aaedeb29 Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Thu, 6 Jun 2024 18:22:05 +0800 Subject: [PATCH 39/85] virtio-pci: implement No_Soft_Reset bit In current code, when guest does S3, virtio-gpu are reset due to the bit No_Soft_Reset is not set. After resetting, the display resources of virtio-gpu are destroyed, then the display can't come back and only show blank after resuming. Implement No_Soft_Reset bit of PCI_PM_CTRL register, then guest can check this bit, if this bit is set, the devices resetting will not be done, and then the display can work after resuming. No_Soft_Reset bit is implemented for all virtio devices, and was tested only on virtio-gpu device. Set it false by default for safety. Signed-off-by: Jiqian Chen Message-Id: <20240606102205.114671-3-Jiqian.Chen@amd.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/core/machine.c | 1 + hw/virtio/virtio-pci.c | 29 +++++++++++++++++++++++++++++ include/hw/virtio/virtio-pci.h | 5 +++++ 3 files changed, 35 insertions(+) diff --git a/hw/core/machine.c b/hw/core/machine.c index 655d75c21f..f4cba6496c 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -38,6 +38,7 @@ GlobalProperty hw_compat_9_0[] = { {"arm-cpu", "backcompat-cntfrq", "true" }, {"scsi-disk-base", "migrate-emulated-scsi-request", "false" }, {"vfio-pci", "skip-vsc-check", "false" }, + { "virtio-pci", "x-pcie-pm-no-soft-reset", "off" }, }; const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0); diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 5941f1a94d..9534730bba 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2222,6 +2222,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) pcie_cap_lnkctl_init(pci_dev); } + if (proxy->flags & VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET) { + pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, + PCI_PM_CTRL_NO_SOFT_RESET); + } + if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) { /* Init Power Management Control Register */ pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL, @@ -2284,11 +2289,33 @@ static void virtio_pci_reset(DeviceState *qdev) } } +static bool virtio_pci_no_soft_reset(PCIDevice *dev) +{ + uint16_t pmcsr; + + if (!pci_is_express(dev) || !dev->exp.pm_cap) { + return false; + } + + pmcsr = pci_get_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL); + + /* + * When No_Soft_Reset bit is set and the device + * is in D3hot state, don't reset device + */ + return (pmcsr & PCI_PM_CTRL_NO_SOFT_RESET) && + (pmcsr & PCI_PM_CTRL_STATE_MASK) == 3; +} + static void virtio_pci_bus_reset_hold(Object *obj, ResetType type) { PCIDevice *dev = PCI_DEVICE(obj); DeviceState *qdev = DEVICE(obj); + if (virtio_pci_no_soft_reset(dev)) { + return; + } + virtio_pci_reset(qdev); if (pci_is_express(dev)) { @@ -2328,6 +2355,8 @@ static Property virtio_pci_properties[] = { 
VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true), DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_INIT_PM_BIT, true), + DEFINE_PROP_BIT("x-pcie-pm-no-soft-reset", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET_BIT, false), DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_INIT_FLR_BIT, true), DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags, diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 59d88018c1..9e67ba38c7 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -43,6 +43,7 @@ enum { VIRTIO_PCI_FLAG_INIT_FLR_BIT, VIRTIO_PCI_FLAG_AER_BIT, VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT, + VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET_BIT, }; /* Need to activate work-arounds for buggy guests at vmstate load. */ @@ -79,6 +80,10 @@ enum { /* Init Power Management */ #define VIRTIO_PCI_FLAG_INIT_PM (1 << VIRTIO_PCI_FLAG_INIT_PM_BIT) +/* Init The No_Soft_Reset bit of Power Management */ +#define VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET \ + (1 << VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET_BIT) + /* Init Function Level Reset capability */ #define VIRTIO_PCI_FLAG_INIT_FLR (1 << VIRTIO_PCI_FLAG_INIT_FLR_BIT) From f72fc16910c8f44edf052f52672e0e63bbbc773c Mon Sep 17 00:00:00 2001 From: Yuxue Liu Date: Thu, 11 Apr 2024 15:35:55 +0800 Subject: [PATCH 40/85] vhost-user-test: no set non-blocking for call fd less than 0. In the scenario where vhost-user sets eventfd to -1, qemu_chr_fe_get_msgfds retrieves fd as -1. When vhost_user_read receives, it does not perform blocking operations on the descriptor with fd=-1, so non-blocking operations should not be performed here either. This is a normal use case. Calling g_unix_set_fd_nonblocking at this point will cause the test to interrupt. When vhost_user_write sets the call fd to -1, it sets the number of fds to 0, so the fds obtained by qemu_chr_fe_get_msgfds will also be 0.
Signed-off-by: Yuxue Liu Message-Id: <20240411073555.1357-1-yuxue.liu@jaguarmicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/vhost-user-test.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index d4e437265f..255bde54ab 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -458,7 +458,10 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) case VHOST_USER_SET_VRING_KICK: case VHOST_USER_SET_VRING_CALL: /* consume the fd */ - qemu_chr_fe_get_msgfds(chr, &fd, 1); + if (!qemu_chr_fe_get_msgfds(chr, &fd, 1) && fd < 0) { + qos_printf("call fd: %d, do not set non-blocking\n", fd); + break; + } /* * This is a non-blocking eventfd. * The receive function forces it to be blocking, From e05ee2994a9c188fc49a9ddf70b79ed7f1808e2f Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Thu, 6 Jun 2024 22:08:58 +0800 Subject: [PATCH 41/85] i386/apic: Add hint on boot failure because of disabling x2APIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the Q35 supports up to 4096 vCPUs (since v9.0), but for TCG cases, if x2APIC is not actively enabled to boot more than 255 vCPUs ( e.g., qemu-system-i386 -M pc-q35-9.0 -smp 666), the following error is reported: Unexpected error in apic_common_set_id() at ../hw/intc/apic_common.c:449: qemu-system-i386: APIC ID 255 requires x2APIC feature in CPU Aborted (core dumped) This error can be resolved by setting x2apic=on in -cpu. In order to better help users deal with this scenario, add the error hint to instruct users on how to enable the x2apic feature. Then, the error report becomes the following: Unexpected error in apic_common_set_id() at ../hw/intc/apic_common.c:448: qemu-system-i386: APIC ID 255 requires x2APIC feature in CPU Try x2apic=on in -cpu. 
Aborted (core dumped) Note since @errp is &error_abort, error_append_hint() can't be applied on @errp. And in order to separate the exact error message from the (perhaps effectively) hint, adding a hint via error_append_hint() is also necessary. Therefore, introduce @local_error in apic_common_set_id() to handle both the error message and the error hint. Suggested-by: Philippe Mathieu-Daudé Signed-off-by: Zhao Liu Message-Id: <20240606140858.2157106-1-zhao1.liu@intel.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/intc/apic_common.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index d8fc1e2815..c13cdd7994 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -433,6 +433,7 @@ static void apic_common_set_id(Object *obj, Visitor *v, const char *name, APICCommonState *s = APIC_COMMON(obj); DeviceState *dev = DEVICE(obj); uint32_t value; + Error *local_err = NULL; if (dev->realized) { qdev_prop_set_after_realize(dev, name, errp); @@ -444,7 +445,11 @@ static void apic_common_set_id(Object *obj, Visitor *v, const char *name, } if (value >= 255 && !cpu_has_x2apic_feature(&s->cpu->env)) { - error_setg(errp, "APIC ID %d requires x2APIC feature in CPU", value); + error_setg(&local_err, + "APIC ID %d requires x2APIC feature in CPU", + value); + error_append_hint(&local_err, "Try x2apic=on in -cpu.\n"); + error_propagate(errp, local_err); return; } From 25b8a0f40c7f408442c5fd4da195fce9997cfb78 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 22:37:50 +0900 Subject: [PATCH 42/85] hw/virtio: Free vqs after vhost_dev_cleanup() This fixes LeakSanitizer warnings. Signed-off-by: Akihiko Odaki Message-Id: <20240627-san-v2-7-750bb0946dbd@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost-user-base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c index 11e72b1e3b..2bc3423326 100644 --- a/hw/virtio/vhost-user-base.c +++ b/hw/virtio/vhost-user-base.c @@ -223,6 +223,7 @@ static void vub_disconnect(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBase *vub = VHOST_USER_BASE(vdev); + struct vhost_virtqueue *vhost_vqs = vub->vhost_dev.vqs; if (!vub->connected) { goto done; @@ -231,6 +232,7 @@ static void vub_disconnect(DeviceState *dev) vub_stop(vdev); vhost_dev_cleanup(&vub->vhost_dev); + g_free(vhost_vqs); done: /* Re-instate the event handler for new connections */ From 704391f94a5494f10b886ba79c157363a79b1239 Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Thu, 13 Jun 2024 08:49:12 +0300 Subject: [PATCH 43/85] virtio-iommu: add error check before assert A fuzzer case discovered by Zheyu Ma causes an assert failure. Add a check before the assert, and respond with an error before moving on to the next queue element. To reproduce the failure: cat << EOF | \ qemu-system-x86_64 \ -display none -machine accel=qtest -m 512M -machine q35 -nodefaults \ -device virtio-iommu -qtest stdio outl 0xcf8 0x80000804 outw 0xcfc 0x06 outl 0xcf8 0x80000820 outl 0xcfc 0xe0004000 write 0x10000e 0x1 0x01 write 0xe0004020 0x4 0x00001000 write 0xe0004028 0x4 0x00101000 write 0xe000401c 0x1 0x01 write 0x106000 0x1 0x05 write 0x100001 0x1 0x60 write 0x100002 0x1 0x10 write 0x100009 0x1 0x04 write 0x10000c 0x1 0x01 write 0x100018 0x1 0x04 write 0x10001c 0x1 0x02 write 0x101003 0x1 0x01 write 0xe0007001 0x1 0x00 EOF Reported-by: Zheyu Ma Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2359 Signed-off-by: Manos Pitsidianakis Message-Id: <20240613-fuzz-2359-fix-v2-manos.pitsidianakis@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio-iommu.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index b9a7ddcd14..ed7426afc7 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -974,6 +974,9 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) iov = elem->out_sg; sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head)); if (unlikely(sz != sizeof(head))) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: read %zu bytes from command head" + "but expected %zu\n", __func__, sz, sizeof(head)); tail.status = VIRTIO_IOMMU_S_DEVERR; goto out; } @@ -1010,6 +1013,25 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) out: sz = iov_from_buf(elem->in_sg, elem->in_num, 0, buf ? buf : &tail, output_size); + if (unlikely(sz != output_size)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: wrote %zu bytes to command response" + "but response size is %zu\n", + __func__, sz, output_size); + tail.status = VIRTIO_IOMMU_S_DEVERR; + /* + * We checked that sizeof(tail) can fit to elem->in_sg at the + * beginning of the loop + */ + output_size = sizeof(tail); + g_free(buf); + buf = NULL; + sz = iov_from_buf(elem->in_sg, + elem->in_num, + 0, + &tail, + output_size); + } assert(sz == output_size); virtqueue_push(vq, elem, sz); From 7c211eb078c42146ee9a441cc028fbc4c378ef5a Mon Sep 17 00:00:00 2001 From: BillXiang Date: Thu, 13 Jun 2024 14:51:50 +0800 Subject: [PATCH 44/85] vhost-user: Skip unnecessary duplicated VHOST_USER_SET_LOG_BASE requests The VHOST_USER_SET_LOG_BASE requests should be categorized into non-vring specific messages, and should be sent only once. If sent more than once, dpdk will munmap the old log_addr, which may already be in use, and cause a segmentation fault. Signed-off-by: BillXiang Message-Id: <20240613065150.3100-1-xiangwencheng@dayudpu.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/virtio/vhost-user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index c407ea8939..00561daa06 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -371,6 +371,7 @@ static bool vhost_user_per_device_request(VhostUserRequest request) case VHOST_USER_RESET_DEVICE: case VHOST_USER_ADD_MEM_REG: case VHOST_USER_REM_MEM_REG: + case VHOST_USER_SET_LOG_BASE: return true; default: return false; From d4f471eb7e562c2cc398448a1c1e7ee838ec30bd Mon Sep 17 00:00:00 2001 From: Dmitry Frolov Date: Thu, 13 Jun 2024 17:35:30 +0300 Subject: [PATCH 45/85] hw/net/virtio-net.c: fix crash in iov_copy() A crash found while fuzzing device virtio-net-socket-check-used. Assertion "offset == 0" in iov_copy() fails if less than guest_hdr_len bytes were transmitted. Signed-off-by: Dmitry Frolov Message-Id: <20240613143529.602591-2-frolov@swemel.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 9c7e85caea..8f30972708 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -2735,6 +2735,10 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) */ assert(n->host_hdr_len <= n->guest_hdr_len); if (n->host_hdr_len != n->guest_hdr_len) { + if (iov_size(out_sg, out_num) < n->guest_hdr_len) { + virtio_error(vdev, "virtio-net header is invalid"); + goto detach; + } unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), out_sg, out_num, 0, n->host_hdr_len); From 0aa7f10c7af222a32e49e38df8383e394a0aa5c3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 18 Jun 2024 12:00:31 +0200 Subject: [PATCH 46/85] qapi: clarify that the default is backend dependent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default value of the @share option of the @MemoryBackendProperties really depends on the backend type, so let's document the
default values in the same place where we define the option to avoid dispersing the information. Cc: David Hildenbrand Suggested-by: Markus Armbruster Reviewed-by: Markus Armbruster Signed-off-by: Stefano Garzarella Message-Id: <20240618100043.144657-2-sgarzare@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qapi/qom.json | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/qapi/qom.json b/qapi/qom.json index 8bd299265e..9b8f6a7ab5 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -600,7 +600,9 @@ # preallocation threads (default: none) (since 7.2) # # @share: if false, the memory is private to QEMU; if true, it is -# shared (default: false) +# shared (default false for backends memory-backend-file and +# memory-backend-ram, true for backends memory-backend-epc and +# memory-backend-memfd) # # @reserve: if true, reserve swap space (or huge pages) if applicable # (default: true) (since 6.1) @@ -700,8 +702,6 @@ # # Properties for memory-backend-memfd objects. # -# The @share boolean option is true by default with memfd. -# # @hugetlb: if true, the file to be created resides in the hugetlbfs # filesystem (default: false) # @@ -726,8 +726,6 @@ # # Properties for memory-backend-epc objects. # -# The @share boolean option is true by default with epc -# # The @merge boolean option is false by default with epc # # The @dump boolean option is false by default with epc From 516dfbb783484959cf33f051864f2e44cbed45ca Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 18 Jun 2024 12:00:32 +0200 Subject: [PATCH 47/85] libvhost-user: set msg.msg_control to NULL when it is empty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some OS (e.g. macOS) sendmsg() returns -1 (errno EINVAL) if the `struct msghdr` has the field `msg_controllen` set to 0, but `msg_control` is not NULL. 
Reviewed-by: Eric Blake Reviewed-by: David Hildenbrand Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Acked-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Message-Id: <20240618100043.144657-3-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- subprojects/libvhost-user/libvhost-user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 8adb277d54..53bf1adda6 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -632,6 +632,7 @@ vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize); } else { msg.msg_controllen = 0; + msg.msg_control = NULL; } do { From 92b58bc7e9086e489295040d408118a81c47b31d Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 18 Jun 2024 12:00:33 +0200 Subject: [PATCH 48/85] libvhost-user: fail vu_message_write() if sendmsg() is failing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In vu_message_write() we use sendmsg() to send the message header, then a write() to send the payload. If sendmsg() fails we should avoid sending the payload, since we were unable to send the header. Discovered before fixing the issue with the previous patch, where sendmsg() failed on macOS due to wrong parameters, but the frontend still sent the payload which the backend incorrectly interpreted as a wrong header. Reviewed-by: Daniel P. Berrangé Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Acked-by: Stefan Hajnoczi Reviewed-by: David Hildenbrand Signed-off-by: Stefano Garzarella Message-Id: <20240618100043.144657-4-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- subprojects/libvhost-user/libvhost-user.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 53bf1adda6..ea27683dac 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -639,6 +639,11 @@ vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) rc = sendmsg(conn_fd, &msg, 0); } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + if (rc <= 0) { + vu_panic(dev, "Error while writing: %s", strerror(errno)); + return false; + } + if (vmsg->size) { do { if (vmsg->data) { From ebdede644bbf5744f91dbe0d39742f17b03c4e10 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 18 Jun 2024 12:00:34 +0200 Subject: [PATCH 49/85] libvhost-user: mask F_INFLIGHT_SHMFD if memfd is not supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit libvhost-user will panic when receiving VHOST_USER_GET_INFLIGHT_FD message if MFD_ALLOW_SEALING is not defined, since it's not able to create a memfd. VHOST_USER_GET_INFLIGHT_FD is used only if VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD is negotiated. So, let's mask that feature if the backend is not able to properly handle these messages. Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Acked-by: Stefan Hajnoczi Reviewed-by: David Hildenbrand Signed-off-by: Stefano Garzarella Message-Id: <20240618100043.144657-5-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- subprojects/libvhost-user/libvhost-user.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index ea27683dac..9c630c2170 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -1674,6 +1674,17 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) features |= dev->iface->get_protocol_features(dev); } +#ifndef MFD_ALLOW_SEALING + /* + * If MFD_ALLOW_SEALING is not defined, we are not able to handle + * VHOST_USER_GET_INFLIGHT_FD messages, since we can't create a memfd. + * Those messages are used only if VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD + * is negotiated. A device implementation can enable it, so let's mask + * it to avoid a runtime panic. + */ + features &= ~(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD); +#endif + vmsg_set_reply_u64(vmsg, features); return true; } From 4c58843e5d3192c67394b28a3330144ea56eefac Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 18 Jun 2024 12:00:35 +0200 Subject: [PATCH 50/85] vhost-user-server: do not set memory fd non-blocking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In vhost-user-server we set all fd received from the other peer in non-blocking mode. For some of them (e.g. memfd, shm_open, etc.) it's not really needed, because we don't use these fd with blocking operations, but only to map memory. In addition, in some systems this operation can fail (e.g. in macOS setting an fd returned by shm_open() non-blocking fails with errno = ENOTTY). So, let's avoid setting fd non-blocking for those messages that we know carry memory fd (e.g. VHOST_USER_ADD_MEM_REG, VHOST_USER_SET_MEM_TABLE). Reviewed-by: Daniel P. Berrangé Acked-by: Stefan Hajnoczi Reviewed-by: David Hildenbrand Signed-off-by: Stefano Garzarella Message-Id: <20240618100043.144657-6-sgarzare@redhat.com> Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin --- util/vhost-user-server.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c index 3bfb1ad3ec..b19229074a 100644 --- a/util/vhost-user-server.c +++ b/util/vhost-user-server.c @@ -65,6 +65,18 @@ static void vmsg_close_fds(VhostUserMsg *vmsg) static void vmsg_unblock_fds(VhostUserMsg *vmsg) { int i; + + /* + * These messages carry fd used to map memory, not to send/receive messages, + * so this operation is useless. In addition, in some systems this + * operation can fail (e.g. in macOS setting an fd returned by shm_open() + * non-blocking fails with errno = ENOTTY) + */ + if (vmsg->request == VHOST_USER_ADD_MEM_REG || + vmsg->request == VHOST_USER_SET_MEM_TABLE) { + return; + } + for (i = 0; i < vmsg->fd_num; i++) { qemu_socket_set_nonblock(vmsg->fds[i]); } From 03582094da1ea7ce978cec58008c81f7458ee8dd Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 18 Jun 2024 12:04:39 +0200 Subject: [PATCH 51/85] contrib/vhost-user-blk: fix bind() using the right size of the address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On macOS passing `-s /tmp/vhost.socket` parameter to the vhost-user-blk application, the bind was done on `/tmp/vhost.socke` pathname, missing the last character. This sounds like one of the portability problems described in the unix(7) manpage: Pathname sockets When binding a socket to a pathname, a few rules should be observed for maximum portability and ease of coding: • The pathname in sun_path should be null-terminated. • The length of the pathname, including the terminating null byte, should not exceed the size of sun_path. • The addrlen argument that describes the enclosing sockaddr_un structure should have a value of at least: offsetof(struct sockaddr_un, sun_path) + strlen(addr.sun_path)+1 or, more simply, addrlen can be specified as sizeof(struct sockaddr_un). 
So let's follow the last advice and simplify the code as well. Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Acked-by: Stefan Hajnoczi Reviewed-by: David Hildenbrand Signed-off-by: Stefano Garzarella Message-Id: <20240618100440.145664-1-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/vhost-user-blk/vhost-user-blk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c index 89e5f11a64..a8ab9269a2 100644 --- a/contrib/vhost-user-blk/vhost-user-blk.c +++ b/contrib/vhost-user-blk/vhost-user-blk.c @@ -469,7 +469,6 @@ static int unix_sock_new(char *unix_fn) { int sock; struct sockaddr_un un; - size_t len; assert(unix_fn); @@ -481,10 +480,9 @@ static int unix_sock_new(char *unix_fn) un.sun_family = AF_UNIX; (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn); - len = sizeof(un.sun_family) + strlen(un.sun_path); (void)unlink(unix_fn); - if (bind(sock, (struct sockaddr *)&un, len) < 0) { + if (bind(sock, (struct sockaddr *)&un, sizeof(un)) < 0) { perror("bind"); goto fail; } From 5ab04420c3de11ae4a573b08b53584a2a0c5dd00 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 18 Jun 2024 12:04:47 +0200 Subject: [PATCH 52/85] contrib/vhost-user-*: use QEMU bswap helper functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's replace the calls to le*toh() and htole*() with qemu/bswap.h helpers to make the code more portable. Suggested-by: Philippe Mathieu-Daudé Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Acked-by: Stefan Hajnoczi Reviewed-by: David Hildenbrand Signed-off-by: Stefano Garzarella Message-Id: <20240618100447.145697-1-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- contrib/vhost-user-blk/vhost-user-blk.c | 9 +++++---- contrib/vhost-user-input/main.c | 16 ++++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c index a8ab9269a2..9492146855 100644 --- a/contrib/vhost-user-blk/vhost-user-blk.c +++ b/contrib/vhost-user-blk/vhost-user-blk.c @@ -16,6 +16,7 @@ */ #include "qemu/osdep.h" +#include "qemu/bswap.h" #include "standard-headers/linux/virtio_blk.h" #include "libvhost-user-glib.h" @@ -194,8 +195,8 @@ vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt, #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) VubDev *vdev_blk = req->vdev_blk; desc = buf; - uint64_t range[2] = { le64toh(desc->sector) << 9, - le32toh(desc->num_sectors) << 9 }; + uint64_t range[2] = { le64_to_cpu(desc->sector) << 9, + le32_to_cpu(desc->num_sectors) << 9 }; if (type == VIRTIO_BLK_T_DISCARD) { if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) { g_free(buf); @@ -267,13 +268,13 @@ static int vub_virtio_process_req(VubDev *vdev_blk, req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base; in_num--; - type = le32toh(req->out->type); + type = le32_to_cpu(req->out->type); switch (type & ~VIRTIO_BLK_T_BARRIER) { case VIRTIO_BLK_T_IN: case VIRTIO_BLK_T_OUT: { ssize_t ret = 0; bool is_write = type & VIRTIO_BLK_T_OUT; - req->sector_num = le64toh(req->out->sector); + req->sector_num = le64_to_cpu(req->out->sector); if (is_write) { ret = vub_writev(req, &elem->out_sg[1], out_num); } else { diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c index 081230da54..f3362d41ac 100644 --- a/contrib/vhost-user-input/main.c +++ b/contrib/vhost-user-input/main.c @@ -51,8 +51,8 @@ static void vi_input_send(VuInput *vi, struct virtio_input_event *event) vi->queue[vi->qindex++].event = *event; /* ... until we see a report sync ... 
*/ - if (event->type != htole16(EV_SYN) || - event->code != htole16(SYN_REPORT)) { + if (event->type != cpu_to_le16(EV_SYN) || + event->code != cpu_to_le16(SYN_REPORT)) { return; } @@ -103,9 +103,9 @@ vi_evdev_watch(VuDev *dev, int condition, void *data) g_debug("input %d %d %d", evdev.type, evdev.code, evdev.value); - virtio.type = htole16(evdev.type); - virtio.code = htole16(evdev.code); - virtio.value = htole32(evdev.value); + virtio.type = cpu_to_le16(evdev.type); + virtio.code = cpu_to_le16(evdev.code); + virtio.value = cpu_to_le32(evdev.value); vi_input_send(vi, &virtio); } } @@ -124,9 +124,9 @@ static void vi_handle_status(VuInput *vi, virtio_input_event *event) evdev.input_event_sec = tval.tv_sec; evdev.input_event_usec = tval.tv_usec; - evdev.type = le16toh(event->type); - evdev.code = le16toh(event->code); - evdev.value = le32toh(event->value); + evdev.type = le16_to_cpu(event->type); + evdev.code = le16_to_cpu(event->code); + evdev.value = le32_to_cpu(event->value); rc = write(vi->evdevfd, &evdev, sizeof(evdev)); if (rc == -1) { From 4e647fa08586a5ada74cf6d3ae1cdf3a027202cb Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 18 Jun 2024 12:05:19 +0200 Subject: [PATCH 53/85] hostmem: add a new memory backend based on POSIX shm_open() shm_open() creates and opens a new POSIX shared memory object. A POSIX shared memory object allows creating memory backend with an associated file descriptor that can be shared with external processes (e.g. vhost-user). The new `memory-backend-shm` can be used as an alternative when `memory-backend-memfd` is not available (Linux only), since shm_open() should be provided by any POSIX-compliant operating system. This backend mimics memfd, allocating memory that is practically anonymous. In theory shm_open() requires a name, but this is allocated for a short time interval and shm_unlink() is called right after shm_open(). 
After that, only fd is shared with external processes (e.g., vhost-user) as if it were associated with anonymous memory. In the future we may also allow the user to specify the name to be passed to shm_open(), but for now we keep the backend simple, mimicking anonymous memory such as memfd. Acked-by: David Hildenbrand Acked-by: Stefan Hajnoczi Acked-by: Markus Armbruster (QAPI schema) Signed-off-by: Stefano Garzarella Message-Id: <20240618100519.145853-1-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- backends/hostmem-shm.c | 123 +++++++++++++++++++++++++++++ backends/meson.build | 1 + docs/system/devices/vhost-user.rst | 5 +- qapi/qom.json | 23 +++++- qemu-options.hx | 16 ++++ 5 files changed, 164 insertions(+), 4 deletions(-) create mode 100644 backends/hostmem-shm.c diff --git a/backends/hostmem-shm.c b/backends/hostmem-shm.c new file mode 100644 index 0000000000..374edc3db8 --- /dev/null +++ b/backends/hostmem-shm.c @@ -0,0 +1,123 @@ +/* + * QEMU host POSIX shared memory object backend + * + * Copyright (C) 2024 Red Hat Inc + * + * Authors: + * Stefano Garzarella + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "sysemu/hostmem.h" +#include "qapi/error.h" + +#define TYPE_MEMORY_BACKEND_SHM "memory-backend-shm" + +OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendShm, MEMORY_BACKEND_SHM) + +struct HostMemoryBackendShm { + HostMemoryBackend parent_obj; +}; + +static bool +shm_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) +{ + g_autoptr(GString) shm_name = g_string_new(NULL); + g_autofree char *backend_name = NULL; + uint32_t ram_flags; + int fd, oflag; + mode_t mode; + + if (!backend->size) { + error_setg(errp, "can't create shm backend with size 0"); + return false; + } + + if (!backend->share) { + error_setg(errp, "can't create shm backend with `share=off`"); + return false; + } + + /* + * Let's use `mode = 0` because we don't want other processes to open our + * memory unless we share the file descriptor with them. + */ + mode = 0; + oflag = O_RDWR | O_CREAT | O_EXCL; + backend_name = host_memory_backend_get_name(backend); + + /* + * Some operating systems allow creating anonymous POSIX shared memory + * objects (e.g. FreeBSD provides the SHM_ANON constant), but this is not + * defined by POSIX, so let's create a unique name. + * + * From Linux's shm_open(3) man-page: + * For portable use, a shared memory object should be identified + * by a name of the form /somename;" + */ + g_string_printf(shm_name, "/qemu-" FMT_pid "-shm-%s", getpid(), + backend_name); + + fd = shm_open(shm_name->str, oflag, mode); + if (fd < 0) { + error_setg_errno(errp, errno, + "failed to create POSIX shared memory"); + return false; + } + + /* + * We have the file descriptor, so we no longer need to expose the + * POSIX shared memory object. However it will remain allocated as long as + * there are file descriptors pointing to it. 
+ */ + shm_unlink(shm_name->str); + + if (ftruncate(fd, backend->size) == -1) { + error_setg_errno(errp, errno, + "failed to resize POSIX shared memory to %" PRIu64, + backend->size); + close(fd); + return false; + } + + ram_flags = RAM_SHARED; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + + return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), + backend_name, backend->size, + ram_flags, fd, 0, errp); +} + +static void +shm_backend_instance_init(Object *obj) +{ + HostMemoryBackendShm *m = MEMORY_BACKEND_SHM(obj); + + MEMORY_BACKEND(m)->share = true; +} + +static void +shm_backend_class_init(ObjectClass *oc, void *data) +{ + HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc); + + bc->alloc = shm_backend_memory_alloc; +} + +static const TypeInfo shm_backend_info = { + .name = TYPE_MEMORY_BACKEND_SHM, + .parent = TYPE_MEMORY_BACKEND, + .instance_init = shm_backend_instance_init, + .class_init = shm_backend_class_init, + .instance_size = sizeof(HostMemoryBackendShm), +}; + +static void register_types(void) +{ + type_register_static(&shm_backend_info); +} + +type_init(register_types); diff --git a/backends/meson.build b/backends/meson.build index 106312f0c8..749b491f12 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -13,6 +13,7 @@ system_ss.add([files( if host_os != 'windows' system_ss.add(files('rng-random.c')) system_ss.add(files('hostmem-file.c')) + system_ss.add([files('hostmem-shm.c'), rt]) endif if host_os == 'linux' system_ss.add(files('hostmem-memfd.c')) diff --git a/docs/system/devices/vhost-user.rst b/docs/system/devices/vhost-user.rst index 9b2da106ce..35259d8ec7 100644 --- a/docs/system/devices/vhost-user.rst +++ b/docs/system/devices/vhost-user.rst @@ -98,8 +98,9 @@ Shared memory object In order for the daemon to access the VirtIO queues to process the requests it needs access to the guest's address space. This is -achieved via the ``memory-backend-file`` or ``memory-backend-memfd`` -objects. 
A reference to a file-descriptor which can access this object +achieved via the ``memory-backend-file``, ``memory-backend-memfd``, or +``memory-backend-shm`` objects. +A reference to a file-descriptor which can access this object will be passed via the socket as part of the protocol negotiation. Currently the shared memory object needs to match the size of the main diff --git a/qapi/qom.json b/qapi/qom.json index 9b8f6a7ab5..92b0fea76c 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -601,8 +601,8 @@ # # @share: if false, the memory is private to QEMU; if true, it is # shared (default false for backends memory-backend-file and -# memory-backend-ram, true for backends memory-backend-epc and -# memory-backend-memfd) +# memory-backend-ram, true for backends memory-backend-epc, +# memory-backend-memfd, and memory-backend-shm) # # @reserve: if true, reserve swap space (or huge pages) if applicable # (default: true) (since 6.1) @@ -721,6 +721,21 @@ '*hugetlbsize': 'size', '*seal': 'bool' } } +## +# @MemoryBackendShmProperties: +# +# Properties for memory-backend-shm objects. +# +# This memory backend supports only shared memory, which is the +# default. 
+# +# Since: 9.1 +## +{ 'struct': 'MemoryBackendShmProperties', + 'base': 'MemoryBackendProperties', + 'data': { }, + 'if': 'CONFIG_POSIX' } + ## # @MemoryBackendEpcProperties: # @@ -1049,6 +1064,8 @@ { 'name': 'memory-backend-memfd', 'if': 'CONFIG_LINUX' }, 'memory-backend-ram', + { 'name': 'memory-backend-shm', + 'if': 'CONFIG_POSIX' }, 'pef-guest', { 'name': 'pr-manager-helper', 'if': 'CONFIG_LINUX' }, @@ -1121,6 +1138,8 @@ 'memory-backend-memfd': { 'type': 'MemoryBackendMemfdProperties', 'if': 'CONFIG_LINUX' }, 'memory-backend-ram': 'MemoryBackendProperties', + 'memory-backend-shm': { 'type': 'MemoryBackendShmProperties', + 'if': 'CONFIG_POSIX' }, 'pr-manager-helper': { 'type': 'PrManagerHelperProperties', 'if': 'CONFIG_LINUX' }, 'qtest': 'QtestProperties', diff --git a/qemu-options.hx b/qemu-options.hx index 8ca7f34ef0..ad6521ef5e 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -5240,6 +5240,22 @@ SRST The ``share`` boolean option is on by default with memfd. + ``-object memory-backend-shm,id=id,merge=on|off,dump=on|off,share=on|off,prealloc=on|off,size=size,host-nodes=host-nodes,policy=default|preferred|bind|interleave`` + Creates a POSIX shared memory backend object, which allows + QEMU to share the memory with an external process (e.g. when + using vhost-user). + + ``memory-backend-shm`` is a more portable and less featureful version + of ``memory-backend-memfd``. It can then be used in any POSIX system, + especially when memfd is not supported. + + Please refer to ``memory-backend-file`` for a description of the + options. + + The ``share`` boolean option is on by default with shm. Setting it to + off will cause a failure during allocation because it is not supported + by this backend. + ``-object iommufd,id=id[,fd=fd]`` Creates an iommufd backend which allows control of DMA mapping through the ``/dev/iommu`` device. 
From e349062727c7b35ac4c9bd08c534f64f571389fe Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 18 Jun 2024 12:05:27 +0200 Subject: [PATCH 54/85] tests/qtest/vhost-user-blk-test: use memory-backend-shm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `memory-backend-memfd` is available only on Linux while the new `memory-backend-shm` can be used on any POSIX-compliant operating system. Let's use it so we can run the test in multiple environments. Since we are here, let's remove `share=on` which is the default for shm (and also for memfd). Acked-by: Thomas Huth Acked-by: Stefan Hajnoczi Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Reviewed-by: David Hildenbrand Signed-off-by: Stefano Garzarella Message-Id: <20240618100527.145883-1-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/vhost-user-blk-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qtest/vhost-user-blk-test.c b/tests/qtest/vhost-user-blk-test.c index 117b9acd10..ea90d41232 100644 --- a/tests/qtest/vhost-user-blk-test.c +++ b/tests/qtest/vhost-user-blk-test.c @@ -906,7 +906,7 @@ static void start_vhost_user_blk(GString *cmd_line, int vus_instances, vhost_user_blk_bin); g_string_append_printf(cmd_line, - " -object memory-backend-memfd,id=mem,size=256M,share=on " + " -object memory-backend-shm,id=mem,size=256M " " -M memory-backend=mem -m 256M "); for (i = 0; i < vus_instances; i++) { From 0173ce4b2bb4f159f4a6d54e14adbe35f826a848 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 18 Jun 2024 12:05:34 +0200 Subject: [PATCH 55/85] tests/qtest/vhost-user-test: add a test case for memory-backend-shm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `memory-backend-shm` can be used with vhost-user devices, so let's add a new test case for it.
Acked-by: Thomas Huth Acked-by: Stefan Hajnoczi Reviewed-by: David Hildenbrand Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Stefano Garzarella Message-Id: <20240618100534.145917-1-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/vhost-user-test.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index 255bde54ab..0fa8951c9f 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -44,6 +44,8 @@ "mem-path=%s,share=on -numa node,memdev=mem" #define QEMU_CMD_MEMFD " -m %d -object memory-backend-memfd,id=mem,size=%dM," \ " -numa node,memdev=mem" +#define QEMU_CMD_SHM " -m %d -object memory-backend-shm,id=mem,size=%dM," \ + " -numa node,memdev=mem" #define QEMU_CMD_CHR " -chardev socket,id=%s,path=%s%s" #define QEMU_CMD_NETDEV " -netdev vhost-user,id=hs0,chardev=%s,vhostforce=on" @@ -195,6 +197,7 @@ enum test_memfd { TEST_MEMFD_AUTO, TEST_MEMFD_YES, TEST_MEMFD_NO, + TEST_MEMFD_SHM, }; static void append_vhost_net_opts(TestServer *s, GString *cmd_line, @@ -228,6 +231,8 @@ static void append_mem_opts(TestServer *server, GString *cmd_line, if (memfd == TEST_MEMFD_YES) { g_string_append_printf(cmd_line, QEMU_CMD_MEMFD, size, size); + } else if (memfd == TEST_MEMFD_SHM) { + g_string_append_printf(cmd_line, QEMU_CMD_SHM, size, size); } else { const char *root = init_hugepagefs() ? 
: server->tmpfs; @@ -791,6 +796,19 @@ static void *vhost_user_test_setup_memfd(GString *cmd_line, void *arg) return server; } +static void *vhost_user_test_setup_shm(GString *cmd_line, void *arg) +{ + TestServer *server = test_server_new("vhost-user-test", arg); + test_server_listen(server); + + append_mem_opts(server, cmd_line, 256, TEST_MEMFD_SHM); + server->vu_ops->append_opts(server, cmd_line, ""); + + g_test_queue_destroy(vhost_user_test_cleanup, server); + + return server; +} + static void test_read_guest_mem(void *obj, void *arg, QGuestAllocator *alloc) { TestServer *server = arg; @@ -1084,6 +1102,11 @@ static void register_vhost_user_test(void) "virtio-net", test_read_guest_mem, &opts); + opts.before = vhost_user_test_setup_shm; + qos_add_test("vhost-user/read-guest-mem/shm", + "virtio-net", + test_read_guest_mem, &opts); + if (qemu_memfd_check(MFD_ALLOW_SEALING)) { opts.before = vhost_user_test_setup_memfd; qos_add_test("vhost-user/read-guest-mem/memfd", From d72479b11797c28893e1e3fc565497a9cae5ca16 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 18 Jun 2024 14:19:58 +0200 Subject: [PATCH 56/85] hw/virtio: Fix the de-initialization of vhost-user devices The unrealize functions of the various vhost-user devices are calling the corresponding vhost_*_set_status() functions with a status of 0 to shut down the device correctly. Now these vhost_*_set_status() functions all follow this scheme: bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { return; } if (should_start) { /* ... do the initialization stuff ... */ } else { /* ... do the cleanup stuff ... */ } The problem here is virtio_device_should_start(vdev, 0) currently always returns "true" since it internally only looks at vdev->started instead of looking at the "status" parameter. 
Thus once the device got started once, virtio_device_should_start() always returns true and thus the vhost_*_set_status() functions return early, without ever doing any clean-up when being called with status == 0. This causes e.g. problems when trying to hot-plug and hot-unplug vhost-user devices multiple times since the de-initialization step is completely skipped during the unplug operation. This bug has been introduced in commit 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started") which replaced should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; with should_start = virtio_device_started(vdev, status); which later got replaced by virtio_device_should_start(). This blocked the possibility to set should_start to false in case the status flag VIRTIO_CONFIG_S_DRIVER_OK was not set. Fix it by adjusting the virtio_device_should_start() function to only consider the status flag instead of vdev->started. Since this function is only used in the various vhost_*_set_status() functions for exactly the same purpose, it should be fine to fix it in this central place without any risk to change the behavior of other code. Fixes: 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started") Buglink: https://issues.redhat.com/browse/RHEL-40708 Signed-off-by: Thomas Huth Message-Id: <20240618121958.88673-1-thuth@redhat.com> Reviewed-by: Manos Pitsidianakis Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- include/hw/virtio/virtio.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 1451926a13..7512afbc84 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -472,9 +472,9 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status) * @vdev - the VirtIO device * @status - the devices status bits * - * This is similar to virtio_device_started() but also encapsulates a - * check on the VM status which would prevent a device starting - * anyway. + * This is similar to virtio_device_started() but ignores vdev->started + * and also encapsulates a check on the VM status which would prevent a + * device from starting anyway. */ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status) { @@ -482,7 +482,7 @@ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status return false; } - return virtio_device_started(vdev, status); + return status & VIRTIO_CONFIG_S_DRIVER_OK; } static inline void virtio_set_started(VirtIODevice *vdev, bool started) From e9fd827711ed47edfe8cf23036a56e5a83f2bfda Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 18 Jun 2024 17:17:08 -0700 Subject: [PATCH 57/85] hw/arm/virt-acpi-build: Drop local iort_node_offset Both the other two callers of build_iort_id_mapping() just directly pass in the IORT_NODE_OFFSET macro. Keeping a "const uint32_t" local variable storing the same value doesn't have any gain. Simplify this by replacing the only place using this local variable with the macro directly. Signed-off-by: Nicolin Chen Message-Id: <20240619001708.926511-1-nicolinc@nvidia.com> Reviewed-by: Richard Henderson Reviewed-by: Eric Auger Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/arm/virt-acpi-build.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index b2366f24f9..102e2da934 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -269,7 +269,6 @@ static void build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { int i, nb_nodes, rc_mapping_count; - const uint32_t iort_node_offset = IORT_NODE_OFFSET; size_t node_size, smmu_offset = 0; AcpiIortIdMapping *idmap; uint32_t id = 0; @@ -415,7 +414,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) range = &g_array_index(its_idmaps, AcpiIortIdMapping, i); /* output IORT node is the ITS group node (the first node) */ build_iort_id_mapping(table_data, range->input_base, - range->id_count, iort_node_offset); + range->id_count, IORT_NODE_OFFSET); } } else { /* output IORT node is the ITS group node (the first node) */ From 93c76555d842b5d84b95f66abecb6b19545338d9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 19 Jun 2024 14:03:08 +0100 Subject: [PATCH 58/85] hw/i386/fw_cfg: Add etc/e820 to fw_cfg late In e820_add_entry() the e820_table is reallocated with g_renew() to make space for a new entry. However, fw_cfg_arch_create() just uses the existing e820_table pointer. This leads to a use-after-free if anything adds a new entry after fw_cfg is set up. Shift the addition of the etc/e820 file to the machine done notifier, via a new fw_cfg_add_e820() function. Also make e820_table private and use an e820_get_table() accessor function for it, which sets a flag that will trigger an assert() for any *later* attempts to add to the table. Make e820_add_entry() return void, as most callers don't check for error anyway. Signed-off-by: David Woodhouse Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/e820_memory_layout.c | 17 ++++++++++++----- hw/i386/e820_memory_layout.h | 8 ++------ hw/i386/fw_cfg.c | 18 +++++++++++++----- hw/i386/fw_cfg.h | 1 + hw/i386/microvm.c | 4 ++-- hw/i386/pc.c | 1 + target/i386/kvm/kvm.c | 6 +----- target/i386/kvm/xen-emu.c | 7 +------ 8 files changed, 33 insertions(+), 29 deletions(-) diff --git a/hw/i386/e820_memory_layout.c b/hw/i386/e820_memory_layout.c index 06970ac44a..3e848fb69c 100644 --- a/hw/i386/e820_memory_layout.c +++ b/hw/i386/e820_memory_layout.c @@ -11,22 +11,29 @@ #include "e820_memory_layout.h" static size_t e820_entries; -struct e820_entry *e820_table; +static struct e820_entry *e820_table; +static gboolean e820_done; -int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) +void e820_add_entry(uint64_t address, uint64_t length, uint32_t type) { + assert(!e820_done); + /* new "etc/e820" file -- include ram and reserved entries */ e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1); e820_table[e820_entries].address = cpu_to_le64(address); e820_table[e820_entries].length = cpu_to_le64(length); e820_table[e820_entries].type = cpu_to_le32(type); e820_entries++; - - return e820_entries; } -int e820_get_num_entries(void) +int e820_get_table(struct e820_entry **table) { + e820_done = true; + + if (table) { + *table = e820_table; + } + return e820_entries; } diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h index 7c239aa033..b50acfa201 100644 --- a/hw/i386/e820_memory_layout.h +++ b/hw/i386/e820_memory_layout.h @@ -22,13 +22,9 @@ struct e820_entry { uint32_t type; } QEMU_PACKED __attribute((__aligned__(4))); -extern struct e820_entry *e820_table; - -int e820_add_entry(uint64_t address, uint64_t length, uint32_t type); -int e820_get_num_entries(void); +void e820_add_entry(uint64_t address, uint64_t length, uint32_t type); bool e820_get_entry(int index, uint32_t type, uint64_t *address, uint64_t *length); - - +int e820_get_table(struct e820_entry 
**table); #endif diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c index 7c43c325ef..0e4494627c 100644 --- a/hw/i386/fw_cfg.c +++ b/hw/i386/fw_cfg.c @@ -48,6 +48,15 @@ const char *fw_cfg_arch_key_name(uint16_t key) return NULL; } +/* Add etc/e820 late, once all regions should be present */ +void fw_cfg_add_e820(FWCfgState *fw_cfg) +{ + struct e820_entry *table; + int nr_e820 = e820_get_table(&table); + + fw_cfg_add_file(fw_cfg, "etc/e820", table, nr_e820 * sizeof(*table)); +} + void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, SmbiosEntryPointType ep_type) { @@ -60,6 +69,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); MachineClass *mc = MACHINE_GET_CLASS(pcms); X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); + int nr_e820; if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -78,8 +88,9 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, } /* build the array of physical mem area from e820 table */ - mem_array = g_malloc0(sizeof(*mem_array) * e820_get_num_entries()); - for (i = 0, array_count = 0; i < e820_get_num_entries(); i++) { + nr_e820 = e820_get_table(NULL); + mem_array = g_malloc0(sizeof(*mem_array) * nr_e820); + for (i = 0, array_count = 0; i < nr_e820; i++) { uint64_t addr, len; if (e820_get_entry(i, E820_RAM, &addr, &len)) { @@ -138,9 +149,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms, #endif fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1); - fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, - sizeof(struct e820_entry) * e820_get_num_entries()); - fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg)); /* allocate memory for the NUMA channel: one (64bit) word for the number * of nodes, one word for each VCPU->node and one word for each node to diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h index 92e310f5fd..e560fd7be8 100644 --- a/hw/i386/fw_cfg.h +++ b/hw/i386/fw_cfg.h @@ -27,5 +27,6 @@ void 
fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, SmbiosEntryPointType ep_type); void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg); +void fw_cfg_add_e820(FWCfgState *fw_cfg); #endif diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index fec63cacfa..40edcee7af 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -324,8 +324,6 @@ static void microvm_memory_init(MicrovmMachineState *mms) fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, machine->smp.max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size); fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1); - fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, - sizeof(struct e820_entry) * e820_get_num_entries()); rom_set_fw(fw_cfg); @@ -586,9 +584,11 @@ static void microvm_machine_done(Notifier *notifier, void *data) { MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState, machine_done); + X86MachineState *x86ms = X86_MACHINE(mms); acpi_setup_microvm(mms); dt_setup_microvm(mms); + fw_cfg_add_e820(x86ms->fw_cfg); } static void microvm_powerdown_req(Notifier *notifier, void *data) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 77415064c6..d2c29fbfcb 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -625,6 +625,7 @@ void pc_machine_done(Notifier *notifier, void *data) acpi_setup(); if (x86ms->fw_cfg) { fw_cfg_build_smbios(pcms, x86ms->fw_cfg, pcms->smbios_entry_point_type); + fw_cfg_add_e820(x86ms->fw_cfg); fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg); /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index dd8b0f3313..bf182570fe 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -2706,11 +2706,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } /* Tell fw_cfg to notify the BIOS to reserve the range. 
*/ - ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED); - if (ret < 0) { - fprintf(stderr, "e820_add_entry() table is full\n"); - return ret; - } + e820_add_entry(identity_base, 0x4000, E820_RESERVED); shadow_mem = object_property_get_int(OBJECT(s), "kvm-shadow-mem", &error_abort); if (shadow_mem != -1) { diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index fc2c2321ac..2f89dc628e 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -176,12 +176,7 @@ int kvm_xen_init(KVMState *s, uint32_t hypercall_msr) s->xen_caps = xen_caps; /* Tell fw_cfg to notify the BIOS to reserve the range. */ - ret = e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE, - E820_RESERVED); - if (ret < 0) { - fprintf(stderr, "e820_add_entry() table is full\n"); - return ret; - } + e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE, E820_RESERVED); /* The pages couldn't be overlaid until KVM was initialized */ xen_primary_console_reset(); From 5786827f47a0721bb997ad3f653d2b843ba3fd76 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 19 Jun 2024 13:12:43 -0700 Subject: [PATCH 59/85] hw/arm/virt-acpi-build: Fix id_count in build_iort_id_mapping It's observed that Linux kernel booting with the VM reports a "conflicting mapping for input ID" FW_BUG. The IORT doc defines "Number of IDs" to be "the number of IDs in the range minus one", while virt-acpi-build.c simply stores the number of IDs in the id_count without the "minus one". Meanwhile, some of the callers pass in a 0xFFFF following the spec. So, this is a mismatch between the function and its callers. Fix build_iort_id_mapping() by internally subtracting one from the pass-in @id_count. Accordingly make sure that all existing callers pass in a value without the "minus one", i.e. change all 0xFFFFs to 0x10000s. Also, add a few lines of comments to highlight this change along with the referencing document for this build_iort_id_mapping(). 
Fixes: 42e0f050e3a5 ("hw/arm/virt-acpi-build: Add IORT support to bypass SMMUv3") Suggested-by: Michael S. Tsirkin Reviewed-by: Eric Auger Signed-off-by: Nicolin Chen Message-Id: <20240619201243.936819-1-nicolinc@nvidia.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/arm/virt-acpi-build.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 102e2da934..e10cad86dd 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -209,12 +209,19 @@ static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms) #define ROOT_COMPLEX_ENTRY_SIZE 36 #define IORT_NODE_OFFSET 48 +/* + * Append an ID mapping entry as described by "Table 4 ID mapping format" in + * "IO Remapping Table System Software on ARM Platforms", Chapter 3. + * Document number: ARM DEN 0049E.f, Apr 2024 + * + * Note that @id_count gets internally subtracted by one, following the spec. + */ static void build_iort_id_mapping(GArray *table_data, uint32_t input_base, uint32_t id_count, uint32_t out_ref) { - /* Table 4 ID mapping format */ build_append_int_noprefix(table_data, input_base, 4); /* Input base */ - build_append_int_noprefix(table_data, id_count, 4); /* Number of IDs */ + /* Number of IDs - The number of IDs in the range minus one */ + build_append_int_noprefix(table_data, id_count - 1, 4); build_append_int_noprefix(table_data, input_base, 4); /* Output base */ build_append_int_noprefix(table_data, out_ref, 4); /* Output Reference */ /* Flags */ @@ -305,8 +312,8 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } /* Append the last RC -> ITS ID mapping */ - if (next_range.input_base < 0xFFFF) { - next_range.id_count = 0xFFFF - next_range.input_base; + if (next_range.input_base < 0x10000) { + next_range.id_count = 0x10000 - next_range.input_base; g_array_append_val(its_idmaps, next_range); } @@ -365,7 +372,7 @@ build_iort(GArray *table_data, 
BIOSLinker *linker, VirtMachineState *vms) build_append_int_noprefix(table_data, 0, 4); /* output IORT node is the ITS group node (the first node) */ - build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET); + build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET); } /* Table 17 Root Complex Node */ @@ -418,7 +425,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } } else { /* output IORT node is the ITS group node (the first node) */ - build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET); + build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET); } acpi_table_end(linker, &table); From b05ff4086f79cdc59c4adcfd278259792a8bb714 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:25 +0530 Subject: [PATCH 60/85] uefi-test-tools/UefiTestToolsPkg: Add RISC-V support Enable building the test application for RISC-V with appropriate dependencies updated. Signed-off-by: Sunil V L Acked-by: Gerd Hoffmann Acked-by: Alistair Francis Acked-by: Igor Mammedov Message-Id: <20240625150839.1358279-3-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc b/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc index c8511cd732..0902fd3c73 100644 --- a/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc +++ b/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc @@ -19,7 +19,7 @@ PLATFORM_VERSION = 0.1 PLATFORM_NAME = UefiTestTools SKUID_IDENTIFIER = DEFAULT - SUPPORTED_ARCHITECTURES = ARM|AARCH64|IA32|X64 + SUPPORTED_ARCHITECTURES = ARM|AARCH64|IA32|X64|RISCV64 BUILD_TARGETS = DEBUG [BuildOptions.IA32] @@ -60,6 +60,10 @@ [LibraryClasses.IA32, LibraryClasses.X64] BaseMemoryLib|MdePkg/Library/BaseMemoryLibRepStr/BaseMemoryLibRepStr.inf + RegisterFilterLib|MdePkg/Library/RegisterFilterLibNull/RegisterFilterLibNull.inf + +[LibraryClasses.RISCV64] + BaseMemoryLib|MdePkg/Library/BaseMemoryLib/BaseMemoryLib.inf [PcdsFixedAtBuild] gEfiMdePkgTokenSpaceGuid.PcdDebugPrintErrorLevel|0x8040004F From ad8560fa29fc18acda2e8cfc1fdd87f6c6cca122 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:26 +0530 Subject: [PATCH 61/85] uefi-test-tools: Add support for python based build script edk2-funcs.sh which is used in this Makefile, was removed in the commit c28a2891f3 ("edk2: update build script"). It is replaced with a python based script. So, update the Makefile and add the configuration file as required to support the python based build script. Signed-off-by: Sunil V L Acked-by: Gerd Hoffmann Acked-by: Igor Mammedov Message-Id: <20240625150839.1358279-4-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/uefi-test-tools/Makefile | 19 +++---- tests/uefi-test-tools/uefi-test-build.config | 52 ++++++++++++++++++++ 2 files changed, 59 insertions(+), 12 deletions(-) create mode 100644 tests/uefi-test-tools/uefi-test-build.config diff --git a/tests/uefi-test-tools/Makefile b/tests/uefi-test-tools/Makefile index 0c003f2877..f4eaebd8ff 100644 --- a/tests/uefi-test-tools/Makefile +++ b/tests/uefi-test-tools/Makefile @@ -12,7 +12,7 @@ edk2_dir := ../../roms/edk2 images_dir := ../data/uefi-boot-images -emulation_targets := arm aarch64 i386 x86_64 +emulation_targets := arm aarch64 i386 x86_64 riscv64 uefi_binaries := bios-tables-test intermediate_suffixes := .efi .fat .iso.raw @@ -56,7 +56,8 @@ Build/%.iso.raw: Build/%.fat # stripped from, the argument. map_arm_to_uefi = $(subst arm,ARM,$(1)) map_aarch64_to_uefi = $(subst aarch64,AA64,$(call map_arm_to_uefi,$(1))) -map_i386_to_uefi = $(subst i386,IA32,$(call map_aarch64_to_uefi,$(1))) +map_riscv64_to_uefi = $(subst riscv64,RISCV64,$(call map_aarch64_to_uefi,$(1))) +map_i386_to_uefi = $(subst i386,IA32,$(call map_riscv64_to_uefi,$(1))) map_x86_64_to_uefi = $(subst x86_64,X64,$(call map_i386_to_uefi,$(1))) map_to_uefi = $(subst .,,$(call map_x86_64_to_uefi,$(1))) @@ -70,7 +71,7 @@ Build/%.fat: Build/%.efi uefi_bin_b=$$(stat --format=%s -- $<) && \ uefi_fat_kb=$$(( (uefi_bin_b * 11 / 10 + 1023) / 1024 )) && \ uefi_fat_kb=$$(( uefi_fat_kb >= 64 ? uefi_fat_kb : 64 )) && \ - mkdosfs -C $@ -n $(basename $(@F)) -- $$uefi_fat_kb + mkdosfs -C $@ -n "bios-test" -- $$uefi_fat_kb MTOOLS_SKIP_CHECK=1 mmd -i $@ ::EFI MTOOLS_SKIP_CHECK=1 mmd -i $@ ::EFI/BOOT MTOOLS_SKIP_CHECK=1 mcopy -i $@ -- $< \ @@ -95,15 +96,9 @@ Build/%.fat: Build/%.efi # we must mark the recipe manually as recursive, by using the "+" indicator. # This way, when the inner "make" starts a parallel build of the target edk2 # module, it can communicate with the outer "make"'s job server. 
-Build/bios-tables-test.%.efi: build-edk2-tools - +./build.sh $(edk2_dir) BiosTablesTest $* $@ - -build-edk2-tools: - cd $(edk2_dir)/BaseTools && git submodule update --init --force - $(MAKE) -C $(edk2_dir)/BaseTools \ - PYTHON_COMMAND=$${EDK2_PYTHON_COMMAND:-python3} \ - EXTRA_OPTFLAGS='$(EDK2_BASETOOLS_OPTFLAGS)' \ - EXTRA_LDFLAGS='$(EDK2_BASETOOLS_LDFLAGS)' +Build/bios-tables-test.%.efi: + $(PYTHON) ../../roms/edk2-build.py --config uefi-test-build.config \ + --match $* clean: rm -rf Build Conf log diff --git a/tests/uefi-test-tools/uefi-test-build.config b/tests/uefi-test-tools/uefi-test-build.config new file mode 100644 index 0000000000..1f389ae541 --- /dev/null +++ b/tests/uefi-test-tools/uefi-test-build.config @@ -0,0 +1,52 @@ +[global] +core = ../../roms/edk2 + +#################################################################################### +# arm + +[build.arm] +conf = UefiTestToolsPkg/UefiTestToolsPkg.dsc +plat = UefiTestTools +dest = ./Build +arch = ARM +cpy1 = ARM/BiosTablesTest.efi bios-tables-test.arm.efi + +#################################################################################### +# aarch64 + +[build.aarch64] +conf = UefiTestToolsPkg/UefiTestToolsPkg.dsc +plat = UefiTestTools +dest = ./Build +arch = AARCH64 +cpy1 = AARCH64/BiosTablesTest.efi bios-tables-test.aarch64.efi + +#################################################################################### +# riscv64 + +[build.riscv] +conf = UefiTestToolsPkg/UefiTestToolsPkg.dsc +plat = UefiTestTools +dest = ./Build +arch = RISCV64 +cpy1 = RISCV64/BiosTablesTest.efi bios-tables-test.riscv64.efi + +#################################################################################### +# ia32 + +[build.ia32] +conf = UefiTestToolsPkg/UefiTestToolsPkg.dsc +plat = UefiTestTools +dest = ./Build +arch = IA32 +cpy1 = IA32/BiosTablesTest.efi bios-tables-test.i386.efi + +#################################################################################### +# x64 + +[build.x64] +conf = 
UefiTestToolsPkg/UefiTestToolsPkg.dsc +plat = UefiTestTools +dest = ./Build +arch = X64 +cpy1 = X64/BiosTablesTest.efi bios-tables-test.x86_64.efi From 2f95279aa8c3ced9607eb5959c20d2995ecd980b Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:27 +0530 Subject: [PATCH 62/85] tests/data/uefi-boot-images: Add RISC-V ISO image To test ACPI tables, edk2 needs to be booted with a disk image having EFI partition. This image is created using UefiTestToolsPkg. The image is generated using tests/uefi-test-tools source. Signed-off-by: Sunil V L Message-Id: <20240625150839.1358279-5-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- .../bios-tables-test.riscv64.iso.qcow2 | Bin 0 -> 16896 bytes tests/uefi-test-tools/uefi-test-build.config | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 tests/data/uefi-boot-images/bios-tables-test.riscv64.iso.qcow2 diff --git a/tests/data/uefi-boot-images/bios-tables-test.riscv64.iso.qcow2 b/tests/data/uefi-boot-images/bios-tables-test.riscv64.iso.qcow2 new file mode 100644 index 0000000000000000000000000000000000000000..c720bf99a45fab6d1e21963cca563ee0ea059b82 GIT binary patch literal 16896 zcmeIZbyQp5w=Wvpf_t$*DNx*s76?*Gkpjhw1#fW*1ef3~T4-^K6sNdLaVJ>O;85J1 zQtYN*dfvO^_r|&BjPd@sc6P{~`K&p2_F8kzHD!gZF8|^R005vN-~auA^AEuE|F<^$ z`wy6ZG3pwPn*YYNLL{ z@~dP#ETCmdJ*7_kgqm;AKA-YZ)H7i(_@NMQyg)%>KxsGsR-2gBcMs8>CNZu}?giTr zW$PK&8%mMm2N;bN$uC%lqEk?%*o3+>e_ic4JOyZ|QDL}X0t1oRteN9K<8(Pw$37xF z1|I2P6F9n_yPUW{fk&qoW*`a$lHzw{Kz0D&>R{It0_pAL#g%74US}QyK_2E*7!OJR zAOI>3rhJGxFB1wt!!!m0K*PZ#AnIyU>wpil!a2G-TU)W^B7llPn!FEq6{Iy4q~)j} z>{K9cNmkXnrC&tPStW#fh-_pr=`de}dA<-N&D^d}y}pq>O>MJ&j$(%bF!LVvC;g^~ zXE=dk`Gi+tSz8DwS_3uBWFbZj%AzyV%_=l~oF%n-a=AdsJV^C9}F;wuhCstjLCnUP*F48|zU^QfHp zBMJ#O{w?+cY7szMfzGGoxOAWs2Ye9nKr0PthyUvz;D5RZZ20$j`j@}-Kj!00+{+OG zApZywo!w&+yn9UQdymNi?=d<2o>EHQw^NnfV`{`brO~{nv}N~vdis0JU~x}DfBiTU z_#QueeNUO^!GJ#p3peB+${KJ_*~Y>DV0OiO%z=B4IqmK-*N1z`!*k!xn|6=+obNIJ 
z-|rFdzsG_R_xvXb_gE=9YrM4Ci?)1f@8Iihn0CWpe5WAz@Sgkk! z`gPpBm$H~L9JTE~PC!(x9k^sr57SWaemy2t{-hFrElVz*fv{aT9xicJN6~n1W2%AH zFz7ge3Yn;tW&tB8wW|-F68=c@4o$;g>UMmqbv5_1c)wkvMLraUAyg?f9Xt3O+n84 zxsFBX$^U1H-jvM19JShUm36pu-|w@BOyrj!y)XL!K-(D4iF`Jx?;EZRkq(^%-V;CE zcCCm2TZd0Zyx9J>IzrROgqwwaV4a)5#CQHST8)M~U(xf(CglJ&*7=;~S6QbpX;V)T zw2syD4S@~j;erLP2kkcLLaZLfMRkwS0(o~v2kvzw!X~{MrJ5esl&4GEOJV7d&hn8{ zeXmzCDRM1cZ?;5Rdn$LUpGE2RNlNXtlH|}3`e>#khWUA_OBnq{A4nn&@%q-@rrO#r zxunkjqcl+%<=?T&QPlY8on}hQAT0X`EyB5^YASJg%Nk_nMh*dNzwzGuKByng+~1e4 z(>xQDn?MQP0zxSLe$(8ldOyAz4en>*y!(XjKOM3bU4b#c5Vo*p2u|1JNb!apiEl7F z6oe$t^09h6zbrWyIyW4@vXsP?ApYD`Ds;wbvlic;r&SvoW5h{zAnIo~U0XGI;nY+E^rWKx**=fVr<=(@I zO5qv}aXk5CR8{LkI<~~9xK4fKH@V}TA6(DfZG-1LPdWF)-~zu^aB6Q!#@P^;GI@u; z4tpm`m-41mquxn8e1f?;V%ABiZlX3YrJ~3ltEae`Zce;$8)|WdW*6O}gvLkKGd3_9 zK`!RvNGvcn)8@&>_L<;My9Y^gH$Wy9eAsc z*l+dtwVGq!z*W<*a1;Eh1G*bV8qoDlueCBHZfEf7Q`bwwGQ{VxZ8~GZq<3WpzWh?k zT^6RC1loy@L<6#`Y~N7#q0j_Ql`njd2(+Sh@_)AET3l<&=RUtU`}k1AHlY=2AWFiR zovd$E!^p++iWWXf$oR@TjbdO6BO>4t4(8kFvAP_iAL-p&EUHtW1AuDMT@z$@l<*S619(Zb8 z11;tS2SD(-G3d&nqSq}%+pP^FazC!cZ==@6M{`LtD{*?fm96(&ehVYezoIYJk4S)) zQ@i?t9&_f*Va3bo2_~kAP|b(*l9fU*#m*RR{`>>b<@(Mp5FAI6`bxJvxvJG&>q@ii zg+L}t4!No&{*r%U&TG_aRqAZhC;`bhs?$*wv3TL6Y8Z{_D;vpiNzt+web{los3i}D zk=~Bo$0P=jR8wMzSnp-9$l=K-yPkD#7M)*XV5<^J@Ux1wx3U7f9})pl;@=IKPZese z;0ivjh}nz&5sitdmdK%Rd-?DXy!yTqFG;u`lM~+0!UT^)NS`K1Wr!rt4c$`qVj_-) zONtSn)_5x44j6~gkfT#mgH6(L1vL^s9+gzQ&xoy%{z?)E9PzpMgkLj<)^HeA(e+t; zZ2nJZY*Z3rKz0D&^~+lf4XxuO1hZaiPlHf6rE)Ii(#RkZCY?CQ<;uT0auZ$W*bk0WdiVP6=D`al1d^tu2Ltds|9(CyFZmdEpQ3olTc!W)ow(-WJ(Ev zwi!l6z4{Hr?d$K(pG0wVla<{5y%qX0=B$2JR@RyaLH#_5k@PH40g9?rA|TJG6q(a1 zC>p>j>_f`2cPTO`1%Z0{y)kBl@U`3s2ou*@iRwJrR=)?3|! 
zzb=BOYGq}bI)_Y74y>ePd|u(&FjaV31ajTwHU^%S`I)=w*t-w4_5w6I{@8s(;o3pc zpx*7E`y7z@g6yD=0_y{AC4=AWCd&|}c{$yc>xfpO7C7^*=huINDC<+poR;LcId^*N zS5G>b&k{)bw2M18?fr&;r?U|~W7K~V3y~(Hi3KlOrjzTf)YYVet_kqIdSw#a)BlA5 zd9S5Pb#T629fsBg)e2=;e!FuJ&VDozOO@zLOD#x9;*vFvdWs|d>NZ`Mn)7}K-2D*S zME*)5h`MK&rBVny2ze4b-Rds`CUae3wFSI=ecVBi37ia{_9X$`wzVfKjeoXPs^+Kc!n;i1!26 zA`<~LJYj@y42SeBtp3-&M_Bm81;_vhutDByM$ixe>1+AxgU^etTRa+&nY8gSK9nFV zvE%m76x250soo4>6YDub{@fXE$$6Js#E-OMJ1x0**F?^e?p6@IM)@uO4w{iu2Cm7Z zCQklkk*1Ea8`kz)sb-It$EnACww!*66Vcb0$!A_jrEzp}L9R;61OyxOxd9b-Y$@I- zAF8h_ZG7UF;fgfVejt~<>InIEyHe52-fM@#)Fjt12qKU-5uJ* z6j-Jy(hHe)^D%cCW{NQ=o0_+-zf;S9l7a>>yR3cackc`@5CZh7RuG8$f+J+V@OH!+K$dW|Xf^n8IhY(28FQjzKhKv?y ze6l{B5)tTJ?F2fh8%aR(uX%Y=Vwmm0|L!t?qb z{rTWLU#p-QVdwifL^>0+dA|Cy0+8l;$K4(H3_+Aofuo9Cz_7a0eC+XS7VMi0hULa{ z`k;fkY?jIg;Ik@R8MsD1R(;QX#l$hH2=v02G=?LJP8APMYjgQqLv!*QYvGgx#F);KHR7v7iorV`*AdN zN%-+a5@19pszp)hFGb=uz$S{&)ezk}-gX9g#<09oveKK$Z%;T2t!WrK0Evv+JX-j~Aj zsW6n@A!HH4nuJ_l=~}bR+CerXU!<=A??#cf7^}%-SOp@SfA-ckPLDhY#Qn-5H2+;# z$6nT{U`BWapW&(_<=C@`07VBXbnwcb1@WX|wwfiUET9Xq!1f)`S9U#S066}Mw6trZnJ;%&bBRIBNS|kMb>&4SDDpb>mFe|)6A@ul913}EBRz{^MT-)L>_a<<8ykGHdKa24#OxJD~jA&`h6*jiA za=)ZuHl1q7_b=#tQeKc*p|*t=7*sJBE7eb-e+yW<6{5#V;yZEr+Q|KqM)#vFAPIj3 zWl(Bp=#bdDnPl4KxpWPyA|aJXt^hCZQwNdO&fYRM*yFB`#D**|V#W@^%mUa)lx&$T z95tgfn8`bFuUT5Fj3mfqzf28}B7?uZklwK>0x}?Fk@65+YkVnigEZw~ng`xG$kG-F zHN1eItXKnq?Y7qSNYgodUxbSIrg{R}rm}_I{t~_{6p{TZE->;Onb7zJBdBx}k@d2) zAxBFgooMPIqHrnX?N0f4-#09D#+sU3*>Nu3V1(E_?P@7Bl)7(F*Uk?VW$R_mILna3 zfDbHleE?#>Hw4VsK&8y|4{t=K}1(|XNK{H@e>m_(=V*BCAA_EYiUOnyB{Ee zX94I~(sEda1_rwznW8A}fTA~i1dkrSikNBK6gTVITA24*JlOCmRrIvE;R$r5T@&Rz zd)I(%RrLZF=gVfYd?qv#%Jad%fH9a#wLVb>KPcw0>Ug;ewqch_QFKw9jZzZ6hoqZY zT%+T5%+e}#+pEt@n{;gjGpzCM7w+kom~=ga>>n&hop5q;URpHLosQK{5$)ugI?f+e zO3uCJl^wu*S+4n^=!33x!(n$Xv-l1#A+GiBswHoJ*VmDpmY@Omm!OUV{@>}{KR-ms zOFtLucFElC4!3wx`~c;gL(n;EnYoNfYigjqHmS?GbulY@GX)Omq~=zGQwd zlPN+Jydg0^MTOIqaZdNSsbD}-=%et71_vzEX}!wsR&?G|;N7(IN^#BDS;4+O-C3%) 
zeoNCA2OA+PkEvfxqY}ogZB@w1V}A8ums3eqmGYm+{`5`MN%zT^)b-t6Q>L^fvLMKJ z_gtdgU^(jUgRgTQ#7RB1BKc=~N4&<9pCd52?s3du{|xSEPAmTC@_uOc&mCvXG9S;h z{eDm5+g(F|y7L^3$tMu@aDU_n?;|y3qJ-c z=8|W(008%q=gYaX8T+PT976xhl;<5ESWL_N%zo;HQ)bEn08lf}gfZcWp6jx9qt84v z5WV@K+$VZ$9+|MYPilwlb>mR6>Sqp+8fK9LKWvN5)QD-iSzacA*3o~3^6;3-#k^0 zE_=FTa-^B(xt#T!+>l&zN7)C$FCkW0vEA0yU7YzfEo@EAm~l|!+_xM1_fO_ijMu>= z*SS2BX>OvLk&g{&%B%~d_tk2W&wbTv8qP~_=jKE(eD)ZJQWYJkFQKKmuem|XRUvZ*Zbu= z3GdbP^667MSFHM482#91^()d(>Yrupewt15O%%VG+{+xxv6gtiWXf}mbyiVZl|`&F z)%1uw!zwy_YY(K#W7_aqCl;$Vo1_VYu^FQ{{L)|8-BYX#bnAhsf1bidP#in~eF~|&Z_SS#kjfDgC%UY&q z8PuTIsBXSB;OOG8Z6IlbT3q;jpHa)LaWa0-g3%tq23@J+ugi3((`(VrIpA1->#a@x zfLM%?V9M6m<=iPRRoKU=&BFc~FXi?*Vs)SqS-QH#rB@0tl_$ka(>k8JL8H1aK$BAl z5T7N;#`O1~Z{l^^etl`mpx#2jL=(BagOys1$Fb$hcV8PSx)O{zDE(I{D$o!P;BTQT z&Z*KXO&s8+q*C=We{(XOjO{lI3%3l+1Qj->ZW~8^yFle|M2k1tk$BrRzT<8Cem8Qz zI2-|b8Z$7}Y`j%sd$OifdG*=h&)!A4`vazv8=l^qVvuM?A7@LPLz#fI_j#q?wwVG| zzJUL@vSkb4N&4D__u4b{AZ`R%*6eB_|98OU7P0Y&b3ubMOy-4{2L(& zvgh$>@S$TAYo575!UPl7Y7_z;8h@9{YinWwC) zrN!l&3y5K;dXG@P{)9QBeQ=#-CjR=5+M=?C@s$C}Z`>FkI$Th@npuo3txPa~_;u)~ z_?@F!w4gj|LH^J}b|slK@BjF6!kHo_%<<~tt`Qkj+*GhkoHBTV5eUk?WCH$Ba`R!G z7^9ey`u#q_F9I;##+g}v>#L>hSyt)sz+Ee3$@7`>$`HHV@)GyxrvOSA!`|yW0so(0 z4c?Kfozsn6w}8K60{rz*)t+pddnrKv zBIPD>8@e~R&gJKOj@+2PY(;L!nB;g>th$A4Z3BVG+`isFtL2>z*nJy9Ka$;&f{+s7 zMN^r3Vu)B8XjIO>eg}~QFj1@Q6r#T=?*4g%bs~W-139jc6zh}6JYl@#T;Yy zG$Rfr$kMdmoN2qIQtUIo#m|wwLrv4?o*I$nNACk+V1$Y(j%Egeh{ar>L~S`Tn2+#1 zPh%%vFLqIUZkYRi;gf#x-Hp&Hsb#+ABk>ZQ5DP{}twgr;ZfP1zw#bHp&ACI-7+JoF zMvoo^OU6Tm52@YtsIZ4_9d_P|8Yr*;8 zOSLWY2(_gTk{;c>QLCaE{w@6Zk+T|0R_VLmM@}6(N6<+RcDsj<4G}BsBHq}cD&d+3 zxhK@RQ?T#6%hN%Tv@HNW+cOGIwb%8Gzbf9AD)RRgq$>_liM83TPgs}Mx$KkPc$hnX zX>xl%w6X~&ryJNGC*6O}mj`ef(?#6u)NSDObUDPc-k@F`2P0ENL#yAzx9R_^7^$cD zt9SN7R+sfjH;sXY`p&;Uwm|pd5LAgNDtfnkDK$;$x65~T8~B9u?6aI68;yY6*EI~; zB>AB@Y{wQ;E*5b-j&XSbDqp;4L;IBTS;Xg1lIC`A*Jf7_7B4C3F$+2bE6+rG;R_ug zS2&pbYDRZnZBW#a&O8eF+mLXgUSIJpD)_3Uwt_?7`|{isV$`1KSlxzffu755&RE)oJhPF(w{uIIA|@jUr6mcG6BLwF 
z6I2+_SQ?pGA;H4I@7i*7xv3ZTl6vw+^o!R){l&%8(k_Lg_S*5z&725)p&_6CErT8M z(SnGp6I|a=;fD~64t9F{4NFb}Zy~sEZuk$&j{0tjj&3|lm4|#|=oYlLzT`>{OPiX> zenyU5$5-RS9EF5@Kb~$>oCHTr?wuK7+(a=Cvn|G~q@tx{v1nnc@D@Orxf}_Y#jn9G zVhSwsQBN?w+!iL0A>ob2^YC6Wf~jWZuO0My6mqr#F^E zK4Zh6Wg~F=2E$No;AK0QjE>QMy3TR=hso$Pw<91pSX~zHa~aBy&{?sO$3mZKpt&ee zIY!?AuY~<;lp72wSmY50`fLE^(>~^nFJAj_3FkPmCBXFJjkjMiPl7S&;gO@Je)hoF zL9Jecpy9Xi%bkIrYr65}s{KRJJ^undZa`ZUx71>mxSkDZliBmQO+!zZY**{;sK zbA*mMh?s^2y?oQ-gN!MnK5;#9LzL`A=3b&g^tihO*Qj+x_Ee;2^=lbL*L zUqZW{-^MJ$AU<=Ww7Q;o5!GiiN-yy=S=b5_h}p#fv;ms22_04=gxEPmjb_uVX1z}p zlp_HbQ$ zDHCbT7LR?8UjFXo{AI%{?@JTl{a}&4~Nh} zi22saY}Y(Uj!pC}jUw<9uPx{KYrWX95We%xrZ$-IAkUZDOl>JOIxUj@G@tiq*JGc& zSvyy%R~)GAJwiNK!e?DrzeU(7-PT3FexP|8q^6*o=S>5wLM_?95b)f!AsV4)zq@GFa3gS@YI8x-Gz60UE#+@Y<2CE zZz_Uz7t6*DljLONqbIS^&X;KeqE)-(JKRs|4tU^*29rdG0|bB1S&cc~cc*?jy?|gN zQ@eS-LiUsbZT+7T=N*f76cJ2}rrcxhLaJs%=^{jRZW_YaHuzsX)I#XxXL+YKhjYbN;s1tpP0G?P({a;ilV{ zHVPfbRuVar^Yimp*u@d-TF!&@Txkby?qV!4X#}w)k_K^du!70!O@7Ap)bZxwd@O_% z%F1p+wNFJ@C4Bv7NVZD@z1JK9a?-*8!UZMK(0Dy z%rQ@~-f$u%sqZZr+&y$Xv#FLGxp-5#c!Tb$W~dqnCTdErXpZUVq(9SB?v(r_M}t;e zI*5a3Q)l6pnm9l@w-6(@ z>$|(kQE_iq_;1h)E8^Y<;+&>Q%kFj`uNRC>>^{fKw)uoPq{R%pL@xJdmFpZK&tmY2 z-wAONN#kbb5XWTI8R~2AVk#9FQs?3NMUQ0@YL%G?`NGqjOG9D|yQpH(6^KAp@bG7_ zg12^$mT?uH4>32&3?C0X9!%qZz)cE%WAn-128eF9NGOVn5fZbArjkc0PibM)RAy`R zOMjb7S1852lx>wt+u+HoIn+--sW)$*#om@%7{}zQT5OGe z!q8kax#wA+Wf7D;#BSK~!mv>_S`_Yu7NF`L%`4(%;Sg`bTD*?GRz$2$1U4xyf9IaJ=oBWnZEqGDl86w8fYym%BB$DU5JZ zAKQk)AN#dbIA(kzLm$uIraCqf!30;9Cx1Pomw|Bc9wpRKW7`(#Gl=-rey)Br8eIbI zwy`pg8?a_S9vy<`eT7DP_iRebdsbM&-u%Kz`zS>iz)RG1$+iInp>cMjV2x9~xTYLU z5>z`RVZ+2_^!4QElIzGeO!lg8zM8kOLMy4c^&p6+j#nXfraAro6E$}!S>8|Wb5awm z77$UPkt#>C?Y=T#A?rc%*ko9YJ-NNqEK5zBng6D9m+r6_FDJymq2m_u?fLJNc3u`( zOcMzwpG#~%rOvCt{TPnRqT<6Nvt_2TzY z2vdh#X@8D+hP}&*ch?UT0Nf4<(@U|?-?1?n1$L|*qT9s`*ta7M`jV`)&|CxSw~@;z z*(*l%4-be2tdoI|u4vvGywe>U=?|!ThPq$9ECtyb!jG zj^^At{H)$vJSB4+nfXChJ)AI7zh^4nJ74H3AWl8rOz-TM&HFg(zUM1({?Mn$xgnXs 
z2)v_6c(#$j_QA5_$OZYW0@zW>r_Wgfnbg=OrZ$_6Pe6Uw&-df(CrlGpf_1 zlF_!x!UkP(2IVU$>x*>kMxMVX(8HARY^O+!6FG`rB8pInG^s~@91XfIlbgiCvKkN| zzV^1FE7UlQqA+xyG9rpci2#1G1)Jx)cBuKm$R4EtV7Mk1wz}@-xO@w!4w!DaARsqKh*`#&ADG+hCSqu zkaJ9~z9`&ILyyYeQ+YD8G(jE2C2*8}+4qu&Ksi8uL=qm&2{yKn&4Oc&&8PNmP-n8P z(^^m)!x?#&f(g8;Ue-Fgy0v=mI9_%=-KS;4nuS7hL8P|4bv2b=NFOsBz%1#$;w4ym zyR3owvtDbdkqidl7oHdhyrygrC~ho--rOGY)S*7;wyI=#5prEvh8MD)-<2lO9+XOp z;VKiS^h-FA_VN`+{HvRCzt;sdZIf=_1i0RUj7QrCc4i49)!LAT0Nl%SPbio-*;;Bx zA!r55q82<*5ZBIh3VfDa#vE)fb9Ih^8`eQ^vWD;-9(Ib|Bstg|*;fb8sl?ehXJI6g z9 zib8qSYv}pZYO)UtQw0WIF`;j;!)?$Vuj4c8x?^zhWYc z#Bj(jEXw2vi{oouijknA6hQJcR5^-w?eZi`wfe zhtASXU|NPxJ{VvEn05Y%l#??}*}1!GoWHvBqaBOHs25jrJ!IN@DK2ZpM;Z<*^vXm-hi>;-eHvq9`3;b z_$bdXPyeKG15V&MZZr zyv`L(kpAMMO3rb3cKb9N+Y@0v0iH+I3Oq?BBLR16fS*^-w1ZNc84kubTug7GOQP7( z=IcwvU3l<@NX!y5?EOY+51+~R^spviT(I#8($;RhaUs6BdfV?pSDNIh>XxqK{KPHRJSf-v~g@h=J+?p!6S&CU$%! z*7WAbc$Kmtkwz`ElvJMXVu7Fl>oX}bwg;$WK$P(MISMKq8)GlDWoV_o{Bt zJ=MJx7oCcV)?brsj478=nHv6zvr>0e*Go6AY|AOk@v~F)5$%#_M`d(E;4rK-tTe!Z zZV|27%KPPY+%Ky`zhx|IO+YeXwgFWWRUy?&K5r9DuT9PMc2iPqR-ilwmkgIC1}c+Vi-5t-U&Iha>^T4fR5D-)+v# zJ<*-qoy&dkaByX@{VtkeR~roz69?4Q1!~XegmjigEfaLcfe96O1$eKQe|L1X4z>=W z2GN3h(SlIHY^XsbU^cWMTyQ>W&;xKjS`a4q4Qdbt_zhYR0eBNNh#rg$Aa5gfltDh1 zIzlMzX5%JJgf7jC9q@TT-X&YXT*K*^u)n83hyMOI^(Ds#e*r>_#WWOViCs2V1hx*)jJkg1iMdi|D zCL-N(FsT+Y8f(F!eN$mMAEKgPvo~FBwFr}a9+f8Ix;_`Ycw6VPHWR7ucH3~VG#Y2c zWAxqQVtFi~&Fhe!-?lTK?bW6>2rDhbE@r_>N;{0>t7xa_%HZ>v Date: Tue, 25 Jun 2024 20:38:28 +0530 Subject: [PATCH 63/85] qtest: bios-tables-test: Rename aarch64 tests with aarch64 in them Existing AARCH64 virt test functions do not have AARCH64 in their name. To add RISC-V virt related test cases, better to rename existing functions to indicate they are ARM only. Signed-off-by: Sunil V L Reviewed-by: Alistair Francis Reviewed-by: Igor Mammedov Message-Id: <20240625150839.1358279-6-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/qtest/bios-tables-test.c | 35 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index d1ff4db7a2..c4a4d1c7bf 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -1570,7 +1570,7 @@ static void test_acpi_piix4_tcg_dimm_pxm(void) test_acpi_tcg_dimm_pxm(MACHINE_PC); } -static void test_acpi_virt_tcg_memhp(void) +static void test_acpi_aarch64_virt_tcg_memhp(void) { test_data data = { .machine = "virt", @@ -1663,7 +1663,7 @@ static void test_acpi_microvm_ioapic2_tcg(void) free_test_data(&data); } -static void test_acpi_virt_tcg_numamem(void) +static void test_acpi_aarch64_virt_tcg_numamem(void) { test_data data = { .machine = "virt", @@ -1685,7 +1685,7 @@ static void test_acpi_virt_tcg_numamem(void) } -static void test_acpi_virt_tcg_pxb(void) +static void test_acpi_aarch64_virt_tcg_pxb(void) { test_data data = { .machine = "virt", @@ -1758,7 +1758,7 @@ static void test_acpi_piix4_tcg_acpi_hmat(void) test_acpi_tcg_acpi_hmat(MACHINE_PC); } -static void test_acpi_virt_tcg_acpi_hmat(void) +static void test_acpi_aarch64_virt_tcg_acpi_hmat(void) { test_data data = { .machine = "virt", @@ -1914,7 +1914,7 @@ static void test_acpi_microvm_acpi_erst(void) } #endif /* CONFIG_POSIX */ -static void test_acpi_virt_tcg(void) +static void test_acpi_aarch64_virt_tcg(void) { test_data data = { .machine = "virt", @@ -1933,7 +1933,7 @@ static void test_acpi_virt_tcg(void) free_test_data(&data); } -static void test_acpi_virt_tcg_topology(void) +static void test_acpi_aarch64_virt_tcg_topology(void) { test_data data = { .machine = "virt", @@ -2016,7 +2016,7 @@ static void test_acpi_q35_cxl(void) } #endif /* CONFIG_POSIX */ -static void test_acpi_virt_viot(void) +static void test_acpi_aarch64_virt_viot(void) { test_data data = { .machine = "virt", @@ -2192,7 +2192,7 @@ static void test_acpi_microvm_oem_fields(void) g_free(args); } 
-static void test_acpi_virt_oem_fields(void) +static void test_acpi_aarch64_virt_oem_fields(void) { test_data data = { .machine = "virt", @@ -2364,16 +2364,19 @@ int main(int argc, char *argv[]) } } else if (strcmp(arch, "aarch64") == 0) { if (has_tcg && qtest_has_device("virtio-blk-pci")) { - qtest_add_func("acpi/virt", test_acpi_virt_tcg); + qtest_add_func("acpi/virt", test_acpi_aarch64_virt_tcg); qtest_add_func("acpi/virt/acpihmatvirt", - test_acpi_virt_tcg_acpi_hmat); - qtest_add_func("acpi/virt/topology", test_acpi_virt_tcg_topology); - qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); - qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); - qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); - qtest_add_func("acpi/virt/oem-fields", test_acpi_virt_oem_fields); + test_acpi_aarch64_virt_tcg_acpi_hmat); + qtest_add_func("acpi/virt/topology", + test_acpi_aarch64_virt_tcg_topology); + qtest_add_func("acpi/virt/numamem", + test_acpi_aarch64_virt_tcg_numamem); + qtest_add_func("acpi/virt/memhp", test_acpi_aarch64_virt_tcg_memhp); + qtest_add_func("acpi/virt/pxb", test_acpi_aarch64_virt_tcg_pxb); + qtest_add_func("acpi/virt/oem-fields", + test_acpi_aarch64_virt_oem_fields); if (qtest_has_device("virtio-iommu-pci")) { - qtest_add_func("acpi/virt/viot", test_acpi_virt_viot); + qtest_add_func("acpi/virt/viot", test_acpi_aarch64_virt_viot); } } } From c9ad3decca13cd0f01ef16dba7a3d44abb096964 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:29 +0530 Subject: [PATCH 64/85] tests/qtest/bios-tables-test.c: Add support for arch in path Since machine name can be common for multiple architectures (ex: virt), add "arch" in the path to search for expected AML files. Since the AML files are still under old path, add support for searching with and without arch in the path. 
Signed-off-by: Sunil V L Acked-by: Alistair Francis Reviewed-by: Igor Mammedov Message-Id: <20240625150839.1358279-7-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index c4a4d1c7bf..29c52952f4 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -78,6 +78,7 @@ typedef struct { bool tcg_only; const char *machine; + const char *arch; const char *machine_param; const char *variant; const char *uefi_fl1; @@ -262,8 +263,19 @@ static void dump_aml_files(test_data *data, bool rebuild) g_assert(exp_sdt->aml); if (rebuild) { - aml_file = g_strdup_printf("%s/%s/%.4s%s", data_dir, data->machine, + aml_file = g_strdup_printf("%s/%s/%s/%.4s%s", data_dir, + data->arch, data->machine, sdt->aml, ext); + + /* + * To keep test cases not failing before the DATA files are moved to + * ${arch}/${machine} folder, add this check as well. 
+ */ + if (!g_file_test(aml_file, G_FILE_TEST_EXISTS)) { + aml_file = g_strdup_printf("%s/%s/%.4s%s", data_dir, + data->machine, sdt->aml, ext); + } + if (!g_file_test(aml_file, G_FILE_TEST_EXISTS) && sdt->aml_len == exp_sdt->aml_len && !memcmp(sdt->aml, exp_sdt->aml, sdt->aml_len)) { @@ -398,8 +410,13 @@ static GArray *load_expected_aml(test_data *data) memset(&exp_sdt, 0, sizeof(exp_sdt)); try_again: - aml_file = g_strdup_printf("%s/%s/%.4s%s", data_dir, data->machine, - sdt->aml, ext); + aml_file = g_strdup_printf("%s/%s/%s/%.4s%s", data_dir, data->arch, + data->machine, sdt->aml, ext); + if (!g_file_test(aml_file, G_FILE_TEST_EXISTS)) { + aml_file = g_strdup_printf("%s/%s/%.4s%s", data_dir, data->machine, + sdt->aml, ext); + } + if (verbosity_level >= 2) { fprintf(stderr, "Looking for expected file '%s'\n", aml_file); } From 193e4b90d60a3a976ee7940d6e318ebab4db00e9 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:30 +0530 Subject: [PATCH 65/85] tests/qtest/bios-tables-test.c: Set "arch" for aarch64 tests To search for expected AML files under ${arch}/${machine} path, set this field for AARCH64 related test cases. Signed-off-by: Sunil V L Acked-by: Alistair Francis Reviewed-by: Igor Mammedov Message-Id: <20240625150839.1358279-8-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/qtest/bios-tables-test.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index 29c52952f4..007c281c9a 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -1591,6 +1591,7 @@ static void test_acpi_aarch64_virt_tcg_memhp(void) { test_data data = { .machine = "virt", + .arch = "aarch64", .tcg_only = true, .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", @@ -1684,6 +1685,7 @@ static void test_acpi_aarch64_virt_tcg_numamem(void) { test_data data = { .machine = "virt", + .arch = "aarch64", .tcg_only = true, .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", @@ -1706,6 +1708,7 @@ static void test_acpi_aarch64_virt_tcg_pxb(void) { test_data data = { .machine = "virt", + .arch = "aarch64", .tcg_only = true, .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", @@ -1779,6 +1782,7 @@ static void test_acpi_aarch64_virt_tcg_acpi_hmat(void) { test_data data = { .machine = "virt", + .arch = "aarch64", .tcg_only = true, .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", @@ -1935,6 +1939,7 @@ static void test_acpi_aarch64_virt_tcg(void) { test_data data = { .machine = "virt", + .arch = "aarch64", .tcg_only = true, .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", @@ -1954,6 +1959,7 @@ static void test_acpi_aarch64_virt_tcg_topology(void) { test_data data = { .machine = "virt", + .arch = "aarch64", .variant = ".topology", .tcg_only = true, .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", @@ -2037,6 +2043,7 @@ static void test_acpi_aarch64_virt_viot(void) { test_data data = { .machine = "virt", + .arch = "aarch64", .tcg_only = true, .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", @@ -2213,6 +2220,7 @@ static void test_acpi_aarch64_virt_oem_fields(void) { test_data data = 
{ .machine = "virt", + .arch = "aarch64", .tcg_only = true, .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", From d488c66b13f6070996f493f94400397ff835ed05 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:31 +0530 Subject: [PATCH 66/85] tests/qtest/bios-tables-test.c: Set "arch" for x86 tests To search for expected AML files under ${arch}/${machine} path, set this field for X86 related test cases. Signed-off-by: Sunil V L Reviewed-by: Igor Mammedov Message-Id: <20240625150839.1358279-9-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test.c | 77 ++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index 007c281c9a..f4c4704bab 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -933,6 +933,7 @@ static void test_acpi_piix4_tcg(void) * This is to make guest actually run. 
*/ data.machine = MACHINE_PC; + data.arch = "x86"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); test_acpi_one(NULL, &data); @@ -944,6 +945,7 @@ static void test_acpi_piix4_tcg_bridge(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".bridge"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -981,6 +983,7 @@ static void test_acpi_piix4_no_root_hotplug(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".roothp"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -997,6 +1000,7 @@ static void test_acpi_piix4_no_bridge_hotplug(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".hpbridge"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -1013,6 +1017,7 @@ static void test_acpi_piix4_no_acpi_pci_hotplug(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".hpbrroot"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -1034,6 +1039,7 @@ static void test_acpi_q35_tcg(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); test_acpi_one(NULL, &data); @@ -1049,6 +1055,7 @@ static void test_acpi_q35_kvm_type4_count(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".type4-count", .required_struct_types = base_required_struct_types, .required_struct_types_len = ARRAY_SIZE(base_required_struct_types), @@ -1065,6 +1072,7 @@ static void 
test_acpi_q35_kvm_core_count(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".core-count", .required_struct_types = base_required_struct_types, .required_struct_types_len = ARRAY_SIZE(base_required_struct_types), @@ -1082,6 +1090,7 @@ static void test_acpi_q35_kvm_core_count2(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".core-count2", .required_struct_types = base_required_struct_types, .required_struct_types_len = ARRAY_SIZE(base_required_struct_types), @@ -1099,6 +1108,7 @@ static void test_acpi_q35_kvm_thread_count(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".thread-count", .required_struct_types = base_required_struct_types, .required_struct_types_len = ARRAY_SIZE(base_required_struct_types), @@ -1116,6 +1126,7 @@ static void test_acpi_q35_kvm_thread_count2(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".thread-count2", .required_struct_types = base_required_struct_types, .required_struct_types_len = ARRAY_SIZE(base_required_struct_types), @@ -1134,6 +1145,7 @@ static void test_acpi_q35_tcg_bridge(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".bridge"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -1148,6 +1160,7 @@ static void test_acpi_q35_tcg_no_acpi_hotplug(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".noacpihp"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -1176,6 +1189,7 @@ static void test_acpi_q35_multif_bridge(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".multi-bridge", }; test_vm_prepare("-S" @@ -1225,6 +1239,7 @@ static void test_acpi_q35_tcg_mmio64(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", 
.variant = ".mmio64", .tcg_only = true, .required_struct_types = base_required_struct_types, @@ -1245,6 +1260,7 @@ static void test_acpi_piix4_tcg_cphp(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".cphp"; test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6" " -object memory-backend-ram,id=ram0,size=64M" @@ -1260,6 +1276,7 @@ static void test_acpi_q35_tcg_cphp(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".cphp"; test_acpi_one(" -smp 2,cores=3,sockets=2,maxcpus=6" " -object memory-backend-ram,id=ram0,size=64M" @@ -1279,6 +1296,7 @@ static void test_acpi_q35_tcg_ipmi(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".ipmibt"; data.required_struct_types = ipmi_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(ipmi_required_struct_types); @@ -1293,6 +1311,7 @@ static void test_acpi_q35_tcg_smbus_ipmi(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".ipmismbus"; data.required_struct_types = ipmi_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(ipmi_required_struct_types); @@ -1310,6 +1329,7 @@ static void test_acpi_piix4_tcg_ipmi(void) * This is to make guest actually run. 
*/ data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".ipmikcs"; data.required_struct_types = ipmi_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(ipmi_required_struct_types); @@ -1324,6 +1344,7 @@ static void test_acpi_q35_tcg_memhp(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".memhp"; test_acpi_one(" -m 128,slots=3,maxmem=1G" " -object memory-backend-ram,id=ram0,size=64M" @@ -1339,6 +1360,7 @@ static void test_acpi_piix4_tcg_memhp(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".memhp"; test_acpi_one(" -m 128,slots=3,maxmem=1G" " -object memory-backend-ram,id=ram0,size=64M" @@ -1354,6 +1376,7 @@ static void test_acpi_piix4_tcg_nosmm(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".nosmm"; test_acpi_one("-machine smm=off", &data); free_test_data(&data); @@ -1364,6 +1387,7 @@ static void test_acpi_piix4_tcg_smm_compat(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".smm-compat"; test_acpi_one("-global PIIX4_PM.smm-compat=on", &data); free_test_data(&data); @@ -1374,6 +1398,7 @@ static void test_acpi_piix4_tcg_smm_compat_nosmm(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".smm-compat-nosmm"; test_acpi_one("-global PIIX4_PM.smm-compat=on -machine smm=off", &data); free_test_data(&data); @@ -1384,6 +1409,7 @@ static void test_acpi_piix4_tcg_nohpet(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.machine_param = ",hpet=off"; data.variant = ".nohpet"; test_acpi_one(NULL, &data); @@ -1395,6 +1421,7 @@ static void test_acpi_q35_tcg_numamem(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".numamem"; test_acpi_one(" -object memory-backend-ram,id=ram0,size=128M" " -numa node -numa node,memdev=ram0", &data); @@ -1406,6 +1433,7 @@ static void 
test_acpi_q35_kvm_xapic(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".xapic"; test_acpi_one(" -object memory-backend-ram,id=ram0,size=128M" " -numa node -numa node,memdev=ram0" @@ -1418,6 +1446,7 @@ static void test_acpi_q35_tcg_nosmm(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".nosmm"; test_acpi_one("-machine smm=off", &data); free_test_data(&data); @@ -1428,6 +1457,7 @@ static void test_acpi_q35_tcg_smm_compat(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".smm-compat"; test_acpi_one("-global ICH9-LPC.smm-compat=on", &data); free_test_data(&data); @@ -1438,6 +1468,7 @@ static void test_acpi_q35_tcg_smm_compat_nosmm(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".smm-compat-nosmm"; test_acpi_one("-global ICH9-LPC.smm-compat=on -machine smm=off", &data); free_test_data(&data); @@ -1448,6 +1479,7 @@ static void test_acpi_q35_tcg_nohpet(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.machine_param = ",hpet=off"; data.variant = ".nohpet"; test_acpi_one(NULL, &data); @@ -1459,6 +1491,7 @@ static void test_acpi_q35_kvm_dmar(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".dmar"; test_acpi_one("-machine kernel-irqchip=split -accel kvm" " -device intel-iommu,intremap=on,device-iotlb=on", &data); @@ -1470,6 +1503,7 @@ static void test_acpi_q35_tcg_ivrs(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86", data.variant = ".ivrs"; data.tcg_only = true, test_acpi_one(" -device amd-iommu", &data); @@ -1481,6 +1515,7 @@ static void test_acpi_piix4_tcg_numamem(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.variant = ".numamem"; test_acpi_one(" -object memory-backend-ram,id=ram0,size=128M" " -numa node -numa node,memdev=ram0", &data); @@ -1489,8 +1524,9 @@ static void 
test_acpi_piix4_tcg_numamem(void) uint64_t tpm_tis_base_addr; -static void test_acpi_tcg_tpm(const char *machine, const char *tpm_if, - uint64_t base, enum TPMVersion tpm_version) +static void test_acpi_tcg_tpm(const char *machine, const char *arch, + const char *tpm_if, uint64_t base, + enum TPMVersion tpm_version) { gchar *tmp_dir_name = g_strdup_printf("qemu-test_acpi_%s_tcg_%s.XXXXXX", machine, tpm_if); @@ -1517,6 +1553,7 @@ static void test_acpi_tcg_tpm(const char *machine, const char *tpm_if, tpm_emu_test_wait_cond(&test); data.machine = machine; + data.arch = arch; data.variant = variant; args = g_strdup_printf( @@ -1540,19 +1577,20 @@ static void test_acpi_tcg_tpm(const char *machine, const char *tpm_if, static void test_acpi_q35_tcg_tpm2_tis(void) { - test_acpi_tcg_tpm("q35", "tis", 0xFED40000, TPM_VERSION_2_0); + test_acpi_tcg_tpm("q35", "x86", "tis", 0xFED40000, TPM_VERSION_2_0); } static void test_acpi_q35_tcg_tpm12_tis(void) { - test_acpi_tcg_tpm("q35", "tis", 0xFED40000, TPM_VERSION_1_2); + test_acpi_tcg_tpm("q35", "x86", "tis", 0xFED40000, TPM_VERSION_1_2); } -static void test_acpi_tcg_dimm_pxm(const char *machine) +static void test_acpi_tcg_dimm_pxm(const char *machine, const char *arch) { test_data data = {}; data.machine = machine; + data.arch = arch; data.variant = ".dimmpxm"; test_acpi_one(" -machine nvdimm=on,nvdimm-persistence=cpu" " -smp 4,sockets=4" @@ -1579,12 +1617,12 @@ static void test_acpi_tcg_dimm_pxm(const char *machine) static void test_acpi_q35_tcg_dimm_pxm(void) { - test_acpi_tcg_dimm_pxm(MACHINE_Q35); + test_acpi_tcg_dimm_pxm(MACHINE_Q35, "x86"); } static void test_acpi_piix4_tcg_dimm_pxm(void) { - test_acpi_tcg_dimm_pxm(MACHINE_PC); + test_acpi_tcg_dimm_pxm(MACHINE_PC, "x86"); } static void test_acpi_aarch64_virt_tcg_memhp(void) @@ -1621,6 +1659,7 @@ static void test_acpi_aarch64_virt_tcg_memhp(void) static void test_acpi_microvm_prepare(test_data *data) { data->machine = "microvm"; + data->arch = "x86"; 
data->required_struct_types = NULL; /* no smbios */ data->required_struct_types_len = 0; data->blkdev = "virtio-blk-device"; @@ -1737,11 +1776,12 @@ static void test_acpi_aarch64_virt_tcg_pxb(void) free_test_data(&data); } -static void test_acpi_tcg_acpi_hmat(const char *machine) +static void test_acpi_tcg_acpi_hmat(const char *machine, const char *arch) { test_data data = {}; data.machine = machine; + data.arch = arch; data.variant = ".acpihmat"; test_acpi_one(" -machine hmat=on" " -smp 2,sockets=2" @@ -1770,12 +1810,12 @@ static void test_acpi_tcg_acpi_hmat(const char *machine) static void test_acpi_q35_tcg_acpi_hmat(void) { - test_acpi_tcg_acpi_hmat(MACHINE_Q35); + test_acpi_tcg_acpi_hmat(MACHINE_Q35, "x86"); } static void test_acpi_piix4_tcg_acpi_hmat(void) { - test_acpi_tcg_acpi_hmat(MACHINE_PC); + test_acpi_tcg_acpi_hmat(MACHINE_PC, "x86"); } static void test_acpi_aarch64_virt_tcg_acpi_hmat(void) @@ -1841,6 +1881,7 @@ static void test_acpi_q35_tcg_acpi_hmat_noinitiator(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86"; data.variant = ".acpihmat-noinitiator"; test_acpi_one(" -machine hmat=on" " -smp 4,sockets=2" @@ -1884,13 +1925,14 @@ static void test_acpi_q35_tcg_acpi_hmat_noinitiator(void) } #ifdef CONFIG_POSIX -static void test_acpi_erst(const char *machine) +static void test_acpi_erst(const char *machine, const char *arch) { gchar *tmp_path = g_dir_make_tmp("qemu-test-erst.XXXXXX", NULL); gchar *params; test_data data = {}; data.machine = machine; + data.arch = arch; data.variant = ".acpierst"; params = g_strdup_printf( " -object memory-backend-file,id=erstnvram," @@ -1905,12 +1947,12 @@ static void test_acpi_erst(const char *machine) static void test_acpi_piix4_acpi_erst(void) { - test_acpi_erst(MACHINE_PC); + test_acpi_erst(MACHINE_PC, "x86"); } static void test_acpi_q35_acpi_erst(void) { - test_acpi_erst(MACHINE_Q35); + test_acpi_erst(MACHINE_Q35, "x86"); } static void test_acpi_microvm_acpi_erst(void) @@ -1978,6 +2020,7 @@ 
static void test_acpi_q35_viot(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".viot", }; @@ -2002,6 +2045,7 @@ static void test_acpi_q35_cxl(void) test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".cxl", }; /* @@ -2067,6 +2111,7 @@ static void test_acpi_q35_slic(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".slic", }; @@ -2081,6 +2126,7 @@ static void test_acpi_q35_applesmc(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".applesmc", }; @@ -2094,6 +2140,7 @@ static void test_acpi_q35_pvpanic_isa(void) { test_data data = { .machine = MACHINE_Q35, + .arch = "x86", .variant = ".pvpanic-isa", }; @@ -2106,6 +2153,7 @@ static void test_acpi_pc_smbios_options(void) uint8_t req_type11[] = { 11 }; test_data data = { .machine = MACHINE_PC, + .arch = "x86", .variant = ".pc_smbios_options", .required_struct_types = req_type11, .required_struct_types_len = ARRAY_SIZE(req_type11), @@ -2120,6 +2168,7 @@ static void test_acpi_pc_smbios_blob(void) uint8_t req_type11[] = { 11 }; test_data data = { .machine = MACHINE_PC, + .arch = "x86", .variant = ".pc_smbios_blob", .required_struct_types = req_type11, .required_struct_types_len = ARRAY_SIZE(req_type11), @@ -2169,6 +2218,7 @@ static void test_acpi_piix4_oem_fields(void) test_data data = {}; data.machine = MACHINE_PC; + data.arch = "x86"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -2187,6 +2237,7 @@ static void test_acpi_q35_oem_fields(void) test_data data = {}; data.machine = MACHINE_Q35; + data.arch = "x86"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); From 7c08eefcaf6588b80bd8ce027fab748db3c53f11 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:32 +0530 Subject: [PATCH 67/85] tests/data/acpi: Move x86 ACPI tables under 
x86/${machine} path To support multiple architectures using same machine name, create x86 folder and move all x86 related AML files for each machine type inside. Signed-off-by: Sunil V L Reviewed-by: Igor Mammedov Message-Id: <20240625150839.1358279-10-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/{ => x86}/microvm/APIC | Bin tests/data/acpi/{ => x86}/microvm/APIC.ioapic2 | Bin tests/data/acpi/{ => x86}/microvm/APIC.pcie | Bin tests/data/acpi/{ => x86}/microvm/DSDT | Bin tests/data/acpi/{ => x86}/microvm/DSDT.ioapic2 | Bin tests/data/acpi/{ => x86}/microvm/DSDT.pcie | Bin tests/data/acpi/{ => x86}/microvm/DSDT.rtc | Bin tests/data/acpi/{ => x86}/microvm/DSDT.usb | Bin tests/data/acpi/{ => x86}/microvm/ERST.pcie | Bin tests/data/acpi/{ => x86}/microvm/FACP | Bin tests/data/acpi/{ => x86}/pc/APIC | Bin tests/data/acpi/{ => x86}/pc/APIC.acpihmat | Bin tests/data/acpi/{ => x86}/pc/APIC.cphp | Bin tests/data/acpi/{ => x86}/pc/APIC.dimmpxm | Bin tests/data/acpi/{ => x86}/pc/DSDT | Bin tests/data/acpi/{ => x86}/pc/DSDT.acpierst | Bin tests/data/acpi/{ => x86}/pc/DSDT.acpihmat | Bin tests/data/acpi/{ => x86}/pc/DSDT.bridge | Bin tests/data/acpi/{ => x86}/pc/DSDT.cphp | Bin tests/data/acpi/{ => x86}/pc/DSDT.dimmpxm | Bin tests/data/acpi/{ => x86}/pc/DSDT.hpbridge | Bin tests/data/acpi/{ => x86}/pc/DSDT.hpbrroot | Bin tests/data/acpi/{ => x86}/pc/DSDT.ipmikcs | Bin tests/data/acpi/{ => x86}/pc/DSDT.memhp | Bin tests/data/acpi/{ => x86}/pc/DSDT.nohpet | Bin tests/data/acpi/{ => x86}/pc/DSDT.numamem | Bin tests/data/acpi/{ => x86}/pc/DSDT.roothp | Bin tests/data/acpi/{ => x86}/pc/ERST.acpierst | Bin tests/data/acpi/{ => x86}/pc/FACP | Bin tests/data/acpi/{ => x86}/pc/FACP.nosmm | Bin tests/data/acpi/{ => x86}/pc/FACS | Bin tests/data/acpi/{ => x86}/pc/HMAT.acpihmat | Bin tests/data/acpi/{ => x86}/pc/HPET | Bin tests/data/acpi/{ => x86}/pc/NFIT.dimmpxm | Bin tests/data/acpi/{ => x86}/pc/SLIT.cphp | Bin 
tests/data/acpi/{ => x86}/pc/SLIT.memhp | Bin tests/data/acpi/{ => x86}/pc/SRAT.acpihmat | Bin tests/data/acpi/{ => x86}/pc/SRAT.cphp | Bin tests/data/acpi/{ => x86}/pc/SRAT.dimmpxm | Bin tests/data/acpi/{ => x86}/pc/SRAT.memhp | Bin tests/data/acpi/{ => x86}/pc/SRAT.numamem | Bin tests/data/acpi/{ => x86}/pc/SSDT.dimmpxm | Bin tests/data/acpi/{ => x86}/pc/WAET | Bin tests/data/acpi/{ => x86}/q35/APIC | Bin tests/data/acpi/{ => x86}/q35/APIC.acpihmat | Bin .../acpi/{ => x86}/q35/APIC.acpihmat-noinitiator | Bin tests/data/acpi/{ => x86}/q35/APIC.core-count | Bin tests/data/acpi/{ => x86}/q35/APIC.core-count2 | Bin tests/data/acpi/{ => x86}/q35/APIC.cphp | Bin tests/data/acpi/{ => x86}/q35/APIC.dimmpxm | Bin tests/data/acpi/{ => x86}/q35/APIC.thread-count | Bin tests/data/acpi/{ => x86}/q35/APIC.thread-count2 | Bin tests/data/acpi/{ => x86}/q35/APIC.type4-count | Bin tests/data/acpi/{ => x86}/q35/APIC.xapic | Bin tests/data/acpi/{ => x86}/q35/CEDT.cxl | Bin tests/data/acpi/{ => x86}/q35/DMAR.dmar | Bin tests/data/acpi/{ => x86}/q35/DSDT | Bin tests/data/acpi/{ => x86}/q35/DSDT.acpierst | Bin tests/data/acpi/{ => x86}/q35/DSDT.acpihmat | Bin .../acpi/{ => x86}/q35/DSDT.acpihmat-noinitiator | Bin tests/data/acpi/{ => x86}/q35/DSDT.applesmc | Bin tests/data/acpi/{ => x86}/q35/DSDT.bridge | Bin tests/data/acpi/{ => x86}/q35/DSDT.core-count | Bin tests/data/acpi/{ => x86}/q35/DSDT.core-count2 | Bin tests/data/acpi/{ => x86}/q35/DSDT.cphp | Bin tests/data/acpi/{ => x86}/q35/DSDT.cxl | Bin tests/data/acpi/{ => x86}/q35/DSDT.dimmpxm | Bin tests/data/acpi/{ => x86}/q35/DSDT.ipmibt | Bin tests/data/acpi/{ => x86}/q35/DSDT.ipmismbus | Bin tests/data/acpi/{ => x86}/q35/DSDT.ivrs | Bin tests/data/acpi/{ => x86}/q35/DSDT.memhp | Bin tests/data/acpi/{ => x86}/q35/DSDT.mmio64 | Bin tests/data/acpi/{ => x86}/q35/DSDT.multi-bridge | Bin tests/data/acpi/{ => x86}/q35/DSDT.noacpihp | Bin tests/data/acpi/{ => x86}/q35/DSDT.nohpet | Bin tests/data/acpi/{ => x86}/q35/DSDT.numamem | Bin 
tests/data/acpi/{ => x86}/q35/DSDT.pvpanic-isa | Bin tests/data/acpi/{ => x86}/q35/DSDT.thread-count | Bin tests/data/acpi/{ => x86}/q35/DSDT.thread-count2 | Bin tests/data/acpi/{ => x86}/q35/DSDT.tis.tpm12 | Bin tests/data/acpi/{ => x86}/q35/DSDT.tis.tpm2 | Bin tests/data/acpi/{ => x86}/q35/DSDT.type4-count | Bin tests/data/acpi/{ => x86}/q35/DSDT.viot | Bin tests/data/acpi/{ => x86}/q35/DSDT.xapic | Bin tests/data/acpi/{ => x86}/q35/ERST.acpierst | Bin tests/data/acpi/{ => x86}/q35/FACP | Bin tests/data/acpi/{ => x86}/q35/FACP.core-count | Bin tests/data/acpi/{ => x86}/q35/FACP.core-count2 | Bin tests/data/acpi/{ => x86}/q35/FACP.nosmm | Bin tests/data/acpi/{ => x86}/q35/FACP.slic | Bin tests/data/acpi/{ => x86}/q35/FACP.thread-count | Bin tests/data/acpi/{ => x86}/q35/FACP.thread-count2 | Bin tests/data/acpi/{ => x86}/q35/FACP.type4-count | Bin tests/data/acpi/{ => x86}/q35/FACP.xapic | Bin tests/data/acpi/{ => x86}/q35/FACS | Bin tests/data/acpi/{ => x86}/q35/HMAT.acpihmat | Bin .../acpi/{ => x86}/q35/HMAT.acpihmat-noinitiator | Bin tests/data/acpi/{ => x86}/q35/HPET | Bin tests/data/acpi/{ => x86}/q35/IVRS.ivrs | Bin tests/data/acpi/{ => x86}/q35/MCFG | Bin tests/data/acpi/{ => x86}/q35/NFIT.dimmpxm | Bin tests/data/acpi/{ => x86}/q35/SLIC.slic | Bin tests/data/acpi/{ => x86}/q35/SLIT.cphp | Bin tests/data/acpi/{ => x86}/q35/SLIT.memhp | Bin tests/data/acpi/{ => x86}/q35/SRAT.acpihmat | Bin .../acpi/{ => x86}/q35/SRAT.acpihmat-noinitiator | Bin tests/data/acpi/{ => x86}/q35/SRAT.cphp | Bin tests/data/acpi/{ => x86}/q35/SRAT.dimmpxm | Bin tests/data/acpi/{ => x86}/q35/SRAT.memhp | Bin tests/data/acpi/{ => x86}/q35/SRAT.mmio64 | Bin tests/data/acpi/{ => x86}/q35/SRAT.numamem | Bin tests/data/acpi/{ => x86}/q35/SRAT.xapic | Bin tests/data/acpi/{ => x86}/q35/SSDT.dimmpxm | Bin tests/data/acpi/{ => x86}/q35/TCPA.tis.tpm12 | Bin tests/data/acpi/{ => x86}/q35/TPM2.tis.tpm2 | Bin tests/data/acpi/{ => x86}/q35/VIOT.viot | Bin tests/data/acpi/{ => x86}/q35/WAET | Bin 
117 files changed, 0 insertions(+), 0 deletions(-) rename tests/data/acpi/{ => x86}/microvm/APIC (100%) rename tests/data/acpi/{ => x86}/microvm/APIC.ioapic2 (100%) rename tests/data/acpi/{ => x86}/microvm/APIC.pcie (100%) rename tests/data/acpi/{ => x86}/microvm/DSDT (100%) rename tests/data/acpi/{ => x86}/microvm/DSDT.ioapic2 (100%) rename tests/data/acpi/{ => x86}/microvm/DSDT.pcie (100%) rename tests/data/acpi/{ => x86}/microvm/DSDT.rtc (100%) rename tests/data/acpi/{ => x86}/microvm/DSDT.usb (100%) rename tests/data/acpi/{ => x86}/microvm/ERST.pcie (100%) rename tests/data/acpi/{ => x86}/microvm/FACP (100%) rename tests/data/acpi/{ => x86}/pc/APIC (100%) rename tests/data/acpi/{ => x86}/pc/APIC.acpihmat (100%) rename tests/data/acpi/{ => x86}/pc/APIC.cphp (100%) rename tests/data/acpi/{ => x86}/pc/APIC.dimmpxm (100%) rename tests/data/acpi/{ => x86}/pc/DSDT (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.acpierst (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.acpihmat (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.bridge (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.cphp (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.dimmpxm (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.hpbridge (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.hpbrroot (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.ipmikcs (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.memhp (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.nohpet (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.numamem (100%) rename tests/data/acpi/{ => x86}/pc/DSDT.roothp (100%) rename tests/data/acpi/{ => x86}/pc/ERST.acpierst (100%) rename tests/data/acpi/{ => x86}/pc/FACP (100%) rename tests/data/acpi/{ => x86}/pc/FACP.nosmm (100%) rename tests/data/acpi/{ => x86}/pc/FACS (100%) rename tests/data/acpi/{ => x86}/pc/HMAT.acpihmat (100%) rename tests/data/acpi/{ => x86}/pc/HPET (100%) rename tests/data/acpi/{ => x86}/pc/NFIT.dimmpxm (100%) rename tests/data/acpi/{ => x86}/pc/SLIT.cphp (100%) rename tests/data/acpi/{ => 
x86}/pc/SLIT.memhp (100%) rename tests/data/acpi/{ => x86}/pc/SRAT.acpihmat (100%) rename tests/data/acpi/{ => x86}/pc/SRAT.cphp (100%) rename tests/data/acpi/{ => x86}/pc/SRAT.dimmpxm (100%) rename tests/data/acpi/{ => x86}/pc/SRAT.memhp (100%) rename tests/data/acpi/{ => x86}/pc/SRAT.numamem (100%) rename tests/data/acpi/{ => x86}/pc/SSDT.dimmpxm (100%) rename tests/data/acpi/{ => x86}/pc/WAET (100%) rename tests/data/acpi/{ => x86}/q35/APIC (100%) rename tests/data/acpi/{ => x86}/q35/APIC.acpihmat (100%) rename tests/data/acpi/{ => x86}/q35/APIC.acpihmat-noinitiator (100%) rename tests/data/acpi/{ => x86}/q35/APIC.core-count (100%) rename tests/data/acpi/{ => x86}/q35/APIC.core-count2 (100%) rename tests/data/acpi/{ => x86}/q35/APIC.cphp (100%) rename tests/data/acpi/{ => x86}/q35/APIC.dimmpxm (100%) rename tests/data/acpi/{ => x86}/q35/APIC.thread-count (100%) rename tests/data/acpi/{ => x86}/q35/APIC.thread-count2 (100%) rename tests/data/acpi/{ => x86}/q35/APIC.type4-count (100%) rename tests/data/acpi/{ => x86}/q35/APIC.xapic (100%) rename tests/data/acpi/{ => x86}/q35/CEDT.cxl (100%) rename tests/data/acpi/{ => x86}/q35/DMAR.dmar (100%) rename tests/data/acpi/{ => x86}/q35/DSDT (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.acpierst (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.acpihmat (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.acpihmat-noinitiator (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.applesmc (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.bridge (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.core-count (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.core-count2 (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.cphp (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.cxl (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.dimmpxm (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.ipmibt (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.ipmismbus (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.ivrs (100%) rename tests/data/acpi/{ => 
x86}/q35/DSDT.memhp (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.mmio64 (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.multi-bridge (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.noacpihp (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.nohpet (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.numamem (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.pvpanic-isa (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.thread-count (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.thread-count2 (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.tis.tpm12 (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.tis.tpm2 (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.type4-count (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.viot (100%) rename tests/data/acpi/{ => x86}/q35/DSDT.xapic (100%) rename tests/data/acpi/{ => x86}/q35/ERST.acpierst (100%) rename tests/data/acpi/{ => x86}/q35/FACP (100%) rename tests/data/acpi/{ => x86}/q35/FACP.core-count (100%) rename tests/data/acpi/{ => x86}/q35/FACP.core-count2 (100%) rename tests/data/acpi/{ => x86}/q35/FACP.nosmm (100%) rename tests/data/acpi/{ => x86}/q35/FACP.slic (100%) rename tests/data/acpi/{ => x86}/q35/FACP.thread-count (100%) rename tests/data/acpi/{ => x86}/q35/FACP.thread-count2 (100%) rename tests/data/acpi/{ => x86}/q35/FACP.type4-count (100%) rename tests/data/acpi/{ => x86}/q35/FACP.xapic (100%) rename tests/data/acpi/{ => x86}/q35/FACS (100%) rename tests/data/acpi/{ => x86}/q35/HMAT.acpihmat (100%) rename tests/data/acpi/{ => x86}/q35/HMAT.acpihmat-noinitiator (100%) rename tests/data/acpi/{ => x86}/q35/HPET (100%) rename tests/data/acpi/{ => x86}/q35/IVRS.ivrs (100%) rename tests/data/acpi/{ => x86}/q35/MCFG (100%) rename tests/data/acpi/{ => x86}/q35/NFIT.dimmpxm (100%) rename tests/data/acpi/{ => x86}/q35/SLIC.slic (100%) rename tests/data/acpi/{ => x86}/q35/SLIT.cphp (100%) rename tests/data/acpi/{ => x86}/q35/SLIT.memhp (100%) rename tests/data/acpi/{ => x86}/q35/SRAT.acpihmat (100%) rename tests/data/acpi/{ 
=> x86}/q35/SRAT.acpihmat-noinitiator (100%) rename tests/data/acpi/{ => x86}/q35/SRAT.cphp (100%) rename tests/data/acpi/{ => x86}/q35/SRAT.dimmpxm (100%) rename tests/data/acpi/{ => x86}/q35/SRAT.memhp (100%) rename tests/data/acpi/{ => x86}/q35/SRAT.mmio64 (100%) rename tests/data/acpi/{ => x86}/q35/SRAT.numamem (100%) rename tests/data/acpi/{ => x86}/q35/SRAT.xapic (100%) rename tests/data/acpi/{ => x86}/q35/SSDT.dimmpxm (100%) rename tests/data/acpi/{ => x86}/q35/TCPA.tis.tpm12 (100%) rename tests/data/acpi/{ => x86}/q35/TPM2.tis.tpm2 (100%) rename tests/data/acpi/{ => x86}/q35/VIOT.viot (100%) rename tests/data/acpi/{ => x86}/q35/WAET (100%) diff --git a/tests/data/acpi/microvm/APIC b/tests/data/acpi/x86/microvm/APIC similarity index 100% rename from tests/data/acpi/microvm/APIC rename to tests/data/acpi/x86/microvm/APIC diff --git a/tests/data/acpi/microvm/APIC.ioapic2 b/tests/data/acpi/x86/microvm/APIC.ioapic2 similarity index 100% rename from tests/data/acpi/microvm/APIC.ioapic2 rename to tests/data/acpi/x86/microvm/APIC.ioapic2 diff --git a/tests/data/acpi/microvm/APIC.pcie b/tests/data/acpi/x86/microvm/APIC.pcie similarity index 100% rename from tests/data/acpi/microvm/APIC.pcie rename to tests/data/acpi/x86/microvm/APIC.pcie diff --git a/tests/data/acpi/microvm/DSDT b/tests/data/acpi/x86/microvm/DSDT similarity index 100% rename from tests/data/acpi/microvm/DSDT rename to tests/data/acpi/x86/microvm/DSDT diff --git a/tests/data/acpi/microvm/DSDT.ioapic2 b/tests/data/acpi/x86/microvm/DSDT.ioapic2 similarity index 100% rename from tests/data/acpi/microvm/DSDT.ioapic2 rename to tests/data/acpi/x86/microvm/DSDT.ioapic2 diff --git a/tests/data/acpi/microvm/DSDT.pcie b/tests/data/acpi/x86/microvm/DSDT.pcie similarity index 100% rename from tests/data/acpi/microvm/DSDT.pcie rename to tests/data/acpi/x86/microvm/DSDT.pcie diff --git a/tests/data/acpi/microvm/DSDT.rtc b/tests/data/acpi/x86/microvm/DSDT.rtc similarity index 100% rename from 
tests/data/acpi/microvm/DSDT.rtc rename to tests/data/acpi/x86/microvm/DSDT.rtc diff --git a/tests/data/acpi/microvm/DSDT.usb b/tests/data/acpi/x86/microvm/DSDT.usb similarity index 100% rename from tests/data/acpi/microvm/DSDT.usb rename to tests/data/acpi/x86/microvm/DSDT.usb diff --git a/tests/data/acpi/microvm/ERST.pcie b/tests/data/acpi/x86/microvm/ERST.pcie similarity index 100% rename from tests/data/acpi/microvm/ERST.pcie rename to tests/data/acpi/x86/microvm/ERST.pcie diff --git a/tests/data/acpi/microvm/FACP b/tests/data/acpi/x86/microvm/FACP similarity index 100% rename from tests/data/acpi/microvm/FACP rename to tests/data/acpi/x86/microvm/FACP diff --git a/tests/data/acpi/pc/APIC b/tests/data/acpi/x86/pc/APIC similarity index 100% rename from tests/data/acpi/pc/APIC rename to tests/data/acpi/x86/pc/APIC diff --git a/tests/data/acpi/pc/APIC.acpihmat b/tests/data/acpi/x86/pc/APIC.acpihmat similarity index 100% rename from tests/data/acpi/pc/APIC.acpihmat rename to tests/data/acpi/x86/pc/APIC.acpihmat diff --git a/tests/data/acpi/pc/APIC.cphp b/tests/data/acpi/x86/pc/APIC.cphp similarity index 100% rename from tests/data/acpi/pc/APIC.cphp rename to tests/data/acpi/x86/pc/APIC.cphp diff --git a/tests/data/acpi/pc/APIC.dimmpxm b/tests/data/acpi/x86/pc/APIC.dimmpxm similarity index 100% rename from tests/data/acpi/pc/APIC.dimmpxm rename to tests/data/acpi/x86/pc/APIC.dimmpxm diff --git a/tests/data/acpi/pc/DSDT b/tests/data/acpi/x86/pc/DSDT similarity index 100% rename from tests/data/acpi/pc/DSDT rename to tests/data/acpi/x86/pc/DSDT diff --git a/tests/data/acpi/pc/DSDT.acpierst b/tests/data/acpi/x86/pc/DSDT.acpierst similarity index 100% rename from tests/data/acpi/pc/DSDT.acpierst rename to tests/data/acpi/x86/pc/DSDT.acpierst diff --git a/tests/data/acpi/pc/DSDT.acpihmat b/tests/data/acpi/x86/pc/DSDT.acpihmat similarity index 100% rename from tests/data/acpi/pc/DSDT.acpihmat rename to tests/data/acpi/x86/pc/DSDT.acpihmat diff --git 
a/tests/data/acpi/pc/DSDT.bridge b/tests/data/acpi/x86/pc/DSDT.bridge similarity index 100% rename from tests/data/acpi/pc/DSDT.bridge rename to tests/data/acpi/x86/pc/DSDT.bridge diff --git a/tests/data/acpi/pc/DSDT.cphp b/tests/data/acpi/x86/pc/DSDT.cphp similarity index 100% rename from tests/data/acpi/pc/DSDT.cphp rename to tests/data/acpi/x86/pc/DSDT.cphp diff --git a/tests/data/acpi/pc/DSDT.dimmpxm b/tests/data/acpi/x86/pc/DSDT.dimmpxm similarity index 100% rename from tests/data/acpi/pc/DSDT.dimmpxm rename to tests/data/acpi/x86/pc/DSDT.dimmpxm diff --git a/tests/data/acpi/pc/DSDT.hpbridge b/tests/data/acpi/x86/pc/DSDT.hpbridge similarity index 100% rename from tests/data/acpi/pc/DSDT.hpbridge rename to tests/data/acpi/x86/pc/DSDT.hpbridge diff --git a/tests/data/acpi/pc/DSDT.hpbrroot b/tests/data/acpi/x86/pc/DSDT.hpbrroot similarity index 100% rename from tests/data/acpi/pc/DSDT.hpbrroot rename to tests/data/acpi/x86/pc/DSDT.hpbrroot diff --git a/tests/data/acpi/pc/DSDT.ipmikcs b/tests/data/acpi/x86/pc/DSDT.ipmikcs similarity index 100% rename from tests/data/acpi/pc/DSDT.ipmikcs rename to tests/data/acpi/x86/pc/DSDT.ipmikcs diff --git a/tests/data/acpi/pc/DSDT.memhp b/tests/data/acpi/x86/pc/DSDT.memhp similarity index 100% rename from tests/data/acpi/pc/DSDT.memhp rename to tests/data/acpi/x86/pc/DSDT.memhp diff --git a/tests/data/acpi/pc/DSDT.nohpet b/tests/data/acpi/x86/pc/DSDT.nohpet similarity index 100% rename from tests/data/acpi/pc/DSDT.nohpet rename to tests/data/acpi/x86/pc/DSDT.nohpet diff --git a/tests/data/acpi/pc/DSDT.numamem b/tests/data/acpi/x86/pc/DSDT.numamem similarity index 100% rename from tests/data/acpi/pc/DSDT.numamem rename to tests/data/acpi/x86/pc/DSDT.numamem diff --git a/tests/data/acpi/pc/DSDT.roothp b/tests/data/acpi/x86/pc/DSDT.roothp similarity index 100% rename from tests/data/acpi/pc/DSDT.roothp rename to tests/data/acpi/x86/pc/DSDT.roothp diff --git a/tests/data/acpi/pc/ERST.acpierst b/tests/data/acpi/x86/pc/ERST.acpierst 
similarity index 100% rename from tests/data/acpi/pc/ERST.acpierst rename to tests/data/acpi/x86/pc/ERST.acpierst diff --git a/tests/data/acpi/pc/FACP b/tests/data/acpi/x86/pc/FACP similarity index 100% rename from tests/data/acpi/pc/FACP rename to tests/data/acpi/x86/pc/FACP diff --git a/tests/data/acpi/pc/FACP.nosmm b/tests/data/acpi/x86/pc/FACP.nosmm similarity index 100% rename from tests/data/acpi/pc/FACP.nosmm rename to tests/data/acpi/x86/pc/FACP.nosmm diff --git a/tests/data/acpi/pc/FACS b/tests/data/acpi/x86/pc/FACS similarity index 100% rename from tests/data/acpi/pc/FACS rename to tests/data/acpi/x86/pc/FACS diff --git a/tests/data/acpi/pc/HMAT.acpihmat b/tests/data/acpi/x86/pc/HMAT.acpihmat similarity index 100% rename from tests/data/acpi/pc/HMAT.acpihmat rename to tests/data/acpi/x86/pc/HMAT.acpihmat diff --git a/tests/data/acpi/pc/HPET b/tests/data/acpi/x86/pc/HPET similarity index 100% rename from tests/data/acpi/pc/HPET rename to tests/data/acpi/x86/pc/HPET diff --git a/tests/data/acpi/pc/NFIT.dimmpxm b/tests/data/acpi/x86/pc/NFIT.dimmpxm similarity index 100% rename from tests/data/acpi/pc/NFIT.dimmpxm rename to tests/data/acpi/x86/pc/NFIT.dimmpxm diff --git a/tests/data/acpi/pc/SLIT.cphp b/tests/data/acpi/x86/pc/SLIT.cphp similarity index 100% rename from tests/data/acpi/pc/SLIT.cphp rename to tests/data/acpi/x86/pc/SLIT.cphp diff --git a/tests/data/acpi/pc/SLIT.memhp b/tests/data/acpi/x86/pc/SLIT.memhp similarity index 100% rename from tests/data/acpi/pc/SLIT.memhp rename to tests/data/acpi/x86/pc/SLIT.memhp diff --git a/tests/data/acpi/pc/SRAT.acpihmat b/tests/data/acpi/x86/pc/SRAT.acpihmat similarity index 100% rename from tests/data/acpi/pc/SRAT.acpihmat rename to tests/data/acpi/x86/pc/SRAT.acpihmat diff --git a/tests/data/acpi/pc/SRAT.cphp b/tests/data/acpi/x86/pc/SRAT.cphp similarity index 100% rename from tests/data/acpi/pc/SRAT.cphp rename to tests/data/acpi/x86/pc/SRAT.cphp diff --git a/tests/data/acpi/pc/SRAT.dimmpxm 
b/tests/data/acpi/x86/pc/SRAT.dimmpxm similarity index 100% rename from tests/data/acpi/pc/SRAT.dimmpxm rename to tests/data/acpi/x86/pc/SRAT.dimmpxm diff --git a/tests/data/acpi/pc/SRAT.memhp b/tests/data/acpi/x86/pc/SRAT.memhp similarity index 100% rename from tests/data/acpi/pc/SRAT.memhp rename to tests/data/acpi/x86/pc/SRAT.memhp diff --git a/tests/data/acpi/pc/SRAT.numamem b/tests/data/acpi/x86/pc/SRAT.numamem similarity index 100% rename from tests/data/acpi/pc/SRAT.numamem rename to tests/data/acpi/x86/pc/SRAT.numamem diff --git a/tests/data/acpi/pc/SSDT.dimmpxm b/tests/data/acpi/x86/pc/SSDT.dimmpxm similarity index 100% rename from tests/data/acpi/pc/SSDT.dimmpxm rename to tests/data/acpi/x86/pc/SSDT.dimmpxm diff --git a/tests/data/acpi/pc/WAET b/tests/data/acpi/x86/pc/WAET similarity index 100% rename from tests/data/acpi/pc/WAET rename to tests/data/acpi/x86/pc/WAET diff --git a/tests/data/acpi/q35/APIC b/tests/data/acpi/x86/q35/APIC similarity index 100% rename from tests/data/acpi/q35/APIC rename to tests/data/acpi/x86/q35/APIC diff --git a/tests/data/acpi/q35/APIC.acpihmat b/tests/data/acpi/x86/q35/APIC.acpihmat similarity index 100% rename from tests/data/acpi/q35/APIC.acpihmat rename to tests/data/acpi/x86/q35/APIC.acpihmat diff --git a/tests/data/acpi/q35/APIC.acpihmat-noinitiator b/tests/data/acpi/x86/q35/APIC.acpihmat-noinitiator similarity index 100% rename from tests/data/acpi/q35/APIC.acpihmat-noinitiator rename to tests/data/acpi/x86/q35/APIC.acpihmat-noinitiator diff --git a/tests/data/acpi/q35/APIC.core-count b/tests/data/acpi/x86/q35/APIC.core-count similarity index 100% rename from tests/data/acpi/q35/APIC.core-count rename to tests/data/acpi/x86/q35/APIC.core-count diff --git a/tests/data/acpi/q35/APIC.core-count2 b/tests/data/acpi/x86/q35/APIC.core-count2 similarity index 100% rename from tests/data/acpi/q35/APIC.core-count2 rename to tests/data/acpi/x86/q35/APIC.core-count2 diff --git a/tests/data/acpi/q35/APIC.cphp 
b/tests/data/acpi/x86/q35/APIC.cphp similarity index 100% rename from tests/data/acpi/q35/APIC.cphp rename to tests/data/acpi/x86/q35/APIC.cphp diff --git a/tests/data/acpi/q35/APIC.dimmpxm b/tests/data/acpi/x86/q35/APIC.dimmpxm similarity index 100% rename from tests/data/acpi/q35/APIC.dimmpxm rename to tests/data/acpi/x86/q35/APIC.dimmpxm diff --git a/tests/data/acpi/q35/APIC.thread-count b/tests/data/acpi/x86/q35/APIC.thread-count similarity index 100% rename from tests/data/acpi/q35/APIC.thread-count rename to tests/data/acpi/x86/q35/APIC.thread-count diff --git a/tests/data/acpi/q35/APIC.thread-count2 b/tests/data/acpi/x86/q35/APIC.thread-count2 similarity index 100% rename from tests/data/acpi/q35/APIC.thread-count2 rename to tests/data/acpi/x86/q35/APIC.thread-count2 diff --git a/tests/data/acpi/q35/APIC.type4-count b/tests/data/acpi/x86/q35/APIC.type4-count similarity index 100% rename from tests/data/acpi/q35/APIC.type4-count rename to tests/data/acpi/x86/q35/APIC.type4-count diff --git a/tests/data/acpi/q35/APIC.xapic b/tests/data/acpi/x86/q35/APIC.xapic similarity index 100% rename from tests/data/acpi/q35/APIC.xapic rename to tests/data/acpi/x86/q35/APIC.xapic diff --git a/tests/data/acpi/q35/CEDT.cxl b/tests/data/acpi/x86/q35/CEDT.cxl similarity index 100% rename from tests/data/acpi/q35/CEDT.cxl rename to tests/data/acpi/x86/q35/CEDT.cxl diff --git a/tests/data/acpi/q35/DMAR.dmar b/tests/data/acpi/x86/q35/DMAR.dmar similarity index 100% rename from tests/data/acpi/q35/DMAR.dmar rename to tests/data/acpi/x86/q35/DMAR.dmar diff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/x86/q35/DSDT similarity index 100% rename from tests/data/acpi/q35/DSDT rename to tests/data/acpi/x86/q35/DSDT diff --git a/tests/data/acpi/q35/DSDT.acpierst b/tests/data/acpi/x86/q35/DSDT.acpierst similarity index 100% rename from tests/data/acpi/q35/DSDT.acpierst rename to tests/data/acpi/x86/q35/DSDT.acpierst diff --git a/tests/data/acpi/q35/DSDT.acpihmat 
b/tests/data/acpi/x86/q35/DSDT.acpihmat similarity index 100% rename from tests/data/acpi/q35/DSDT.acpihmat rename to tests/data/acpi/x86/q35/DSDT.acpihmat diff --git a/tests/data/acpi/q35/DSDT.acpihmat-noinitiator b/tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator similarity index 100% rename from tests/data/acpi/q35/DSDT.acpihmat-noinitiator rename to tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator diff --git a/tests/data/acpi/q35/DSDT.applesmc b/tests/data/acpi/x86/q35/DSDT.applesmc similarity index 100% rename from tests/data/acpi/q35/DSDT.applesmc rename to tests/data/acpi/x86/q35/DSDT.applesmc diff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/x86/q35/DSDT.bridge similarity index 100% rename from tests/data/acpi/q35/DSDT.bridge rename to tests/data/acpi/x86/q35/DSDT.bridge diff --git a/tests/data/acpi/q35/DSDT.core-count b/tests/data/acpi/x86/q35/DSDT.core-count similarity index 100% rename from tests/data/acpi/q35/DSDT.core-count rename to tests/data/acpi/x86/q35/DSDT.core-count diff --git a/tests/data/acpi/q35/DSDT.core-count2 b/tests/data/acpi/x86/q35/DSDT.core-count2 similarity index 100% rename from tests/data/acpi/q35/DSDT.core-count2 rename to tests/data/acpi/x86/q35/DSDT.core-count2 diff --git a/tests/data/acpi/q35/DSDT.cphp b/tests/data/acpi/x86/q35/DSDT.cphp similarity index 100% rename from tests/data/acpi/q35/DSDT.cphp rename to tests/data/acpi/x86/q35/DSDT.cphp diff --git a/tests/data/acpi/q35/DSDT.cxl b/tests/data/acpi/x86/q35/DSDT.cxl similarity index 100% rename from tests/data/acpi/q35/DSDT.cxl rename to tests/data/acpi/x86/q35/DSDT.cxl diff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/x86/q35/DSDT.dimmpxm similarity index 100% rename from tests/data/acpi/q35/DSDT.dimmpxm rename to tests/data/acpi/x86/q35/DSDT.dimmpxm diff --git a/tests/data/acpi/q35/DSDT.ipmibt b/tests/data/acpi/x86/q35/DSDT.ipmibt similarity index 100% rename from tests/data/acpi/q35/DSDT.ipmibt rename to tests/data/acpi/x86/q35/DSDT.ipmibt diff 
--git a/tests/data/acpi/q35/DSDT.ipmismbus b/tests/data/acpi/x86/q35/DSDT.ipmismbus similarity index 100% rename from tests/data/acpi/q35/DSDT.ipmismbus rename to tests/data/acpi/x86/q35/DSDT.ipmismbus diff --git a/tests/data/acpi/q35/DSDT.ivrs b/tests/data/acpi/x86/q35/DSDT.ivrs similarity index 100% rename from tests/data/acpi/q35/DSDT.ivrs rename to tests/data/acpi/x86/q35/DSDT.ivrs diff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/x86/q35/DSDT.memhp similarity index 100% rename from tests/data/acpi/q35/DSDT.memhp rename to tests/data/acpi/x86/q35/DSDT.memhp diff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/x86/q35/DSDT.mmio64 similarity index 100% rename from tests/data/acpi/q35/DSDT.mmio64 rename to tests/data/acpi/x86/q35/DSDT.mmio64 diff --git a/tests/data/acpi/q35/DSDT.multi-bridge b/tests/data/acpi/x86/q35/DSDT.multi-bridge similarity index 100% rename from tests/data/acpi/q35/DSDT.multi-bridge rename to tests/data/acpi/x86/q35/DSDT.multi-bridge diff --git a/tests/data/acpi/q35/DSDT.noacpihp b/tests/data/acpi/x86/q35/DSDT.noacpihp similarity index 100% rename from tests/data/acpi/q35/DSDT.noacpihp rename to tests/data/acpi/x86/q35/DSDT.noacpihp diff --git a/tests/data/acpi/q35/DSDT.nohpet b/tests/data/acpi/x86/q35/DSDT.nohpet similarity index 100% rename from tests/data/acpi/q35/DSDT.nohpet rename to tests/data/acpi/x86/q35/DSDT.nohpet diff --git a/tests/data/acpi/q35/DSDT.numamem b/tests/data/acpi/x86/q35/DSDT.numamem similarity index 100% rename from tests/data/acpi/q35/DSDT.numamem rename to tests/data/acpi/x86/q35/DSDT.numamem diff --git a/tests/data/acpi/q35/DSDT.pvpanic-isa b/tests/data/acpi/x86/q35/DSDT.pvpanic-isa similarity index 100% rename from tests/data/acpi/q35/DSDT.pvpanic-isa rename to tests/data/acpi/x86/q35/DSDT.pvpanic-isa diff --git a/tests/data/acpi/q35/DSDT.thread-count b/tests/data/acpi/x86/q35/DSDT.thread-count similarity index 100% rename from tests/data/acpi/q35/DSDT.thread-count rename to 
tests/data/acpi/x86/q35/DSDT.thread-count diff --git a/tests/data/acpi/q35/DSDT.thread-count2 b/tests/data/acpi/x86/q35/DSDT.thread-count2 similarity index 100% rename from tests/data/acpi/q35/DSDT.thread-count2 rename to tests/data/acpi/x86/q35/DSDT.thread-count2 diff --git a/tests/data/acpi/q35/DSDT.tis.tpm12 b/tests/data/acpi/x86/q35/DSDT.tis.tpm12 similarity index 100% rename from tests/data/acpi/q35/DSDT.tis.tpm12 rename to tests/data/acpi/x86/q35/DSDT.tis.tpm12 diff --git a/tests/data/acpi/q35/DSDT.tis.tpm2 b/tests/data/acpi/x86/q35/DSDT.tis.tpm2 similarity index 100% rename from tests/data/acpi/q35/DSDT.tis.tpm2 rename to tests/data/acpi/x86/q35/DSDT.tis.tpm2 diff --git a/tests/data/acpi/q35/DSDT.type4-count b/tests/data/acpi/x86/q35/DSDT.type4-count similarity index 100% rename from tests/data/acpi/q35/DSDT.type4-count rename to tests/data/acpi/x86/q35/DSDT.type4-count diff --git a/tests/data/acpi/q35/DSDT.viot b/tests/data/acpi/x86/q35/DSDT.viot similarity index 100% rename from tests/data/acpi/q35/DSDT.viot rename to tests/data/acpi/x86/q35/DSDT.viot diff --git a/tests/data/acpi/q35/DSDT.xapic b/tests/data/acpi/x86/q35/DSDT.xapic similarity index 100% rename from tests/data/acpi/q35/DSDT.xapic rename to tests/data/acpi/x86/q35/DSDT.xapic diff --git a/tests/data/acpi/q35/ERST.acpierst b/tests/data/acpi/x86/q35/ERST.acpierst similarity index 100% rename from tests/data/acpi/q35/ERST.acpierst rename to tests/data/acpi/x86/q35/ERST.acpierst diff --git a/tests/data/acpi/q35/FACP b/tests/data/acpi/x86/q35/FACP similarity index 100% rename from tests/data/acpi/q35/FACP rename to tests/data/acpi/x86/q35/FACP diff --git a/tests/data/acpi/q35/FACP.core-count b/tests/data/acpi/x86/q35/FACP.core-count similarity index 100% rename from tests/data/acpi/q35/FACP.core-count rename to tests/data/acpi/x86/q35/FACP.core-count diff --git a/tests/data/acpi/q35/FACP.core-count2 b/tests/data/acpi/x86/q35/FACP.core-count2 similarity index 100% rename from 
tests/data/acpi/q35/FACP.core-count2 rename to tests/data/acpi/x86/q35/FACP.core-count2 diff --git a/tests/data/acpi/q35/FACP.nosmm b/tests/data/acpi/x86/q35/FACP.nosmm similarity index 100% rename from tests/data/acpi/q35/FACP.nosmm rename to tests/data/acpi/x86/q35/FACP.nosmm diff --git a/tests/data/acpi/q35/FACP.slic b/tests/data/acpi/x86/q35/FACP.slic similarity index 100% rename from tests/data/acpi/q35/FACP.slic rename to tests/data/acpi/x86/q35/FACP.slic diff --git a/tests/data/acpi/q35/FACP.thread-count b/tests/data/acpi/x86/q35/FACP.thread-count similarity index 100% rename from tests/data/acpi/q35/FACP.thread-count rename to tests/data/acpi/x86/q35/FACP.thread-count diff --git a/tests/data/acpi/q35/FACP.thread-count2 b/tests/data/acpi/x86/q35/FACP.thread-count2 similarity index 100% rename from tests/data/acpi/q35/FACP.thread-count2 rename to tests/data/acpi/x86/q35/FACP.thread-count2 diff --git a/tests/data/acpi/q35/FACP.type4-count b/tests/data/acpi/x86/q35/FACP.type4-count similarity index 100% rename from tests/data/acpi/q35/FACP.type4-count rename to tests/data/acpi/x86/q35/FACP.type4-count diff --git a/tests/data/acpi/q35/FACP.xapic b/tests/data/acpi/x86/q35/FACP.xapic similarity index 100% rename from tests/data/acpi/q35/FACP.xapic rename to tests/data/acpi/x86/q35/FACP.xapic diff --git a/tests/data/acpi/q35/FACS b/tests/data/acpi/x86/q35/FACS similarity index 100% rename from tests/data/acpi/q35/FACS rename to tests/data/acpi/x86/q35/FACS diff --git a/tests/data/acpi/q35/HMAT.acpihmat b/tests/data/acpi/x86/q35/HMAT.acpihmat similarity index 100% rename from tests/data/acpi/q35/HMAT.acpihmat rename to tests/data/acpi/x86/q35/HMAT.acpihmat diff --git a/tests/data/acpi/q35/HMAT.acpihmat-noinitiator b/tests/data/acpi/x86/q35/HMAT.acpihmat-noinitiator similarity index 100% rename from tests/data/acpi/q35/HMAT.acpihmat-noinitiator rename to tests/data/acpi/x86/q35/HMAT.acpihmat-noinitiator diff --git a/tests/data/acpi/q35/HPET 
b/tests/data/acpi/x86/q35/HPET similarity index 100% rename from tests/data/acpi/q35/HPET rename to tests/data/acpi/x86/q35/HPET diff --git a/tests/data/acpi/q35/IVRS.ivrs b/tests/data/acpi/x86/q35/IVRS.ivrs similarity index 100% rename from tests/data/acpi/q35/IVRS.ivrs rename to tests/data/acpi/x86/q35/IVRS.ivrs diff --git a/tests/data/acpi/q35/MCFG b/tests/data/acpi/x86/q35/MCFG similarity index 100% rename from tests/data/acpi/q35/MCFG rename to tests/data/acpi/x86/q35/MCFG diff --git a/tests/data/acpi/q35/NFIT.dimmpxm b/tests/data/acpi/x86/q35/NFIT.dimmpxm similarity index 100% rename from tests/data/acpi/q35/NFIT.dimmpxm rename to tests/data/acpi/x86/q35/NFIT.dimmpxm diff --git a/tests/data/acpi/q35/SLIC.slic b/tests/data/acpi/x86/q35/SLIC.slic similarity index 100% rename from tests/data/acpi/q35/SLIC.slic rename to tests/data/acpi/x86/q35/SLIC.slic diff --git a/tests/data/acpi/q35/SLIT.cphp b/tests/data/acpi/x86/q35/SLIT.cphp similarity index 100% rename from tests/data/acpi/q35/SLIT.cphp rename to tests/data/acpi/x86/q35/SLIT.cphp diff --git a/tests/data/acpi/q35/SLIT.memhp b/tests/data/acpi/x86/q35/SLIT.memhp similarity index 100% rename from tests/data/acpi/q35/SLIT.memhp rename to tests/data/acpi/x86/q35/SLIT.memhp diff --git a/tests/data/acpi/q35/SRAT.acpihmat b/tests/data/acpi/x86/q35/SRAT.acpihmat similarity index 100% rename from tests/data/acpi/q35/SRAT.acpihmat rename to tests/data/acpi/x86/q35/SRAT.acpihmat diff --git a/tests/data/acpi/q35/SRAT.acpihmat-noinitiator b/tests/data/acpi/x86/q35/SRAT.acpihmat-noinitiator similarity index 100% rename from tests/data/acpi/q35/SRAT.acpihmat-noinitiator rename to tests/data/acpi/x86/q35/SRAT.acpihmat-noinitiator diff --git a/tests/data/acpi/q35/SRAT.cphp b/tests/data/acpi/x86/q35/SRAT.cphp similarity index 100% rename from tests/data/acpi/q35/SRAT.cphp rename to tests/data/acpi/x86/q35/SRAT.cphp diff --git a/tests/data/acpi/q35/SRAT.dimmpxm b/tests/data/acpi/x86/q35/SRAT.dimmpxm similarity index 100% 
rename from tests/data/acpi/q35/SRAT.dimmpxm rename to tests/data/acpi/x86/q35/SRAT.dimmpxm diff --git a/tests/data/acpi/q35/SRAT.memhp b/tests/data/acpi/x86/q35/SRAT.memhp similarity index 100% rename from tests/data/acpi/q35/SRAT.memhp rename to tests/data/acpi/x86/q35/SRAT.memhp diff --git a/tests/data/acpi/q35/SRAT.mmio64 b/tests/data/acpi/x86/q35/SRAT.mmio64 similarity index 100% rename from tests/data/acpi/q35/SRAT.mmio64 rename to tests/data/acpi/x86/q35/SRAT.mmio64 diff --git a/tests/data/acpi/q35/SRAT.numamem b/tests/data/acpi/x86/q35/SRAT.numamem similarity index 100% rename from tests/data/acpi/q35/SRAT.numamem rename to tests/data/acpi/x86/q35/SRAT.numamem diff --git a/tests/data/acpi/q35/SRAT.xapic b/tests/data/acpi/x86/q35/SRAT.xapic similarity index 100% rename from tests/data/acpi/q35/SRAT.xapic rename to tests/data/acpi/x86/q35/SRAT.xapic diff --git a/tests/data/acpi/q35/SSDT.dimmpxm b/tests/data/acpi/x86/q35/SSDT.dimmpxm similarity index 100% rename from tests/data/acpi/q35/SSDT.dimmpxm rename to tests/data/acpi/x86/q35/SSDT.dimmpxm diff --git a/tests/data/acpi/q35/TCPA.tis.tpm12 b/tests/data/acpi/x86/q35/TCPA.tis.tpm12 similarity index 100% rename from tests/data/acpi/q35/TCPA.tis.tpm12 rename to tests/data/acpi/x86/q35/TCPA.tis.tpm12 diff --git a/tests/data/acpi/q35/TPM2.tis.tpm2 b/tests/data/acpi/x86/q35/TPM2.tis.tpm2 similarity index 100% rename from tests/data/acpi/q35/TPM2.tis.tpm2 rename to tests/data/acpi/x86/q35/TPM2.tis.tpm2 diff --git a/tests/data/acpi/q35/VIOT.viot b/tests/data/acpi/x86/q35/VIOT.viot similarity index 100% rename from tests/data/acpi/q35/VIOT.viot rename to tests/data/acpi/x86/q35/VIOT.viot diff --git a/tests/data/acpi/q35/WAET b/tests/data/acpi/x86/q35/WAET similarity index 100% rename from tests/data/acpi/q35/WAET rename to tests/data/acpi/x86/q35/WAET From 7434f904673c36b78871a3b18ab1f5c09c640131 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:33 +0530 Subject: [PATCH 68/85] 
tests/data/acpi/virt: Move ARM64 ACPI tables under aarch64/${machine} path Same machine name can be used by different architectures. Hence, create aarch64 folder and move all aarch64 related AML files for virt machine inside. Signed-off-by: Sunil V L Reviewed-by: Igor Mammedov Message-Id: <20240625150839.1358279-11-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/{ => aarch64}/virt/APIC | Bin .../data/acpi/{ => aarch64}/virt/APIC.acpihmatvirt | Bin tests/data/acpi/{ => aarch64}/virt/APIC.topology | Bin tests/data/acpi/{ => aarch64}/virt/DBG2 | Bin tests/data/acpi/{ => aarch64}/virt/DSDT | Bin .../data/acpi/{ => aarch64}/virt/DSDT.acpihmatvirt | Bin tests/data/acpi/{ => aarch64}/virt/DSDT.memhp | Bin tests/data/acpi/{ => aarch64}/virt/DSDT.pxb | Bin tests/data/acpi/{ => aarch64}/virt/DSDT.topology | Bin tests/data/acpi/{ => aarch64}/virt/FACP | Bin tests/data/acpi/{ => aarch64}/virt/GTDT | Bin .../data/acpi/{ => aarch64}/virt/HMAT.acpihmatvirt | Bin tests/data/acpi/{ => aarch64}/virt/IORT | Bin tests/data/acpi/{ => aarch64}/virt/MCFG | Bin tests/data/acpi/{ => aarch64}/virt/NFIT.memhp | Bin tests/data/acpi/{ => aarch64}/virt/PPTT | Bin .../data/acpi/{ => aarch64}/virt/PPTT.acpihmatvirt | Bin tests/data/acpi/{ => aarch64}/virt/PPTT.topology | Bin tests/data/acpi/{ => aarch64}/virt/SLIT.memhp | Bin tests/data/acpi/{ => aarch64}/virt/SPCR | Bin .../data/acpi/{ => aarch64}/virt/SRAT.acpihmatvirt | Bin tests/data/acpi/{ => aarch64}/virt/SRAT.memhp | Bin tests/data/acpi/{ => aarch64}/virt/SRAT.numamem | Bin tests/data/acpi/{ => aarch64}/virt/SSDT.memhp | Bin tests/data/acpi/{ => aarch64}/virt/VIOT | Bin 25 files changed, 0 insertions(+), 0 deletions(-) rename tests/data/acpi/{ => aarch64}/virt/APIC (100%) rename tests/data/acpi/{ => aarch64}/virt/APIC.acpihmatvirt (100%) rename tests/data/acpi/{ => aarch64}/virt/APIC.topology (100%) rename tests/data/acpi/{ => aarch64}/virt/DBG2 (100%) rename tests/data/acpi/{ 
=> aarch64}/virt/DSDT (100%) rename tests/data/acpi/{ => aarch64}/virt/DSDT.acpihmatvirt (100%) rename tests/data/acpi/{ => aarch64}/virt/DSDT.memhp (100%) rename tests/data/acpi/{ => aarch64}/virt/DSDT.pxb (100%) rename tests/data/acpi/{ => aarch64}/virt/DSDT.topology (100%) rename tests/data/acpi/{ => aarch64}/virt/FACP (100%) rename tests/data/acpi/{ => aarch64}/virt/GTDT (100%) rename tests/data/acpi/{ => aarch64}/virt/HMAT.acpihmatvirt (100%) rename tests/data/acpi/{ => aarch64}/virt/IORT (100%) rename tests/data/acpi/{ => aarch64}/virt/MCFG (100%) rename tests/data/acpi/{ => aarch64}/virt/NFIT.memhp (100%) rename tests/data/acpi/{ => aarch64}/virt/PPTT (100%) rename tests/data/acpi/{ => aarch64}/virt/PPTT.acpihmatvirt (100%) rename tests/data/acpi/{ => aarch64}/virt/PPTT.topology (100%) rename tests/data/acpi/{ => aarch64}/virt/SLIT.memhp (100%) rename tests/data/acpi/{ => aarch64}/virt/SPCR (100%) rename tests/data/acpi/{ => aarch64}/virt/SRAT.acpihmatvirt (100%) rename tests/data/acpi/{ => aarch64}/virt/SRAT.memhp (100%) rename tests/data/acpi/{ => aarch64}/virt/SRAT.numamem (100%) rename tests/data/acpi/{ => aarch64}/virt/SSDT.memhp (100%) rename tests/data/acpi/{ => aarch64}/virt/VIOT (100%) diff --git a/tests/data/acpi/virt/APIC b/tests/data/acpi/aarch64/virt/APIC similarity index 100% rename from tests/data/acpi/virt/APIC rename to tests/data/acpi/aarch64/virt/APIC diff --git a/tests/data/acpi/virt/APIC.acpihmatvirt b/tests/data/acpi/aarch64/virt/APIC.acpihmatvirt similarity index 100% rename from tests/data/acpi/virt/APIC.acpihmatvirt rename to tests/data/acpi/aarch64/virt/APIC.acpihmatvirt diff --git a/tests/data/acpi/virt/APIC.topology b/tests/data/acpi/aarch64/virt/APIC.topology similarity index 100% rename from tests/data/acpi/virt/APIC.topology rename to tests/data/acpi/aarch64/virt/APIC.topology diff --git a/tests/data/acpi/virt/DBG2 b/tests/data/acpi/aarch64/virt/DBG2 similarity index 100% rename from tests/data/acpi/virt/DBG2 rename to 
tests/data/acpi/aarch64/virt/DBG2 diff --git a/tests/data/acpi/virt/DSDT b/tests/data/acpi/aarch64/virt/DSDT similarity index 100% rename from tests/data/acpi/virt/DSDT rename to tests/data/acpi/aarch64/virt/DSDT diff --git a/tests/data/acpi/virt/DSDT.acpihmatvirt b/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt similarity index 100% rename from tests/data/acpi/virt/DSDT.acpihmatvirt rename to tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt diff --git a/tests/data/acpi/virt/DSDT.memhp b/tests/data/acpi/aarch64/virt/DSDT.memhp similarity index 100% rename from tests/data/acpi/virt/DSDT.memhp rename to tests/data/acpi/aarch64/virt/DSDT.memhp diff --git a/tests/data/acpi/virt/DSDT.pxb b/tests/data/acpi/aarch64/virt/DSDT.pxb similarity index 100% rename from tests/data/acpi/virt/DSDT.pxb rename to tests/data/acpi/aarch64/virt/DSDT.pxb diff --git a/tests/data/acpi/virt/DSDT.topology b/tests/data/acpi/aarch64/virt/DSDT.topology similarity index 100% rename from tests/data/acpi/virt/DSDT.topology rename to tests/data/acpi/aarch64/virt/DSDT.topology diff --git a/tests/data/acpi/virt/FACP b/tests/data/acpi/aarch64/virt/FACP similarity index 100% rename from tests/data/acpi/virt/FACP rename to tests/data/acpi/aarch64/virt/FACP diff --git a/tests/data/acpi/virt/GTDT b/tests/data/acpi/aarch64/virt/GTDT similarity index 100% rename from tests/data/acpi/virt/GTDT rename to tests/data/acpi/aarch64/virt/GTDT diff --git a/tests/data/acpi/virt/HMAT.acpihmatvirt b/tests/data/acpi/aarch64/virt/HMAT.acpihmatvirt similarity index 100% rename from tests/data/acpi/virt/HMAT.acpihmatvirt rename to tests/data/acpi/aarch64/virt/HMAT.acpihmatvirt diff --git a/tests/data/acpi/virt/IORT b/tests/data/acpi/aarch64/virt/IORT similarity index 100% rename from tests/data/acpi/virt/IORT rename to tests/data/acpi/aarch64/virt/IORT diff --git a/tests/data/acpi/virt/MCFG b/tests/data/acpi/aarch64/virt/MCFG similarity index 100% rename from tests/data/acpi/virt/MCFG rename to 
tests/data/acpi/aarch64/virt/MCFG diff --git a/tests/data/acpi/virt/NFIT.memhp b/tests/data/acpi/aarch64/virt/NFIT.memhp similarity index 100% rename from tests/data/acpi/virt/NFIT.memhp rename to tests/data/acpi/aarch64/virt/NFIT.memhp diff --git a/tests/data/acpi/virt/PPTT b/tests/data/acpi/aarch64/virt/PPTT similarity index 100% rename from tests/data/acpi/virt/PPTT rename to tests/data/acpi/aarch64/virt/PPTT diff --git a/tests/data/acpi/virt/PPTT.acpihmatvirt b/tests/data/acpi/aarch64/virt/PPTT.acpihmatvirt similarity index 100% rename from tests/data/acpi/virt/PPTT.acpihmatvirt rename to tests/data/acpi/aarch64/virt/PPTT.acpihmatvirt diff --git a/tests/data/acpi/virt/PPTT.topology b/tests/data/acpi/aarch64/virt/PPTT.topology similarity index 100% rename from tests/data/acpi/virt/PPTT.topology rename to tests/data/acpi/aarch64/virt/PPTT.topology diff --git a/tests/data/acpi/virt/SLIT.memhp b/tests/data/acpi/aarch64/virt/SLIT.memhp similarity index 100% rename from tests/data/acpi/virt/SLIT.memhp rename to tests/data/acpi/aarch64/virt/SLIT.memhp diff --git a/tests/data/acpi/virt/SPCR b/tests/data/acpi/aarch64/virt/SPCR similarity index 100% rename from tests/data/acpi/virt/SPCR rename to tests/data/acpi/aarch64/virt/SPCR diff --git a/tests/data/acpi/virt/SRAT.acpihmatvirt b/tests/data/acpi/aarch64/virt/SRAT.acpihmatvirt similarity index 100% rename from tests/data/acpi/virt/SRAT.acpihmatvirt rename to tests/data/acpi/aarch64/virt/SRAT.acpihmatvirt diff --git a/tests/data/acpi/virt/SRAT.memhp b/tests/data/acpi/aarch64/virt/SRAT.memhp similarity index 100% rename from tests/data/acpi/virt/SRAT.memhp rename to tests/data/acpi/aarch64/virt/SRAT.memhp diff --git a/tests/data/acpi/virt/SRAT.numamem b/tests/data/acpi/aarch64/virt/SRAT.numamem similarity index 100% rename from tests/data/acpi/virt/SRAT.numamem rename to tests/data/acpi/aarch64/virt/SRAT.numamem diff --git a/tests/data/acpi/virt/SSDT.memhp b/tests/data/acpi/aarch64/virt/SSDT.memhp similarity index 100% 
rename from tests/data/acpi/virt/SSDT.memhp rename to tests/data/acpi/aarch64/virt/SSDT.memhp diff --git a/tests/data/acpi/virt/VIOT b/tests/data/acpi/aarch64/virt/VIOT similarity index 100% rename from tests/data/acpi/virt/VIOT rename to tests/data/acpi/aarch64/virt/VIOT From 008115bba06a30b13b3fd86cade8a280490e06f3 Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:34 +0530 Subject: [PATCH 69/85] meson.build: Add RISC-V to the edk2-target list so that ACPI table test can be supported. Signed-off-by: Sunil V L Reviewed-by: Alistair Francis Reviewed-by: Igor Mammedov Message-Id: <20240625150839.1358279-12-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 54e6b09f4f..efd3d4c6e5 100644 --- a/meson.build +++ b/meson.build @@ -93,7 +93,7 @@ else iasl = find_program(get_option('iasl'), required: true) endif -edk2_targets = [ 'arm-softmmu', 'aarch64-softmmu', 'i386-softmmu', 'x86_64-softmmu' ] +edk2_targets = [ 'arm-softmmu', 'aarch64-softmmu', 'i386-softmmu', 'x86_64-softmmu', 'riscv64-softmmu' ] unpack_edk2_blobs = false foreach target : edk2_targets if target in target_dirs From ce7325c160953e717ff662eabdc7bb911029760f Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:35 +0530 Subject: [PATCH 70/85] pc-bios/meson.build: Add support for RISC-V in unpack_edk2_blobs Update list of images supported in unpack_edk2_blobs to enable RISC-V ACPI table testing. Signed-off-by: Sunil V L Reviewed-by: Alistair Francis Reviewed-by: Igor Mammedov Message-Id: <20240625150839.1358279-13-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- pc-bios/meson.build | 2 ++ tests/qtest/meson.build | 3 +++ 2 files changed, 5 insertions(+) diff --git a/pc-bios/meson.build b/pc-bios/meson.build index 0760612bea..8602b45b9b 100644 --- a/pc-bios/meson.build +++ b/pc-bios/meson.build @@ -4,6 +4,8 @@ if unpack_edk2_blobs 'edk2-aarch64-code.fd', 'edk2-arm-code.fd', 'edk2-arm-vars.fd', + 'edk2-riscv-code.fd', + 'edk2-riscv-vars.fd', 'edk2-i386-code.fd', 'edk2-i386-secure-code.fd', 'edk2-i386-vars.fd', diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 12792948ff..6508bfb1a2 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -259,6 +259,9 @@ qtests_s390x = \ qtests_riscv32 = \ (config_all_devices.has_key('CONFIG_SIFIVE_E_AON') ? ['sifive-e-aon-watchdog-test'] : []) +qtests_riscv64 = \ + (unpack_edk2_blobs ? ['bios-tables-test'] : []) + qos_test_ss = ss.source_set() qos_test_ss.add( 'ac97-test.c', From 0f130d9e372552ef900bfac062ffc6a77b4049cc Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Tue, 25 Jun 2024 20:38:36 +0530 Subject: [PATCH 71/85] tests/data/acpi/rebuild-expected-aml.sh: Add RISC-V Update the list of supported architectures to include RISC-V. Signed-off-by: Sunil V L Reviewed-by: Alistair Francis Reviewed-by: Igor Mammedov Message-Id: <20240625150839.1358279-14-sunilvl@ventanamicro.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/rebuild-expected-aml.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/data/acpi/rebuild-expected-aml.sh b/tests/data/acpi/rebuild-expected-aml.sh index dcf2e2f221..c1092fb8ba 100755 --- a/tests/data/acpi/rebuild-expected-aml.sh +++ b/tests/data/acpi/rebuild-expected-aml.sh @@ -12,7 +12,7 @@ # This work is licensed under the terms of the GNU GPLv2. # See the COPYING.LIB file in the top-level directory. -qemu_arches="x86_64 aarch64" +qemu_arches="x86_64 aarch64 riscv64" if [ ! -e "tests/qtest/bios-tables-test" ]; then echo "Test: bios-tables-test is required! 
Run make check before this script." @@ -36,7 +36,8 @@ fi if [ -z "$qemu_bins" ]; then echo "Only the following architectures are currently supported: $qemu_arches" echo "None of these configured!" - echo "To fix, run configure --target-list=x86_64-softmmu,aarch64-softmmu" + echo "To fix, run configure \ + --target-list=x86_64-softmmu,aarch64-softmmu,riscv64-softmmu" exit 1; fi From efc4ad6f9901bbba08c0e11443ba89f18b1a28e9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 25 Jun 2024 18:08:04 +0100 Subject: [PATCH 72/85] hw/cxl/events: Improve QMP interfaces and documentation for add/release dynamic capacity. New DCD command definitions updated in response to review comments from Markus. - Used CxlXXXX instead of CXLXXXXX for newly added types. - Expanded some abbreviations in type names to be easier to read. - Additional documentation for some fields. - Replace slightly vague cxl r3.1 references with "Compute Express Link (CXL) Specification, Revision 3.1, XXXX" to bring them in line with what it says on the specification cover. Suggested-by: Markus Armbruster Signed-off-by: Jonathan Cameron Message-Id: <20240625170805.359278-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/mem/cxl_type3.c | 18 ++--- hw/mem/cxl_type3_stubs.c | 8 +-- qapi/cxl.json | 144 ++++++++++++++++++++++++--------------- 3 files changed, 103 insertions(+), 67 deletions(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 3274e5dcbb..35ac59883a 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -1874,7 +1874,7 @@ static bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list, */ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path, uint16_t hid, CXLDCEventType type, uint8_t rid, - CXLDynamicCapacityExtentList *records, Error **errp) + CxlDynamicCapacityExtentList *records, Error **errp) { Object *obj; CXLEventDynamicCapacity dCap = {}; @@ -1882,7 +1882,7 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path, CXLType3Dev *dcd; uint8_t flags = 1 << CXL_EVENT_TYPE_INFO; uint32_t num_extents = 0; - CXLDynamicCapacityExtentList *list; + CxlDynamicCapacityExtentList *list; CXLDCExtentGroup *group = NULL; g_autofree CXLDCExtentRaw *extents = NULL; uint8_t enc_log = CXL_EVENT_TYPE_DYNAMIC_CAP; @@ -2032,13 +2032,13 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path, } void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id, - CXLExtSelPolicy sel_policy, uint8_t region, - const char *tag, - CXLDynamicCapacityExtentList *extents, + CxlExtentSelectionPolicy sel_policy, + uint8_t region, const char *tag, + CxlDynamicCapacityExtentList *extents, Error **errp) { switch (sel_policy) { - case CXL_EXT_SEL_POLICY_PRESCRIPTIVE: + case CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE: qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id, DC_EVENT_ADD_CAPACITY, region, extents, errp); @@ -2050,14 +2050,14 @@ void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id, } void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id, - CXLExtRemovalPolicy removal_policy, + CxlExtentRemovalPolicy removal_policy, bool has_forced_removal, bool 
forced_removal, bool has_sanitize_on_release, bool sanitize_on_release, uint8_t region, const char *tag, - CXLDynamicCapacityExtentList *extents, + CxlDynamicCapacityExtentList *extents, Error **errp) { CXLDCEventType type = DC_EVENT_RELEASE_CAPACITY; @@ -2070,7 +2070,7 @@ void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id, } switch (removal_policy) { - case CXL_EXT_REMOVAL_POLICY_PRESCRIPTIVE: + case CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE: qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id, type, region, extents, errp); return; diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c index 45419bbefe..c1a5e4a7c1 100644 --- a/hw/mem/cxl_type3_stubs.c +++ b/hw/mem/cxl_type3_stubs.c @@ -70,24 +70,24 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type, void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id, - CXLExtSelPolicy sel_policy, + CxlExtentSelectionPolicy sel_policy, uint8_t region, const char *tag, - CXLDynamicCapacityExtentList *extents, + CxlDynamicCapacityExtentList *extents, Error **errp) { error_setg(errp, "CXL Type 3 support is not compiled in"); } void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id, - CXLExtRemovalPolicy removal_policy, + CxlExtentRemovalPolicy removal_policy, bool has_forced_removal, bool forced_removal, bool has_sanitize_on_release, bool sanitize_on_release, uint8_t region, const char *tag, - CXLDynamicCapacityExtentList *extents, + CxlDynamicCapacityExtentList *extents, Error **errp) { error_setg(errp, "CXL Type 3 support is not compiled in"); diff --git a/qapi/cxl.json b/qapi/cxl.json index 57d9f82014..a38622a0d1 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -363,9 +363,11 @@ 'data': {'path': 'str', 'type': 'CxlCorErrorType'}} ## -# @CXLDynamicCapacityExtent: +# @CxlDynamicCapacityExtent: # -# A single dynamic capacity extent +# A single dynamic capacity extent. 
This is a contiguous allocation +# of memory by Device Physical Address within a single Dynamic +# Capacity Region on a CXL Type 3 Device. # # @offset: The offset (in bytes) to the start of the region # where the extent belongs to. @@ -374,7 +376,7 @@ # # Since: 9.1 ## -{ 'struct': 'CXLDynamicCapacityExtent', +{ 'struct': 'CxlDynamicCapacityExtent', 'data': { 'offset':'uint64', 'len': 'uint64' @@ -382,22 +384,40 @@ } ## -# @CXLExtSelPolicy: +# @CxlExtentSelectionPolicy: # # The policy to use for selecting which extents comprise the added -# capacity, as defined in cxl spec r3.1 Table 7-70. +# capacity, as defined in Compute Express Link (CXL) Specification, +# Revision 3.1, Table 7-70. # -# @free: 0h = Free +# @free: Device is responsible for allocating the requested memory +# capacity and is free to do this using any combination of +# supported extents. # -# @contiguous: 1h = Continuous +# @contiguous: Device is responsible for allocating the requested +# memory capacity but must do so as a single contiguous +# extent. # -# @prescriptive: 2h = Prescriptive +# @prescriptive: The precise set of extents to be allocated is +# specified by the command. Thus allocation is being managed +# by the issuer of the allocation command, not the device. # -# @enable-shared-access: 3h = Enable Shared Access +# @enable-shared-access: Capacity has already been allocated to a +# different host using free, contiguous or prescriptive policy +# with a known tag. This policy then instructs the device to +# make the capacity with the specified tag available to an +# additional host. Capacity is implicit as it matches that +# already associated with the tag. Note that the extent list +# (and hence Device Physical Addresses) used are per host, so +# a device may use different representations on each host. +# The ordering of the extents provided to each host is indicated +# to the host using per extent sequence numbers generated by +# the device. 
Has a similar meaning for temporal sharing, but +# in that case there may be only one host involved. # # Since: 9.1 ## -{ 'enum': 'CXLExtSelPolicy', +{ 'enum': 'CxlExtentSelectionPolicy', 'data': ['free', 'contiguous', 'prescriptive', @@ -407,54 +427,60 @@ ## # @cxl-add-dynamic-capacity: # -# Command to initiate to add dynamic capacity extents to a host. It -# simulates operations defined in cxl spec r3.1 7.6.7.6.5. +# Initiate adding dynamic capacity extents to a host. This simulates +# operations defined in Compute Express Link (CXL) Specification, +# Revision 3.1, Section 7.6.7.6.5. Note that, currently, establishing +# success or failure of the full Add Dynamic Capacity flow requires +# out of band communication with the OS of the CXL host. # -# @path: CXL DCD canonical QOM path. +# @path: path to the CXL Dynamic Capacity Device in the QOM tree. # -# @host-id: The "Host ID" field as defined in cxl spec r3.1 -# Table 7-70. +# @host-id: The "Host ID" field as defined in Compute Express Link +# (CXL) Specification, Revision 3.1, Table 7-70. # # @selection-policy: The "Selection Policy" bits as defined in -# cxl spec r3.1 Table 7-70. It specifies the policy to use for -# selecting which extents comprise the added capacity. +# Compute Express Link (CXL) Specification, Revision 3.1, +# Table 7-70. It specifies the policy to use for selecting +# which extents comprise the added capacity. # -# @region: The "Region Number" field as defined in cxl spec r3.1 -# Table 7-70. The dynamic capacity region where the capacity -# is being added. Valid range is from 0-7. +# @region: The "Region Number" field as defined in Compute Express +# Link (CXL) Specification, Revision 3.1, Table 7-70. Valid +# range is from 0-7. # -# @tag: The "Tag" field as defined in cxl spec r3.1 Table 7-70. +# @tag: The "Tag" field as defined in Compute Express Link (CXL) +# Specification, Revision 3.1, Table 7-70. # -# @extents: The "Extent List" field as defined in cxl spec r3.1 -# Table 7-70. 
+# @extents: The "Extent List" field as defined in Compute Express Link +# (CXL) Specification, Revision 3.1, Table 7-70. # # Since : 9.1 ## { 'command': 'cxl-add-dynamic-capacity', 'data': { 'path': 'str', 'host-id': 'uint16', - 'selection-policy': 'CXLExtSelPolicy', + 'selection-policy': 'CxlExtentSelectionPolicy', 'region': 'uint8', '*tag': 'str', - 'extents': [ 'CXLDynamicCapacityExtent' ] + 'extents': [ 'CxlDynamicCapacityExtent' ] } } ## -# @CXLExtRemovalPolicy: +# @CxlExtentRemovalPolicy: # # The policy to use for selecting which extents comprise the released -# capacity, defined in the "Flags" field in cxl spec r3.1 Table 7-71. +# capacity, defined in the "Flags" field in Compute Express Link (CXL) +# Specification, Revision 3.1, Table 7-71. # -# @tag-based: value = 0h. Extents are selected by the device based -# on tag, with no requirement for contiguous extents. +# @tag-based: Extents are selected by the device based on tag, with +# no requirement for contiguous extents. # -# @prescriptive: value = 1h. Extent list of capacity to release is -# included in the request payload. +# @prescriptive: Extent list of capacity to release is included in +# the request payload. # # Since: 9.1 ## -{ 'enum': 'CXLExtRemovalPolicy', +{ 'enum': 'CxlExtentRemovalPolicy', 'data': ['tag-based', 'prescriptive'] } @@ -462,45 +488,55 @@ ## # @cxl-release-dynamic-capacity: # -# Command to initiate to release dynamic capacity extents from a -# host. It simulates operations defined in cxl spec r3.1 7.6.7.6.6. +# Initiate release of dynamic capacity extents from a host. This +# simulates operations defined in Compute Express Link (CXL) +# Specification, Revision 3.1, Section 7.6.7.6.6. Note that, +# currently, success or failure of the full Release Dynamic Capacity +# flow requires out of band communication with the OS of the CXL host. # -# @path: CXL DCD canonical QOM path. +# @path: path to the CXL Dynamic Capacity Device in the QOM tree. 
# -# @host-id: The "Host ID" field as defined in cxl spec r3.1 +# @host-id: The "Host ID" field as defined in Compute Express Link +# (CXL) Specification, Revision 3.1, Table 7-71. +# +# @removal-policy: Bit[3:0] of the "Flags" field as defined in +# Compute Express Link (CXL) Specification, Revision 3.1, # Table 7-71. # -# @removal-policy: Bit[3:0] of the "Flags" field as defined in cxl -# spec r3.1 Table 7-71. +# @forced-removal: Bit[4] of the "Flags" field in Compute Express +# Link (CXL) Specification, Revision 3.1, Table 7-71. When set, +# the device does not wait for a Release Dynamic Capacity command +# from the host. Instead, the host immediately looses access to +# the released capacity. # -# @forced-removal: Bit[4] of the "Flags" field in cxl spec r3.1 -# Table 7-71. When set, device does not wait for a Release -# Dynamic Capacity command from the host. Host immediately -# loses access to released capacity. +# @sanitize-on-release: Bit[5] of the "Flags" field in Compute +# Express Link (CXL) Specification, Revision 3.1, Table 7-71. +# When set, the device should sanitize all released capacity as +# a result of this request. This ensures that all user data +# and metadata is made permanently unavailable by whatever +# means is appropriate for the media type. Note that changing +# encryption keys is not sufficient. # -# @sanitize-on-release: Bit[5] of the "Flags" field in cxl spec r3.1 -# Table 7-71. When set, device should sanitize all released -# capacity as a result of this request. +# @region: The "Region Number" field as defined in Compute Express +# Link Specification, Revision 3.1, Table 7-71. Valid range +# is from 0-7. # -# @region: The "Region Number" field as defined in cxl spec r3.1 -# Table 7-71. The dynamic capacity region where the capacity -# is being added. Valid range is from 0-7. +# @tag: The "Tag" field as defined in Compute Express Link (CXL) +# Specification, Revision 3.1, Table 7-71. 
# -# @tag: The "Tag" field as defined in cxl spec r3.1 Table 7-71. -# -# @extents: The "Extent List" field as defined in cxl spec r3.1 -# Table 7-71. +# @extents: The "Extent List" field as defined in Compute Express +# Link (CXL) Specification, Revision 3.1, Table 7-71. # # Since : 9.1 ## { 'command': 'cxl-release-dynamic-capacity', 'data': { 'path': 'str', 'host-id': 'uint16', - 'removal-policy': 'CXLExtRemovalPolicy', + 'removal-policy': 'CxlExtentRemovalPolicy', '*forced-removal': 'bool', '*sanitize-on-release': 'bool', 'region': 'uint8', '*tag': 'str', - 'extents': [ 'CXLDynamicCapacityExtent' ] + 'extents': [ 'CxlDynamicCapacityExtent' ] } } From 5e3cd0a2f526c2e52dd513ee6b4385f1fb47a19e Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 25 Jun 2024 18:08:05 +0100 Subject: [PATCH 73/85] hw/cxl/events: Mark cxl-add-dynamic-capacity and cxl-release-dynamic-capacity unstable Markus suggested that we make them unstable. I don't expect these interfaces to change because of their tight coupling to the Compute Express Link (CXL) Specification, Revision 3.1 Fabric Management API definitions which can only be extended in a backwards compatible way. However, there seems little disadvantage in taking a cautious path for now and marking them as unstable interfaces. Suggested-by: Markus Armbruster Signed-off-by: Jonathan Cameron Message-Id: <20240625170805.359278-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qapi/cxl.json | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/qapi/cxl.json b/qapi/cxl.json index a38622a0d1..bdfac67c47 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -453,6 +453,10 @@ # @extents: The "Extent List" field as defined in Compute Express Link # (CXL) Specification, Revision 3.1, Table 7-70. # +# Features: +# +# @unstable: For now this command is subject to change. 
+# # Since : 9.1 ## { 'command': 'cxl-add-dynamic-capacity', @@ -462,7 +466,8 @@ 'region': 'uint8', '*tag': 'str', 'extents': [ 'CxlDynamicCapacityExtent' ] - } + }, + 'features': [ 'unstable' ] } ## @@ -527,6 +532,10 @@ # @extents: The "Extent List" field as defined in Compute Express # Link (CXL) Specification, Revision 3.1, Table 7-71. # +# Features: +# +# @unstable: For now this command is subject to change. +# # Since : 9.1 ## { 'command': 'cxl-release-dynamic-capacity', @@ -538,5 +547,6 @@ 'region': 'uint8', '*tag': 'str', 'extents': [ 'CxlDynamicCapacityExtent' ] - } + }, + 'features': [ 'unstable' ] } From 7aa6492401e95fb296dec7cda81e67d91f6037d7 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 1 Jul 2024 09:52:08 +0200 Subject: [PATCH 74/85] virtio: remove virtio_tswap16s() call in vring_packed_event_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d152cdd6f6 ("virtio: use virtio accessor to access packed event") switched from using address_space_read_cached() to virtio_lduw_phys_cached() to access packed descriptor event. When we used address_space_read_cached(), we needed to call virtio_tswap16s() to handle the endianness of the field, but virtio_lduw_phys_cached() already handles it internally, so we no longer need to call virtio_tswap16s() (as the commit had done for `off_wrap`, but forgot for `flags`). Fixes: d152cdd6f6 ("virtio: use virtio accessor to access packed event") Cc: jasowang@redhat.com Cc: qemu-stable@nongnu.org Reported-by: Xoykie Link: https://lore.kernel.org/qemu-devel/CAFU8RB_pjr77zMLsM0Unf9xPNxfr_--Tjr49F_eX32ZBc5o2zQ@mail.gmail.com Signed-off-by: Stefano Garzarella Message-Id: <20240701075208.19634-1-sgarzare@redhat.com> Acked-by: Jason Wang Reviewed-by: Peter Maydell Reviewed-by: Eugenio Pérez Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 3678ec2f88..583a224163 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -323,7 +323,6 @@ static void vring_packed_event_read(VirtIODevice *vdev, /* Make sure flags is seen before off_wrap */ smp_rmb(); e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off); - virtio_tswap16s(vdev, &e->flags); } static void vring_packed_off_wrap_write(VirtIODevice *vdev, From 1b889d6e39c32d709f1114699a014b381bcf1cb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 1 Jul 2024 12:14:53 +0200 Subject: [PATCH 75/85] virtio-iommu: Clear IOMMUDevice when VFIO device is unplugged MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a VFIO device is hotplugged in a VM using virtio-iommu, IOMMUPciBus and IOMMUDevice cache entries are created in the .get_address_space() handler of the machine IOMMU device. However, these entries are never destroyed, not even when the VFIO device is detached from the machine. This can lead to an assert if the device is reattached again. When reattached, the .get_address_space() handler reuses an IOMMUDevice entry allocated when the VFIO device was first attached. virtio_iommu_set_host_iova_ranges() is called later on from the .set_iommu_device() handler and fails with an assert on 'probe_done' because the device appears to have been already probed when this is not the case. The IOMMUDevice entry is allocated in pci_device_iommu_address_space() called from under vfio_realize(), the VFIO PCI realize handler. Since pci_device_unset_iommu_device() is called from vfio_exitfn(), a sub function of the PCIDevice unrealize() handler, it seems that the .unset_iommu_device() handler is the best place to release resources allocated at realize time. Clear the IOMMUDevice cache entry there to fix hotplug. 
Fixes: 817ef10da23c ("virtio-iommu: Implement set|unset]_iommu_device() callbacks") Signed-off-by: Cédric Le Goater Message-Id: <20240701101453.203985-1-clg@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-iommu.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index ed7426afc7..7c54c6b5e2 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -467,6 +467,26 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, return &sdev->as; } +static void virtio_iommu_device_clear(VirtIOIOMMU *s, PCIBus *bus, int devfn) +{ + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + IOMMUDevice *sdev; + + if (!sbus) { + return; + } + + sdev = sbus->pbdev[devfn]; + if (!sdev) { + return; + } + + g_list_free_full(sdev->resv_regions, g_free); + sdev->resv_regions = NULL; + g_free(sdev); + sbus->pbdev[devfn] = NULL; +} + static gboolean hiod_equal(gconstpointer v1, gconstpointer v2) { const struct hiod_key *key1 = v1; @@ -650,6 +670,7 @@ virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) } g_hash_table_remove(viommu->host_iommu_devices, &key); + virtio_iommu_device_clear(viommu, bus, devfn); } static const PCIIOMMUOps virtio_iommu_ops = { From 6a31b219a5338564f3978251c79f96f689e037da Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 15:07:50 +0900 Subject: [PATCH 76/85] hw/pci: Rename has_power to enabled The renamed state will not only represent powering state of PFs, but also represent SR-IOV VF enablement in the future. Signed-off-by: Akihiko Odaki Message-Id: <20240627-reuse-v10-1-7ca0b8ed3d9f@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci/pci.c | 14 +++++++------- hw/pci/pci_host.c | 4 ++-- include/hw/pci/pci.h | 7 ++++++- include/hw/pci/pci_device.h | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 50b86d5790..68d30feb86 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1525,7 +1525,7 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); - if (!d->has_power) { + if (!d->enabled) { new_addr = PCI_BAR_UNMAPPED; } @@ -1613,7 +1613,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int pci_update_irq_disabled(d, was_irq_disabled); memory_region_set_enabled(&d->bus_master_enable_region, (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->has_power); + & PCI_COMMAND_MASTER) && d->enabled); } msi_write_config(d, addr, val_in, l); @@ -2884,18 +2884,18 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) return msg; } -void pci_set_power(PCIDevice *d, bool state) +void pci_set_enabled(PCIDevice *d, bool state) { - if (d->has_power == state) { + if (d->enabled == state) { return; } - d->has_power = state; + d->enabled = state; pci_update_mappings(d); memory_region_set_enabled(&d->bus_master_enable_region, (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->has_power); - if (!d->has_power) { + & PCI_COMMAND_MASTER) && d->enabled); + if (!d->enabled) { pci_device_reset(d); } } diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index dfe6fe6184..0d82727cc9 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, * allowing direct removal of unexposed functions. 
*/ if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || - !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { + !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) { return; } @@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, * allowing direct removal of unexposed functions. */ if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || - !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { + !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) { return ~0x0; } diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index eb26cac810..fe04b4fafd 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -678,6 +678,11 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev) } MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); -void pci_set_power(PCIDevice *pci_dev, bool state); +void pci_set_enabled(PCIDevice *pci_dev, bool state); + +static inline void pci_set_power(PCIDevice *pci_dev, bool state) +{ + pci_set_enabled(pci_dev, state); +} #endif diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index d3dd0f64b2..d57f9ce838 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -56,7 +56,7 @@ typedef struct PCIReqIDCache PCIReqIDCache; struct PCIDevice { DeviceState qdev; bool partially_hotplugged; - bool has_power; + bool enabled; /* PCI config space */ uint8_t *config; From 723c5b4628d047e43825a046c6ee517b82b88117 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 15:07:51 +0900 Subject: [PATCH 77/85] hw/ppc/spapr_pci: Do not create DT for disabled PCI device Disabled means it is a disabled SR-IOV VF or it is powered off, and hidden from the guest. Signed-off-by: Akihiko Odaki Message-Id: <20240627-reuse-v10-2-7ca0b8ed3d9f@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/ppc/spapr_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 7cf9904c35..f63182a03c 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1296,6 +1296,10 @@ static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev, return; } + if (!pdev->enabled) { + return; + } + err = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->offset); if (err < 0) { p->err = err; From 26f86093ec989cb73ad03e8a234f5dc321e1e267 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 15:07:52 +0900 Subject: [PATCH 78/85] hw/ppc/spapr_pci: Do not reject VFs created after a PF A PF may automatically create VFs and the PF may be function 0. Signed-off-by: Akihiko Odaki Message-Id: <20240627-reuse-v10-3-7ca0b8ed3d9f@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/ppc/spapr_pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index f63182a03c..ed4454bbf7 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1573,7 +1573,9 @@ static void spapr_pci_pre_plug(HotplugHandler *plug_handler, * hotplug, we do not allow functions to be hotplugged to a * slot that already has function 0 present */ - if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] && + if (plugged_dev->hotplugged && + !pci_is_vf(pdev) && + bus->devices[PCI_DEVFN(slotnr, 0)] && PCI_FUNC(pdev->devfn) != 0) { error_setg(errp, "PCI: slot %d function 0 already occupied by %s," " additional functions can no longer be exposed to guest.", From c613ad25125bf3016aa8f81ce170f5ac91d2379f Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 15:07:53 +0900 Subject: [PATCH 79/85] pcie_sriov: Do not manually unrealize A device gets automatically unrealized when being unparented. Signed-off-by: Akihiko Odaki Message-Id: <20240627-reuse-v10-4-7ca0b8ed3d9f@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci/pcie_sriov.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index e9b23221d7..499becd527 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -204,11 +204,7 @@ static void unregister_vfs(PCIDevice *dev) trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { - Error *err = NULL; PCIDevice *vf = dev->exp.sriov_pf.vf[i]; - if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) { - error_reportf_err(err, "Failed to unplug: "); - } object_unparent(OBJECT(vf)); object_unref(OBJECT(vf)); } From 77718701157f6ca77ea7a57b536fa0a22f676082 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 15:07:54 +0900 Subject: [PATCH 80/85] pcie_sriov: Ensure VF function number does not overflow pci_new() aborts when creating a VF with a function number equal to or greater than PCI_DEVFN_MAX. Signed-off-by: Akihiko Odaki Message-Id: <20240627-reuse-v10-5-7ca0b8ed3d9f@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/pcie_sriov.txt | 8 +++++--- hw/net/igb.c | 13 ++++++++++--- hw/nvme/ctrl.c | 24 ++++++++++++++++-------- hw/pci/pcie_sriov.c | 19 +++++++++++++++++-- include/hw/pci/pcie_sriov.h | 5 +++-- 5 files changed, 51 insertions(+), 18 deletions(-) diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt index a47aad0bfa..ab2142807f 100644 --- a/docs/pcie_sriov.txt +++ b/docs/pcie_sriov.txt @@ -52,9 +52,11 @@ setting up a BAR for a VF. ... /* Add and initialize the SR/IOV capability */ - pcie_sriov_pf_init(d, 0x200, "your_virtual_dev", - vf_devid, initial_vfs, total_vfs, - fun_offset, stride); + if (!pcie_sriov_pf_init(d, 0x200, "your_virtual_dev", + vf_devid, initial_vfs, total_vfs, + fun_offset, stride, errp)) { + return; + } /* Set up individual VF BARs (parameters as for normal BARs) */ pcie_sriov_pf_init_vf_bar( ... 
) diff --git a/hw/net/igb.c b/hw/net/igb.c index b92bba402e..b6ca2f1b8a 100644 --- a/hw/net/igb.c +++ b/hw/net/igb.c @@ -446,9 +446,16 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp) pcie_ari_init(pci_dev, 0x150); - pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF, - IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS, - IGB_VF_OFFSET, IGB_VF_STRIDE); + if (!pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, + TYPE_IGBVF, IGB_82576_VF_DEV_ID, + IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS, + IGB_VF_OFFSET, IGB_VF_STRIDE, + errp)) { + pcie_cap_exit(pci_dev); + igb_cleanup_msix(s); + msi_uninit(pci_dev); + return; + } pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX, PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 127c3d2383..066389e391 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -8048,7 +8048,8 @@ out: return pow2ceil(bar_size); } -static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset) +static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset, + Error **errp) { uint16_t vf_dev_id = n->params.use_intel_id ? 
PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME; @@ -8057,12 +8058,17 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset) le16_to_cpu(cap->vifrsm), NULL, NULL); - pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, - n->params.sriov_max_vfs, n->params.sriov_max_vfs, - NVME_VF_OFFSET, NVME_VF_STRIDE); + if (!pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, + n->params.sriov_max_vfs, n->params.sriov_max_vfs, + NVME_VF_OFFSET, NVME_VF_STRIDE, + errp)) { + return false; + } pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size); + + return true; } static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) @@ -8155,6 +8161,12 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) return false; } + if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs && + !nvme_init_sriov(n, pci_dev, 0x120, errp)) { + msix_uninit(pci_dev, &n->bar0, &n->bar0); + return false; + } + nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize); if (n->params.cmb_size_mb) { @@ -8165,10 +8177,6 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) nvme_init_pmr(n, pci_dev); } - if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) { - nvme_init_sriov(n, pci_dev, 0x120); - } - return true; } diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 499becd527..f0bde0d3fc 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -24,14 +24,27 @@ static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, uint16_t vf_num); static void unregister_vfs(PCIDevice *dev); -void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, +bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, const char *vfname, uint16_t vf_dev_id, uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride) + uint16_t vf_offset, uint16_t vf_stride, + Error **errp) { uint8_t *cfg = dev->config + offset; uint8_t *wmask; + if (total_vfs) { + uint16_t 
ari_cap = pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI); + uint16_t first_vf_devfn = dev->devfn + vf_offset; + uint16_t last_vf_devfn = first_vf_devfn + vf_stride * (total_vfs - 1); + + if ((!ari_cap && PCI_SLOT(dev->devfn) != PCI_SLOT(last_vf_devfn)) || + last_vf_devfn >= PCI_DEVFN_MAX) { + error_setg(errp, "VF function number overflows"); + return false; + } + } + pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, offset, PCI_EXT_CAP_SRIOV_SIZEOF); dev->exp.sriov_cap = offset; @@ -69,6 +82,8 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553); qdev_prop_set_bit(&dev->qdev, "multifunction", true); + + return true; } void pcie_sriov_pf_exit(PCIDevice *dev) diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index 450cbef6c2..aa704e8f9d 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -27,10 +27,11 @@ typedef struct PCIESriovVF { uint16_t vf_number; /* Logical VF number of this function */ } PCIESriovVF; -void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, +bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, const char *vfname, uint16_t vf_dev_id, uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride); + uint16_t vf_offset, uint16_t vf_stride, + Error **errp); void pcie_sriov_pf_exit(PCIDevice *dev); /* Set up a VF bar in the SR/IOV bar area */ From 139610ae67f6ecf92127bb7bf53ac6265b459ec8 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 15:07:55 +0900 Subject: [PATCH 81/85] pcie_sriov: Reuse SR-IOV VF device instances Disable SR-IOV VF devices by reusing code to power down PCI devices instead of removing them when the guest requests to disable VFs. This allows to realize devices and report VF realization errors at PF realization time. Signed-off-by: Akihiko Odaki Message-Id: <20240627-reuse-v10-6-7ca0b8ed3d9f@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci/pci.c | 2 +- hw/pci/pcie_sriov.c | 95 ++++++++++++++++--------------------- include/hw/pci/pci.h | 5 -- include/hw/pci/pci_device.h | 15 ++++++ include/hw/pci/pcie_sriov.h | 1 - 5 files changed, 56 insertions(+), 62 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 68d30feb86..e32a69f3fa 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2895,7 +2895,7 @@ void pci_set_enabled(PCIDevice *d, bool state) memory_region_set_enabled(&d->bus_master_enable_region, (pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_MASTER) && d->enabled); - if (!d->enabled) { + if (d->qdev.realized) { pci_device_reset(d); } } diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index f0bde0d3fc..faadb0d2ea 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -20,9 +20,16 @@ #include "qapi/error.h" #include "trace.h" -static PCIDevice *register_vf(PCIDevice *pf, int devfn, - const char *name, uint16_t vf_num); -static void unregister_vfs(PCIDevice *dev); +static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) +{ + for (uint16_t i = 0; i < total_vfs; i++) { + PCIDevice *vf = dev->exp.sriov_pf.vf[i]; + object_unparent(OBJECT(vf)); + object_unref(OBJECT(vf)); + } + g_free(dev->exp.sriov_pf.vf); + dev->exp.sriov_pf.vf = NULL; +} bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, const char *vfname, uint16_t vf_dev_id, @@ -30,6 +37,8 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, uint16_t vf_offset, uint16_t vf_stride, Error **errp) { + BusState *bus = qdev_get_parent_bus(&dev->qdev); + int32_t devfn = dev->devfn + vf_offset; uint8_t *cfg = dev->config + offset; uint8_t *wmask; @@ -49,7 +58,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, offset, PCI_EXT_CAP_SRIOV_SIZEOF); dev->exp.sriov_cap = offset; dev->exp.sriov_pf.num_vfs = 0; - dev->exp.sriov_pf.vfname = g_strdup(vfname); dev->exp.sriov_pf.vf = NULL; pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); @@ -83,14 +91,34 @@ bool pcie_sriov_pf_init(PCIDevice *dev, 
uint16_t offset, qdev_prop_set_bit(&dev->qdev, "multifunction", true); + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); + + for (uint16_t i = 0; i < total_vfs; i++) { + PCIDevice *vf = pci_new(devfn, vfname); + vf->exp.sriov_vf.pf = dev; + vf->exp.sriov_vf.vf_number = i; + + if (!qdev_realize(&vf->qdev, bus, errp)) { + unparent_vfs(dev, i); + return false; + } + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(vf->config, 0xffff); + pci_config_set_device_id(vf->config, 0xffff); + + dev->exp.sriov_pf.vf[i] = vf; + devfn += vf_stride; + } + return true; } void pcie_sriov_pf_exit(PCIDevice *dev) { - unregister_vfs(dev); - g_free((char *)dev->exp.sriov_pf.vfname); - dev->exp.sriov_pf.vfname = NULL; + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); } void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, @@ -156,38 +184,11 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, } } -static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, - uint16_t vf_num) -{ - PCIDevice *dev = pci_new(devfn, name); - dev->exp.sriov_vf.pf = pf; - dev->exp.sriov_vf.vf_number = vf_num; - PCIBus *bus = pci_get_bus(pf); - Error *local_err = NULL; - - qdev_realize(&dev->qdev, &bus->qbus, &local_err); - if (local_err) { - error_report_err(local_err); - return NULL; - } - - /* set vid/did according to sr/iov spec - they are not used */ - pci_config_set_vendor_id(dev->config, 0xffff); - pci_config_set_device_id(dev->config, 0xffff); - - return dev; -} - static void register_vfs(PCIDevice *dev) { uint16_t num_vfs; uint16_t i; uint16_t sriov_cap = dev->exp.sriov_cap; - uint16_t vf_offset = - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET); - uint16_t vf_stride = - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE); - int32_t devfn = dev->devfn + vf_offset; assert(sriov_cap > 0); num_vfs = pci_get_word(dev->config + sriov_cap + 
PCI_SRIOV_NUM_VF); @@ -195,18 +196,10 @@ static void register_vfs(PCIDevice *dev) return; } - dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); - trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { - dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, - dev->exp.sriov_pf.vfname, i); - if (!dev->exp.sriov_pf.vf[i]) { - num_vfs = i; - break; - } - devfn += vf_stride; + pci_set_enabled(dev->exp.sriov_pf.vf[i], true); } dev->exp.sriov_pf.num_vfs = num_vfs; } @@ -219,12 +212,8 @@ static void unregister_vfs(PCIDevice *dev) trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { - PCIDevice *vf = dev->exp.sriov_pf.vf[i]; - object_unparent(OBJECT(vf)); - object_unref(OBJECT(vf)); + pci_set_enabled(dev->exp.sriov_pf.vf[i], false); } - g_free(dev->exp.sriov_pf.vf); - dev->exp.sriov_pf.vf = NULL; dev->exp.sriov_pf.num_vfs = 0; } @@ -246,14 +235,10 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, PCI_FUNC(dev->devfn), off, val, len); if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { - if (dev->exp.sriov_pf.num_vfs) { - if (!(val & PCI_SRIOV_CTRL_VFE)) { - unregister_vfs(dev); - } + if (val & PCI_SRIOV_CTRL_VFE) { + register_vfs(dev); } else { - if (val & PCI_SRIOV_CTRL_VFE) { - register_vfs(dev); - } + unregister_vfs(dev); } } } diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index fe04b4fafd..14a869eeaa 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -680,9 +680,4 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev) MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); void pci_set_enabled(PCIDevice *pci_dev, bool state); -static inline void pci_set_power(PCIDevice *pci_dev, bool state) -{ - pci_set_enabled(pci_dev, state); -} - #endif diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index d57f9ce838..ca15132508 100644 --- a/include/hw/pci/pci_device.h +++ 
b/include/hw/pci/pci_device.h @@ -205,6 +205,21 @@ static inline uint16_t pci_get_bdf(PCIDevice *dev) return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn); } +static inline void pci_set_power(PCIDevice *pci_dev, bool state) +{ + /* + * Don't change the enabled state of VFs when powering on/off the device. + * + * When powering on, VFs must not be enabled immediately but they must + * wait until the guest configures SR-IOV. + * When powering off, their corresponding PFs will be reset and disable + * VFs. + */ + if (!pci_is_vf(pci_dev)) { + pci_set_enabled(pci_dev, state); + } +} + uint16_t pci_requester_id(PCIDevice *dev); /* DMA access functions */ diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index aa704e8f9d..70649236c1 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -18,7 +18,6 @@ typedef struct PCIESriovPF { uint16_t num_vfs; /* Number of virtual functions created */ uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ - const char *vfname; /* Reference to the device type used for the VFs */ PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ } PCIESriovPF; From 1a9bf009012e590cb166a4a9bae4bc18fb084d76 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 15:07:56 +0900 Subject: [PATCH 82/85] pcie_sriov: Release VFs failed to realize Release VFs failed to realize just as we do in unregister_vfs(). Fixes: 7c0fa8dff811 ("pcie: Add support for Single Root I/O Virtualization (SR/IOV)") Signed-off-by: Akihiko Odaki Message-Id: <20240627-reuse-v10-7-7ca0b8ed3d9f@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci/pcie_sriov.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index faadb0d2ea..9bd7f8acc3 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -99,6 +99,8 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, vf->exp.sriov_vf.vf_number = i; if (!qdev_realize(&vf->qdev, bus, errp)) { + object_unparent(OBJECT(vf)); + object_unref(vf); unparent_vfs(dev, i); return false; } From cbd9e5120bac3e292eee77b7a2e3692f235a1a26 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 15:07:57 +0900 Subject: [PATCH 83/85] pcie_sriov: Remove num_vfs from PCIESriovPF num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF instead. Signed-off-by: Akihiko Odaki Message-Id: <20240627-reuse-v10-8-7ca0b8ed3d9f@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++-------- hw/pci/trace-events | 2 +- include/hw/pci/pcie_sriov.h | 1 - 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 9bd7f8acc3..fae6acea4a 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, offset, PCI_EXT_CAP_SRIOV_SIZEOF); dev->exp.sriov_cap = offset; - dev->exp.sriov_pf.num_vfs = 0; dev->exp.sriov_pf.vf = NULL; pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); @@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, } } +static void clear_ctrl_vfe(PCIDevice *dev) +{ + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); +} + static void register_vfs(PCIDevice *dev) { uint16_t num_vfs; @@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev) assert(sriov_cap > 0); num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); if (num_vfs > 
pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { + clear_ctrl_vfe(dev); return; } @@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev) for (i = 0; i < num_vfs; i++) { pci_set_enabled(dev->exp.sriov_pf.vf[i], true); } - dev->exp.sriov_pf.num_vfs = num_vfs; } static void unregister_vfs(PCIDevice *dev) { - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; uint16_t i; + uint8_t *cfg = dev->config + dev->exp.sriov_cap; trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), num_vfs); - for (i = 0; i < num_vfs; i++) { + PCI_FUNC(dev->devfn)); + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { pci_set_enabled(dev->exp.sriov_pf.vf[i], false); } - dev->exp.sriov_pf.num_vfs = 0; } void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, @@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, } else { unregister_vfs(dev); } + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { + clear_ctrl_vfe(dev); + unregister_vfs(dev); } } @@ -304,7 +311,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev) PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) { assert(!pci_is_vf(dev)); - if (n < dev->exp.sriov_pf.num_vfs) { + if (n < pcie_sriov_num_vfs(dev)) { return dev->exp.sriov_pf.vf[n]; } return NULL; @@ -312,5 +319,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) uint16_t pcie_sriov_num_vfs(PCIDevice *dev) { - return dev->exp.sriov_pf.num_vfs; + uint16_t sriov_cap = dev->exp.sriov_cap; + uint8_t *cfg = dev->config + sriov_cap; + + return sriov_cap && + (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ? 
+ pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0; } diff --git a/hw/pci/trace-events b/hw/pci/trace-events index 19643aa8c6..e98f575a9d 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask # hw/pci/pcie_sriov.c sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs" -sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs" +sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs" sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d" # pcie.c diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index 70649236c1..5148c5b77d 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -16,7 +16,6 @@ #include "hw/pci/pci.h" typedef struct PCIESriovPF { - uint16_t num_vfs; /* Number of virtual functions created */ uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ } PCIESriovPF; From 107a64b9a360cf5ca046852bc03334f7a9f22aef Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 15:07:58 +0900 Subject: [PATCH 84/85] pcie_sriov: Register VFs after migration pcie_sriov doesn't have code to restore its state after migration, but igb, which uses pcie_sriov, naively claimed its migration capability. Add code to register VFs after migration and fix igb migration. Fixes: 3a977deebe6b ("Intrdocue igb device emulation") Signed-off-by: Akihiko Odaki Message-Id: <20240627-reuse-v10-9-7ca0b8ed3d9f@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci/pci.c | 7 +++++++ hw/pci/pcie_sriov.c | 7 +++++++ include/hw/pci/pcie_sriov.h | 2 ++ 3 files changed, 16 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index e32a69f3fa..fa85f87b1c 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -733,10 +733,17 @@ static bool migrate_is_not_pcie(void *opaque, int version_id) return !pci_is_express((PCIDevice *)opaque); } +static int pci_post_load(void *opaque, int version_id) +{ + pcie_sriov_pf_post_load(opaque); + return 0; +} + const VMStateDescription vmstate_pci_device = { .name = "PCIDevice", .version_id = 2, .minimum_version_id = 1, + .post_load = pci_post_load, .fields = (const VMStateField[]) { VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice), VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice, diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index fae6acea4a..56523ab4e8 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -252,6 +252,13 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, } } +void pcie_sriov_pf_post_load(PCIDevice *dev) +{ + if (dev->exp.sriov_cap) { + register_vfs(dev); + } +} + /* Reset SR/IOV */ void pcie_sriov_pf_reset(PCIDevice *dev) diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index 5148c5b77d..c5d2d318d3 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -57,6 +57,8 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize); void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, uint32_t val, int len); +void pcie_sriov_pf_post_load(PCIDevice *dev); + /* Reset SR/IOV */ void pcie_sriov_pf_reset(PCIDevice *dev); From 6a67577d8003428bdbeba61d32a9f8158f12624b Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 27 Jun 2024 15:07:59 +0900 Subject: [PATCH 85/85] hw/pci: Replace -1 with UINT32_MAX for romsize romsize is an uint32_t variable. Specifying -1 as an uint32_t value is obscure way to denote UINT32_MAX. 
Worse, if int is wider than 32-bit, it will change the behavior of a construct like the following: romsize = -1; if (romsize != -1) { ... } When -1 is assigned to romsize, -1 will be implicitly cast into uint32_t, resulting in UINT32_MAX. On the contrary, when evaluating romsize != -1, romsize will be cast into int, and it will be a comparison of UINT32_MAX and -1, and result in false. Replace -1 with UINT32_MAX for statements involving the variable to clarify the intent and prevent potential breakage. Signed-off-by: Akihiko Odaki Reviewed-by: Markus Armbruster Message-Id: <20240627-reuse-v10-10-7ca0b8ed3d9f@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 8 ++++---- hw/xen/xen_pt_load_rom.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index fa85f87b1c..4c7be52951 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -70,7 +70,7 @@ static bool pcie_has_upstream_port(PCIDevice *dev); static Property pci_props[] = { DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), DEFINE_PROP_STRING("romfile", PCIDevice, romfile), - DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, -1), + DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, UINT32_MAX), DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1), DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present, QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false), @@ -2073,7 +2073,7 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) g_cmp_uint32, NULL); } - if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) { + if (pci_dev->romsize != UINT32_MAX && !is_power_of_2(pci_dev->romsize)) { error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize); return; } @@ -2359,7 +2359,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, return; } - if (load_file || pdev->romsize == -1) { + if (load_file || pdev->romsize == UINT32_MAX) { path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); if (path == 
NULL) { path = g_strdup(pdev->romfile); @@ -2378,7 +2378,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, pdev->romfile); return; } - if (pdev->romsize != -1) { + if (pdev->romsize != UINT32_MAX) { if (size > pdev->romsize) { error_setg(errp, "romfile \"%s\" (%u bytes) " "is too large for ROM size %u", diff --git a/hw/xen/xen_pt_load_rom.c b/hw/xen/xen_pt_load_rom.c index 03422a8a71..6bc64acd33 100644 --- a/hw/xen/xen_pt_load_rom.c +++ b/hw/xen/xen_pt_load_rom.c @@ -53,7 +53,7 @@ void *pci_assign_dev_load_option_rom(PCIDevice *dev, } fseek(fp, 0, SEEK_SET); - if (dev->romsize != -1) { + if (dev->romsize != UINT32_MAX) { if (st.st_size > dev->romsize) { error_report("ROM BAR \"%s\" (%ld bytes) is too large for ROM size %u", rom_file, (long) st.st_size, dev->romsize);