From c3ec57e495b032047ddfef2075792340c407532a Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Tue, 10 Sep 2024 14:35:12 -0700 Subject: [PATCH 01/65] softmmu: Expand comments describing max_bounce_buffer_size Clarify how the parameter gets configured and how it is used when servicing DMA mapping requests targeting indirect memory regions. Signed-off-by: Mattias Nissler Message-Id: <20240910213512.843130-1-mnissler@rivosinc.com> Acked-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/exec/memory.h | 9 ++++++++- include/hw/pci/pci_device.h | 6 +++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index e5e865d1a9..9458e2801d 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1104,7 +1104,14 @@ struct AddressSpace { QTAILQ_HEAD(, MemoryListener) listeners; QTAILQ_ENTRY(AddressSpace) address_spaces_link; - /* Maximum DMA bounce buffer size used for indirect memory map requests */ + /* + * Maximum DMA bounce buffer size used for indirect memory map requests. + * This limits the total size of bounce buffer allocations made for + * DMA requests to indirect memory regions within this AddressSpace. DMA + * requests that exceed the limit (e.g. due to overly large requested size + * or concurrent DMA requests having claimed too much buffer space) will be + * rejected and left to the caller to handle. + */ size_t max_bounce_buffer_size; /* Total size of bounce buffers currently allocated, atomically accessed */ size_t bounce_buffer_size; diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index 91df40f989..8eaf0d58bb 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -168,7 +168,11 @@ struct PCIDevice { char *failover_pair_id; uint32_t acpi_index; - /* Maximum DMA bounce buffer size used for indirect memory map requests */ + /* + * Indirect DMA region bounce buffer size as configured for the device. 
This + * is a configuration parameter that is reflected into bus_master_as when + * realizing the device. + */ uint32_t max_bounce_buffer_size; }; From b87ea798eb83693286cb2db6606280431e02628d Mon Sep 17 00:00:00 2001 From: luzhixing12345 Date: Wed, 11 Sep 2024 14:04:00 +0800 Subject: [PATCH 02/65] docs: fix vhost-user protocol doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some editorial tweaks to the doc: Add a ref link to Memory region description and Multiple Memory region description. Descriptions about memory regions are merged into one line. Add extra type(64 bits) to Log description structure fields Fix ’s to 's Signed-off-by: luzhixing12345 Message-Id: <20240911060400.3472-1-luzhixing12345@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/interop/vhost-user.rst | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index d8419fd2f1..2e50f2ddfa 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -167,6 +167,8 @@ A vring address description Note that a ring address is an IOVA if ``VIRTIO_F_IOMMU_PLATFORM`` has been negotiated. Otherwise it is a user address. +.. _memory_region_description: + Memory region description ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -180,7 +182,7 @@ Memory region description :user address: a 64-bit user address -:mmap offset: 64-bit offset where region starts in the mapped memory +:mmap offset: a 64-bit offset where region starts in the mapped memory When the ``VHOST_USER_PROTOCOL_F_XEN_MMAP`` protocol feature has been successfully negotiated, the memory region description contains two extra @@ -190,7 +192,7 @@ fields at the end. 
| guest address | size | user address | mmap offset | xen mmap flags | domid | +---------------+------+--------------+-------------+----------------+-------+ -:xen mmap flags: 32-bit bit field +:xen mmap flags: a 32-bit bit field - Bit 0 is set for Xen foreign memory mapping. - Bit 1 is set for Xen grant memory mapping. @@ -211,7 +213,7 @@ Single memory region description :padding: 64-bit -A region is represented by Memory region description. +:region: region is represented by :ref:`Memory region description <memory_region_description>`. Multiple Memory regions description ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -224,7 +226,7 @@ Multiple Memory regions description :padding: 32-bit -A region is represented by Memory region description. +:regions: regions field contains 8 regions of type :ref:`Memory region description <memory_region_description>`. Log description ^^^^^^^^^^^^^^^ @@ -233,9 +235,9 @@ Log description | log size | log offset | +----------+------------+ -:log size: size of area used for logging +:log size: a 64-bit size of area used for logging -:log offset: offset from start of supplied file descriptor where +:log offset: a 64-bit offset from start of supplied file descriptor where logging starts (i.e. where guest address 0 would be logged) @@ -382,7 +384,7 @@ the kernel implementation. The communication consists of the *front-end* sending message requests and the *back-end* sending message replies. Most of the requests don't require -replies. Here is a list of the ones that do: +replies, except for the following requests: * ``VHOST_USER_GET_FEATURES`` * ``VHOST_USER_GET_PROTOCOL_FEATURES`` @@ -1239,11 +1241,11 @@ Front-end message types (*a vring descriptor index for split virtqueues* vs. *vring descriptor indices for packed virtqueues*). - When and as long as all of a device’s vrings are stopped, it is + When and as long as all of a device's vrings are stopped, it is *suspended*, see :ref:`Suspended device state `. 
- The request payload’s *num* field is currently reserved and must be + The request payload's *num* field is currently reserved and must be set to 0. ``VHOST_USER_SET_VRING_KICK`` @@ -1662,7 +1664,7 @@ Front-end message types :reply payload: ``u64`` Front-end and back-end negotiate a channel over which to transfer the - back-end’s internal state during migration. Either side (front-end or + back-end's internal state during migration. Either side (front-end or back-end) may create the channel. The nature of this channel is not restricted or defined in this document, but whichever side creates it must create a file descriptor that is provided to the respectively @@ -1714,7 +1716,7 @@ Front-end message types :request payload: N/A :reply payload: ``u64`` - After transferring the back-end’s internal state during migration (see + After transferring the back-end's internal state during migration (see the :ref:`Migrating back-end state ` section), check whether the back-end was able to successfully fully process the state. From 16c687d84574a1139a6475c33e3b9191f7932ac0 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:10:06 +0100 Subject: [PATCH 03/65] hw/acpi: Fix ordering of BDF in Generic Initiator PCI Device Handle. The ordering in ACPI specification [1] has bus number in the lowest byte. As ACPI tables are little endian this is the reverse of the ordering used by PCI_BUILD_BDF(). As a minimal fix split the QEMU BDF up into bus and devfn and write them as single bytes in the correct order. [1] ACPI Spec 6.3, Table 5.80 Fixes: 0a5b5acdf2d8 ("hw/acpi: Implement the SRAT GI affinity structure") Reviewed-by: Igor Mammedov Tested-by: "Huang, Ying" Signed-off-by: Jonathan Cameron Message-Id: <20240916171017.1841767-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/acpi/acpi_generic_initiator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/acpi/acpi_generic_initiator.c b/hw/acpi/acpi_generic_initiator.c index 17b9a052f5..3d2b567999 100644 --- a/hw/acpi/acpi_generic_initiator.c +++ b/hw/acpi/acpi_generic_initiator.c @@ -92,7 +92,8 @@ build_srat_generic_pci_initiator_affinity(GArray *table_data, int node, /* Device Handle - PCI */ build_append_int_noprefix(table_data, handle->segment, 2); - build_append_int_noprefix(table_data, handle->bdf, 2); + build_append_int_noprefix(table_data, PCI_BUS_NUM(handle->bdf), 1); + build_append_int_noprefix(table_data, PCI_BDF_TO_DEVFN(handle->bdf), 1); for (index = 0; index < 12; index++) { build_append_int_noprefix(table_data, 0, 1); } From dc55a90e295e05b6246b0b152a854612467e417f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:10:07 +0100 Subject: [PATCH 04/65] hw/acpi/GI: Fix trivial parameter alignment issue. Before making additional modification, tidy up this misleading indentation. Reviewed-by: Ankit Agrawal Reviewed-by: Igor Mammedov Tested-by: "Huang, Ying" Signed-off-by: Jonathan Cameron Message-Id: <20240916171017.1841767-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/acpi/acpi_generic_initiator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/acpi/acpi_generic_initiator.c b/hw/acpi/acpi_generic_initiator.c index 3d2b567999..4a02c19468 100644 --- a/hw/acpi/acpi_generic_initiator.c +++ b/hw/acpi/acpi_generic_initiator.c @@ -133,7 +133,7 @@ static int build_all_acpi_generic_initiators(Object *obj, void *opaque) dev_handle.segment = 0; dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), - pci_dev->devfn); + pci_dev->devfn); build_srat_generic_pci_initiator_affinity(table_data, gi->node, &dev_handle); From d8a4b4c3b4dd258c5f5f6237473349d377ea7fc9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:10:08 +0100 Subject: [PATCH 05/65] hw/acpi: Move AML building code for Generic Initiators to aml_build.c Rather than attempting to create a generic function with a mess of the two different device handle types, use a PCI handle specific variant. If the ACPI handle form is needed then that can be introduced alongside this with little duplicated code. Drop the PCIDeviceHandle in favor of just passing the bus, devfn and segment directly. devfn is kept as a single byte because ARI means that in this case it is just an 8 bit function number. Suggested-by: Igor Mammedov Link: https://lore.kernel.org/qemu-devel/20240618142333.102be976@imammedo.users.ipa.redhat.com/ Tested-by: "Huang, Ying" Reviewed-by: Igor Mammedov Signed-off-by: Jonathan Cameron Message-Id: <20240916171017.1841767-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/acpi/acpi_generic_initiator.c | 39 ++------------------- hw/acpi/aml-build.c | 44 ++++++++++++++++++++++++ include/hw/acpi/acpi_generic_initiator.h | 23 ------------- include/hw/acpi/aml-build.h | 4 +++ 4 files changed, 51 insertions(+), 59 deletions(-) diff --git a/hw/acpi/acpi_generic_initiator.c b/hw/acpi/acpi_generic_initiator.c index 4a02c19468..7665b16107 100644 --- a/hw/acpi/acpi_generic_initiator.c +++ b/hw/acpi/acpi_generic_initiator.c @@ -74,40 +74,11 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data) acpi_generic_initiator_set_node, NULL, NULL); } -/* - * ACPI 6.3: - * Table 5-78 Generic Initiator Affinity Structure - */ -static void -build_srat_generic_pci_initiator_affinity(GArray *table_data, int node, - PCIDeviceHandle *handle) -{ - uint8_t index; - - build_append_int_noprefix(table_data, 5, 1); /* Type */ - build_append_int_noprefix(table_data, 32, 1); /* Length */ - build_append_int_noprefix(table_data, 0, 1); /* Reserved */ - build_append_int_noprefix(table_data, 1, 1); /* Device Handle Type: PCI */ - build_append_int_noprefix(table_data, node, 4); /* Proximity Domain */ - - /* Device Handle - PCI */ - build_append_int_noprefix(table_data, handle->segment, 2); - build_append_int_noprefix(table_data, PCI_BUS_NUM(handle->bdf), 1); - build_append_int_noprefix(table_data, PCI_BDF_TO_DEVFN(handle->bdf), 1); - for (index = 0; index < 12; index++) { - build_append_int_noprefix(table_data, 0, 1); - } - - build_append_int_noprefix(table_data, GEN_AFFINITY_ENABLED, 4); /* Flags */ - build_append_int_noprefix(table_data, 0, 4); /* Reserved */ -} - static int build_all_acpi_generic_initiators(Object *obj, void *opaque) { MachineState *ms = MACHINE(qdev_get_machine()); AcpiGenericInitiator *gi; GArray *table_data = opaque; - PCIDeviceHandle dev_handle; PCIDevice *pci_dev; Object *o; @@ -130,13 +101,9 @@ static int build_all_acpi_generic_initiators(Object *obj, void *opaque) } pci_dev = PCI_DEVICE(o); - - 
dev_handle.segment = 0; - dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), - pci_dev->devfn); - - build_srat_generic_pci_initiator_affinity(table_data, - gi->node, &dev_handle); + build_srat_pci_generic_initiator(table_data, gi->node, 0, + pci_bus_num(pci_get_bus(pci_dev)), + pci_dev->devfn); return 0; } diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 34e0ddbde8..aeb4cb94e8 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1937,6 +1937,50 @@ void build_srat_memory(GArray *table_data, uint64_t base, build_append_int_noprefix(table_data, 0, 8); /* Reserved */ } +/* + * ACPI Spec Revision 6.3 + * Table 5-80 Device Handle - PCI + */ +static void build_append_srat_pci_device_handle(GArray *table_data, + uint16_t segment, + uint8_t bus, uint8_t devfn) +{ + /* PCI segment number */ + build_append_int_noprefix(table_data, segment, 2); + /* PCI Bus Device Function */ + build_append_int_noprefix(table_data, bus, 1); + build_append_int_noprefix(table_data, devfn, 1); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 12); +} + +/* + * ACPI spec, Revision 6.3 + * 5.2.16.6 Generic Initiator Affinity Structure + * With PCI Device Handle. 
+ */ +void build_srat_pci_generic_initiator(GArray *table_data, int node, + uint16_t segment, uint8_t bus, + uint8_t devfn) +{ + /* Type */ + build_append_int_noprefix(table_data, 5, 1); + /* Length */ + build_append_int_noprefix(table_data, 32, 1); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 1); + /* Device Handle Type: PCI */ + build_append_int_noprefix(table_data, 1, 1); + /* Proximity Domain */ + build_append_int_noprefix(table_data, node, 4); + /* Device Handle */ + build_append_srat_pci_device_handle(table_data, segment, bus, devfn); + /* Flags - GI Enabled */ + build_append_int_noprefix(table_data, 1, 4); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); +} + /* * ACPI spec 5.2.17 System Locality Distance Information Table * (Revision 2.0 or later) diff --git a/include/hw/acpi/acpi_generic_initiator.h b/include/hw/acpi/acpi_generic_initiator.h index a304bad73e..7b98676713 100644 --- a/include/hw/acpi/acpi_generic_initiator.h +++ b/include/hw/acpi/acpi_generic_initiator.h @@ -19,29 +19,6 @@ typedef struct AcpiGenericInitiator { uint16_t node; } AcpiGenericInitiator; -/* - * ACPI 6.3: - * Table 5-81 Flags – Generic Initiator Affinity Structure - */ -typedef enum { - /* - * If clear, the OSPM ignores the contents of the Generic - * Initiator/Port Affinity Structure. This allows system firmware - * to populate the SRAT with a static number of structures, but only - * enable them as necessary. 
- */ - GEN_AFFINITY_ENABLED = (1 << 0), -} GenericAffinityFlags; - -/* - * ACPI 6.3: - * Table 5-80 Device Handle - PCI - */ -typedef struct PCIDeviceHandle { - uint16_t segment; - uint16_t bdf; -} PCIDeviceHandle; - void build_srat_generic_pci_initiator(GArray *table_data); #endif diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index a3784155cb..33eef85791 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -486,6 +486,10 @@ Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set, uint32_t io_offset, void build_srat_memory(GArray *table_data, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags); +void build_srat_pci_generic_initiator(GArray *table_data, int node, + uint16_t segment, uint8_t bus, + uint8_t devfn); + void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, const char *oem_id, const char *oem_table_id); From a20b6c8f0b279edf370bc20b7845f87c36d03f7a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:10:09 +0100 Subject: [PATCH 06/65] hw/acpi: Rename build_all_acpi_generic_initiators() to build_acpi_generic_initiator() Igor noted that this function only builds one instance, so was rather misleadingly named. Fix that. Suggested-by: Igor Mammedov Reviewed-by: Igor Mammedov Tested-by: "Huang, Ying" Signed-off-by: Jonathan Cameron Message-Id: <20240916171017.1841767-5-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/acpi/acpi_generic_initiator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/acpi/acpi_generic_initiator.c b/hw/acpi/acpi_generic_initiator.c index 7665b16107..73bafaaaea 100644 --- a/hw/acpi/acpi_generic_initiator.c +++ b/hw/acpi/acpi_generic_initiator.c @@ -74,7 +74,7 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data) acpi_generic_initiator_set_node, NULL, NULL); } -static int build_all_acpi_generic_initiators(Object *obj, void *opaque) +static int build_acpi_generic_initiator(Object *obj, void *opaque) { MachineState *ms = MACHINE(qdev_get_machine()); AcpiGenericInitiator *gi; @@ -111,6 +111,6 @@ static int build_all_acpi_generic_initiators(Object *obj, void *opaque) void build_srat_generic_pci_initiator(GArray *table_data) { object_child_foreach_recursive(object_get_root(), - build_all_acpi_generic_initiators, + build_acpi_generic_initiator, table_data); } From df9ac7254fd943c834f9666969b0852b50e91692 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:10:10 +0100 Subject: [PATCH 07/65] hw/pci: Add a busnr property to pci_props and use for acpi/gi Using a property allows us to hide the internal details of the PCI device from the code to build a SRAT Generic Initiator Affinity Structure with PCI Device Handle. Suggested-by: Igor Mammedov Reviewed-by: Igor Mammedov Signed-off-by: Jonathan Cameron Message-Id: <20240916171017.1841767-6-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/acpi/acpi_generic_initiator.c | 14 +++++++++----- hw/pci/pci.c | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/hw/acpi/acpi_generic_initiator.c b/hw/acpi/acpi_generic_initiator.c index 73bafaaaea..365feb527f 100644 --- a/hw/acpi/acpi_generic_initiator.c +++ b/hw/acpi/acpi_generic_initiator.c @@ -9,6 +9,7 @@ #include "hw/boards.h" #include "hw/pci/pci_device.h" #include "qemu/error-report.h" +#include "qapi/error.h" typedef struct AcpiGenericInitiatorClass { ObjectClass parent_class; @@ -79,7 +80,8 @@ static int build_acpi_generic_initiator(Object *obj, void *opaque) MachineState *ms = MACHINE(qdev_get_machine()); AcpiGenericInitiator *gi; GArray *table_data = opaque; - PCIDevice *pci_dev; + int32_t devfn; + uint8_t bus; Object *o; if (!object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) { @@ -100,10 +102,12 @@ static int build_acpi_generic_initiator(Object *obj, void *opaque) exit(1); } - pci_dev = PCI_DEVICE(o); - build_srat_pci_generic_initiator(table_data, gi->node, 0, - pci_bus_num(pci_get_bus(pci_dev)), - pci_dev->devfn); + bus = object_property_get_uint(o, "busnr", &error_fatal); + devfn = object_property_get_int(o, "addr", &error_fatal); + /* devfn is constrained in PCI to be 8 bit but storage is an int32_t */ + assert(devfn >= 0 && devfn < PCI_DEVFN_MAX); + + build_srat_pci_generic_initiator(table_data, gi->node, 0, bus, devfn); return 0; } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 87da35ca9b..0b6bdaa0d7 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -67,6 +67,19 @@ static char *pcibus_get_fw_dev_path(DeviceState *dev); static void pcibus_reset_hold(Object *obj, ResetType type); static bool pcie_has_upstream_port(PCIDevice *dev); +static void prop_pci_busnr_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t busnr = pci_dev_bus_num(PCI_DEVICE(obj)); + + visit_type_uint8(v, name, &busnr, errp); +} + +static const PropertyInfo prop_pci_busnr = { + .name = "busnr", 
+ .get = prop_pci_busnr_get, +}; + static Property pci_props[] = { DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), DEFINE_PROP_STRING("romfile", PCIDevice, romfile), @@ -87,6 +100,7 @@ static Property pci_props[] = { QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice, max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE), + { .name = "busnr", .info = &prop_pci_busnr }, DEFINE_PROP_END_OF_LIST() }; From f74e78220dbfec557922eb6e8ec0a78d08743e02 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:10:11 +0100 Subject: [PATCH 08/65] acpi/pci: Move Generic Initiator object handling into acpi/pci.* Whilst ACPI SRAT Generic Initiator Affinity Structures are able to refer to both PCI and ACPI Device Handles, the QEMU implementation only implements the PCI Device Handle case. For now move the code into the existing hw/acpi/pci.c file and header. If support for ACPI Device Handles is added in the future, perhaps this will be moved again. Also push the struct AcpiGenericInitiator down into the c file as it is not used outside pci.c. Suggested-by: Igor Mammedov Tested-by: "Huang, Ying" Reviewed-by: Igor Mammedov Signed-off-by: Jonathan Cameron Message-Id: <20240916171017.1841767-7-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/acpi/acpi_generic_initiator.c | 120 ---------------------- hw/acpi/meson.build | 1 - hw/acpi/pci.c | 124 +++++++++++++++++++++++ hw/arm/virt-acpi-build.c | 1 - hw/i386/acpi-build.c | 1 - include/hw/acpi/acpi_generic_initiator.h | 24 ----- include/hw/acpi/pci.h | 3 + 7 files changed, 127 insertions(+), 147 deletions(-) delete mode 100644 hw/acpi/acpi_generic_initiator.c delete mode 100644 include/hw/acpi/acpi_generic_initiator.h diff --git a/hw/acpi/acpi_generic_initiator.c b/hw/acpi/acpi_generic_initiator.c deleted file mode 100644 index 365feb527f..0000000000 --- a/hw/acpi/acpi_generic_initiator.c +++ /dev/null @@ -1,120 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved - */ - -#include "qemu/osdep.h" -#include "hw/acpi/acpi_generic_initiator.h" -#include "hw/acpi/aml-build.h" -#include "hw/boards.h" -#include "hw/pci/pci_device.h" -#include "qemu/error-report.h" -#include "qapi/error.h" - -typedef struct AcpiGenericInitiatorClass { - ObjectClass parent_class; -} AcpiGenericInitiatorClass; - -OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, acpi_generic_initiator, - ACPI_GENERIC_INITIATOR, OBJECT, - { TYPE_USER_CREATABLE }, - { NULL }) - -OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericInitiator, ACPI_GENERIC_INITIATOR) - -static void acpi_generic_initiator_init(Object *obj) -{ - AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj); - - gi->node = MAX_NODES; - gi->pci_dev = NULL; -} - -static void acpi_generic_initiator_finalize(Object *obj) -{ - AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj); - - g_free(gi->pci_dev); -} - -static void acpi_generic_initiator_set_pci_device(Object *obj, const char *val, - Error **errp) -{ - AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj); - - gi->pci_dev = g_strdup(val); -} - -static void acpi_generic_initiator_set_node(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - AcpiGenericInitiator *gi 
= ACPI_GENERIC_INITIATOR(obj); - MachineState *ms = MACHINE(qdev_get_machine()); - uint32_t value; - - if (!visit_type_uint32(v, name, &value, errp)) { - return; - } - - if (value >= MAX_NODES) { - error_printf("%s: Invalid NUMA node specified\n", - TYPE_ACPI_GENERIC_INITIATOR); - exit(1); - } - - gi->node = value; - ms->numa_state->nodes[gi->node].has_gi = true; -} - -static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data) -{ - object_class_property_add_str(oc, "pci-dev", NULL, - acpi_generic_initiator_set_pci_device); - object_class_property_add(oc, "node", "int", NULL, - acpi_generic_initiator_set_node, NULL, NULL); -} - -static int build_acpi_generic_initiator(Object *obj, void *opaque) -{ - MachineState *ms = MACHINE(qdev_get_machine()); - AcpiGenericInitiator *gi; - GArray *table_data = opaque; - int32_t devfn; - uint8_t bus; - Object *o; - - if (!object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) { - return 0; - } - - gi = ACPI_GENERIC_INITIATOR(obj); - if (gi->node >= ms->numa_state->num_nodes) { - error_printf("%s: Specified node %d is invalid.\n", - TYPE_ACPI_GENERIC_INITIATOR, gi->node); - exit(1); - } - - o = object_resolve_path_type(gi->pci_dev, TYPE_PCI_DEVICE, NULL); - if (!o) { - error_printf("%s: Specified device must be a PCI device.\n", - TYPE_ACPI_GENERIC_INITIATOR); - exit(1); - } - - bus = object_property_get_uint(o, "busnr", &error_fatal); - devfn = object_property_get_int(o, "addr", &error_fatal); - /* devfn is constrained in PCI to be 8 bit but storage is an int32_t */ - assert(devfn >= 0 && devfn < PCI_DEVFN_MAX); - - build_srat_pci_generic_initiator(table_data, gi->node, 0, bus, devfn); - - return 0; -} - -void build_srat_generic_pci_initiator(GArray *table_data) -{ - object_child_foreach_recursive(object_get_root(), - build_acpi_generic_initiator, - table_data); -} diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index 7f8ccc9b7a..c8854f4d48 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -1,6 
+1,5 @@ acpi_ss = ss.source_set() acpi_ss.add(files( - 'acpi_generic_initiator.c', 'acpi_interface.c', 'aml-build.c', 'bios-linker-loader.c', diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c index 20b70dcd81..3e1db161cc 100644 --- a/hw/acpi/pci.c +++ b/hw/acpi/pci.c @@ -24,8 +24,13 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qom/object_interfaces.h" +#include "qapi/error.h" +#include "hw/boards.h" #include "hw/acpi/aml-build.h" #include "hw/acpi/pci.h" +#include "hw/pci/pci_device.h" #include "hw/pci/pcie_host.h" /* @@ -59,3 +64,122 @@ void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info, acpi_table_end(linker, &table); } + +typedef struct AcpiGenericInitiator { + /* private */ + Object parent; + + /* public */ + char *pci_dev; + uint16_t node; +} AcpiGenericInitiator; + +typedef struct AcpiGenericInitiatorClass { + ObjectClass parent_class; +} AcpiGenericInitiatorClass; + +#define TYPE_ACPI_GENERIC_INITIATOR "acpi-generic-initiator" + +OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, acpi_generic_initiator, + ACPI_GENERIC_INITIATOR, OBJECT, + { TYPE_USER_CREATABLE }, + { NULL }) + +OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericInitiator, ACPI_GENERIC_INITIATOR) + +static void acpi_generic_initiator_init(Object *obj) +{ + AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj); + + gi->node = MAX_NODES; + gi->pci_dev = NULL; +} + +static void acpi_generic_initiator_finalize(Object *obj) +{ + AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj); + + g_free(gi->pci_dev); +} + +static void acpi_generic_initiator_set_pci_device(Object *obj, const char *val, + Error **errp) +{ + AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj); + + gi->pci_dev = g_strdup(val); +} + +static void acpi_generic_initiator_set_node(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj); + MachineState *ms = MACHINE(qdev_get_machine()); + uint32_t value; + + if 
(!visit_type_uint32(v, name, &value, errp)) { + return; + } + + if (value >= MAX_NODES) { + error_printf("%s: Invalid NUMA node specified\n", + TYPE_ACPI_GENERIC_INITIATOR); + exit(1); + } + + gi->node = value; + ms->numa_state->nodes[gi->node].has_gi = true; +} + +static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data) +{ + object_class_property_add_str(oc, "pci-dev", NULL, + acpi_generic_initiator_set_pci_device); + object_class_property_add(oc, "node", "int", NULL, + acpi_generic_initiator_set_node, NULL, NULL); +} + +static int build_acpi_generic_initiator(Object *obj, void *opaque) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + AcpiGenericInitiator *gi; + GArray *table_data = opaque; + int32_t devfn; + uint8_t bus; + Object *o; + + if (!object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) { + return 0; + } + + gi = ACPI_GENERIC_INITIATOR(obj); + if (gi->node >= ms->numa_state->num_nodes) { + error_printf("%s: Specified node %d is invalid.\n", + TYPE_ACPI_GENERIC_INITIATOR, gi->node); + exit(1); + } + + o = object_resolve_path_type(gi->pci_dev, TYPE_PCI_DEVICE, NULL); + if (!o) { + error_printf("%s: Specified device must be a PCI device.\n", + TYPE_ACPI_GENERIC_INITIATOR); + exit(1); + } + + bus = object_property_get_uint(o, "busnr", &error_fatal); + devfn = object_property_get_uint(o, "addr", &error_fatal); + /* devfn is constrained in PCI to be 8 bit but storage is an int32_t */ + assert(devfn >= 0 && devfn < PCI_DEVFN_MAX); + + build_srat_pci_generic_initiator(table_data, gi->node, 0, bus, devfn); + + return 0; +} + +void build_srat_generic_pci_initiator(GArray *table_data) +{ + object_child_foreach_recursive(object_get_root(), + build_acpi_generic_initiator, + table_data); +} diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index f76fb117ad..b5973c9148 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -57,7 +57,6 @@ #include "migration/vmstate.h" #include "hw/acpi/ghes.h" #include 
"hw/acpi/viot.h" -#include "hw/acpi/acpi_generic_initiator.h" #include "hw/virtio/virtio-acpi.h" #include "target/arm/multiprocessing.h" diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 4967aa7459..afb2fa2edc 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -68,7 +68,6 @@ #include "hw/acpi/utils.h" #include "hw/acpi/pci.h" #include "hw/acpi/cxl.h" -#include "hw/acpi/acpi_generic_initiator.h" #include "qom/qom-qobject.h" #include "hw/i386/amd_iommu.h" diff --git a/include/hw/acpi/acpi_generic_initiator.h b/include/hw/acpi/acpi_generic_initiator.h deleted file mode 100644 index 7b98676713..0000000000 --- a/include/hw/acpi/acpi_generic_initiator.h +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved - */ - -#ifndef ACPI_GENERIC_INITIATOR_H -#define ACPI_GENERIC_INITIATOR_H - -#include "qom/object_interfaces.h" - -#define TYPE_ACPI_GENERIC_INITIATOR "acpi-generic-initiator" - -typedef struct AcpiGenericInitiator { - /* private */ - Object parent; - - /* public */ - char *pci_dev; - uint16_t node; -} AcpiGenericInitiator; - -void build_srat_generic_pci_initiator(GArray *table_data); - -#endif diff --git a/include/hw/acpi/pci.h b/include/hw/acpi/pci.h index 467a99461c..3015a8171c 100644 --- a/include/hw/acpi/pci.h +++ b/include/hw/acpi/pci.h @@ -40,4 +40,7 @@ Aml *aml_pci_device_dsm(void); void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus); void build_pci_bridge_aml(AcpiDevAmlIf *adev, Aml *scope); + +void build_srat_generic_pci_initiator(GArray *table_data); + #endif From 97b9cb066e5f10845b4bc4d2ec657deb1e73f910 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:10:12 +0100 Subject: [PATCH 09/65] hw/pci-bridge: Add acpi_uid property to TYPE_PXB_BUS Enable ACPI table creation for PCI Expander Bridges to be independent of PCI internals. Note that the UID is currently the PCI bus number. 
This is motivated by the forthcoming ACPI Generic Port SRAT entries which can be made completely independent of PCI internals. Suggested-by: Igor Mammedov Tested-by: "Huang, Ying" Reviewed-by: Igor Mammedov Signed-off-by: Jonathan Cameron Message-Id: <20240916171017.1841767-8-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/pci_expander_bridge.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index dfaea6cbf4..3d52ea5867 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -85,12 +85,25 @@ static uint16_t pxb_bus_numa_node(PCIBus *bus) return pxb->numa_node; } +static void prop_pxb_uid_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint32_t uid = pci_bus_num(PCI_BUS(obj)); + + visit_type_uint32(v, name, &uid, errp); +} + static void pxb_bus_class_init(ObjectClass *class, void *data) { PCIBusClass *pbc = PCI_BUS_CLASS(class); pbc->bus_num = pxb_bus_num; pbc->numa_node = pxb_bus_numa_node; + + object_class_property_add(class, "acpi_uid", "uint32", + prop_pxb_uid_get, NULL, NULL, NULL); + object_class_property_set_description(class, "acpi_uid", + "ACPI Unique ID used to distinguish this PCI Host Bridge / ACPI00016"); } static const TypeInfo pxb_bus_info = { From dc907b5cac14ef06f59963d697e81ff2516b9b3f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:10:13 +0100 Subject: [PATCH 10/65] hw/i386/acpi: Use TYPE_PXB_BUS property acpi_uid for DSDT Rather than relying on PCI internals, use the new acpi_uid property to obtain the ACPI _UID values. These are still the same as the PCI Bus numbers so no functional change. Suggested-by: Igor Mammedov Tested-by: "Huang, Ying" Reviewed-by: Igor Mammedov Signed-off-by: Jonathan Cameron Message-Id: <20240916171017.1841767-9-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index afb2fa2edc..88227e343e 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1475,6 +1475,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, QLIST_FOREACH(bus, &bus->child, sibling) { uint8_t bus_num = pci_bus_num(bus); uint8_t numa_node = pci_bus_numa_node(bus); + uint32_t uid; /* look only for expander root buses */ if (!pci_bus_is_root(bus)) { @@ -1485,6 +1486,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, root_bus_limit = bus_num - 1; } + uid = object_property_get_uint(OBJECT(bus), "acpi_uid", + &error_fatal); scope = aml_scope("\\_SB"); if (pci_bus_is_cxl(bus)) { @@ -1492,7 +1495,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } else { dev = aml_device("PC%.02X", bus_num); } - aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); + aml_append(dev, aml_name_decl("_UID", aml_int(uid))); aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); if (pci_bus_is_cxl(bus)) { struct Aml *aml_pkg = aml_package(2); From 43eb5e1f73f1b943d952d9776681a51f05ca7aa8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:10:14 +0100 Subject: [PATCH 11/65] hw/pci-host/gpex-acpi: Use acpi_uid property. Reduce the direct use of PCI internals inside ACPI table creation. Suggested-by: Igor Mammedov Tested-by: "Huang, Ying" Reviewed-by: Igor Mammedov Signed-off-by: Jonathan Cameron Message-Id: <20240916171017.1841767-10-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci-host/gpex-acpi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index 391fabb8a8..e8b4c64c5f 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -141,6 +141,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) QLIST_FOREACH(bus, &bus->child, sibling) { uint8_t bus_num = pci_bus_num(bus); uint8_t numa_node = pci_bus_numa_node(bus); + uint32_t uid; bool is_cxl = pci_bus_is_cxl(bus); if (!pci_bus_is_root(bus)) { @@ -156,6 +157,8 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) nr_pcie_buses = bus_num; } + uid = object_property_get_uint(OBJECT(bus), "acpi_uid", + &error_fatal); dev = aml_device("PC%.02X", bus_num); if (is_cxl) { struct Aml *pkg = aml_package(2); @@ -168,7 +171,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03"))); } aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); - aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); + aml_append(dev, aml_name_decl("_UID", aml_int(uid))); aml_append(dev, aml_name_decl("_STR", aml_unicode("pxb Device"))); aml_append(dev, aml_name_decl("_CCA", aml_int(1))); if (numa_node != NUMA_NODE_UNASSIGNED) { From a82fe82916432091ca6fcbd7f357cccf35f6e80d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:41:22 +0100 Subject: [PATCH 12/65] hw/acpi: Generic Port Affinity Structure support These are very similar to the recently added Generic Initiators but instead of representing an initiator of memory traffic they represent an edge point beyond which may lie either targets or initiators. Here we add these ports such that they may be targets of hmat_lb records to describe the latency and bandwidth from host side initiators to the port. 
A discoverable mechanism such as UEFI CDAT read from CXL devices and switches is used to discover the remainder of the path, and the OS can build up full latency and bandwidth numbers as needed for work and data placement decisions. Acked-by: Markus Armbruster Tested-by: "Huang, Ying" Signed-off-by: Jonathan Cameron Message-Id: <20240916174122.1843197-1-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/aml-build.c | 39 ++++++++++ hw/acpi/pci.c | 116 +++++++++++++++++++++++++++- hw/arm/virt-acpi-build.c | 2 +- hw/i386/acpi-build.c | 2 +- hw/pci-bridge/pci_expander_bridge.c | 1 - include/hw/acpi/aml-build.h | 3 + include/hw/acpi/pci.h | 2 +- include/hw/pci/pci_bridge.h | 1 + qapi/qom.json | 41 ++++++++++ 9 files changed, 202 insertions(+), 5 deletions(-) diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index aeb4cb94e8..7a06ffec5a 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1954,6 +1954,19 @@ static void build_append_srat_pci_device_handle(GArray *table_data, build_append_int_noprefix(table_data, 0, 12); } +static void build_append_srat_acpi_device_handle(GArray *table_data, + const char *hid, + uint32_t uid) +{ + assert(strlen(hid) == 8); + /* Device Handle - ACPI */ + for (int i = 0; i < 8; i++) { + build_append_int_noprefix(table_data, hid[i], 1); + } + build_append_int_noprefix(table_data, uid, 4); + build_append_int_noprefix(table_data, 0, 4); +} + /* * ACPI spec, Revision 6.3 * 5.2.16.6 Generic Initiator Affinity Structure @@ -1981,6 +1994,32 @@ void build_srat_pci_generic_initiator(GArray *table_data, int node, build_append_int_noprefix(table_data, 0, 4); } +/* + * ACPI spec, Revision 6.5 + * 5.2.16.7 Generic Port Affinity Structure + * With ACPI Device Handle.
+ */ +void build_srat_acpi_generic_port(GArray *table_data, uint32_t node, + const char *hid, uint32_t uid) +{ + /* Type */ + build_append_int_noprefix(table_data, 6, 1); + /* Length */ + build_append_int_noprefix(table_data, 32, 1); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 1); + /* Device Handle Type: ACPI */ + build_append_int_noprefix(table_data, 0, 1); + /* Proximity Domain */ + build_append_int_noprefix(table_data, node, 4); + /* Device Handle */ + build_append_srat_acpi_device_handle(table_data, hid, uid); + /* Flags - GP Enabled */ + build_append_int_noprefix(table_data, 1, 4); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); +} + /* * ACPI spec 5.2.17 System Locality Distance Information Table * (Revision 2.0 or later) diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c index 3e1db161cc..d7a0e91f01 100644 --- a/hw/acpi/pci.c +++ b/hw/acpi/pci.c @@ -30,6 +30,7 @@ #include "hw/boards.h" #include "hw/acpi/aml-build.h" #include "hw/acpi/pci.h" +#include "hw/pci/pci_bridge.h" #include "hw/pci/pci_device.h" #include "hw/pci/pcie_host.h" @@ -177,9 +178,122 @@ static int build_acpi_generic_initiator(Object *obj, void *opaque) return 0; } -void build_srat_generic_pci_initiator(GArray *table_data) +typedef struct AcpiGenericPort { + /* private */ + Object parent; + + /* public */ + char *pci_bus; + uint32_t node; +} AcpiGenericPort; + +typedef struct AcpiGenericPortClass { + ObjectClass parent_class; +} AcpiGenericPortClass; + +#define TYPE_ACPI_GENERIC_PORT "acpi-generic-port" + +OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericPort, acpi_generic_port, + ACPI_GENERIC_PORT, OBJECT, + { TYPE_USER_CREATABLE }, + { NULL }) + +OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericPort, ACPI_GENERIC_PORT) + +static void acpi_generic_port_init(Object *obj) +{ + AcpiGenericPort *gp = ACPI_GENERIC_PORT(obj); + + gp->node = MAX_NODES; + gp->pci_bus = NULL; +} + +static void acpi_generic_port_finalize(Object *obj) +{ + AcpiGenericPort *gp = ACPI_GENERIC_PORT(obj); + + 
g_free(gp->pci_bus); +} + +static void acpi_generic_port_set_pci_bus(Object *obj, const char *val, + Error **errp) +{ + AcpiGenericPort *gp = ACPI_GENERIC_PORT(obj); + + gp->pci_bus = g_strdup(val); +} + +static void acpi_generic_port_set_node(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + AcpiGenericPort *gp = ACPI_GENERIC_PORT(obj); + uint32_t value; + + if (!visit_type_uint32(v, name, &value, errp)) { + return; + } + + if (value >= MAX_NODES) { + error_printf("%s: Invalid NUMA node specified\n", + TYPE_ACPI_GENERIC_PORT); + exit(1); + } + + gp->node = value; +} + +static void acpi_generic_port_class_init(ObjectClass *oc, void *data) +{ + object_class_property_add_str(oc, "pci-bus", NULL, + acpi_generic_port_set_pci_bus); + object_class_property_set_description(oc, "pci-bus", + "PCI Bus of the host bridge associated with this GP affinity structure"); + object_class_property_add(oc, "node", "int", NULL, + acpi_generic_port_set_node, NULL, NULL); + object_class_property_set_description(oc, "node", + "The NUMA node like ID to index HMAT/SLIT NUMA properties involving GP"); +} + +static int build_acpi_generic_port(Object *obj, void *opaque) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + const char *hid = "ACPI0016"; + GArray *table_data = opaque; + AcpiGenericPort *gp; + uint32_t uid; + Object *o; + + if (!object_dynamic_cast(obj, TYPE_ACPI_GENERIC_PORT)) { + return 0; + } + + gp = ACPI_GENERIC_PORT(obj); + + if (gp->node >= ms->numa_state->num_nodes) { + error_printf("%s: node %d is invalid.\n", + TYPE_ACPI_GENERIC_PORT, gp->node); + exit(1); + } + + o = object_resolve_path_type(gp->pci_bus, TYPE_PXB_CXL_BUS, NULL); + if (!o) { + error_printf("%s: device must be a CXL host bridge.\n", + TYPE_ACPI_GENERIC_PORT); + exit(1); + } + + uid = object_property_get_uint(o, "acpi_uid", &error_fatal); + build_srat_acpi_generic_port(table_data, gp->node, hid, uid); + + return 0; +} + +void
build_srat_generic_affinity_structures(GArray *table_data) { object_child_foreach_recursive(object_get_root(), build_acpi_generic_initiator, table_data); + object_child_foreach_recursive(object_get_root(), build_acpi_generic_port, + table_data); } diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index b5973c9148..620992c92c 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -510,7 +510,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } } - build_srat_generic_pci_initiator(table_data); + build_srat_generic_affinity_structures(table_data); if (ms->nvdimms_state->is_enabled) { nvdimm_build_srat(table_data); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 88227e343e..d01e704162 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1973,7 +1973,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) build_srat_memory(table_data, 0, 0, 0, MEM_AFFINITY_NOFLAGS); } - build_srat_generic_pci_initiator(table_data); + build_srat_generic_affinity_structures(table_data); /* * Entry is required for Windows to enable memory hotplug in OS diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index 3d52ea5867..4578e03024 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -38,7 +38,6 @@ DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS, DECLARE_INSTANCE_CHECKER(PXBBus, PXB_PCIE_BUS, TYPE_PXB_PCIE_BUS) -#define TYPE_PXB_CXL_BUS "pxb-cxl-bus" DECLARE_INSTANCE_CHECKER(PXBBus, PXB_CXL_BUS, TYPE_PXB_CXL_BUS) diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 33eef85791..47a4692a7d 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -490,6 +490,9 @@ void build_srat_pci_generic_initiator(GArray *table_data, int node, uint16_t segment, uint8_t bus, uint8_t devfn); +void build_srat_acpi_generic_port(GArray *table_data, uint32_t node, + const char *hid, uint32_t 
uid); + void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, const char *oem_id, const char *oem_table_id); diff --git a/include/hw/acpi/pci.h b/include/hw/acpi/pci.h index 3015a8171c..6359d574fd 100644 --- a/include/hw/acpi/pci.h +++ b/include/hw/acpi/pci.h @@ -41,6 +41,6 @@ Aml *aml_pci_device_dsm(void); void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus); void build_pci_bridge_aml(AcpiDevAmlIf *adev, Aml *scope); -void build_srat_generic_pci_initiator(GArray *table_data); +void build_srat_generic_affinity_structures(GArray *table_data); #endif diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index 5cd452115a..5456e24883 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -102,6 +102,7 @@ typedef struct PXBPCIEDev { PXBDev parent_obj; } PXBPCIEDev; +#define TYPE_PXB_CXL_BUS "pxb-cxl-bus" #define TYPE_PXB_DEV "pxb" OBJECT_DECLARE_SIMPLE_TYPE(PXBDev, PXB_DEV) diff --git a/qapi/qom.json b/qapi/qom.json index 321ccd708a..a8beeabf1f 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -844,6 +844,45 @@ 'data': { 'pci-dev': 'str', 'node': 'uint32' } } +## +# @AcpiGenericPortProperties: +# +# Properties for acpi-generic-port objects. +# +# @pci-bus: QOM path of the PCI bus of the hostbridge associated with +# this SRAT Generic Port Affinity Structure. This is the same as +# the bus parameter for the root ports attached to this host +# bridge. The resulting SRAT Generic Port Affinity Structure will +# refer to the ACPI object in DSDT that represents the host bridge +# (e.g. ACPI0016 for CXL host bridges). See ACPI 6.5 Section +# 5.2.16.7 for more information. +# +# @node: Similar to a NUMA node ID, but instead of providing a +# reference point used for defining NUMA distances and access +# characteristics to memory or from an initiator (e.g. CPU), this +# node defines the boundary point between non-discoverable system +# buses which must be described by firmware, and a discoverable +# bus. 
NUMA distances and access characteristics are defined to +# and from that point. For system software to establish full +# initiator to target characteristics this information must be +# combined with information retrieved from the discoverable part +# of the path. An example would use CDAT (see UEFI.org) +# information read from devices and switches in conjunction with +# link characteristics read from PCIe Configuration space. +# To get the full path latency from CPU to CXL attached DRAM +# CXL device: Add the latency from CPU to Generic Port (from +# HMAT indexed via the node ID in this SRAT structure) to +# that for CXL bus links, the latency across intermediate switches +# and from the EP port to the actual memory. Bandwidth is more +# complex as there may be interleaving across multiple devices +# and shared links in the path. +# +# Since: 9.1 +## +{ 'struct': 'AcpiGenericPortProperties', + 'data': { 'pci-bus': 'str', + 'node': 'uint32' } } + ## # @RngProperties: # @@ -1043,6 +1082,7 @@ { 'enum': 'ObjectType', 'data': [ 'acpi-generic-initiator', + 'acpi-generic-port', 'authz-list', 'authz-listfile', 'authz-pam', @@ -1118,6 +1158,7 @@ 'discriminator': 'qom-type', 'data': { 'acpi-generic-initiator': 'AcpiGenericInitiatorProperties', + 'acpi-generic-port': 'AcpiGenericPortProperties', 'authz-list': 'AuthZListProperties', 'authz-listfile': 'AuthZListFileProperties', 'authz-pam': 'AuthZPAMProperties', From cf2181aef23e7f145e8fe7b8395694d32b115ae5 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:42:37 +0100 Subject: [PATCH 13/65] hw/acpi: Make storage of node id uint32_t to reduce fragility >From review of generic port introduction. The value is handled as a uint32_t so store it in that type. The value cannot in reality exceed MAX_NODES which is currently 128 but if the types are matched there is no need to rely on that restriction.
Signed-off-by: Jonathan Cameron Message-Id: <20240916174237.1843213-1-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/aml-build.c | 2 +- hw/acpi/pci.c | 2 +- include/hw/acpi/aml-build.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 7a06ffec5a..6a76626177 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1972,7 +1972,7 @@ static void build_append_srat_acpi_device_handle(GArray *table_data, * 5.2.16.6 Generic Initiator Affinity Structure * With PCI Device Handle. */ -void build_srat_pci_generic_initiator(GArray *table_data, int node, +void build_srat_pci_generic_initiator(GArray *table_data, uint32_t node, uint16_t segment, uint8_t bus, uint8_t devfn) { diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c index d7a0e91f01..a4835ce563 100644 --- a/hw/acpi/pci.c +++ b/hw/acpi/pci.c @@ -72,7 +72,7 @@ typedef struct AcpiGenericInitiator { /* public */ char *pci_dev; - uint16_t node; + uint32_t node; } AcpiGenericInitiator; typedef struct AcpiGenericInitiatorClass { diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 47a4692a7d..4fd5da49e7 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -486,7 +486,7 @@ Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set, uint32_t io_offset, void build_srat_memory(GArray *table_data, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags); -void build_srat_pci_generic_initiator(GArray *table_data, int node, +void build_srat_pci_generic_initiator(GArray *table_data, uint32_t node, uint16_t segment, uint8_t bus, uint8_t devfn); From df37d496981344c24746be3553d7f6d8a0a9b1b9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:43:21 +0100 Subject: [PATCH 14/65] hw/acpi: Generic Initiator - add missing object class property descriptions. >From review of the Generic Ports support. 
These properties had no description set so add one. Signed-off-by: Jonathan Cameron Message-Id: <20240916174321.1843228-1-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c index a4835ce563..f88f450af3 100644 --- a/hw/acpi/pci.c +++ b/hw/acpi/pci.c @@ -137,8 +137,12 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data) { object_class_property_add_str(oc, "pci-dev", NULL, acpi_generic_initiator_set_pci_device); + object_class_property_set_description(oc, "pci-dev", + "PCI device to associate with the node"); object_class_property_add(oc, "node", "int", NULL, acpi_generic_initiator_set_node, NULL, NULL); + object_class_property_set_description(oc, "node", + "NUMA node associated with the PCI device"); } static int build_acpi_generic_initiator(Object *obj, void *opaque) From 1478b5609022ed4331bff83d06cefed983df82ac Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:35:13 +0100 Subject: [PATCH 15/65] hw/pci-bridge/cxl_root_port: Provide x-speed and x-width properties. Approach copied from gen_pcie_root_port.c Previously the link defaulted to a maximum of 2.5GT/s and 1x. Enable setting its maximum values. The actual value after 'training' will depend on the downstream device configuration. Signed-off-by: Jonathan Cameron Message-Id: <20240916173518.1843023-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin Reviewed-by: Fan Ni --- hw/pci-bridge/cxl_root_port.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c index 2dd10239bd..5e2156d7ba 100644 --- a/hw/pci-bridge/cxl_root_port.c +++ b/hw/pci-bridge/cxl_root_port.c @@ -24,6 +24,7 @@ #include "hw/pci/pcie_port.h" #include "hw/pci/msi.h" #include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" #include "hw/sysbus.h" #include "qapi/error.h" #include "hw/cxl/cxl.h" @@ -206,6 +207,10 @@ static Property gen_rp_props[] = { -1), DEFINE_PROP_SIZE("pref64-reserve", CXLRootPort, res_reserve.mem_pref_64, -1), + DEFINE_PROP_PCIE_LINK_SPEED("x-speed", PCIESlot, + speed, PCIE_LINK_SPEED_64), + DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot, + width, PCIE_LINK_WIDTH_32), DEFINE_PROP_END_OF_LIST() }; From 845f94de78cb6c063234176ff7c0ac8e430d19fe Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:35:14 +0100 Subject: [PATCH 16/65] hw/pci-bridge/cxl_downstream: Provide x-speed and x-width properties. Copied from gen_pcie_root_port.c Drop the previous code that ensured a valid value in s->width, s->speed as now a default is provided so this will always be set. Note this changes the default settings but it is unlikely to have a negative effect on software as it will only affect ports with no downstream device. All other ports will use the settings from that device. Signed-off-by: Jonathan Cameron Message-Id: <20240916173518.1843023-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/pci-bridge/cxl_downstream.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c index 4b42984360..c347ac06f3 100644 --- a/hw/pci-bridge/cxl_downstream.c +++ b/hw/pci-bridge/cxl_downstream.c @@ -13,6 +13,8 @@ #include "hw/pci/msi.h" #include "hw/pci/pcie.h" #include "hw/pci/pcie_port.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" #include "hw/cxl/cxl.h" #include "qapi/error.h" @@ -210,24 +212,20 @@ static void cxl_dsp_exitfn(PCIDevice *d) pci_bridge_exitfn(d); } -static void cxl_dsp_instance_post_init(Object *obj) -{ - PCIESlot *s = PCIE_SLOT(obj); - - if (!s->speed) { - s->speed = QEMU_PCI_EXP_LNK_2_5GT; - } - - if (!s->width) { - s->width = QEMU_PCI_EXP_LNK_X1; - } -} +static Property cxl_dsp_props[] = { + DEFINE_PROP_PCIE_LINK_SPEED("x-speed", PCIESlot, + speed, PCIE_LINK_SPEED_64), + DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot, + width, PCIE_LINK_WIDTH_16), + DEFINE_PROP_END_OF_LIST() +}; static void cxl_dsp_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PCIDeviceClass *k = PCI_DEVICE_CLASS(oc); + device_class_set_props(dc, cxl_dsp_props); k->config_write = cxl_dsp_config_write; k->realize = cxl_dsp_realize; k->exit = cxl_dsp_exitfn; @@ -243,7 +241,6 @@ static const TypeInfo cxl_dsp_info = { .name = TYPE_CXL_DSP, .instance_size = sizeof(CXLDownstreamPort), .parent = TYPE_PCIE_SLOT, - .instance_post_init = cxl_dsp_instance_post_init, .class_init = cxl_dsp_class_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_PCIE_DEVICE }, From 6d1bda91337dcd0e7bf78da6f6b15af497966052 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:35:15 +0100 Subject: [PATCH 17/65] hw/pcie: Factor out PCI Express link register filling common to EP. Whilst not all link related registers are common between RP / Switch DSP and EP / Switch USP many of them are. 
Factor that group out to save on duplication when adding EP / Switch USP configurability. Signed-off-by: Jonathan Cameron Message-Id: <20240916173518.1843023-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 91 ++++++++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 41 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 4b2f0805c6..1ac6d89dcf 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -105,6 +105,55 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) pci_set_word(cmask + PCI_EXP_LNKSTA, 0); } +/* Includes setting the target speed default */ +static void pcie_cap_fill_lnk(uint8_t *exp_cap, PCIExpLinkWidth width, + PCIExpLinkSpeed speed) +{ + /* Clear and fill LNKCAP from what was configured above */ + pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + QEMU_PCI_EXP_LNKCAP_MLW(width) | + QEMU_PCI_EXP_LNKCAP_MLS(speed)); + + if (speed > QEMU_PCI_EXP_LNK_2_5GT) { + /* + * Target Link Speed defaults to the highest link speed supported by + * the component. 2.5GT/s devices are permitted to hardwire to zero. + */ + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKCTL2, + QEMU_PCI_EXP_LNKCAP_MLS(speed) & + PCI_EXP_LNKCTL2_TLS); + } + + /* + * 2.5 & 5.0GT/s can be fully described by LNKCAP, but 8.0GT/s is + * actually a reference to the highest bit supported in this register. + * We assume the device supports all link speeds.
+ */ + if (speed > QEMU_PCI_EXP_LNK_5GT) { + pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP2, ~0U); + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, + PCI_EXP_LNKCAP2_SLS_2_5GB | + PCI_EXP_LNKCAP2_SLS_5_0GB | + PCI_EXP_LNKCAP2_SLS_8_0GB); + if (speed > QEMU_PCI_EXP_LNK_8GT) { + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, + PCI_EXP_LNKCAP2_SLS_16_0GB); + } + if (speed > QEMU_PCI_EXP_LNK_16GT) { + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, + PCI_EXP_LNKCAP2_SLS_32_0GB); + } + if (speed > QEMU_PCI_EXP_LNK_32GT) { + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, + PCI_EXP_LNKCAP2_SLS_64_0GB); + } + } +} + static void pcie_cap_fill_slot_lnk(PCIDevice *dev) { PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT); @@ -115,13 +164,6 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) return; } - /* Clear and fill LNKCAP from what was configured above */ - pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP, - PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); - pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, - QEMU_PCI_EXP_LNKCAP_MLW(s->width) | - QEMU_PCI_EXP_LNKCAP_MLS(s->speed)); - /* * Link bandwidth notification is required for all root ports and * downstream ports supporting links wider than x1 or multiple link @@ -144,42 +186,9 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, PCI_EXP_LNKCAP_DLLLARC); /* the PCI_EXP_LNKSTA_DLLLA will be set in the hotplug function */ - - /* - * Target Link Speed defaults to the highest link speed supported by - * the component. 2.5GT/s devices are permitted to hardwire to zero. 
- */ - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKCTL2, - PCI_EXP_LNKCTL2_TLS); - pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKCTL2, - QEMU_PCI_EXP_LNKCAP_MLS(s->speed) & - PCI_EXP_LNKCTL2_TLS); } - /* - * 2.5 & 5.0GT/s can be fully described by LNKCAP, but 8.0GT/s is - * actually a reference to the highest bit supported in this register. - * We assume the device supports all link speeds. - */ - if (s->speed > QEMU_PCI_EXP_LNK_5GT) { - pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP2, ~0U); - pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, - PCI_EXP_LNKCAP2_SLS_2_5GB | - PCI_EXP_LNKCAP2_SLS_5_0GB | - PCI_EXP_LNKCAP2_SLS_8_0GB); - if (s->speed > QEMU_PCI_EXP_LNK_8GT) { - pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, - PCI_EXP_LNKCAP2_SLS_16_0GB); - } - if (s->speed > QEMU_PCI_EXP_LNK_16GT) { - pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, - PCI_EXP_LNKCAP2_SLS_32_0GB); - } - if (s->speed > QEMU_PCI_EXP_LNK_32GT) { - pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, - PCI_EXP_LNKCAP2_SLS_64_0GB); - } - } + pcie_cap_fill_lnk(exp_cap, s->width, s->speed); } int pcie_cap_init(PCIDevice *dev, uint8_t offset, From ea3f0ebc1a3ba380e682ea8aad38f8e8cbc0d6f7 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:35:16 +0100 Subject: [PATCH 18/65] hw/pcie: Provide a utility function for control of EP / SW USP link Whilst similar to existing PCIESlot link configuration a few registers need to be set differently so that the downstream device presents a 'configured' state that is then used to 'train' the upstream port on the link. Basically that means setting the status register to reflect it succeeding in training up to target settings. Signed-off-by: Jonathan Cameron Message-Id: <20240916173518.1843023-5-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci/pcie.c | 18 ++++++++++++++++++ include/hw/pci/pcie.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 1ac6d89dcf..2738dbb28d 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -154,6 +154,24 @@ static void pcie_cap_fill_lnk(uint8_t *exp_cap, PCIExpLinkWidth width, } } +void pcie_cap_fill_link_ep_usp(PCIDevice *dev, PCIExpLinkWidth width, + PCIExpLinkSpeed speed) +{ + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + + /* + * For an end point or USP need to set the current status as well + * as the capabilities. + */ + pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + QEMU_PCI_EXP_LNKSTA_NLW(width) | + QEMU_PCI_EXP_LNKSTA_CLS(speed)); + + pcie_cap_fill_lnk(exp_cap, width, speed); +} + static void pcie_cap_fill_slot_lnk(PCIDevice *dev) { PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT); diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index 5eddb90976..b8d59732bc 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -141,6 +141,8 @@ void pcie_acs_reset(PCIDevice *dev); void pcie_ari_init(PCIDevice *dev, uint16_t offset); void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num); void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned); +void pcie_cap_fill_link_ep_usp(PCIDevice *dev, PCIExpLinkWidth width, + PCIExpLinkSpeed speed); void pcie_cap_slot_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); From 14bd0f3865489d537a93b7c80617622473f224e4 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 16 Sep 2024 18:35:17 +0100 Subject: [PATCH 19/65] hw/mem/cxl-type3: Add properties to control link speed and width To establish performance characteristics of a CXL device when used via a particular CXL topology (root ports, switches, end points) it is necessary to set the appropriate link speed and width 
in the PCI Express capability structure. Provide x-speed and x-width properties for this. Signed-off-by: Jonathan Cameron Message-Id: <20240916173518.1843023-6-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/cxl_type3.c | 6 ++++++ include/hw/cxl/cxl_device.h | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 235ac40aeb..44d491d8f6 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -17,6 +17,7 @@ #include "hw/mem/pc-dimm.h" #include "hw/pci/pci.h" #include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" #include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" @@ -1200,6 +1201,7 @@ static void ct3d_reset(DeviceState *dev) uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers; uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask; + pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed); cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE); cxl_device_register_init_t3(ct3d); @@ -1229,6 +1231,10 @@ static Property ct3_props[] = { DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0), DEFINE_PROP_LINK("volatile-dc-memdev", CXLType3Dev, dc.host_dc, TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_PCIE_LINK_SPEED("x-speed", CXLType3Dev, + speed, PCIE_LINK_SPEED_32), + DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev, + width, PCIE_LINK_WIDTH_16), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index fdd0f4e62b..e14e56ae4b 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -549,6 +549,10 @@ struct CXLType3Dev { CXLCCI vdm_fm_owned_ld_mctp_cci; CXLCCI ld0_cci; + /* PCIe link characteristics */ + PCIExpLinkSpeed speed; + PCIExpLinkWidth width; + /* DOE */ DOECap doe_cdat; From fa19fe4e3a61765ff60914ee00fc1e7a6a38dba9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date:
Mon, 16 Sep 2024 18:35:18 +0100 Subject: [PATCH 20/65] hw/pci-bridge/cxl-upstream: Add properties to control link speed and width To establish performance characteristics of a CXL device when used via a particular CXL topology (root ports, switches, end points) it is necessary to set the appropriate link speed and width in the PCI Express capability structure. Provide x-speed and x-width properties for this. Signed-off-by: Jonathan Cameron Message-Id: <20240916173518.1843023-7-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/cxl_upstream.c | 6 ++++++ include/hw/pci-bridge/cxl_upstream_port.h | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c index a5a39cc524..55f8b0053f 100644 --- a/hw/pci-bridge/cxl_upstream.c +++ b/hw/pci-bridge/cxl_upstream.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" #include "hw/pci/msi.h" #include "hw/pci/pcie.h" #include "hw/pci/pcie_port.h" @@ -100,6 +101,7 @@ static void cxl_usp_reset(DeviceState *qdev) pci_bridge_reset(qdev); pcie_cap_deverr_reset(d); + pcie_cap_fill_link_ep_usp(d, usp->width, usp->speed); latch_registers(usp); } @@ -363,6 +365,10 @@ static void cxl_usp_exitfn(PCIDevice *d) static Property cxl_upstream_props[] = { DEFINE_PROP_UINT64("sn", CXLUpstreamPort, sn, UI64_NULL), DEFINE_PROP_STRING("cdat", CXLUpstreamPort, cxl_cstate.cdat.filename), + DEFINE_PROP_PCIE_LINK_SPEED("x-speed", CXLUpstreamPort, + speed, PCIE_LINK_SPEED_32), + DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLUpstreamPort, + width, PCIE_LINK_WIDTH_16), DEFINE_PROP_END_OF_LIST() }; diff --git a/include/hw/pci-bridge/cxl_upstream_port.h b/include/hw/pci-bridge/cxl_upstream_port.h index 12635139f6..f208397ffe 100644 --- a/include/hw/pci-bridge/cxl_upstream_port.h +++ b/include/hw/pci-bridge/cxl_upstream_port.h @@ -12,6 +12,10 @@ typedef struct
CXLUpstreamPort { /*< public >*/ CXLComponentState cxl_cstate; CXLCCI swcci; + + PCIExpLinkSpeed speed; + PCIExpLinkWidth width; + DOECap doe_cdat; uint64_t sn; } CXLUpstreamPort; From 9e4cc917e0be9c757d834a0e40c66def1fed5adc Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 20 Sep 2024 12:49:34 +0300 Subject: [PATCH 21/65] qdev-monitor: add option to report GenericError from find_device_state Here we just prepare for the following patch, making possible to report GenericError as recommended. This patch doesn't aim to prevent further use of DeviceNotFound by future interfaces: - find_device_state() is used in blk_by_qdev_id() and qmp_get_blk() functions, which may lead to spread of DeviceNotFound anyway - also, nothing prevent simply copy-pasting find_device_state() calls with false argument Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Markus Armbruster Acked-by: Raphael Norwitz Message-Id: <20240920094936.450987-2-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- system/qdev-monitor.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c index 44994ea0e1..6671137a91 100644 --- a/system/qdev-monitor.c +++ b/system/qdev-monitor.c @@ -885,13 +885,20 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp) object_unref(OBJECT(dev)); } -static DeviceState *find_device_state(const char *id, Error **errp) +/* + * Note that creating new APIs using error classes other than GenericError is + * not recommended. Set use_generic_error=true for new interfaces. + */ +static DeviceState *find_device_state(const char *id, bool use_generic_error, + Error **errp) { Object *obj = object_resolve_path_at(qdev_get_peripheral(), id); DeviceState *dev; if (!obj) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + error_set(errp, + (use_generic_error ? 
+ ERROR_CLASS_GENERIC_ERROR : ERROR_CLASS_DEVICE_NOT_FOUND), "Device '%s' not found", id); return NULL; } @@ -956,7 +963,7 @@ void qdev_unplug(DeviceState *dev, Error **errp) void qmp_device_del(const char *id, Error **errp) { - DeviceState *dev = find_device_state(id, errp); + DeviceState *dev = find_device_state(id, false, errp); if (dev != NULL) { if (dev->pending_deleted_event && (dev->pending_deleted_expires_ms == 0 || @@ -1076,7 +1083,7 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp) GLOBAL_STATE_CODE(); - dev = find_device_state(id, errp); + dev = find_device_state(id, false, errp); if (dev == NULL) { return NULL; } From 4dfa12731439c4a3cbfd9d1767acddfbf79549fd Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 20 Sep 2024 12:49:35 +0300 Subject: [PATCH 22/65] vhost-user-blk: split vhost_user_blk_sync_config() Split vhost_user_blk_sync_config() out from vhost_user_blk_handle_config_change(), to be reused in the following commit. Signed-off-by: Vladimir Sementsov-Ogievskiy Acked-by: Raphael Norwitz Message-Id: <20240920094936.450987-3-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefano Garzarella --- hw/block/vhost-user-blk.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 5b7f46bbb0..48b3dabb8d 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -90,27 +90,39 @@ static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config) s->blkcfg.wce = blkcfg->wce; } +static int vhost_user_blk_sync_config(DeviceState *dev, Error **errp) +{ + int ret; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, + vdev->config_len, errp); + if (ret < 0) { + return ret; + } + + memcpy(vdev->config, &s->blkcfg, vdev->config_len); + virtio_notify_config(vdev); + + return 0; +} + static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) { int ret; - VirtIODevice *vdev = dev->vdev; - VHostUserBlk *s = VHOST_USER_BLK(dev->vdev); Error *local_err = NULL; if (!dev->started) { return 0; } - ret = vhost_dev_get_config(dev, (uint8_t *)&s->blkcfg, - vdev->config_len, &local_err); + ret = vhost_user_blk_sync_config(DEVICE(dev->vdev), &local_err); if (ret < 0) { error_report_err(local_err); return ret; } - memcpy(dev->vdev->config, &s->blkcfg, vdev->config_len); - virtio_notify_config(dev->vdev); - return 0; } From 3f98408e2e4fb1792102aed2cd5425aa0e34cc9c Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 20 Sep 2024 12:49:36 +0300 Subject: [PATCH 23/65] qapi: introduce device-sync-config Add command to sync config from vhost-user backend to the device. It may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not triggered interrupt to the guest or just not available (not supported by vhost-user server). Command result is racy if allow it during migration. Let's not allow that. 
Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Markus Armbruster Acked-by: Raphael Norwitz Message-Id: <20240920094936.450987-4-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/vhost-user-blk.c | 1 + hw/virtio/virtio-pci.c | 9 +++++++++ include/hw/qdev-core.h | 6 ++++++ qapi/qdev.json | 24 ++++++++++++++++++++++++ system/qdev-monitor.c | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 78 insertions(+) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 48b3dabb8d..7996e49821 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -591,6 +591,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, vhost_user_blk_properties); dc->vmsd = &vmstate_vhost_user_blk; + dc->sync_config = vhost_user_blk_sync_config; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); vdc->realize = vhost_user_blk_device_realize; vdc->unrealize = vhost_user_blk_device_unrealize; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 4d832fe845..c5a809b956 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2385,6 +2385,14 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) vpciklass->parent_dc_realize(qdev, errp); } +static int virtio_pci_sync_config(DeviceState *dev, Error **errp) +{ + VirtIOPCIProxy *proxy = VIRTIO_PCI(dev); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + + return qdev_sync_config(DEVICE(vdev), errp); +} + static void virtio_pci_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -2401,6 +2409,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data) device_class_set_parent_realize(dc, virtio_pci_dc_realize, &vpciklass->parent_dc_realize); rc->phases.hold = virtio_pci_bus_reset_hold; + dc->sync_config = virtio_pci_sync_config; } static const TypeInfo virtio_pci_info = { diff --git a/include/hw/qdev-core.h 
b/include/hw/qdev-core.h index aa97c34a4b..94914858d8 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -95,6 +95,7 @@ typedef void (*DeviceUnrealize)(DeviceState *dev); typedef void (*DeviceReset)(DeviceState *dev); typedef void (*BusRealize)(BusState *bus, Error **errp); typedef void (*BusUnrealize)(BusState *bus); +typedef int (*DeviceSyncConfig)(DeviceState *dev, Error **errp); /** * struct DeviceClass - The base class for all devices. @@ -103,6 +104,9 @@ typedef void (*BusUnrealize)(BusState *bus); * property is changed to %true. * @unrealize: Callback function invoked when the #DeviceState:realized * property is changed to %false. + * @sync_config: Callback function invoked when QMP command device-sync-config + * is called. Should synchronize device configuration from host to guest part + * and notify the guest about the change. * @hotpluggable: indicates if #DeviceClass is hotpluggable, available * as readonly "hotpluggable" property of #DeviceState instance * @@ -162,6 +166,7 @@ struct DeviceClass { DeviceReset legacy_reset; DeviceRealize realize; DeviceUnrealize unrealize; + DeviceSyncConfig sync_config; /** * @vmsd: device state serialisation description for @@ -547,6 +552,7 @@ bool qdev_hotplug_allowed(DeviceState *dev, Error **errp); */ HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev); void qdev_unplug(DeviceState *dev, Error **errp); +int qdev_sync_config(DeviceState *dev, Error **errp); void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); void qdev_machine_creation_done(void); diff --git a/qapi/qdev.json b/qapi/qdev.json index 53d147c7b4..2a581129c9 100644 --- a/qapi/qdev.json +++ b/qapi/qdev.json @@ -163,3 +163,27 @@ ## { 'event': 'DEVICE_UNPLUG_GUEST_ERROR', 'data': { '*device': 'str', 'path': 'str' } } + +## +# @device-sync-config: +# +# Synchronize device configuration from host to guest part. 
First, +# copy the configuration from the host part (backend) to the guest +# part (frontend). Then notify guest software that device +# configuration changed. +# +# The command may be used to notify the guest about block device +# capacity change. Currently only vhost-user-blk device supports +# this. +# +# @id: the device's ID or QOM path +# +# Features: +# +# @unstable: The command is experimental. +# +# Since: 9.1 +## +{ 'command': 'device-sync-config', + 'features': [ 'unstable' ], + 'data': {'id': 'str'} } diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c index 6671137a91..127456080b 100644 --- a/system/qdev-monitor.c +++ b/system/qdev-monitor.c @@ -23,6 +23,7 @@ #include "monitor/monitor.h" #include "monitor/qdev.h" #include "sysemu/arch_init.h" +#include "sysemu/runstate.h" #include "qapi/error.h" #include "qapi/qapi-commands-qdev.h" #include "qapi/qmp/dispatch.h" @@ -977,6 +978,43 @@ void qmp_device_del(const char *id, Error **errp) } } +int qdev_sync_config(DeviceState *dev, Error **errp) +{ + DeviceClass *dc = DEVICE_GET_CLASS(dev); + + if (!dc->sync_config) { + error_setg(errp, "device-sync-config is not supported for '%s'", + object_get_typename(OBJECT(dev))); + return -ENOTSUP; + } + + return dc->sync_config(dev, errp); +} + +void qmp_device_sync_config(const char *id, Error **errp) +{ + DeviceState *dev; + + /* + * During migration there is a race between syncing configuration + * and migrating it (if migrate first, that target would get + * outdated version), so let's just not allow it. + */ + + if (migration_is_running()) { + error_setg(errp, "Config synchronization is not allowed " + "during migration"); + return; + } + + dev = find_device_state(id, true, errp); + if (!dev) { + return; + } + + qdev_sync_config(dev, errp); +} + void hmp_device_add(Monitor *mon, const QDict *qdict) { Error *err = NULL; From feb58e3b261db503ade94c5f43ccedeee4eac41f Mon Sep 17 00:00:00 2001 From: "Michael S.
Tsirkin" Date: Mon, 4 Nov 2024 09:11:46 -0500 Subject: [PATCH 24/65] acpi/disassemle-aml.sh: fix up after dir reorg We moved expected files around, fix up the disassembler script. Fixes: 7c08eefcaf ("tests/data/acpi: Move x86 ACPI tables under x86/${machine} path") Fixes: 7434f90467 ("tests/data/acpi/virt: Move ARM64 ACPI tables under aarch64/${machine} path") Cc: "Sunil V L" Message-ID: Signed-off-by: Michael S. Tsirkin Acked-by: Igor Mammedov --- tests/data/acpi/disassemle-aml.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/acpi/disassemle-aml.sh b/tests/data/acpi/disassemle-aml.sh index 253b7620a0..89561d233d 100755 --- a/tests/data/acpi/disassemle-aml.sh +++ b/tests/data/acpi/disassemle-aml.sh @@ -14,7 +14,7 @@ while getopts "o:" arg; do esac done -for machine in tests/data/acpi/* +for machine in tests/data/acpi/*/* do if [[ ! -d "$machine" ]]; then From d944497b5519cdefe2d38cf68317b93e14dd388a Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 24 Sep 2024 13:24:10 +0000 Subject: [PATCH 25/65] tests/acpi: pc: allow DSDT acpi table changes Signed-off-by: Ricardo Ribalda Message-Id: <20240924132417.739809-2-ribalda@chromium.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Acked-by: Igor Mammedov --- tests/qtest/bios-tables-test-allowed-diff.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..6fef8e558b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,17 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/x86/pc/DSDT", +"tests/data/acpi/x86/pc/DSDT.acpierst", +"tests/data/acpi/x86/pc/DSDT.acpihmat", +"tests/data/acpi/x86/pc/DSDT.bridge", +"tests/data/acpi/x86/pc/DSDT.cphp", +"tests/data/acpi/x86/pc/DSDT.dimmpxm", +"tests/data/acpi/x86/pc/DSDT.hpbridge", +"tests/data/acpi/x86/pc/DSDT.hpbrroot", +"tests/data/acpi/x86/pc/DSDT.ipmikcs", +"tests/data/acpi/x86/pc/DSDT.memhp", +"tests/data/acpi/x86/pc/DSDT.nohpet", +"tests/data/acpi/x86/pc/DSDT.numamem", +"tests/data/acpi/x86/pc/DSDT.roothp", +"tests/data/acpi/x86/q35/DSDT.cxl", +"tests/data/acpi/x86/q35/DSDT.viot", +"tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x", From 7916bb54319a56be5c5eca0c890a4d2aa22b9bef Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 24 Sep 2024 13:24:11 +0000 Subject: [PATCH 26/65] hw/i386/acpi-build: return a non-var package from _PRT() Windows XP seems to have issues when _PRT() returns a variable package. We know in advance the size, so we can return a fixed package instead. https://lore.kernel.org/qemu-devel/c82d9331-a8ce-4bb0-b51f-2ee789e27c86@ilande.co.uk/T/#m541190c942676bccf7a7f7fbcb450d94a4e2da53 Reviewed-by: Igor Mammedov Reported-by: Mark Cave-Ayland Fixes: 99cb2c6c7b ("hw/i386/acpi-build: Return a pre-computed _PRT table") Closes: https://lore.kernel.org/all/eb11c984-ebe4-4a09-9d71-1e9db7fe7e6f@ilande.co.uk/ Signed-off-by: Ricardo Ribalda Message-Id: <20240924132417.739809-3-ribalda@chromium.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/acpi-build.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index d01e704162..508a6094aa 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -740,7 +740,8 @@ static Aml *build_prt(bool is_pci0_prt) int pin; method = aml_method("_PRT", 0, AML_NOTSERIALIZED); - rt_pkg = aml_varpackage(nroutes); + assert(nroutes < 256); + rt_pkg = aml_package(nroutes); for (pin = 0; pin < nroutes; pin++) { Aml *pkg = aml_package(4); From 9848a76c0b56172a370640744e99fe78fea4d4c0 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 24 Sep 2024 13:24:12 +0000 Subject: [PATCH 27/65] tests/acpi: pc: update golden masters for DSDT Note: since all we did is replace VarPackageOp with PackageOP, and both are represented by Package() in ASL, the AML is different but ASL is the same. Signed-off-by: Ricardo Ribalda Message-Id: <20240924132417.739809-4-ribalda@chromium.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Acked-by: Igor Mammedov --- tests/data/acpi/x86/pc/DSDT | Bin 8527 -> 8526 bytes tests/data/acpi/x86/pc/DSDT.acpierst | Bin 8438 -> 8437 bytes tests/data/acpi/x86/pc/DSDT.acpihmat | Bin 9852 -> 9851 bytes tests/data/acpi/x86/pc/DSDT.bridge | Bin 15398 -> 15397 bytes tests/data/acpi/x86/pc/DSDT.cphp | Bin 8991 -> 8990 bytes tests/data/acpi/x86/pc/DSDT.dimmpxm | Bin 10181 -> 10180 bytes tests/data/acpi/x86/pc/DSDT.hpbridge | Bin 8478 -> 8477 bytes tests/data/acpi/x86/pc/DSDT.hpbrroot | Bin 5034 -> 5033 bytes tests/data/acpi/x86/pc/DSDT.ipmikcs | Bin 8599 -> 8598 bytes tests/data/acpi/x86/pc/DSDT.memhp | Bin 9886 -> 9885 bytes tests/data/acpi/x86/pc/DSDT.nohpet | Bin 8385 -> 8384 bytes tests/data/acpi/x86/pc/DSDT.numamem | Bin 8533 -> 8532 bytes tests/data/acpi/x86/pc/DSDT.roothp | Bin 12320 -> 12319 bytes tests/data/acpi/x86/q35/DSDT.cxl | Bin 13148 -> 13146 bytes tests/data/acpi/x86/q35/DSDT.viot | Bin 14615 -> 14612 bytes tests/qtest/bios-tables-test-allowed-diff.h | 16 ---------------- 16 files changed, 16 deletions(-) diff --git a/tests/data/acpi/x86/pc/DSDT b/tests/data/acpi/x86/pc/DSDT index 92225236e717b2e522a2ee00492fb0ded418dc7b..8b8235fe79e2fa08a6f840c8479edb75f5a047b9 100644 GIT binary patch delta 50 zcmX@_bk2#(CD@oAE|a9ky!c?JcmeN{0B27F5towqfS?eDB|_fCn**8t G$pHX!zYhWc delta 51 zcmX@-bl!=}CD)i9E|Zk!y!c?Jcmbc10B27F5!aIVfS?eDCBi<%T$=-! 
H{>cFVe<%+E diff --git a/tests/data/acpi/x86/pc/DSDT.acpierst b/tests/data/acpi/x86/pc/DSDT.acpierst index 25b39955059409b177870800949eaf937cd39005..06829b9c6c6d726d955dc7c99bc9f42448e22aeb 100644 GIT binary patch delta 50 zcmez7_|=iiCD delta 51 zcmezE^T&tFCDBgE$Wk diff --git a/tests/data/acpi/x86/pc/DSDT.cphp b/tests/data/acpi/x86/pc/DSDT.cphp index 1dc928333d7ae7e4df6bb51d850af5e1cb480158..045a52e75b7fcd4e5f840a758c548231498b96e4 100644 GIT binary patch delta 50 zcmbR5HqVXACD-1^5)N_z delta 51 zcmbR1G|!34CDw)2U4E%;)9*y1$;^ZoIMRhTub5ufTRsZDwX% GCIkS3!Vb~^ diff --git a/tests/data/acpi/x86/pc/DSDT.ipmikcs b/tests/data/acpi/x86/pc/DSDT.ipmikcs index c2a0330d97d495298889b9e28bde2f90235cea88..0ca664688b16baa3a06b8440181de4f17511c6b0 100644 GIT binary patch delta 50 zcmbR4Jk6QQCDO%X%3M9 delta 51 zcmX@$c+io{CDBT_!2ddGWzc@d7?20nVNVBCaLz0YM=QON4!jxi$wd H703Yqg6|HI diff --git a/tests/data/acpi/x86/pc/DSDT.numamem b/tests/data/acpi/x86/pc/DSDT.numamem index 8a6b56fe7da18bf42c339d13b863aabf81780527..2c98cafbff5db04410b35a1151eaf18723a4dad7 100644 GIT binary patch delta 50 zcmccWbj69wCD2P delta 51 zcmbQAupoiUCDBTe@uL^Q+#xj=VoBze@uL^Q+#xj*JgEDD@GpIlK6n25QZhfKE+&{ VBfLri?IQ+#xj>tuD=rQ9wh@c}_03`>N(i#Okp Z4P{4`H&;cKf1!#h@2-m~|3g=v8vqA!B?tfj delta 127 zcmbPIG`)z+CD(KT@#aP@VOcK!hM4$Zr}*e5x5?_VOL<&N;sb(07?ue86mxC9 gE*r{@tiVhaLxC6jWCLApWJRvJSQKrxH@(UX0RQ4B(*OVf diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index 6fef8e558b..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,17 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/x86/pc/DSDT", -"tests/data/acpi/x86/pc/DSDT.acpierst", -"tests/data/acpi/x86/pc/DSDT.acpihmat", -"tests/data/acpi/x86/pc/DSDT.bridge", -"tests/data/acpi/x86/pc/DSDT.cphp", -"tests/data/acpi/x86/pc/DSDT.dimmpxm", -"tests/data/acpi/x86/pc/DSDT.hpbridge", -"tests/data/acpi/x86/pc/DSDT.hpbrroot", 
-"tests/data/acpi/x86/pc/DSDT.ipmikcs", -"tests/data/acpi/x86/pc/DSDT.memhp", -"tests/data/acpi/x86/pc/DSDT.nohpet", -"tests/data/acpi/x86/pc/DSDT.numamem", -"tests/data/acpi/x86/pc/DSDT.roothp", -"tests/data/acpi/x86/q35/DSDT.cxl", -"tests/data/acpi/x86/q35/DSDT.viot", -"tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x", From 2e6f051cfc58e69dcb392cd245d8f01b0c2e963f Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 27 Sep 2024 12:29:09 -0500 Subject: [PATCH 28/65] amd_iommu: Rename variable mmio to mr_mmio Rename the MMIO memory region variable 'mmio' to 'mr_mmio' so to correctly name align with struct AMDVIState::variable type. No functional change intended. Reviewed-by: Alejandro Jimenez Signed-off-by: Suravee Suthikulpanit Signed-off-by: Santosh Shukla Message-Id: <20240927172913.121477-2-santosh.shukla@amd.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 4 ++-- hw/i386/amd_iommu.c | 6 +++--- hw/i386/amd_iommu.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 508a6094aa..9fcc2897b8 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2324,7 +2324,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, /* Capability offset */ build_append_int_noprefix(table_data, s->pci.capab_offset, 2); /* IOMMU base address */ - build_append_int_noprefix(table_data, s->mmio.addr, 8); + build_append_int_noprefix(table_data, s->mr_mmio.addr, 8); /* PCI Segment Group */ build_append_int_noprefix(table_data, 0, 2); /* IOMMU info */ @@ -2359,7 +2359,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, /* Capability offset */ build_append_int_noprefix(table_data, s->pci.capab_offset, 2); /* IOMMU base address */ - build_append_int_noprefix(table_data, s->mmio.addr, 8); + build_append_int_noprefix(table_data, s->mr_mmio.addr, 8); /* PCI Segment Group */ 
build_append_int_noprefix(table_data, 0, 2); /* IOMMU info */ diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 464f0b666e..abb64ea507 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1598,10 +1598,10 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); /* set up MMIO */ - memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", - AMDVI_MMIO_SIZE); + memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s, + "amdvi-mmio", AMDVI_MMIO_SIZE); memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR, - &s->mmio); + &s->mr_mmio); pci_setup_iommu(bus, &amdvi_iommu_ops, s); amdvi_init(s); } diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 73619fe9ea..e5c2ae94f2 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -353,7 +353,7 @@ struct AMDVIState { uint32_t pprlog_head; /* ppr log head */ uint32_t pprlog_tail; /* ppr log tail */ - MemoryRegion mmio; /* MMIO region */ + MemoryRegion mr_mmio; /* MMIO region */ uint8_t mmior[AMDVI_MMIO_SIZE]; /* read/write MMIO */ uint8_t w1cmask[AMDVI_MMIO_SIZE]; /* read/write 1 clear mask */ uint8_t romask[AMDVI_MMIO_SIZE]; /* MMIO read/only mask */ From c1f46999ef506d9854534560a94d02cf3cf9edd1 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 27 Sep 2024 12:29:10 -0500 Subject: [PATCH 29/65] amd_iommu: Add support for pass though mode Introduce 'nodma' shared memory region to support PT mode so that for each device, we only create an alias to shared memory region when DMA-remapping is disabled. Reviewed-by: Alejandro Jimenez Signed-off-by: Suravee Suthikulpanit Signed-off-by: Santosh Shukla Message-Id: <20240927172913.121477-3-santosh.shukla@amd.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/amd_iommu.c | 49 ++++++++++++++++++++++++++++++++++++--------- hw/i386/amd_iommu.h | 2 ++ 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index abb64ea507..7c7760c573 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -60,8 +60,9 @@ struct AMDVIAddressSpace { uint8_t bus_num; /* bus number */ uint8_t devfn; /* device function */ AMDVIState *iommu_state; /* AMDVI - one per machine */ - MemoryRegion root; /* AMDVI Root memory map region */ + MemoryRegion root; /* AMDVI Root memory map region */ IOMMUMemoryRegion iommu; /* Device's address translation region */ + MemoryRegion iommu_nodma; /* Alias of shared nodma memory region */ MemoryRegion iommu_ir; /* Device's interrupt remapping region */ AddressSpace as; /* device's corresponding address space */ }; @@ -1412,6 +1413,7 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) AMDVIState *s = opaque; AMDVIAddressSpace **iommu_as, *amdvi_dev_as; int bus_num = pci_bus_num(bus); + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); iommu_as = s->address_spaces[bus_num]; @@ -1436,13 +1438,13 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) * Memory region relationships looks like (Address range shows * only lower 32 bits to make it short in length...): * - * |-----------------+-------------------+----------| - * | Name | Address range | Priority | - * |-----------------+-------------------+----------+ - * | amdvi_root | 00000000-ffffffff | 0 | - * | amdvi_iommu | 00000000-ffffffff | 1 | - * | amdvi_iommu_ir | fee00000-feefffff | 64 | - * |-----------------+-------------------+----------| + * |--------------------+-------------------+----------| + * | Name | Address range | Priority | + * |--------------------+-------------------+----------+ + * | amdvi-root | 00000000-ffffffff | 0 | + * | amdvi-iommu_nodma | 00000000-ffffffff | 0 | + * | amdvi-iommu_ir | fee00000-feefffff | 64 
| + * |--------------------+-------------------+----------| */ memory_region_init_iommu(&amdvi_dev_as->iommu, sizeof(amdvi_dev_as->iommu), @@ -1461,7 +1463,25 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) 64); memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0, MEMORY_REGION(&amdvi_dev_as->iommu), - 1); + 0); + + /* Build the DMA Disabled alias to shared memory */ + memory_region_init_alias(&amdvi_dev_as->iommu_nodma, OBJECT(s), + "amdvi-sys", &s->mr_sys, 0, + memory_region_size(&s->mr_sys)); + memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0, + &amdvi_dev_as->iommu_nodma, + 0); + + if (!x86_iommu->pt_supported) { + memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false); + memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), + true); + } else { + memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), + false); + memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, true); + } } return &iommu_as[devfn]->as; } @@ -1602,6 +1622,17 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) "amdvi-mmio", AMDVI_MMIO_SIZE); memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR, &s->mr_mmio); + + /* Create the share memory regions by all devices */ + memory_region_init(&s->mr_sys, OBJECT(s), "amdvi-sys", UINT64_MAX); + + /* set up the DMA disabled memory region */ + memory_region_init_alias(&s->mr_nodma, OBJECT(s), + "amdvi-nodma", get_system_memory(), 0, + memory_region_size(get_system_memory())); + memory_region_add_subregion_overlap(&s->mr_sys, 0, + &s->mr_nodma, 0); + pci_setup_iommu(bus, &amdvi_iommu_ops, s); amdvi_init(s); } diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index e5c2ae94f2..be417e51c4 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -354,6 +354,8 @@ struct AMDVIState { uint32_t pprlog_tail; /* ppr log tail */ MemoryRegion mr_mmio; /* MMIO region */ + MemoryRegion mr_sys; + MemoryRegion mr_nodma; uint8_t mmior[AMDVI_MMIO_SIZE]; /* 
read/write MMIO */ uint8_t w1cmask[AMDVI_MMIO_SIZE]; /* read/write 1 clear mask */ uint8_t romask[AMDVI_MMIO_SIZE]; /* MMIO read/only mask */ From 9fc9dbac61ddde7d8df37e84c8e02cec249d3222 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 27 Sep 2024 12:29:11 -0500 Subject: [PATCH 30/65] amd_iommu: Use shared memory region for Interrupt Remapping Use shared memory region for interrupt remapping which can be aliased by all devices. Reviewed-by: Alejandro Jimenez Signed-off-by: Suravee Suthikulpanit Signed-off-by: Santosh Shukla Message-Id: <20240927172913.121477-4-santosh.shukla@amd.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/amd_iommu.c | 22 ++++++++++++++-------- hw/i386/amd_iommu.h | 1 + 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 7c7760c573..9b923f6eb4 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1443,7 +1443,7 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) * |--------------------+-------------------+----------+ * | amdvi-root | 00000000-ffffffff | 0 | * | amdvi-iommu_nodma | 00000000-ffffffff | 0 | - * | amdvi-iommu_ir | fee00000-feefffff | 64 | + * | amdvi-iommu_ir | fee00000-feefffff | 1 | * |--------------------+-------------------+----------| */ memory_region_init_iommu(&amdvi_dev_as->iommu, @@ -1454,13 +1454,6 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) memory_region_init(&amdvi_dev_as->root, OBJECT(s), "amdvi_root", UINT64_MAX); address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name); - memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s), - &amdvi_ir_ops, s, "amd_iommu_ir", - AMDVI_INT_ADDR_SIZE); - memory_region_add_subregion_overlap(&amdvi_dev_as->root, - AMDVI_INT_ADDR_FIRST, - &amdvi_dev_as->iommu_ir, - 64); memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0, MEMORY_REGION(&amdvi_dev_as->iommu), 0); @@ -1472,6 +1465,13 @@ 
static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0, &amdvi_dev_as->iommu_nodma, 0); + /* Build the Interrupt Remapping alias to shared memory */ + memory_region_init_alias(&amdvi_dev_as->iommu_ir, OBJECT(s), + "amdvi-ir", &s->mr_ir, 0, + memory_region_size(&s->mr_ir)); + memory_region_add_subregion_overlap(MEMORY_REGION(&amdvi_dev_as->iommu), + AMDVI_INT_ADDR_FIRST, + &amdvi_dev_as->iommu_ir, 1); if (!x86_iommu->pt_supported) { memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false); @@ -1633,6 +1633,12 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) memory_region_add_subregion_overlap(&s->mr_sys, 0, &s->mr_nodma, 0); + /* set up the Interrupt Remapping memory region */ + memory_region_init_io(&s->mr_ir, OBJECT(s), &amdvi_ir_ops, + s, "amdvi-ir", AMDVI_INT_ADDR_SIZE); + memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST, + &s->mr_ir, 1); + pci_setup_iommu(bus, &amdvi_iommu_ops, s); amdvi_init(s); } diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index be417e51c4..e0dac4d9a9 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -356,6 +356,7 @@ struct AMDVIState { MemoryRegion mr_mmio; /* MMIO region */ MemoryRegion mr_sys; MemoryRegion mr_nodma; + MemoryRegion mr_ir; uint8_t mmior[AMDVI_MMIO_SIZE]; /* read/write MMIO */ uint8_t w1cmask[AMDVI_MMIO_SIZE]; /* read/write 1 clear mask */ uint8_t romask[AMDVI_MMIO_SIZE]; /* MMIO read/only mask */ From f84aad4d718b83d2a4d90485992e5421430032e1 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 27 Sep 2024 12:29:12 -0500 Subject: [PATCH 31/65] amd_iommu: Send notification when invalidate interrupt entry cache In order to support AMD IOMMU interrupt remapping emulation with PCI pass-through devices, QEMU needs to notify VFIO when guest IOMMU driver updates and invalidate the guest interrupt remapping table (IRT), and communicate information so that the host IOMMU 
driver can update the shadowed interrupt remapping table in the host IOMMU. Therefore, send notification when guest IOMMU emulates the IRT invalidation commands. Reviewed-by: Alejandro Jimenez Signed-off-by: Suravee Suthikulpanit Signed-off-by: Santosh Shukla Message-Id: <20240927172913.121477-5-santosh.shukla@amd.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/amd_iommu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 9b923f6eb4..38297376e7 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -431,6 +431,12 @@ static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd) trace_amdvi_ppr_exec(); } +static void amdvi_intremap_inval_notify_all(AMDVIState *s, bool global, + uint32_t index, uint32_t mask) +{ + x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask); +} + static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd) { if (extract64(cmd[0], 0, 60) || cmd[1]) { @@ -438,6 +444,9 @@ static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd) s->cmdbuf + s->cmdbuf_head); } + /* Notify global invalidation */ + amdvi_intremap_inval_notify_all(s, true, 0, 0); + amdvi_iotlb_reset(s); trace_amdvi_all_inval(); } @@ -486,6 +495,9 @@ static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd) return; } + /* Notify global invalidation */ + amdvi_intremap_inval_notify_all(s, true, 0, 0); + trace_amdvi_intr_inval(); } From b12cb3819baf6d9ee8140d4dd6d36fa829e2c6d9 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 27 Sep 2024 12:29:13 -0500 Subject: [PATCH 32/65] amd_iommu: Check APIC ID > 255 for XTSup The XTSup mode enables x2APIC support for AMD IOMMU, which is needed to support vcpu w/ APIC ID > 255. Reviewed-by: Alejandro Jimenez Signed-off-by: Suravee Suthikulpanit Signed-off-by: Santosh Shukla Message-Id: <20240927172913.121477-6-santosh.shukla@amd.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/amd_iommu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 38297376e7..13af7211e1 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -32,6 +32,7 @@ #include "trace.h" #include "hw/i386/apic-msidef.h" #include "hw/qdev-properties.h" +#include "kvm/kvm_i386.h" /* used AMD-Vi MMIO registers */ const char *amdvi_mmio_low[] = { @@ -1651,6 +1652,16 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST, &s->mr_ir, 1); + /* AMD IOMMU with x2APIC mode requires xtsup=on */ + if (x86ms->apic_id_limit > 255 && !s->xtsup) { + error_report("AMD IOMMU with x2APIC confguration requires xtsup=on"); + exit(EXIT_FAILURE); + } + if (s->xtsup && kvm_irqchip_is_split() && !kvm_enable_x2apic()) { + error_report("AMD IOMMU xtsup=on requires support on the KVM side"); + exit(EXIT_FAILURE); + } + pci_setup_iommu(bus, &amdvi_iommu_ops, s); amdvi_init(s); } From 55fa4be6f76a3e1b1caa33a8f0ab4dc217d32e49 Mon Sep 17 00:00:00 2001 From: Gao Shiyuan Date: Wed, 30 Oct 2024 21:13:24 +0800 Subject: [PATCH 33/65] virtio-pci: fix memory_region_find for VirtIOPCIRegion's MR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As shown below, if a virtio PCI device is attached under a pci-bridge, the MR of VirtIOPCIRegion does not belong to any address space. So memory_region_find cannot be used to search for this MR. Introduce the virtio-pci and pci_bridge address spaces to solve this problem. 
Before: memory-region: pci_bridge_pci 0000000000000000-ffffffffffffffff (prio 0, i/o): pci_bridge_pci 00000000fe840000-00000000fe840fff (prio 1, i/o): virtio-net-pci-msix 00000000fe840000-00000000fe84003f (prio 0, i/o): msix-table 00000000fe840800-00000000fe840807 (prio 0, i/o): msix-pba 0000380000000000-0000380000003fff (prio 1, i/o): virtio-pci 0000380000000000-0000380000000fff (prio 0, i/o): virtio-pci-common-virtio-net 0000380000001000-0000380000001fff (prio 0, i/o): virtio-pci-isr-virtio-net 0000380000002000-0000380000002fff (prio 0, i/o): virtio-pci-device-virtio-net 0000380000003000-0000380000003fff (prio 0, i/o): virtio-pci-notify-virtio-net After: address-space: virtio-pci-cfg-mem-as 0000380000000000-0000380000003fff (prio 1, i/o): virtio-pci 0000380000000000-0000380000000fff (prio 0, i/o): virtio-pci-common-virtio-net 0000380000001000-0000380000001fff (prio 0, i/o): virtio-pci-isr-virtio-net 0000380000002000-0000380000002fff (prio 0, i/o): virtio-pci-device-virtio-net 0000380000003000-0000380000003fff (prio 0, i/o): virtio-pci-notify-virtio-net address-space: pci_bridge_pci_mem 0000000000000000-ffffffffffffffff (prio 0, i/o): pci_bridge_pci 00000000fe840000-00000000fe840fff (prio 1, i/o): virtio-net-pci-msix 00000000fe840000-00000000fe84003f (prio 0, i/o): msix-table 00000000fe840800-00000000fe840807 (prio 0, i/o): msix-pba 0000380000000000-0000380000003fff (prio 1, i/o): virtio-pci 0000380000000000-0000380000000fff (prio 0, i/o): virtio-pci-common-virtio-net 0000380000001000-0000380000001fff (prio 0, i/o): virtio-pci-isr-virtio-net 0000380000002000-0000380000002fff (prio 0, i/o): virtio-pci-device-virtio-net 0000380000003000-0000380000003fff (prio 0, i/o): virtio-pci-notify-virtio-net Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2576 Fixes: ffa8a3e3b2e6 ("virtio-pci: Add lookup subregion of VirtIOPCIRegion MR") Co-developed-by: Zuo Boqun Signed-off-by: Zuo Boqun Co-developed-by: Wang Liang Signed-off-by: Wang Liang Signed-off-by: Gao Shiyuan 
Message-Id: <20241030131324.34144-1-gaoshiyuan@baidu.com> Tested-by: Daniel P. Berrangé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci_bridge.c | 5 ++++ hw/virtio/virtio-pci.c | 10 +++++++ include/hw/pci/pci_bridge.h | 2 ++ include/hw/virtio/virtio-pci.h | 3 +++ tests/qtest/fuzz-virtio-balloon-test.c | 37 ++++++++++++++++++++++++++ tests/qtest/meson.build | 1 + 6 files changed, 58 insertions(+) create mode 100644 tests/qtest/fuzz-virtio-balloon-test.c diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index 6a4e38856d..2c7bb1a525 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -380,9 +380,12 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) sec_bus->map_irq = br->map_irq ? br->map_irq : pci_swizzle_map_irq_fn; sec_bus->address_space_mem = &br->address_space_mem; memory_region_init(&br->address_space_mem, OBJECT(br), "pci_bridge_pci", UINT64_MAX); + address_space_init(&br->as_mem, &br->address_space_mem, + "pci_bridge_pci_mem"); sec_bus->address_space_io = &br->address_space_io; memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 4 * GiB); + address_space_init(&br->as_io, &br->address_space_io, "pci_bridge_pci_io"); pci_bridge_region_init(br); QLIST_INIT(&sec_bus->child); QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); @@ -399,6 +402,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev) PCIBridge *s = PCI_BRIDGE(pci_dev); assert(QLIST_EMPTY(&s->sec_bus.child)); QLIST_REMOVE(&s->sec_bus, sibling); + address_space_destroy(&s->as_mem); + address_space_destroy(&s->as_io); pci_bridge_region_del(s, &s->windows); pci_bridge_region_cleanup(s, &s->windows); /* object_unparent() is called automatically during device deletion */ diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index c5a809b956..5a394821da 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2057,6 +2057,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) if (modern_pio) { 
memory_region_init(&proxy->io_bar, OBJECT(proxy), "virtio-pci-io", 0x4); + address_space_init(&proxy->modern_cfg_io_as, &proxy->io_bar, + "virtio-pci-cfg-io-as"); pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar); @@ -2180,6 +2182,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) /* PCI BAR regions must be powers of 2 */ pow2ceil(proxy->notify.offset + proxy->notify.size)); + address_space_init(&proxy->modern_cfg_mem_as, &proxy->modern_bar, + "virtio-pci-cfg-mem-as"); + if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) { proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } @@ -2269,12 +2274,17 @@ static void virtio_pci_exit(PCIDevice *pci_dev) VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) && !pci_bus_is_root(pci_get_bus(pci_dev)); + bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; msix_uninit_exclusive_bar(pci_dev); if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port && pci_is_express(pci_dev)) { pcie_aer_exit(pci_dev); } + address_space_destroy(&proxy->modern_cfg_mem_as); + if (modern_pio) { + address_space_destroy(&proxy->modern_cfg_io_as); + } } static void virtio_pci_reset(DeviceState *qdev) diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index 5456e24883..b0f5204d80 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -72,6 +72,8 @@ struct PCIBridge { */ MemoryRegion address_space_mem; MemoryRegion address_space_io; + AddressSpace as_mem; + AddressSpace as_io; PCIBridgeWindows windows; diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 9e67ba38c7..971c5fabd4 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -147,6 +147,9 @@ struct VirtIOPCIProxy { }; MemoryRegion modern_bar; MemoryRegion io_bar; + /* address space for VirtIOPCIRegions */ + AddressSpace modern_cfg_mem_as; + AddressSpace 
modern_cfg_io_as; uint32_t legacy_io_bar_idx; uint32_t msix_bar_idx; uint32_t modern_io_bar_idx; diff --git a/tests/qtest/fuzz-virtio-balloon-test.c b/tests/qtest/fuzz-virtio-balloon-test.c new file mode 100644 index 0000000000..ecb597fbee --- /dev/null +++ b/tests/qtest/fuzz-virtio-balloon-test.c @@ -0,0 +1,37 @@ +/* + * QTest fuzzer-generated testcase for virtio balloon device + * + * Copyright (c) 2024 Gao Shiyuan + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "libqtest.h" + +/* + * https://gitlab.com/qemu-project/qemu/-/issues/2576 + * Used to trigger: + * virtio_address_space_lookup: Assertion `mrs.mr' failed. + */ +static void oss_fuzz_71649(void) +{ + QTestState *s = qtest_init("-device virtio-balloon -machine q35" + " -nodefaults"); + + qtest_outl(s, 0xcf8, 0x80000890); + qtest_outl(s, 0xcfc, 0x2); + qtest_outl(s, 0xcf8, 0x80000891); + qtest_inl(s, 0xcfc); + qtest_quit(s); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("fuzz/virtio/oss_fuzz_71649", oss_fuzz_71649); + + return g_test_run(); +} + diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 9d51114539..924dc4be57 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -88,6 +88,7 @@ qtests_i386 = \ (config_all_devices.has_key('CONFIG_MEGASAS_SCSI_PCI') ? ['fuzz-megasas-test'] : []) + \ (config_all_devices.has_key('CONFIG_LSI_SCSI_PCI') ? ['fuzz-lsi53c895a-test'] : []) + \ (config_all_devices.has_key('CONFIG_VIRTIO_SCSI') ? ['fuzz-virtio-scsi-test'] : []) + \ + (config_all_devices.has_key('CONFIG_VIRTIO_BALLOON') ? ['fuzz-virtio-balloon-test'] : []) + \ (config_all_devices.has_key('CONFIG_Q35') ? ['q35-test'] : []) + \ (config_all_devices.has_key('CONFIG_SB16') ? ['fuzz-sb16-test'] : []) + \ (config_all_devices.has_key('CONFIG_SDHCI_PCI') ? 
['fuzz-sdcard-test'] : []) + \ From 963b02764537c66af88b82bd297c375b147e0756 Mon Sep 17 00:00:00 2001 From: yaozhenguo Date: Fri, 11 Oct 2024 18:29:13 +0800 Subject: [PATCH 34/65] virtio/vhost-user: fix qemu abort when hotunplug vhost-user-net device During the hot-unplugging of vhost-user-net type network cards, the vhost_user_cleanup function may add the same rcu node to the rcu linked list. The function call in this case is as follows: vhost_user_cleanup ->vhost_user_host_notifier_remove ->call_rcu(n, vhost_user_host_notifier_free, rcu); ->g_free_rcu(n, rcu); When this happens, QEMU will abort in try_dequeue: if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) { abort(); } backtrace is as follows: 0 __pthread_kill_implementation () at /usr/lib64/libc.so.6 1 raise () at /usr/lib64/libc.so.6 2 abort () at /usr/lib64/libc.so.6 3 try_dequeue () at ../util/rcu.c:235 4 call_rcu_thread (0) at ../util/rcu.c:288 5 qemu_thread_start (0) at ../util/qemu-thread-posix.c:541 6 start_thread () at /usr/lib64/libc.so.6 7 clone3 () at /usr/lib64/libc.so.6 The reason for the abort is that adding two identical nodes to the rcu linked list will cause the rcu linked list to become a ring, but when the dummy node is added after the two identical nodes, the ring is opened. But only one node is added to list with rcu_call_count added twice. This will cause rcu try_dequeue abort. This happens when n->addr != 0. In some scenarios, this does happen. For example, this situation will occur when using a 32-queue DPU vhost-user-net type network card for hot-unplug testing, because VhostUserHostNotifier->addr will be cleared during the processing of VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG. However, it is asynchronous, so we cannot guarantee that VhostUserHostNotifier->addr is zero in vhost_user_cleanup. Therefore, it is necessary to merge g_free_rcu and vhost_user_host_notifier_free into one rcu node.
Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers") Signed-off-by: yaozhenguo Message-Id: <20241011102913.45582-1-yaozhenguo@jd.com> Reviewed-by: Stefano Garzarella Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 46 +++++++++++++++++++--------------- include/hw/virtio/vhost-user.h | 1 + 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 00561daa06..d1b0893b4d 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1185,9 +1185,16 @@ static int vhost_user_set_vring_num(struct vhost_dev *dev, static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) { - assert(n && n->unmap_addr); - munmap(n->unmap_addr, qemu_real_host_page_size()); - n->unmap_addr = NULL; + if (n->unmap_addr) { + munmap(n->unmap_addr, qemu_real_host_page_size()); + n->unmap_addr = NULL; + } + if (n->destroy) { + memory_region_transaction_begin(); + object_unparent(OBJECT(&n->mr)); + memory_region_transaction_commit(); + g_free(n); + } } /* @@ -1195,17 +1202,28 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) * under rcu. */ static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, - VirtIODevice *vdev) + VirtIODevice *vdev, bool destroy) { + /* + * if destroy == false and n->addr == NULL, we have nothing to do. + * so, just return. 
+ */ + if (!n || (!destroy && !n->addr)) { + return; + } + if (n->addr) { if (vdev) { + memory_region_transaction_begin(); virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); + memory_region_transaction_commit(); } assert(!n->unmap_addr); n->unmap_addr = n->addr; n->addr = NULL; - call_rcu(n, vhost_user_host_notifier_free, rcu); } + n->destroy = destroy; + call_rcu(n, vhost_user_host_notifier_free, rcu); } static int vhost_user_set_vring_base(struct vhost_dev *dev, @@ -1279,9 +1297,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, struct vhost_user *u = dev->opaque; VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); - if (n) { - vhost_user_host_notifier_remove(n, dev->vdev); - } + vhost_user_host_notifier_remove(n, dev->vdev, false); ret = vhost_user_write(dev, &msg, NULL, 0); if (ret < 0) { @@ -1562,7 +1578,7 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, * new mapped address. */ n = fetch_or_create_notifier(user, queue_idx); - vhost_user_host_notifier_remove(n, vdev); + vhost_user_host_notifier_remove(n, vdev, false); if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { return 0; @@ -2736,15 +2752,7 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev, static void vhost_user_state_destroy(gpointer data) { VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; - if (n) { - vhost_user_host_notifier_remove(n, NULL); - object_unparent(OBJECT(&n->mr)); - /* - * We can't free until vhost_user_host_notifier_remove has - * done it's thing so schedule the free with RCU. 
- */ - g_free_rcu(n, rcu); - } + vhost_user_host_notifier_remove(n, NULL, true); } bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) @@ -2765,9 +2773,7 @@ void vhost_user_cleanup(VhostUserState *user) if (!user->chr) { return; } - memory_region_transaction_begin(); user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); - memory_region_transaction_commit(); user->chr = NULL; } diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index 324cd8663a..9a3f238b43 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier { void *addr; void *unmap_addr; int idx; + bool destroy; } VhostUserHostNotifier; /** From df66b85f357f9669457906ece865d6183cf12580 Mon Sep 17 00:00:00 2001 From: Dmitry Frolov Date: Mon, 14 Oct 2024 13:18:56 +0100 Subject: [PATCH 35/65] hw/cxl: Fix uint32 overflow cxl-mailbox-utils.c The sum offset + length may overflow uint32. Since this sum is compared with uint64_t return value of get_lsa_size(), it makes sense to choose uint64_t type for offset and length. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 3ebe676a3463 ("hw/cxl/device: Implement get/set Label Storage Area (LSA)") Signed-off-by: Dmitry Frolov Link: https://lore.kernel.org/r/20240917080925.270597-2-frolov@swemel.ru Signed-off-by: Jonathan Cameron Message-Id: <20241014121902.2146424-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 9258e48f95..9f794e4655 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1445,7 +1445,7 @@ static CXLRetCode cmd_ccls_get_lsa(const struct cxl_cmd *cmd, } QEMU_PACKED *get_lsa; CXLType3Dev *ct3d = CXL_TYPE3(cci->d); CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); - uint32_t offset, length; + uint64_t offset, length; get_lsa = (void *)payload_in; offset = get_lsa->offset; From 8352756ffa72668773d6904bc76dfc0bf2619e5a Mon Sep 17 00:00:00 2001 From: Ajay Joshi Date: Mon, 14 Oct 2024 13:18:57 +0100 Subject: [PATCH 36/65] hw/cxl: Fix background completion percentage calculation The current completion percentage calculation does not account for the relative time since the start of the background activity, this leads to showing incorrect start percentage vs what has actually been completed. This patch calculates the percentage based on the actual elapsed time since the start of the operation. Fixes: 221d2cfbdb53 ("hw/cxl/mbox: Add support for background operations") Signed-off-by: Ajay Joshi Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/20240729102338.22337-1-ajay.opensrc@micron.com Signed-off-by: Jonathan Cameron Message-Id: <20241014121902.2146424-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 9f794e4655..3a93966e77 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -2879,7 +2879,8 @@ static void bg_timercb(void *opaque) } } else { /* estimate only */ - cci->bg.complete_pct = 100 * now / total_time; + cci->bg.complete_pct = + 100 * (now - cci->bg.starttime) / cci->bg.runtime; timer_mod(cci->bg.timer, now + CXL_MBOX_BG_UPDATE_FREQ); } From 5eabca7ec0a92032c7dd5188f1708344c225a385 Mon Sep 17 00:00:00 2001 From: Yao Xingtao Date: Mon, 14 Oct 2024 13:18:58 +0100 Subject: [PATCH 37/65] mem/cxl_type3: Fix overlapping region validation error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When injecting a new poisoned region through qmp_cxl_inject_poison(), the newly injected region should not overlap with existing poisoned regions. The current validation method does not consider the following overlapping region: ┌───┬───────┬───┐ │a │ b(a) │a │ └───┴───────┴───┘ (a is a newly added region, b is an existing region, and b is a subregion of a) Fixes: 9547754f40ee ("hw/cxl: QMP based poison injection support") Signed-off-by: Yao Xingtao Signed-off-by: Jonathan Cameron Message-Id: <20241014121902.2146424-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/mem/cxl_type3.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 44d491d8f6..16c60b9b0d 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -1381,9 +1381,7 @@ void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, ct3d = CXL_TYPE3(obj); QLIST_FOREACH(p, &ct3d->poison_list, node) { - if (((start >= p->start) && (start < p->start + p->length)) || - ((start + length > p->start) && - (start + length <= p->start + p->length))) { + if ((start < p->start + p->length) && (start + length > p->start)) { error_setg(errp, "Overlap with existing poisoned region not supported"); return; From 80ee960f8d646505385bce8ed143a9bb8ea36d1d Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Mon, 14 Oct 2024 13:18:59 +0100 Subject: [PATCH 38/65] hw/mem/cxl_type3: Fix More flag setting for dynamic capacity event records Per cxl spec r3.1, for multiple dynamic capacity event records grouped via the More flag, the last record in the sequence should clear the More flag. Before the change, the More flag of the event record is cleared before the loop of inserting records into the event log, which will leave the flag always set once it is set in the loop. Fixes: d0b9b28a5b9f ("hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents") Signed-off-by: Fan Ni Link: https://lore.kernel.org/r/20240827164304.88876-2-nifan.cxl@gmail.com Signed-off-by: Jonathan Cameron Message-Id: <20241014121902.2146424-5-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/mem/cxl_type3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 16c60b9b0d..6911d13fe6 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -2064,11 +2064,11 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path, stw_le_p(&dCap.host_id, hid); /* only valid for DC_REGION_CONFIG_UPDATED event */ dCap.updated_region_id = 0; - dCap.flags = 0; for (i = 0; i < num_extents; i++) { memcpy(&dCap.dynamic_capacity_extent, &extents[i], sizeof(CXLDCExtentRaw)); + dCap.flags = 0; if (i < num_extents - 1) { /* Set "More" flag */ dCap.flags |= BIT(0); From d1853190db5c59ad5b0537a2ac59c8d4494cbd98 Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Mon, 14 Oct 2024 13:19:00 +0100 Subject: [PATCH 39/65] hw/cxl/cxl-mailbox-utils: Fix for device DDR5 ECS control feature tables CXL spec 3.1 section 8.2.9.9.11.2 describes the DDR5 Error Check Scrub (ECS) control feature. ECS log capabilities field in following ECS tables, which is common for all memory media FRUs in a CXL device. Fix struct CXLMemECSReadAttrs and struct CXLMemECSWriteAttrs to make log entry type field common. Fixes: 2d41ce38fb9a ("hw/cxl/cxl-mailbox-utils: Add device DDR5 ECS control feature") Signed-off-by: Shiju Jose Signed-off-by: Jonathan Cameron Message-Id: <20241014121902.2146424-6-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 24 +++++++++--------------- hw/mem/cxl_type3.c | 9 ++++----- include/hw/cxl/cxl_device.h | 36 ++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 3a93966e77..67041f45d3 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1133,10 +1133,8 @@ static CXLRetCode cmd_features_get_supported(const struct cxl_cmd *cmd, (struct CXLSupportedFeatureEntry) { .uuid = ecs_uuid, .feat_index = index, - .get_feat_size = CXL_ECS_NUM_MEDIA_FRUS * - sizeof(CXLMemECSReadAttrs), - .set_feat_size = CXL_ECS_NUM_MEDIA_FRUS * - sizeof(CXLMemECSWriteAttrs), + .get_feat_size = sizeof(CXLMemECSReadAttrs), + .set_feat_size = sizeof(CXLMemECSWriteAttrs), .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE, .get_feat_version = CXL_ECS_GET_FEATURE_VERSION, .set_feat_version = CXL_ECS_SET_FEATURE_VERSION, @@ -1204,13 +1202,10 @@ static CXLRetCode cmd_features_get_feature(const struct cxl_cmd *cmd, (uint8_t *)&ct3d->patrol_scrub_attrs + get_feature->offset, bytes_to_copy); } else if (qemu_uuid_is_equal(&get_feature->uuid, &ecs_uuid)) { - if (get_feature->offset >= CXL_ECS_NUM_MEDIA_FRUS * - sizeof(CXLMemECSReadAttrs)) { + if (get_feature->offset >= sizeof(CXLMemECSReadAttrs)) { return CXL_MBOX_INVALID_INPUT; } - bytes_to_copy = CXL_ECS_NUM_MEDIA_FRUS * - sizeof(CXLMemECSReadAttrs) - - get_feature->offset; + bytes_to_copy = sizeof(CXLMemECSReadAttrs) - get_feature->offset; bytes_to_copy = MIN(bytes_to_copy, get_feature->count); memcpy(payload_out, (uint8_t *)&ct3d->ecs_attrs + get_feature->offset, @@ -1299,18 +1294,17 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd, ecs_set_feature = (void *)payload_in; ecs_write_attrs = ecs_set_feature->feat_data; - memcpy((uint8_t *)ct3d->ecs_wr_attrs + hdr->offset, + memcpy((uint8_t *)&ct3d->ecs_wr_attrs + hdr->offset, ecs_write_attrs, bytes_to_copy); 
set_feat_info->data_size += bytes_to_copy; if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER || data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) { + ct3d->ecs_attrs.ecs_log_cap = ct3d->ecs_wr_attrs.ecs_log_cap; for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) { - ct3d->ecs_attrs[count].ecs_log_cap = - ct3d->ecs_wr_attrs[count].ecs_log_cap; - ct3d->ecs_attrs[count].ecs_config = - ct3d->ecs_wr_attrs[count].ecs_config & 0x1F; + ct3d->ecs_attrs.fru_attrs[count].ecs_config = + ct3d->ecs_wr_attrs.fru_attrs[count].ecs_config & 0x1F; } } } else { @@ -1324,7 +1318,7 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd, if (qemu_uuid_is_equal(&hdr->uuid, &patrol_scrub_uuid)) { memset(&ct3d->patrol_scrub_wr_attrs, 0, set_feat_info->data_size); } else if (qemu_uuid_is_equal(&hdr->uuid, &ecs_uuid)) { - memset(ct3d->ecs_wr_attrs, 0, set_feat_info->data_size); + memset(&ct3d->ecs_wr_attrs, 0, set_feat_info->data_size); } set_feat_info->data_transfer_flag = 0; set_feat_info->data_saved_across_reset = false; diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 6911d13fe6..5cf754b38f 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -920,16 +920,15 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) ct3d->patrol_scrub_attrs.scrub_flags = CXL_MEMDEV_PS_ENABLE_DEFAULT; /* Set default value for DDR5 ECS read attributes */ + ct3d->ecs_attrs.ecs_log_cap = CXL_ECS_LOG_ENTRY_TYPE_DEFAULT; for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) { - ct3d->ecs_attrs[count].ecs_log_cap = - CXL_ECS_LOG_ENTRY_TYPE_DEFAULT; - ct3d->ecs_attrs[count].ecs_cap = + ct3d->ecs_attrs.fru_attrs[count].ecs_cap = CXL_ECS_REALTIME_REPORT_CAP_DEFAULT; - ct3d->ecs_attrs[count].ecs_config = + ct3d->ecs_attrs.fru_attrs[count].ecs_config = CXL_ECS_THRESHOLD_COUNT_DEFAULT | (CXL_ECS_MODE_DEFAULT << 3); /* Reserved */ - ct3d->ecs_attrs[count].ecs_flags = 0; + ct3d->ecs_attrs.fru_attrs[count].ecs_flags = 0; } return; diff --git 
a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index e14e56ae4b..561b375dc8 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -463,18 +463,6 @@ typedef struct CXLMemPatrolScrubWriteAttrs { #define CXL_MEMDEV_PS_ENABLE_DEFAULT 0 /* CXL memory device DDR5 ECS control attributes */ -typedef struct CXLMemECSReadAttrs { - uint8_t ecs_log_cap; - uint8_t ecs_cap; - uint16_t ecs_config; - uint8_t ecs_flags; -} QEMU_PACKED CXLMemECSReadAttrs; - -typedef struct CXLMemECSWriteAttrs { - uint8_t ecs_log_cap; - uint16_t ecs_config; -} QEMU_PACKED CXLMemECSWriteAttrs; - #define CXL_ECS_GET_FEATURE_VERSION 0x01 #define CXL_ECS_SET_FEATURE_VERSION 0x01 #define CXL_ECS_LOG_ENTRY_TYPE_DEFAULT 0x01 @@ -483,6 +471,26 @@ typedef struct CXLMemECSWriteAttrs { #define CXL_ECS_MODE_DEFAULT 0 #define CXL_ECS_NUM_MEDIA_FRUS 3 /* Default */ +typedef struct CXLMemECSFRUReadAttrs { + uint8_t ecs_cap; + uint16_t ecs_config; + uint8_t ecs_flags; +} QEMU_PACKED CXLMemECSFRUReadAttrs; + +typedef struct CXLMemECSReadAttrs { + uint8_t ecs_log_cap; + CXLMemECSFRUReadAttrs fru_attrs[CXL_ECS_NUM_MEDIA_FRUS]; +} QEMU_PACKED CXLMemECSReadAttrs; + +typedef struct CXLMemECSFRUWriteAttrs { + uint16_t ecs_config; +} QEMU_PACKED CXLMemECSFRUWriteAttrs; + +typedef struct CXLMemECSWriteAttrs { + uint8_t ecs_log_cap; + CXLMemECSFRUWriteAttrs fru_attrs[CXL_ECS_NUM_MEDIA_FRUS]; +} QEMU_PACKED CXLMemECSWriteAttrs; + #define DCD_MAX_NUM_REGION 8 typedef struct CXLDCExtentRaw { @@ -575,8 +583,8 @@ struct CXLType3Dev { CXLMemPatrolScrubReadAttrs patrol_scrub_attrs; CXLMemPatrolScrubWriteAttrs patrol_scrub_wr_attrs; /* ECS control attributes */ - CXLMemECSReadAttrs ecs_attrs[CXL_ECS_NUM_MEDIA_FRUS]; - CXLMemECSWriteAttrs ecs_wr_attrs[CXL_ECS_NUM_MEDIA_FRUS]; + CXLMemECSReadAttrs ecs_attrs; + CXLMemECSWriteAttrs ecs_wr_attrs; struct dynamic_capacity { HostMemoryBackend *host_dc; From d1978226c81b0e9b3d6a7779cf92cbfe9f4a10e8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 
14 Oct 2024 13:19:01 +0100 Subject: [PATCH 40/65] hw/cxl: Fix indent of structure member Add missing 4 spaces of indent to structure element. Reported-by: Davidlohr Bueso Signed-off-by: Jonathan Cameron Message-Id: <20241014121902.2146424-7-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 67041f45d3..5f63099724 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -374,7 +374,7 @@ static CXLRetCode cmd_infostat_identify(const struct cxl_cmd *cmd, uint16_t pcie_subsys_vid; uint16_t pcie_subsys_id; uint64_t sn; - uint8_t max_message_size; + uint8_t max_message_size; uint8_t component_type; } QEMU_PACKED *is_identify; QEMU_BUILD_BUG_ON(sizeof(*is_identify) != 18); From d4d5212c541f57c339a592bcf1b38bf325940bfd Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 14 Oct 2024 13:19:02 +0100 Subject: [PATCH 41/65] hw/pci-bridge: Make pxb_dev_realize_common() return if it succeeded For the CXL PXB there is additional code after pxb_dev_realize_common() is called. If that realize failed (e.g. due to an out of range numa_node) we will get a segfault. Return a bool so the caller can check if the pxb_dev_realize_common() succeeded or not without having to poke around in the errp. Fixes: 4f8db8711cbd ("hw/pxb: Allow creation of a CXL PXB (host bridge)") Signed-off-by: Jonathan Cameron Message-Id: <20241014121902.2146424-8-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci-bridge/pci_expander_bridge.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index 4578e03024..07d411cff5 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -330,7 +330,7 @@ static gint pxb_compare(gconstpointer a, gconstpointer b) 0; } -static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type, +static bool pxb_dev_realize_common(PCIDevice *dev, enum BusType type, Error **errp) { PXBDev *pxb = PXB_DEV(dev); @@ -342,13 +342,13 @@ static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type, if (ms->numa_state == NULL) { error_setg(errp, "NUMA is not supported by this machine-type"); - return; + return false; } if (pxb->numa_node != NUMA_NODE_UNASSIGNED && pxb->numa_node >= ms->numa_state->num_nodes) { error_setg(errp, "Illegal numa node %d", pxb->numa_node); - return; + return false; } if (dev->qdev.id && *dev->qdev.id) { @@ -394,12 +394,13 @@ static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type, pci_config_set_class(dev->config, PCI_CLASS_BRIDGE_HOST); pxb_dev_list = g_list_insert_sorted(pxb_dev_list, pxb, pxb_compare); - return; + return true; err_register_bus: object_unref(OBJECT(bds)); object_unparent(OBJECT(bus)); object_unref(OBJECT(ds)); + return false; } static void pxb_dev_realize(PCIDevice *dev, Error **errp) @@ -500,7 +501,9 @@ static void pxb_cxl_dev_realize(PCIDevice *dev, Error **errp) return; } - pxb_dev_realize_common(dev, CXL, errp); + if (!pxb_dev_realize_common(dev, CXL, errp)) { + return; + } pxb_cxl_dev_reset(DEVICE(dev)); } From eea5aeef84e1b74f515b474d3a86377701f93750 Mon Sep 17 00:00:00 2001 From: Albert Esteve Date: Tue, 22 Oct 2024 14:46:14 +0200 Subject: [PATCH 42/65] vhost-user: fix shared object return values VHOST_USER_BACKEND_SHARED_OBJECT_ADD and VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE state in the spec that they return 0 for 
successful operations, non-zero otherwise. However, implementation relies on the return types of the virtio-dmabuf library, with opposite semantics (true if everything is correct, false otherwise). Therefore, current implementation violates the specification. Revert the logic so that the implementation of the vhost-user handling methods matches the specification. Fixes: 043e127a126bb3ceb5fc753deee27d261fd0c5ce Fixes: 160947666276c5b7f6bca4d746bcac2966635d79 Reviewed-by: Stefano Garzarella Signed-off-by: Albert Esteve Message-Id: <20241022124615.585596-1-aesteve@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index d1b0893b4d..f170f0b25b 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1623,9 +1623,14 @@ vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev, QemuUUID uuid; memcpy(uuid.data, object->uuid, sizeof(object->uuid)); - return virtio_add_vhost_device(&uuid, dev); + return !virtio_add_vhost_device(&uuid, dev); } +/* + * Handle VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE backend requests. + * + * Return: 0 on success, 1 on error. 
+ */ static int vhost_user_backend_handle_shared_object_remove(struct vhost_dev *dev, VhostUserShared *object) @@ -1639,16 +1644,16 @@ vhost_user_backend_handle_shared_object_remove(struct vhost_dev *dev, struct vhost_dev *owner = virtio_lookup_vhost_device(&uuid); if (dev != owner) { /* Not allowed to remove non-owned entries */ - return 0; + return 1; } break; } default: /* Not allowed to remove non-owned entries */ - return 0; + return 1; } - return virtio_remove_resource(&uuid); + return !virtio_remove_resource(&uuid); } static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr, From 6ce12bd29777d41afef859652eaa62b5c964d3f7 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Mon, 28 Oct 2024 10:25:14 +0800 Subject: [PATCH 43/65] intel_iommu: Introduce property "stale-tm" to control Transient Mapping (TM) field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VT-d spec removed Transient Mapping (TM) field from second-level page-tables and treat the field as Reserved(0) since revision 3.2. Changing the field as reserved(0) will break backward compatibility, so introduce a property "stale-tm" to allow user to control the setting. Use pc_compat_9_1 to handle the compatibility for machines before 9.2 which allow guest to set the field. Starting from 9.2, this field is reserved(0) by default to match spec. Of course, user can force it on command line. This doesn't impact function of vIOMMU as there was no logic to emulate Transient Mapping. Suggested-by: Yi Liu Suggested-by: Jason Wang Signed-off-by: Zhenzhong Duan Acked-by: Jason Wang Reviewed-by: Yi Liu Message-Id: <20241028022514.806657-1-zhenzhong.duan@intel.com> Reviewed-by: Clément Mathieu--Drif Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 7 ++++--- hw/i386/intel_iommu_internal.h | 12 ++++++------ hw/i386/pc.c | 1 + include/hw/i386/intel_iommu.h | 3 +++ 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 08fe218935..8612d0917b 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3372,6 +3372,7 @@ static Property vtd_properties[] = { DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false), DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true), + DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false), DEFINE_PROP_END_OF_LIST(), }; @@ -4138,15 +4139,15 @@ static void vtd_init(IntelIOMMUState *s) */ vtd_spte_rsvd[0] = ~0ULL; vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); + x86_iommu->dt_supported && s->stale_tm); vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); + x86_iommu->dt_supported && s->stale_tm); vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); + x86_iommu->dt_supported && s->stale_tm); if (s->scalable_mode || s->snoop_control) { vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 13d5d129ae..2f9bc0147d 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -412,8 +412,8 @@ typedef union VTDInvDesc VTDInvDesc; /* Rsvd field masks for spte */ #define VTD_SPTE_SNP 0x800ULL -#define VTD_SPTE_PAGE_L1_RSVD_MASK(aw, dt_supported) \ - dt_supported ? \ +#define VTD_SPTE_PAGE_L1_RSVD_MASK(aw, stale_tm) \ + stale_tm ? 
\ (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \ (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) #define VTD_SPTE_PAGE_L2_RSVD_MASK(aw) \ @@ -423,12 +423,12 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_SPTE_PAGE_L4_RSVD_MASK(aw) \ (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) -#define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw, dt_supported) \ - dt_supported ? \ +#define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw, stale_tm) \ + stale_tm ? \ (0x1ff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \ (0x1ff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) -#define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw, dt_supported) \ - dt_supported ? \ +#define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw, stale_tm) \ + stale_tm ? \ (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \ (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 2047633e4c..830614d930 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -82,6 +82,7 @@ GlobalProperty pc_compat_9_1[] = { { "ICH9-LPC", "x-smi-swsmi-timer", "off" }, { "ICH9-LPC", "x-smi-periodic-timer", "off" }, + { TYPE_INTEL_IOMMU_DEVICE, "stale-tm", "on" }, }; const size_t pc_compat_9_1_len = G_N_ELEMENTS(pc_compat_9_1); diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 1eb05c29fc..d372cd396b 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -306,6 +306,9 @@ struct IntelIOMMUState { bool dma_translation; /* Whether DMA translation supported */ bool pasid; /* Whether to support PASID */ + /* Transient Mapping, Reserved(0) since VTD spec revision 3.2 */ + bool stale_tm; + /* * Protects IOMMU states in general. Currently it protects the * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace. 
From 449dca6ac93afbed1af00b6a29c9729f6cb18c35 Mon Sep 17 00:00:00 2001 From: Marcin Juszkiewicz Date: Wed, 23 Oct 2024 13:38:20 +0200 Subject: [PATCH 44/65] pcie: enable Extended tag field support >From what I read PCI has 32 transactions, PCI Express devices can handle 256 with Extended tag enabled (spec mentions also larger values but I lack PCIe knowledge). QEMU leaves 'Extended tag field' with 0 as value: Capabilities: [e0] Express (v1) Root Complex Integrated Endpoint, IntMsgNum 0 DevCap: MaxPayload 128 bytes, PhantFunc 0 ExtTag- RBE+ FLReset- TEE-IO- SBSA ACS has test 824 which checks for PCIe device capabilities. BSA specification [1] (SBSA is on top of BSA) in section F.3.2 lists expected values for Device Capabilities Register: Device Capabilities Register Requirement Role based error reporting RCEC and RCiEP: Hardwired to 1 Endpoint L0s acceptable latency RCEC and RCiEP: Hardwired to 0 L1 acceptable latency RCEC and RCiEP: Hardwired to 0 Captured slot power limit scale RCEC and RCiEP: Hardwired to 0 Captured slot power limit value RCEC and RCiEP: Hardwired to 0 Max payload size value must be compliant with PCIe spec Phantom functions RCEC and RCiEP: Recommendation is to hardwire this bit to 0. Extended tag field Hardwired to 1 1. https://developer.arm.com/documentation/den0094/c/ This change enables Extended tag field. All versioned platforms should have it disabled for older versions (tested with Arm/virt). Signed-off-by: Marcin Juszkiewicz Message-Id: <20241023113820.486017-1-marcin.juszkiewicz@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/core/machine.c | 4 +++- hw/pci/pci.c | 2 ++ hw/pci/pcie.c | 8 +++++++- include/hw/pci/pci.h | 2 ++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index adaba17eba..8ccc74067a 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -34,7 +34,9 @@ #include "hw/virtio/virtio-iommu.h" #include "audio/audio.h" -GlobalProperty hw_compat_9_1[] = {}; +GlobalProperty hw_compat_9_1[] = { + { TYPE_PCI_DEVICE, "x-pcie-ext-tag", "false" }, +}; const size_t hw_compat_9_1_len = G_N_ELEMENTS(hw_compat_9_1); GlobalProperty hw_compat_9_0[] = { diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 0b6bdaa0d7..1416ae202c 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -100,6 +100,8 @@ static Property pci_props[] = { QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice, max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE), + DEFINE_PROP_BIT("x-pcie-ext-tag", PCIDevice, cap_present, + QEMU_PCIE_EXT_TAG_BITNR, true), { .name = "busnr", .info = &prop_pci_busnr }, DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 2738dbb28d..0b455c8654 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -86,7 +86,13 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) * Specification, Revision 1.1., or subsequent PCI Express Base * Specification revisions. 
*/ - pci_set_long(exp_cap + PCI_EXP_DEVCAP, PCI_EXP_DEVCAP_RBER); + uint32_t devcap = PCI_EXP_DEVCAP_RBER; + + if (dev->cap_present & QEMU_PCIE_EXT_TAG) { + devcap = PCI_EXP_DEVCAP_RBER | PCI_EXP_DEVCAP_EXT_TAG; + } + + pci_set_long(exp_cap + PCI_EXP_DEVCAP, devcap); pci_set_long(exp_cap + PCI_EXP_LNKCAP, (port << PCI_EXP_LNKCAP_PN_SHIFT) | diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 35d4fe0bbf..38ffa7bcee 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -214,6 +214,8 @@ enum { QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), #define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12 QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR), +#define QEMU_PCIE_EXT_TAG_BITNR 13 + QEMU_PCIE_EXT_TAG = (1 << QEMU_PCIE_EXT_TAG_BITNR), }; typedef struct PCIINTxRoute { From 0564019bf1d9dd76ee7d212c26cf40e73eb3a00c Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Fri, 1 Nov 2024 13:20:04 +0000 Subject: [PATCH 45/65] cxl/cxl-mailbox-utils: Fix size check for cmd_firmware_update_get_info In the function cmd_firmware_update_get_info for handling Get FW info command (0x0200h), the vmem, pmem and DC capacity size check were incorrect. The size should be aligned to 256MiB, not smaller than 256MiB. Signed-off-by: Fan Ni Reviewed-by: Davidlohr Bueso Signed-off-by: Jonathan Cameron Message-Id: <20241101132005.26633-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 5f63099724..8bb0d2dd29 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -649,9 +649,9 @@ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd, } QEMU_PACKED *fw_info; QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50); - if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) || - (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) || - (ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) { + if (!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER) || + !QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER) || + !QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER)) { return CXL_MBOX_INTERNAL_ERROR; } From 802671c37a6ab4de116866883841bcc8b2318124 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Fri, 1 Nov 2024 13:20:05 +0000 Subject: [PATCH 46/65] hw/cxl/cxl-mailbox-util: Fix output buffer index update when retrieving DC extents In the function of retrieving DC extents (cmd_dcd_get_dyn_cap_ext_list), the output buffer index was not correctly updated while iterating the extent list on the device, leaving the extents returned incorrect except for the first one. Fixes: 1c9221f19e62 ("hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support") Signed-off-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20241101132005.26633-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 8bb0d2dd29..97cb8bbcec 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -2227,6 +2227,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd, stw_le_p(&out_rec->shared_seq, ent->shared_seq); record_done++; + out_rec++; if (record_done == record_count) { break; } From 7edbbff5ee85dd28699c5acd6ea2f2c2e41c37d2 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 1 Nov 2024 13:39:08 +0000 Subject: [PATCH 47/65] hw/cxl: Check size of input data to dynamic capacity mailbox commands cxl_cmd_dcd_release_dyn_cap() and cmd_dcd_add_dyn_cap_rsp() are missing input message size checks. These must be done in the individual commands when the command has a variable length input payload. A buggy or malicious guest might send undersized messages via the mailbox. As that size is used to take a copy of the mailbox content, each command must check there is sufficient data. In this case the first check is that there is enough data to read how many extents there are, and the second that there is enough for those elements to be accessed. Reported-by: Esifiel Signed-off-by: Jonathan Cameron Message-Id: <20241101133917.27634-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 97cb8bbcec..17924410dd 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -2465,11 +2465,20 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, uint64_t dpa, len; CXLRetCode ret; + if (len_in < sizeof(*in)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + if (in->num_entries_updated == 0) { cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); return CXL_MBOX_SUCCESS; } + if (len_in < + sizeof(*in) + sizeof(*in->updated_entries) * in->num_entries_updated) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + /* Adding extents causes exceeding device's extent tracking ability. */ if (in->num_entries_updated + ct3d->dc.total_extent_count > CXL_NUM_EXTENTS_SUPPORTED) { @@ -2624,10 +2633,19 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd, uint32_t updated_list_size; CXLRetCode ret; + if (len_in < sizeof(*in)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + if (in->num_entries_updated == 0) { return CXL_MBOX_INVALID_INPUT; } + if (len_in < + sizeof(*in) + sizeof(*in->updated_entries) * in->num_entries_updated) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + ret = cxl_detect_malformed_extent_list(ct3d, in); if (ret != CXL_MBOX_SUCCESS) { return ret; From 91a743bd021a262af61c79cc35f0b634b2fcf3ad Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 1 Nov 2024 13:39:09 +0000 Subject: [PATCH 48/65] hw/cxl: Check input includes at least the header in cmd_features_set_feature() A buggy guest might write an insufficiently large message. Check the header is present. Whilst zero data after the header is very odd it will just result in failure to copy any data. Reported-by: Esifiel Signed-off-by: Jonathan Cameron Message-Id: <20241101133917.27634-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 17924410dd..e63140aefe 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1238,6 +1238,9 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd, CXLType3Dev *ct3d; uint16_t count; + if (len_in < sizeof(*hdr)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) { return CXL_MBOX_UNSUPPORTED; From f4a12ba66bebfe200d7f56015c1cd5af321ab152 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 1 Nov 2024 13:39:10 +0000 Subject: [PATCH 49/65] hw/cxl: Check input length is large enough in cmd_events_clear_records() Buggy software might write a message that is too short for either the header, or the header + the event data that is specified in the header. This may result in accesses beyond the range of the message allocated as a duplicate of the incoming message buffer. Reported-by: Esifiel Signed-off-by: Jonathan Cameron Message-Id: <20241101133917.27634-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index e63140aefe..3cb499a24f 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -266,6 +266,12 @@ static CXLRetCode cmd_events_clear_records(const struct cxl_cmd *cmd, CXLClearEventPayload *pl; pl = (CXLClearEventPayload *)payload_in; + + if (len_in < sizeof(*pl) || + len_in < sizeof(*pl) + sizeof(*pl->handle) * pl->nr_recs) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + *len_out = 0; return cxl_event_clear_records(cxlds, pl); } From a3995360aeec62902f045142840c1fd334e9725f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 1 Nov 2024 13:39:11 +0000 Subject: [PATCH 50/65] hw/cxl: Check enough data in cmd_firmware_update_transfer() Buggy guest can write a message that advertises more data than is provided. As QEMU internally duplicates the reported message size, this may result in an out of bounds access. Add sanity checks on the size to avoid this. Reported-by: Esifiel Signed-off-by: Jonathan Cameron Message-Id: <20241101133917.27634-5-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 3cb499a24f..27fadc4fa8 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -705,6 +705,10 @@ static CXLRetCode cmd_firmware_update_transfer(const struct cxl_cmd *cmd, } QEMU_PACKED *fw_transfer = (void *)payload_in; size_t offset, length; + if (len < sizeof(*fw_transfer)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + if (fw_transfer->action == CXL_FW_XFER_ACTION_ABORT) { /* * At this point there aren't any on-going transfers From f9f0fa2438c6934aa76b06e9a6cef283176ceb8d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 1 Nov 2024 13:39:12 +0000 Subject: [PATCH 51/65] hw/cxl: Check the length of data requested fits in get_log() Checking offset + length is of no relevance when verifying the CEL data will fit in the mailbox payload. Only the length is relevant. Note that this removes a potential overflow. Reported-by: Esifiel Signed-off-by: Jonathan Cameron Message-Id: <20241101133917.27634-6-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 27fadc4fa8..2aa7ffed84 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -947,7 +947,7 @@ static CXLRetCode cmd_logs_get_log(const struct cxl_cmd *cmd, * the only possible failure would be if the mailbox itself isn't big * enough. */ - if (get_log->offset + get_log->length > cci->payload_max) { + if (get_log->length > cci->payload_max) { return CXL_MBOX_INVALID_INPUT; } From a3de73c2a835efc30851f9e810e0cd355e1cd0cf Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 1 Nov 2024 13:39:13 +0000 Subject: [PATCH 52/65] hw/cxl: Avoid accesses beyond the end of cel_log.
Add a check that the requested offset + length does not go beyond the end of the cel_log. Whilst the cci->cel_log is large enough to include all possible CEL entries, the guest might still ask for entries beyond the end of it. Move the comment to this new check rather than before the check on the type of log requested. Reported-by: Esifiel Signed-off-by: Jonathan Cameron Message-Id: <20241101133917.27634-7-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 2aa7ffed84..5e571955b6 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -937,16 +937,6 @@ static CXLRetCode cmd_logs_get_log(const struct cxl_cmd *cmd, get_log = (void *)payload_in; - /* - * CXL r3.1 Section 8.2.9.5.2: Get Log (Opcode 0401h) - * The device shall return Invalid Input if the Offset or Length - * fields attempt to access beyond the size of the log as reported by Get - * Supported Logs. - * - * The CEL buffer is large enough to fit all commands in the emulation, so - * the only possible failure would be if the mailbox itself isn't big - * enough. - */ if (get_log->length > cci->payload_max) { return CXL_MBOX_INVALID_INPUT; } @@ -955,6 +945,20 @@ static CXLRetCode cmd_logs_get_log(const struct cxl_cmd *cmd, return CXL_MBOX_INVALID_LOG; } + /* + * CXL r3.1 Section 8.2.9.5.2: Get Log (Opcode 0401h) + * The device shall return Invalid Input if the Offset or Length + * fields attempt to access beyond the size of the log as reported by Get + * Supported Log. + * + * Only valid for there to be one entry per opcode, but the length + offset + * may still be greater than that if the inputs are not valid and so access + * beyond the end of cci->cel_log. 
+ */ + if ((uint64_t)get_log->offset + get_log->length >= sizeof(cci->cel_log)) { + return CXL_MBOX_INVALID_INPUT; + } + /* Store off everything to local variables so we can wipe out the payload */ *len_out = get_log->length; From c0f122419fbcd1e0bf2bc2a0a3ae6410bb2cda22 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 1 Nov 2024 13:39:14 +0000 Subject: [PATCH 53/65] hw/cxl: Ensuring enough data to read parameters in cmd_tunnel_management_cmd() If len_in is less than the minimum spec allowed value, then return CXL_MBOX_INVALID_PAYLOAD_LENGTH Reported-by: Esifiel Signed-off-by: Jonathan Cameron Message-Id: <20241101133917.27634-8-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 5e571955b6..a40d81219c 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -151,6 +151,9 @@ static CXLRetCode cmd_tunnel_management_cmd(const struct cxl_cmd *cmd, in = (void *)payload_in; out = (void *)payload_out; + if (len_in < sizeof(*in)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } /* Enough room for minimum sized message - no payload */ if (in->size < sizeof(in->ccimessage)) { return CXL_MBOX_INVALID_PAYLOAD_LENGTH; From c1c4d6b38b13952b0a9e2d7393e1ccc70b2615a4 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 1 Nov 2024 13:39:15 +0000 Subject: [PATCH 54/65] hw/cxl: Check that writes do not go beyond end of target attributes In cmd_features_set_feature() an offset + data size scheme is used to allow for large features. Ensure this does not write beyond the end of the buffers used to accumulate the full feature attribute set. Reported-by: Esifiel Signed-off-by: Jonathan Cameron Message-Id: <20241101133917.27634-9-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index a40d81219c..078782e8b9 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1292,6 +1292,11 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd, ps_set_feature = (void *)payload_in; ps_write_attrs = &ps_set_feature->feat_data; + + if ((uint32_t)hdr->offset + bytes_to_copy > + sizeof(ct3d->patrol_scrub_wr_attrs)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } memcpy((uint8_t *)&ct3d->patrol_scrub_wr_attrs + hdr->offset, ps_write_attrs, bytes_to_copy); @@ -1314,6 +1319,11 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd, ecs_set_feature = (void *)payload_in; ecs_write_attrs = ecs_set_feature->feat_data; + + if ((uint32_t)hdr->offset + bytes_to_copy > + sizeof(ct3d->ecs_wr_attrs)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } memcpy((uint8_t *)&ct3d->ecs_wr_attrs + hdr->offset, ecs_write_attrs, bytes_to_copy); From 5300bdf5898dda5989215e183bccd555cc782b9a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 1 Nov 2024 13:39:16 +0000 Subject: [PATCH 55/65] hw/cxl: Ensure there is enough data for the header in cmd_ccls_set_lsa() The properties of the requested set command cannot be established if len_in is less than the size of the header. Reported-by: Esifiel Signed-off-by: Jonathan Cameron Message-Id: <20241101133917.27634-10-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 078782e8b9..f4a436e172 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -1503,8 +1503,8 @@ static CXLRetCode cmd_ccls_set_lsa(const struct cxl_cmd *cmd, const size_t hdr_len = offsetof(struct set_lsa_pl, data); *len_out = 0; - if (!len_in) { - return CXL_MBOX_SUCCESS; + if (len_in < hdr_len) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; } if (set_lsa_payload->offset + len_in > cvc->get_lsa_size(ct3d) + hdr_len) { From 721c99aefcdb311bd41d20678d3935fd11454641 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 1 Nov 2024 13:39:17 +0000 Subject: [PATCH 56/65] hw/cxl: Ensure there is enough data to read the input header in cmd_get_physical_port_state() If len_in is smaller than the header length then accessing the number of ports will result in an out of bounds access. Add a check to avoid this. Reported-by: Esifiel Signed-off-by: Jonathan Cameron Message-Id: <20241101133917.27634-11-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index f4a436e172..2d4d62c454 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -530,6 +530,9 @@ static CXLRetCode cmd_get_physical_port_state(const struct cxl_cmd *cmd, in = (struct cxl_fmapi_get_phys_port_state_req_pl *)payload_in; out = (struct cxl_fmapi_get_phys_port_state_resp_pl *)payload_out; + if (len_in < sizeof(*in)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } /* Check if what was requested can fit */ if (sizeof(*out) + sizeof(*out->ports) * in->num_ports > cci->payload_max) { return CXL_MBOX_INVALID_INPUT; From 26f2660bf7a3f0b6e9a939657ba656f4891ff46d Mon Sep 17 00:00:00 2001 From: Roque Arcudia Hernandez Date: Fri, 1 Nov 2024 21:59:23 +0000 Subject: [PATCH 57/65] hw/pci: Add parenthesis to PCI_BUILD_BDF macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bus parameter in the macro PCI_BUILD_BDF is not surrounded by parenthesis. This can create a compile error when warnings are treated as errors or can potentially create runtime errors due to the operator precedence. For instance: file.c:x:32: error: suggest parentheses around '-' inside '<<' [-Werror=parentheses] 171 | uint16_t bdf = PCI_BUILD_BDF(a - b, sdev->devfn); | ~~^~~ include/hw/pci/pci.h:19:41: note: in definition of macro 'PCI_BUILD_BDF' 19 | #define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn)) | ^~~ cc1: all warnings being treated as errors Signed-off-by: Roque Arcudia Hernandez Reviewed-by: Nabih Estefan Message-Id: <20241101215923.3399311-1-roqueh@google.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- include/hw/pci/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 38ffa7bcee..135695c551 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -16,7 +16,7 @@ extern bool pci_available; #define PCI_BUS_NUM(x) (((x) >> 8) & 0xff) #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) #define PCI_FUNC(devfn) ((devfn) & 0x07) -#define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn)) +#define PCI_BUILD_BDF(bus, devfn) (((bus) << 8) | (devfn)) #define PCI_BDF_TO_DEVFN(x) ((x) & 0xff) #define PCI_BUS_MAX 256 #define PCI_DEVFN_MAX 256 From 2d6cfbaf174b91dfa9a50065f7494634afb39c23 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Sun, 3 Nov 2024 10:24:15 +0000 Subject: [PATCH 58/65] hw/acpi: Make CPUs ACPI `presence` conditional during vCPU hot-unplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On most architectures, during vCPU hot-plug and hot-unplug actions, the firmware or VMM/QEMU can update the OS on vCPU status by toggling the ACPI method `_STA.Present` bit. However, certain CPU architectures prohibit [1] modifications to a CPU’s `presence` status after the kernel has booted. This limitation [2][3] exists because many per-CPU components, such as interrupt controllers and various per-CPU features tightly integrated with CPUs, may not support reconfiguration once the kernel is initialized. Often, these components cannot be powered down, as they may belong to an `always-on` power domain. As a result, some architectures require all CPUs to remain `_STA.Present` after system initialization. Therefore, it is essential to mirror the exact QOM vCPU status through ACPI for the Guest kernel. For this, we should determine—via architecture-specific code[4]—whether vCPUs must always remain present and whether the associated `AcpiCpuStatus::cpu` object should remain valid, even following a vCPU hot-unplug operation. 
References: [1] Check comment 5 in the bugzilla entry Link: https://bugzilla.tianocore.org/show_bug.cgi?id=4481#c5 [2] KVMForum 2023 Presentation: Challenges Revisited in Supporting Virt CPU Hotplug on architectures that don’t Support CPU Hotplug (like ARM64) a. Kernel Link: https://kvm-forum.qemu.org/2023/KVM-forum-cpu-hotplug_7OJ1YyJ.pdf b. Qemu Link: https://kvm-forum.qemu.org/2023/Challenges_Revisited_in_Supporting_Virt_CPU_Hotplug_-__ii0iNb3.pdf [3] KVMForum 2020 Presentation: Challenges in Supporting Virtual CPU Hotplug on SoC Based Systems (like ARM64) Link: https://kvmforum2020.sched.com/event/eE4m [4] Example implementation of architecture-specific CPU persistence hook Link: https://github.com/salil-mehta/qemu/commit/c0b416b11e5af6505e558866f0eb6c9f3709173e Signed-off-by: Salil Mehta Message-Id: <20241103102419.202225-2-salil.mehta@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cpu.c | 15 ++++++++++++++- include/hw/core/cpu.h | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 5cb60ca8bc..9b03b4292e 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -233,6 +233,17 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, memory_region_add_subregion(as, base_addr, &state->ctrl_reg); } +static bool should_remain_acpi_present(DeviceState *dev) +{ + CPUClass *k = CPU_GET_CLASS(dev); + /* + * A system may contain CPUs that are always present on one die, NUMA node, + * or socket, yet may be non-present on another simultaneously. Check from + * architecture specific code. 
+ */ + return k->cpu_persistent_status && k->cpu_persistent_status(CPU(dev)); +} + static AcpiCpuStatus *get_cpu_status(CPUHotplugState *cpu_st, DeviceState *dev) { CPUClass *k = CPU_GET_CLASS(dev); @@ -289,7 +300,9 @@ void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, return; } - cdev->cpu = NULL; + if (!should_remain_acpi_present(dev)) { + cdev->cpu = NULL; + } } static const VMStateDescription vmstate_cpuhp_sts = { diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index c3ca0babcb..e7de77dc6d 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -158,6 +158,7 @@ struct CPUClass { void (*dump_state)(CPUState *cpu, FILE *, int flags); void (*query_cpu_fast)(CPUState *cpu, CpuInfoFast *value); int64_t (*get_arch_id)(CPUState *cpu); + bool (*cpu_persistent_status)(CPUState *cpu); void (*set_pc)(CPUState *cpu, vaddr value); vaddr (*get_pc)(CPUState *cpu); int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg); From e98411c2cbbac24ff49992a09226a2662726a031 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Sun, 3 Nov 2024 10:24:16 +0000 Subject: [PATCH 59/65] qtest: allow ACPI DSDT Table changes list changed files in tests/qtest/bios-tables-test-allowed-diff.h Reported-by: Zhao Liu Signed-off-by: Salil Mehta Message-Id: <20241103102419.202225-3-salil.mehta@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 41 +++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..512d40665d 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,42 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/x86/pc/DSDT", +"tests/data/acpi/x86/pc/DSDT.acpierst", +"tests/data/acpi/x86/pc/DSDT.acpihmat", +"tests/data/acpi/x86/pc/DSDT.bridge", +"tests/data/acpi/x86/pc/DSDT.cphp", +"tests/data/acpi/x86/pc/DSDT.dimmpxm", +"tests/data/acpi/x86/pc/DSDT.hpbridge", +"tests/data/acpi/x86/pc/DSDT.hpbrroot", +"tests/data/acpi/x86/pc/DSDT.ipmikcs", +"tests/data/acpi/x86/pc/DSDT.memhp", +"tests/data/acpi/x86/pc/DSDT.nohpet", +"tests/data/acpi/x86/pc/DSDT.numamem", +"tests/data/acpi/x86/pc/DSDT.roothp", +"tests/data/acpi/x86/q35/DSDT", +"tests/data/acpi/x86/q35/DSDT.acpierst", +"tests/data/acpi/x86/q35/DSDT.acpihmat", +"tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator", +"tests/data/acpi/x86/q35/DSDT.applesmc", +"tests/data/acpi/x86/q35/DSDT.bridge", +"tests/data/acpi/x86/q35/DSDT.core-count", +"tests/data/acpi/x86/q35/DSDT.core-count2", +"tests/data/acpi/x86/q35/DSDT.cphp", +"tests/data/acpi/x86/q35/DSDT.cxl", +"tests/data/acpi/x86/q35/DSDT.dimmpxm", +"tests/data/acpi/x86/q35/DSDT.ipmibt", +"tests/data/acpi/x86/q35/DSDT.ipmismbus", +"tests/data/acpi/x86/q35/DSDT.ivrs", +"tests/data/acpi/x86/q35/DSDT.memhp", +"tests/data/acpi/x86/q35/DSDT.mmio64", +"tests/data/acpi/x86/q35/DSDT.multi-bridge", +"tests/data/acpi/x86/q35/DSDT.noacpihp", +"tests/data/acpi/x86/q35/DSDT.nohpet", +"tests/data/acpi/x86/q35/DSDT.numamem", +"tests/data/acpi/x86/q35/DSDT.pvpanic-isa", +"tests/data/acpi/x86/q35/DSDT.thread-count", +"tests/data/acpi/x86/q35/DSDT.thread-count2", +"tests/data/acpi/x86/q35/DSDT.tis.tpm12", +"tests/data/acpi/x86/q35/DSDT.tis.tpm2", 
+"tests/data/acpi/x86/q35/DSDT.type4-count", +"tests/data/acpi/x86/q35/DSDT.viot", +"tests/data/acpi/x86/q35/DSDT.xapic", From bf1ecc8dad6061914730a2a2d57af6b37c3a4f8d Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Sun, 3 Nov 2024 10:24:17 +0000 Subject: [PATCH 60/65] hw/acpi: Update ACPI `_STA` method with QOM vCPU ACPI Hotplug states Reflect the QOM vCPUs ACPI CPU hotplug states in the `_STA.Present` and `_STA.Enabled` bits when the guest kernel evaluates the ACPI `_STA` method during initialization, as well as when vCPUs are hot-plugged or hot-unplugged. If the CPU is present, then its `enabled` status can be fetched using architecture-specific code [1]. Reference: [1] Example implementation of architecture-specific hook to fetch CPU `enabled` status Link: https://github.com/salil-mehta/qemu/commit/c0b416b11e5af6505e558866f0eb6c9f3709173e Signed-off-by: Salil Mehta Message-Id: <20241103102419.202225-4-salil.mehta@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cpu.c | 38 ++++++++++++++++++++++++++++++++++---- include/hw/core/cpu.h | 1 + 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 9b03b4292e..23443f09a5 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -50,6 +50,18 @@ void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list) } } +static bool check_cpu_enabled_status(DeviceState *dev) +{ + CPUClass *k = dev ? CPU_GET_CLASS(dev) : NULL; + CPUState *cpu = CPU(dev); + + if (cpu && (!k->cpu_enabled_status || k->cpu_enabled_status(cpu))) { + return true; + } + + return false; +} + static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) { uint64_t val = 0; @@ -63,10 +75,11 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) cdev = &cpu_st->devs[cpu_st->selector]; switch (addr) { case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */ - val |= cdev->cpu ? 
1 : 0; + val |= check_cpu_enabled_status(DEVICE(cdev->cpu)) ? 1 : 0; val |= cdev->is_inserting ? 2 : 0; val |= cdev->is_removing ? 4 : 0; val |= cdev->fw_remove ? 16 : 0; + val |= cdev->cpu ? 32 : 0; trace_cpuhp_acpi_read_flags(cpu_st->selector, val); break; case ACPI_CPU_CMD_DATA_OFFSET_RW: @@ -349,6 +362,7 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPU_REMOVE_EVENT "CRMV" #define CPU_EJECT_EVENT "CEJ0" #define CPU_FW_EJECT_EVENT "CEJF" +#define CPU_PRESENT "CPRS" void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, @@ -409,7 +423,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(field, aml_named_field(CPU_EJECT_EVENT, 1)); /* tell firmware to do device eject, write only */ aml_append(field, aml_named_field(CPU_FW_EJECT_EVENT, 1)); - aml_append(field, aml_reserved_field(3)); + /* 1 if present, read only */ + aml_append(field, aml_named_field(CPU_PRESENT, 1)); + aml_append(field, aml_reserved_field(2)); aml_append(field, aml_named_field(CPU_COMMAND, 8)); aml_append(cpu_ctrl_dev, field); @@ -439,6 +455,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, Aml *ctrl_lock = aml_name("%s.%s", cphp_res_path, CPU_LOCK); Aml *cpu_selector = aml_name("%s.%s", cphp_res_path, CPU_SELECTOR); Aml *is_enabled = aml_name("%s.%s", cphp_res_path, CPU_ENABLED); + Aml *is_present = aml_name("%s.%s", cphp_res_path, CPU_PRESENT); Aml *cpu_cmd = aml_name("%s.%s", cphp_res_path, CPU_COMMAND); Aml *cpu_data = aml_name("%s.%s", cphp_res_path, CPU_DATA); Aml *ins_evt = aml_name("%s.%s", cphp_res_path, CPU_INSERT_EVENT); @@ -467,13 +484,26 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, { Aml *idx = aml_arg(0); Aml *sta = aml_local(0); + Aml *ifctx2; + Aml *else_ctx; aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); aml_append(method, aml_store(idx, cpu_selector)); aml_append(method, 
aml_store(zero, sta)); - ifctx = aml_if(aml_equal(is_enabled, one)); + ifctx = aml_if(aml_equal(is_present, one)); { - aml_append(ifctx, aml_store(aml_int(0xF), sta)); + ifctx2 = aml_if(aml_equal(is_enabled, one)); + { + /* cpu is present and enabled */ + aml_append(ifctx2, aml_store(aml_int(0xF), sta)); + } + aml_append(ifctx, ifctx2); + else_ctx = aml_else(); + { + /* cpu is present but disabled */ + aml_append(else_ctx, aml_store(aml_int(0xD), sta)); + } + aml_append(ifctx, else_ctx); } aml_append(method, ifctx); aml_append(method, aml_release(ctrl_lock)); diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index e7de77dc6d..db8a6fbc6e 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -159,6 +159,7 @@ struct CPUClass { void (*query_cpu_fast)(CPUState *cpu, CpuInfoFast *value); int64_t (*get_arch_id)(CPUState *cpu); bool (*cpu_persistent_status)(CPUState *cpu); + bool (*cpu_enabled_status)(CPUState *cpu); void (*set_pc)(CPUState *cpu, vaddr value); vaddr (*get_pc)(CPUState *cpu); int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg); From 4d62d15b11909e9af121577e707b88f2e4524371 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Mon, 4 Nov 2024 06:28:23 -0500 Subject: [PATCH 61/65] tests/qtest/bios-tables-test: Update DSDT golden masters for x86/{pc,q35} Update DSDT golden master files for x86/pc and x86/q35 platforms to accommodate changes made in the architecture-agnostic CPU AML. These updates notify the guest OS of vCPU hot-plug and hot-unplug status using the ACPI `_STA.Enabled` bit. 
The following is a diff of the changes in the .dsl file generated with IASL: @@ -1480,6 +1480,7 @@ CRMV, 1, CEJ0, 1, CEJF, 1, + CPRS, 1, Offset (0x05), CCMD, 8 } @@ -1514,9 +1515,16 @@ Acquire (\_SB.PCI0.PRES.CPLK, 0xFFFF) \_SB.PCI0.PRES.CSEL = Arg0 Local0 = Zero - If ((\_SB.PCI0.PRES.CPEN == One)) - { - Local0 = 0x0F + If ((\_SB.PCI0.PRES.CPRS == One)) + { + If ((\_SB.PCI0.PRES.CPEN == One)) + { + Local0 = 0x0F + } + Else + { + Local0 = 0x0D + } } Release (\_SB.PCI0.PRES.CPLK) Reported-by: Zhao Liu Signed-off-by: Salil Mehta Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/x86/pc/DSDT | Bin 8526 -> 8560 bytes tests/data/acpi/x86/pc/DSDT.acpierst | Bin 8437 -> 8471 bytes tests/data/acpi/x86/pc/DSDT.acpihmat | Bin 9851 -> 9885 bytes tests/data/acpi/x86/pc/DSDT.bridge | Bin 15397 -> 15431 bytes tests/data/acpi/x86/pc/DSDT.cphp | Bin 8990 -> 9024 bytes tests/data/acpi/x86/pc/DSDT.dimmpxm | Bin 10180 -> 10214 bytes tests/data/acpi/x86/pc/DSDT.hpbridge | Bin 8477 -> 8511 bytes tests/data/acpi/x86/pc/DSDT.hpbrroot | Bin 5033 -> 5067 bytes tests/data/acpi/x86/pc/DSDT.ipmikcs | Bin 8598 -> 8632 bytes tests/data/acpi/x86/pc/DSDT.memhp | Bin 9885 -> 9919 bytes tests/data/acpi/x86/pc/DSDT.nohpet | Bin 8384 -> 8418 bytes tests/data/acpi/x86/pc/DSDT.numamem | Bin 8532 -> 8566 bytes tests/data/acpi/x86/pc/DSDT.roothp | Bin 12319 -> 12353 bytes tests/data/acpi/x86/q35/DSDT | Bin 8355 -> 8389 bytes tests/data/acpi/x86/q35/DSDT.acpierst | Bin 8372 -> 8406 bytes tests/data/acpi/x86/q35/DSDT.acpihmat | Bin 9680 -> 9714 bytes .../acpi/x86/q35/DSDT.acpihmat-noinitiator | Bin 8634 -> 8668 bytes tests/data/acpi/x86/q35/DSDT.applesmc | Bin 8401 -> 8435 bytes tests/data/acpi/x86/q35/DSDT.bridge | Bin 11968 -> 12002 bytes tests/data/acpi/x86/q35/DSDT.core-count | Bin 12913 -> 12947 bytes tests/data/acpi/x86/q35/DSDT.core-count2 | Bin 33770 -> 33804 bytes tests/data/acpi/x86/q35/DSDT.cphp | Bin 8819 -> 8853 bytes tests/data/acpi/x86/q35/DSDT.cxl | Bin 
13146 -> 13180 bytes tests/data/acpi/x86/q35/DSDT.dimmpxm | Bin 10009 -> 10043 bytes tests/data/acpi/x86/q35/DSDT.ipmibt | Bin 8430 -> 8464 bytes tests/data/acpi/x86/q35/DSDT.ipmismbus | Bin 8443 -> 8477 bytes tests/data/acpi/x86/q35/DSDT.ivrs | Bin 8372 -> 8406 bytes tests/data/acpi/x86/q35/DSDT.memhp | Bin 9714 -> 9748 bytes tests/data/acpi/x86/q35/DSDT.mmio64 | Bin 9485 -> 9519 bytes tests/data/acpi/x86/q35/DSDT.multi-bridge | Bin 13208 -> 13242 bytes tests/data/acpi/x86/q35/DSDT.noacpihp | Bin 8235 -> 8269 bytes tests/data/acpi/x86/q35/DSDT.nohpet | Bin 8213 -> 8247 bytes tests/data/acpi/x86/q35/DSDT.numamem | Bin 8361 -> 8395 bytes tests/data/acpi/x86/q35/DSDT.pvpanic-isa | Bin 8456 -> 8490 bytes tests/data/acpi/x86/q35/DSDT.thread-count | Bin 12913 -> 12947 bytes tests/data/acpi/x86/q35/DSDT.thread-count2 | Bin 33770 -> 33804 bytes tests/data/acpi/x86/q35/DSDT.tis.tpm12 | Bin 8961 -> 8995 bytes tests/data/acpi/x86/q35/DSDT.tis.tpm2 | Bin 8987 -> 9021 bytes tests/data/acpi/x86/q35/DSDT.type4-count | Bin 18589 -> 18623 bytes tests/data/acpi/x86/q35/DSDT.viot | Bin 14612 -> 14646 bytes tests/data/acpi/x86/q35/DSDT.xapic | Bin 35718 -> 35752 bytes tests/qtest/bios-tables-test-allowed-diff.h | 41 ------------------ 42 files changed, 41 deletions(-) diff --git a/tests/data/acpi/x86/pc/DSDT b/tests/data/acpi/x86/pc/DSDT index 8b8235fe79e2fa08a6f840c8479edb75f5a047b9..f68a32e6068788c3fdfaff139206b725ba2bf1d1 100644 GIT binary patch delta 125 zcmX@-^udYCCD*Nn-PWAb`lQ#13)G5RbZ4FS%c1_42?!C<8eq%ehD{TK_l_!Abg7I5)y J{>vLE3;@@vBVGUi delta 76 zcmez1bk2#(CD@o=P99EwyZB(I_~<5&$!B=t7&Rtq^C~bgFi#HR)nM{9 go?OFg#^%B1930{}*^W<-i6LR~e_rX$3VZ>=07R=4p8x;= diff --git a/tests/data/acpi/x86/pc/DSDT.acpierst b/tests/data/acpi/x86/pc/DSDT.acpierst index 06829b9c6c6d726d955dc7c99bc9f42448e22aeb..0fd79699ebbfb650a5bbe49faecbebf2fc13a86c 100644 GIT binary patch delta 125 zcmezBINgcMCDP99E2$M|5U_~<6z$!B=t7LiIrkL{mgm^&kKM0kW|QxeN(UK3`KpUt5AmlgtcW0VtCz4Ilyl1Cvq> 
iC<0A0lY9*}21o`&Q&d5dJ`OGd0AQ2<4I8r{4)+pssulqN diff --git a/tests/data/acpi/x86/pc/DSDT.acpihmat b/tests/data/acpi/x86/pc/DSDT.acpihmat index 2fe355ebdbb858fa9247d09112e21712e3eddc45..a4dd09e5ef71a47c31be0d9a57e93767f5d15b3c 100644 GIT binary patch delta 108 zcmezEGuM~PCD fC<075lY9+90!RjvzYR5$3l18Skq{NLKn`6P2n`eu diff --git a/tests/data/acpi/x86/pc/DSDT.bridge b/tests/data/acpi/x86/pc/DSDT.bridge index 4d4067c182a6625db1e877408eb7436113884b50..7ef58152d281c1e2c8da5b15bd30f23e61439d69 100644 GIT binary patch delta 125 zcmZ2lalC@dCD*Nn-PWAb`lQ#13)G5RbZ4FS%c1_42?!C<8eq%ehD{TK_l_!Abg7I5)y J{>!VO2LR(fBWeHu delta 76 zcmX?Jv9yBACD43t0=ecoQZs5D?w$ I&v#w~08Tv}v;Y7A delta 69 zcmV-L0J{IcMxI6rL{mgm9wPt%0nM=rxeN(UTVGQ`Ut5AmlgtcW0VtCz4Ilyl1Cvq> bC<0ATlY9+w0!Rjv@C`MSJPsPOTn^+E!a5Wv diff --git a/tests/data/acpi/x86/pc/DSDT.dimmpxm b/tests/data/acpi/x86/pc/DSDT.dimmpxm index 205219b99d903555125c4b07fc047c42993eb338..34fe3fcad9dc245eb173ca5a9655d17f15bd42ef 100644 GIT binary patch delta 103 zcmX@&|IDAuCD4FZB(gTYECOYupwE@Um>;@xb*H$f5r Dad91& delta 96 zcmaFnf5e~5CDz&#S=1z&tsPSA)sT zb@Bw>NG1=q$**}$rKBdu=(B*-1~_{f1O&MTI|sP>F&1#~Crp+X5Z#=^S0xDmhw>Y! 
diff --git a/tests/data/acpi/x86/pc/DSDT.hpbridge b/tests/data/acpi/x86/pc/DSDT.hpbridge index 8fa8b519ec65bd5099c45f4e1c85b11b47a23845..33c7529f5c8a6ec651f979b3cebadb18774a781f 100644 GIT binary patch delta 125 zcmbR1wBL!#CD*Nn-PWAb`lQ#13)G5RbZ4FS%c1_42?!C<8eq%ehD{TK_l_!Abg7I5)y J{>!T%3;?JoBF+E+ delta 76 zcmdn*G}npCCD3`4>KsmU?=EFdib&YlJVL9W5h0j_?G1zh}_FY)|j0{}jp B8;bw{ diff --git a/tests/data/acpi/x86/pc/DSDT.ipmikcs b/tests/data/acpi/x86/pc/DSDT.ipmikcs index 0ca664688b16baa3a06b8440181de4f17511c6b0..688faf83cbd7395e1112ba04303d66e11934bf8d 100644 GIT binary patch delta 125 zcmbQ{yu+ExCD*Nn-PWAb`lQ#13)G5RbZ4FS%c1_42?!C<8eq%ehD{TK_l_!Abg7I5)y J{>wX47yyb?BK!aV delta 76 zcmV-S0JHzNLzY7dL{mgmmLUKD0p+m@xeN(UK3`KpUt5AmlgtcW0VtCz4Ilyl1Cvq> iC<0A0lY9*}21o`&Q&d5dJ`OGd0AQ2<4I8r{4ww^F-xWOo diff --git a/tests/data/acpi/x86/pc/DSDT.memhp b/tests/data/acpi/x86/pc/DSDT.memhp index 03ff464ba4e72082fce0921815cfc09ca20b561a..6ede4361f4b7bf18c496d7ca117310675126bbbf 100644 GIT binary patch delta 125 zcmbR1yWf|~CD_Are@}oWAs@-8Uma>4FZB(gTYD{NMQ=Q`Y{%8@h2=~E#Tta J{Firu1OUv#BbopJ delta 76 zcmV-S0JHzUO`S~&L{mgmohASP0b8*OxeN(KKVMTqUt5AmlgtcW0VtCz4Ilyl1Cvq> iC;~$?lY9*}21o`&Q&d5dJ`OGd0AQ2<4I8r{4xbo4h!ooZ diff --git a/tests/data/acpi/x86/pc/DSDT.nohpet b/tests/data/acpi/x86/pc/DSDT.nohpet index b081030f0ed171e52b13e28cfdc8770a04c2806e..9d6040733f57f243056ee8f7601af84dd3cf2fa2 100644 GIT binary patch delta 125 zcmX@$_{fpVCDP99Eo$M|5U_~<6z$!B=t7S?YsTctF?l_&shRoY7=0Fyh5%<(CD0CDAX8JH%| zmegSKGM#)>(u~QKWAa}~Q!(?&G5RbZ4FS%c1_42?!Cn7P1y_@g_{(C?mQ# IQOb!80A}|c$^ZZW delta 76 zcmX@=xY&`)CDu1FqxnWIc1ccuyZB(I_~<5&$x@PWj2e?GB^8($m?tlg)L`;8 go_tEujLn11IXJ{|a*vcA6GOseUn%L$c~aqQ02K=qHUIzs diff --git a/tests/data/acpi/x86/q35/DSDT.acpierst b/tests/data/acpi/x86/q35/DSDT.acpierst index 46fd25400b7c00ee9149ddb64cb5d5bd73f6a82b..f91cbe55fcfeea319babf7c9a0c6a6ccdc3320d1 100644 GIT binary patch delta 108 zcmdnuc+HW^CDAX8JH%| zmegSKGM#)>(u~QKWAa}~Q!(?&G5RbZ4FS%c1_42?!Cn7P1y_@g_{(C?mQ# IQ7V`X09c|Ot^fc4 delta 76 
zcmccSxW$pnCD2b8v{`AX8JH%| zmegSKw4Qua(vj1Z!#Oy_kuzF(vZ9Re*F7d%m@zG5llcgl%7&RtWN-8ihFi&0}slnu9 zIr)^NBa;W)WL_y#DXGaZ`Ya%|0nVNV0YR?8&H=7|j0If$36mXVL^tL{mgmx*-4n0ll#b2N?-QP+wC*Ut5AmlNuRb0VtDs86W}x1CyW` iC;~)9lgb%c21o`&Q&d5duNp1_0AQ1F8XL2b8cPRMBNdqd diff --git a/tests/data/acpi/x86/q35/DSDT.applesmc b/tests/data/acpi/x86/q35/DSDT.applesmc index 944209adeaa5bbb722431161c404cb51b8209993..a5d032b7d96113c9393036b2ba831adb6d584142 100644 GIT binary patch delta 108 zcmccU_}P)mCDAX8JH%| zmegSKGM#)>(u~QKWAa}~Q!(?&G5RbZ4FS%c1_42?!Cn7P1y_@g_{(C?mQ# IQL2Cq0GY8KasU7T delta 76 zcmezDc+ru|CDQ`zV&nCD4FZB(gTYEC8%jyDEo3d=;!T*mQATug IqEx&H0I`T3t^fc4 delta 76 zcmaD9dmxs}CD>Gcqtu zo+GKj)E>0?nB-%qo delta 71 zcmV-N0J#5?W$|PRL{mgmaWVh^0k5$N2N?>DTLE8FLSI{gNRt~GUI8eRdKn-B00Wbu d87KmZP_xS!vjPH029tmsHIvL78nf&irxpI;78U>i diff --git a/tests/data/acpi/x86/q35/DSDT.core-count2 b/tests/data/acpi/x86/q35/DSDT.core-count2 index 3a0cb8c581c8cc630a2ec21712b7f8b75fcad1c8..d29a7108f82110ce9f9b4e006501215d41c5420a 100644 GIT binary patch delta 126 zcmaFW&eYSv#N`s~62jBMz`*FVk&9iDtD}@HKG-Qfy2*F4v}7Ek?&K;-1$O6vpkPJ@ zrpa?8HJBQ*HXoCW=izkaa1IV}>&YlJVL9W4I aos*;0q}dj-7I5(llNU;A zFg0gvJ}nu~!{os>Sx3!ON@{Y9J_|^3fU~DTK#*&&bAYQKV*wX`!sJ8^(ames;$s1w CTO4Nq diff --git a/tests/data/acpi/x86/q35/DSDT.cphp b/tests/data/acpi/x86/q35/DSDT.cphp index 20955d0aa30120553da35d5a6640055d26255cf9..7fd59bf6702c04a622f05ae356a2ea37312ab403 100644 GIT binary patch delta 108 zcmezDGS!vKCDQ!(?&G5RbZ4FS%c1_42?!CzZ%vD0Bsc$ diff --git a/tests/data/acpi/x86/q35/DSDT.cxl b/tests/data/acpi/x86/q35/DSDT.cxl index 3c34d4dcab16783abe65f6fa5e64eb69d40795fb..613a40b957dab99fd97b465bd89f6afac3a5b047 100644 GIT binary patch delta 108 zcmcbW_9uAX8JH%| zmegSKGM#)>(u~QKWAa}~Q!(?&G5RbZ4FS%c1_42?!Cn7P1y_@g_{(C?mQ# IQR=4z0JAtBNdN!< delta 76 zcmey9b}Nm`CDAX8JH%o zm(*Z#cAtDxGLp@e!#Oy_aq>bbJxhj!1?H1u^jSdq0-QY!0)kwF!8#U5VG6tYF&1#~ QCoE(w;NsofBsHA}0HD?-HUIzs delta 91 zcmdn(H`9;HCDNE4v?QlTKzy)Md~}n?WIf3^Mvck6k_t==%#*iBYB0IE 
tPQE7@$>hN{*;2|>QfhLHJ_|@~fU~DTK#*&&bAYQKV*wZc<|R_oc>rS!7_9&R diff --git a/tests/data/acpi/x86/q35/DSDT.ipmibt b/tests/data/acpi/x86/q35/DSDT.ipmibt index 45f911ada5645f158f3d6c0c430ec1d52cadc5d8..25f43ae8efb55364a739e6b5e3cb4e71e61862b0 100644 GIT binary patch delta 108 zcmaFoIKhd_CDAX8JH%| zmegSKGM#)>(u~QKWAa}~Q!(?&G5RbZ4FS%c1_42?!Cn7P1y_@g_{(C?mQ# IQL2{>06xVXs{jB1 delta 76 zcmbQ>^v;pXCD`WA;Wac1ccuyZB(I_~<5&$x@PWj2e?GB^8($m?tlg)L`;8 go_tEujLn11IXJ{|a*vcA6GOseUn%L$c~Xnm06Z5J7XSbN diff --git a/tests/data/acpi/x86/q35/DSDT.ipmismbus b/tests/data/acpi/x86/q35/DSDT.ipmismbus index e5d6811bee1233d74236453c49060390d74d4416..32bcd25bda9e9d2775790385f8da6a11e9d5cb46 100644 GIT binary patch delta 108 zcmezEIM<2GCD02)FaPyhe` delta 76 zcmV-S0JHy{Li<4qL{mgm`yc=S0o<_)2N?-ZK3`KpUt5AmlNuRb0VtDs86W}x1CyW` iC<0A0lgb%321o`&Q&d5dj2bQi0AQ0%8XL228m|Uoloh-H diff --git a/tests/data/acpi/x86/q35/DSDT.ivrs b/tests/data/acpi/x86/q35/DSDT.ivrs index 46fd25400b7c00ee9149ddb64cb5d5bd73f6a82b..f91cbe55fcfeea319babf7c9a0c6a6ccdc3320d1 100644 GIT binary patch delta 108 zcmdnuc+HW^CDAX8JH%| zmegSKGM#)>(u~QKWAa}~Q!(?&G5RbZ4FS%c1_42?!Cn7P1y_@g_{(C?mQ# IQ7V`X09c|Ot^fc4 delta 76 zcmccSxW$pnCD2b8v{`50iyCkQ(V|=hvd~}oVWGTrwM%~Gkk_zn30YSlx3`~<} zOKLDVm`y$^X~yKrG5N2gshIiX7=0Fyh5%66_M9ugbu{sJ)SkU6Rw?F+SKSKDxTqweHNNd4FZB(gTYEC8%jyDEo3d=;!T*mQATug IqLjHL0EhJ+B>(^b delta 76 zcmdm$J|msWCDNGPy72LuH(GB8c{ zlh9!DGM!u@VaDXjF?oZ8shRoY7=0Fyh5%u-bvkCDnCDoVSr39y=V|=hvd~}oVAX8JH&X zN@_59nNBvBG-Gn*n4B+ZYGyt;MxOBc1ccm$M|5U_~<6z$x@PWjJlI6B^B761A>AX8JH%| zmegQ!Fq?c-(u~QKWAa}~Q!(?&G5RbZ4FS%c1_42?!Cn7P1y_@g_{(C?mQ# IQObi207+UMdH?_b delta 76 zcmX@@xYCi!CD2b8v{`TqweHNNdUP0Qtck6951J delta 76 zcmZ4G)ZxVC66_Mfp~%3%_;Mo`yCkQ-U3{=pd~}n?WGTrwMvcjpk_t==%##;LYB2d4 gPd+7S#^%B1930{}xkpNmi6LRKuaxxWJgGfw010Fie*gdg diff --git a/tests/data/acpi/x86/q35/DSDT.thread-count b/tests/data/acpi/x86/q35/DSDT.thread-count index a24b04cbdbf09383b933a42a2a15182545543a87..08f5d5f54bcb61235b98fc85bb814046dd038c13 100644 GIT binary patch delta 110 
zcmeyEGC7sYCD>Gcqtu zo+GKj)E>0?nB-%qo delta 71 zcmV-N0J#5?W$|PRL{mgmaWVh^0k5$N2N?>DTLE8FLSI{gNRt~GUI8eRdKn-B00Wbu d87KmZP_xS!vjPH029tmsHIvL78nf&irxpI;78U>i diff --git a/tests/data/acpi/x86/q35/DSDT.thread-count2 b/tests/data/acpi/x86/q35/DSDT.thread-count2 index 3a0cb8c581c8cc630a2ec21712b7f8b75fcad1c8..d29a7108f82110ce9f9b4e006501215d41c5420a 100644 GIT binary patch delta 126 zcmaFW&eYSv#N`s~62jBMz`*FVk&9iDtD}@HKG-Qfy2*F4v}7Ek?&K;-1$O6vpkPJ@ zrpa?8HJBQ*HXoCW=izkaa1IV}>&YlJVL9W4I aos*;0q}dj-7I5(llNU;A zFg0gvJ}nu~!{os>Sx3!ON@{Y9J_|^3fU~DTK#*&&bAYQKV*wX`!sJ8^(ames;$s1w CTO4Nq diff --git a/tests/data/acpi/x86/q35/DSDT.tis.tpm12 b/tests/data/acpi/x86/q35/DSDT.tis.tpm12 index e381ce4cbf2b11f56a2d0537db4d21acc97450c9..29a416f0508655d2bfde01fff4d25ad7f89581d9 100644 GIT binary patch delta 108 zcmZp4TkOW=66_M9tjxf`xML$1yCkQhV|=hvd~}oVWGTrwM%~Gkk_zn30YSlx3`~<} zOKLEAnNB_`X~yKrG5N2gshIiX7=0Fyh5%TqsHV)Nd+bb=E(~rHJE&j gC!dluWAk8h4i0gg+#{vO#E>xAS4w(wp43(j0Oqn2?EnA( diff --git a/tests/data/acpi/x86/q35/DSDT.tis.tpm2 b/tests/data/acpi/x86/q35/DSDT.tis.tpm2 index a09253042ce4a715922027245de8a2ab7449c5b7..59288f02c43cf2efc1555599131fde05dbbaa1cd 100644 GIT binary patch delta 108 zcmbR3w%3izCD0PFk|4*&oF diff --git a/tests/data/acpi/x86/q35/DSDT.type4-count b/tests/data/acpi/x86/q35/DSDT.type4-count index edc23198cdb47a981bcbc82bc8e392b815abb554..eaca76e8e61eb62f75dbdf093e803eea34330deb 100644 GIT binary patch delta 135 zcmbO`k#YY-MlP3NmyrD)3=E928@bpexw;Q9#RogZM>qLSmX?fT)SX-*k8bjqEG-$ws4=-pQh|wqdGbO@ z4W_oun@>xgW@Yzaa}Ex1 J&CCjWGytMf9?Ad! 
diff --git a/tests/data/acpi/x86/q35/DSDT.viot b/tests/data/acpi/x86/q35/DSDT.viot index 4c93dfd5c4b362714d3f9aa606a838d4625b3369..de0942a13dc65b75e78e66ee8df904b31bf6079a 100644 GIT binary patch delta 108 zcmbPIw5^ECCDQ1hbRA6@w2nuFo zV46HfQiG}I@#bTar+L|2Ih=z-94Be$Su!LnFrOTw&jQjH;OuD-5ab#R*0DeeQ`psy Tv4D#|VIgY)7w_hW>PwRWojNEz delta 107 zcmZ26ovCd)6PHV{OGsNc0|VpRja=-KTupD;;)9*yqnkV?OH0NvYD}(@RA6FYp1e>} zgQ@@F=F^g=dD%VKoP$FgIVU%2h)=fC5R+j@SRgeyMxO1bj(qsVVzaPi| diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index 512d40665d..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,42 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/x86/pc/DSDT", -"tests/data/acpi/x86/pc/DSDT.acpierst", -"tests/data/acpi/x86/pc/DSDT.acpihmat", -"tests/data/acpi/x86/pc/DSDT.bridge", -"tests/data/acpi/x86/pc/DSDT.cphp", -"tests/data/acpi/x86/pc/DSDT.dimmpxm", -"tests/data/acpi/x86/pc/DSDT.hpbridge", -"tests/data/acpi/x86/pc/DSDT.hpbrroot", -"tests/data/acpi/x86/pc/DSDT.ipmikcs", -"tests/data/acpi/x86/pc/DSDT.memhp", -"tests/data/acpi/x86/pc/DSDT.nohpet", -"tests/data/acpi/x86/pc/DSDT.numamem", -"tests/data/acpi/x86/pc/DSDT.roothp", -"tests/data/acpi/x86/q35/DSDT", -"tests/data/acpi/x86/q35/DSDT.acpierst", -"tests/data/acpi/x86/q35/DSDT.acpihmat", -"tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator", -"tests/data/acpi/x86/q35/DSDT.applesmc", -"tests/data/acpi/x86/q35/DSDT.bridge", -"tests/data/acpi/x86/q35/DSDT.core-count", -"tests/data/acpi/x86/q35/DSDT.core-count2", -"tests/data/acpi/x86/q35/DSDT.cphp", -"tests/data/acpi/x86/q35/DSDT.cxl", -"tests/data/acpi/x86/q35/DSDT.dimmpxm", -"tests/data/acpi/x86/q35/DSDT.ipmibt", -"tests/data/acpi/x86/q35/DSDT.ipmismbus", -"tests/data/acpi/x86/q35/DSDT.ivrs", -"tests/data/acpi/x86/q35/DSDT.memhp", -"tests/data/acpi/x86/q35/DSDT.mmio64", -"tests/data/acpi/x86/q35/DSDT.multi-bridge", 
-"tests/data/acpi/x86/q35/DSDT.noacpihp", -"tests/data/acpi/x86/q35/DSDT.nohpet", -"tests/data/acpi/x86/q35/DSDT.numamem", -"tests/data/acpi/x86/q35/DSDT.pvpanic-isa", -"tests/data/acpi/x86/q35/DSDT.thread-count", -"tests/data/acpi/x86/q35/DSDT.thread-count2", -"tests/data/acpi/x86/q35/DSDT.tis.tpm12", -"tests/data/acpi/x86/q35/DSDT.tis.tpm2", -"tests/data/acpi/x86/q35/DSDT.type4-count", -"tests/data/acpi/x86/q35/DSDT.viot", -"tests/data/acpi/x86/q35/DSDT.xapic", From 65fb66980d3a918ebe1e665cf6ae4ceb8dea2db1 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Sun, 3 Nov 2024 10:24:19 +0000 Subject: [PATCH 62/65] hw/acpi: Update GED with vCPU Hotplug VMSD for migration The ACPI CPU hotplug states must be migrated along with other vCPU hotplug states to the destination VM. Update the GED's VM State Description (VMSD) table subsection to conditionally include the CPU Hotplug VM State Description (VMSD). Excerpt of GED VMSD State Dump at Source: "acpi-ged (16)": { "ged_state": { "sel": "0x00000000" }, [...] "acpi-ged/cpuhp": { "cpuhp_state": { "selector": "0x00000005", "command": "0x00", "devs": [ { "is_inserting": false, "is_removing": false, "ost_event": "0x00000000", "ost_status": "0x00000000" }, [...] { "is_inserting": false, "is_removing": false, "ost_event": "0x00000000", "ost_status": "0x00000000" } ] } } }, Signed-off-by: Salil Mehta Message-Id: <20241103102419.202225-6-salil.mehta@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/acpi/generic_event_device.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index d00f5a6c1c..663d9cb093 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -331,6 +331,24 @@ static const VMStateDescription vmstate_memhp_state = { } }; +static bool cpuhp_needed(void *opaque) +{ + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + return mc->has_hotpluggable_cpus; +} + +static const VMStateDescription vmstate_cpuhp_state = { + .name = "acpi-ged/cpuhp", + .version_id = 1, + .minimum_version_id = 1, + .needed = cpuhp_needed, + .fields = (VMStateField[]) { + VMSTATE_CPU_HOTPLUG(cpuhp_state, AcpiGedState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_ged_state = { .name = "acpi-ged-state", .version_id = 1, @@ -379,6 +397,7 @@ static const VMStateDescription vmstate_acpi_ged = { }, .subsections = (const VMStateDescription * const []) { &vmstate_memhp_state, + &vmstate_cpuhp_state, &vmstate_ghes_state, NULL } From e70e83f561c45864eeb0945ae0298caa595262d2 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Mon, 4 Nov 2024 20:55:34 +0800 Subject: [PATCH 63/65] intel_iommu: Send IQE event when setting reserved bit in IQT_TAIL According to VTD spec, Figure 11-22, Invalidation Queue Tail Register, "When Descriptor Width (DW) field in Invalidation Queue Address Register (IQA_REG) is Set (256-bit descriptors), hardware treats bit-4 as reserved and a value of 1 in the bit will result in invalidation queue error." The current code fails to send the IQE event to the guest; fix it. Fixes: c0c1d351849b ("intel_iommu: add 256 bits qi_desc support") Suggested-by: Yi Liu Signed-off-by: Zhenzhong Duan Message-Id: <20241104125536.1236118-2-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 8612d0917b..1ecfe47963 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2847,6 +2847,7 @@ static void vtd_handle_iqt_write(IntelIOMMUState *s) if (s->iq_dw && (val & VTD_IQT_QT_256_RSV_BIT)) { error_report_once("%s: RSV bit is set: val=0x%"PRIx64, __func__, val); + vtd_handle_inv_queue_error(s); return; } s->iq_tail = VTD_IQT_QT(s->iq_dw, val); From 8e761fb61cafa95f4f41acaf8e86fae7e898b555 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Mon, 4 Nov 2024 20:55:35 +0800 Subject: [PATCH 64/65] intel_iommu: Add missed sanity check for 256-bit invalidation queue According to the VTD spec, a 256-bit descriptor will result in an invalid descriptor error if submitted in an IQ that is set up to provide hardware with 128-bit descriptors (IQA_REG.DW=0). Meanwhile, there are old inv desc types (e.g. iotlb_inv_desc) that can be either 128 bits or 256 bits. If a 128-bit version of such a descriptor is submitted into an IQ that is set up to provide hardware with 256-bit descriptors, it will also result in an invalid descriptor error. The second case will be captured by the tail register update, so we only need to focus on the first. Because the reserved bit checks for the different types of invalidation descriptors are common, introduce a common function vtd_inv_desc_reserved_check() to do all the checks and pass the differences as parameters. With this change, the error_report_once() calls need to be replaced with error_report() so that errors from the different call sites are all reported. This isn't an issue, as error_report_once() here is mainly used to help debug guest errors, but it only dumps once in the QEMU life cycle and doesn't help much; we need error_report() instead. Fixes: c0c1d351849b ("intel_iommu: add 256 bits qi_desc support") Suggested-by: Yi Liu Signed-off-by: Zhenzhong Duan Message-Id: <20241104125536.1236118-3-zhenzhong.duan@intel.com> Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 80 ++++++++++++++++++++++++---------- hw/i386/intel_iommu_internal.h | 1 + 2 files changed, 59 insertions(+), 22 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 1ecfe47963..2fc3866433 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2532,15 +2532,51 @@ static bool vtd_get_inv_desc(IntelIOMMUState *s, return true; } +static bool vtd_inv_desc_reserved_check(IntelIOMMUState *s, + VTDInvDesc *inv_desc, + uint64_t mask[4], bool dw, + const char *func_name, + const char *desc_type) +{ + if (s->iq_dw) { + if (inv_desc->val[0] & mask[0] || inv_desc->val[1] & mask[1] || + inv_desc->val[2] & mask[2] || inv_desc->val[3] & mask[3]) { + error_report("%s: invalid %s desc val[3]: 0x%"PRIx64 + " val[2]: 0x%"PRIx64" val[1]=0x%"PRIx64 + " val[0]=0x%"PRIx64" (reserved nonzero)", + func_name, desc_type, inv_desc->val[3], + inv_desc->val[2], inv_desc->val[1], + inv_desc->val[0]); + return false; + } + } else { + if (dw) { + error_report("%s: 256-bit %s desc in 128-bit invalidation queue", + func_name, desc_type); + return false; + } + + if (inv_desc->lo & mask[0] || inv_desc->hi & mask[1]) { + error_report("%s: invalid %s desc: hi=%"PRIx64", lo=%"PRIx64 + " (reserved nonzero)", func_name, desc_type, + inv_desc->hi, inv_desc->lo); + return false; + } + } + + return true; +} + static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { - if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) || - (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) { - error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64 - " (reserved nonzero)", __func__, inv_desc->hi, - inv_desc->lo); + uint64_t mask[4] = {VTD_INV_DESC_WAIT_RSVD_LO, VTD_INV_DESC_WAIT_RSVD_HI, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false, + __func__, "wait")) { return false; } + if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) { /* Status Write */ 
uint32_t status_data = (uint32_t)(inv_desc->lo >> @@ -2574,13 +2610,14 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { uint16_t sid, fmask; + uint64_t mask[4] = {VTD_INV_DESC_CC_RSVD, VTD_INV_DESC_ALL_ONE, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; - if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) { - error_report_once("%s: invalid cc inv desc: hi=%"PRIx64", lo=%"PRIx64 - " (reserved nonzero)", __func__, inv_desc->hi, - inv_desc->lo); + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false, + __func__, "cc inv")) { return false; } + switch (inv_desc->lo & VTD_INV_DESC_CC_G) { case VTD_INV_DESC_CC_DOMAIN: trace_vtd_inv_desc_cc_domain( @@ -2610,12 +2647,11 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) uint16_t domain_id; uint8_t am; hwaddr addr; + uint64_t mask[4] = {VTD_INV_DESC_IOTLB_RSVD_LO, VTD_INV_DESC_IOTLB_RSVD_HI, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; - if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) || - (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) { - error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64 - ", lo=0x%"PRIx64" (reserved bits unzero)", - __func__, inv_desc->hi, inv_desc->lo); + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false, + __func__, "iotlb inv")) { return false; } @@ -2705,19 +2741,19 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, hwaddr addr; uint16_t sid; bool size; + uint64_t mask[4] = {VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO, + VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false, + __func__, "dev-iotlb inv")) { + return false; + } addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi); sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo); size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi); - if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) || - (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) { - 
error_report_once("%s: invalid dev-iotlb inv desc: hi=%"PRIx64 - ", lo=%"PRIx64" (reserved nonzero)", __func__, - inv_desc->hi, inv_desc->lo); - return false; - } - /* * Using sid is OK since the guest should have finished the * initialization of both the bus and device. diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 2f9bc0147d..75ccd501b0 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -356,6 +356,7 @@ union VTDInvDesc { typedef union VTDInvDesc VTDInvDesc; /* Masks for struct VTDInvDesc */ +#define VTD_INV_DESC_ALL_ONE -1ULL #define VTD_INV_DESC_TYPE(val) ((((val) >> 5) & 0x70ULL) | \ ((val) & 0xfULL)) #define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */ From 096d96e7be7071aa805c4e70ef51da0b99b6a8fc Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Mon, 4 Nov 2024 20:55:36 +0800 Subject: [PATCH 65/65] intel_iommu: Add missed reserved bit check for IEC descriptor The IEC descriptor is a 128-bit invalidation descriptor and must be padded with 128 bits of 0s in the upper bytes to create a 256-bit descriptor when the invalidation queue is configured for 256-bit descriptors (IQA_REG.DW=1). Fixes: 02a2cbc872df ("x86-iommu: introduce IEC notifiers") Signed-off-by: Zhenzhong Duan Message-Id: <20241104125536.1236118-4-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 8 ++++++++ hw/i386/intel_iommu_internal.h | 3 +++ 2 files changed, 11 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 2fc3866433..4c0d1d7d47 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2692,6 +2692,14 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { + uint64_t mask[4] = {VTD_INV_DESC_IEC_RSVD, VTD_INV_DESC_ALL_ONE, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false, + __func__, "iec inv")) { + return false; + } + trace_vtd_inv_desc_iec(inv_desc->iec.granularity, inv_desc->iec.index, inv_desc->iec.index_mask); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 75ccd501b0..4323fc5d6d 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -410,6 +410,9 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0f1f0 +/* Masks for Interrupt Entry Invalidate Descriptor */ +#define VTD_INV_DESC_IEC_RSVD 0xffff000007fff1e0ULL + /* Rsvd field masks for spte */ #define VTD_SPTE_SNP 0x800ULL