From 44d3d8981402ce7eef435dd912f21fb84f1f7c4d Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 27 Oct 2021 11:10:12 -0400 Subject: [PATCH 1/9] qtest: fix 'expression is always false' build failure in qtest_has_accel() If KVM is disabled or not present, qtest library build may fail with: libqtest.c: In function 'qtest_has_accel': comparison of unsigned expression < 0 is always false [-Werror=type-limits] for (i = 0; i < ARRAY_SIZE(targets); i++) { due to empty 'targets' array. Fix it by making sure that CONFIG_KVM_TARGETS isn't empty. Fixes: e741aff0f43343 ("tests: qtest: add qtest_has_accel() to check if tested binary supports accelerator") Reported-by: Jason Andryuk Suggested-by: "Michael S. Tsirkin" Signed-off-by: Igor Mammedov Message-Id: <20211027151012.2639284-1-imammedo@redhat.com> Tested-by: Jason Andryuk Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index b092728397..ab4a5723f0 100644 --- a/meson.build +++ b/meson.build @@ -75,7 +75,7 @@ else kvm_targets = [] endif -kvm_targets_c = '' +kvm_targets_c = '""' if not get_option('kvm').disabled() and targetos == 'linux' kvm_targets_c = '"' + '" ,"'.join(kvm_targets) + '"' endif From e1c1915befbd2a991b20812eef6ad650b5637a36 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 27 Oct 2021 15:03:24 +0200 Subject: [PATCH 2/9] vhost-vdpa: Set discarding of RAM broken when initializing the backend Similar to VFIO, vDPA will go ahead an map+pin all guest memory. Memory that used to be discarded will get re-populated and if we discard+re-access memory after mapping+pinning, the pages mapped into the vDPA IOMMU will go out of sync with the actual pages mapped into the user space page tables. Set discarding of RAM broken such that: - virtio-mem and vhost-vdpa run mutually exclusive - virtio-balloon is inhibited and no memory discards will get issued In the future, we might be able to support coordinated discarding of RAM as used by virtio-mem and already supported by vfio via the RamDiscardManager. Acked-by: Jason Wang Cc: Jason Wang Cc: Michael S. Tsirkin Cc: Cindy Lu Signed-off-by: David Hildenbrand Message-Id: <20211027130324.59791-1-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- hw/virtio/vhost-vdpa.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 12661fd5b1..0d8051426c 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -331,6 +331,17 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) struct vhost_vdpa *v; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); trace_vhost_vdpa_init(dev, opaque); + int ret; + + /* + * Similar to VFIO, we end up pinning all guest memory and have to + * disable discarding of RAM. + */ + ret = ram_block_discard_disable(true); + if (ret) { + error_report("Cannot set discarding of RAM broken"); + return ret; + } v = opaque; v->dev = dev; @@ -442,6 +453,8 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev) memory_listener_unregister(&v->listener); dev->opaque = NULL; + ram_block_discard_disable(false); + return 0; } From 1f85d74ac542d57559babbe0525633837e9fd52f Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 26 Oct 2021 19:20:20 +0100 Subject: [PATCH 3/9] hw/acpi: Add VIOT table Add a function that generates a Virtual I/O Translation table (VIOT), describing the topology of paravirtual IOMMUs. The table is created if a virtio-iommu device is present. It contains a virtio-iommu node and PCI Range nodes for endpoints managed by the IOMMU. By default, a single node describes all PCI devices. When passing the "default_bus_bypass_iommu" machine option and "bypass_iommu" PXB option, only buses that do not bypass the IOMMU are described by PCI Range nodes. Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Message-Id: <20211026182024.2642038-2-jean-philippe@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/Kconfig | 4 ++ hw/acpi/meson.build | 1 + hw/acpi/viot.c | 114 ++++++++++++++++++++++++++++++++++++++++++++ hw/acpi/viot.h | 13 +++++ 4 files changed, 132 insertions(+) create mode 100644 hw/acpi/viot.c create mode 100644 hw/acpi/viot.h diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig index 3b5e118c54..622b0b50b7 100644 --- a/hw/acpi/Kconfig +++ b/hw/acpi/Kconfig @@ -51,6 +51,10 @@ config ACPI_VMGENID default y depends on PC +config ACPI_VIOT + bool + depends on ACPI + config ACPI_HW_REDUCED bool select ACPI diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index 7d8c0eb43e..adf6347bc4 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -20,6 +20,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'), if_false: files( acpi_ss.add(when: 'CONFIG_ACPI_PIIX4', if_true: files('piix4.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_true: files('pcihp.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_false: files('acpi-pci-hotplug-stub.c')) +acpi_ss.add(when: 'CONFIG_ACPI_VIOT', if_true: files('viot.c')) acpi_ss.add(when: 'CONFIG_ACPI_X86_ICH', if_true: files('ich9.c', 'tco.c')) acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c')) acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c')) diff --git a/hw/acpi/viot.c b/hw/acpi/viot.c new file mode 100644 index 0000000000..c1af75206e --- /dev/null +++ b/hw/acpi/viot.c @@ -0,0 +1,114 @@ +/* + * ACPI Virtual I/O Translation table implementation + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/viot.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" + +struct viot_pci_ranges { + GArray *blob; + size_t count; + uint16_t output_node; +}; + +/* Build PCI range for a given PCI host bridge */ +static int build_pci_range_node(Object *obj, void *opaque) +{ + struct viot_pci_ranges *pci_ranges = opaque; + GArray *blob = pci_ranges->blob; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + int min_bus, max_bus; + + pci_bus_range(bus, &min_bus, &max_bus); + + /* Type */ + build_append_int_noprefix(blob, 1 /* PCI range */, 1); + /* Reserved */ + build_append_int_noprefix(blob, 0, 1); + /* Length */ + build_append_int_noprefix(blob, 24, 2); + /* Endpoint start */ + build_append_int_noprefix(blob, PCI_BUILD_BDF(min_bus, 0), 4); + /* PCI Segment start */ + build_append_int_noprefix(blob, 0, 2); + /* PCI Segment end */ + build_append_int_noprefix(blob, 0, 2); + /* PCI BDF start */ + build_append_int_noprefix(blob, PCI_BUILD_BDF(min_bus, 0), 2); + /* PCI BDF end */ + build_append_int_noprefix(blob, PCI_BUILD_BDF(max_bus, 0xff), 2); + /* Output node */ + build_append_int_noprefix(blob, pci_ranges->output_node, 2); + /* Reserved */ + build_append_int_noprefix(blob, 0, 6); + + pci_ranges->count++; + } + } + + return 0; +} + +/* + * Generate a VIOT table with one PCI-based virtio-iommu that manages PCI + * endpoints. + * + * Defined in the ACPI Specification (Version TBD) + */ +void build_viot(MachineState *ms, GArray *table_data, BIOSLinker *linker, + uint16_t virtio_iommu_bdf, const char *oem_id, + const char *oem_table_id) +{ + /* The virtio-iommu node follows the 48-bytes header */ + int viommu_off = 48; + AcpiTable table = { .sig = "VIOT", .rev = 0, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + struct viot_pci_ranges pci_ranges = { + .output_node = viommu_off, + .blob = g_array_new(false, true /* clear */, 1), + }; + + /* Build the list of PCI ranges that this viommu manages */ + object_child_foreach_recursive(OBJECT(ms), build_pci_range_node, + &pci_ranges); + + /* ACPI table header */ + acpi_table_begin(&table, table_data); + /* Node count */ + build_append_int_noprefix(table_data, pci_ranges.count + 1, 2); + /* Node offset */ + build_append_int_noprefix(table_data, viommu_off, 2); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); + + /* Virtio-iommu node */ + /* Type */ + build_append_int_noprefix(table_data, 3 /* virtio-pci IOMMU */, 1); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 1); + /* Length */ + build_append_int_noprefix(table_data, 16, 2); + /* PCI Segment */ + build_append_int_noprefix(table_data, 0, 2); + /* PCI BDF number */ + build_append_int_noprefix(table_data, virtio_iommu_bdf, 2); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); + + /* PCI ranges found above */ + g_array_append_vals(table_data, pci_ranges.blob->data, + pci_ranges.blob->len); + g_array_free(pci_ranges.blob, true); + + acpi_table_end(linker, &table); +} + diff --git a/hw/acpi/viot.h b/hw/acpi/viot.h new file mode 100644 index 0000000000..9fe565bb87 --- /dev/null +++ b/hw/acpi/viot.h @@ -0,0 +1,13 @@ +/* + * ACPI Virtual I/O Translation Table implementation + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef VIOT_H +#define VIOT_H + +void build_viot(MachineState *ms, GArray *table_data, BIOSLinker *linker, + uint16_t virtio_iommu_bdf, const char *oem_id, + const char *oem_table_id); + +#endif /* VIOT_H */ From 867e9c9f4cbc152aee5ed6157257986ecad29096 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 26 Oct 2021 19:20:21 +0100 Subject: [PATCH 4/9] hw/i386/pc: Remove x86_iommu_get_type() To generate the IOMMU ACPI table, acpi-build.c can use base QEMU types instead of a special IommuType value. Reviewed-by: Eric Auger Reviewed-by: Igor Mammedov Signed-off-by: Jean-Philippe Brucker Message-Id: <20211026182024.2642038-3-jean-philippe@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 20 +++++++++----------- hw/i386/amd_iommu.c | 2 -- hw/i386/intel_iommu.c | 3 --- hw/i386/x86-iommu-stub.c | 5 ----- hw/i386/x86-iommu.c | 5 ----- include/hw/i386/x86-iommu.h | 12 ------------ 6 files changed, 9 insertions(+), 38 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 81418b7911..ab49e799ff 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2488,6 +2488,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); X86MachineState *x86ms = X86_MACHINE(machine); + X86IOMMUState *iommu = x86_iommu_get_default(); GArray *table_offsets; unsigned facs, dsdt, rsdt, fadt; AcpiPmInfo pm; @@ -2604,17 +2605,14 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) build_mcfg(tables_blob, tables->linker, &mcfg, x86ms->oem_id, x86ms->oem_table_id); } - if (x86_iommu_get_default()) { - IommuType IOMMUType = x86_iommu_get_type(); - if (IOMMUType == TYPE_AMD) { - acpi_add_table(table_offsets, tables_blob); - build_amd_iommu(tables_blob, tables->linker, x86ms->oem_id, - x86ms->oem_table_id); - } else if (IOMMUType == TYPE_INTEL) { - acpi_add_table(table_offsets, tables_blob); - build_dmar_q35(tables_blob, tables->linker, x86ms->oem_id, - x86ms->oem_table_id); - } + if (object_dynamic_cast(OBJECT(iommu), TYPE_AMD_IOMMU_DEVICE)) { + acpi_add_table(table_offsets, tables_blob); + build_amd_iommu(tables_blob, tables->linker, x86ms->oem_id, + x86ms->oem_table_id); + } else if (object_dynamic_cast(OBJECT(iommu), TYPE_INTEL_IOMMU_DEVICE)) { + acpi_add_table(table_offsets, tables_blob); + build_dmar_q35(tables_blob, tables->linker, x86ms->oem_id, + x86ms->oem_table_id); } if (machine->nvdimms_state->is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 9242a0d3ed..91fe34ae58 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1538,7 +1538,6 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) { int ret = 0; AMDVIState *s = AMD_IOMMU_DEVICE(dev); - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); MachineState *ms = MACHINE(qdev_get_machine()); PCMachineState *pcms = PC_MACHINE(ms); X86MachineState *x86ms = X86_MACHINE(ms); @@ -1548,7 +1547,6 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) amdvi_uint64_equal, g_free, g_free); /* This device should take care of IOMMU PCI properties */ - x86_iommu->type = TYPE_AMD; if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { return; } diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 75f075547f..c27b20090e 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3806,9 +3806,6 @@ static void vtd_realize(DeviceState *dev, Error **errp) X86MachineState *x86ms = X86_MACHINE(ms); PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); - - x86_iommu->type = TYPE_INTEL; if (!vtd_decide_config(s, errp)) { return; diff --git a/hw/i386/x86-iommu-stub.c b/hw/i386/x86-iommu-stub.c index c5ba077f9d..781b5ff922 100644 --- a/hw/i386/x86-iommu-stub.c +++ b/hw/i386/x86-iommu-stub.c @@ -36,8 +36,3 @@ bool x86_iommu_ir_supported(X86IOMMUState *s) { return false; } - -IommuType x86_iommu_get_type(void) -{ - abort(); -} diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index 86ad03972e..dc968c7a53 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -98,11 +98,6 @@ X86IOMMUState *x86_iommu_get_default(void) return x86_iommu_default; } -IommuType x86_iommu_get_type(void) -{ - return x86_iommu_default->type; -} - static void x86_iommu_realize(DeviceState *dev, Error **errp) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index 9de92d33a1..5ba0c056d6 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -33,12 +33,6 @@ OBJECT_DECLARE_TYPE(X86IOMMUState, X86IOMMUClass, X86_IOMMU_DEVICE) typedef struct X86IOMMUIrq X86IOMMUIrq; typedef struct X86IOMMU_MSIMessage X86IOMMU_MSIMessage; -typedef enum IommuType { - TYPE_INTEL, - TYPE_AMD, - TYPE_NONE -} IommuType; - struct X86IOMMUClass { SysBusDeviceClass parent; /* Intel/AMD specific realize() hook */ @@ -71,7 +65,6 @@ struct X86IOMMUState { OnOffAuto intr_supported; /* Whether vIOMMU supports IR */ bool dt_supported; /* Whether vIOMMU supports DT */ bool pt_supported; /* Whether vIOMMU supports pass-through */ - IommuType type; /* IOMMU type - AMD/Intel */ QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ }; @@ -140,11 +133,6 @@ struct X86IOMMU_MSIMessage { */ X86IOMMUState *x86_iommu_get_default(void); -/* - * x86_iommu_get_type - get IOMMU type - */ -IommuType x86_iommu_get_type(void); - /** * x86_iommu_iec_register_notifier - register IEC (Interrupt Entry * Cache) notifiers From 1b3bf13890fd849b2628ca8c059f8d63c74b9572 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 26 Oct 2021 19:20:22 +0100 Subject: [PATCH 5/9] hw/i386/pc: Move IOMMU singleton into PCMachineState We're about to support a third vIOMMU for x86, virtio-iommu which doesn't inherit X86IOMMUState. Move the IOMMU singleton into PCMachineState, so it can be shared between all three vIOMMUs. The x86_iommu_get_default() helper is still needed by KVM and IOAPIC to fetch the default IRQ-remapping IOMMU. Since virtio-iommu doesn't support IRQ remapping, this interface doesn't need to change for the moment. We could later replace X86IOMMUState with an "IRQ remapping IOMMU" interface if necessary. Reviewed-by: Eric Auger Reviewed-by: Igor Mammedov Signed-off-by: Jean-Philippe Brucker Message-Id: <20211026182024.2642038-4-jean-philippe@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 12 +++++++++++- hw/i386/x86-iommu.c | 28 +++++++++------------------- include/hw/i386/pc.h | 1 + 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 86223acfd3..7b1c4f41cd 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1330,6 +1330,15 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { pc_virtio_md_pci_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + + if (pcms->iommu) { + error_setg(errp, "QEMU does not support multiple vIOMMUs " + "for x86 yet."); + return; + } + pcms->iommu = dev; } } @@ -1384,7 +1393,8 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || object_dynamic_cast(OBJECT(dev), TYPE_CPU) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { return HOTPLUG_HANDLER(machine); } diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index dc968c7a53..01d11325a6 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -77,25 +77,17 @@ void x86_iommu_irq_to_msi_message(X86IOMMUIrq *irq, MSIMessage *msg_out) msg_out->data = msg.msi_data; } -/* Default X86 IOMMU device */ -static X86IOMMUState *x86_iommu_default = NULL; - -static void x86_iommu_set_default(X86IOMMUState *x86_iommu) -{ - assert(x86_iommu); - - if (x86_iommu_default) { - error_report("QEMU does not support multiple vIOMMUs " - "for x86 yet."); - exit(1); - } - - x86_iommu_default = x86_iommu; -} - X86IOMMUState *x86_iommu_get_default(void) { - return x86_iommu_default; + MachineState *ms = MACHINE(qdev_get_machine()); + PCMachineState *pcms = + PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); + + if (pcms && + object_dynamic_cast(OBJECT(pcms->iommu), TYPE_X86_IOMMU_DEVICE)) { + return X86_IOMMU_DEVICE(pcms->iommu); + } + return NULL; } static void x86_iommu_realize(DeviceState *dev, Error **errp) @@ -131,8 +123,6 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) if (x86_class->realize) { x86_class->realize(dev, errp); } - - x86_iommu_set_default(X86_IOMMU_DEVICE(dev)); } static Property x86_iommu_properties[] = { diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 11426e26dc..b72e5bf9d1 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -35,6 +35,7 @@ typedef struct PCMachineState { I2CBus *smbus; PFlashCFI01 *flash[2]; ISADevice *pcspk; + DeviceState *iommu; /* Configuration options: */ uint64_t max_ram_below_4g; From 36efa250a4ff56a73a5a73540c5097ffd7a67170 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 26 Oct 2021 19:20:23 +0100 Subject: [PATCH 6/9] hw/i386/pc: Allow instantiating a virtio-iommu device Allow instantiating a virtio-iommu device by adding an ACPI Virtual I/O Translation table (VIOT), which describes the relation between the virtio-iommu and the endpoints it manages. Add a hotplug handler for virtio-iommu on x86 and set the necessary reserved region property. On x86, the [0xfee00000, 0xfeefffff] DMA region is reserved for MSIs. DMA transactions to this range either trigger IRQ remapping in the IOMMU or bypasses IOMMU translation. Although virtio-iommu does not support IRQ remapping it must be informed of the reserved region so that it can forward DMA transactions targeting this region. Reviewed-by: Eric Auger Reviewed-by: Igor Mammedov Tested-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Message-Id: <20211026182024.2642038-5-jean-philippe@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/Kconfig | 1 + hw/i386/acpi-build.c | 10 +++++++++- hw/i386/pc.c | 16 +++++++++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 962d2c981b..d22ac4a4b9 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -59,6 +59,7 @@ config PC_ACPI select ACPI_X86 select ACPI_CPU_HOTPLUG select ACPI_MEMORY_HOTPLUG + select ACPI_VIOT select SMBUS_EEPROM select PFLASH_CFI01 depends on ACPI_SMBUS diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index ab49e799ff..3ca6cc8118 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -68,9 +68,11 @@ #include "qom/qom-qobject.h" #include "hw/i386/amd_iommu.h" #include "hw/i386/intel_iommu.h" +#include "hw/virtio/virtio-iommu.h" #include "hw/acpi/ipmi.h" #include "hw/acpi/hmat.h" +#include "hw/acpi/viot.h" /* These are used to size the ACPI tables for -M pc-i440fx-1.7 and * -M pc-i440fx-2.0. Even if the actual amount of AML generated grows @@ -2488,7 +2490,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); X86MachineState *x86ms = X86_MACHINE(machine); - X86IOMMUState *iommu = x86_iommu_get_default(); + DeviceState *iommu = pcms->iommu; GArray *table_offsets; unsigned facs, dsdt, rsdt, fadt; AcpiPmInfo pm; @@ -2613,6 +2615,12 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); build_dmar_q35(tables_blob, tables->linker, x86ms->oem_id, x86ms->oem_table_id); + } else if (object_dynamic_cast(OBJECT(iommu), TYPE_VIRTIO_IOMMU_PCI)) { + PCIDevice *pdev = PCI_DEVICE(iommu); + + acpi_add_table(table_offsets, tables_blob); + build_viot(machine, tables_blob, tables->linker, pci_get_bdf(pdev), + x86ms->oem_id, x86ms->oem_table_id); } if (machine->nvdimms_state->is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7b1c4f41cd..e99017e662 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -83,6 +83,7 @@ #include "hw/i386/intel_iommu.h" #include "hw/net/ne2000-isa.h" #include "standard-headers/asm-x86/bootparam.h" +#include "hw/virtio/virtio-iommu.h" #include "hw/virtio/virtio-pmem-pci.h" #include "hw/virtio/virtio-mem-pci.h" #include "hw/mem/memory-device.h" @@ -1330,7 +1331,19 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { pc_virtio_md_pci_pre_plug(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { + /* Declare the APIC range as the reserved MSI region */ + char *resv_prop_str = g_strdup_printf("0xfee00000:0xfeefffff:%d", + VIRTIO_IOMMU_RESV_MEM_T_MSI); + + object_property_set_uint(OBJECT(dev), "len-reserved-regions", 1, errp); + object_property_set_str(OBJECT(dev), "reserved-regions[0]", + resv_prop_str, errp); + g_free(resv_prop_str); + } + + if (object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { PCMachineState *pcms = PC_MACHINE(hotplug_dev); if (pcms->iommu) { @@ -1394,6 +1407,7 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, object_dynamic_cast(OBJECT(dev), TYPE_CPU) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { return HOTPLUG_HANDLER(machine); } From b3dcf94f77fdf5bbea76d7101cc2a300d2550adb Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 28 Oct 2021 12:31:25 +0800 Subject: [PATCH 7/9] pci: Define pci_bus_dev_fn/pci_bus_fn/pci_bus_ret_fn They're used in quite a few places of pci.[ch] and also in the rest of the code base. Define them so that it doesn't need to be defined all over the places. The pci_bus_fn is similar to pci_bus_dev_fn that only takes a PCIBus* and an opaque. The pci_bus_ret_fn is similar to pci_bus_fn but it allows to return a void* pointer. Reviewed-by: David Hildenbrand Reviewed-by: Eric Auger Signed-off-by: Peter Xu Message-Id: <20211028043129.38871-2-peterx@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 20 ++++++-------------- include/hw/pci/pci.h | 19 +++++++++---------- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 186758ee11..17e59cb3a3 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1655,9 +1655,7 @@ static const pci_class_desc pci_class_descriptions[] = }; static void pci_for_each_device_under_bus_reverse(PCIBus *bus, - void (*fn)(PCIBus *b, - PCIDevice *d, - void *opaque), + pci_bus_dev_fn fn, void *opaque) { PCIDevice *d; @@ -1672,8 +1670,7 @@ static void pci_for_each_device_under_bus_reverse(PCIBus *bus, } void pci_for_each_device_reverse(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), - void *opaque) + pci_bus_dev_fn fn, void *opaque) { bus = pci_find_bus_nr(bus, bus_num); @@ -1683,9 +1680,7 @@ void pci_for_each_device_reverse(PCIBus *bus, int bus_num, } static void pci_for_each_device_under_bus(PCIBus *bus, - void (*fn)(PCIBus *b, PCIDevice *d, - void *opaque), - void *opaque) + pci_bus_dev_fn fn, void *opaque) { PCIDevice *d; int devfn; @@ -1699,8 +1694,7 @@ static void pci_for_each_device_under_bus(PCIBus *bus, } void pci_for_each_device(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), - void *opaque) + pci_bus_dev_fn fn, void *opaque) { bus = pci_find_bus_nr(bus, bus_num); @@ -2078,10 +2072,8 @@ static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) return NULL; } -void pci_for_each_bus_depth_first(PCIBus *bus, - void *(*begin)(PCIBus *bus, void *parent_state), - void (*end)(PCIBus *bus, void *state), - void *parent_state) +void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin, + pci_bus_fn end, void *parent_state) { PCIBus *sec; void *state; diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 7fc90132cf..4a8740b76b 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -401,6 +401,10 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); OBJECT_DECLARE_TYPE(PCIBus, PCIBusClass, PCI_BUS) #define TYPE_PCIE_BUS "PCIE" +typedef void (*pci_bus_dev_fn)(PCIBus *b, PCIDevice *d, void *opaque); +typedef void (*pci_bus_fn)(PCIBus *b, void *opaque); +typedef void *(*pci_bus_ret_fn)(PCIBus *b, void *opaque); + bool pci_bus_is_express(PCIBus *bus); void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent, @@ -458,23 +462,18 @@ static inline int pci_dev_bus_num(const PCIDevice *dev) int pci_bus_numa_node(PCIBus *bus); void pci_for_each_device(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque), + pci_bus_dev_fn fn, void *opaque); void pci_for_each_device_reverse(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *bus, PCIDevice *d, - void *opaque), + pci_bus_dev_fn fn, void *opaque); -void pci_for_each_bus_depth_first(PCIBus *bus, - void *(*begin)(PCIBus *bus, void *parent_state), - void (*end)(PCIBus *bus, void *state), - void *parent_state); +void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin, + pci_bus_fn end, void *parent_state); PCIDevice *pci_get_function_0(PCIDevice *pci_dev); /* Use this wrapper when specific scan order is not required. */ static inline -void pci_for_each_bus(PCIBus *bus, - void (*fn)(PCIBus *bus, void *opaque), - void *opaque) +void pci_for_each_bus(PCIBus *bus, pci_bus_fn fn, void *opaque) { pci_for_each_bus_depth_first(bus, NULL, fn, opaque); } From 2914fc61d5d72c6010d2b1fe8b4048b561e44ef3 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 28 Oct 2021 12:31:26 +0800 Subject: [PATCH 8/9] pci: Export pci_for_each_device_under_bus*() They're actually more commonly used than the helper without _under_bus, because most callers do have the pci bus on hand. After exporting we can switch a lot of the call sites to use these two helpers. Reviewed-by: David Hildenbrand Reviewed-by: Eric Auger Signed-off-by: Peter Xu Message-Id: <20211028043129.38871-3-peterx@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: David Gibson --- hw/i386/acpi-build.c | 5 ++--- hw/pci/pci.c | 10 +++++----- hw/pci/pcie.c | 4 +--- hw/ppc/spapr_pci.c | 12 +++++------- hw/ppc/spapr_pci_nvlink2.c | 7 +++---- hw/ppc/spapr_pci_vfio.c | 4 ++-- hw/s390x/s390-pci-bus.c | 5 ++--- hw/xen/xen_pt.c | 4 ++-- include/hw/pci/pci.h | 5 +++++ 9 files changed, 27 insertions(+), 29 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 3ca6cc8118..a3ad6abd33 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2134,8 +2134,7 @@ dmar_host_bridges(Object *obj, void *opaque) PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; if (bus && !pci_bus_bypass_iommu(bus)) { - pci_for_each_device(bus, pci_bus_num(bus), insert_scope, - scope_blob); + pci_for_each_device_under_bus(bus, insert_scope, scope_blob); } } @@ -2341,7 +2340,7 @@ ivrs_host_bridges(Object *obj, void *opaque) PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; if (bus && !pci_bus_bypass_iommu(bus)) { - pci_for_each_device(bus, pci_bus_num(bus), insert_ivhd, ivhd_blob); + pci_for_each_device_under_bus(bus, insert_ivhd, ivhd_blob); } } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 17e59cb3a3..4a84e478ce 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1654,9 +1654,9 @@ static const pci_class_desc pci_class_descriptions[] = { 0, NULL} }; -static void pci_for_each_device_under_bus_reverse(PCIBus *bus, - pci_bus_dev_fn fn, - void *opaque) +void pci_for_each_device_under_bus_reverse(PCIBus *bus, + pci_bus_dev_fn fn, + void *opaque) { PCIDevice *d; int devfn; @@ -1679,8 +1679,8 @@ void pci_for_each_device_reverse(PCIBus *bus, int bus_num, } } -static void pci_for_each_device_under_bus(PCIBus *bus, - pci_bus_dev_fn fn, void *opaque) +void pci_for_each_device_under_bus(PCIBus *bus, + pci_bus_dev_fn fn, void *opaque) { PCIDevice *d; int devfn; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 6e95d82903..914a9bf3d1 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -694,9 +694,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - pcie_unplug_device, NULL); - + pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL); pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS); if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 7430bd6314..5bfd4aa9e5 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1317,8 +1317,7 @@ static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus, RESOURCE_CELLS_SIZE)); assert(bus); - pci_for_each_device_reverse(bus, pci_bus_num(bus), - spapr_dt_pci_device_cb, &cbinfo); + pci_for_each_device_under_bus_reverse(bus, spapr_dt_pci_device_cb, &cbinfo); if (cbinfo.err) { return cbinfo.err; } @@ -2306,8 +2305,8 @@ static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, return; } - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - spapr_phb_pci_enumerate_bridge, bus_no); + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_enumerate_bridge, + bus_no); pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); } @@ -2316,9 +2315,8 @@ static void spapr_phb_pci_enumerate(SpaprPhbState *phb) PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; unsigned int bus_no = 0; - pci_for_each_device(bus, pci_bus_num(bus), - spapr_phb_pci_enumerate_bridge, - &bus_no); + pci_for_each_device_under_bus(bus, spapr_phb_pci_enumerate_bridge, + &bus_no); } diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c index 8ef9b40a18..7fb0cf4d04 100644 --- a/hw/ppc/spapr_pci_nvlink2.c +++ b/hw/ppc/spapr_pci_nvlink2.c @@ -164,8 +164,7 @@ static void spapr_phb_pci_collect_nvgpu(PCIBus *bus, PCIDevice *pdev, return; } - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - spapr_phb_pci_collect_nvgpu, opaque); + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_collect_nvgpu, opaque); } void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) @@ -183,8 +182,8 @@ void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) sphb->nvgpus->nv2_atsd_current = sphb->nv2_atsd_win_addr; bus = PCI_HOST_BRIDGE(sphb)->bus; - pci_for_each_device(bus, pci_bus_num(bus), - spapr_phb_pci_collect_nvgpu, sphb->nvgpus); + pci_for_each_device_under_bus(bus, spapr_phb_pci_collect_nvgpu, + sphb->nvgpus); if (sphb->nvgpus->err) { error_propagate(errp, sphb->nvgpus->err); diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index f3b37df8ea..2a76b4e0b5 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -164,8 +164,8 @@ static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus, static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque) { - pci_for_each_device(bus, pci_bus_num(bus), - spapr_phb_vfio_eeh_clear_dev_msix, NULL); + pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix, + NULL); } static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb) diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 6fafffb029..1b51a72838 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -1163,8 +1163,7 @@ static void s390_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, } /* Assign numbers to all child bridges. The last is the highest number. */ - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - s390_pci_enumerate_bridge, s); + pci_for_each_device_under_bus(sec_bus, s390_pci_enumerate_bridge, s); pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1); } @@ -1193,7 +1192,7 @@ static void s390_pcihost_reset(DeviceState *dev) * on every system reset, we also have to reassign numbers. */ s->bus_no = 0; - pci_for_each_device(bus, pci_bus_num(bus), s390_pci_enumerate_bridge, s); + pci_for_each_device_under_bus(bus, s390_pci_enumerate_bridge, s); } static void s390_pcihost_class_init(ObjectClass *klass, void *data) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index ca0a98187e..027190fa44 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -615,8 +615,8 @@ static void xen_pt_region_update(XenPCIPassthroughState *s, } args.type = d->io_regions[bar].type; - pci_for_each_device(pci_get_bus(d), pci_dev_bus_num(d), - xen_pt_check_bar_overlap, &args); + pci_for_each_device_under_bus(pci_get_bus(d), + xen_pt_check_bar_overlap, &args); if (args.rc) { XEN_PT_WARN(d, "Region: %d (addr: 0x%"FMT_PCIBUS ", len: 0x%"FMT_PCIBUS") is overlapped.\n", diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 4a8740b76b..5c4016b995 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -467,6 +467,11 @@ void pci_for_each_device(PCIBus *bus, int bus_num, void pci_for_each_device_reverse(PCIBus *bus, int bus_num, pci_bus_dev_fn fn, void *opaque); +void pci_for_each_device_under_bus(PCIBus *bus, + pci_bus_dev_fn fn, void *opaque); +void pci_for_each_device_under_bus_reverse(PCIBus *bus, + pci_bus_dev_fn fn, + void *opaque); void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin, pci_bus_fn end, void *parent_state); PCIDevice *pci_get_function_0(PCIDevice *pci_dev); From d99e8b5fcb138b19f751c027ed5599224f9b5036 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Tue, 26 Oct 2021 12:54:05 +0300 Subject: [PATCH 9/9] hw/i386: fix vmmouse registration According to the logic of vmmouse_update_handler function, vmmouse should be registered as an event handler when it's status is zero. vmmouse_read_id resets the status but does not register the handler. This patch adds vmmouse registration and activation when status is reset. Signed-off-by: Pavel Dovgalyuk Message-Id: <163524204515.1914131.16465061981774791228.stgit@pasha-ThinkPad-X280> Signed-off-by: Michael S. Tsirkin --- hw/i386/vmmouse.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/i386/vmmouse.c b/hw/i386/vmmouse.c index df4798f502..3d66368286 100644 --- a/hw/i386/vmmouse.c +++ b/hw/i386/vmmouse.c @@ -158,6 +158,7 @@ static void vmmouse_read_id(VMMouseState *s) s->queue[s->nb_queue++] = VMMOUSE_VERSION; s->status = 0; + vmmouse_update_handler(s, s->absolute); } static void vmmouse_request_relative(VMMouseState *s)