From 4565917bb034479a29c04f0b44124e7f61585ccf Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 29 Aug 2023 16:14:29 -0400 Subject: [PATCH 01/53] pci: SLT must be RO current code sets PCI_SEC_LATENCY_TIMER to RW, but for pcie to pcie bridges it must be RO 0 according to pci express spec which says: This register does not apply to PCI Express. It must be read-only and hardwired to 00h. For PCI Express to PCI/PCI-X Bridges, refer to the [PCIe-to-PCI-PCI-X-Bridge] for requirements for this register. also, fix typo in comment where it's made writeable - this typo is likely what prevented us noticing we violate this requirement in the 1st place. Reported-by: Marcin Juszkiewicz Message-Id: Tested-by: Marcin Juszkiewicz Signed-off-by: Michael S. Tsirkin --- hw/core/machine.c | 5 ++++- hw/pci/pci.c | 2 +- hw/pci/pci_bridge.c | 14 ++++++++++++++ include/hw/pci/pci_bridge.h | 3 +++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index cb38b8cf4c..9ae8f793ae 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -32,6 +32,7 @@ #include "qemu/error-report.h" #include "sysemu/qtest.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bridge.h" #include "hw/mem/nvdimm.h" #include "migration/global_state.h" #include "migration/vmstate.h" @@ -40,7 +41,9 @@ #include "hw/virtio/virtio-pci.h" #include "hw/virtio/virtio-net.h" -GlobalProperty hw_compat_8_1[] = {}; +GlobalProperty hw_compat_8_1[] = { + { TYPE_PCI_BRIDGE, "x-pci-express-writeable-slt-bug", "true" }, +}; const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1); GlobalProperty hw_compat_8_0[] = { diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 881d774fb6..b0d21bf43a 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -893,7 +893,7 @@ static void pci_init_w1cmask(PCIDevice *dev) static void pci_init_mask_bridge(PCIDevice *d) { /* PCI_PRIMARY_BUS, PCI_SECONDARY_BUS, PCI_SUBORDINATE_BUS and - PCI_SEC_LETENCY_TIMER */ + PCI_SEC_LATENCY_TIMER */ memset(d->wmask + PCI_PRIMARY_BUS, 0xff, 4); /* base and limit */ diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index e7b9345615..6a4e38856d 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -38,6 +38,7 @@ #include "qapi/error.h" #include "hw/acpi/acpi_aml_interface.h" #include "hw/acpi/pci.h" +#include "hw/qdev-properties.h" /* PCI bridge subsystem vendor ID helper functions */ #define PCI_SSVID_SIZEOF 8 @@ -385,6 +386,11 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) pci_bridge_region_init(br); QLIST_INIT(&sec_bus->child); QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); + + /* For express secondary buses, secondary latency timer is RO 0 */ + if (pci_bus_is_express(sec_bus) && !br->pcie_writeable_slt_bug) { + dev->wmask[PCI_SEC_LATENCY_TIMER] = 0; + } } /* default qdev clean up function for PCI-to-PCI bridge */ @@ -466,10 +472,18 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, return 0; } +static Property pci_bridge_properties[] = { + DEFINE_PROP_BOOL("x-pci-express-writeable-slt-bug", PCIBridge, + pcie_writeable_slt_bug, false), + DEFINE_PROP_END_OF_LIST(), +}; + static void pci_bridge_class_init(ObjectClass *klass, void *data) { AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass); + DeviceClass *k = DEVICE_CLASS(klass); + device_class_set_props(k, pci_bridge_properties); adevc->build_dev_aml = build_pci_bridge_aml; } diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index ea54a81a15..5cd452115a 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -77,6 +77,9 @@ struct PCIBridge { pci_map_irq_fn map_irq; const char *bus_name; + + /* SLT is RO for PCIE to PCIE bridges, but old QEMU versions had it RW */ + bool pcie_writeable_slt_bug; }; #define PCI_BRIDGE_DEV_PROP_CHASSIS_NR "chassis_nr" From 961d60e934e793a6065fb17d2312d5bced25031e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 10 Jul 2023 11:49:26 +0200 Subject: [PATCH 02/53] hw/virtio: Propagate page_mask to vhost_vdpa_listener_skipped_section() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to make vhost-vdpa.c a target-agnostic source unit, we need to remove the TARGET_PAGE_SIZE / TARGET_PAGE_MASK / TARGET_PAGE_ALIGN uses. TARGET_PAGE_SIZE will be replaced by the runtime qemu_target_page_size(). The other ones will be deduced from TARGET_PAGE_SIZE. Since the 3 macros are used in 3 related functions (sharing the same call tree), we'll refactor them to only depend on TARGET_PAGE_MASK. Having the following call tree: vhost_vdpa_listener_region_del() -> vhost_vdpa_listener_skipped_section() -> vhost_vdpa_section_end() The first step is to propagate TARGET_PAGE_MASK to vhost_vdpa_listener_skipped_section(). Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20230710094931.84402-2-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 42f2a4bae9..118c588205 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -42,7 +42,8 @@ static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section) static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, uint64_t iova_min, - uint64_t iova_max) + uint64_t iova_max, + int page_mask) { Int128 llend; @@ -313,7 +314,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, int ret; if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, - v->iova_range.last)) { + v->iova_range.last, TARGET_PAGE_MASK)) { return; } if (memory_region_is_iommu(section->mr)) { @@ -398,7 +399,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, int ret; if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, - v->iova_range.last)) { + v->iova_range.last, TARGET_PAGE_MASK)) { return; } if (memory_region_is_iommu(section->mr)) { From 8b1a8884c6aacd9a35863d18a757be17ec7b1369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 10 Jul 2023 11:49:27 +0200 Subject: [PATCH 03/53] hw/virtio: Propagate page_mask to vhost_vdpa_section_end() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Propagate TARGET_PAGE_MASK (see the previous commit for rationale). Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20230710094931.84402-3-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 118c588205..3ab0dc0b5b 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -31,11 +31,12 @@ * Return one past the end of the end of section. Be careful with uint64_t * conversions! */ -static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section) +static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section, + int page_mask) { Int128 llend = int128_make64(section->offset_within_address_space); llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + llend = int128_and(llend, int128_exts64(page_mask)); return llend; } @@ -69,7 +70,7 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, */ if (!memory_region_is_iommu(section->mr)) { - llend = vhost_vdpa_section_end(section); + llend = vhost_vdpa_section_end(section, page_mask); if (int128_gt(llend, int128_make64(iova_max))) { error_report("RAM section out of device range (max=0x%" PRIx64 ", end addr=0x%" PRIx64 ")", @@ -331,7 +332,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, } iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - llend = vhost_vdpa_section_end(section); + llend = vhost_vdpa_section_end(section, TARGET_PAGE_MASK); if (int128_ge(int128_make64(iova), llend)) { return; } @@ -415,7 +416,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, } iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - llend = vhost_vdpa_section_end(section); + llend = vhost_vdpa_section_end(section, TARGET_PAGE_MASK); trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(int128_sub(llend, int128_one()))); From 1dca36fb3d4f07354c9f6bc38b6e5c72fe1e9855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 10 Jul 2023 11:49:28 +0200 Subject: [PATCH 04/53] hw/virtio/vhost-vdpa: Inline TARGET_PAGE_ALIGN() macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use TARGET_PAGE_SIZE to calculate TARGET_PAGE_ALIGN (see the rationale in previous commits). Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20230710094931.84402-4-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 3ab0dc0b5b..0e0ed6d7ac 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -331,7 +331,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, return; } - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + iova = ROUND_UP(section->offset_within_address_space, TARGET_PAGE_SIZE); llend = vhost_vdpa_section_end(section, TARGET_PAGE_MASK); if (int128_ge(int128_make64(iova), llend)) { return; @@ -415,7 +415,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, return; } - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + iova = ROUND_UP(section->offset_within_address_space, TARGET_PAGE_SIZE); llend = vhost_vdpa_section_end(section, TARGET_PAGE_MASK); trace_vhost_vdpa_listener_region_del(v, iova, From 33f21860b766701f92c01094dcfc5390974d4020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 10 Jul 2023 11:49:29 +0200 Subject: [PATCH 05/53] hw/virtio/vhost-vdpa: Use target-agnostic qemu_target_page_mask() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to commit e414ed2c47 ("virtio-iommu: Use target-agnostic qemu_target_page_mask"), Replace the target-specific TARGET_PAGE_SIZE and TARGET_PAGE_MASK definitions by a call to the runtime qemu_target_page_size() helper which is target agnostic. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20230710094931.84402-5-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 0e0ed6d7ac..50b932a930 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -14,6 +14,7 @@ #include #include #include +#include "exec/target_page.h" #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio-net.h" @@ -23,7 +24,6 @@ #include "migration/blocker.h" #include "qemu/cutils.h" #include "qemu/main-loop.h" -#include "cpu.h" #include "trace.h" #include "qapi/error.h" @@ -313,9 +313,11 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, Int128 llend, llsize; void *vaddr; int ret; + int page_size = qemu_target_page_size(); + int page_mask = -page_size; if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, - v->iova_range.last, TARGET_PAGE_MASK)) { + v->iova_range.last, page_mask)) { return; } if (memory_region_is_iommu(section->mr)) { @@ -323,16 +325,16 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, return; } - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { + if (unlikely((section->offset_within_address_space & ~page_mask) != + (section->offset_within_region & ~page_mask))) { trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, - section->offset_within_address_space & ~TARGET_PAGE_MASK, - section->offset_within_region & ~TARGET_PAGE_MASK); + section->offset_within_address_space & ~page_mask, + section->offset_within_region & ~page_mask); return; } - iova = ROUND_UP(section->offset_within_address_space, TARGET_PAGE_SIZE); - llend = vhost_vdpa_section_end(section, TARGET_PAGE_MASK); + iova = ROUND_UP(section->offset_within_address_space, page_size); + llend = vhost_vdpa_section_end(section, page_mask); if (int128_ge(int128_make64(iova), llend)) { return; } @@ -398,25 +400,27 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, hwaddr iova; Int128 llend, llsize; int ret; + int page_size = qemu_target_page_size(); + int page_mask = -page_size; if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, - v->iova_range.last, TARGET_PAGE_MASK)) { + v->iova_range.last, page_mask)) { return; } if (memory_region_is_iommu(section->mr)) { vhost_vdpa_iommu_region_del(listener, section); } - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { + if (unlikely((section->offset_within_address_space & ~page_mask) != + (section->offset_within_region & ~page_mask))) { trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, - section->offset_within_address_space & ~TARGET_PAGE_MASK, - section->offset_within_region & ~TARGET_PAGE_MASK); + section->offset_within_address_space & ~page_mask, + section->offset_within_region & ~page_mask); return; } - iova = ROUND_UP(section->offset_within_address_space, TARGET_PAGE_SIZE); - llend = vhost_vdpa_section_end(section, TARGET_PAGE_MASK); + iova = ROUND_UP(section->offset_within_address_space, page_size); + llend = vhost_vdpa_section_end(section, page_mask); trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(int128_sub(llend, int128_one()))); From 05632635f84311f241ad4dbffdb591f97339a5dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 10 Jul 2023 12:04:32 +0200 Subject: [PATCH 06/53] hw/virtio: Build vhost-vdpa.o once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit removed the dependencies on the target-specific TARGET_PAGE_FOO macros. We can now move vhost-vdpa.c to the 'softmmu_virtio_ss' source set to build it once for all our targets. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20230710100432.84819-1-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/meson.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 13e7c6c272..9737450afd 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -18,7 +18,8 @@ if have_vhost specific_virtio_ss.add(files('vhost-user.c')) endif if have_vhost_vdpa - specific_virtio_ss.add(files('vhost-vdpa.c', 'vhost-shadow-virtqueue.c')) + softmmu_virtio_ss.add(files('vhost-vdpa.c')) + specific_virtio_ss.add(files('vhost-shadow-virtqueue.c')) endif else softmmu_virtio_ss.add(files('vhost-stub.c')) From f05356f84d2e3cb4f6437716cc9b5dc59baf769d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 10 Jul 2023 12:05:10 +0200 Subject: [PATCH 07/53] hw/virtio/meson: Rename softmmu_virtio_ss[] -> system_virtio_ss[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to commit de6cd7599b ("meson: Replace softmmu_ss -> system_ss"), rename the virtio source set common to all system emulation as 'system_virtio_ss[]'. This is clearer because softmmu can be used for user emulation. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20230710100510.84862-1-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/meson.build | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 9737450afd..4e81d10834 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -1,28 +1,28 @@ -softmmu_virtio_ss = ss.source_set() -softmmu_virtio_ss.add(files('virtio-bus.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) -softmmu_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) +system_virtio_ss = ss.source_set() +system_virtio_ss.add(files('virtio-bus.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) +system_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) +system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) specific_virtio_ss = ss.source_set() specific_virtio_ss.add(files('virtio.c')) specific_virtio_ss.add(files('virtio-config-io.c', 'virtio-qmp.c')) if have_vhost - softmmu_virtio_ss.add(files('vhost.c')) + system_virtio_ss.add(files('vhost.c')) specific_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) if have_vhost_user specific_virtio_ss.add(files('vhost-user.c')) endif if have_vhost_vdpa - softmmu_virtio_ss.add(files('vhost-vdpa.c')) + system_virtio_ss.add(files('vhost-vdpa.c')) specific_virtio_ss.add(files('vhost-shadow-virtqueue.c')) endif else - softmmu_virtio_ss.add(files('vhost-stub.c')) + system_virtio_ss.add(files('vhost-stub.c')) endif specific_virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) @@ -68,7 +68,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MD', if_true: files('virtio-md-pci.c')) specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) -system_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) +system_ss.add_all(when: 'CONFIG_VIRTIO', if_true: system_virtio_ss) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('virtio-stub.c')) system_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) From eee77809733d3a94c0d78a29a93d032c1faefd2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 10 Jul 2023 16:35:09 +0100 Subject: [PATCH 08/53] virtio: add vhost-user-base and a generic vhost-user-device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In theory we shouldn't need to repeat so much boilerplate to support vhost-user backends. This provides a generic vhost-user-base QOM object and a derived vhost-user-device for which the user needs to provide the few bits of information that aren't currently provided by the vhost-user protocol. This should provide a baseline implementation from which the other vhost-user stub can specialise. Signed-off-by: Alex Bennée Message-Id: <20230710153522.3469097-8-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/meson.build | 3 + hw/virtio/vhost-user-device-pci.c | 71 ++++++ hw/virtio/vhost-user-device.c | 324 ++++++++++++++++++++++++++ include/hw/virtio/vhost-user-device.h | 45 ++++ 4 files changed, 443 insertions(+) create mode 100644 hw/virtio/vhost-user-device-pci.c create mode 100644 hw/virtio/vhost-user-device.c create mode 100644 include/hw/virtio/vhost-user-device.h diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 4e81d10834..c0055a7832 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -15,7 +15,10 @@ if have_vhost system_virtio_ss.add(files('vhost.c')) specific_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) if have_vhost_user + # fixme - this really should be generic specific_virtio_ss.add(files('vhost-user.c')) + system_virtio_ss.add(files('vhost-user-device.c')) + system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) endif if have_vhost_vdpa system_virtio_ss.add(files('vhost-vdpa.c')) diff --git a/hw/virtio/vhost-user-device-pci.c b/hw/virtio/vhost-user-device-pci.c new file mode 100644 index 0000000000..41f9b7905b --- /dev/null +++ b/hw/virtio/vhost-user-device-pci.c @@ -0,0 +1,71 @@ +/* + * Vhost-user generic virtio device PCI glue + * + * Copyright (c) 2023 Linaro Ltd + * Author: Alex Bennée + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-device.h" +#include "hw/virtio/virtio-pci.h" + +struct VHostUserDevicePCI { + VirtIOPCIProxy parent_obj; + VHostUserBase vub; +}; + +typedef struct VHostUserDevicePCI VHostUserDevicePCI; + +#define TYPE_VHOST_USER_DEVICE_PCI "vhost-user-device-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserDevicePCI, + VHOST_USER_DEVICE_PCI, + TYPE_VHOST_USER_DEVICE_PCI) + +static void vhost_user_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vub); + + vpci_dev->nvectors = 1; + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_device_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_device_pci_realize; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_device_pci_instance_init(Object *obj) +{ + VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(obj); + + virtio_instance_init_common(obj, &dev->vub, sizeof(dev->vub), + TYPE_VHOST_USER_DEVICE); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_device_pci_info = { + .base_name = TYPE_VHOST_USER_DEVICE_PCI, + .non_transitional_name = "vhost-user-device-pci", + .instance_size = sizeof(VHostUserDevicePCI), + .instance_init = vhost_user_device_pci_instance_init, + .class_init = vhost_user_device_pci_class_init, +}; + +static void vhost_user_device_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_device_pci_info); +} + +type_init(vhost_user_device_pci_register); diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c new file mode 100644 index 0000000000..b0239fa033 --- /dev/null +++ b/hw/virtio/vhost-user-device.c @@ -0,0 +1,324 @@ +/* + * Generic vhost-user stub. This can be used to connect to any + * vhost-user backend. All configuration details must be handled by + * the vhost-user daemon itself + * + * Copyright (c) 2023 Linaro Ltd + * Author: Alex Bennée + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-device.h" +#include "qemu/error-report.h" + +static void vub_start(VirtIODevice *vdev) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + int ret, i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&vub->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + vub->vhost_dev.acked_features = vdev->guest_features; + + ret = vhost_dev_start(&vub->vhost_dev, vdev, true); + if (ret < 0) { + error_report("Error starting vhost-user-device: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < vub->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&vub->vhost_dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); +} + +static void vub_stop(VirtIODevice *vdev) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&vub->vhost_dev, vdev, true); + + ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); +} + +static void vub_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + bool should_start = virtio_device_should_start(vdev, status); + + if (vhost_dev_is_started(&vub->vhost_dev) == should_start) { + return; + } + + if (should_start) { + vub_start(vdev); + } else { + vub_stop(vdev); + } +} + +/* + * For an implementation where everything is delegated to the backend + * we don't do anything other than return the full feature set offered + * by the daemon (module the reserved feature bit). + */ +static uint64_t vub_get_features(VirtIODevice *vdev, + uint64_t requested_features, Error **errp) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + /* This should be set when the vhost connection initialises */ + g_assert(vub->vhost_dev.features); + return vub->vhost_dev.features & ~(1ULL << VHOST_USER_F_PROTOCOL_FEATURES); +} + +static void vub_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. + */ +} + +static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserBase *vub) +{ + vhost_user_cleanup(&vub->vhost_user); + + for (int i = 0; i < vub->num_vqs; i++) { + VirtQueue *vq = g_ptr_array_index(vub->vqs, i); + virtio_delete_queue(vq); + } + + virtio_cleanup(vdev); +} + +static int vub_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + + if (vub->connected) { + return 0; + } + vub->connected = true; + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vub_start(vdev); + } + + return 0; +} + +static void vub_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + + if (!vub->connected) { + return; + } + vub->connected = false; + + if (vhost_dev_is_started(&vub->vhost_dev)) { + vub_stop(vdev); + } +} + +static void vub_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(vdev); + + switch (event) { + case CHR_EVENT_OPENED: + if (vub_connect(dev) < 0) { + qemu_chr_fe_disconnect(&vub->chardev); + return; + } + break; + case CHR_EVENT_CLOSED: + vub_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static void vub_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(dev); + int ret; + + if (!vub->chardev.chr) { + error_setg(errp, "vhost-user-device: missing chardev"); + return; + } + + if (!vub->virtio_id) { + error_setg(errp, "vhost-user-device: need to define device id"); + return; + } + + if (!vub->num_vqs) { + vub->num_vqs = 1; /* reasonable default? */ + } + + if (!vhost_user_init(&vub->vhost_user, &vub->chardev, errp)) { + return; + } + + virtio_init(vdev, vub->virtio_id, 0); + + /* + * Disable guest notifiers, by default all notifications will be via the + * asynchronous vhost-user socket. + */ + vdev->use_guest_notifier_mask = false; + + /* Allocate queues */ + vub->vqs = g_ptr_array_sized_new(vub->num_vqs); + for (int i = 0; i < vub->num_vqs; i++) { + g_ptr_array_add(vub->vqs, + virtio_add_queue(vdev, 4, vub_handle_output)); + } + + vub->vhost_dev.nvqs = vub->num_vqs; + vub->vhost_dev.vqs = g_new0(struct vhost_virtqueue, vub->vhost_dev.nvqs); + + /* connect to backend */ + ret = vhost_dev_init(&vub->vhost_dev, &vub->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + + if (ret < 0) { + do_vhost_user_cleanup(vdev, vub); + } + + qemu_chr_fe_set_handlers(&vub->chardev, NULL, NULL, vub_event, NULL, + dev, NULL, true); +} + +static void vub_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBase *vub = VHOST_USER_BASE(dev); + struct vhost_virtqueue *vhost_vqs = vub->vhost_dev.vqs; + + /* This will stop vhost backend if appropriate. */ + vub_set_status(vdev, 0); + vhost_dev_cleanup(&vub->vhost_dev); + g_free(vhost_vqs); + do_vhost_user_cleanup(vdev, vub); +} + +static void vub_class_init(ObjectClass *klass, void *data) +{ + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + vdc->realize = vub_device_realize; + vdc->unrealize = vub_device_unrealize; + vdc->get_features = vub_get_features; + vdc->set_status = vub_set_status; +} + +static const TypeInfo vub_info = { + .name = TYPE_VHOST_USER_BASE, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserBase), + .class_init = vub_class_init, + .class_size = sizeof(VHostUserBaseClass), + .abstract = true +}; + + +/* + * The following is a concrete implementation of the base class which + * allows the user to define the key parameters via the command line. + */ + +static const VMStateDescription vud_vmstate = { + .name = "vhost-user-device", + .unmigratable = 1, +}; + +static Property vud_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), + DEFINE_PROP_UINT16("virtio-id", VHostUserBase, virtio_id, 0), + DEFINE_PROP_UINT32("num_vqs", VHostUserBase, num_vqs, 1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vud_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, vud_properties); + dc->vmsd = &vud_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); +} + +static const TypeInfo vud_info = { + .name = TYPE_VHOST_USER_DEVICE, + .parent = TYPE_VHOST_USER_BASE, + .instance_size = sizeof(VHostUserBase), + .class_init = vud_class_init, + .class_size = sizeof(VHostUserBaseClass), +}; + +static void vu_register_types(void) +{ + type_register_static(&vub_info); + type_register_static(&vud_info); +} + +type_init(vu_register_types) diff --git a/include/hw/virtio/vhost-user-device.h b/include/hw/virtio/vhost-user-device.h new file mode 100644 index 0000000000..9105011e25 --- /dev/null +++ b/include/hw/virtio/vhost-user-device.h @@ -0,0 +1,45 @@ +/* + * Vhost-user generic virtio device + * + * Copyright (c) 2023 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef QEMU_VHOST_USER_DEVICE_H +#define QEMU_VHOST_USER_DEVICE_H + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" + +#define TYPE_VHOST_USER_BASE "vhost-user-base" + +OBJECT_DECLARE_TYPE(VHostUserBase, VHostUserBaseClass, VHOST_USER_BASE) + +struct VHostUserBase { + VirtIODevice parent; + /* Properties */ + CharBackend chardev; + uint16_t virtio_id; + uint32_t num_vqs; + /* State tracking */ + VhostUserState vhost_user; + struct vhost_virtqueue *vhost_vq; + struct vhost_dev vhost_dev; + GPtrArray *vqs; + bool connected; +}; + + /* needed so we can use the base realize after specialisation + tweaks */ +struct VHostUserBaseClass { + /*< private >*/ + VirtioDeviceClass parent_class; + /*< public >*/ + DeviceRealize parent_realize; +}; + +/* shared for the benefit of the derived pci class */ +#define TYPE_VHOST_USER_DEVICE "vhost-user-device" + +#endif /* QEMU_VHOST_USER_DEVICE_H */ From f92a2d61cd86fd585b1b2a57295fcde278aebd78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 10 Jul 2023 16:35:12 +0100 Subject: [PATCH 09/53] hw/virtio: add config support to vhost-user-device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To use the generic device the user will need to provide the config region size via the command line. We also add a notifier so the guest can be pinged if the remote daemon updates the config. With these changes: -device vhost-user-device-pci,virtio-id=41,num_vqs=2,config_size=8 is equivalent to: -device vhost-user-gpio-pci Signed-off-by: Alex Bennée Message-Id: <20230710153522.3469097-11-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user-device.c | 58 ++++++++++++++++++++++++++- include/hw/virtio/vhost-user-device.h | 1 + 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c index b0239fa033..2b028cae08 100644 --- a/hw/virtio/vhost-user-device.c +++ b/hw/virtio/vhost-user-device.c @@ -117,6 +117,42 @@ static uint64_t vub_get_features(VirtIODevice *vdev, return vub->vhost_dev.features & ~(1ULL << VHOST_USER_F_PROTOCOL_FEATURES); } +/* + * To handle VirtIO config we need to know the size of the config + * space. We don't cache the config but re-fetch it from the guest + * every time in case something has changed. + */ +static void vub_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostUserBase *vub = VHOST_USER_BASE(vdev); + Error *local_err = NULL; + + /* + * There will have been a warning during vhost_dev_init, but lets + * assert here as nothing will go right now. + */ + g_assert(vub->config_size && vub->vhost_user.supports_config == true); + + if (vhost_dev_get_config(&vub->vhost_dev, config, + vub->config_size, &local_err)) { + error_report_err(local_err); + } +} + +/* + * When the daemon signals an update to the config we just need to + * signal the guest as we re-read the config on demand above. + */ +static int vub_config_notifier(struct vhost_dev *dev) +{ + virtio_notify_config(dev->vdev); + return 0; +} + +const VhostDevConfigOps vub_config_ops = { + .vhost_dev_config_notifier = vub_config_notifier, +}; + static void vub_handle_output(VirtIODevice *vdev, VirtQueue *vq) { /* @@ -141,12 +177,21 @@ static int vub_connect(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBase *vub = VHOST_USER_BASE(vdev); + struct vhost_dev *vhost_dev = &vub->vhost_dev; if (vub->connected) { return 0; } vub->connected = true; + /* + * If we support VHOST_USER_GET_CONFIG we must enable the notifier + * so we can ping the guest when it updates. + */ + if (vub->vhost_user.supports_config) { + vhost_dev_set_config_notifier(vhost_dev, &vub_config_ops); + } + /* restore vhost state */ if (virtio_device_started(vdev, vdev->status)) { vub_start(vdev); @@ -214,11 +259,20 @@ static void vub_device_realize(DeviceState *dev, Error **errp) vub->num_vqs = 1; /* reasonable default? */ } + /* + * We can't handle config requests unless we know the size of the + * config region, specialisations of the vhost-user-device will be + * able to set this. + */ + if (vub->config_size) { + vub->vhost_user.supports_config = true; + } + if (!vhost_user_init(&vub->vhost_user, &vub->chardev, errp)) { return; } - virtio_init(vdev, vub->virtio_id, 0); + virtio_init(vdev, vub->virtio_id, vub->config_size); /* * Disable guest notifiers, by default all notifications will be via the @@ -268,6 +322,7 @@ static void vub_class_init(ObjectClass *klass, void *data) vdc->realize = vub_device_realize; vdc->unrealize = vub_device_unrealize; vdc->get_features = vub_get_features; + vdc->get_config = vub_get_config; vdc->set_status = vub_set_status; } @@ -295,6 +350,7 @@ static Property vud_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), DEFINE_PROP_UINT16("virtio-id", VHostUserBase, virtio_id, 0), DEFINE_PROP_UINT32("num_vqs", VHostUserBase, num_vqs, 1), + DEFINE_PROP_UINT32("config_size", VHostUserBase, config_size, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/virtio/vhost-user-device.h b/include/hw/virtio/vhost-user-device.h index 9105011e25..3ddf88a146 100644 --- a/include/hw/virtio/vhost-user-device.h +++ b/include/hw/virtio/vhost-user-device.h @@ -22,6 +22,7 @@ struct VHostUserBase { CharBackend chardev; uint16_t virtio_id; uint32_t num_vqs; + uint32_t config_size; /* State tracking */ VhostUserState vhost_user; struct vhost_virtqueue *vhost_vq; From 06b636a1e2ad12ab130edcbb0ccf995118440706 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Sun, 23 Jul 2023 20:09:11 +0800 Subject: [PATCH 10/53] virtio-net: do not reset vlan filtering at set_features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is called after virtio_load, so all vlan configuration is lost in migration case. Just allow all the vlan-tagged packets if vlan is not configured, and trust device reset to clear all filtered vlans. Fixes: 0b1eaa8803 ("virtio-net: Do not filter VLANs without F_CTRL_VLAN") Signed-off-by: Eugenio Pérez Reviewed-by: Hawkins Jiawei Signed-off-by: Hawkins Jiawei Message-Id: <95af0d013281282f48ad3f47f6ad1ac4ca9e52eb.1690106284.git.yin31149@gmail.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 5a0201c423..1c31374334 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1029,9 +1029,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) vhost_net_save_acked_features(nc->peer); } - if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { - memset(n->vlans, 0, MAX_VLAN >> 3); - } else { + if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { memset(n->vlans, 0xff, MAX_VLAN >> 3); } From e19751a32f140a232fafb037e703abb961a94abb Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Sun, 23 Jul 2023 20:09:12 +0800 Subject: [PATCH 11/53] virtio-net: Expose MAX_VLAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vhost-vdpa shadowed CVQ needs to know the maximum number of vlans supported by the virtio-net device, so QEMU can restore the VLAN state in a migration. Co-developed-by: Eugenio Pérez Signed-off-by: Eugenio Pérez Signed-off-by: Hawkins Jiawei Message-Id: Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 2 -- include/hw/virtio/virtio-net.h | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 1c31374334..29e33ea5ed 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -49,8 +49,6 @@ #define VIRTIO_NET_VM_VERSION 11 -#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ - /* previously fixed value */ #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index e07a723027..55977f01f0 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -38,6 +38,12 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) /* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */ #define MAC_TABLE_ENTRIES 64 +/* + * The maximum number of VLANs in the VLAN filter table + * added by VIRTIO_NET_CTRL_VLAN_ADD + */ +#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ + typedef struct virtio_net_conf { uint32_t txtimer; From 8f7e9967484dec2a727c24a509962ca3a4f5dad4 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Sun, 23 Jul 2023 20:09:13 +0800 Subject: [PATCH 12/53] vdpa: Restore vlan filtering state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces vhost_vdpa_net_load_single_vlan() and vhost_vdpa_net_load_vlan() to restore the vlan filtering state at device's startup. Co-developed-by: Eugenio Pérez Signed-off-by: Eugenio Pérez Signed-off-by: Hawkins Jiawei Message-Id: Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 4e94c50bc7..8648d86f64 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -968,6 +968,50 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, return 0; } +static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s, + const VirtIONet *n, + uint16_t vid) +{ + const struct iovec data = { + .iov_base = &vid, + .iov_len = sizeof(vid), + }; + ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_VLAN, + VIRTIO_NET_CTRL_VLAN_ADD, + &data, 1); + if (unlikely(dev_written < 0)) { + return dev_written; + } + if (unlikely(*s->status != VIRTIO_NET_OK)) { + return -EIO; + } + + return 0; +} + +static int vhost_vdpa_net_load_vlan(VhostVDPAState *s, + const VirtIONet *n) +{ + int r; + + if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_VLAN)) { + return 0; + } + + for (int i = 0; i < MAX_VLAN >> 5; i++) { + for (int j = 0; n->vlans[i] && j <= 0x1f; j++) { + if (n->vlans[i] & (1U << j)) { + r = vhost_vdpa_net_load_single_vlan(s, n, (i << 5) + j); + if (unlikely(r != 0)) { + return r; + } + } + } + } + + return 0; +} + static int vhost_vdpa_net_load(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); @@ -998,6 +1042,10 @@ static int vhost_vdpa_net_load(NetClientState *nc) if (unlikely(r)) { return r; } + r = vhost_vdpa_net_load_vlan(s, n); + if (unlikely(r)) { + return r; + } return 0; } From e213c45a042db2506b5e8f16293f1f1c5083a577 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Sun, 23 Jul 2023 20:09:14 +0800 Subject: [PATCH 13/53] vdpa: Allow VIRTIO_NET_F_CTRL_VLAN in SVQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable SVQ with VIRTIO_NET_F_CTRL_VLAN feature. Co-developed-by: Eugenio Pérez Signed-off-by: Eugenio Pérez Signed-off-by: Hawkins Jiawei Message-Id: <38dc63102a42c31c72fd293d0e6e2828fd54c86e.1690106284.git.yin31149@gmail.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 8648d86f64..144b33f997 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -114,6 +114,7 @@ static const uint64_t vdpa_svq_device_features = BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | BIT_ULL(VIRTIO_NET_F_CTRL_RX) | + BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_F_ANY_LAYOUT) | From 43d6376980d5567f2a6d00cfb30d10c0961671e6 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 11 Aug 2023 16:34:23 +0200 Subject: [PATCH 14/53] virtio: don't zero out memory region cache for indirect descriptors Lots of virtio functions that are on a hot path in data transmission are initializing indirect descriptor cache at the point of stack allocation. It's a 112 byte structure that is getting zeroed out on each call adding unnecessary overhead. It's going to be correctly initialized later via special init function. The only reason to actually initialize right away is the ability to safely destruct it. Replacing a designated initializer with a function to only initialize what is necessary. Removal of the unnecessary stack initializations improves throughput of virtio-net devices in terms of 64B packets per second by 6-14 % depending on the case. Tested with a proposed af-xdp network backend and a dpdk testpmd application in the guest, but should be beneficial for other virtio devices as well. Signed-off-by: Ilya Maximets Message-Id: <20230811143423.3258788-1-i.maximets@ovn.org> Reviewed-by: Stefan Hajnoczi Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 20 +++++++++++++++----- include/exec/memory.h | 16 +++++++++++++--- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 4577f3f5b3..d3a22e3d36 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1071,10 +1071,12 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq, VirtIODevice *vdev = vq->vdev; unsigned int idx; unsigned int total_bufs, in_total, out_total; - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + MemoryRegionCache indirect_desc_cache; int64_t len = 0; int rc; + address_space_cache_init_empty(&indirect_desc_cache); + idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; @@ -1207,12 +1209,14 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, VirtIODevice *vdev = vq->vdev; unsigned int idx; unsigned int total_bufs, in_total, out_total; + MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; int64_t len = 0; VRingPackedDesc desc; bool wrap_counter; + address_space_cache_init_empty(&indirect_desc_cache); + idx = vq->last_avail_idx; wrap_counter = vq->last_avail_wrap_counter; total_bufs = in_total = out_total = 0; @@ -1487,7 +1491,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) { unsigned int i, head, max; VRingMemoryRegionCaches *caches; - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; int64_t len; VirtIODevice *vdev = vq->vdev; @@ -1498,6 +1502,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) VRingDesc desc; int rc; + address_space_cache_init_empty(&indirect_desc_cache); + RCU_READ_LOCK_GUARD(); if (virtio_queue_empty_rcu(vq)) { goto done; @@ -1624,7 +1630,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) { unsigned int i, max; VRingMemoryRegionCaches *caches; - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; int64_t len; VirtIODevice *vdev = vq->vdev; @@ -1636,6 +1642,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) uint16_t id; int rc; + address_space_cache_init_empty(&indirect_desc_cache); + RCU_READ_LOCK_GUARD(); if (virtio_queue_packed_empty_rcu(vq)) { goto done; @@ -3970,13 +3978,15 @@ VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path, } else { unsigned int head, i, max; VRingMemoryRegionCaches *caches; - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; VRingDesc desc; VirtioRingDescList *list = NULL; VirtioRingDescList *node; int rc; int ndescs; + address_space_cache_init_empty(&indirect_desc_cache); + RCU_READ_LOCK_GUARD(); max = vq->vring.num; diff --git a/include/exec/memory.h b/include/exec/memory.h index ef23d65afc..c99842d2fc 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -2671,9 +2671,6 @@ struct MemoryRegionCache { bool is_write; }; -#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mrs.mr = NULL }) - - /* address_space_ld*_cached: load from a cached #MemoryRegion * address_space_st*_cached: store into a cached #MemoryRegion * @@ -2762,6 +2759,19 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, hwaddr len, bool is_write); +/** + * address_space_cache_init_empty: Initialize empty #MemoryRegionCache + * + * @cache: The #MemoryRegionCache to operate on. + * + * Initializes #MemoryRegionCache structure without memory region attached. + * Cache initialized this way can only be safely destroyed, but not used. + */ +static inline void address_space_cache_init_empty(MemoryRegionCache *cache) +{ + cache->mrs.mr = NULL; +} + /** * address_space_cache_invalidate: complete a write to a #MemoryRegionCache * From b40eba9cdde3b041f02a9cbaa23ca0eeda9bd9c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Tue, 22 Aug 2023 10:53:26 +0200 Subject: [PATCH 15/53] vdpa: use first queue SVQ state for CVQ default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous to this patch the only way CVQ would be shadowed is if it does support to isolate CVQ group or if all vqs were shadowed from the beginning. The second condition was checked at the beginning, and no more configuration was done. After this series we need to check if data queues are shadowed because they are in the middle of the migration. As checking if they are shadowed already covers the previous case, let's just mimic it. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20230822085330.3978829-2-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 144b33f997..30dc7e77c9 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -509,7 +509,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) s0 = vhost_vdpa_net_first_nc_vdpa(s); v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled; - v->shadow_vqs_enabled = s->always_svq; + v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled; s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; if (s->vhost_vdpa.shadow_data) { From d7ce0841767d01c226fc0e22436ce22a0ec74226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Tue, 22 Aug 2023 10:53:27 +0200 Subject: [PATCH 16/53] vdpa: export vhost_vdpa_set_vring_ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vhost-vdpa net backend needs to enable vrings in a different order than default, so export it. No functional change intended except for tracing, that now includes the (virtio) index being enabled and the return value of the ioctl. Still ignoring return value of this function if called from vhost_vdpa_dev_start, as reorganize calling code around it is out of the scope of this series. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20230822085330.3978829-3-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 2 +- hw/virtio/vhost-vdpa.c | 25 +++++++++++++------------ include/hw/virtio/vhost-vdpa.h | 1 + 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 7109cf1a3b..1cb9027d1e 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -48,7 +48,7 @@ vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32 vhost_vdpa_reset_device(void *dev) "dev: %p" vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d" -vhost_vdpa_set_vring_ready(void *dev) "dev: %p" +vhost_vdpa_set_vring_ready(void *dev, unsigned i, int r) "dev: %p, idx: %u, r: %d" vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 50b932a930..e7de880d51 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -882,18 +882,17 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) return idx; } -static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) +int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) { - int i; - trace_vhost_vdpa_set_vring_ready(dev); - for (i = 0; i < dev->nvqs; ++i) { - struct vhost_vring_state state = { - .index = dev->vq_index + i, - .num = 1, - }; - vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); - } - return 0; + struct vhost_dev *dev = v->dev; + struct vhost_vring_state state = { + .index = idx, + .num = 1, + }; + int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); + + trace_vhost_vdpa_set_vring_ready(dev, idx, r); + return r; } static int vhost_vdpa_set_config_call(struct vhost_dev *dev, @@ -1304,7 +1303,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) if (unlikely(!ok)) { return -1; } - vhost_vdpa_set_vring_ready(dev); + for (int i = 0; i < dev->nvqs; ++i) { + vhost_vdpa_set_vring_ready(v, dev->vq_index + i); + } } else { vhost_vdpa_suspend(dev); vhost_vdpa_svqs_stop(dev); diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index e64bfc7f98..5407d54fd7 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -57,6 +57,7 @@ typedef struct vhost_vdpa { } VhostVDPA; int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); +int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, hwaddr size, void *vaddr, bool readonly); From f3fada598c909bac12bd18da36437d9bed0b9f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Tue, 22 Aug 2023 10:53:28 +0200 Subject: [PATCH 17/53] vdpa: rename vhost_vdpa_net_load to vhost_vdpa_net_cvq_load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches will add the corresponding data load. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20230822085330.3978829-4-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 30dc7e77c9..008c0cf8b3 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -1013,7 +1013,7 @@ static int vhost_vdpa_net_load_vlan(VhostVDPAState *s, return 0; } -static int vhost_vdpa_net_load(NetClientState *nc) +static int vhost_vdpa_net_cvq_load(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); struct vhost_vdpa *v = &s->vhost_vdpa; @@ -1056,7 +1056,7 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { .size = sizeof(VhostVDPAState), .receive = vhost_vdpa_receive, .start = vhost_vdpa_net_cvq_start, - .load = vhost_vdpa_net_load, + .load = vhost_vdpa_net_cvq_load, .stop = vhost_vdpa_net_cvq_stop, .cleanup = vhost_vdpa_cleanup, .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, From 6c4825476a4351530bcac17abab72295b75ffe98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Tue, 22 Aug 2023 10:53:29 +0200 Subject: [PATCH 18/53] vdpa: move vhost_vdpa_set_vring_ready to the caller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doing that way allows CVQ to be enabled before the dataplane vqs, restoring the state as MQ or MAC addresses properly in the case of a migration. The patch does it by defining a ->load NetClientInfo callback also for dataplane. Ideally, this should be done by an independent patch, but the function is already static so it would only add an empty vhost_vdpa_net_data_load stub. Signed-off-by: Eugenio Pérez Message-Id: <20230822085330.3978829-5-eperezma@redhat.com> Acked-by: Jason Wang Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vdpa-dev.c | 3 ++ hw/virtio/vhost-vdpa.c | 3 -- net/vhost-vdpa.c | 65 ++++++++++++++++++++++++++++-------------- 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 363b625243..f22d5d5bc0 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -255,6 +255,9 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) error_setg_errno(errp, -ret, "Error starting vhost"); goto err_guest_notifiers; } + for (i = 0; i < s->dev.nvqs; ++i) { + vhost_vdpa_set_vring_ready(&s->vdpa, i); + } s->started = true; /* diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index e7de880d51..89ff02a999 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -1303,9 +1303,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) if (unlikely(!ok)) { return -1; } - for (int i = 0; i < dev->nvqs; ++i) { - vhost_vdpa_set_vring_ready(v, dev->vq_index + i); - } } else { vhost_vdpa_suspend(dev); vhost_vdpa_svqs_stop(dev); diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 008c0cf8b3..0715bed8e6 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -375,6 +375,22 @@ static int vhost_vdpa_net_data_start(NetClientState *nc) return 0; } +static int vhost_vdpa_net_data_load(NetClientState *nc) +{ + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + struct vhost_vdpa *v = &s->vhost_vdpa; + bool has_cvq = v->dev->vq_index_end % 2; + + if (has_cvq) { + return 0; + } + + for (int i = 0; i < v->dev->nvqs; ++i) { + vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index); + } + return 0; +} + static void vhost_vdpa_net_client_stop(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); @@ -397,6 +413,7 @@ static NetClientInfo net_vhost_vdpa_info = { .size = sizeof(VhostVDPAState), .receive = vhost_vdpa_receive, .start = vhost_vdpa_net_data_start, + .load = vhost_vdpa_net_data_load, .stop = vhost_vdpa_net_client_stop, .cleanup = vhost_vdpa_cleanup, .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, @@ -1022,30 +1039,34 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - if (!v->shadow_vqs_enabled) { - return 0; + vhost_vdpa_set_vring_ready(v, v->dev->vq_index); + + if (v->shadow_vqs_enabled) { + n = VIRTIO_NET(v->dev->vdev); + r = vhost_vdpa_net_load_mac(s, n); + if (unlikely(r < 0)) { + return r; + } + r = vhost_vdpa_net_load_mq(s, n); + if (unlikely(r)) { + return r; + } + r = vhost_vdpa_net_load_offloads(s, n); + if (unlikely(r)) { + return r; + } + r = vhost_vdpa_net_load_rx(s, n); + if (unlikely(r)) { + return r; + } + r = vhost_vdpa_net_load_vlan(s, n); + if (unlikely(r)) { + return r; + } } - n = VIRTIO_NET(v->dev->vdev); - r = vhost_vdpa_net_load_mac(s, n); - if (unlikely(r < 0)) { - return r; - } - r = vhost_vdpa_net_load_mq(s, n); - if (unlikely(r)) { - return r; - } - r = vhost_vdpa_net_load_offloads(s, n); - if (unlikely(r)) { - return r; - } - r = vhost_vdpa_net_load_rx(s, n); - if (unlikely(r)) { - return r; - } - r = vhost_vdpa_net_load_vlan(s, n); - if (unlikely(r)) { - return r; + for (int i = 0; i < v->dev->vq_index; ++i) { + vhost_vdpa_set_vring_ready(v, i); } return 0; From f13f5f6412fc51574c961f39dbd625357948282b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Tue, 22 Aug 2023 10:53:30 +0200 Subject: [PATCH 19/53] vdpa: remove net cvq migration blocker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have add migration blockers if the device does not support all the needed features, remove the general blocker applied to all net devices with CVQ. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20230822085330.3978829-6-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 0715bed8e6..90beda42e0 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -1465,18 +1465,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; s->vhost_vdpa.shadow_vq_ops_opaque = s; s->cvq_isolated = cvq_isolated; - - /* - * TODO: We cannot migrate devices with CVQ and no x-svq enabled as - * there is no way to set the device state (MAC, MQ, etc) before - * starting the datapath. - * - * Migration blocker ownership now belongs to s->vhost_vdpa. - */ - if (!svq) { - error_setg(&s->vhost_vdpa.migration_blocker, - "net vdpa cannot migrate with CVQ feature"); - } } ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); if (ret) { From b0de17a2e28de477e09e77a587fcbeafbbc897c4 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Tue, 29 Aug 2023 13:54:43 +0800 Subject: [PATCH 20/53] vhost: Add count argument to vhost_svq_poll() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches in this series will no longer perform an immediate poll and check of the device's used buffers for each CVQ state load command. Instead, they will send CVQ state load commands in parallel by polling multiple pending buffers at once. To achieve this, this patch refactoring vhost_svq_poll() to accept a new argument `num`, which allows vhost_svq_poll() to wait for the device to use multiple elements, rather than polling for a single element. Signed-off-by: Hawkins Jiawei Acked-by: Eugenio Pérez Message-Id: <950b3bfcfc5d446168b9d6a249d554a013a691d4.1693287885.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-shadow-virtqueue.c | 36 ++++++++++++++++++------------ hw/virtio/vhost-shadow-virtqueue.h | 2 +- net/vhost-vdpa.c | 2 +- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 49e5aed931..e731b1d2ea 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -514,29 +514,37 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, } /** - * Poll the SVQ for one device used buffer. + * Poll the SVQ to wait for the device to use the specified number + * of elements and return the total length written by the device. * * This function race with main event loop SVQ polling, so extra * synchronization is needed. * - * Return the length written by the device. + * @svq: The svq + * @num: The number of elements that need to be used */ -size_t vhost_svq_poll(VhostShadowVirtqueue *svq) +size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num) { - int64_t start_us = g_get_monotonic_time(); - uint32_t len = 0; + size_t len = 0; + uint32_t r; - do { - if (vhost_svq_more_used(svq)) { - break; - } + while (num--) { + int64_t start_us = g_get_monotonic_time(); - if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { - return 0; - } - } while (true); + do { + if (vhost_svq_more_used(svq)) { + break; + } + + if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { + return len; + } + } while (true); + + vhost_svq_get_buf(svq, &r); + len += r; + } - vhost_svq_get_buf(svq, &len); return len; } diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index 6efe051a70..5bce67837b 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -119,7 +119,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, size_t out_num, const struct iovec *in_sg, size_t in_num, VirtQueueElement *elem); -size_t vhost_svq_poll(VhostShadowVirtqueue *svq); +size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num); void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 90beda42e0..5808d1b60c 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -645,7 +645,7 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, * descriptor. Also, we need to take the answer before SVQ pulls by itself, * when BQL is released */ - return vhost_svq_poll(svq); + return vhost_svq_poll(svq, 1); } static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, From b532c684e0d71bc69fa56a30f1c7588101aa086a Mon Sep 17 00:00:00 2001 From: Jonah Palmer Date: Tue, 26 Sep 2023 18:41:05 -0400 Subject: [PATCH 21/53] qmp: remove virtio_list, search QOM tree instead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The virtio_list duplicates information about virtio devices that already exist in the QOM composition tree. Instead of creating this list of realized virtio devices, search the QOM composition tree instead. This patch modifies the QMP command qmp_x_query_virtio to instead recursively search the QOM composition tree for devices of type 'TYPE_VIRTIO_DEVICE'. The device is also checked to ensure it's realized. Signed-off-by: Jonah Palmer Reviewed-by: Daniel P. Berrangé Message-Id: <20230926224107.2951144-2-jonah.palmer@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-qmp.c | 85 ++++++++++++++---------------------------- hw/virtio/virtio-qmp.h | 7 ---- hw/virtio/virtio.c | 6 --- 3 files changed, 29 insertions(+), 69 deletions(-) diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index 7515b0947b..adebf87e9b 100644 --- a/hw/virtio/virtio-qmp.c +++ b/hw/virtio/virtio-qmp.c @@ -667,70 +667,43 @@ VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap) return features; } +static int query_dev_child(Object *child, void *opaque) +{ + VirtioInfoList **vdevs = opaque; + Object *dev = object_dynamic_cast(child, TYPE_VIRTIO_DEVICE); + if (dev != NULL && DEVICE(dev)->realized) { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtioInfo *info = g_new(VirtioInfo, 1); + + /* Get canonical path & name of device */ + info->path = object_get_canonical_path(dev); + info->name = g_strdup(vdev->name); + QAPI_LIST_PREPEND(*vdevs, info); + } + return 0; +} + VirtioInfoList *qmp_x_query_virtio(Error **errp) { - VirtioInfoList *list = NULL; - VirtioInfo *node; - VirtIODevice *vdev; + VirtioInfoList *vdevs = NULL; - QTAILQ_FOREACH(vdev, &virtio_list, next) { - DeviceState *dev = DEVICE(vdev); - Error *err = NULL; - QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err); - - if (err == NULL) { - GString *is_realized = qobject_to_json_pretty(obj, true); - /* virtio device is NOT realized, remove it from list */ - if (!strncmp(is_realized->str, "false", 4)) { - QTAILQ_REMOVE(&virtio_list, vdev, next); - } else { - node = g_new(VirtioInfo, 1); - node->path = g_strdup(dev->canonical_path); - node->name = g_strdup(vdev->name); - QAPI_LIST_PREPEND(list, node); - } - g_string_free(is_realized, true); - } - qobject_unref(obj); + /* Query the QOM composition tree recursively for virtio devices */ + object_child_foreach_recursive(object_get_root(), query_dev_child, &vdevs); + if (vdevs == NULL) { + error_setg(errp, "No virtio devices found"); } - - return list; + return vdevs; } VirtIODevice *qmp_find_virtio_device(const char *path) { - VirtIODevice *vdev; - - QTAILQ_FOREACH(vdev, &virtio_list, next) { - DeviceState *dev = DEVICE(vdev); - - if (strcmp(dev->canonical_path, path) != 0) { - continue; - } - - Error *err = NULL; - QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err); - if (err == NULL) { - GString *is_realized = qobject_to_json_pretty(obj, true); - /* virtio device is NOT realized, remove it from list */ - if (!strncmp(is_realized->str, "false", 4)) { - g_string_free(is_realized, true); - qobject_unref(obj); - QTAILQ_REMOVE(&virtio_list, vdev, next); - return NULL; - } - g_string_free(is_realized, true); - } else { - /* virtio device doesn't exist in QOM tree */ - QTAILQ_REMOVE(&virtio_list, vdev, next); - qobject_unref(obj); - return NULL; - } - /* device exists in QOM tree & is realized */ - qobject_unref(obj); - return vdev; + /* Verify the canonical path is a realized virtio device */ + Object *dev = object_dynamic_cast(object_resolve_path(path, NULL), + TYPE_VIRTIO_DEVICE); + if (!dev || !DEVICE(dev)->realized) { + return NULL; } - return NULL; + return VIRTIO_DEVICE(dev); } VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp) @@ -740,7 +713,7 @@ VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp) vdev = qmp_find_virtio_device(path); if (vdev == NULL) { - error_setg(errp, "Path %s is not a VirtIODevice", path); + error_setg(errp, "Path %s is not a realized VirtIODevice", path); return NULL; } diff --git a/hw/virtio/virtio-qmp.h b/hw/virtio/virtio-qmp.h index 8af5f5e65a..245a446a56 100644 --- a/hw/virtio/virtio-qmp.h +++ b/hw/virtio/virtio-qmp.h @@ -15,13 +15,6 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/vhost.h" -#include "qemu/queue.h" - -typedef QTAILQ_HEAD(QmpVirtIODeviceList, VirtIODevice) QmpVirtIODeviceList; - -/* QAPI list of realized VirtIODevices */ -extern QmpVirtIODeviceList virtio_list; - VirtIODevice *qmp_find_virtio_device(const char *path); VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap); VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap); diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index d3a22e3d36..c727e9201b 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -45,8 +45,6 @@ #include "standard-headers/linux/virtio_mem.h" #include "standard-headers/linux/virtio_vsock.h" -QmpVirtIODeviceList virtio_list; - /* * Maximum size of virtio device config space */ @@ -3659,7 +3657,6 @@ static void virtio_device_realize(DeviceState *dev, Error **errp) vdev->listener.commit = virtio_memory_listener_commit; vdev->listener.name = "virtio"; memory_listener_register(&vdev->listener, vdev->dma_as); - QTAILQ_INSERT_TAIL(&virtio_list, vdev, next); } static void virtio_device_unrealize(DeviceState *dev) @@ -3674,7 +3671,6 @@ static void virtio_device_unrealize(DeviceState *dev) vdc->unrealize(dev); } - QTAILQ_REMOVE(&virtio_list, vdev, next); g_free(vdev->bus_name); vdev->bus_name = NULL; } @@ -3848,8 +3844,6 @@ static void virtio_device_class_init(ObjectClass *klass, void *data) vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl; vdc->legacy_features |= VIRTIO_LEGACY_FEATURES; - - QTAILQ_INIT(&virtio_list); } bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev) From 58f81689789f63853d7585c5168f687f1633893a Mon Sep 17 00:00:00 2001 From: Jonah Palmer Date: Tue, 26 Sep 2023 18:41:06 -0400 Subject: [PATCH 22/53] qmp: update virtio feature maps, vhost-user-gpio introspection Add new vhost-user protocol feature to vhost-user protocol feature map and enumeration: - VHOST_USER_PROTOCOL_F_STATUS Add new virtio device features for several virtio devices to their respective feature mappings: virtio-blk: - VIRTIO_BLK_F_SECURE_ERASE virtio-net: - VIRTIO_NET_F_NOTF_COAL - VIRTIO_NET_F_GUEST_USO4 - VIRTIO_NET_F_GUEST_USO6 - VIRTIO_NET_F_HOST_USO virtio/vhost-user-gpio: - VIRTIO_GPIO_F_IRQ - VHOST_USER_F_PROTOCOL_FEATURES Add support for introspection on vhost-user-gpio devices. Signed-off-by: Jonah Palmer Reviewed-by: Emmanouil Pitsidianakis Message-Id: <20230926224107.2951144-3-jonah.palmer@oracle.com> Acked-by: Viresh Kumar Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user-gpio.c | 7 +++++++ hw/virtio/virtio-qmp.c | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c index 3b013f2d0f..3d7fae3984 100644 --- a/hw/virtio/vhost-user-gpio.c +++ b/hw/virtio/vhost-user-gpio.c @@ -205,6 +205,12 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask); } +static struct vhost_dev *vu_gpio_get_vhost(VirtIODevice *vdev) +{ + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + return &gpio->vhost_dev; +} + static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserGPIO *gpio) { virtio_delete_queue(gpio->command_vq); @@ -413,6 +419,7 @@ static void vu_gpio_class_init(ObjectClass *klass, void *data) vdc->get_config = vu_gpio_get_config; vdc->set_status = vu_gpio_set_status; vdc->guest_notifier_mask = vu_gpio_guest_notifier_mask; + vdc->get_vhost = vu_gpio_get_vhost; } static const TypeInfo vu_gpio_info = { diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index adebf87e9b..3431711db5 100644 --- a/hw/virtio/virtio-qmp.c +++ b/hw/virtio/virtio-qmp.c @@ -30,6 +30,7 @@ #include "standard-headers/linux/virtio_iommu.h" #include "standard-headers/linux/virtio_mem.h" #include "standard-headers/linux/virtio_vsock.h" +#include "standard-headers/linux/virtio_gpio.h" #include CONFIG_DEVICES @@ -53,6 +54,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, + VHOST_USER_PROTOCOL_F_STATUS = 16, VHOST_USER_PROTOCOL_F_MAX }; @@ -136,6 +138,9 @@ static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = { FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS, \ "VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS: Configuration for " "memory slots supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_STATUS, \ + "VHOST_USER_PROTOCOL_F_STATUS: Querying and notifying back-end " + "device status supported"), { -1, "" } }; @@ -178,6 +183,8 @@ static const qmp_virtio_feature_map_t virtio_blk_feature_map[] = { "VIRTIO_BLK_F_DISCARD: Discard command supported"), FEATURE_ENTRY(VIRTIO_BLK_F_WRITE_ZEROES, \ "VIRTIO_BLK_F_WRITE_ZEROES: Write zeroes command supported"), + FEATURE_ENTRY(VIRTIO_BLK_F_SECURE_ERASE, \ + "VIRTIO_BLK_F_SECURE_ERASE: Secure erase supported"), FEATURE_ENTRY(VIRTIO_BLK_F_ZONED, \ "VIRTIO_BLK_F_ZONED: Zoned block devices"), #ifndef VIRTIO_BLK_NO_LEGACY @@ -301,6 +308,14 @@ static const qmp_virtio_feature_map_t virtio_net_feature_map[] = { FEATURE_ENTRY(VIRTIO_NET_F_CTRL_MAC_ADDR, \ "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control " "channel"), + FEATURE_ENTRY(VIRTIO_NET_F_NOTF_COAL, \ + "VIRTIO_NET_F_NOTF_COAL: Device supports coalescing notifications"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_USO4, \ + "VIRTIO_NET_F_GUEST_USO4: Driver can receive USOv4"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_USO6, \ + "VIRTIO_NET_F_GUEST_USO4: Driver can receive USOv6"), + FEATURE_ENTRY(VIRTIO_NET_F_HOST_USO, \ + "VIRTIO_NET_F_HOST_USO: Device can receive USO"), FEATURE_ENTRY(VIRTIO_NET_F_HASH_REPORT, \ "VIRTIO_NET_F_HASH_REPORT: Hash reporting supported"), FEATURE_ENTRY(VIRTIO_NET_F_RSS, \ @@ -471,6 +486,18 @@ static const qmp_virtio_feature_map_t virtio_rng_feature_map[] = { }; #endif +/* virtio/vhost-gpio features mapping */ +#ifdef CONFIG_VHOST_USER_GPIO +static const qmp_virtio_feature_map_t virtio_gpio_feature_map[] = { + FEATURE_ENTRY(VIRTIO_GPIO_F_IRQ, \ + "VIRTIO_GPIO_F_IRQ: Device supports interrupts on GPIO lines"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; +#endif + #define CONVERT_FEATURES(type, map, is_status, bitmap) \ ({ \ type *list = NULL; \ @@ -627,6 +654,12 @@ VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap) features->dev_features = CONVERT_FEATURES(strList, virtio_rng_feature_map, 0, bitmap); break; +#endif +#ifdef CONFIG_VHOST_USER_GPIO + case VIRTIO_ID_GPIO: + features->dev_features = + CONVERT_FEATURES(strList, virtio_gpio_feature_map, 0, bitmap); + break; #endif /* No features */ case VIRTIO_ID_9P: @@ -653,7 +686,6 @@ VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap) case VIRTIO_ID_DMABUF: case VIRTIO_ID_PARAM_SERV: case VIRTIO_ID_AUDIO_POLICY: - case VIRTIO_ID_GPIO: break; default: g_assert_not_reached(); From 3d123a8b411706423581db7d26a7bbe548360751 Mon Sep 17 00:00:00 2001 From: Jonah Palmer Date: Tue, 26 Sep 2023 18:41:07 -0400 Subject: [PATCH 23/53] vhost-user: move VhostUserProtocolFeature definition to header file Move the definition of VhostUserProtocolFeature to include/hw/virtio/vhost-user.h. Remove previous definitions in hw/scsi/vhost-user-scsi.c, hw/virtio/vhost-user.c, and hw/virtio/virtio-qmp.c. Previously there were 3 separate definitions of this over 3 different files. Now only 1 definition of this will be present for these 3 files. Signed-off-by: Jonah Palmer Reviewed-by: Emmanouil Pitsidianakis Message-Id: <20230926224107.2951144-4-jonah.palmer@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/scsi/vhost-user-scsi.c | 4 ---- hw/virtio/vhost-user.c | 21 --------------------- hw/virtio/virtio-qmp.c | 22 +--------------------- include/hw/virtio/vhost-user.h | 21 +++++++++++++++++++++ 4 files changed, 22 insertions(+), 46 deletions(-) diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index ee99b19e7a..df6b66cc1a 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -39,10 +39,6 @@ static const int user_feature_bits[] = { VHOST_INVALID_FEATURE_BIT }; -enum VhostUserProtocolFeature { - VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, -}; - static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserSCSI *s = (VHostUserSCSI *)vdev; diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 8dcf049d42..a096335921 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -56,27 +56,6 @@ */ #define VHOST_USER_MAX_CONFIG_SIZE 256 -enum VhostUserProtocolFeature { - VHOST_USER_PROTOCOL_F_MQ = 0, - VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, - VHOST_USER_PROTOCOL_F_RARP = 2, - VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, - VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, - VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, - VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, - VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, - VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, - VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, - VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, - /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ - VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, - VHOST_USER_PROTOCOL_F_STATUS = 16, - VHOST_USER_PROTOCOL_F_MAX -}; - #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) typedef enum VhostUserRequest { diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index 3431711db5..1dd96ed20f 100644 --- a/hw/virtio/virtio-qmp.c +++ b/hw/virtio/virtio-qmp.c @@ -17,6 +17,7 @@ #include "qapi/qapi-commands-qom.h" #include "qapi/qmp/qobject.h" #include "qapi/qmp/qjson.h" +#include "hw/virtio/vhost-user.h" #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/vhost_types.h" @@ -37,27 +38,6 @@ #define FEATURE_ENTRY(name, desc) (qmp_virtio_feature_map_t) \ { .virtio_bit = name, .feature_desc = desc } -enum VhostUserProtocolFeature { - VHOST_USER_PROTOCOL_F_MQ = 0, - VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, - VHOST_USER_PROTOCOL_F_RARP = 2, - VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, - VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, - VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, - VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, - VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, - VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, - VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, - VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, - VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, - VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, - VHOST_USER_PROTOCOL_F_STATUS = 16, - VHOST_USER_PROTOCOL_F_MAX -}; - /* Virtio transport features mapping */ static const qmp_virtio_feature_map_t virtio_transport_map[] = { /* Virtio device transport features */ diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index 191216a74f..80e2b4a463 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -11,6 +11,27 @@ #include "chardev/char-fe.h" #include "hw/virtio/virtio.h" +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_MQ = 0, + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, + VHOST_USER_PROTOCOL_F_RARP = 2, + VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, + VHOST_USER_PROTOCOL_F_NET_MTU = 4, + VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, + VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, + VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, + VHOST_USER_PROTOCOL_F_CONFIG = 9, + VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, + VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, + VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, + VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, + VHOST_USER_PROTOCOL_F_STATUS = 16, + VHOST_USER_PROTOCOL_F_MAX +}; + /** * VhostUserHostNotifier - notifier information for one queue * @rcu: rcu_head for cleanup From 886e0a5f31bf3d40dd8d9199674a4bad64942fde Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 30 Aug 2023 20:59:43 +0100 Subject: [PATCH 24/53] hw/isa/ich9: Add comment on imperfect emulation of PIC vs. I/O APIC routing As noted in the comment, the PCI INTx lines are supposed to be routed to *both* the PIC and the I/O APIC. It's just that we don't cope with the concept of an IRQ being asserted to two *different* pins on the two irqchips. So we have this hack of routing to I/O APIC only if the PIRQ routing to the PIC is disabled. Which seems to work well enough, even when I try hard to break it with kexec. But should be explicitly documented and understood. Signed-off-by: David Woodhouse Message-Id: <112a09643b8191c4eae7d92fa247a861ab90a9ee.camel@infradead.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/isa/lpc_ich9.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 9c47a2f6c7..bce487ac4e 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -304,6 +304,21 @@ static PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin) route.irq = -1; } } else { + /* + * Strictly speaking, this is wrong. The PIRQ should be routed + * to *both* the I/O APIC and the PIC, on different pins. The + * I/O APIC has a fixed mapping to IRQ16-23, while the PIC is + * routed according to the PIRQx_ROUT configuration. But QEMU + * doesn't (yet) cope with the concept of pin numbers differing + * between PIC and I/O APIC, and neither does the in-kernel KVM + * irqchip support. So we route to the I/O APIC *only* if the + * routing to the PIC is disabled in the PIRQx_ROUT settings. + * + * This seems to work even if we boot a Linux guest with 'noapic' + * to make it use the legacy PIC, and then kexec directly into a + * new kernel which uses the I/O APIC. The new kernel explicitly + * disables the PIRQ routing even though it doesn't need to care. + */ route.irq = ich9_pirq_to_gsi(pirq_pin); } From f4a06e5921ec93bbb8baeca59f662672077535c3 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 8 Sep 2023 10:42:27 +0200 Subject: [PATCH 25/53] hw/i386/acpi-build: Use pc_madt_cpu_entry() directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is x86-specific code, so there is no advantage in using pc_madt_cpu_entry() behind an architecture-agnostic interface. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230908084234.17642-2-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 3 +-- hw/i386/acpi-common.c | 5 ++--- hw/i386/acpi-common.h | 3 +-- hw/i386/acpi-microvm.c | 3 +-- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 4d2d40bab5..2879e0d555 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2547,8 +2547,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); acpi_build_madt(tables_blob, tables->linker, x86ms, - ACPI_DEVICE_IF(x86ms->acpi_dev), x86ms->oem_id, - x86ms->oem_table_id); + x86ms->oem_id, x86ms->oem_table_id); #ifdef CONFIG_ACPI_ERST { diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c index 8a0932fe84..43dc23f7e0 100644 --- a/hw/i386/acpi-common.c +++ b/hw/i386/acpi-common.c @@ -94,14 +94,13 @@ build_xrupt_override(GArray *entry, uint8_t src, uint32_t gsi, uint16_t flags) * 5.2.8 Multiple APIC Description Table */ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, - X86MachineState *x86ms, AcpiDeviceIf *adev, + X86MachineState *x86ms, const char *oem_id, const char *oem_table_id) { int i; bool x2apic_mode = false; MachineClass *mc = MACHINE_GET_CLASS(x86ms); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms)); - AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(adev); AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = oem_id, .oem_table_id = oem_table_id }; @@ -111,7 +110,7 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ for (i = 0; i < apic_ids->len; i++) { - adevc->madt_cpu(i, apic_ids, table_data, false); + pc_madt_cpu_entry(i, apic_ids, table_data, false); if (apic_ids->cpus[i].arch_id > 254) { x2apic_mode = true; } diff --git a/hw/i386/acpi-common.h b/hw/i386/acpi-common.h index a68825acf5..b3c56ee014 100644 --- a/hw/i386/acpi-common.h +++ b/hw/i386/acpi-common.h @@ -1,7 +1,6 @@ #ifndef HW_I386_ACPI_COMMON_H #define HW_I386_ACPI_COMMON_H -#include "hw/acpi/acpi_dev_interface.h" #include "hw/acpi/bios-linker-loader.h" #include "hw/i386/x86.h" @@ -9,7 +8,7 @@ #define ACPI_BUILD_IOAPIC_ID 0x0 void acpi_build_madt(GArray *table_data, BIOSLinker *linker, - X86MachineState *x86ms, AcpiDeviceIf *adev, + X86MachineState *x86ms, const char *oem_id, const char *oem_table_id); #endif diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c index a075360d85..fec22d85c1 100644 --- a/hw/i386/acpi-microvm.c +++ b/hw/i386/acpi-microvm.c @@ -214,8 +214,7 @@ static void acpi_build_microvm(AcpiBuildTables *tables, acpi_add_table(table_offsets, tables_blob); acpi_build_madt(tables_blob, tables->linker, X86_MACHINE(machine), - ACPI_DEVICE_IF(x86ms->acpi_dev), x86ms->oem_id, - x86ms->oem_table_id); + x86ms->oem_id, x86ms->oem_table_id); #ifdef CONFIG_ACPI_ERST { From 9a4fedcf12ae388722fa5430df92d0f41e3ba9be Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 8 Sep 2023 10:42:28 +0200 Subject: [PATCH 26/53] hw/acpi/cpu: Have build_cpus_aml() take a build_madt_cpu_fn callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit build_cpus_aml() is architecture independent but needs to create architecture- specific CPU AML. So far this was achieved by using a virtual method from TYPE_ACPI_DEVICE_IF. However, build_cpus_aml() would resolve this interface from global (!) state. This makes it quite incomprehensible where this interface comes from (TYPE_PIIX4_PM?, TYPE_ICH9_LPC_DEVICE?, TYPE_ACPI_GED_X86?) an can lead to crashes when the generic code is ported to new architectures. So far, build_cpus_aml() is only called in architecture-specific code -- and only in x86. We can therefore simply pass pc_madt_cpu_entry() as callback to build_cpus_aml(). This is the same callback that would be used through TYPE_ACPI_DEVICE_IF. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230908084234.17642-3-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cpu.c | 8 ++------ hw/i386/acpi-build.c | 4 ++-- include/hw/acpi/cpu.h | 6 +++++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 19c154d78f..65a3202d3f 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -338,7 +338,7 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPU_FW_EJECT_EVENT "CEJF" void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - hwaddr io_base, + build_madt_cpu_fn build_madt_cpu, hwaddr io_base, const char *res_root, const char *event_handler_method) { @@ -353,8 +353,6 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, MachineClass *mc = MACHINE_GET_CLASS(machine); const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine); char *cphp_res_path = g_strdup_printf("%s." CPUHP_RES_DEVICE, res_root); - Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, NULL); - AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj); cpu_ctrl_dev = aml_device("%s", cphp_res_path); { @@ -664,9 +662,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(dev, method); /* build _MAT object */ - assert(adevc && adevc->madt_cpu); - adevc->madt_cpu(i, arch_ids, madt_buf, - true); /* set enabled flag */ + build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */ aml_append(dev, aml_name_decl("_MAT", aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); g_array_free(madt_buf, true); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 2879e0d555..76581d51aa 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1549,8 +1549,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, .smi_path = pm->smi_on_cpuhp ? "\\_SB.PCI0.SMI0.SMIC" : NULL, .fw_unplugs_cpu = pm->smi_on_cpu_unplug, }; - build_cpus_aml(dsdt, machine, opts, pm->cpu_hp_io_base, - "\\_SB.PCI0", "\\_GPE._E02"); + build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, + pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02"); } if (pcms->memhp_io_base && nr_mem) { diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h index 999caaf510..bc901660fb 100644 --- a/include/hw/acpi/cpu.h +++ b/include/hw/acpi/cpu.h @@ -15,6 +15,7 @@ #include "hw/qdev-core.h" #include "hw/acpi/acpi.h" #include "hw/acpi/aml-build.h" +#include "hw/boards.h" #include "hw/hotplug.h" typedef struct AcpiCpuStatus { @@ -55,8 +56,11 @@ typedef struct CPUHotplugFeatures { const char *smi_path; } CPUHotplugFeatures; +typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids, + GArray *entry, bool force_enabled); + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - hwaddr io_base, + build_madt_cpu_fn build_madt_cpu, hwaddr io_base, const char *res_root, const char *event_handler_method); From c461f3e3820f2a033e7eed08689060328b31dcbf Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 8 Sep 2023 10:42:29 +0200 Subject: [PATCH 27/53] hw/acpi/acpi_dev_interface: Remove now unused madt_cpu virtual method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This virtual method was always set to the x86-specific pc_madt_cpu_entry(), even in piix4 which is also used in MIPS. The previous changes use pc_madt_cpu_entry() otherwise, so madt_cpu can be dropped. Since pc_madt_cpu_entry() is now only used in x86-specific code, the stub in hw/acpi/acpi-x86-stub can be removed as well. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230908084234.17642-4-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/acpi-x86-stub.c | 6 ------ hw/acpi/piix4.c | 2 -- hw/i386/generic_event_device_x86.c | 9 --------- hw/isa/lpc_ich9.c | 1 - include/hw/acpi/acpi_dev_interface.h | 2 -- 5 files changed, 20 deletions(-) diff --git a/hw/acpi/acpi-x86-stub.c b/hw/acpi/acpi-x86-stub.c index d0d399d26b..9662a594ad 100644 --- a/hw/acpi/acpi-x86-stub.c +++ b/hw/acpi/acpi-x86-stub.c @@ -1,12 +1,6 @@ #include "qemu/osdep.h" -#include "hw/i386/pc.h" #include "hw/i386/acpi-build.h" -void pc_madt_cpu_entry(int uid, const CPUArchIdList *apic_ids, - GArray *entry, bool force_enabled) -{ -} - Object *acpi_get_i386_pci_host(void) { return NULL; diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 63d2113b86..a7892c444c 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -20,7 +20,6 @@ */ #include "qemu/osdep.h" -#include "hw/i386/pc.h" #include "hw/irq.h" #include "hw/isa/apm.h" #include "hw/i2c/pm_smbus.h" @@ -654,7 +653,6 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) hc->is_hotpluggable_bus = piix4_is_hotpluggable_bus; adevc->ospm_status = piix4_ospm_status; adevc->send_event = piix4_send_gpe; - adevc->madt_cpu = pc_madt_cpu_entry; } static const TypeInfo piix4_pm_info = { diff --git a/hw/i386/generic_event_device_x86.c b/hw/i386/generic_event_device_x86.c index e26fb02a2e..8fc233e1f1 100644 --- a/hw/i386/generic_event_device_x86.c +++ b/hw/i386/generic_event_device_x86.c @@ -8,19 +8,10 @@ #include "qemu/osdep.h" #include "hw/acpi/generic_event_device.h" -#include "hw/i386/pc.h" - -static void acpi_ged_x86_class_init(ObjectClass *class, void *data) -{ - AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(class); - - adevc->madt_cpu = pc_madt_cpu_entry; -} static const TypeInfo acpi_ged_x86_info = { .name = TYPE_ACPI_GED_X86, .parent = TYPE_ACPI_GED, - .class_init = acpi_ged_x86_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, { TYPE_ACPI_DEVICE_IF }, diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index bce487ac4e..3f59980aa0 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -891,7 +891,6 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) hc->is_hotpluggable_bus = ich9_pm_is_hotpluggable_bus; adevc->ospm_status = ich9_pm_ospm_status; adevc->send_event = ich9_send_gpe; - adevc->madt_cpu = pc_madt_cpu_entry; amldevc->build_dev_aml = build_ich9_isa_aml; } diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index a1648220ff..ca92928124 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -52,7 +52,5 @@ struct AcpiDeviceIfClass { /* */ void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); - void (*madt_cpu)(int uid, const CPUArchIdList *apic_ids, GArray *entry, - bool force_enabled); }; #endif From 4f70dd5f6366ac04b0f67d026ee2e17eb35daa45 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 8 Sep 2023 10:42:30 +0200 Subject: [PATCH 28/53] hw/acpi/acpi_dev_interface: Remove now unused #include "hw/boards.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "hw/boards.h" is unused since the previous commit. Since its removal requires include fixes in various unrelated files to keep the code compiling it has been split in a dedicated commit. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230908084234.17642-5-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cpu.c | 1 + hw/acpi/hmat.c | 1 + hw/acpi/hmat.h | 3 ++- hw/acpi/memory_hotplug.c | 1 + include/hw/acpi/acpi_dev_interface.h | 1 - 5 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 65a3202d3f..011d2c6c2d 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "migration/vmstate.h" #include "hw/acpi/cpu.h" +#include "hw/core/cpu.h" #include "qapi/error.h" #include "qapi/qapi-events-acpi.h" #include "trace.h" diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c index 2d5e199ba9..3042d223c8 100644 --- a/hw/acpi/hmat.c +++ b/hw/acpi/hmat.c @@ -27,6 +27,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "sysemu/numa.h" +#include "hw/acpi/aml-build.h" #include "hw/acpi/hmat.h" /* diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h index b57f0e7e80..fd989cb661 100644 --- a/hw/acpi/hmat.h +++ b/hw/acpi/hmat.h @@ -27,7 +27,8 @@ #ifndef HMAT_H #define HMAT_H -#include "hw/acpi/aml-build.h" +#include "hw/acpi/bios-linker-loader.h" +#include "sysemu/numa.h" /* * ACPI 6.3: 5.2.27.3 Memory Proximity Domain Attributes Structure, diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c index d926f4f77d..0b883df813 100644 --- a/hw/acpi/memory_hotplug.c +++ b/hw/acpi/memory_hotplug.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "hw/acpi/memory_hotplug.h" #include "hw/mem/pc-dimm.h" +#include "hw/boards.h" #include "hw/qdev-core.h" #include "migration/vmstate.h" #include "trace.h" diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index ca92928124..68d9d15f50 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -3,7 +3,6 @@ #include "qapi/qapi-types-acpi.h" #include "qom/object.h" -#include "hw/boards.h" #include "hw/qdev-core.h" /* These values are part of guest ABI, and can not be changed */ From c9c8ba69d5dbe5c1c6370e1f09ebd7531509d075 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 8 Sep 2023 10:42:31 +0200 Subject: [PATCH 29/53] hw/i386: Remove now redundant TYPE_ACPI_GED_X86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that TYPE_ACPI_GED_X86 doesn't assign AcpiDeviceIfClass::madt_cpu any more it is the same as TYPE_ACPI_GED. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230908084234.17642-6-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/generic_event_device_x86.c | 27 -------------------------- hw/i386/meson.build | 1 - hw/i386/microvm.c | 2 +- include/hw/acpi/generic_event_device.h | 2 -- 4 files changed, 1 insertion(+), 31 deletions(-) delete mode 100644 hw/i386/generic_event_device_x86.c diff --git a/hw/i386/generic_event_device_x86.c b/hw/i386/generic_event_device_x86.c deleted file mode 100644 index 8fc233e1f1..0000000000 --- a/hw/i386/generic_event_device_x86.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * x86 variant of the generic event device for hw reduced acpi - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2 or later, as published by the Free Software Foundation. - */ - -#include "qemu/osdep.h" -#include "hw/acpi/generic_event_device.h" - -static const TypeInfo acpi_ged_x86_info = { - .name = TYPE_ACPI_GED_X86, - .parent = TYPE_ACPI_GED, - .interfaces = (InterfaceInfo[]) { - { TYPE_HOTPLUG_HANDLER }, - { TYPE_ACPI_DEVICE_IF }, - { } - } -}; - -static void acpi_ged_x86_register_types(void) -{ - type_register_static(&acpi_ged_x86_info); -} - -type_init(acpi_ged_x86_register_types) diff --git a/hw/i386/meson.build b/hw/i386/meson.build index cfdbfdcbcb..ff879069c9 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -20,7 +20,6 @@ i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'), if_false: files('sgx-stub.c')) i386_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-common.c')) -i386_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device_x86.c')) i386_ss.add(when: 'CONFIG_PC', if_true: files( 'pc.c', 'pc_sysfw.c', diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 8deeb62774..b9c93039e2 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -204,7 +204,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) /* Optional and legacy devices */ if (x86_machine_is_acpi_enabled(x86ms)) { - DeviceState *dev = qdev_new(TYPE_ACPI_GED_X86); + DeviceState *dev = qdev_new(TYPE_ACPI_GED); qdev_prop_set_uint32(dev, "ged-event", ACPI_GED_PWR_DOWN_EVT); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, GED_MMIO_BASE); /* sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, GED_MMIO_BASE_MEMHP); */ diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h index d831bbd889..ba84ce0214 100644 --- a/include/hw/acpi/generic_event_device.h +++ b/include/hw/acpi/generic_event_device.h @@ -69,8 +69,6 @@ #define TYPE_ACPI_GED "acpi-ged" OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) -#define TYPE_ACPI_GED_X86 "acpi-ged-x86" - #define ACPI_GED_EVT_SEL_OFFSET 0x0 #define ACPI_GED_EVT_SEL_LEN 0x4 From 5cdb639d25f9951a90b6b7ba31d376d8ab132a61 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 8 Sep 2023 10:42:32 +0200 Subject: [PATCH 30/53] hw/i386/acpi-build: Determine SMI command port just once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SMI command port is currently hardcoded by means of the ACPI_PORT_SMI_CMD macro. This hardcoding is Intel specific and doesn't match VIA, for example. There is already the AcpiFadtData::smi_cmd attribute which is used when building the FADT. Let's also use it when building the DSDT which confines SMI command port determination to just one place. This allows it to become a property later, thus resolving the Intel assumption. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230908084234.17642-7-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 76581d51aa..863a939210 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1495,14 +1495,14 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(crs, aml_io( AML_DECODE16, - ACPI_PORT_SMI_CMD, - ACPI_PORT_SMI_CMD, + pm->fadt.smi_cmd, + pm->fadt.smi_cmd, 1, 2) ); aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(dev, aml_operation_region("SMIR", AML_SYSTEM_IO, - aml_int(ACPI_PORT_SMI_CMD), 2)); + aml_int(pm->fadt.smi_cmd), 2)); field = aml_field("SMIR", AML_BYTE_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("SMIC", 8)); From 7f558ea58bb60257b111abac0424dc601ff54875 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 8 Sep 2023 10:42:33 +0200 Subject: [PATCH 31/53] hw/acpi: Trace GPE access in all device models, not just PIIX4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bernhard Beschow Reviewed-by: Igor Mammedov Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230908084234.17642-8-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/core.c | 5 +++++ hw/acpi/piix4.c | 3 --- hw/acpi/trace-events | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/hw/acpi/core.c b/hw/acpi/core.c index 00b1e79a30..c561845a4a 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -32,6 +32,7 @@ #include "qemu/module.h" #include "qemu/option.h" #include "sysemu/runstate.h" +#include "trace.h" struct acpi_table_header { uint16_t _length; /* our length, not actual part of the hdr */ @@ -686,6 +687,8 @@ void acpi_gpe_ioport_writeb(ACPIREGS *ar, uint32_t addr, uint32_t val) { uint8_t *cur; + trace_acpi_gpe_ioport_writeb(addr, val); + cur = acpi_gpe_ioport_get_ptr(ar, addr); if (addr < ar->gpe.len / 2) { /* GPE_STS */ @@ -709,6 +712,8 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr) val = *cur; } + trace_acpi_gpe_ioport_readb(addr, val); + return val; } diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index a7892c444c..dd523d2e4c 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -42,7 +42,6 @@ #include "hw/acpi/acpi_dev_interface.h" #include "migration/vmstate.h" #include "hw/core/cpu.h" -#include "trace.h" #include "qom/object.h" #define GPE_BASE 0xafe0 @@ -517,7 +516,6 @@ static uint64_t gpe_readb(void *opaque, hwaddr addr, unsigned width) PIIX4PMState *s = opaque; uint32_t val = acpi_gpe_ioport_readb(&s->ar, addr); - trace_piix4_gpe_readb(addr, width, val); return val; } @@ -526,7 +524,6 @@ static void gpe_writeb(void *opaque, hwaddr addr, uint64_t val, { PIIX4PMState *s = opaque; - trace_piix4_gpe_writeb(addr, width, val); acpi_gpe_ioport_writeb(&s->ar, addr, val); acpi_update_sci(&s->ar, s->irq); } diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index 78e0a8670e..159937ddb9 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -17,6 +17,10 @@ mhp_acpi_clear_remove_evt(uint32_t slot) "slot[0x%"PRIx32"] clear remove event" mhp_acpi_pc_dimm_deleted(uint32_t slot) "slot[0x%"PRIx32"] pc-dimm deleted" mhp_acpi_pc_dimm_delete_failed(uint32_t slot) "slot[0x%"PRIx32"] pc-dimm delete failed" +# core.c +acpi_gpe_ioport_readb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " ==> 0x%" PRIx8 +acpi_gpe_ioport_writeb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " <== 0x%" PRIx8 + # cpu.c cpuhp_acpi_invalid_idx_selected(uint32_t idx) "0x%"PRIx32 cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"PRIx8 @@ -48,10 +52,6 @@ acpi_pci_sel_read(uint32_t val) "%" PRIu32 acpi_pci_ej_write(uint64_t addr, uint64_t data) "0x%" PRIx64 " <== %" PRIu64 acpi_pci_sel_write(uint64_t addr, uint64_t data) "0x%" PRIx64 " <== %" PRIu64 -# piix4.c -piix4_gpe_readb(uint64_t addr, unsigned width, uint64_t val) "addr: 0x%" PRIx64 " width: %d ==> 0x%" PRIx64 -piix4_gpe_writeb(uint64_t addr, unsigned width, uint64_t val) "addr: 0x%" PRIx64 " width: %d <== 0x%" PRIx64 - # tco.c tco_timer_reload(int ticks, int msec) "ticks=%d (%d ms)" tco_timer_expired(int timeouts_no, bool strap, bool no_reboot) "timeouts_no=%d no_reboot=%d/%d" From 40a6b8935d5862840c602f977564d2ebbea60ed6 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 8 Sep 2023 10:42:34 +0200 Subject: [PATCH 32/53] hw/acpi/core: Trace enable and status registers of GPE separately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bit positions of both registers are related. Tracing the registers independently results in the same offsets across these registers which eases debugging. Signed-off-by: Bernhard Beschow Acked-by: Igor Mammedov Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230908084234.17642-9-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/core.c | 10 +++++++--- hw/acpi/trace-events | 6 ++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/hw/acpi/core.c b/hw/acpi/core.c index c561845a4a..ec5e127d17 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -687,13 +687,13 @@ void acpi_gpe_ioport_writeb(ACPIREGS *ar, uint32_t addr, uint32_t val) { uint8_t *cur; - trace_acpi_gpe_ioport_writeb(addr, val); - cur = acpi_gpe_ioport_get_ptr(ar, addr); if (addr < ar->gpe.len / 2) { + trace_acpi_gpe_sts_ioport_writeb(addr, val); /* GPE_STS */ *cur = (*cur) & ~val; } else if (addr < ar->gpe.len) { + trace_acpi_gpe_en_ioport_writeb(addr - (ar->gpe.len / 2), val); /* GPE_EN */ *cur = val; } else { @@ -712,7 +712,11 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr) val = *cur; } - trace_acpi_gpe_ioport_readb(addr, val); + if (addr < ar->gpe.len / 2) { + trace_acpi_gpe_sts_ioport_readb(addr, val); + } else { + trace_acpi_gpe_en_ioport_readb(addr - (ar->gpe.len / 2), val); + } return val; } diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index 159937ddb9..edc93e703c 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -18,8 +18,10 @@ mhp_acpi_pc_dimm_deleted(uint32_t slot) "slot[0x%"PRIx32"] pc-dimm deleted" mhp_acpi_pc_dimm_delete_failed(uint32_t slot) "slot[0x%"PRIx32"] pc-dimm delete failed" # core.c -acpi_gpe_ioport_readb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " ==> 0x%" PRIx8 -acpi_gpe_ioport_writeb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " <== 0x%" PRIx8 +acpi_gpe_en_ioport_readb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " ==> 0x%02" PRIx8 +acpi_gpe_en_ioport_writeb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " <== 0x%02" PRIx8 +acpi_gpe_sts_ioport_readb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " ==> 0x%02" PRIx8 +acpi_gpe_sts_ioport_writeb(uint32_t addr, uint8_t val) "addr: 0x%" PRIx32 " <== 0x%02" PRIx8 # cpu.c cpuhp_acpi_invalid_idx_selected(uint32_t idx) "0x%"PRIx32 From e77db790d1bdef9370d23a0a9350c084ce45d91d Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 11 Sep 2023 17:54:35 -0400 Subject: [PATCH 33/53] vdpa: fix gcc cvq_isolated uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc 13.2.1 emits the following warning: net/vhost-vdpa.c: In function ‘net_vhost_vdpa_init.constprop’: net/vhost-vdpa.c:1394:25: error: ‘cvq_isolated’ may be used uninitialized [-Werror=maybe-uninitialized] 1394 | s->cvq_isolated = cvq_isolated; | ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~ net/vhost-vdpa.c:1355:9: note: ‘cvq_isolated’ was declared here 1355 | int cvq_isolated; | ^~~~~~~~~~~~ cc1: all warnings being treated as errors Cc: Eugenio Pérez Cc: Michael S. Tsirkin Cc: Jason Wang Signed-off-by: Stefan Hajnoczi Message-Id: <20230911215435.4156314-1-stefanha@redhat.com> Acked-by: Eugenio Pérez Acked-by: Jason Wang Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 5808d1b60c..94635fcbee 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -1425,7 +1425,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, VhostVDPAState *s; int ret = 0; assert(name); - int cvq_isolated; + int cvq_isolated = 0; if (is_datapath) { nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, From 0a7a164bc37b4ecbf74466e1e5243d72a768ad06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Wed, 13 Sep 2023 14:34:08 +0200 Subject: [PATCH 34/53] vdpa net: zero vhost_vdpa iova_tree pointer at cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not zeroing it causes a SIGSEGV if the live migration is cancelled, at net device restart. This is caused because CVQ tries to reuse the iova_tree that is present in the first vhost_vdpa device at the end of vhost_vdpa_net_cvq_start. As a consequence, it tries to access an iova_tree that has been already free. Fixes: 00ef422e9fbf ("vdpa net: move iova tree creation from init to start") Reported-by: Yanhui Ma Signed-off-by: Eugenio Pérez Message-Id: <20230913123408.2819185-1-eperezma@redhat.com> Acked-by: Jason Wang Tested-by: Lei Yang Reviewed-by: Si-Wei Liu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 94635fcbee..fe519d908d 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -405,6 +405,8 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc) dev = s->vhost_vdpa.dev; if (dev->vq_index + dev->nvqs == dev->vq_index_end) { g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); + } else { + s->vhost_vdpa.iova_tree = NULL; } } From f5a4e1a697e98c7bd0a663d53a378d8c6918ed72 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 13 Sep 2023 14:25:20 +0100 Subject: [PATCH 35/53] hw/cxl: Push cxl_decoder_count_enc() and cxl_decode_ig() into .c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no strong justification for keeping these in the header so push them down into the associated cxl-component-utils.c file. Suggested-by: Philippe Mathieu-Daudé Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20230913132523.29780-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 18 ++++++++++++++++++ include/hw/cxl/cxl_component.h | 18 ++---------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index 378f1082ce..ea2d4770ec 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -13,6 +13,24 @@ #include "hw/pci/pci.h" #include "hw/cxl/cxl.h" +int cxl_decoder_count_enc(int count) +{ + switch (count) { + case 1: return 0; + case 2: return 1; + case 4: return 2; + case 6: return 3; + case 8: return 4; + case 10: return 5; + } + return 0; +} + +hwaddr cxl_decode_ig(int ig) +{ + return 1ULL << (ig + 8); +} + static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset, unsigned size) { diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index 42c7e581a7..bdb3881a6b 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -225,26 +225,12 @@ void cxl_component_create_dvsec(CXLComponentState *cxl_cstate, enum reg_type cxl_dev_type, uint16_t length, uint16_t type, uint8_t rev, uint8_t *body); -static inline int cxl_decoder_count_enc(int count) -{ - switch (count) { - case 1: return 0; - case 2: return 1; - case 4: return 2; - case 6: return 3; - case 8: return 4; - case 10: return 5; - } - return 0; -} +int cxl_decoder_count_enc(int count); uint8_t cxl_interleave_ways_enc(int iw, Error **errp); uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp); -static inline hwaddr cxl_decode_ig(int ig) -{ - return 1ULL << (ig + 8); -} +hwaddr cxl_decode_ig(int ig); CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb); bool cxl_get_hb_passthrough(PCIHostState *hb); From 87de174ac49acaa37264e38129596c9819e4a2c5 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 13 Sep 2023 14:25:21 +0100 Subject: [PATCH 36/53] hw/cxl: Add utility functions decoder interleave ways and target count. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As an encoded version of these key configuration parameters is available in a register, provide functions to extract it again so as to avoid the need for duplicating the storage. Whilst here update the _enc() function to include additional values as defined in the CXL 3.0 specification. Whilst they are not currently used in the emulation, they may be in future and it is easier to compare with the specification if all values are covered. Add a spec reference for cxl_interleave_ways_enc() for consistency with the target count equivalent (and because it's nice to know where the magic numbers come from). Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20230913132523.29780-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 60 ++++++++++++++++++++++++++++++---- include/hw/cxl/cxl_component.h | 2 ++ 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index ea2d4770ec..5f38f2016f 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -13,15 +13,45 @@ #include "hw/pci/pci.h" #include "hw/cxl/cxl.h" +/* CXL r3.0 Section 8.2.4.19.1 CXL HDM Decoder Capability Register */ int cxl_decoder_count_enc(int count) { switch (count) { - case 1: return 0; - case 2: return 1; - case 4: return 2; - case 6: return 3; - case 8: return 4; - case 10: return 5; + case 1: return 0x0; + case 2: return 0x1; + case 4: return 0x2; + case 6: return 0x3; + case 8: return 0x4; + case 10: return 0x5; + /* Switches and Host Bridges may have more than 10 decoders */ + case 12: return 0x6; + case 14: return 0x7; + case 16: return 0x8; + case 20: return 0x9; + case 24: return 0xa; + case 28: return 0xb; + case 32: return 0xc; + } + return 0; +} + +int cxl_decoder_count_dec(int enc_cnt) +{ + switch (enc_cnt) { + case 0x0: return 1; + case 0x1: return 2; + case 0x2: return 4; + case 0x3: return 6; + case 0x4: return 8; + case 0x5: return 10; + /* Switches and Host Bridges may have more than 10 decoders */ + case 0x6: return 12; + case 0x7: return 14; + case 0x8: return 16; + case 0x9: return 20; + case 0xa: return 24; + case 0xb: return 28; + case 0xc: return 32; } return 0; } @@ -393,6 +423,7 @@ void cxl_component_create_dvsec(CXLComponentState *cxl, cxl->dvsec_offset += length; } +/* CXL r3.0 Section 8.2.4.19.7 CXL HDM Decoder n Control Register */ uint8_t cxl_interleave_ways_enc(int iw, Error **errp) { switch (iw) { @@ -410,6 +441,23 @@ uint8_t cxl_interleave_ways_enc(int iw, Error **errp) } } +int cxl_interleave_ways_dec(uint8_t iw_enc, Error **errp) +{ + switch (iw_enc) { + case 0x0: return 1; + case 0x1: return 2; + case 0x2: return 4; + case 0x3: return 8; + case 0x4: return 16; + case 0x8: return 3; + case 0x9: return 6; + case 0xa: return 12; + default: + error_setg(errp, "Encoded interleave ways: %d not supported", iw_enc); + return 0; + } +} + uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp) { switch (gran) { diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index bdb3881a6b..ef9e033919 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -226,8 +226,10 @@ void cxl_component_create_dvsec(CXLComponentState *cxl_cstate, uint16_t type, uint8_t rev, uint8_t *body); int cxl_decoder_count_enc(int count); +int cxl_decoder_count_dec(int enc_cnt); uint8_t cxl_interleave_ways_enc(int iw, Error **errp); +int cxl_interleave_ways_dec(uint8_t iw_enc, Error **errp); uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp); hwaddr cxl_decode_ig(int ig); From 61c44bcf510f4db51c28d0288e528cfdf0ebabc3 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 13 Sep 2023 14:25:22 +0100 Subject: [PATCH 37/53] hw/cxl: Fix and use same calculation for HDM decoder block size everywhere MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to avoid having the size of the per HDM decoder register block repeated in lots of places, create the register definitions for HDM decoder 1 and use the offset between the first registers in HDM decoder 0 and HDM decoder 1 to establish the offset. Calculate in each function as this is more obvious and leads to shorter line lengths than a single #define which would need a long name to be specific enough. Note that the code currently only supports one decoder, so the bugs this fixes don't actually affect anything. Signed-off-by: Jonathan Cameron Reviewed-by: Fan Ni Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230913132523.29780-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 19 +++++++++++-------- hw/cxl/cxl-host.c | 4 +++- hw/mem/cxl_type3.c | 24 +++++++++++++++--------- include/hw/cxl/cxl_component.h | 2 ++ 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index 5f38f2016f..c0630ba5c1 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -210,6 +210,7 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, enum reg_type type) { int decoder_count = 1; + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; int i; ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, DECODER_COUNT, @@ -222,19 +223,21 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, HDM_DECODER_ENABLE, 0); write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3; for (i = 0; i < decoder_count; i++) { - write_msk[R_CXL_HDM_DECODER0_BASE_LO + i * 0x20] = 0xf0000000; - write_msk[R_CXL_HDM_DECODER0_BASE_HI + i * 0x20] = 0xffffffff; - write_msk[R_CXL_HDM_DECODER0_SIZE_LO + i * 0x20] = 0xf0000000; - write_msk[R_CXL_HDM_DECODER0_SIZE_HI + i * 0x20] = 0xffffffff; - write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x13ff; + write_msk[R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc] = 0xf0000000; + write_msk[R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc] = 0xf0000000; + write_msk[R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc] = 0x13ff; if (type == CXL2_DEVICE || type == CXL2_TYPE3_DEVICE || type == CXL2_LOGICAL_DEVICE) { - write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * 0x20] = 0xf0000000; + write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * hdm_inc] = + 0xf0000000; } else { - write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * 0x20] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * hdm_inc] = + 0xffffffff; } - write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_HI + i * 0x20] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_HI + i * hdm_inc] = 0xffffffff; } } diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index f0920da956..73c5426476 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -101,12 +101,14 @@ void cxl_fmws_link_targets(CXLState *cxl_state, Error **errp) static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr, uint8_t *target) { + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; uint32_t ctrl; uint32_t ig_enc; uint32_t iw_enc; uint32_t target_idx; + int i = 0; - ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL]; + ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc]; if (!FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) { return false; } diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 4cdcb3f7e7..9f3022189b 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -388,34 +388,36 @@ static void build_dvsecs(CXLType3Dev *ct3d) static void hdm_decoder_commit(CXLType3Dev *ct3d, int which) { + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; ComponentRegisters *cregs = &ct3d->cxl_cstate.crb; uint32_t *cache_mem = cregs->cache_mem_registers; uint32_t ctrl; assert(which == 0); - ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL); + ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc); /* TODO: Sanity checks that the decoder is possible */ ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0); ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); - stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL, ctrl); + stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl); } static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which) { + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; ComponentRegisters *cregs = &ct3d->cxl_cstate.crb; uint32_t *cache_mem = cregs->cache_mem_registers; uint32_t ctrl; assert(which == 0); - ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL); + ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc); ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0); ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 0); - stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL, ctrl); + stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl); } static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err) @@ -772,26 +774,30 @@ static void ct3_exit(PCIDevice *pci_dev) /* TODO: Support multiple HDM decoders and DPA skip */ static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa) { + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers; uint64_t decoder_base, decoder_size, hpa_offset; uint32_t hdm0_ctrl; int ig, iw; + int i = 0; - decoder_base = (((uint64_t)cache_mem[R_CXL_HDM_DECODER0_BASE_HI] << 32) | - cache_mem[R_CXL_HDM_DECODER0_BASE_LO]); + decoder_base = + (((uint64_t)cache_mem[R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc] << 32) | + cache_mem[R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc]); if ((uint64_t)host_addr < decoder_base) { return false; } hpa_offset = (uint64_t)host_addr - decoder_base; - decoder_size = ((uint64_t)cache_mem[R_CXL_HDM_DECODER0_SIZE_HI] << 32) | - cache_mem[R_CXL_HDM_DECODER0_SIZE_LO]; + decoder_size = + ((uint64_t)cache_mem[R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc] << 32) | + cache_mem[R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc]; if (hpa_offset >= decoder_size) { return false; } - hdm0_ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL]; + hdm0_ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc]; iw = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IW); ig = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IG); diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index ef9e033919..7c864d2044 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -148,6 +148,8 @@ REG32(CXL_HDM_DECODER_GLOBAL_CONTROL, CXL_HDM_REGISTERS_OFFSET + 4) FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, HDM_DECODER_ENABLE, 1, 1) HDM_DECODER_INIT(0); +/* Only used for HDM decoder registers block address increment */ +HDM_DECODER_INIT(1); /* 8.2.5.13 - CXL Extended Security Capability Structure (Root complex only) */ #define EXTSEC_ENTRY_MAX 256 From e967413fe0f2f3fe022658bb279aef95d24210ec Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 13 Sep 2023 14:25:23 +0100 Subject: [PATCH 38/53] hw/cxl: Support 4 HDM decoders at all levels of topology Support these decoders in CXL host bridges (pxb-cxl), CXL Switch USP and CXL Type 3 end points. Signed-off-by: Jonathan Cameron Message-Id: <20230913132523.29780-5-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 7 ++- hw/cxl/cxl-host.c | 67 +++++++++++++++-------- hw/mem/cxl_type3.c | 98 +++++++++++++++++++++++----------- include/hw/cxl/cxl_component.h | 10 +++- 4 files changed, 125 insertions(+), 57 deletions(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index c0630ba5c1..f3bbf0fd13 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -90,6 +90,9 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset, switch (offset) { case A_CXL_HDM_DECODER0_CTRL: + case A_CXL_HDM_DECODER1_CTRL: + case A_CXL_HDM_DECODER2_CTRL: + case A_CXL_HDM_DECODER3_CTRL: should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); should_uncommit = !should_commit; break; @@ -129,7 +132,7 @@ static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value, } if (offset >= A_CXL_HDM_DECODER_CAPABILITY && - offset <= A_CXL_HDM_DECODER0_TARGET_LIST_HI) { + offset <= A_CXL_HDM_DECODER3_TARGET_LIST_HI) { dumb_hdm_handler(cxl_cstate, offset, value); } else { cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)] = value; @@ -209,7 +212,7 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk) static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, enum reg_type type) { - int decoder_count = 1; + int decoder_count = CXL_HDM_DECODER_COUNT; int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; int i; diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index 73c5426476..2aa776c79c 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -97,35 +97,58 @@ void cxl_fmws_link_targets(CXLState *cxl_state, Error **errp) } } -/* TODO: support, multiple hdm decoders */ static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr, uint8_t *target) { int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; - uint32_t ctrl; - uint32_t ig_enc; - uint32_t iw_enc; - uint32_t target_idx; - int i = 0; + unsigned int hdm_count; + bool found = false; + int i; + uint32_t cap; - ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc]; - if (!FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) { - return false; + cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY); + hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap, + CXL_HDM_DECODER_CAPABILITY, + DECODER_COUNT)); + for (i = 0; i < hdm_count; i++) { + uint32_t ctrl, ig_enc, iw_enc, target_idx; + uint32_t low, high; + uint64_t base, size; + + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc); + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc); + base = (low & 0xf0000000) | ((uint64_t)high << 32); + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc); + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc); + size = (low & 0xf0000000) | ((uint64_t)high << 32); + if (addr < base || addr >= base + size) { + continue; + } + + ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * hdm_inc); + if (!FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) { + return false; + } + found = true; + ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG); + iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW); + target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc); + + if (target_idx < 4) { + uint32_t val = ldl_le_p(cache_mem + + R_CXL_HDM_DECODER0_TARGET_LIST_LO + + i * hdm_inc); + *target = extract32(val, target_idx * 8, 8); + } else { + uint32_t val = ldl_le_p(cache_mem + + R_CXL_HDM_DECODER0_TARGET_LIST_HI + + i * hdm_inc); + *target = extract32(val, (target_idx - 4) * 8, 8); + } + break; } - ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG); - iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW); - target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc); - - if (target_idx < 4) { - *target = extract32(cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_LO], - target_idx * 8, 8); - } else { - *target = extract32(cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_HI], - (target_idx - 4) * 8, 8); - } - - return true; + return found; } static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 9f3022189b..c02be4ce45 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -393,8 +393,6 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int which) uint32_t *cache_mem = cregs->cache_mem_registers; uint32_t ctrl; - assert(which == 0); - ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc); /* TODO: Sanity checks that the decoder is possible */ ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0); @@ -410,8 +408,6 @@ static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which) uint32_t *cache_mem = cregs->cache_mem_registers; uint32_t ctrl; - assert(which == 0); - ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc); ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0); @@ -500,6 +496,21 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, should_uncommit = !should_commit; which_hdm = 0; break; + case A_CXL_HDM_DECODER1_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + should_uncommit = !should_commit; + which_hdm = 1; + break; + case A_CXL_HDM_DECODER2_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + should_uncommit = !should_commit; + which_hdm = 2; + break; + case A_CXL_HDM_DECODER3_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + should_uncommit = !should_commit; + which_hdm = 3; + break; case A_CXL_RAS_UNC_ERR_STATUS: { uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL); @@ -771,40 +782,63 @@ static void ct3_exit(PCIDevice *pci_dev) } } -/* TODO: Support multiple HDM decoders and DPA skip */ static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa) { int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers; - uint64_t decoder_base, decoder_size, hpa_offset; - uint32_t hdm0_ctrl; - int ig, iw; - int i = 0; + unsigned int hdm_count; + uint32_t cap; + uint64_t dpa_base = 0; + int i; - decoder_base = - (((uint64_t)cache_mem[R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc] << 32) | - cache_mem[R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc]); - if ((uint64_t)host_addr < decoder_base) { - return false; + cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY); + hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap, + CXL_HDM_DECODER_CAPABILITY, + DECODER_COUNT)); + + for (i = 0; i < hdm_count; i++) { + uint64_t decoder_base, decoder_size, hpa_offset, skip; + uint32_t hdm_ctrl, low, high; + int ig, iw; + + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc); + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc); + decoder_base = ((uint64_t)high << 32) | (low & 0xf0000000); + + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc); + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc); + decoder_size = ((uint64_t)high << 32) | (low & 0xf0000000); + + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_LO + + i * hdm_inc); + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_HI + + i * hdm_inc); + skip = ((uint64_t)high << 32) | (low & 0xf0000000); + dpa_base += skip; + + hpa_offset = (uint64_t)host_addr - decoder_base; + + hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * hdm_inc); + iw = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IW); + ig = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IG); + if (!FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) { + return false; + } + if (((uint64_t)host_addr < decoder_base) || + (hpa_offset >= decoder_size)) { + dpa_base += decoder_size / + cxl_interleave_ways_dec(iw, &error_fatal); + continue; + } + + *dpa = dpa_base + + ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) | + ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset) + >> iw)); + + return true; } - - hpa_offset = (uint64_t)host_addr - decoder_base; - - decoder_size = - ((uint64_t)cache_mem[R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc] << 32) | - cache_mem[R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc]; - if (hpa_offset >= decoder_size) { - return false; - } - - hdm0_ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc]; - iw = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IW); - ig = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IG); - - *dpa = (MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) | - ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset) >> iw); - - return true; + return false; } static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d, diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index 7c864d2044..3c795a6278 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -135,6 +135,10 @@ REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18) REG32(CXL_HDM_DECODER##n##_TARGET_LIST_LO, \ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x24) \ REG32(CXL_HDM_DECODER##n##_TARGET_LIST_HI, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x28) \ + REG32(CXL_HDM_DECODER##n##_DPA_SKIP_LO, \ + CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x24) \ + REG32(CXL_HDM_DECODER##n##_DPA_SKIP_HI, \ CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x28) REG32(CXL_HDM_DECODER_CAPABILITY, CXL_HDM_REGISTERS_OFFSET) @@ -147,9 +151,13 @@ REG32(CXL_HDM_DECODER_GLOBAL_CONTROL, CXL_HDM_REGISTERS_OFFSET + 4) FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, POISON_ON_ERR_EN, 0, 1) FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, HDM_DECODER_ENABLE, 1, 1) +/* Support 4 decoders at all levels of topology */ +#define CXL_HDM_DECODER_COUNT 4 + HDM_DECODER_INIT(0); -/* Only used for HDM decoder registers block address increment */ HDM_DECODER_INIT(1); +HDM_DECODER_INIT(2); +HDM_DECODER_INIT(3); /* 8.2.5.13 - CXL Extended Security Capability Structure (Root complex only) */ #define EXTSEC_ENTRY_MAX 256 From 2c9ec2a827f5d36e9cf3c55d931cc0dca2f12092 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 13 Sep 2023 14:36:15 +0100 Subject: [PATCH 39/53] hw/pci-bridge/cxl-upstream: Add serial number extended capability support Will be needed so there is a defined serial number for information queries via the Switch CCI. Signed-off-by: Jonathan Cameron Message-Id: <20230913133615.29876-1-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/cxl_upstream.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c index 2b9cf0cc97..a57806fb31 100644 --- a/hw/pci-bridge/cxl_upstream.c +++ b/hw/pci-bridge/cxl_upstream.c @@ -14,14 +14,21 @@ #include "hw/pci/msi.h" #include "hw/pci/pcie.h" #include "hw/pci/pcie_port.h" +/* + * Null value of all Fs suggested by IEEE RA guidelines for use of + * EU, OUI and CID + */ +#define UI64_NULL (~0ULL) #define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 2 #define CXL_UPSTREAM_PORT_MSI_OFFSET 0x70 #define CXL_UPSTREAM_PORT_PCIE_CAP_OFFSET 0x90 #define CXL_UPSTREAM_PORT_AER_OFFSET 0x100 -#define CXL_UPSTREAM_PORT_DVSEC_OFFSET \ +#define CXL_UPSTREAM_PORT_SN_OFFSET \ (CXL_UPSTREAM_PORT_AER_OFFSET + PCI_ERR_SIZEOF) +#define CXL_UPSTREAM_PORT_DVSEC_OFFSET \ + (CXL_UPSTREAM_PORT_SN_OFFSET + PCI_EXT_CAP_DSN_SIZEOF) typedef struct CXLUpstreamPort { /*< private >*/ @@ -30,6 +37,7 @@ typedef struct CXLUpstreamPort { /*< public >*/ CXLComponentState cxl_cstate; DOECap doe_cdat; + uint64_t sn; } CXLUpstreamPort; CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp) @@ -326,7 +334,9 @@ static void cxl_usp_realize(PCIDevice *d, Error **errp) if (rc) { goto err_cap; } - + if (usp->sn != UI64_NULL) { + pcie_dev_ser_num_init(d, CXL_UPSTREAM_PORT_SN_OFFSET, usp->sn); + } cxl_cstate->dvsec_offset = CXL_UPSTREAM_PORT_DVSEC_OFFSET; cxl_cstate->pdev = d; build_dvsecs(cxl_cstate); @@ -366,6 +376,7 @@ static void cxl_usp_exitfn(PCIDevice *d) } static Property cxl_upstream_props[] = { + DEFINE_PROP_UINT64("sn", CXLUpstreamPort, sn, UI64_NULL), DEFINE_PROP_STRING("cdat", CXLUpstreamPort, cxl_cstate.cdat.filename), DEFINE_PROP_END_OF_LIST() }; From cbc9ae87b5f6f81c52a249e0b64100d5011fca53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 15 Sep 2023 19:08:34 +0200 Subject: [PATCH 40/53] vdpa net: fix error message setting virtio status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It incorrectly prints "error setting features", probably because a copy paste miss. Fixes: 152128d646 ("vdpa: move CVQ isolation check to net_init_vhost_vdpa") Reported-by: Peter Maydell Signed-off-by: Eugenio Pérez Message-Id: <20230915170836.3078172-2-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé --- net/vhost-vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index fe519d908d..650125bb0f 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -1368,7 +1368,7 @@ static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); if (unlikely(r)) { - error_setg_errno(errp, -r, "Cannot set device features"); + error_setg_errno(errp, -r, "Cannot set status"); goto out; } From f1085882d028e5a1b227443cd6e96bbb63d66f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 15 Sep 2023 19:08:35 +0200 Subject: [PATCH 41/53] vdpa net: stop probing if cannot set features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise it continues the CVQ isolation probing. Fixes: 152128d646 ("vdpa: move CVQ isolation check to net_init_vhost_vdpa") Reported-by: Peter Maydell Signed-off-by: Eugenio Pérez Message-Id: <20230915170836.3078172-3-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé --- net/vhost-vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 650125bb0f..b688877f90 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -1364,6 +1364,7 @@ static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, r = ioctl(device_fd, VHOST_SET_FEATURES, &features); if (unlikely(r)) { error_setg_errno(errp, errno, "Cannot set features"); + goto out; } r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); From 845ec38ae1578dd2d42ff15c9979f1bf44b23418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 15 Sep 2023 19:08:36 +0200 Subject: [PATCH 42/53] vdpa net: follow VirtIO initialization properly at cvq isolation probing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch solves a few issues. The most obvious is that the feature set was done previous to ACKNOWLEDGE | DRIVER status bit set. Current vdpa devices are permissive with this, but it is better to follow the standard. Fixes: 152128d646 ("vdpa: move CVQ isolation check to net_init_vhost_vdpa") Signed-off-by: Eugenio Pérez Message-Id: <20230915170836.3078172-4-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index b688877f90..939c984d5b 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -1345,8 +1345,7 @@ static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, uint64_t backend_features; int64_t cvq_group; uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE | - VIRTIO_CONFIG_S_DRIVER | - VIRTIO_CONFIG_S_FEATURES_OK; + VIRTIO_CONFIG_S_DRIVER; int r; ERRP_GUARD(); @@ -1361,15 +1360,22 @@ static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, return 0; } - r = ioctl(device_fd, VHOST_SET_FEATURES, &features); + r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); if (unlikely(r)) { - error_setg_errno(errp, errno, "Cannot set features"); + error_setg_errno(errp, -r, "Cannot set device status"); goto out; } + r = ioctl(device_fd, VHOST_SET_FEATURES, &features); + if (unlikely(r)) { + error_setg_errno(errp, -r, "Cannot set features"); + goto out; + } + + status |= VIRTIO_CONFIG_S_FEATURES_OK; r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); if (unlikely(r)) { - error_setg_errno(errp, -r, "Cannot set status"); + error_setg_errno(errp, -r, "Cannot set device status"); goto out; } From 0114c4513095598cdf1cd8d7dacdfff757628121 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 21 Sep 2023 20:46:11 +0900 Subject: [PATCH 43/53] amd_iommu: Fix APIC address check An MSI from I/O APIC may not exactly equal to APIC_DEFAULT_ADDRESS. In fact, Windows 17763.3650 configures I/O APIC to set the dest_mode bit. Cover the range assigned to APIC. Fixes: 577c470f43 ("x86_iommu/amd: Prepare for interrupt remap support") Signed-off-by: Akihiko Odaki Message-Id: <20230921114612.40671-1-akihiko.odaki@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/amd_iommu.c | 9 ++------- hw/i386/amd_iommu.h | 2 -- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index c98a3c6e11..8d0f2f99dd 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1246,13 +1246,8 @@ static int amdvi_int_remap_msi(AMDVIState *iommu, return -AMDVI_IR_ERR; } - if (origin->address & AMDVI_MSI_ADDR_HI_MASK) { - trace_amdvi_err("MSI address high 32 bits non-zero when " - "Interrupt Remapping enabled."); - return -AMDVI_IR_ERR; - } - - if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) { + if (origin->address < AMDVI_INT_ADDR_FIRST || + origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) { trace_amdvi_err("MSI is not from IOAPIC."); return -AMDVI_IR_ERR; } diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 6da893ee57..c5065a3e27 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -210,8 +210,6 @@ #define AMDVI_INT_ADDR_FIRST 0xfee00000 #define AMDVI_INT_ADDR_LAST 0xfeefffff #define AMDVI_INT_ADDR_SIZE (AMDVI_INT_ADDR_LAST - AMDVI_INT_ADDR_FIRST + 1) -#define AMDVI_MSI_ADDR_HI_MASK (0xffffffff00000000ULL) -#define AMDVI_MSI_ADDR_LO_MASK (0x00000000ffffffffULL) /* SB IOAPIC is always on this device in AMD systems */ #define AMDVI_IOAPIC_SB_DEVID PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0)) From cf0386509ece089213226855ae685e2228315ffe Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Fri, 22 Sep 2023 21:34:13 +0530 Subject: [PATCH 44/53] hw/i386/pc: improve physical address space bound check for 32-bit x86 systems 32-bit x86 systems do not have a reserved memory for hole64. On those 32-bit systems without PSE36 or PAE CPU features, hotplugging memory devices are not supported by QEMU as QEMU always places hotplugged memory above 4 GiB boundary which is beyond the physical address space of the processor. Linux guests also does not support memory hotplug on those systems. Please see Linux kernel commit b59d02ed08690 ("mm/memory_hotplug: disable the functionality for 32b") for more details. Therefore, the maximum limit of the guest physical address in the absence of additional memory devices effectively coincides with the end of "above 4G memory space" region for 32-bit x86 without PAE/PSE36. When users configure additional memory devices, after properly accounting for the additional device memory region to find the maximum value of the guest physical address, the address will be outside the range of the processor's physical address space. This change adds improvements to take above into consideration. For example, previously this was allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G With this change now it is no longer allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G qemu-system-x86_64: Address space limit 0xffffffff < 0x2bfffffff phys-bits too low (32) However, the following are allowed since on both cases physical address space of the processor is 36 bits: $ ./qemu-system-x86_64 -cpu pentium2 -m size=10G $ ./qemu-system-x86_64 -cpu pentium,pse36=on -m size=10G For 32-bit, without PAE/PSE36, hotplugging additional memory is no longer allowed. $ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) $ ./qemu-system-i386 -machine q35 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) A new compatibility flag is introduced to make sure pc_max_used_gpa() keeps returning the old value for machines 8.1 and older. Therefore, the above is still allowed for older machine types in order to support compatibility. Hence, the following still works: $ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2 $ ./qemu-system-i386 -machine pc-q35-8.1 -m size=1G,maxmem=3G,slots=2 Further, following is also allowed as with PSE36, the processor has 36-bit address space: $ ./qemu-system-i386 -cpu 486,pse36=on -m size=1G,maxmem=3G,slots=2 After calling CPUID with EAX=0x80000001, all AMD64 compliant processors have the longmode-capable-bit turned on in the extended feature flags (bit 29) in EDX. The absence of CPUID longmode can be used to differentiate between 32-bit and 64-bit processors and is the recommended approach. QEMU takes this approach elsewhere (for example, please see x86_cpu_realizefn()), With this change, pc_max_used_gpa() also uses the same method to detect 32-bit processors. Unit tests are modified to not run 32-bit x86 tests that use memory hotplug. Suggested-by: David Hildenbrand Signed-off-by: Ani Sinha Reviewed-by: David Hildenbrand Message-Id: <20230922160413.165702-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 32 +++++++++++++++++++++++++++++--- hw/i386/pc_piix.c | 4 ++++ hw/i386/pc_q35.c | 2 ++ include/hw/i386/pc.h | 6 ++++++ tests/qtest/bios-tables-test.c | 26 ++++++++++++++++++-------- tests/qtest/numa-test.c | 7 ++++++- 6 files changed, 65 insertions(+), 12 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 3db0743f31..a532d42cf4 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -907,13 +907,39 @@ static uint64_t pc_get_cxl_range_end(PCMachineState *pcms) static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size) { X86CPU *cpu = X86_CPU(first_cpu); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + MachineState *ms = MACHINE(pcms); - /* 32-bit systems don't have hole64 thus return max CPU address */ - if (cpu->phys_bits <= 32) { + if (cpu->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { + /* 64-bit systems */ + return pc_pci_hole64_start() + pci_hole64_size - 1; + } + + /* 32-bit systems */ + if (pcmc->broken_32bit_mem_addr_check) { + /* old value for compatibility reasons */ return ((hwaddr)1 << cpu->phys_bits) - 1; } - return pc_pci_hole64_start() + pci_hole64_size - 1; + /* + * 32-bit systems don't have hole64 but they might have a region for + * memory devices. Even if additional hotplugged memory devices might + * not be usable by most guest OSes, we need to still consider them for + * calculating the highest possible GPA so that we can properly report + * if someone configures them on a CPU that cannot possibly address them. + */ + if (pcmc->has_reserved_memory && + (ms->ram_size < ms->maxram_size)) { + hwaddr devmem_start; + ram_addr_t devmem_size; + + pc_get_device_memory_range(pcms, &devmem_start, &devmem_size); + devmem_start += devmem_size; + return devmem_start - 1; + } + + /* configuration without any memory hotplug */ + return pc_above_4g_end(pcms) - 1; } /* diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 8321f36f97..71003759bb 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -517,9 +517,13 @@ DEFINE_I440FX_MACHINE(v8_2, "pc-i440fx-8.2", NULL, static void pc_i440fx_8_1_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_i440fx_8_2_machine_options(m); m->alias = NULL; m->is_default = false; + pcmc->broken_32bit_mem_addr_check = true; + compat_props_add(m->compat_props, hw_compat_8_1, hw_compat_8_1_len); compat_props_add(m->compat_props, pc_compat_8_1, pc_compat_8_1_len); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 2dd1158b70..a7386f2ca2 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -394,8 +394,10 @@ DEFINE_Q35_MACHINE(v8_2, "pc-q35-8.2", NULL, static void pc_q35_8_1_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_8_2_machine_options(m); m->alias = NULL; + pcmc->broken_32bit_mem_addr_check = true; compat_props_add(m->compat_props, hw_compat_8_1, hw_compat_8_1_len); compat_props_add(m->compat_props, pc_compat_8_1, pc_compat_8_1_len); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 0fabece236..bec38cb92c 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -129,6 +129,12 @@ struct PCMachineClass { /* resizable acpi blob compat */ bool resizable_acpi_blob; + + /* + * whether the machine type implements broken 32-bit address space bound + * check for memory. + */ + bool broken_32bit_mem_addr_check; }; #define TYPE_PC_MACHINE "generic-pc-machine" diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index d1b80149f2..f8e03dfd46 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -2080,7 +2080,6 @@ int main(int argc, char *argv[]) test_acpi_piix4_no_acpi_pci_hotplug); qtest_add_func("acpi/piix4/ipmi", test_acpi_piix4_tcg_ipmi); qtest_add_func("acpi/piix4/cpuhp", test_acpi_piix4_tcg_cphp); - qtest_add_func("acpi/piix4/memhp", test_acpi_piix4_tcg_memhp); qtest_add_func("acpi/piix4/numamem", test_acpi_piix4_tcg_numamem); qtest_add_func("acpi/piix4/nosmm", test_acpi_piix4_tcg_nosmm); qtest_add_func("acpi/piix4/smm-compat", @@ -2088,9 +2087,15 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/piix4/smm-compat-nosmm", test_acpi_piix4_tcg_smm_compat_nosmm); qtest_add_func("acpi/piix4/nohpet", test_acpi_piix4_tcg_nohpet); - qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm); - qtest_add_func("acpi/piix4/acpihmat", - test_acpi_piix4_tcg_acpi_hmat); + + /* i386 does not support memory hotplug */ + if (strcmp(arch, "i386")) { + qtest_add_func("acpi/piix4/memhp", test_acpi_piix4_tcg_memhp); + qtest_add_func("acpi/piix4/dimmpxm", + test_acpi_piix4_tcg_dimm_pxm); + qtest_add_func("acpi/piix4/acpihmat", + test_acpi_piix4_tcg_acpi_hmat); + } #ifdef CONFIG_POSIX qtest_add_func("acpi/piix4/acpierst", test_acpi_piix4_acpi_erst); #endif @@ -2108,11 +2113,9 @@ int main(int argc, char *argv[]) test_acpi_q35_tcg_no_acpi_hotplug); qtest_add_func("acpi/q35/multif-bridge", test_acpi_q35_multif_bridge); - qtest_add_func("acpi/q35/mmio64", test_acpi_q35_tcg_mmio64); qtest_add_func("acpi/q35/ipmi", test_acpi_q35_tcg_ipmi); qtest_add_func("acpi/q35/smbus/ipmi", test_acpi_q35_tcg_smbus_ipmi); qtest_add_func("acpi/q35/cpuhp", test_acpi_q35_tcg_cphp); - qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp); qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem); qtest_add_func("acpi/q35/nosmm", test_acpi_q35_tcg_nosmm); qtest_add_func("acpi/q35/smm-compat", @@ -2120,10 +2123,17 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/q35/smm-compat-nosmm", test_acpi_q35_tcg_smm_compat_nosmm); qtest_add_func("acpi/q35/nohpet", test_acpi_q35_tcg_nohpet); - qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm); - qtest_add_func("acpi/q35/acpihmat", test_acpi_q35_tcg_acpi_hmat); qtest_add_func("acpi/q35/acpihmat-noinitiator", test_acpi_q35_tcg_acpi_hmat_noinitiator); + + /* i386 does not support memory hotplug */ + if (strcmp(arch, "i386")) { + qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp); + qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm); + qtest_add_func("acpi/q35/acpihmat", + test_acpi_q35_tcg_acpi_hmat); + qtest_add_func("acpi/q35/mmio64", test_acpi_q35_tcg_mmio64); + } #ifdef CONFIG_POSIX qtest_add_func("acpi/q35/acpierst", test_acpi_q35_acpi_erst); #endif diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c index c5eb13f349..4f4404a4b1 100644 --- a/tests/qtest/numa-test.c +++ b/tests/qtest/numa-test.c @@ -568,7 +568,7 @@ int main(int argc, char **argv) qtest_add_data_func("/numa/mon/cpus/partial", args, test_mon_partial); qtest_add_data_func("/numa/qmp/cpus/query-cpus", args, test_query_cpus); - if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) { + if (!strcmp(arch, "x86_64")) { qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu); qtest_add_data_func("/numa/pc/dynamic/cpu", args, pc_dynamic_cpu_cfg); qtest_add_data_func("/numa/pc/hmat/build", args, pc_hmat_build_cfg); @@ -576,6 +576,11 @@ int main(int argc, char **argv) qtest_add_data_func("/numa/pc/hmat/erange", args, pc_hmat_erange_cfg); } + if (!strcmp(arch, "i386")) { + qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu); + qtest_add_data_func("/numa/pc/dynamic/cpu", args, pc_dynamic_cpu_cfg); + } + if (!strcmp(arch, "ppc64")) { qtest_add_data_func("/numa/spapr/cpu/explicit", args, spapr_numa_cpu); } From f1a153857abc1ba8835b12a01520df9f1b64e15b Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 25 Sep 2023 22:40:35 +0300 Subject: [PATCH 45/53] pcie_sriov: unregister_vfs(): fix error path local_err must be NULL before calling object_property_set_bool(), so we must clear it on each iteration. Let's also use more convenient error_reportf_err(). Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20230925194040.68592-8-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie_sriov.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 76a3b6917e..5ef8950940 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -196,19 +196,16 @@ static void register_vfs(PCIDevice *dev) static void unregister_vfs(PCIDevice *dev) { - Error *local_err = NULL; uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; uint16_t i; trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { + Error *err = NULL; PCIDevice *vf = dev->exp.sriov_pf.vf[i]; - object_property_set_bool(OBJECT(vf), "realized", false, &local_err); - if (local_err) { - fprintf(stderr, "Failed to unplug: %s\n", - error_get_pretty(local_err)); - error_free(local_err); + if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) { + error_reportf_err(err, "Failed to unplug: "); } object_unparent(OBJECT(vf)); object_unref(OBJECT(vf)); From a6f4d2ec42f3feb6c399f5760a2567ca78897bd7 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 25 Sep 2023 22:40:32 +0300 Subject: [PATCH 46/53] libvhost-user.c: add assertion to vu_message_read_default Explain Coverity that we are not going to overflow vmsg->fds. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20230925194040.68592-5-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- subprojects/libvhost-user/libvhost-user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 0469a50101..49b57c7ef4 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -322,6 +322,7 @@ vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { fd_size = cmsg->cmsg_len - CMSG_LEN(0); vmsg->fd_num = fd_size / sizeof(int); + assert(fd_size < VHOST_MEMORY_BASELINE_NREGIONS); memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); break; } From 850cd20b072cd330cb24aa1c92732b9722998d40 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 27 Sep 2023 15:50:33 +0200 Subject: [PATCH 47/53] virtio: use shadow_avail_idx while checking number of heads We do not need the most up to date number of heads, we only want to know if there is at least one. Use shadow variable as long as it is not equal to the last available index checked. This avoids expensive qatomic dereference of the RCU-protected memory region cache as well as the memory access itself. The change improves performance of the af-xdp network backend by 2-3%. Signed-off-by: Ilya Maximets Message-Id: <20230927135157.2316982-1-i.maximets@ovn.org> Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index c727e9201b..2058b838e9 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -997,7 +997,12 @@ void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, /* Called within rcu_read_lock(). */ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) { - uint16_t num_heads = vring_avail_idx(vq) - idx; + uint16_t avail_idx, num_heads; + + /* Use shadow index whenever possible. */ + avail_idx = (vq->shadow_avail_idx != idx) ? vq->shadow_avail_idx + : vring_avail_idx(vq); + num_heads = avail_idx - idx; /* Check it isn't doing very strange things with descriptor numbers. */ if (num_heads > vq->vring.num) { @@ -1005,8 +1010,15 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) idx, vq->shadow_avail_idx); return -EINVAL; } - /* On success, callers read a descriptor at vq->last_avail_idx. - * Make sure descriptor read does not bypass avail index read. */ + /* + * On success, callers read a descriptor at vq->last_avail_idx. + * Make sure descriptor read does not bypass avail index read. + * + * This is necessary even if we are using a shadow index, since + * the shadow index could have been initialized by calling + * vring_avail_idx() outside of this function, i.e., by a guest + * memory read not accompanied by a barrier. + */ if (num_heads) { smp_rmb(); } From d501f97d9607eff1750549e0270c034102786d33 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 27 Sep 2023 15:59:41 +0200 Subject: [PATCH 48/53] virtio: remove unnecessary thread fence while reading next descriptor It was supposed to be a compiler barrier and it was a compiler barrier initially called 'wmb' when virtio core support was introduced. Later all the instances of 'wmb' were switched to smp_wmb to fix memory ordering issues on non-x86 platforms. However, this one doesn't need to be an actual barrier, as its only purpose was to ensure that the value is not read twice. And since commit aa570d6fb6bd ("virtio: combine the read of a descriptor") there is no need for a barrier at all, since we're no longer reading guest memory here, but accessing a local structure. Signed-off-by: Ilya Maximets Message-Id: <20230927140016.2317404-2-i.maximets@ovn.org> Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 2058b838e9..87e8f990c5 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1060,8 +1060,6 @@ static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, /* Check they're not leading us off end of descriptors. */ *next = desc->next; - /* Make sure compiler knows to grab that: we don't want it changing! */ - smp_wmb(); if (*next >= max) { virtio_error(vdev, "Desc next is %u", *next); From 70f88436aa5a8aeddd33e4d06270146af5d5bb52 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 27 Sep 2023 15:59:42 +0200 Subject: [PATCH 49/53] virtio: remove unused next argument from virtqueue_split_read_next_desc() The 'next' was converted from a local variable to an output parameter in commit: 412e0e81b174 ("virtio: handle virtqueue_read_next_desc() errors") But all the actual uses of the 'i/next' as an output were removed a few months prior in commit: aa570d6fb6bd ("virtio: combine the read of a descriptor") Remove the unused argument to simplify the code. Also, adding a comment to the function to describe what it is actually doing, as it is not obvious that the 'desc' is both an input and an output argument. Signed-off-by: Ilya Maximets Message-Id: <20230927140016.2317404-3-i.maximets@ovn.org> Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 87e8f990c5..6facd64fbc 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1049,9 +1049,10 @@ enum { VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */ }; +/* Reads the 'desc->next' descriptor into '*desc'. */ static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, MemoryRegionCache *desc_cache, - unsigned int max, unsigned int *next) + unsigned int max) { /* If this descriptor says it doesn't chain, we're done. */ if (!(desc->flags & VRING_DESC_F_NEXT)) { @@ -1059,14 +1060,12 @@ static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, } /* Check they're not leading us off end of descriptors. */ - *next = desc->next; - - if (*next >= max) { - virtio_error(vdev, "Desc next is %u", *next); + if (desc->next >= max) { + virtio_error(vdev, "Desc next is %u", desc->next); return VIRTQUEUE_READ_DESC_ERROR; } - vring_split_desc_read(vdev, desc, desc_cache, *next); + vring_split_desc_read(vdev, desc, desc_cache, desc->next); return VIRTQUEUE_READ_DESC_MORE; } @@ -1146,7 +1145,7 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq, goto done; } - rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i); + rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max); } while (rc == VIRTQUEUE_READ_DESC_MORE); if (rc == VIRTQUEUE_READ_DESC_ERROR) { @@ -1601,7 +1600,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) goto err_undo_map; } - rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i); + rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max); } while (rc == VIRTQUEUE_READ_DESC_MORE); if (rc == VIRTQUEUE_READ_DESC_ERROR) { @@ -4055,8 +4054,7 @@ VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path, list = node; ndescs++; - rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, - max, &i); + rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max); } while (rc == VIRTQUEUE_READ_DESC_MORE); element->descs = list; done: From a6ceee591acdb9c9c772bf59544a57891308222e Mon Sep 17 00:00:00 2001 From: Albert Esteve Date: Mon, 2 Oct 2023 08:57:03 +0200 Subject: [PATCH 50/53] util/uuid: add a hash function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add hash function to uuid module using the djb2 hash algorithm. Add a couple simple unit tests for the hash function, checking collisions for similar UUIDs. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Albert Esteve Message-Id: <20231002065706.94707-2-aesteve@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/qemu/uuid.h | 2 ++ tests/unit/test-uuid.c | 27 +++++++++++++++++++++++++++ util/uuid.c | 14 ++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h index dc40ee1fc9..e24a1099e4 100644 --- a/include/qemu/uuid.h +++ b/include/qemu/uuid.h @@ -96,4 +96,6 @@ int qemu_uuid_parse(const char *str, QemuUUID *uuid); QemuUUID qemu_uuid_bswap(QemuUUID uuid); +uint32_t qemu_uuid_hash(const void *uuid); + #endif diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c index c111de5fc1..aedc125ae9 100644 --- a/tests/unit/test-uuid.c +++ b/tests/unit/test-uuid.c @@ -171,6 +171,32 @@ static void test_uuid_unparse_strdup(void) } } +static void test_uuid_hash(void) +{ + QemuUUID uuid; + int i; + + for (i = 0; i < 100; i++) { + qemu_uuid_generate(&uuid); + /* Obtain the UUID hash */ + uint32_t hash_a = qemu_uuid_hash(&uuid); + int data_idx = g_random_int_range(0, 15); + /* Change a single random byte of the UUID */ + if (uuid.data[data_idx] < 0xFF) { + uuid.data[data_idx]++; + } else { + uuid.data[data_idx]--; + } + /* Obtain the UUID hash again */ + uint32_t hash_b = qemu_uuid_hash(&uuid); + /* + * Both hashes shall be different (avoid collision) + * for any change in the UUID fields + */ + g_assert_cmpint(hash_a, !=, hash_b); + } +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); @@ -179,6 +205,7 @@ int main(int argc, char **argv) g_test_add_func("/uuid/parse", test_uuid_parse); g_test_add_func("/uuid/unparse", test_uuid_unparse); g_test_add_func("/uuid/unparse_strdup", test_uuid_unparse_strdup); + g_test_add_func("/uuid/hash", test_uuid_hash); return g_test_run(); } diff --git a/util/uuid.c b/util/uuid.c index b1108dde78..d71aa79e5e 100644 --- a/util/uuid.c +++ b/util/uuid.c @@ -116,3 +116,17 @@ QemuUUID qemu_uuid_bswap(QemuUUID uuid) bswap16s(&uuid.fields.time_high_and_version); return uuid; } + +/* djb2 hash algorithm */ +uint32_t qemu_uuid_hash(const void *uuid) +{ + QemuUUID *qid = (QemuUUID *) uuid; + uint32_t h = 5381; + int i; + + for (i = 0; i < ARRAY_SIZE(qid->data); i++) { + h = (h << 5) + h + qid->data[i]; + } + + return h; +} From faefdba8474fbc30427a64caa4dcd6df611f5b60 Mon Sep 17 00:00:00 2001 From: Albert Esteve Date: Mon, 2 Oct 2023 08:57:04 +0200 Subject: [PATCH 51/53] hw/display: introduce virtio-dmabuf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This API manages objects (in this iteration, dmabuf fds) that can be shared along different virtio devices, associated to a UUID. The API allows the different devices to add, remove and/or retrieve the objects by simply invoking the public functions that reside in the virtio-dmabuf file. For vhost backends, the API stores the pointer to the backend holding the object. Suggested-by: Gerd Hoffmann Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Albert Esteve Message-Id: <20231002065706.94707-3-aesteve@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 7 ++ hw/display/meson.build | 1 + hw/display/virtio-dmabuf.c | 138 ++++++++++++++++++++++++++++++ include/hw/virtio/virtio-dmabuf.h | 100 ++++++++++++++++++++++ tests/unit/meson.build | 1 + tests/unit/test-virtio-dmabuf.c | 137 +++++++++++++++++++++++++++++ 6 files changed, 384 insertions(+) create mode 100644 hw/display/virtio-dmabuf.c create mode 100644 include/hw/virtio/virtio-dmabuf.h create mode 100644 tests/unit/test-virtio-dmabuf.c diff --git a/MAINTAINERS b/MAINTAINERS index 355b1960ce..5e27ce3ceb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2154,6 +2154,13 @@ T: git https://gitlab.com/cohuck/qemu.git s390-next T: git https://github.com/borntraeger/qemu.git s390-next L: qemu-s390x@nongnu.org +virtio-dmabuf +M: Albert Esteve +S: Supported +F: hw/display/virtio-dmabuf.c +F: include/hw/virtio/virtio-dmabuf.h +F: tests/unit/test-virtio-dmabuf.c + virtiofs M: Stefan Hajnoczi S: Supported diff --git a/hw/display/meson.build b/hw/display/meson.build index 413ba4ab24..05619c6968 100644 --- a/hw/display/meson.build +++ b/hw/display/meson.build @@ -37,6 +37,7 @@ system_ss.add(when: 'CONFIG_MACFB', if_true: files('macfb.c')) system_ss.add(when: 'CONFIG_NEXTCUBE', if_true: files('next-fb.c')) system_ss.add(when: 'CONFIG_VGA', if_true: files('vga.c')) +system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('virtio-dmabuf.c')) if (config_all_devices.has_key('CONFIG_VGA_CIRRUS') or config_all_devices.has_key('CONFIG_VGA_PCI') or diff --git a/hw/display/virtio-dmabuf.c b/hw/display/virtio-dmabuf.c new file mode 100644 index 0000000000..4a8e430f3d --- /dev/null +++ b/hw/display/virtio-dmabuf.c @@ -0,0 +1,138 @@ +/* + * Virtio Shared dma-buf + * + * Copyright Red Hat, Inc. 2023 + * + * Authors: + * Albert Esteve + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "hw/virtio/virtio-dmabuf.h" + + +static GMutex lock; +static GHashTable *resource_uuids; + +/* + * uuid_equal_func: wrapper for UUID is_equal function to + * satisfy g_hash_table_new expected parameters signatures. + */ +static int uuid_equal_func(const void *lhv, const void *rhv) +{ + return qemu_uuid_is_equal(lhv, rhv); +} + +static bool virtio_add_resource(QemuUUID *uuid, VirtioSharedObject *value) +{ + bool result = false; + + g_mutex_lock(&lock); + if (resource_uuids == NULL) { + resource_uuids = g_hash_table_new_full(qemu_uuid_hash, + uuid_equal_func, + NULL, + g_free); + } + if (g_hash_table_lookup(resource_uuids, uuid) == NULL) { + result = g_hash_table_insert(resource_uuids, uuid, value); + } + g_mutex_unlock(&lock); + + return result; +} + +bool virtio_add_dmabuf(QemuUUID *uuid, int udmabuf_fd) +{ + bool result; + VirtioSharedObject *vso; + if (udmabuf_fd < 0) { + return false; + } + vso = g_new(VirtioSharedObject, 1); + vso->type = TYPE_DMABUF; + vso->value = GINT_TO_POINTER(udmabuf_fd); + result = virtio_add_resource(uuid, vso); + + return result; +} + +bool virtio_add_vhost_device(QemuUUID *uuid, struct vhost_dev *dev) +{ + bool result; + VirtioSharedObject *vso; + if (dev == NULL) { + return false; + } + vso = g_new(VirtioSharedObject, 1); + vso->type = TYPE_VHOST_DEV; + vso->value = dev; + result = virtio_add_resource(uuid, vso); + + return result; +} + +bool virtio_remove_resource(const QemuUUID *uuid) +{ + bool result; + g_mutex_lock(&lock); + result = g_hash_table_remove(resource_uuids, uuid); + g_mutex_unlock(&lock); + + return result; +} + +static VirtioSharedObject *get_shared_object(const QemuUUID *uuid) +{ + gpointer lookup_res = NULL; + + g_mutex_lock(&lock); + if (resource_uuids != NULL) { + lookup_res = g_hash_table_lookup(resource_uuids, uuid); + } + g_mutex_unlock(&lock); + + return (VirtioSharedObject *) lookup_res; +} + +int virtio_lookup_dmabuf(const QemuUUID *uuid) +{ + VirtioSharedObject *vso = get_shared_object(uuid); + if (vso == NULL) { + return -1; + } + assert(vso->type == TYPE_DMABUF); + return GPOINTER_TO_INT(vso->value); +} + +struct vhost_dev *virtio_lookup_vhost_device(const QemuUUID *uuid) +{ + VirtioSharedObject *vso = get_shared_object(uuid); + if (vso == NULL) { + return NULL; + } + assert(vso->type == TYPE_VHOST_DEV); + return (struct vhost_dev *) vso->value; +} + +SharedObjectType virtio_object_type(const QemuUUID *uuid) +{ + VirtioSharedObject *vso = get_shared_object(uuid); + if (vso == NULL) { + return TYPE_INVALID; + } + return vso->type; +} + +void virtio_free_resources(void) +{ + g_mutex_lock(&lock); + g_hash_table_destroy(resource_uuids); + /* Reference count shall be 0 after the implicit unref on destroy */ + resource_uuids = NULL; + g_mutex_unlock(&lock); +} diff --git a/include/hw/virtio/virtio-dmabuf.h b/include/hw/virtio/virtio-dmabuf.h new file mode 100644 index 0000000000..627c3b6db7 --- /dev/null +++ b/include/hw/virtio/virtio-dmabuf.h @@ -0,0 +1,100 @@ +/* + * Virtio Shared dma-buf + * + * Copyright Red Hat, Inc. 2023 + * + * Authors: + * Albert Esteve + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef VIRTIO_DMABUF_H +#define VIRTIO_DMABUF_H + +#include "qemu/uuid.h" +#include "vhost.h" + +typedef enum SharedObjectType { + TYPE_INVALID = 0, + TYPE_DMABUF, + TYPE_VHOST_DEV, +} SharedObjectType; + +typedef struct VirtioSharedObject { + SharedObjectType type; + gpointer value; +} VirtioSharedObject; + +/** + * virtio_add_dmabuf() - Add a new dma-buf resource to the lookup table + * @uuid: new resource's UUID + * @dmabuf_fd: the dma-buf descriptor that will be stored and shared with + * other virtio devices. The caller retains ownership over the + * descriptor and its lifecycle. + * + * Note: @dmabuf_fd must be a valid (non-negative) file descriptor. + * + * Return: true if the UUID did not exist and the resource has been added, + * false if another resource with the same UUID already existed. + * Note that if it finds a repeated UUID, the resource is not inserted in + * the lookup table. + */ +bool virtio_add_dmabuf(QemuUUID *uuid, int dmabuf_fd); + +/** + * virtio_add_vhost_device() - Add a new exporter vhost device that holds the + * resource with the associated UUID + * @uuid: new resource's UUID + * @dev: the pointer to the vhost device that holds the resource. The caller + * retains ownership over the device struct and its lifecycle. + * + * Return: true if the UUID did not exist and the device has been tracked, + * false if another resource with the same UUID already existed. + * Note that if it finds a repeated UUID, the resource is not inserted in + * the lookup table. + */ +bool virtio_add_vhost_device(QemuUUID *uuid, struct vhost_dev *dev); + +/** + * virtio_remove_resource() - Removes a resource from the lookup table + * @uuid: resource's UUID + * + * Return: true if the UUID has been found and removed from the lookup table. + */ +bool virtio_remove_resource(const QemuUUID *uuid); + +/** + * virtio_lookup_dmabuf() - Looks for a dma-buf resource in the lookup table + * @uuid: resource's UUID + * + * Return: the dma-buf file descriptor integer, or -1 if the key is not found. + */ +int virtio_lookup_dmabuf(const QemuUUID *uuid); + +/** + * virtio_lookup_vhost_device() - Looks for an exporter vhost device in the + * lookup table + * @uuid: resource's UUID + * + * Return: pointer to the vhost_dev struct, or NULL if the key is not found. + */ +struct vhost_dev *virtio_lookup_vhost_device(const QemuUUID *uuid); + +/** + * virtio_object_type() - Looks for the type of resource in the lookup table + * @uuid: resource's UUID + * + * Return: the type of resource associated with the UUID, or TYPE_INVALID if + * the key is not found. + */ +SharedObjectType virtio_object_type(const QemuUUID *uuid); + +/** + * virtio_free_resources() - Destroys all keys and values of the shared + * resources lookup table, and frees them + */ +void virtio_free_resources(void); + +#endif /* VIRTIO_DMABUF_H */ diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 0299ef6906..1977b302e2 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -50,6 +50,7 @@ tests = { 'test-qapi-util': [], 'test-interval-tree': [], 'test-xs-node': [qom], + 'test-virtio-dmabuf': [meson.project_source_root() / 'hw/display/virtio-dmabuf.c'], } if have_system or have_tools diff --git a/tests/unit/test-virtio-dmabuf.c b/tests/unit/test-virtio-dmabuf.c new file mode 100644 index 0000000000..a45ec52f42 --- /dev/null +++ b/tests/unit/test-virtio-dmabuf.c @@ -0,0 +1,137 @@ +/* + * QEMU tests for shared dma-buf API + * + * Copyright (c) 2023 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "hw/virtio/virtio-dmabuf.h" + + +static void test_add_remove_resources(void) +{ + QemuUUID uuid; + int i, dmabuf_fd; + + for (i = 0; i < 100; ++i) { + qemu_uuid_generate(&uuid); + dmabuf_fd = g_random_int_range(3, 500); + /* Add a new resource */ + g_assert(virtio_add_dmabuf(&uuid, dmabuf_fd)); + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, dmabuf_fd); + /* Remove the resource */ + g_assert(virtio_remove_resource(&uuid)); + /* Resource is not found anymore */ + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, -1); + } +} + +static void test_add_remove_dev(void) +{ + QemuUUID uuid; + struct vhost_dev *dev = g_new0(struct vhost_dev, 1); + int i; + + for (i = 0; i < 100; ++i) { + qemu_uuid_generate(&uuid); + virtio_add_vhost_device(&uuid, dev); + /* vhost device is found */ + g_assert(virtio_lookup_vhost_device(&uuid) != NULL); + /* Remove the vhost device */ + g_assert(virtio_remove_resource(&uuid)); + /* vhost device is not found anymore */ + g_assert(virtio_lookup_vhost_device(&uuid) == NULL); + } + g_free(dev); +} + +static void test_remove_invalid_resource(void) +{ + QemuUUID uuid; + int i; + + for (i = 0; i < 20; ++i) { + qemu_uuid_generate(&uuid); + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, -1); + /* Removing a resource that does not exist returns false */ + g_assert_false(virtio_remove_resource(&uuid)); + } +} + +static void test_add_invalid_resource(void) +{ + QemuUUID uuid; + struct vhost_dev *dev = NULL; + int i, dmabuf_fd = -2, alt_dmabuf = 2; + + for (i = 0; i < 20; ++i) { + qemu_uuid_generate(&uuid); + /* Add a new resource with invalid (negative) resource fd */ + g_assert_false(virtio_add_dmabuf(&uuid, dmabuf_fd)); + /* Resource is not found */ + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, -1); + /* Add a new vhost device with invalid (NULL) pointer */ + g_assert_false(virtio_add_vhost_device(&uuid, dev)); + /* vhost device is not found */ + g_assert(virtio_lookup_vhost_device(&uuid) == NULL); + } + + for (i = 0; i < 20; ++i) { + /* Add a valid resource */ + qemu_uuid_generate(&uuid); + dmabuf_fd = g_random_int_range(3, 500); + g_assert(virtio_add_dmabuf(&uuid, dmabuf_fd)); + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, dmabuf_fd); + /* Add a new resource with repeated uuid returns false */ + g_assert_false(virtio_add_dmabuf(&uuid, alt_dmabuf)); + /* The value for the uuid key is not replaced */ + g_assert_cmpint(virtio_lookup_dmabuf(&uuid), ==, dmabuf_fd); + } +} + +static void test_free_resources(void) +{ + QemuUUID uuids[20]; + int i, dmabuf_fd; + + for (i = 0; i < ARRAY_SIZE(uuids); ++i) { + qemu_uuid_generate(&uuids[i]); + dmabuf_fd = g_random_int_range(3, 500); + g_assert(virtio_add_dmabuf(&uuids[i], dmabuf_fd)); + g_assert_cmpint(virtio_lookup_dmabuf(&uuids[i]), ==, dmabuf_fd); + } + virtio_free_resources(); + for (i = 0; i < ARRAY_SIZE(uuids); ++i) { + /* None of the resources is found after free'd */ + g_assert_cmpint(virtio_lookup_dmabuf(&uuids[i]), ==, -1); + } + +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/virtio-dmabuf/add_rm_res", test_add_remove_resources); + g_test_add_func("/virtio-dmabuf/add_rm_dev", test_add_remove_dev); + g_test_add_func("/virtio-dmabuf/rm_invalid_res", + test_remove_invalid_resource); + g_test_add_func("/virtio-dmabuf/add_invalid_res", + test_add_invalid_resource); + g_test_add_func("/virtio-dmabuf/free_res", test_free_resources); + + return g_test_run(); +} From 160947666276c5b7f6bca4d746bcac2966635d79 Mon Sep 17 00:00:00 2001 From: Albert Esteve Date: Mon, 2 Oct 2023 08:57:05 +0200 Subject: [PATCH 52/53] vhost-user: add shared_object msg Add three new vhost-user protocol `VHOST_USER_BACKEND_SHARED_OBJECT_* messages`. These new messages are sent from vhost-user back-ends to interact with the virtio-dmabuf table in order to add or remove themselves as virtio exporters, or lookup for virtio dma-buf shared objects. The action taken in the front-end depends on the type stored in the virtio shared object hash table. When the table holds a pointer to a vhost backend for a given UUID, the front-end sends a VHOST_USER_GET_SHARED_OBJECT to the backend holding the shared object. The messages can only be sent after successfully negotiating a new VHOST_USER_PROTOCOL_F_SHARED_OBJECT vhost-user protocol feature bit. Finally, refactor code to send response message so that all common parts both for the common REPLY_ACK case, and other data responses, can call it and avoid code repetition. Signed-off-by: Albert Esteve Message-Id: <20231002065706.94707-4-aesteve@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/interop/vhost-user.rst | 57 ++++++++++ hw/virtio/vhost-user.c | 167 +++++++++++++++++++++++++++--- include/hw/virtio/vhost-backend.h | 3 + include/hw/virtio/vhost-user.h | 1 + 4 files changed, 216 insertions(+), 12 deletions(-) diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 5a070adbc1..415bb47a19 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -1440,6 +1440,18 @@ Front-end message types query the back-end for its device status as defined in the Virtio specification. +``VHOST_USER_GET_SHARED_OBJECT`` + :id: 41 + :equivalent ioctl: N/A + :request payload: ``struct VhostUserShared`` + :reply payload: dmabuf fd + + When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol + feature has been successfully negotiated, and the UUID is found + in the exporters cache, this message is submitted by the front-end + to retrieve a given dma-buf fd from a given back-end, determined by + the requested UUID. Back-end will reply passing the fd when the operation + is successful, or no fd otherwise. Back-end message types ---------------------- @@ -1528,6 +1540,51 @@ is sent by the front-end. The state.num field is currently reserved and must be set to 0. +``VHOST_USER_BACKEND_SHARED_OBJECT_ADD`` + :id: 6 + :equivalent ioctl: N/A + :request payload: ``struct VhostUserShared`` + :reply payload: N/A + + When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol + feature has been successfully negotiated, this message can be submitted + by the backends to add themselves as exporters to the virtio shared lookup + table. The back-end device gets associated with a UUID in the shared table. + The back-end is responsible of keeping its own table with exported dma-buf fds. + When another back-end tries to import the resource associated with the UUID, + it will send a message to the front-end, which will act as a proxy to the + exporter back-end. If ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, and + the back-end sets the ``VHOST_USER_NEED_REPLY`` flag, the front-end must + respond with zero when operation is successfully completed, or non-zero + otherwise. + +``VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE`` + :id: 7 + :equivalent ioctl: N/A + :request payload: ``struct VhostUserShared`` + :reply payload: N/A + + When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol + feature has been successfully negotiated, this message can be submitted + by the backend to remove themselves from to the virtio-dmabuf shared + table API. The shared table will remove the back-end device associated with + the UUID. If ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, and the + back-end sets the ``VHOST_USER_NEED_REPLY`` flag, the front-end must respond + with zero when operation is successfully completed, or non-zero otherwise. + +``VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP`` + :id: 8 + :equivalent ioctl: N/A + :request payload: ``struct VhostUserShared`` + :reply payload: dmabuf fd and ``u64`` + + When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol + feature has been successfully negotiated, this message can be submitted + by the backends to retrieve a given dma-buf fd from the virtio-dmabuf + shared table given a UUID. Frontend will reply passing the fd and a zero + when the operation is successful, or non-zero otherwise. Note that if the + operation fails, no fd is sent to the backend. + .. _reply_ack: VHOST_USER_PROTOCOL_F_REPLY_ACK diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index a096335921..3766b415f8 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -10,6 +10,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "hw/virtio/virtio-dmabuf.h" #include "hw/virtio/vhost.h" #include "hw/virtio/virtio-crypto.h" #include "hw/virtio/vhost-user.h" @@ -21,6 +22,7 @@ #include "sysemu/kvm.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" +#include "qemu/uuid.h" #include "qemu/sockets.h" #include "sysemu/runstate.h" #include "sysemu/cryptodev.h" @@ -100,6 +102,7 @@ typedef enum VhostUserRequest { VHOST_USER_REM_MEM_REG = 38, VHOST_USER_SET_STATUS = 39, VHOST_USER_GET_STATUS = 40, + VHOST_USER_GET_SHARED_OBJECT = 41, VHOST_USER_MAX } VhostUserRequest; @@ -108,6 +111,9 @@ typedef enum VhostUserBackendRequest { VHOST_USER_BACKEND_IOTLB_MSG = 1, VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6, + VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7, + VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8, VHOST_USER_BACKEND_MAX } VhostUserBackendRequest; @@ -181,6 +187,10 @@ typedef struct VhostUserInflight { uint16_t queue_size; } VhostUserInflight; +typedef struct VhostUserShared { + unsigned char uuid[16]; +} VhostUserShared; + typedef struct { VhostUserRequest request; @@ -205,6 +215,7 @@ typedef union { VhostUserCryptoSession session; VhostUserVringArea area; VhostUserInflight inflight; + VhostUserShared object; } VhostUserPayload; typedef struct VhostUserMsg { @@ -1580,6 +1591,139 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, return 0; } +static int +vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev, + VhostUserShared *object) +{ + QemuUUID uuid; + + memcpy(uuid.data, object->uuid, sizeof(object->uuid)); + return virtio_add_vhost_device(&uuid, dev); +} + +static int +vhost_user_backend_handle_shared_object_remove(VhostUserShared *object) +{ + QemuUUID uuid; + + memcpy(uuid.data, object->uuid, sizeof(object->uuid)); + return virtio_remove_resource(&uuid); +} + +static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr, + VhostUserPayload *payload, Error **errp) +{ + struct iovec iov[] = { + { .iov_base = hdr, .iov_len = VHOST_USER_HDR_SIZE }, + { .iov_base = payload, .iov_len = hdr->size }, + }; + + hdr->flags &= ~VHOST_USER_NEED_REPLY_MASK; + hdr->flags |= VHOST_USER_REPLY_MASK; + + return !qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), errp); +} + +static bool +vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr, + VhostUserPayload *payload, Error **errp) +{ + hdr->size = sizeof(payload->u64); + return vhost_user_send_resp(ioc, hdr, payload, errp); +} + +int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid, + int *dmabuf_fd) +{ + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->user->chr; + int ret; + VhostUserMsg msg = { + .hdr.request = VHOST_USER_GET_SHARED_OBJECT, + .hdr.flags = VHOST_USER_VERSION, + }; + memcpy(msg.payload.object.uuid, uuid, sizeof(msg.payload.object.uuid)); + + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; + } + + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + return ret; + } + + if (msg.hdr.request != VHOST_USER_GET_SHARED_OBJECT) { + error_report("Received unexpected msg type. " + "Expected %d received %d", + VHOST_USER_GET_SHARED_OBJECT, msg.hdr.request); + return -EPROTO; + } + + *dmabuf_fd = qemu_chr_fe_get_msgfd(chr); + if (*dmabuf_fd < 0) { + error_report("Failed to get dmabuf fd"); + return -EIO; + } + + return 0; +} + +static int +vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u, + QIOChannel *ioc, + VhostUserHeader *hdr, + VhostUserPayload *payload) +{ + QemuUUID uuid; + CharBackend *chr = u->user->chr; + Error *local_err = NULL; + int dmabuf_fd = -1; + int fd_num = 0; + + memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid)); + + payload->u64 = 0; + switch (virtio_object_type(&uuid)) { + case TYPE_DMABUF: + dmabuf_fd = virtio_lookup_dmabuf(&uuid); + break; + case TYPE_VHOST_DEV: + { + struct vhost_dev *dev = virtio_lookup_vhost_device(&uuid); + if (dev == NULL) { + payload->u64 = -EINVAL; + break; + } + int ret = vhost_user_get_shared_object(dev, uuid.data, &dmabuf_fd); + if (ret < 0) { + payload->u64 = ret; + } + break; + } + case TYPE_INVALID: + payload->u64 = -EINVAL; + break; + } + + if (dmabuf_fd != -1) { + fd_num++; + } + + if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) { + error_report("Failed to set msg fds."); + payload->u64 = -EINVAL; + } + + if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload, &local_err)) { + error_report_err(local_err); + return -EINVAL; + } + + return 0; +} + static void close_backend_channel(struct vhost_user *u) { g_source_destroy(u->backend_src); @@ -1637,6 +1781,16 @@ static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, fd ? fd[0] : -1); break; + case VHOST_USER_BACKEND_SHARED_OBJECT_ADD: + ret = vhost_user_backend_handle_shared_object_add(dev, &payload.object); + break; + case VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE: + ret = vhost_user_backend_handle_shared_object_remove(&payload.object); + break; + case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP: + ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc, + &hdr, &payload); + break; default: error_report("Received unexpected msg type: %d.", hdr.request); ret = -EINVAL; @@ -1647,21 +1801,10 @@ static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, * directly in their request handlers. */ if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { - struct iovec iovec[2]; - - - hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; - hdr.flags |= VHOST_USER_REPLY_MASK; - payload.u64 = !!ret; hdr.size = sizeof(payload.u64); - iovec[0].iov_base = &hdr; - iovec[0].iov_len = VHOST_USER_HDR_SIZE; - iovec[1].iov_base = &payload; - iovec[1].iov_len = hdr.size; - - if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { + if (!vhost_user_send_resp(ioc, &hdr, &payload, &local_err)) { error_report_err(local_err); goto err; } diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 31a251a9f5..1860b541d8 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -196,4 +196,7 @@ int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd); +int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid, + int *dmabuf_fd); + #endif /* VHOST_BACKEND_H */ diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index 80e2b4a463..9f9ddf878d 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -29,6 +29,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, VHOST_USER_PROTOCOL_F_STATUS = 16, + VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 17, VHOST_USER_PROTOCOL_F_MAX }; From ce0f3b032a960726c0dddfb4f81f223215179f26 Mon Sep 17 00:00:00 2001 From: Albert Esteve Date: Mon, 2 Oct 2023 08:57:06 +0200 Subject: [PATCH 53/53] libvhost-user: handle shared_object msg In the libvhost-user library we need to handle VHOST_USER_GET_SHARED_OBJECT requests, and add helper functions to allow sending messages to interact with the virtio shared objects hash table. Signed-off-by: Albert Esteve Message-Id: <20231002065706.94707-5-aesteve@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- subprojects/libvhost-user/libvhost-user.c | 120 ++++++++++++++++++++++ subprojects/libvhost-user/libvhost-user.h | 55 +++++++++- 2 files changed, 174 insertions(+), 1 deletion(-) diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 49b57c7ef4..051a611da3 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -161,6 +161,7 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_GET_MAX_MEM_SLOTS), REQ(VHOST_USER_ADD_MEM_REG), REQ(VHOST_USER_REM_MEM_REG), + REQ(VHOST_USER_GET_SHARED_OBJECT), REQ(VHOST_USER_MAX), }; #undef REQ @@ -901,6 +902,24 @@ vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) { return false; } +static bool +vu_get_shared_object(VuDev *dev, VhostUserMsg *vmsg) +{ + int fd_num = 0; + int dmabuf_fd = -1; + if (dev->iface->get_shared_object) { + dmabuf_fd = dev->iface->get_shared_object( + dev, &vmsg->payload.object.uuid[0]); + } + if (dmabuf_fd != -1) { + DPRINT("dmabuf_fd found for requested UUID\n"); + vmsg->fds[fd_num++] = dmabuf_fd; + } + vmsg->fd_num = fd_num; + + return true; +} + static bool vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) { @@ -1404,6 +1423,105 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, return vu_process_message_reply(dev, &vmsg); } +bool +vu_lookup_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN], + int *dmabuf_fd) +{ + bool result = false; + VhostUserMsg msg_reply; + VhostUserMsg msg = { + .request = VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP, + .size = sizeof(msg.payload.object), + .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + }; + + memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN); + + if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) { + return false; + } + + pthread_mutex_lock(&dev->backend_mutex); + if (!vu_message_write(dev, dev->backend_fd, &msg)) { + goto out; + } + + if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) { + goto out; + } + + if (msg_reply.request != msg.request) { + DPRINT("Received unexpected msg type. Expected %d, received %d", + msg.request, msg_reply.request); + goto out; + } + + if (msg_reply.fd_num != 1) { + DPRINT("Received unexpected number of fds. Expected 1, received %d", + msg_reply.fd_num); + goto out; + } + + *dmabuf_fd = msg_reply.fds[0]; + result = *dmabuf_fd > 0 && msg_reply.payload.u64 == 0; +out: + pthread_mutex_unlock(&dev->backend_mutex); + + return result; +} + +static bool +vu_send_message(VuDev *dev, VhostUserMsg *vmsg) +{ + bool result = false; + pthread_mutex_lock(&dev->backend_mutex); + if (!vu_message_write(dev, dev->backend_fd, vmsg)) { + goto out; + } + + result = true; +out: + pthread_mutex_unlock(&dev->backend_mutex); + + return result; +} + +bool +vu_add_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]) +{ + VhostUserMsg msg = { + .request = VHOST_USER_BACKEND_SHARED_OBJECT_ADD, + .size = sizeof(msg.payload.object), + .flags = VHOST_USER_VERSION, + }; + + memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN); + + if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) { + return false; + } + + return vu_send_message(dev, &msg); +} + +bool +vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]) +{ + VhostUserMsg msg = { + .request = VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE, + .size = sizeof(msg.payload.object), + .flags = VHOST_USER_VERSION, + }; + + memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN); + + if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) { + return false; + } + + return vu_send_message(dev, &msg); +} + static bool vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg) { @@ -1944,6 +2062,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_add_mem_reg(dev, vmsg); case VHOST_USER_REM_MEM_REG: return vu_rem_mem_reg(dev, vmsg); + case VHOST_USER_GET_SHARED_OBJECT: + return vu_get_shared_object(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h index 708370c5f5..b36a42a7ca 100644 --- a/subprojects/libvhost-user/libvhost-user.h +++ b/subprojects/libvhost-user/libvhost-user.h @@ -64,7 +64,8 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, - + /* Feature 16 is reserved for VHOST_USER_PROTOCOL_F_STATUS. */ + VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 17, VHOST_USER_PROTOCOL_F_MAX }; @@ -109,6 +110,7 @@ typedef enum VhostUserRequest { VHOST_USER_GET_MAX_MEM_SLOTS = 36, VHOST_USER_ADD_MEM_REG = 37, VHOST_USER_REM_MEM_REG = 38, + VHOST_USER_GET_SHARED_OBJECT = 41, VHOST_USER_MAX } VhostUserRequest; @@ -119,6 +121,9 @@ typedef enum VhostUserBackendRequest { VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, VHOST_USER_BACKEND_VRING_CALL = 4, VHOST_USER_BACKEND_VRING_ERR = 5, + VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6, + VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7, + VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8, VHOST_USER_BACKEND_MAX } VhostUserBackendRequest; @@ -172,6 +177,12 @@ typedef struct VhostUserInflight { uint16_t queue_size; } VhostUserInflight; +#define UUID_LEN 16 + +typedef struct VhostUserShared { + unsigned char uuid[UUID_LEN]; +} VhostUserShared; + #if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) # define VU_PACKED __attribute__((gcc_struct, packed)) #else @@ -199,6 +210,7 @@ typedef struct VhostUserMsg { VhostUserConfig config; VhostUserVringArea area; VhostUserInflight inflight; + VhostUserShared object; } payload; int fds[VHOST_MEMORY_BASELINE_NREGIONS]; @@ -232,6 +244,7 @@ typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len); typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data, uint32_t offset, uint32_t size, uint32_t flags); +typedef int (*vu_get_shared_object_cb) (VuDev *dev, const unsigned char *uuid); typedef struct VuDevIface { /* called by VHOST_USER_GET_FEATURES to get the features bitmask */ @@ -258,6 +271,8 @@ typedef struct VuDevIface { vu_get_config_cb get_config; /* set the config space of the device */ vu_set_config_cb set_config; + /* get virtio shared object from the underlying vhost implementation. */ + vu_get_shared_object_cb get_shared_object; } VuDevIface; typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx); @@ -541,6 +556,44 @@ void vu_set_queue_handler(VuDev *dev, VuVirtq *vq, bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, int size, int offset); +/** + * vu_lookup_shared_object: + * @dev: a VuDev context + * @uuid: UUID of the shared object + * @dmabuf_fd: output dma-buf file descriptor + * + * Lookup for a virtio shared object (i.e., dma-buf fd) associated with the + * received UUID. Result, if found, is stored in the dmabuf_fd argument. + * + * Returns: whether the virtio object was found. + */ +bool vu_lookup_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN], + int *dmabuf_fd); + +/** + * vu_add_shared_object: + * @dev: a VuDev context + * @uuid: UUID of the shared object + * + * Registers this back-end as the exporter for the object associated with + * the received UUID. + * + * Returns: TRUE on success, FALSE on failure. + */ +bool vu_add_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]); + +/** + * vu_rm_shared_object: + * @dev: a VuDev context + * @uuid: UUID of the shared object + * + * Removes a shared object entry (i.e., back-end entry) associated with the + * received UUID key from the hash table. + * + * Returns: TRUE on success, FALSE on failure. + */ +bool vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]); + /** * vu_queue_set_notification: * @dev: a VuDev context