From b2bbadc63c5e907bbb7a19244929ef4c028f3d01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Wed, 27 Mar 2024 04:05:14 +0100
Subject: [PATCH 1/3] hw/xen: detect when running inside stubdomain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a global xen_is_stubdomain variable, set when QEMU is running
inside a stubdomain instead of dom0. This will be relevant for
subsequent patches, as a few things, like accessing PCI config space,
need to be done differently.

Signed-off-by: Marek Marczykowski-Górecki
Reviewed-by: Anthony PERARD
Message-Id:
Signed-off-by: Anthony PERARD
---
 hw/i386/xen/xen-hvm.c | 22 ++++++++++++++++++++++
 include/hw/xen/xen.h  |  1 +
 system/globals.c      |  1 +
 3 files changed, 24 insertions(+)

diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 006d219ad5..4f6446600c 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -584,6 +584,26 @@ static void xen_wakeup_notifier(Notifier *notifier, void *data)
     xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
 }
 
+static bool xen_check_stubdomain(struct xs_handle *xsh)
+{
+    char *dm_path = g_strdup_printf(
+        "/local/domain/%d/image/device-model-domid", xen_domid);
+    char *val;
+    int32_t dm_domid;
+    bool is_stubdom = false;
+
+    val = xs_read(xsh, 0, dm_path, NULL);
+    if (val) {
+        if (sscanf(val, "%d", &dm_domid) == 1) {
+            is_stubdom = dm_domid != 0;
+        }
+        free(val);
+    }
+
+    g_free(dm_path);
+    return is_stubdom;
+}
+
 void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
 {
     MachineState *ms = MACHINE(pcms);
@@ -596,6 +616,8 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
 
     xen_register_ioreq(state, max_cpus, &xen_memory_listener);
 
+    xen_is_stubdomain = xen_check_stubdomain(state->xenstore);
+
     QLIST_INIT(&xen_physmap);
     xen_read_physmap(state);
 
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index 37ecc91fc3..ecb89ecfc1 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -36,6 +36,7 @@ enum xen_mode {
 extern uint32_t xen_domid;
 extern enum xen_mode xen_mode;
 extern bool xen_domid_restrict;
+extern bool xen_is_stubdomain;
 
 int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
 int xen_set_pci_link_route(uint8_t link, uint8_t irq);
diff --git a/system/globals.c b/system/globals.c
index e353584201..d602a04fa2 100644
--- a/system/globals.c
+++ b/system/globals.c
@@ -60,6 +60,7 @@ bool qemu_uuid_set;
 uint32_t xen_domid;
 enum xen_mode xen_mode = XEN_DISABLED;
 bool xen_domid_restrict;
+bool xen_is_stubdomain;
 struct evtchn_backend_ops *xen_evtchn_ops;
 struct gnttab_backend_ops *xen_gnttab_ops;
 struct foreignmem_backend_ops *xen_foreignmem_ops;

From 196fb962baeff16342279111cc927a153415f85f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Wed, 27 Mar 2024 04:05:15 +0100
Subject: [PATCH 2/3] xen: fix stubdom PCI addr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When running in a stubdomain, config space access via sysfs needs to
use the BDF as seen inside the stubdomain (connected via xen-pcifront),
which is different from the real BDF. For other purposes (hypercall
parameters etc.), the real BDF needs to be used.

Get the in-stubdomain BDF by looking up the relevant PV PCI xenstore
entries.

Signed-off-by: Marek Marczykowski-Górecki
Reviewed-by: Anthony PERARD
Message-Id: <35049e99da634a74578a1ff2cb3ae4cc436ede33.1711506237.git-series.marmarek@invisiblethingslab.com>
Signed-off-by: Anthony PERARD
---
 hw/xen/xen-host-pci-device.c | 76 +++++++++++++++++++++++++++++++++++-
 hw/xen/xen-host-pci-device.h |  6 +++
 2 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
index 8c6e9a1716..eaf32f2710 100644
--- a/hw/xen/xen-host-pci-device.c
+++ b/hw/xen/xen-host-pci-device.c
@@ -9,6 +9,8 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
+#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen-bus-helper.h"
 #include "xen-host-pci-device.h"
 
 #define XEN_HOST_PCI_MAX_EXT_CAP \
@@ -33,13 +35,73 @@
 #define IORESOURCE_PREFETCH    0x00001000  /* No side effects */
 #define IORESOURCE_MEM_64      0x00100000
 
+/*
+ * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF
+ * Passthrough (stubdom) accesses are through PV frontend PCI device.  Those
+ * either have a BDF identical to the backend's BDF (xen-backend.passthrough=1)
+ * or a local virtual BDF (xen-backend.passthrough=0)
+ *
+ * We are always given the backend's BDF and need to look up the appropriate
+ * local BDF for sysfs access.
+ */
+static void xen_host_pci_fill_local_addr(XenHostPCIDevice *d, Error **errp)
+{
+    unsigned int num_devs, len, i;
+    unsigned int domain, bus, dev, func;
+    char *be_path = NULL;
+    char path[16];
+
+    be_path = qemu_xen_xs_read(xenstore, 0, "device/pci/0/backend", &len);
+    if (!be_path) {
+        error_setg(errp, "Failed to read device/pci/0/backend");
+        goto out;
+    }
+
+    if (xs_node_scanf(xenstore, 0, be_path, "num_devs", NULL,
+                      "%d", &num_devs) != 1) {
+        error_setg(errp, "Failed to read or parse %s/num_devs", be_path);
+        goto out;
+    }
+
+    for (i = 0; i < num_devs; i++) {
+        snprintf(path, sizeof(path), "dev-%d", i);
+        if (xs_node_scanf(xenstore, 0, be_path, path, NULL,
+                          "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) {
+            error_setg(errp, "Failed to read or parse %s/%s", be_path, path);
+            goto out;
+        }
+        if (domain != d->domain ||
+                bus != d->bus ||
+                dev != d->dev ||
+                func != d->func)
+            continue;
+        snprintf(path, sizeof(path), "vdev-%d", i);
+        if (xs_node_scanf(xenstore, 0, be_path, path, NULL,
+                          "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) {
+            error_setg(errp, "Failed to read or parse %s/%s", be_path, path);
+            goto out;
+        }
+        d->local_domain = domain;
+        d->local_bus = bus;
+        d->local_dev = dev;
+        d->local_func = func;
+        goto out;
+    }
+    error_setg(errp, "Failed to find PCI device %x:%x:%x.%x in xenstore",
+               d->domain, d->bus, d->dev, d->func);
+
+out:
+    free(be_path);
+}
+
 static void xen_host_pci_sysfs_path(const XenHostPCIDevice *d,
                                     const char *name, char *buf, ssize_t size)
 {
     int rc;
 
     rc = snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s",
-                  d->domain, d->bus, d->dev, d->func, name);
+                  d->local_domain, d->local_bus, d->local_dev, d->local_func,
+                  name);
     assert(rc >= 0 && rc < size);
 }
 
@@ -342,6 +404,18 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain,
     d->dev = dev;
     d->func = func;
 
+    if (xen_is_stubdomain) {
+        xen_host_pci_fill_local_addr(d, errp);
+        if (*errp) {
+            goto error;
+        }
+    } else {
+        d->local_domain = d->domain;
+        d->local_bus = d->bus;
+        d->local_dev = d->dev;
+        d->local_func = d->func;
+    }
+
     xen_host_pci_config_open(d, errp);
     if (*errp) {
         goto error;
diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h
index 4d8d34ecb0..270dcb27f7 100644
--- a/hw/xen/xen-host-pci-device.h
+++ b/hw/xen/xen-host-pci-device.h
@@ -23,6 +23,12 @@ typedef struct XenHostPCIDevice {
     uint8_t dev;
     uint8_t func;
 
+    /* different from the above in case of stubdomain */
+    uint16_t local_domain;
+    uint8_t local_bus;
+    uint8_t local_dev;
+    uint8_t local_func;
+
     uint16_t vendor_id;
     uint16_t device_id;
     uint32_t class_code;

From 410b4d560dfa3b38a11ad19cf00180238651d9b7 Mon Sep 17 00:00:00 2001
From: Ross Lagerwall
Date: Thu, 4 Apr 2024 15:08:33 +0100
Subject: [PATCH 3/3] xen-hvm: Avoid livelock while handling buffered ioreqs

A malicious or buggy guest may generate buffered ioreqs faster than
QEMU can process them in handle_buffered_iopage(). The result is a
livelock - QEMU continuously processes ioreqs on the main thread
without iterating through the main loop, which prevents handling other
events, processing timers, etc. Without QEMU handling other events, it
often results in the guest becoming unusable and makes it difficult to
stop the source of buffered ioreqs.

To avoid this, if we process a full page of buffered ioreqs, stop and
reschedule an immediate timer to continue processing them. This lets
QEMU go back to the main loop and catch up.

Signed-off-by: Ross Lagerwall
Reviewed-by: Paul Durrant
Message-Id: <20240404140833.1557953-1-ross.lagerwall@citrix.com>
Signed-off-by: Anthony PERARD
---
 hw/xen/xen-hvm-common.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index b8ace1c368..3a9d6f981b 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -475,11 +475,11 @@ static void handle_ioreq(XenIOState *state, ioreq_t *req)
     }
 }
 
-static bool handle_buffered_iopage(XenIOState *state)
+static unsigned int handle_buffered_iopage(XenIOState *state)
 {
     buffered_iopage_t *buf_page = state->buffered_io_page;
     buf_ioreq_t *buf_req = NULL;
-    bool handled_ioreq = false;
+    unsigned int handled = 0;
     ioreq_t req;
     int qw;
 
@@ -492,7 +492,7 @@ static bool handle_buffered_iopage(XenIOState *state)
     req.count = 1;
     req.dir = IOREQ_WRITE;
 
-    for (;;) {
+    do {
         uint32_t rdptr = buf_page->read_pointer, wrptr;
 
         xen_rmb();
@@ -533,22 +533,30 @@ static bool handle_buffered_iopage(XenIOState *state)
         assert(!req.data_is_ptr);
 
         qatomic_add(&buf_page->read_pointer, qw + 1);
-        handled_ioreq = true;
-    }
+        handled += qw + 1;
+    } while (handled < IOREQ_BUFFER_SLOT_NUM);
 
-    return handled_ioreq;
+    return handled;
 }
 
 static void handle_buffered_io(void *opaque)
 {
+    unsigned int handled;
     XenIOState *state = opaque;
 
-    if (handle_buffered_iopage(state)) {
+    handled = handle_buffered_iopage(state);
+    if (handled >= IOREQ_BUFFER_SLOT_NUM) {
+        /* We handled a full page of ioreqs. Schedule a timer to continue
+         * processing while giving other stuff a chance to run.
+         */
         timer_mod(state->buffered_io_timer,
-                  BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
-    } else {
+                  qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
+    } else if (handled == 0) {
         timer_del(state->buffered_io_timer);
         qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
+    } else {
+        timer_mod(state->buffered_io_timer,
+                  BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
     }
 }
 