From 04a8f72e877b9a912ffd7afa84209909577d1d96 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Wed, 25 Apr 2018 14:46:47 +0100 Subject: [PATCH 1/3] xen/hvm: correct reporting of modified memory under physmap during migration When global_log_dirty is enabled VRAM modification tracking never worked correctly. The address that is passed to xen_hvm_modified_memory() is not the effective PFN but RAM block address which is not the same for VRAM. We need to make a translation for this address into PFN using physmap. Since there is no way to access physmap properly inside xen_hvm_modified_memory() let's make it a global structure. Signed-off-by: Igor Druzhinin Acked-by: Anthony PERARD Signed-off-by: Stefano Stabellini --- hw/i386/xen/xen-hvm.c | 37 ++++++++++++++++++----------------- hw/i386/xen/xen-mapcache.c | 2 +- include/sysemu/xen-mapcache.h | 5 ++--- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 6ffa3c22cc..2afab6573b 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -86,6 +86,8 @@ typedef struct XenPhysmap { QLIST_ENTRY(XenPhysmap) list; } XenPhysmap; +static QLIST_HEAD(, XenPhysmap) xen_physmap; + typedef struct XenIOState { ioservid_t ioservid; shared_iopage_t *shared_page; @@ -107,7 +109,6 @@ typedef struct XenIOState { MemoryListener memory_listener; MemoryListener io_listener; DeviceListener device_listener; - QLIST_HEAD(, XenPhysmap) physmap; hwaddr free_phys_offset; const XenPhysmap *log_for_dirtybit; @@ -274,14 +275,13 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr, g_free(pfn_list); } -static XenPhysmap *get_physmapping(XenIOState *state, - hwaddr start_addr, ram_addr_t size) +static XenPhysmap *get_physmapping(hwaddr start_addr, ram_addr_t size) { XenPhysmap *physmap = NULL; start_addr &= TARGET_PAGE_MASK; - QLIST_FOREACH(physmap, &state->physmap, list) { + QLIST_FOREACH(physmap, &xen_physmap, list) { if (range_covers_byte(physmap->start_addr, physmap->size, start_addr)) { return physmap; } @@ -289,23 +289,21 @@ static XenPhysmap *get_physmapping(XenIOState *state, return NULL; } -#ifdef XEN_COMPAT_PHYSMAP -static hwaddr xen_phys_offset_to_gaddr(hwaddr start_addr, - ram_addr_t size, void *opaque) +static hwaddr xen_phys_offset_to_gaddr(hwaddr phys_offset, ram_addr_t size) { - hwaddr addr = start_addr & TARGET_PAGE_MASK; - XenIOState *xen_io_state = opaque; + hwaddr addr = phys_offset & TARGET_PAGE_MASK; XenPhysmap *physmap = NULL; - QLIST_FOREACH(physmap, &xen_io_state->physmap, list) { + QLIST_FOREACH(physmap, &xen_physmap, list) { if (range_covers_byte(physmap->phys_offset, physmap->size, addr)) { - return physmap->start_addr; + return physmap->start_addr + (phys_offset - physmap->phys_offset); } } - return start_addr; + return phys_offset; } +#ifdef XEN_COMPAT_PHYSMAP static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap) { char path[80], value[17]; @@ -355,7 +353,7 @@ static int xen_add_to_physmap(XenIOState *state, hwaddr phys_offset = memory_region_get_ram_addr(mr); const char *mr_name; - if (get_physmapping(state, start_addr, size)) { + if (get_physmapping(start_addr, size)) { return 0; } if (size <= 0) { @@ -384,7 +382,7 @@ go_physmap: physmap->name = mr_name; physmap->phys_offset = phys_offset; - QLIST_INSERT_HEAD(&state->physmap, physmap, list); + QLIST_INSERT_HEAD(&xen_physmap, physmap, list); if (runstate_check(RUN_STATE_INMIGRATE)) { /* Now when we have a physmap entry we can replace a dummy mapping with @@ -428,7 +426,7 @@ static int xen_remove_from_physmap(XenIOState *state, XenPhysmap *physmap = NULL; hwaddr phys_offset = 0; - physmap = get_physmapping(state, start_addr, size); + physmap = get_physmapping(start_addr, size); if (physmap == NULL) { return -1; } @@ -597,7 +595,7 @@ static void xen_sync_dirty_bitmap(XenIOState *state, int rc, i, j; const XenPhysmap *physmap = NULL; - physmap = get_physmapping(state, start_addr, size); + physmap = get_physmapping(start_addr, size); if (physmap == NULL) { /* not handled */ return; @@ -1222,7 +1220,7 @@ static void xen_read_physmap(XenIOState *state) xen_domid, entries[i]); physmap->name = xs_read(state->xenstore, 0, path, &len); - QLIST_INSERT_HEAD(&state->physmap, physmap, list); + QLIST_INSERT_HEAD(&xen_physmap, physmap, list); } free(entries); } @@ -1374,7 +1372,6 @@ void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory) qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state); state->memory_listener = xen_memory_listener; - QLIST_INIT(&state->physmap); memory_listener_register(&state->memory_listener, &address_space_memory); state->log_for_dirtybit = NULL; @@ -1390,6 +1387,8 @@ void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory) goto err; } xen_be_register_common(); + + QLIST_INIT(&xen_physmap); xen_read_physmap(state); /* Disable ACPI build because Xen handles it */ @@ -1461,6 +1460,8 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) int rc; ram_addr_t start_pfn, nb_pages; + start = xen_phys_offset_to_gaddr(start, length); + if (length == 0) { length = TARGET_PAGE_SIZE; } diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c index efa35dc6e0..12fd932284 100644 --- a/hw/i386/xen/xen-mapcache.c +++ b/hw/i386/xen/xen-mapcache.c @@ -319,7 +319,7 @@ tryagain: mapcache->last_entry = NULL; #ifdef XEN_COMPAT_PHYSMAP if (!translated && mapcache->phys_offset_to_gaddr) { - phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size, mapcache->opaque); + phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size); translated = true; goto tryagain; } diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h index bd4d49e0a4..a03e2f1878 100644 --- a/include/sysemu/xen-mapcache.h +++ b/include/sysemu/xen-mapcache.h @@ -9,9 +9,8 @@ #ifndef XEN_MAPCACHE_H #define XEN_MAPCACHE_H -typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr start_addr, - ram_addr_t size, - void *opaque); +typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset, + ram_addr_t size); #ifdef CONFIG_XEN void xen_map_cache_init(phys_offset_to_gaddr_t f, From d3c49ebbe26b48615e14b8baa88a59cd33761ea6 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Tue, 15 May 2018 17:40:53 +0100 Subject: [PATCH 2/3] xen-hvm: try to use xenforeignmemory_map_resource() to map ioreq pages Xen 4.11 has a new API to directly map guest resources. Among the resources that can be mapped using this API are ioreq pages. This patch modifies QEMU to attempt to use the new API should it exist, falling back to the previous mechanism if it is unavailable. Signed-off-by: Paul Durrant Reviewed-by: Anthony PERARD Signed-off-by: Stefano Stabellini --- configure | 5 +++ hw/i386/xen/trace-events | 1 + hw/i386/xen/xen-hvm.c | 70 ++++++++++++++++++++++++++++--------- include/hw/xen/xen_common.h | 16 +++++++++ 4 files changed, 75 insertions(+), 17 deletions(-) diff --git a/configure b/configure index a6a4616c3e..be6edc7b81 100755 --- a/configure +++ b/configure @@ -2231,12 +2231,17 @@ EOF #undef XC_WANT_COMPAT_DEVICEMODEL_API #define __XEN_TOOLS__ #include +#include int main(void) { xendevicemodel_handle *xd; + xenforeignmemory_handle *xfmem; xd = xendevicemodel_open(0, 0); xendevicemodel_pin_memory_cacheattr(xd, 0, 0, 0, 0); + xfmem = xenforeignmemory_open(0, 0); + xenforeignmemory_map_resource(xfmem, 0, 0, 0, 0, 0, NULL, 0, 0); + return 0; } EOF diff --git a/hw/i386/xen/trace-events b/hw/i386/xen/trace-events index 8dab7bcfe0..38616b698f 100644 --- a/hw/i386/xen/trace-events +++ b/hw/i386/xen/trace-events @@ -15,6 +15,7 @@ cpu_ioreq_pio(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64 cpu_ioreq_pio_read_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio read reg data=0x%"PRIx64" port=0x%"PRIx64" size=%d" cpu_ioreq_pio_write_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio write reg data=0x%"PRIx64" port=0x%"PRIx64" size=%d" cpu_ioreq_move(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p copy dir=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" +xen_map_resource_ioreq(uint32_t id, void *addr) "id: %u addr: %p" # xen-mapcache.c xen_map_cache(uint64_t phys_addr) "want 0x%"PRIx64 diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 2afab6573b..54f99abfea 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -1237,13 +1237,39 @@ static void xen_wakeup_notifier(Notifier *notifier, void *data) static int xen_map_ioreq_server(XenIOState *state) { + void *addr = NULL; + xenforeignmemory_resource_handle *fres; xen_pfn_t ioreq_pfn; xen_pfn_t bufioreq_pfn; evtchn_port_t bufioreq_evtchn; int rc; + /* + * Attempt to map using the resource API and fall back to normal + * foreign mapping if this is not supported. + */ + QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0); + QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1); + fres = xenforeignmemory_map_resource(xen_fmem, xen_domid, + XENMEM_resource_ioreq_server, + state->ioservid, 0, 2, + &addr, + PROT_READ | PROT_WRITE, 0); + if (fres != NULL) { + trace_xen_map_resource_ioreq(state->ioservid, addr); + state->buffered_io_page = addr; + state->shared_page = addr + TARGET_PAGE_SIZE; + } else if (errno != EOPNOTSUPP) { + error_report("failed to map ioreq server resources: error %d handle=%p", + errno, xen_xc); + return -1; + } + rc = xen_get_ioreq_server_info(xen_domid, state->ioservid, - &ioreq_pfn, &bufioreq_pfn, + (state->shared_page == NULL) ? + &ioreq_pfn : NULL, + (state->buffered_io_page == NULL) ? + &bufioreq_pfn : NULL, &bufioreq_evtchn); if (rc < 0) { error_report("failed to get ioreq server info: error %d handle=%p", @@ -1251,26 +1277,36 @@ static int xen_map_ioreq_server(XenIOState *state) return rc; } - DPRINTF("shared page at pfn %lx\n", ioreq_pfn); - DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn); - DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn); - - state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid, - PROT_READ | PROT_WRITE, - 1, &ioreq_pfn, NULL); if (state->shared_page == NULL) { - error_report("map shared IO page returned error %d handle=%p", - errno, xen_xc); + DPRINTF("shared page at pfn %lx\n", ioreq_pfn); + + state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid, + PROT_READ | PROT_WRITE, + 1, &ioreq_pfn, NULL); + if (state->shared_page == NULL) { + error_report("map shared IO page returned error %d handle=%p", + errno, xen_xc); + } + } + + if (state->buffered_io_page == NULL) { + DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn); + + state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid, + PROT_READ | PROT_WRITE, + 1, &bufioreq_pfn, + NULL); + if (state->buffered_io_page == NULL) { + error_report("map buffered IO page returned error %d", errno); + return -1; + } + } + + if (state->shared_page == NULL || state->buffered_io_page == NULL) { return -1; } - state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid, - PROT_READ | PROT_WRITE, - 1, &bufioreq_pfn, NULL); - if (state->buffered_io_page == NULL) { - error_report("map buffered IO page returned error %d", errno); - return -1; - } + DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn); state->bufioreq_remote_port = bufioreq_evtchn; diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index bbf207dcef..93f631e5bf 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -119,6 +119,22 @@ static inline int xendevicemodel_pin_memory_cacheattr( return xc_domain_pin_memory_cacheattr(xen_xc, domid, start, end, type); } +typedef void xenforeignmemory_resource_handle; + +#define XENMEM_resource_ioreq_server 0 + +#define XENMEM_resource_ioreq_server_frame_bufioreq 0 +#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n)) + +static inline xenforeignmemory_resource_handle *xenforeignmemory_map_resource( + xenforeignmemory_handle *fmem, domid_t domid, unsigned int type, + unsigned int id, unsigned long frame, unsigned long nr_frames, + void **paddr, int prot, int flags) +{ + errno = EOPNOTSUPP; + return NULL; +} + #endif /* CONFIG_XEN_CTRL_INTERFACE_VERSION < 41100 */ #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 41000 From dfb6578d69d60e464be36dafed9741dcfd73d2cf Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Thu, 31 May 2018 11:01:13 -0700 Subject: [PATCH 3/3] xen-hvm: stop faking I/O to access PCI config space This patch removes the current hackery where IOREQ_TYPE_PCI_CONFIG requests are handled by faking PIO to 0xcf8 and 0xcfc and replaces it with direct calls to pci_host_config_read/write_common(). Doing so necessitates mapping BDFs to PCIDevices but maintaining a simple QLIST in xen_device_realize/unrealize() will suffice. NOTE: whilst config space accesses are currently limited to PCI_CONFIG_SPACE_SIZE, this patch paves the way to increasing the limit to PCIE_CONFIG_SPACE_SIZE when Xen gains the ability to emulate MCFG table accesses. Signed-off-by: Paul Durrant Reviewed-by: Anthony PERARD Signed-off-by: Stefano Stabellini --- hw/i386/xen/trace-events | 2 + hw/i386/xen/xen-hvm.c | 102 +++++++++++++++++++++++++++++++-------- 2 files changed, 84 insertions(+), 20 deletions(-) diff --git a/hw/i386/xen/trace-events b/hw/i386/xen/trace-events index 38616b698f..8a9077cd4e 100644 --- a/hw/i386/xen/trace-events +++ b/hw/i386/xen/trace-events @@ -16,6 +16,8 @@ cpu_ioreq_pio_read_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) " cpu_ioreq_pio_write_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio write reg data=0x%"PRIx64" port=0x%"PRIx64" size=%d" cpu_ioreq_move(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p copy dir=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" xen_map_resource_ioreq(uint32_t id, void *addr) "id: %u addr: %p" +cpu_ioreq_config_read(void *req, uint32_t sbdf, uint32_t reg, uint32_t size, uint32_t data) "I/O=%p sbdf=0x%x reg=%u size=%u data=0x%x" +cpu_ioreq_config_write(void *req, uint32_t sbdf, uint32_t reg, uint32_t size, uint32_t data) "I/O=%p sbdf=0x%x reg=%u size=%u data=0x%x" # xen-mapcache.c xen_map_cache(uint64_t phys_addr) "want 0x%"PRIx64 diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 54f99abfea..935a3676c8 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -12,6 +12,7 @@ #include "cpu.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" #include "hw/i386/pc.h" #include "hw/i386/apic-msidef.h" #include "hw/xen/xen_common.h" @@ -88,6 +89,12 @@ typedef struct XenPhysmap { static QLIST_HEAD(, XenPhysmap) xen_physmap; +typedef struct XenPciDevice { + PCIDevice *pci_dev; + uint32_t sbdf; + QLIST_ENTRY(XenPciDevice) entry; +} XenPciDevice; + typedef struct XenIOState { ioservid_t ioservid; shared_iopage_t *shared_page; @@ -108,6 +115,7 @@ typedef struct XenIOState { struct xs_handle *xenstore; MemoryListener memory_listener; MemoryListener io_listener; + QLIST_HEAD(, XenPciDevice) dev_list; DeviceListener device_listener; hwaddr free_phys_offset; const XenPhysmap *log_for_dirtybit; @@ -568,6 +576,12 @@ static void xen_device_realize(DeviceListener *listener, if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { PCIDevice *pci_dev = PCI_DEVICE(dev); + XenPciDevice *xendev = g_new(XenPciDevice, 1); + + xendev->pci_dev = pci_dev; + xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev), + pci_dev->devfn); + QLIST_INSERT_HEAD(&state->dev_list, xendev, entry); xen_map_pcidev(xen_domid, state->ioservid, pci_dev); } @@ -580,8 +594,17 @@ static void xen_device_unrealize(DeviceListener *listener, if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { PCIDevice *pci_dev = PCI_DEVICE(dev); + XenPciDevice *xendev, *next; xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev); + + QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) { + if (xendev->pci_dev == pci_dev) { + QLIST_REMOVE(xendev, entry); + g_free(xendev); + break; + } + } } } @@ -902,6 +925,62 @@ static void cpu_ioreq_move(ioreq_t *req) } } +static void cpu_ioreq_config(XenIOState *state, ioreq_t *req) +{ + uint32_t sbdf = req->addr >> 32; + uint32_t reg = req->addr; + XenPciDevice *xendev; + + if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) && + req->size != sizeof(uint32_t)) { + hw_error("PCI config access: bad size (%u)", req->size); + } + + if (req->count != 1) { + hw_error("PCI config access: bad count (%u)", req->count); + } + + QLIST_FOREACH(xendev, &state->dev_list, entry) { + if (xendev->sbdf != sbdf) { + continue; + } + + if (!req->data_is_ptr) { + if (req->dir == IOREQ_READ) { + req->data = pci_host_config_read_common( + xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, + req->size); + trace_cpu_ioreq_config_read(req, xendev->sbdf, reg, + req->size, req->data); + } else if (req->dir == IOREQ_WRITE) { + trace_cpu_ioreq_config_write(req, xendev->sbdf, reg, + req->size, req->data); + pci_host_config_write_common( + xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, + req->data, req->size); + } + } else { + uint32_t tmp; + + if (req->dir == IOREQ_READ) { + tmp = pci_host_config_read_common( + xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, + req->size); + trace_cpu_ioreq_config_read(req, xendev->sbdf, reg, + req->size, tmp); + write_phys_req_item(req->data, req, 0, &tmp); + } else if (req->dir == IOREQ_WRITE) { + read_phys_req_item(req->data, req, 0, &tmp); + trace_cpu_ioreq_config_write(req, xendev->sbdf, reg, + req->size, tmp); + pci_host_config_write_common( + xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, + tmp, req->size); + } + } + } +} + static void regs_to_cpu(vmware_regs_t *vmport_regs, ioreq_t *req) { X86CPU *cpu; @@ -974,27 +1053,9 @@ static void handle_ioreq(XenIOState *state, ioreq_t *req) case IOREQ_TYPE_INVALIDATE: xen_invalidate_map_cache(); break; - case IOREQ_TYPE_PCI_CONFIG: { - uint32_t sbdf = req->addr >> 32; - uint32_t val; - - /* Fake a write to port 0xCF8 so that - * the config space access will target the - * correct device model. - */ - val = (1u << 31) | - ((req->addr & 0x0f00) << 16) | - ((sbdf & 0xffff) << 8) | - (req->addr & 0xfc); - do_outp(0xcf8, 4, val); - - /* Now issue the config space access via - * port 0xCFC - */ - req->addr = 0xcfc | (req->addr & 0x03); - cpu_ioreq_pio(req); + case IOREQ_TYPE_PCI_CONFIG: + cpu_ioreq_config(state, req); break; - } default: hw_error("Invalid ioreq type 0x%x\n", req->type); } @@ -1415,6 +1476,7 @@ void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory) memory_listener_register(&state->io_listener, &address_space_io); state->device_listener = xen_device_listener; + QLIST_INIT(&state->dev_list); device_listener_register(&state->device_listener); /* Initialize backend core & drivers */