From d451008e0fdf7fb817c791397e7999d5f3687e58 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:42 -0600 Subject: [PATCH 01/19] vfio/pci: Cleanup RTL8168 quirk and tracing There's quite a bit of cleanup that can be done to the RTL8168 quirk, as well as the tracing to prevent a spew of uninteresting accesses for anything else the driver might choose to use the window registers for besides the MSI-X table. There should be no functional change, but it's now possible to get compact and useful traces by enabling vfio_rtl8168_quirk*, ex: vfio_rtl8168_quirk_write 0000:04:00.0 [address]: 0x1f000 vfio_rtl8168_quirk_read 0000:04:00.0 [address]: 0x8001f000 vfio_rtl8168_quirk_read 0000:04:00.0 [data]: 0xfee0100c vfio_rtl8168_quirk_write 0000:04:00.0 [address]: 0x1f004 vfio_rtl8168_quirk_read 0000:04:00.0 [address]: 0x8001f004 vfio_rtl8168_quirk_read 0000:04:00.0 [data]: 0x0 vfio_rtl8168_quirk_write 0000:04:00.0 [address]: 0x1f008 vfio_rtl8168_quirk_read 0000:04:00.0 [address]: 0x8001f008 vfio_rtl8168_quirk_read 0000:04:00.0 [data]: 0x49b1 vfio_rtl8168_quirk_write 0000:04:00.0 [address]: 0x1f00c vfio_rtl8168_quirk_read 0000:04:00.0 [address]: 0x8001f00c vfio_rtl8168_quirk_read 0000:04:00.0 [data]: 0x0 Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 88 ++++++++++++++++++++++----------------------------- trace-events | 10 +++--- 2 files changed, 41 insertions(+), 57 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 73d34b9b2e..77f92f1e97 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1508,44 +1508,29 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, { VFIOQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; + uint64_t val = 0; + + if (!quirk->data.flags) { /* Non-MSI-X table access */ + return vfio_region_read(&vdev->bars[quirk->data.bar].region, + addr + 0x70, size); + } switch (addr) { case 4: /* address */ - if (quirk->data.flags) { - trace_vfio_rtl8168_window_quirk_read_fake( - memory_region_name(&quirk->mem), - vdev->vbasedev.name); - - return quirk->data.address_match ^ 0x80000000U; - } + val = quirk->data.address_match ^ 0x80000000U; /* latch/complete */ break; case 0: /* data */ - if (quirk->data.flags) { - uint64_t val; - - trace_vfio_rtl8168_window_quirk_read_table( - memory_region_name(&quirk->mem), - vdev->vbasedev.name); - - if (!(vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { - return 0; - } - + if ((vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, - (hwaddr)(quirk->data.address_match - & 0xfff), - &val, - size, - MEMTXATTRS_UNSPECIFIED); - return val; + (hwaddr)(quirk->data.address_match & 0xfff), + &val, size, MEMTXATTRS_UNSPECIFIED); } + break; } - trace_vfio_rtl8168_window_quirk_read_direct(memory_region_name(&quirk->mem), - vdev->vbasedev.name); - - return vfio_region_read(&vdev->bars[quirk->data.bar].region, - addr + 0x70, size); + trace_vfio_rtl8168_quirk_read(vdev->vbasedev.name, + addr ? "address" : "data", val); + return val; } static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, @@ -1556,36 +1541,36 @@ static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, switch (addr) { case 4: /* address */ - if ((data & 0x7fff0000) == 0x10000) { - if (data & 0x80000000U && - vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) { - - trace_vfio_rtl8168_window_quirk_write_table( - memory_region_name(&quirk->mem), - vdev->vbasedev.name); - - memory_region_dispatch_write(&vdev->pdev.msix_table_mmio, - (hwaddr)(data & 0xfff), - (uint64_t)quirk->data.address_mask, - size, MEMTXATTRS_UNSPECIFIED); - } - - quirk->data.flags = 1; + if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */ + quirk->data.flags = 1; /* Activate reads */ quirk->data.address_match = data; - return; + trace_vfio_rtl8168_quirk_write(vdev->vbasedev.name, data); + + if (data & 0x80000000U) { /* Do write */ + if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) { + hwaddr offset = data & 0xfff; + uint64_t val = quirk->data.address_mask; + + trace_vfio_rtl8168_quirk_msix(vdev->vbasedev.name, + (uint16_t)offset, val); + + /* Write to the proper guest MSI-X table instead */ + memory_region_dispatch_write(&vdev->pdev.msix_table_mmio, + offset, val, size, + MEMTXATTRS_UNSPECIFIED); + } + return; /* Do not write guest MSI-X data to hardware */ + } + } else { + quirk->data.flags = 0; /* De-activate reads, non-MSI-X */ } - quirk->data.flags = 0; break; case 0: /* data */ quirk->data.address_mask = data; break; } - trace_vfio_rtl8168_window_quirk_write_direct( - memory_region_name(&quirk->mem), - vdev->vbasedev.name); - vfio_region_write(&vdev->bars[quirk->data.bar].region, addr + 0x70, data, size); } @@ -1622,8 +1607,9 @@ static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_rtl8168_bar2_window_quirk(vdev->vbasedev.name); + trace_vfio_rtl8168_quirk_enable(vdev->vbasedev.name); } + /* * Trap the BAR2 MMIO window to config space as well. */ diff --git a/trace-events b/trace-events index 88a2f141e5..eb0034280e 100644 --- a/trace-events +++ b/trace-events @@ -1554,12 +1554,10 @@ vfio_ati_3c3_quirk_read(uint64_t data) " (0x3c3, 1) = 0x%"PRIx64 vfio_vga_probe_ati_3c3_quirk(const char *name) "Enabled ATI/AMD quirk 0x3c3 BAR4for device %s" vfio_probe_ati_bar4_window_quirk(const char *name) "Enabled ATI/AMD BAR4 window quirk for device %s" #issue with ) -vfio_rtl8168_window_quirk_read_fake(const char *region_name, const char *name) "%s fake read(%s" -vfio_rtl8168_window_quirk_read_table(const char *region_name, const char *name) "%s MSI-X table read(%s" -vfio_rtl8168_window_quirk_read_direct(const char *region_name, const char *name) "%s direct read(%s" -vfio_rtl8168_window_quirk_write_table(const char *region_name, const char *name) "%s MSI-X table write(%s" -vfio_rtl8168_window_quirk_write_direct(const char *region_name, const char *name) "%s direct write(%s" -vfio_probe_rtl8168_bar2_window_quirk(const char *name) "Enabled RTL8168 BAR2 window quirk for device %s" +vfio_rtl8168_quirk_read(const char *name, const char *type, uint64_t val) "%s [%s]: 0x%"PRIx64 +vfio_rtl8168_quirk_write(const char *name, uint64_t val) "%s [address]: 0x%"PRIx64 +vfio_rtl8168_quirk_msix(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table write[0x%x]: 0x%"PRIx64 +vfio_rtl8168_quirk_enable(const char *name) "%s" vfio_probe_ati_bar2_4000_quirk(const char *name) "Enabled ATI/AMD BAR2 0x4000 quirk for device %s" vfio_nvidia_3d0_quirk_read(int size, uint64_t data) " (0x3d0, %d) = 0x%"PRIx64 vfio_nvidia_3d0_quirk_write(uint64_t data, int size) " (0x3d0, 0x%"PRIx64", %d)" From b5bd049fa907bccc4600ad1855e1c9c0e62f0be3 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:43 -0600 Subject: [PATCH 02/19] vfio/pci: Cleanup vfio_early_setup_msix() error path With the addition of the Chelsio quirk we have an error path out of vfio_early_setup_msix() that doesn't free the allocated VFIOMSIXInfo struct. This doesn't introduce a leak as it still gets freed in the vfio_put_device() path, but it's complicated and sloppy to rely on that. Restructure to free the allocated data on error and only link it into the vdev on success. Signed-off-by: Alex Williamson Reported-by: Laszlo Ersek Reviewed-by: Laszlo Ersek --- hw/vfio/pci.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 77f92f1e97..839751d039 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2203,6 +2203,7 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) uint16_t ctrl; uint32_t table, pba; int fd = vdev->vbasedev.fd; + VFIOMSIXInfo *msix; pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX); if (!pos) { @@ -2228,21 +2229,19 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) table = le32_to_cpu(table); pba = le32_to_cpu(pba); - vdev->msix = g_malloc0(sizeof(*(vdev->msix))); - vdev->msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK; - vdev->msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK; - vdev->msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK; - vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; - vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; + msix = g_malloc0(sizeof(*msix)); + msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK; + msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK; + msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK; + msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; + msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; /* * Test the size of the pba_offset variable and catch if it extends outside * of the specified BAR. If it is the case, we need to apply a hardware * specific quirk if the device is known or we have a broken configuration. */ - if (vdev->msix->pba_offset >= - vdev->bars[vdev->msix->pba_bar].region.size) { - + if (msix->pba_offset >= vdev->bars[msix->pba_bar].region.size) { PCIDevice *pdev = &vdev->pdev; uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); uint16_t device = pci_get_word(pdev->config + PCI_DEVICE_ID); @@ -2254,18 +2253,18 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) * is 0x1000, so we hard code that here. */ if (vendor == PCI_VENDOR_ID_CHELSIO && (device & 0xff00) == 0x5800) { - vdev->msix->pba_offset = 0x1000; + msix->pba_offset = 0x1000; } else { error_report("vfio: Hardware reports invalid configuration, " "MSIX PBA outside of specified BAR"); + g_free(msix); return -EINVAL; } } - trace_vfio_early_setup_msix(vdev->vbasedev.name, pos, - vdev->msix->table_bar, - vdev->msix->table_offset, - vdev->msix->entries); + trace_vfio_early_setup_msix(vdev->vbasedev.name, pos, msix->table_bar, + msix->table_offset, msix->entries); + vdev->msix = msix; return 0; } From 870cb6f104e5d3364288d894746dd88fe9ac59cb Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:43 -0600 Subject: [PATCH 03/19] vfio/pci: Rename INTx functions for easier tracing Rename functions and tracing callbacks so that we can trace vfio_intx* to see all the INTx related activities. Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 48 ++++++++++++++++++++++++------------------------ trace-events | 14 +++++++------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 839751d039..e1e62d09db 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -247,7 +247,7 @@ static void vfio_intx_interrupt(void *opaque) } } -static void vfio_eoi(VFIODevice *vbasedev) +static void vfio_intx_eoi(VFIODevice *vbasedev) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); @@ -255,14 +255,14 @@ static void vfio_eoi(VFIODevice *vbasedev) return; } - trace_vfio_eoi(vbasedev->name); + trace_vfio_intx_eoi(vbasedev->name); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX); } -static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) +static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev) { #ifdef CONFIG_KVM struct kvm_irqfd irqfd = { @@ -324,7 +324,7 @@ static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) vdev->intx.kvm_accel = true; - trace_vfio_enable_intx_kvm(vdev->vbasedev.name); + trace_vfio_intx_enable_kvm(vdev->vbasedev.name); return; @@ -339,7 +339,7 @@ fail: #endif } -static void vfio_disable_intx_kvm(VFIOPCIDevice *vdev) +static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) { #ifdef CONFIG_KVM struct kvm_irqfd irqfd = { @@ -376,11 +376,11 @@ static void vfio_disable_intx_kvm(VFIOPCIDevice *vdev) /* If we've missed an event, let it re-fire through QEMU */ vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); - trace_vfio_disable_intx_kvm(vdev->vbasedev.name); + trace_vfio_intx_disable_kvm(vdev->vbasedev.name); #endif } -static void vfio_update_irq(PCIDevice *pdev) +static void vfio_intx_update(PCIDevice *pdev) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); PCIINTxRoute route; @@ -395,10 +395,10 @@ static void vfio_update_irq(PCIDevice *pdev) return; /* Nothing changed */ } - trace_vfio_update_irq(vdev->vbasedev.name, - vdev->intx.route.irq, route.irq); + trace_vfio_intx_update(vdev->vbasedev.name, + vdev->intx.route.irq, route.irq); - vfio_disable_intx_kvm(vdev); + vfio_intx_disable_kvm(vdev); vdev->intx.route = route; @@ -406,13 +406,13 @@ static void vfio_update_irq(PCIDevice *pdev) return; } - vfio_enable_intx_kvm(vdev); + vfio_intx_enable_kvm(vdev); /* Re-enable the interrupt in cased we missed an EOI */ - vfio_eoi(&vdev->vbasedev); + vfio_intx_eoi(&vdev->vbasedev); } -static int vfio_enable_intx(VFIOPCIDevice *vdev) +static int vfio_intx_enable(VFIOPCIDevice *vdev) { uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); int ret, argsz; @@ -467,21 +467,21 @@ static int vfio_enable_intx(VFIOPCIDevice *vdev) return -errno; } - vfio_enable_intx_kvm(vdev); + vfio_intx_enable_kvm(vdev); vdev->interrupt = VFIO_INT_INTx; - trace_vfio_enable_intx(vdev->vbasedev.name); + trace_vfio_intx_enable(vdev->vbasedev.name); return 0; } -static void vfio_disable_intx(VFIOPCIDevice *vdev) +static void vfio_intx_disable(VFIOPCIDevice *vdev) { int fd; timer_del(vdev->intx.mmap_timer); - vfio_disable_intx_kvm(vdev); + vfio_intx_disable_kvm(vdev); vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); @@ -493,7 +493,7 @@ static void vfio_disable_intx(VFIOPCIDevice *vdev) vdev->interrupt = VFIO_INT_NONE; - trace_vfio_disable_intx(vdev->vbasedev.name); + trace_vfio_intx_disable(vdev->vbasedev.name); } /* @@ -876,7 +876,7 @@ static void vfio_disable_msi_common(VFIOPCIDevice *vdev) vdev->nr_vectors = 0; vdev->interrupt = VFIO_INT_NONE; - vfio_enable_intx(vdev); + vfio_intx_enable(vdev); } static void vfio_disable_msix(VFIOPCIDevice *vdev) @@ -2154,7 +2154,7 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) } if (vdev->interrupt == VFIO_INT_INTx) { - vfio_disable_intx(vdev); + vfio_intx_disable(vdev); } } @@ -2777,7 +2777,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) static void vfio_pci_post_reset(VFIOPCIDevice *vdev) { - vfio_enable_intx(vdev); + vfio_intx_enable(vdev); } static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, @@ -3001,7 +3001,7 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev) static VFIODeviceOps vfio_pci_ops = { .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, - .vfio_eoi = vfio_eoi, + .vfio_eoi = vfio_intx_eoi, }; static int vfio_populate_device(VFIOPCIDevice *vdev) @@ -3631,8 +3631,8 @@ static int vfio_initfn(PCIDevice *pdev) if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, vfio_intx_mmap_enable, vdev); - pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_update_irq); - ret = vfio_enable_intx(vdev); + pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_intx_update); + ret = vfio_intx_enable(vdev); if (ret) { goto out_teardown; } diff --git a/trace-events b/trace-events index eb0034280e..1290c09ece 100644 --- a/trace-events +++ b/trace-events @@ -1522,14 +1522,14 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x" pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x" -# hw/vfio/vfio-pci.c +# hw/vfio/pci.c vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c" -vfio_eoi(const char *name) " (%s) EOI" -vfio_enable_intx_kvm(const char *name) " (%s) KVM INTx accel enabled" -vfio_disable_intx_kvm(const char *name) " (%s) KVM INTx accel disabled" -vfio_update_irq(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d" -vfio_enable_intx(const char *name) " (%s)" -vfio_disable_intx(const char *name) " (%s)" +vfio_intx_eoi(const char *name) " (%s) EOI" +vfio_intx_enable_kvm(const char *name) " (%s) KVM INTx accel enabled" +vfio_intx_disable_kvm(const char *name) " (%s) KVM INTx accel disabled" +vfio_intx_update(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d" +vfio_intx_enable(const char *name) " (%s)" +vfio_intx_disable(const char *name) " (%s)" vfio_msi_interrupt(const char *name, int index, uint64_t addr, int data) " (%s) vector %d 0x%"PRIx64"/0x%x" vfio_msix_vector_do_use(const char *name, int index) " (%s) vector %d used" vfio_msix_vector_release(const char *name, int index) " (%s) vector %d released" From 0de70dc7bab1cd58d02e86ed27e291843983b13b Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:43 -0600 Subject: [PATCH 04/19] vfio/pci: Rename MSI/X functions for easier tracing This allows vfio_msi* tracing. The MSI/X interrupt tracing is also pulled out of #ifdef DEBUG_VFIO to avoid a recompile for tracing this path. A few cycles to read the message is hardly anything if we're already in QEMU. Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 73 ++++++++++++++++++++++++--------------------------- trace-events | 12 ++++----- 2 files changed, 40 insertions(+), 45 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index e1e62d09db..e4dee8e925 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -503,33 +503,28 @@ static void vfio_msi_interrupt(void *opaque) { VFIOMSIVector *vector = opaque; VFIOPCIDevice *vdev = vector->vdev; + MSIMessage (*get_msg)(PCIDevice *dev, unsigned vector); + void (*notify)(PCIDevice *dev, unsigned vector); + MSIMessage msg; int nr = vector - vdev->msi_vectors; if (!event_notifier_test_and_clear(&vector->interrupt)) { return; } -#ifdef DEBUG_VFIO - MSIMessage msg; - if (vdev->interrupt == VFIO_INT_MSIX) { - msg = msix_get_message(&vdev->pdev, nr); + get_msg = msix_get_message; + notify = msix_notify; } else if (vdev->interrupt == VFIO_INT_MSI) { - msg = msi_get_message(&vdev->pdev, nr); + get_msg = msi_get_message; + notify = msi_notify; } else { abort(); } + msg = get_msg(&vdev->pdev, nr); trace_vfio_msi_interrupt(vdev->vbasedev.name, nr, msg.address, msg.data); -#endif - - if (vdev->interrupt == VFIO_INT_MSIX) { - msix_notify(&vdev->pdev, nr); - } else if (vdev->interrupt == VFIO_INT_MSI) { - msi_notify(&vdev->pdev, nr); - } else { - error_report("vfio: MSI interrupt receieved, but not enabled?"); - } + notify(&vdev->pdev, nr); } static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) @@ -747,7 +742,7 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) } } -static void vfio_enable_msix(VFIOPCIDevice *vdev) +static void vfio_msix_enable(VFIOPCIDevice *vdev) { vfio_disable_interrupts(vdev); @@ -776,10 +771,10 @@ static void vfio_enable_msix(VFIOPCIDevice *vdev) error_report("vfio: msix_set_vector_notifiers failed"); } - trace_vfio_enable_msix(vdev->vbasedev.name); + trace_vfio_msix_enable(vdev->vbasedev.name); } -static void vfio_enable_msi(VFIOPCIDevice *vdev) +static void vfio_msi_enable(VFIOPCIDevice *vdev) { int ret, i; @@ -852,10 +847,10 @@ retry: return; } - trace_vfio_enable_msi(vdev->vbasedev.name, vdev->nr_vectors); + trace_vfio_msi_enable(vdev->vbasedev.name, vdev->nr_vectors); } -static void vfio_disable_msi_common(VFIOPCIDevice *vdev) +static void vfio_msi_disable_common(VFIOPCIDevice *vdev) { int i; @@ -879,7 +874,7 @@ static void vfio_disable_msi_common(VFIOPCIDevice *vdev) vfio_intx_enable(vdev); } -static void vfio_disable_msix(VFIOPCIDevice *vdev) +static void vfio_msix_disable(VFIOPCIDevice *vdev) { int i; @@ -900,17 +895,17 @@ static void vfio_disable_msix(VFIOPCIDevice *vdev) vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); } - vfio_disable_msi_common(vdev); + vfio_msi_disable_common(vdev); - trace_vfio_disable_msix(vdev->vbasedev.name); + trace_vfio_msix_disable(vdev->vbasedev.name); } -static void vfio_disable_msi(VFIOPCIDevice *vdev) +static void vfio_msi_disable(VFIOPCIDevice *vdev) { vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSI_IRQ_INDEX); - vfio_disable_msi_common(vdev); + vfio_msi_disable_common(vdev); - trace_vfio_disable_msi(vdev->vbasedev.name); + trace_vfio_msi_disable(vdev->vbasedev.name); } static void vfio_update_msi(VFIOPCIDevice *vdev) @@ -2109,11 +2104,11 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, if (!was_enabled) { if (is_enabled) { - vfio_enable_msi(vdev); + vfio_msi_enable(vdev); } } else { if (!is_enabled) { - vfio_disable_msi(vdev); + vfio_msi_disable(vdev); } else { vfio_update_msi(vdev); } @@ -2127,9 +2122,9 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, is_enabled = msix_enabled(pdev); if (!was_enabled && is_enabled) { - vfio_enable_msix(vdev); + vfio_msix_enable(vdev); } else if (was_enabled && !is_enabled) { - vfio_disable_msix(vdev); + vfio_msix_disable(vdev); } } else { /* Write everything to QEMU to keep emulated bits correct */ @@ -2148,9 +2143,9 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) * disable MSI/X and then cleanup by disabling INTx. */ if (vdev->interrupt == VFIO_INT_MSIX) { - vfio_disable_msix(vdev); + vfio_msix_disable(vdev); } else if (vdev->interrupt == VFIO_INT_MSI) { - vfio_disable_msi(vdev); + vfio_msi_disable(vdev); } if (vdev->interrupt == VFIO_INT_INTx) { @@ -2158,7 +2153,7 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) } } -static int vfio_setup_msi(VFIOPCIDevice *vdev, int pos) +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) { uint16_t ctrl; bool msi_64bit, msi_maskbit; @@ -2174,7 +2169,7 @@ static int vfio_setup_msi(VFIOPCIDevice *vdev, int pos) msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); - trace_vfio_setup_msi(vdev->vbasedev.name, pos); + trace_vfio_msi_setup(vdev->vbasedev.name, pos); ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit); if (ret < 0) { @@ -2197,7 +2192,7 @@ static int vfio_setup_msi(VFIOPCIDevice *vdev, int pos) * need to first look for where the MSI-X table lives. So we * unfortunately split MSI-X setup across two functions. */ -static int vfio_early_setup_msix(VFIOPCIDevice *vdev) +static int vfio_msix_early_setup(VFIOPCIDevice *vdev) { uint8_t pos; uint16_t ctrl; @@ -2262,14 +2257,14 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) } } - trace_vfio_early_setup_msix(vdev->vbasedev.name, pos, msix->table_bar, + trace_vfio_msix_early_setup(vdev->vbasedev.name, pos, msix->table_bar, msix->table_offset, msix->entries); vdev->msix = msix; return 0; } -static int vfio_setup_msix(VFIOPCIDevice *vdev, int pos) +static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos) { int ret; @@ -2692,14 +2687,14 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) switch (cap_id) { case PCI_CAP_ID_MSI: - ret = vfio_setup_msi(vdev, pos); + ret = vfio_msi_setup(vdev, pos); break; case PCI_CAP_ID_EXP: vfio_check_pcie_flr(vdev, pos); ret = vfio_setup_pcie_cap(vdev, pos, size); break; case PCI_CAP_ID_MSIX: - ret = vfio_setup_msix(vdev, pos); + ret = vfio_msix_setup(vdev, pos); break; case PCI_CAP_ID_PM: vfio_check_pm_reset(vdev, pos); @@ -3605,7 +3600,7 @@ static int vfio_initfn(PCIDevice *pdev) vfio_pci_size_rom(vdev); - ret = vfio_early_setup_msix(vdev); + ret = vfio_msix_early_setup(vdev); if (ret) { return ret; } diff --git a/trace-events b/trace-events index 1290c09ece..545cec30ed 100644 --- a/trace-events +++ b/trace-events @@ -1533,10 +1533,10 @@ vfio_intx_disable(const char *name) " (%s)" vfio_msi_interrupt(const char *name, int index, uint64_t addr, int data) " (%s) vector %d 0x%"PRIx64"/0x%x" vfio_msix_vector_do_use(const char *name, int index) " (%s) vector %d used" vfio_msix_vector_release(const char *name, int index) " (%s) vector %d released" -vfio_enable_msix(const char *name) " (%s)" -vfio_enable_msi(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors" -vfio_disable_msix(const char *name) " (%s)" -vfio_disable_msi(const char *name) " (%s)" +vfio_msix_enable(const char *name) " (%s)" +vfio_msix_disable(const char *name) " (%s)" +vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors" +vfio_msi_disable(const char *name) " (%s)" vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_rom_read(const char *name, uint64_t addr, int size, uint64_t data) " (%s, 0x%"PRIx64", 0x%x) = 0x%"PRIx64 vfio_pci_size_rom(const char *name, int size) "%s ROM size 0x%x" @@ -1568,8 +1568,8 @@ vfio_probe_nvidia_bar0_1800_quirk_id(int id) "Nvidia NV%02x" vfio_probe_nvidia_bar0_1800_quirk(const char *name) "Enabled NVIDIA BAR0 0x1800 quirk for device %s" vfio_pci_read_config(const char *name, int addr, int len, int val) " (%s, @0x%x, len=0x%x) %x" vfio_pci_write_config(const char *name, int addr, int val, int len) " (%s, @0x%x, 0x%x, len=0x%x)" -vfio_setup_msi(const char *name, int pos) "%s PCI MSI CAP @0x%x" -vfio_early_setup_msix(const char *name, int pos, int table_bar, int offset, int entries) "%s PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d" +vfio_msi_setup(const char *name, int pos) "%s PCI MSI CAP @0x%x" +vfio_msix_early_setup(const char *name, int pos, int table_bar, int offset, int entries) "%s PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d" vfio_check_pcie_flr(const char *name) "%s Supports FLR via PCIe cap" vfio_check_pm_reset(const char *name) "%s Supports PM reset" vfio_check_af_flr(const char *name) "%s Supports FLR via AF cap" From 46746dbaa8c2c421b9bda78193caad57d7fb1136 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:44 -0600 Subject: [PATCH 05/19] vfio/pci: Make interrupt bypass runtime configurable Tracing is more effective when we can completely disable all KVM bypass paths. Make these runtime rather than build-time configurable. Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 19 ++++++++++++------- include/hw/vfio/vfio-common.h | 5 ----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index e4dee8e925..a92789dd26 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -167,6 +167,9 @@ typedef struct VFIOPCIDevice { bool has_flr; bool has_pm_reset; bool rom_read_failed; + bool no_kvm_intx; + bool no_kvm_msi; + bool no_kvm_msix; } VFIOPCIDevice; typedef struct VFIORomBlacklistEntry { @@ -274,7 +277,7 @@ static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev) int ret, argsz; int32_t *pfd; - if (!VFIO_ALLOW_KVM_INTX || !kvm_irqfds_enabled() || + if (vdev->no_kvm_intx || !kvm_irqfds_enabled() || vdev->intx.route.mode != PCI_INTX_ENABLED || !kvm_resamplefds_enabled()) { return; @@ -571,13 +574,12 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) return ret; } -static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg, - bool msix) +static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, + MSIMessage *msg, bool msix) { int virq; - if ((msix && !VFIO_ALLOW_KVM_MSIX) || - (!msix && !VFIO_ALLOW_KVM_MSI) || !msg) { + if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi) || !msg) { return; } @@ -650,7 +652,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vfio_update_kvm_msi_virq(vector, *msg); } } else { - vfio_add_kvm_msi_virq(vector, msg, true); + vfio_add_kvm_msi_virq(vdev, vector, msg, true); } /* @@ -803,7 +805,7 @@ retry: * Attempt to enable route through KVM irqchip, * default to userspace handling if unavailable. */ - vfio_add_kvm_msi_virq(vector, &msg, false); + vfio_add_kvm_msi_virq(vdev, vector, &msg, false); } /* Set interrupt type prior to possible interrupts */ @@ -3729,6 +3731,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_REQ_BIT, true), DEFINE_PROP_BOOL("x-mmap", VFIOPCIDevice, vbasedev.allow_mmap, true), + DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false), + DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false), + DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), /* * TODO - support passed fds... is this necessary? * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 59a321d479..100873e330 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -35,11 +35,6 @@ do { } while (0) #endif -/* Extra debugging, trap acceleration paths for more logging */ -#define VFIO_ALLOW_KVM_INTX 1 -#define VFIO_ALLOW_KVM_MSI 1 -#define VFIO_ALLOW_KVM_MSIX 1 - enum { VFIO_DEVICE_TYPE_PCI = 0, VFIO_DEVICE_TYPE_PLATFORM = 1, From 5e15d79b8681c7f4e2079833288785708e7520d3 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:44 -0600 Subject: [PATCH 06/19] vfio: Change polarity of our no-mmap option The default should be to allow mmap and new drivers shouldn't need to expose an option or set it to other than the allocation default in their initfn. Take advantage of the experimental flag to change this option to the correct polarity. Signed-off-by: Alex Williamson --- hw/vfio/common.c | 2 +- hw/vfio/pci.c | 2 +- hw/vfio/platform.c | 2 +- include/hw/vfio/vfio-common.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 6d21311575..0d341a33ff 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -496,7 +496,7 @@ int vfio_mmap_region(Object *obj, VFIORegion *region, int ret = 0; VFIODevice *vbasedev = region->vbasedev; - if (vbasedev->allow_mmap && size && region->flags & + if (!vbasedev->no_mmap && size && region->flags & VFIO_REGION_INFO_FLAG_MMAP) { int prot = 0; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index a92789dd26..31d7b93c85 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3730,7 +3730,7 @@ static Property vfio_pci_dev_properties[] = { VFIO_FEATURE_ENABLE_VGA_BIT, false), DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_REQ_BIT, true), - DEFINE_PROP_BOOL("x-mmap", VFIOPCIDevice, vbasedev.allow_mmap, true), + DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false), DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false), DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index de4ec52a07..a6726cd779 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -678,7 +678,7 @@ static const VMStateDescription vfio_platform_vmstate = { static Property vfio_platform_dev_properties[] = { DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name), - DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true), + DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false), DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, mmap_timeout, 1100), DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 100873e330..9b9901fbe2 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -97,7 +97,7 @@ typedef struct VFIODevice { int type; bool reset_works; bool needs_reset; - bool allow_mmap; + bool no_mmap; VFIODeviceOps *ops; unsigned int num_irqs; unsigned int num_regions; From 78f33d2bfd26ec552d9e824bcc1dbb8e2736ce34 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:44 -0600 Subject: [PATCH 07/19] vfio/pci: Extract PCI structures to a separate header Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 144 +-------------------------------------------- hw/vfio/pci.h | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 143 deletions(-) create mode 100644 hw/vfio/pci.h diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 31d7b93c85..7212abf072 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -26,156 +26,14 @@ #include #include "config.h" -#include "exec/address-spaces.h" -#include "exec/memory.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" -#include "hw/pci/pci.h" -#include "qemu-common.h" #include "qemu/error-report.h" -#include "qemu/event_notifier.h" -#include "qemu/queue.h" #include "qemu/range.h" #include "sysemu/kvm.h" #include "sysemu/sysemu.h" +#include "pci.h" #include "trace.h" -#include "hw/vfio/vfio.h" -#include "hw/vfio/vfio-common.h" - -struct VFIOPCIDevice; - -typedef struct VFIOQuirk { - MemoryRegion mem; - struct VFIOPCIDevice *vdev; - QLIST_ENTRY(VFIOQuirk) next; - struct { - uint32_t base_offset:TARGET_PAGE_BITS; - uint32_t address_offset:TARGET_PAGE_BITS; - uint32_t address_size:3; - uint32_t bar:3; - - uint32_t address_match; - uint32_t address_mask; - - uint32_t address_val:TARGET_PAGE_BITS; - uint32_t data_offset:TARGET_PAGE_BITS; - uint32_t data_size:3; - - uint8_t flags; - uint8_t read_flags; - uint8_t write_flags; - } data; -} VFIOQuirk; - -typedef struct VFIOBAR { - VFIORegion region; - bool ioport; - bool mem64; - QLIST_HEAD(, VFIOQuirk) quirks; -} VFIOBAR; - -typedef struct VFIOVGARegion { - MemoryRegion mem; - off_t offset; - int nr; - QLIST_HEAD(, VFIOQuirk) quirks; -} VFIOVGARegion; - -typedef struct VFIOVGA { - off_t fd_offset; - int fd; - VFIOVGARegion region[QEMU_PCI_VGA_NUM_REGIONS]; -} VFIOVGA; - -typedef struct VFIOINTx { - bool pending; /* interrupt pending */ - bool kvm_accel; /* set when QEMU bypass through KVM enabled */ - uint8_t pin; /* which pin to pull for qemu_set_irq */ - EventNotifier interrupt; /* eventfd triggered on interrupt */ - EventNotifier unmask; /* eventfd for unmask on QEMU bypass */ - PCIINTxRoute route; /* routing info for QEMU bypass */ - uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */ - QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */ -} VFIOINTx; - -typedef struct VFIOMSIVector { - /* - * Two interrupt paths are configured per vector. The first, is only used - * for interrupts injected via QEMU. This is typically the non-accel path, - * but may also be used when we want QEMU to handle masking and pending - * bits. The KVM path bypasses QEMU and is therefore higher performance, - * but requires masking at the device. virq is used to track the MSI route - * through KVM, thus kvm_interrupt is only available when virq is set to a - * valid (>= 0) value. - */ - EventNotifier interrupt; - EventNotifier kvm_interrupt; - struct VFIOPCIDevice *vdev; /* back pointer to device */ - int virq; - bool use; -} VFIOMSIVector; - -enum { - VFIO_INT_NONE = 0, - VFIO_INT_INTx = 1, - VFIO_INT_MSI = 2, - VFIO_INT_MSIX = 3, -}; - -/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */ -typedef struct VFIOMSIXInfo { - uint8_t table_bar; - uint8_t pba_bar; - uint16_t entries; - uint32_t table_offset; - uint32_t pba_offset; - MemoryRegion mmap_mem; - void *mmap; -} VFIOMSIXInfo; - -typedef struct VFIOPCIDevice { - PCIDevice pdev; - VFIODevice vbasedev; - VFIOINTx intx; - unsigned int config_size; - uint8_t *emulated_config_bits; /* QEMU emulated bits, little-endian */ - off_t config_offset; /* Offset of config space region within device fd */ - unsigned int rom_size; - off_t rom_offset; /* Offset of ROM region within device fd */ - void *rom; - int msi_cap_size; - VFIOMSIVector *msi_vectors; - VFIOMSIXInfo *msix; - int nr_vectors; /* Number of MSI/MSIX vectors currently in use */ - int interrupt; /* Current interrupt type */ - VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */ - VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */ - PCIHostDeviceAddress host; - EventNotifier err_notifier; - EventNotifier req_notifier; - int (*resetfn)(struct VFIOPCIDevice *); - uint32_t features; -#define VFIO_FEATURE_ENABLE_VGA_BIT 0 -#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT) -#define VFIO_FEATURE_ENABLE_REQ_BIT 1 -#define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT) - int32_t bootindex; - uint8_t pm_cap; - bool has_vga; - bool pci_aer; - bool req_enabled; - bool has_flr; - bool has_pm_reset; - bool rom_read_failed; - bool no_kvm_intx; - bool no_kvm_msi; - bool no_kvm_msix; -} VFIOPCIDevice; - -typedef struct VFIORomBlacklistEntry { - uint16_t vendor_id; - uint16_t device_id; -} VFIORomBlacklistEntry; /* * List of device ids/vendor ids for which to disable diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h new file mode 100644 index 0000000000..a7a3a6a0f7 --- /dev/null +++ b/hw/vfio/pci.h @@ -0,0 +1,158 @@ +/* + * vfio based device assignment support - PCI devices + * + * Copyright Red Hat, Inc. 2012-2015 + * + * Authors: + * Alex Williamson + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#ifndef HW_VFIO_VFIO_PCI_H +#define HW_VFIO_VFIO_PCI_H + +#include "qemu-common.h" +#include "exec/memory.h" +#include "hw/pci/pci.h" +#include "hw/vfio/vfio-common.h" +#include "qemu/event_notifier.h" +#include "qemu/queue.h" +#include "qemu/timer.h" + +struct VFIOPCIDevice; + +typedef struct VFIOQuirk { + MemoryRegion mem; + struct VFIOPCIDevice *vdev; + QLIST_ENTRY(VFIOQuirk) next; + struct { + uint32_t base_offset:TARGET_PAGE_BITS; + uint32_t address_offset:TARGET_PAGE_BITS; + uint32_t address_size:3; + uint32_t bar:3; + + uint32_t address_match; + uint32_t address_mask; + + uint32_t address_val:TARGET_PAGE_BITS; + uint32_t data_offset:TARGET_PAGE_BITS; + uint32_t data_size:3; + + uint8_t flags; + uint8_t read_flags; + uint8_t write_flags; + } data; +} VFIOQuirk; + +typedef struct VFIOBAR { + VFIORegion region; + bool ioport; + bool mem64; + QLIST_HEAD(, VFIOQuirk) quirks; +} VFIOBAR; + +typedef struct VFIOVGARegion { + MemoryRegion mem; + off_t offset; + int nr; + QLIST_HEAD(, VFIOQuirk) quirks; +} VFIOVGARegion; + +typedef struct VFIOVGA { + off_t fd_offset; + int fd; + VFIOVGARegion region[QEMU_PCI_VGA_NUM_REGIONS]; +} VFIOVGA; + +typedef struct VFIOINTx { + bool pending; /* interrupt pending */ + bool kvm_accel; /* set when QEMU bypass through KVM enabled */ + uint8_t pin; /* which pin to pull for qemu_set_irq */ + EventNotifier interrupt; /* eventfd triggered on interrupt */ + EventNotifier unmask; /* eventfd for unmask on QEMU bypass */ + PCIINTxRoute route; /* routing info for QEMU bypass */ + uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */ + QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */ +} VFIOINTx; + +typedef struct VFIOMSIVector { + /* + * Two interrupt paths are configured per vector. The first, is only used + * for interrupts injected via QEMU. This is typically the non-accel path, + * but may also be used when we want QEMU to handle masking and pending + * bits. The KVM path bypasses QEMU and is therefore higher performance, + * but requires masking at the device. virq is used to track the MSI route + * through KVM, thus kvm_interrupt is only available when virq is set to a + * valid (>= 0) value. + */ + EventNotifier interrupt; + EventNotifier kvm_interrupt; + struct VFIOPCIDevice *vdev; /* back pointer to device */ + int virq; + bool use; +} VFIOMSIVector; + +enum { + VFIO_INT_NONE = 0, + VFIO_INT_INTx = 1, + VFIO_INT_MSI = 2, + VFIO_INT_MSIX = 3, +}; + +/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */ +typedef struct VFIOMSIXInfo { + uint8_t table_bar; + uint8_t pba_bar; + uint16_t entries; + uint32_t table_offset; + uint32_t pba_offset; + MemoryRegion mmap_mem; + void *mmap; +} VFIOMSIXInfo; + +typedef struct VFIOPCIDevice { + PCIDevice pdev; + VFIODevice vbasedev; + VFIOINTx intx; + unsigned int config_size; + uint8_t *emulated_config_bits; /* QEMU emulated bits, little-endian */ + off_t config_offset; /* Offset of config space region within device fd */ + unsigned int rom_size; + off_t rom_offset; /* Offset of ROM region within device fd */ + void *rom; + int msi_cap_size; + VFIOMSIVector *msi_vectors; + VFIOMSIXInfo *msix; + int nr_vectors; /* Number of MSI/MSIX vectors currently in use */ + int interrupt; /* Current interrupt type */ + VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */ + VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */ + PCIHostDeviceAddress host; + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); + uint32_t features; +#define VFIO_FEATURE_ENABLE_VGA_BIT 0 +#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT) +#define VFIO_FEATURE_ENABLE_REQ_BIT 1 +#define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT) + int32_t bootindex; + uint8_t pm_cap; + bool has_vga; + bool pci_aer; + bool req_enabled; + bool has_flr; + bool has_pm_reset; + bool rom_read_failed; + bool no_kvm_intx; + bool no_kvm_msi; + bool no_kvm_msix; +} VFIOPCIDevice; + +typedef struct VFIORomBlacklistEntry { + uint16_t vendor_id; + uint16_t device_id; +} VFIORomBlacklistEntry; + +#endif /* HW_VFIO_VFIO_PCI_H */ From c00d61d8fa22b096b15e19ee2fde846ffc1c0b5d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:45 -0600 Subject: [PATCH 08/19] vfio/pci: Split quirks to a separate file Signed-off-by: Alex Williamson --- hw/vfio/Makefile.objs | 2 +- hw/vfio/pci-quirks.c | 887 ++++++++++++++++++++++++++++++++++++++++++ hw/vfio/pci.c | 886 +---------------------------------------- hw/vfio/pci.h | 15 + 4 files changed, 908 insertions(+), 882 deletions(-) create mode 100644 hw/vfio/pci-quirks.c diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs index d540c9d140..d3248633c1 100644 --- a/hw/vfio/Makefile.objs +++ b/hw/vfio/Makefile.objs @@ -1,6 +1,6 @@ ifeq ($(CONFIG_LINUX), y) obj-$(CONFIG_SOFTMMU) += common.o -obj-$(CONFIG_PCI) += pci.o +obj-$(CONFIG_PCI) += pci.o pci-quirks.o obj-$(CONFIG_SOFTMMU) += platform.o obj-$(CONFIG_SOFTMMU) += calxeda-xgmac.o endif diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c new file mode 100644 index 0000000000..1f9a8096a1 --- /dev/null +++ b/hw/vfio/pci-quirks.c @@ -0,0 +1,887 @@ +/* + * device quirks for PCI devices + * + * Copyright Red Hat, Inc. 2012-2015 + * + * Authors: + * Alex Williamson + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include "pci.h" +#include "trace.h" +#include "qemu/range.h" + +/* + * List of device ids/vendor ids for which to disable + * option rom loading. This avoids the guest hangs during rom + * execution as noticed with the BCM 57810 card for lack of a + * more better way to handle such issues. + * The user can still override by specifying a romfile or + * rombar=1. + * Please see https://bugs.launchpad.net/qemu/+bug/1284874 + * for an analysis of the 57810 card hang. When adding + * a new vendor id/device id combination below, please also add + * your card/environment details and information that could + * help in debugging to the bug tracking this issue + */ +static const VFIORomBlacklistEntry romblacklist[] = { + /* Broadcom BCM 57810 */ + { 0x14e4, 0x168e } +}; + +bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev) +{ + PCIDevice *pdev = &vdev->pdev; + uint16_t vendor_id, device_id; + int count = 0; + + vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); + device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); + + while (count < ARRAY_SIZE(romblacklist)) { + if (romblacklist[count].vendor_id == vendor_id && + romblacklist[count].device_id == device_id) { + return true; + } + count++; + } + + return false; +} + +/* + * Device specific quirks + */ + +/* Is range1 fully contained within range2? */ +static bool vfio_range_contained(uint64_t first1, uint64_t len1, + uint64_t first2, uint64_t len2) { + return (first1 >= first2 && first1 + len1 <= first2 + len2); +} + +static bool vfio_flags_enabled(uint8_t flags, uint8_t mask) +{ + return (mask && (flags & mask) == mask); +} + +static uint64_t vfio_generic_window_quirk_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIOQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + uint64_t data; + + if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) && + ranges_overlap(addr, size, + quirk->data.data_offset, quirk->data.data_size)) { + hwaddr offset = addr - quirk->data.data_offset; + + if (!vfio_range_contained(addr, size, quirk->data.data_offset, + quirk->data.data_size)) { + hw_error("%s: window data read not fully contained: %s", + __func__, memory_region_name(&quirk->mem)); + } + + data = vfio_pci_read_config(&vdev->pdev, + quirk->data.address_val + offset, size); + + trace_vfio_generic_window_quirk_read(memory_region_name(&quirk->mem), + vdev->vbasedev.name, + quirk->data.bar, + addr, size, data); + } else { + data = vfio_region_read(&vdev->bars[quirk->data.bar].region, + addr + quirk->data.base_offset, size); + } + + return data; +} + +static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + + if (ranges_overlap(addr, size, + quirk->data.address_offset, quirk->data.address_size)) { + + if (addr != quirk->data.address_offset) { + hw_error("%s: offset write into address window: %s", + __func__, memory_region_name(&quirk->mem)); + } + + if ((data & ~quirk->data.address_mask) == quirk->data.address_match) { + quirk->data.flags |= quirk->data.write_flags | + quirk->data.read_flags; + quirk->data.address_val = data & quirk->data.address_mask; + } else { + quirk->data.flags &= ~(quirk->data.write_flags | + quirk->data.read_flags); + } + } + + if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) && + ranges_overlap(addr, size, + quirk->data.data_offset, quirk->data.data_size)) { + hwaddr offset = addr - quirk->data.data_offset; + + if (!vfio_range_contained(addr, size, quirk->data.data_offset, + quirk->data.data_size)) { + hw_error("%s: window data write not fully contained: %s", + __func__, memory_region_name(&quirk->mem)); + } + + vfio_pci_write_config(&vdev->pdev, + quirk->data.address_val + offset, data, size); + trace_vfio_generic_window_quirk_write(memory_region_name(&quirk->mem), + vdev->vbasedev.name, + quirk->data.bar, + addr, data, size); + return; + } + + vfio_region_write(&vdev->bars[quirk->data.bar].region, + addr + quirk->data.base_offset, data, size); +} + +static const MemoryRegionOps vfio_generic_window_quirk = { + .read = vfio_generic_window_quirk_read, + .write = vfio_generic_window_quirk_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t vfio_generic_quirk_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIOQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; + hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; + uint64_t data; + + if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) && + ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) { + if (!vfio_range_contained(addr, size, offset, + quirk->data.address_mask + 1)) { + hw_error("%s: read not fully contained: %s", + __func__, memory_region_name(&quirk->mem)); + } + + data = vfio_pci_read_config(&vdev->pdev, addr - offset, size); + + trace_vfio_generic_quirk_read(memory_region_name(&quirk->mem), + vdev->vbasedev.name, quirk->data.bar, + addr + base, size, data); + } else { + data = vfio_region_read(&vdev->bars[quirk->data.bar].region, + addr + base, size); + } + + return data; +} + +static void vfio_generic_quirk_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; + hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; + + if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) && + ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) { + if (!vfio_range_contained(addr, size, offset, + quirk->data.address_mask + 1)) { + hw_error("%s: write not fully contained: %s", + __func__, memory_region_name(&quirk->mem)); + } + + vfio_pci_write_config(&vdev->pdev, addr - offset, data, size); + + trace_vfio_generic_quirk_write(memory_region_name(&quirk->mem), + vdev->vbasedev.name, quirk->data.bar, + addr + base, data, size); + } else { + vfio_region_write(&vdev->bars[quirk->data.bar].region, + addr + base, data, size); + } +} + +static const MemoryRegionOps vfio_generic_quirk = { + .read = vfio_generic_quirk_read, + .write = vfio_generic_quirk_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +#define PCI_VENDOR_ID_ATI 0x1002 + +/* + * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR + * through VGA register 0x3c3. On newer cards, the I/O port BAR is always + * BAR4 (older cards like the X550 used BAR1, but we don't care to support + * those). Note that on bare metal, a read of 0x3c3 doesn't always return the + * I/O port BAR address. Originally this was coded to return the virtual BAR + * address only if the physical register read returns the actual BAR address, + * but users have reported greater success if we return the virtual address + * unconditionally. + */ +static uint64_t vfio_ati_3c3_quirk_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIOQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + uint64_t data = vfio_pci_read_config(&vdev->pdev, + PCI_BASE_ADDRESS_0 + (4 * 4) + 1, + size); + trace_vfio_ati_3c3_quirk_read(data); + + return data; +} + +static const MemoryRegionOps vfio_ati_3c3_quirk = { + .read = vfio_ati_3c3_quirk_read, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) +{ + PCIDevice *pdev = &vdev->pdev; + VFIOQuirk *quirk; + + if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { + return; + } + + /* + * As long as the BAR is >= 256 bytes it will be aligned such that the + * lower byte is always zero. Filter out anything else, if it exists. + */ + if (!vdev->bars[4].ioport || vdev->bars[4].region.size < 256) { + return; + } + + quirk = g_malloc0(sizeof(*quirk)); + quirk->vdev = vdev; + + memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, quirk, + "vfio-ati-3c3-quirk", 1); + memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + 3 /* offset 3 bytes from 0x3c0 */, &quirk->mem); + + QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, + quirk, next); + + trace_vfio_vga_probe_ati_3c3_quirk(vdev->vbasedev.name); +} + +/* + * Newer ATI/AMD devices, including HD5450 and HD7850, have a window to PCI + * config space through MMIO BAR2 at offset 0x4000. Nothing seems to access + * the MMIO space directly, but a window to this space is provided through + * I/O port BAR4. Offset 0x0 is the address register and offset 0x4 is the + * data register. When the address is programmed to a range of 0x4000-0x4fff + * PCI configuration space is available. Experimentation seems to indicate + * that only read-only access is provided, but we drop writes when the window + * is enabled to config space nonetheless. + */ +static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr) +{ + PCIDevice *pdev = &vdev->pdev; + VFIOQuirk *quirk; + + if (!vdev->has_vga || nr != 4 || + pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { + return; + } + + quirk = g_malloc0(sizeof(*quirk)); + quirk->vdev = vdev; + quirk->data.address_size = 4; + quirk->data.data_offset = 4; + quirk->data.data_size = 4; + quirk->data.address_match = 0x4000; + quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; + quirk->data.bar = nr; + quirk->data.read_flags = quirk->data.write_flags = 1; + + memory_region_init_io(&quirk->mem, OBJECT(vdev), + &vfio_generic_window_quirk, quirk, + "vfio-ati-bar4-window-quirk", 8); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + quirk->data.base_offset, &quirk->mem, 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + + trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name); +} + +/* + * Trap the BAR2 MMIO window to config space as well. + */ +static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) +{ + PCIDevice *pdev = &vdev->pdev; + VFIOQuirk *quirk; + + /* Only enable on newer devices where BAR2 is 64bit */ + if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 || + pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { + return; + } + + quirk = g_malloc0(sizeof(*quirk)); + quirk->vdev = vdev; + quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1; + quirk->data.address_match = 0x4000; + quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; + quirk->data.bar = nr; + + memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk, + "vfio-ati-bar2-4000-quirk", + TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + quirk->data.address_match & TARGET_PAGE_MASK, + &quirk->mem, 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + + trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name); +} + +/* + * Older ATI/AMD cards like the X550 have a similar window to that above. + * I/O port BAR1 provides a window to a mirror of PCI config space located + * in BAR2 at offset 0xf00. We don't care to support such older cards, but + * note it for future reference. + */ + +#define PCI_VENDOR_ID_NVIDIA 0x10de + +/* + * Nvidia has several different methods to get to config space, the + * nouveu project has several of these documented here: + * https://github.com/pathscale/envytools/tree/master/hwdocs + * + * The first quirk is actually not documented in envytools and is found + * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]). This is an + * NV46 chipset. The backdoor uses the legacy VGA I/O ports to access + * the mirror of PCI config space found at BAR0 offset 0x1800. The access + * sequence first writes 0x338 to I/O port 0x3d4. The target offset is + * then written to 0x3d0. Finally 0x538 is written for a read and 0x738 + * is written for a write to 0x3d4. The BAR0 offset is then accessible + * through 0x3d0. This quirk doesn't seem to be necessary on newer cards + * that use the I/O port BAR5 window but it doesn't hurt to leave it. + */ +enum { + NV_3D0_NONE = 0, + NV_3D0_SELECT, + NV_3D0_WINDOW, + NV_3D0_READ, + NV_3D0_WRITE, +}; + +static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIOQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + PCIDevice *pdev = &vdev->pdev; + uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + addr + quirk->data.base_offset, size); + + if (quirk->data.flags == NV_3D0_READ && addr == quirk->data.data_offset) { + data = vfio_pci_read_config(pdev, quirk->data.address_val, size); + trace_vfio_nvidia_3d0_quirk_read(size, data); + } + + quirk->data.flags = NV_3D0_NONE; + + return data; +} + +static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + PCIDevice *pdev = &vdev->pdev; + + switch (quirk->data.flags) { + case NV_3D0_NONE: + if (addr == quirk->data.address_offset && data == 0x338) { + quirk->data.flags = NV_3D0_SELECT; + } + break; + case NV_3D0_SELECT: + quirk->data.flags = NV_3D0_NONE; + if (addr == quirk->data.data_offset && + (data & ~quirk->data.address_mask) == quirk->data.address_match) { + quirk->data.flags = NV_3D0_WINDOW; + quirk->data.address_val = data & quirk->data.address_mask; + } + break; + case NV_3D0_WINDOW: + quirk->data.flags = NV_3D0_NONE; + if (addr == quirk->data.address_offset) { + if (data == 0x538) { + quirk->data.flags = NV_3D0_READ; + } else if (data == 0x738) { + quirk->data.flags = NV_3D0_WRITE; + } + } + break; + case NV_3D0_WRITE: + quirk->data.flags = NV_3D0_NONE; + if (addr == quirk->data.data_offset) { + vfio_pci_write_config(pdev, quirk->data.address_val, data, size); + trace_vfio_nvidia_3d0_quirk_write(data, size); + return; + } + break; + } + + vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + addr + quirk->data.base_offset, data, size); +} + +static const MemoryRegionOps vfio_nvidia_3d0_quirk = { + .read = vfio_nvidia_3d0_quirk_read, + .write = vfio_nvidia_3d0_quirk_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) +{ + PCIDevice *pdev = &vdev->pdev; + VFIOQuirk *quirk; + + if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA || + !vdev->bars[1].region.size) { + return; + } + + quirk = g_malloc0(sizeof(*quirk)); + quirk->vdev = vdev; + quirk->data.base_offset = 0x10; + quirk->data.address_offset = 4; + quirk->data.address_size = 2; + quirk->data.address_match = 0x1800; + quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; + quirk->data.data_offset = 0; + quirk->data.data_size = 4; + + memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk, + quirk, "vfio-nvidia-3d0-quirk", 6); + memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + quirk->data.base_offset, &quirk->mem); + + QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, + quirk, next); + + trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->vbasedev.name); +} + +/* + * The second quirk is documented in envytools. The I/O port BAR5 is just + * a set of address/data ports to the MMIO BARs. The BAR we care about is + * again BAR0. This backdoor is apparently a bit newer than the one above + * so we need to not only trap 256 bytes @0x1800, but all of PCI config + * space, including extended space is available at the 4k @0x88000. + */ +enum { + NV_BAR5_ADDRESS = 0x1, + NV_BAR5_ENABLE = 0x2, + NV_BAR5_MASTER = 0x4, + NV_BAR5_VALID = 0x7, +}; + +static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOQuirk *quirk = opaque; + + switch (addr) { + case 0x0: + if (data & 0x1) { + quirk->data.flags |= NV_BAR5_MASTER; + } else { + quirk->data.flags &= ~NV_BAR5_MASTER; + } + break; + case 0x4: + if (data & 0x1) { + quirk->data.flags |= NV_BAR5_ENABLE; + } else { + quirk->data.flags &= ~NV_BAR5_ENABLE; + } + break; + case 0x8: + if (quirk->data.flags & NV_BAR5_MASTER) { + if ((data & ~0xfff) == 0x88000) { + quirk->data.flags |= NV_BAR5_ADDRESS; + quirk->data.address_val = data & 0xfff; + } else if ((data & ~0xff) == 0x1800) { + quirk->data.flags |= NV_BAR5_ADDRESS; + quirk->data.address_val = data & 0xff; + } else { + quirk->data.flags &= ~NV_BAR5_ADDRESS; + } + } + break; + } + + vfio_generic_window_quirk_write(opaque, addr, data, size); +} + +static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = { + .read = vfio_generic_window_quirk_read, + .write = vfio_nvidia_bar5_window_quirk_write, + .valid.min_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) +{ + PCIDevice *pdev = &vdev->pdev; + VFIOQuirk *quirk; + + if (!vdev->has_vga || nr != 5 || + pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { + return; + } + + quirk = g_malloc0(sizeof(*quirk)); + quirk->vdev = vdev; + quirk->data.read_flags = quirk->data.write_flags = NV_BAR5_VALID; + quirk->data.address_offset = 0x8; + quirk->data.address_size = 0; /* actually 4, but avoids generic code */ + quirk->data.data_offset = 0xc; + quirk->data.data_size = 4; + quirk->data.bar = nr; + + memory_region_init_io(&quirk->mem, OBJECT(vdev), + &vfio_nvidia_bar5_window_quirk, quirk, + "vfio-nvidia-bar5-window-quirk", 16); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + 0, &quirk->mem, 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + + trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name); +} + +static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + PCIDevice *pdev = &vdev->pdev; + hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; + + vfio_generic_quirk_write(opaque, addr, data, size); + + /* + * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the + * MSI capability ID register. Both the ID and next register are + * read-only, so we allow writes covering either of those to real hw. + * NB - only fixed for the 0x88000 MMIO window. + */ + if ((pdev->cap_present & QEMU_PCI_CAP_MSI) && + vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) { + vfio_region_write(&vdev->bars[quirk->data.bar].region, + addr + base, data, size); + } +} + +static const MemoryRegionOps vfio_nvidia_88000_quirk = { + .read = vfio_generic_quirk_read, + .write = vfio_nvidia_88000_quirk_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +/* + * Finally, BAR0 itself. We want to redirect any accesses to either + * 0x1800 or 0x88000 through the PCI config space access functions. + * + * NB - quirk at a page granularity or else they don't seem to work when + * BARs are mmap'd + * + * Here's offset 0x88000... + */ +static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr) +{ + PCIDevice *pdev = &vdev->pdev; + VFIOQuirk *quirk; + uint16_t vendor, class; + + vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); + class = pci_get_word(pdev->config + PCI_CLASS_DEVICE); + + if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA || + class != PCI_CLASS_DISPLAY_VGA) { + return; + } + + quirk = g_malloc0(sizeof(*quirk)); + quirk->vdev = vdev; + quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1; + quirk->data.address_match = 0x88000; + quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; + quirk->data.bar = nr; + + memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk, + quirk, "vfio-nvidia-bar0-88000-quirk", + TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + quirk->data.address_match & TARGET_PAGE_MASK, + &quirk->mem, 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + + trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name); +} + +/* + * And here's the same for BAR0 offset 0x1800... + */ +static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) +{ + PCIDevice *pdev = &vdev->pdev; + VFIOQuirk *quirk; + + if (!vdev->has_vga || nr != 0 || + pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { + return; + } + + /* Log the chipset ID */ + trace_vfio_probe_nvidia_bar0_1800_quirk_id( + (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20) + & 0xff); + + quirk = g_malloc0(sizeof(*quirk)); + quirk->vdev = vdev; + quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1; + quirk->data.address_match = 0x1800; + quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; + quirk->data.bar = nr; + + memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk, + "vfio-nvidia-bar0-1800-quirk", + TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + quirk->data.address_match & TARGET_PAGE_MASK, + &quirk->mem, 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + + trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name); +} + +/* + * TODO - Some Nvidia devices provide config access to their companion HDA + * device and even to their parent bridge via these config space mirrors. + * Add quirks for those regions. + */ + +#define PCI_VENDOR_ID_REALTEK 0x10ec + +/* + * RTL8168 devices have a backdoor that can access the MSI-X table. At BAR2 + * offset 0x70 there is a dword data register, offset 0x74 is a dword address + * register. According to the Linux r8169 driver, the MSI-X table is addressed + * when the "type" portion of the address register is set to 0x1. This appears + * to be bits 16:30. Bit 31 is both a write indicator and some sort of + * "address latched" indicator. Bits 12:15 are a mask field, which we can + * ignore because the MSI-X table should always be accessed as a dword (full + * mask). Bits 0:11 is offset within the type. + * + * Example trace: + * + * Read from MSI-X table offset 0 + * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr + * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch + * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data + * + * Write 0xfee00000 to MSI-X table offset 0 + * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data + * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write + * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete + */ +static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIOQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + uint64_t val = 0; + + if (!quirk->data.flags) { /* Non-MSI-X table access */ + return vfio_region_read(&vdev->bars[quirk->data.bar].region, + addr + 0x70, size); + } + + switch (addr) { + case 4: /* address */ + val = quirk->data.address_match ^ 0x80000000U; /* latch/complete */ + break; + case 0: /* data */ + if ((vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { + memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, + (hwaddr)(quirk->data.address_match & 0xfff), + &val, size, MEMTXATTRS_UNSPECIFIED); + } + break; + } + + trace_vfio_rtl8168_quirk_read(vdev->vbasedev.name, + addr ? "address" : "data", val); + return val; +} + +static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + + switch (addr) { + case 4: /* address */ + if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */ + quirk->data.flags = 1; /* Activate reads */ + quirk->data.address_match = data; + + trace_vfio_rtl8168_quirk_write(vdev->vbasedev.name, data); + + if (data & 0x80000000U) { /* Do write */ + if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) { + hwaddr offset = data & 0xfff; + uint64_t val = quirk->data.address_mask; + + trace_vfio_rtl8168_quirk_msix(vdev->vbasedev.name, + (uint16_t)offset, val); + + /* Write to the proper guest MSI-X table instead */ + memory_region_dispatch_write(&vdev->pdev.msix_table_mmio, + offset, val, size, + MEMTXATTRS_UNSPECIFIED); + } + return; /* Do not write guest MSI-X data to hardware */ + } + } else { + quirk->data.flags = 0; /* De-activate reads, non-MSI-X */ + } + break; + case 0: /* data */ + quirk->data.address_mask = data; + break; + } + + vfio_region_write(&vdev->bars[quirk->data.bar].region, + addr + 0x70, data, size); +} + +static const MemoryRegionOps vfio_rtl8168_window_quirk = { + .read = vfio_rtl8168_window_quirk_read, + .write = vfio_rtl8168_window_quirk_write, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + .unaligned = false, + }, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr) +{ + PCIDevice *pdev = &vdev->pdev; + VFIOQuirk *quirk; + + if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK || + pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) { + return; + } + + quirk = g_malloc0(sizeof(*quirk)); + quirk->vdev = vdev; + quirk->data.bar = nr; + + memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk, + quirk, "vfio-rtl8168-window-quirk", 8); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + 0x70, &quirk->mem, 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + + trace_vfio_rtl8168_quirk_enable(vdev->vbasedev.name); +} + +/* + * Common quirk probe entry points. + */ +void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) +{ + vfio_vga_probe_ati_3c3_quirk(vdev); + vfio_vga_probe_nvidia_3d0_quirk(vdev); +} + +void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev) +{ + VFIOQuirk *quirk; + int i; + + for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { + QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) { + memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem); + } + } +} + +void vfio_vga_quirk_free(VFIOPCIDevice *vdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { + while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { + VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); + object_unparent(OBJECT(&quirk->mem)); + QLIST_REMOVE(quirk, next); + g_free(quirk); + } + } +} + +void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) +{ + vfio_probe_ati_bar4_window_quirk(vdev, nr); + vfio_probe_ati_bar2_4000_quirk(vdev, nr); + vfio_probe_nvidia_bar5_window_quirk(vdev, nr); + vfio_probe_nvidia_bar0_88000_quirk(vdev, nr); + vfio_probe_nvidia_bar0_1800_quirk(vdev, nr); + vfio_probe_rtl8168_bar2_window_quirk(vdev, nr); +} + +void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) +{ + VFIOBAR *bar = &vdev->bars[nr]; + VFIOQuirk *quirk; + + QLIST_FOREACH(quirk, &bar->quirks, next) { + memory_region_del_subregion(&bar->region.mem, &quirk->mem); + } +} + +void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr) +{ + VFIOBAR *bar = &vdev->bars[nr]; + + while (!QLIST_EMPTY(&bar->quirks)) { + VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); + object_unparent(OBJECT(&quirk->mem)); + QLIST_REMOVE(quirk, next); + g_free(quirk); + } +} diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 7212abf072..6a0ec453b3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -35,30 +35,9 @@ #include "pci.h" #include "trace.h" -/* - * List of device ids/vendor ids for which to disable - * option rom loading. This avoids the guest hangs during rom - * execution as noticed with the BCM 57810 card for lack of a - * more better way to handle such issues. - * The user can still override by specifying a romfile or - * rombar=1. - * Please see https://bugs.launchpad.net/qemu/+bug/1284874 - * for an analysis of the 57810 card hang. When adding - * a new vendor id/device id combination below, please also add - * your card/environment details and information that could - * help in debugging to the bug tracking this issue - */ -static const VFIORomBlacklistEntry romblacklist[] = { - /* Broadcom BCM 57810 */ - { 0x14e4, 0x168e } -}; - #define MSIX_CAP_LENGTH 12 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); -static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); -static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, - uint32_t val, int len); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); /* @@ -888,26 +867,6 @@ static const MemoryRegionOps vfio_rom_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev) -{ - PCIDevice *pdev = &vdev->pdev; - uint16_t vendor_id, device_id; - int count = 0; - - vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); - device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); - - while (count < ARRAY_SIZE(romblacklist)) { - if (romblacklist[count].vendor_id == vendor_id && - romblacklist[count].device_id == device_id) { - return true; - } - count++; - } - - return false; -} - static void vfio_pci_size_rom(VFIOPCIDevice *vdev) { uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); @@ -985,7 +944,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) vdev->rom_read_failed = false; } -static void vfio_vga_write(void *opaque, hwaddr addr, +void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { VFIOVGARegion *region = opaque; @@ -1021,7 +980,7 @@ static void vfio_vga_write(void *opaque, hwaddr addr, trace_vfio_vga_write(region->offset + addr, data, size); } -static uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size) +uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size) { VFIOVGARegion *region = opaque; VFIOVGA *vga = container_of(region, VFIOVGA, region[region->nr]); @@ -1066,845 +1025,10 @@ static const MemoryRegionOps vfio_vga_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -/* - * Device specific quirks - */ - -/* Is range1 fully contained within range2? */ -static bool vfio_range_contained(uint64_t first1, uint64_t len1, - uint64_t first2, uint64_t len2) { - return (first1 >= first2 && first1 + len1 <= first2 + len2); -} - -static bool vfio_flags_enabled(uint8_t flags, uint8_t mask) -{ - return (mask && (flags & mask) == mask); -} - -static uint64_t vfio_generic_window_quirk_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIOQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - uint64_t data; - - if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) && - ranges_overlap(addr, size, - quirk->data.data_offset, quirk->data.data_size)) { - hwaddr offset = addr - quirk->data.data_offset; - - if (!vfio_range_contained(addr, size, quirk->data.data_offset, - quirk->data.data_size)) { - hw_error("%s: window data read not fully contained: %s", - __func__, memory_region_name(&quirk->mem)); - } - - data = vfio_pci_read_config(&vdev->pdev, - quirk->data.address_val + offset, size); - - trace_vfio_generic_window_quirk_read(memory_region_name(&quirk->mem), - vdev->vbasedev.name, - quirk->data.bar, - addr, size, data); - } else { - data = vfio_region_read(&vdev->bars[quirk->data.bar].region, - addr + quirk->data.base_offset, size); - } - - return data; -} - -static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIOQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - - if (ranges_overlap(addr, size, - quirk->data.address_offset, quirk->data.address_size)) { - - if (addr != quirk->data.address_offset) { - hw_error("%s: offset write into address window: %s", - __func__, memory_region_name(&quirk->mem)); - } - - if ((data & ~quirk->data.address_mask) == quirk->data.address_match) { - quirk->data.flags |= quirk->data.write_flags | - quirk->data.read_flags; - quirk->data.address_val = data & quirk->data.address_mask; - } else { - quirk->data.flags &= ~(quirk->data.write_flags | - quirk->data.read_flags); - } - } - - if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) && - ranges_overlap(addr, size, - quirk->data.data_offset, quirk->data.data_size)) { - hwaddr offset = addr - quirk->data.data_offset; - - if (!vfio_range_contained(addr, size, quirk->data.data_offset, - quirk->data.data_size)) { - hw_error("%s: window data write not fully contained: %s", - __func__, memory_region_name(&quirk->mem)); - } - - vfio_pci_write_config(&vdev->pdev, - quirk->data.address_val + offset, data, size); - trace_vfio_generic_window_quirk_write(memory_region_name(&quirk->mem), - vdev->vbasedev.name, - quirk->data.bar, - addr, data, size); - return; - } - - vfio_region_write(&vdev->bars[quirk->data.bar].region, - addr + quirk->data.base_offset, data, size); -} - -static const MemoryRegionOps vfio_generic_window_quirk = { - .read = vfio_generic_window_quirk_read, - .write = vfio_generic_window_quirk_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static uint64_t vfio_generic_quirk_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIOQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; - hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; - uint64_t data; - - if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) && - ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) { - if (!vfio_range_contained(addr, size, offset, - quirk->data.address_mask + 1)) { - hw_error("%s: read not fully contained: %s", - __func__, memory_region_name(&quirk->mem)); - } - - data = vfio_pci_read_config(&vdev->pdev, addr - offset, size); - - trace_vfio_generic_quirk_read(memory_region_name(&quirk->mem), - vdev->vbasedev.name, quirk->data.bar, - addr + base, size, data); - } else { - data = vfio_region_read(&vdev->bars[quirk->data.bar].region, - addr + base, size); - } - - return data; -} - -static void vfio_generic_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIOQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; - hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; - - if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) && - ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) { - if (!vfio_range_contained(addr, size, offset, - quirk->data.address_mask + 1)) { - hw_error("%s: write not fully contained: %s", - __func__, memory_region_name(&quirk->mem)); - } - - vfio_pci_write_config(&vdev->pdev, addr - offset, data, size); - - trace_vfio_generic_quirk_write(memory_region_name(&quirk->mem), - vdev->vbasedev.name, quirk->data.bar, - addr + base, data, size); - } else { - vfio_region_write(&vdev->bars[quirk->data.bar].region, - addr + base, data, size); - } -} - -static const MemoryRegionOps vfio_generic_quirk = { - .read = vfio_generic_quirk_read, - .write = vfio_generic_quirk_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -#define PCI_VENDOR_ID_ATI 0x1002 - -/* - * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR - * through VGA register 0x3c3. On newer cards, the I/O port BAR is always - * BAR4 (older cards like the X550 used BAR1, but we don't care to support - * those). Note that on bare metal, a read of 0x3c3 doesn't always return the - * I/O port BAR address. Originally this was coded to return the virtual BAR - * address only if the physical register read returns the actual BAR address, - * but users have reported greater success if we return the virtual address - * unconditionally. - */ -static uint64_t vfio_ati_3c3_quirk_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIOQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - uint64_t data = vfio_pci_read_config(&vdev->pdev, - PCI_BASE_ADDRESS_0 + (4 * 4) + 1, - size); - trace_vfio_ati_3c3_quirk_read(data); - - return data; -} - -static const MemoryRegionOps vfio_ati_3c3_quirk = { - .read = vfio_ati_3c3_quirk_read, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) -{ - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - - if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { - return; - } - - /* - * As long as the BAR is >= 256 bytes it will be aligned such that the - * lower byte is always zero. Filter out anything else, if it exists. - */ - if (!vdev->bars[4].ioport || vdev->bars[4].region.size < 256) { - return; - } - - quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, quirk, - "vfio-ati-3c3-quirk", 1); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, - 3 /* offset 3 bytes from 0x3c0 */, &quirk->mem); - - QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, - quirk, next); - - trace_vfio_vga_probe_ati_3c3_quirk(vdev->vbasedev.name); -} - -/* - * Newer ATI/AMD devices, including HD5450 and HD7850, have a window to PCI - * config space through MMIO BAR2 at offset 0x4000. Nothing seems to access - * the MMIO space directly, but a window to this space is provided through - * I/O port BAR4. Offset 0x0 is the address register and offset 0x4 is the - * data register. When the address is programmed to a range of 0x4000-0x4fff - * PCI configuration space is available. Experimentation seems to indicate - * that only read-only access is provided, but we drop writes when the window - * is enabled to config space nonetheless. - */ -static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr) -{ - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - - if (!vdev->has_vga || nr != 4 || - pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { - return; - } - - quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.address_size = 4; - quirk->data.data_offset = 4; - quirk->data.data_size = 4; - quirk->data.address_match = 0x4000; - quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; - quirk->data.bar = nr; - quirk->data.read_flags = quirk->data.write_flags = 1; - - memory_region_init_io(&quirk->mem, OBJECT(vdev), - &vfio_generic_window_quirk, quirk, - "vfio-ati-bar4-window-quirk", 8); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - quirk->data.base_offset, &quirk->mem, 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - - trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name); -} - -#define PCI_VENDOR_ID_REALTEK 0x10ec - -/* - * RTL8168 devices have a backdoor that can access the MSI-X table. At BAR2 - * offset 0x70 there is a dword data register, offset 0x74 is a dword address - * register. According to the Linux r8169 driver, the MSI-X table is addressed - * when the "type" portion of the address register is set to 0x1. This appears - * to be bits 16:30. Bit 31 is both a write indicator and some sort of - * "address latched" indicator. Bits 12:15 are a mask field, which we can - * ignore because the MSI-X table should always be accessed as a dword (full - * mask). Bits 0:11 is offset within the type. - * - * Example trace: - * - * Read from MSI-X table offset 0 - * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr - * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch - * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data - * - * Write 0xfee00000 to MSI-X table offset 0 - * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data - * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write - * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete - */ - -static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIOQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - uint64_t val = 0; - - if (!quirk->data.flags) { /* Non-MSI-X table access */ - return vfio_region_read(&vdev->bars[quirk->data.bar].region, - addr + 0x70, size); - } - - switch (addr) { - case 4: /* address */ - val = quirk->data.address_match ^ 0x80000000U; /* latch/complete */ - break; - case 0: /* data */ - if ((vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { - memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, - (hwaddr)(quirk->data.address_match & 0xfff), - &val, size, MEMTXATTRS_UNSPECIFIED); - } - break; - } - - trace_vfio_rtl8168_quirk_read(vdev->vbasedev.name, - addr ? "address" : "data", val); - return val; -} - -static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIOQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - - switch (addr) { - case 4: /* address */ - if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */ - quirk->data.flags = 1; /* Activate reads */ - quirk->data.address_match = data; - - trace_vfio_rtl8168_quirk_write(vdev->vbasedev.name, data); - - if (data & 0x80000000U) { /* Do write */ - if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) { - hwaddr offset = data & 0xfff; - uint64_t val = quirk->data.address_mask; - - trace_vfio_rtl8168_quirk_msix(vdev->vbasedev.name, - (uint16_t)offset, val); - - /* Write to the proper guest MSI-X table instead */ - memory_region_dispatch_write(&vdev->pdev.msix_table_mmio, - offset, val, size, - MEMTXATTRS_UNSPECIFIED); - } - return; /* Do not write guest MSI-X data to hardware */ - } - } else { - quirk->data.flags = 0; /* De-activate reads, non-MSI-X */ - } - break; - case 0: /* data */ - quirk->data.address_mask = data; - break; - } - - vfio_region_write(&vdev->bars[quirk->data.bar].region, - addr + 0x70, data, size); -} - -static const MemoryRegionOps vfio_rtl8168_window_quirk = { - .read = vfio_rtl8168_window_quirk_read, - .write = vfio_rtl8168_window_quirk_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - .unaligned = false, - }, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr) -{ - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - - if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK || - pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) { - return; - } - - quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.bar = nr; - - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk, - quirk, "vfio-rtl8168-window-quirk", 8); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - 0x70, &quirk->mem, 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - - trace_vfio_rtl8168_quirk_enable(vdev->vbasedev.name); -} - -/* - * Trap the BAR2 MMIO window to config space as well. - */ -static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) -{ - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - - /* Only enable on newer devices where BAR2 is 64bit */ - if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 || - pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { - return; - } - - quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1; - quirk->data.address_match = 0x4000; - quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; - quirk->data.bar = nr; - - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk, - "vfio-ati-bar2-4000-quirk", - TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - quirk->data.address_match & TARGET_PAGE_MASK, - &quirk->mem, 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - - trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name); -} - -/* - * Older ATI/AMD cards like the X550 have a similar window to that above. - * I/O port BAR1 provides a window to a mirror of PCI config space located - * in BAR2 at offset 0xf00. We don't care to support such older cards, but - * note it for future reference. - */ - -#define PCI_VENDOR_ID_NVIDIA 0x10de - -/* - * Nvidia has several different methods to get to config space, the - * nouveu project has several of these documented here: - * https://github.com/pathscale/envytools/tree/master/hwdocs - * - * The first quirk is actually not documented in envytools and is found - * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]). This is an - * NV46 chipset. The backdoor uses the legacy VGA I/O ports to access - * the mirror of PCI config space found at BAR0 offset 0x1800. The access - * sequence first writes 0x338 to I/O port 0x3d4. The target offset is - * then written to 0x3d0. Finally 0x538 is written for a read and 0x738 - * is written for a write to 0x3d4. The BAR0 offset is then accessible - * through 0x3d0. This quirk doesn't seem to be necessary on newer cards - * that use the I/O port BAR5 window but it doesn't hurt to leave it. - */ -enum { - NV_3D0_NONE = 0, - NV_3D0_SELECT, - NV_3D0_WINDOW, - NV_3D0_READ, - NV_3D0_WRITE, -}; - -static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIOQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - PCIDevice *pdev = &vdev->pdev; - uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], - addr + quirk->data.base_offset, size); - - if (quirk->data.flags == NV_3D0_READ && addr == quirk->data.data_offset) { - data = vfio_pci_read_config(pdev, quirk->data.address_val, size); - trace_vfio_nvidia_3d0_quirk_read(size, data); - } - - quirk->data.flags = NV_3D0_NONE; - - return data; -} - -static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIOQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - PCIDevice *pdev = &vdev->pdev; - - switch (quirk->data.flags) { - case NV_3D0_NONE: - if (addr == quirk->data.address_offset && data == 0x338) { - quirk->data.flags = NV_3D0_SELECT; - } - break; - case NV_3D0_SELECT: - quirk->data.flags = NV_3D0_NONE; - if (addr == quirk->data.data_offset && - (data & ~quirk->data.address_mask) == quirk->data.address_match) { - quirk->data.flags = NV_3D0_WINDOW; - quirk->data.address_val = data & quirk->data.address_mask; - } - break; - case NV_3D0_WINDOW: - quirk->data.flags = NV_3D0_NONE; - if (addr == quirk->data.address_offset) { - if (data == 0x538) { - quirk->data.flags = NV_3D0_READ; - } else if (data == 0x738) { - quirk->data.flags = NV_3D0_WRITE; - } - } - break; - case NV_3D0_WRITE: - quirk->data.flags = NV_3D0_NONE; - if (addr == quirk->data.data_offset) { - vfio_pci_write_config(pdev, quirk->data.address_val, data, size); - trace_vfio_nvidia_3d0_quirk_write(data, size); - return; - } - break; - } - - vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], - addr + quirk->data.base_offset, data, size); -} - -static const MemoryRegionOps vfio_nvidia_3d0_quirk = { - .read = vfio_nvidia_3d0_quirk_read, - .write = vfio_nvidia_3d0_quirk_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) -{ - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - - if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA || - !vdev->bars[1].region.size) { - return; - } - - quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.base_offset = 0x10; - quirk->data.address_offset = 4; - quirk->data.address_size = 2; - quirk->data.address_match = 0x1800; - quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; - quirk->data.data_offset = 0; - quirk->data.data_size = 4; - - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk, - quirk, "vfio-nvidia-3d0-quirk", 6); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, - quirk->data.base_offset, &quirk->mem); - - QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, - quirk, next); - - trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->vbasedev.name); -} - -/* - * The second quirk is documented in envytools. The I/O port BAR5 is just - * a set of address/data ports to the MMIO BARs. The BAR we care about is - * again BAR0. This backdoor is apparently a bit newer than the one above - * so we need to not only trap 256 bytes @0x1800, but all of PCI config - * space, including extended space is available at the 4k @0x88000. - */ -enum { - NV_BAR5_ADDRESS = 0x1, - NV_BAR5_ENABLE = 0x2, - NV_BAR5_MASTER = 0x4, - NV_BAR5_VALID = 0x7, -}; - -static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIOQuirk *quirk = opaque; - - switch (addr) { - case 0x0: - if (data & 0x1) { - quirk->data.flags |= NV_BAR5_MASTER; - } else { - quirk->data.flags &= ~NV_BAR5_MASTER; - } - break; - case 0x4: - if (data & 0x1) { - quirk->data.flags |= NV_BAR5_ENABLE; - } else { - quirk->data.flags &= ~NV_BAR5_ENABLE; - } - break; - case 0x8: - if (quirk->data.flags & NV_BAR5_MASTER) { - if ((data & ~0xfff) == 0x88000) { - quirk->data.flags |= NV_BAR5_ADDRESS; - quirk->data.address_val = data & 0xfff; - } else if ((data & ~0xff) == 0x1800) { - quirk->data.flags |= NV_BAR5_ADDRESS; - quirk->data.address_val = data & 0xff; - } else { - quirk->data.flags &= ~NV_BAR5_ADDRESS; - } - } - break; - } - - vfio_generic_window_quirk_write(opaque, addr, data, size); -} - -static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = { - .read = vfio_generic_window_quirk_read, - .write = vfio_nvidia_bar5_window_quirk_write, - .valid.min_access_size = 4, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) -{ - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - - if (!vdev->has_vga || nr != 5 || - pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { - return; - } - - quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.read_flags = quirk->data.write_flags = NV_BAR5_VALID; - quirk->data.address_offset = 0x8; - quirk->data.address_size = 0; /* actually 4, but avoids generic code */ - quirk->data.data_offset = 0xc; - quirk->data.data_size = 4; - quirk->data.bar = nr; - - memory_region_init_io(&quirk->mem, OBJECT(vdev), - &vfio_nvidia_bar5_window_quirk, quirk, - "vfio-nvidia-bar5-window-quirk", 16); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - 0, &quirk->mem, 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - - trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name); -} - -static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIOQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - PCIDevice *pdev = &vdev->pdev; - hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; - - vfio_generic_quirk_write(opaque, addr, data, size); - - /* - * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the - * MSI capability ID register. Both the ID and next register are - * read-only, so we allow writes covering either of those to real hw. - * NB - only fixed for the 0x88000 MMIO window. - */ - if ((pdev->cap_present & QEMU_PCI_CAP_MSI) && - vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) { - vfio_region_write(&vdev->bars[quirk->data.bar].region, - addr + base, data, size); - } -} - -static const MemoryRegionOps vfio_nvidia_88000_quirk = { - .read = vfio_generic_quirk_read, - .write = vfio_nvidia_88000_quirk_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -/* - * Finally, BAR0 itself. We want to redirect any accesses to either - * 0x1800 or 0x88000 through the PCI config space access functions. - * - * NB - quirk at a page granularity or else they don't seem to work when - * BARs are mmap'd - * - * Here's offset 0x88000... - */ -static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr) -{ - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - uint16_t vendor, class; - - vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); - class = pci_get_word(pdev->config + PCI_CLASS_DEVICE); - - if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA || - class != PCI_CLASS_DISPLAY_VGA) { - return; - } - - quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1; - quirk->data.address_match = 0x88000; - quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; - quirk->data.bar = nr; - - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk, - quirk, "vfio-nvidia-bar0-88000-quirk", - TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - quirk->data.address_match & TARGET_PAGE_MASK, - &quirk->mem, 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - - trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name); -} - -/* - * And here's the same for BAR0 offset 0x1800... - */ -static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) -{ - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - - if (!vdev->has_vga || nr != 0 || - pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { - return; - } - - /* Log the chipset ID */ - trace_vfio_probe_nvidia_bar0_1800_quirk_id( - (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20) - & 0xff); - - quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1; - quirk->data.address_match = 0x1800; - quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; - quirk->data.bar = nr; - - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk, - "vfio-nvidia-bar0-1800-quirk", - TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - quirk->data.address_match & TARGET_PAGE_MASK, - &quirk->mem, 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - - trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name); -} - -/* - * TODO - Some Nvidia devices provide config access to their companion HDA - * device and even to their parent bridge via these config space mirrors. - * Add quirks for those regions. - */ - -/* - * Common quirk probe entry points. - */ -static void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) -{ - vfio_vga_probe_ati_3c3_quirk(vdev); - vfio_vga_probe_nvidia_3d0_quirk(vdev); -} - -static void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev) -{ - VFIOQuirk *quirk; - int i; - - for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { - QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) { - memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem); - } - } -} - -static void vfio_vga_quirk_free(VFIOPCIDevice *vdev) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { - while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { - VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); - object_unparent(OBJECT(&quirk->mem)); - QLIST_REMOVE(quirk, next); - g_free(quirk); - } - } -} - -static void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) -{ - vfio_probe_ati_bar4_window_quirk(vdev, nr); - vfio_probe_ati_bar2_4000_quirk(vdev, nr); - vfio_probe_nvidia_bar5_window_quirk(vdev, nr); - vfio_probe_nvidia_bar0_88000_quirk(vdev, nr); - vfio_probe_nvidia_bar0_1800_quirk(vdev, nr); - vfio_probe_rtl8168_bar2_window_quirk(vdev, nr); -} - -static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) -{ - VFIOBAR *bar = &vdev->bars[nr]; - VFIOQuirk *quirk; - - QLIST_FOREACH(quirk, &bar->quirks, next) { - memory_region_del_subregion(&bar->region.mem, &quirk->mem); - } -} - -static void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr) -{ - VFIOBAR *bar = &vdev->bars[nr]; - - while (!QLIST_EMPTY(&bar->quirks)) { - VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); - object_unparent(OBJECT(&quirk->mem)); - QLIST_REMOVE(quirk, next); - g_free(quirk); - } -} - /* * PCI config space */ -static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) +uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val; @@ -1937,8 +1061,8 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) return val; } -static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, - uint32_t val, int len) +void vfio_pci_write_config(PCIDevice *pdev, + uint32_t addr, uint32_t val, int len) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); uint32_t val_le = cpu_to_le32(val); diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index a7a3a6a0f7..ff94929b45 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -155,4 +155,19 @@ typedef struct VFIORomBlacklistEntry { uint16_t device_id; } VFIORomBlacklistEntry; +uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); +void vfio_pci_write_config(PCIDevice *pdev, + uint32_t addr, uint32_t val, int len); + +uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); +void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); + +bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev); +void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); +void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev); +void vfio_vga_quirk_free(VFIOPCIDevice *vdev); +void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr); +void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr); +void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr); + #endif /* HW_VFIO_VFIO_PCI_H */ From 056dfcb695cde3c62b7dc1d5ed6d2e38b3a73e29 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:45 -0600 Subject: [PATCH 09/19] vfio/pci: Cleanup ROM blacklist quirk Create a vendor:device ID helper that we'll also use as we rework the rest of the quirks. Re-reading the config entries, even if we get more blacklist entries, is trivial overhead and only incurred during device setup. There's no need to typedef the blacklist structure, it's a static private data type used once. The elements get bumped up to uint32_t to avoid future maintenance issues if PCI_ANY_ID gets used for a blacklist entry (avoiding an actual hardware match). Our test loop is also crying out to be simplified as a for loop. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 40 +++++++++++++++++++++++++--------------- hw/vfio/pci.h | 5 ----- trace-events | 3 +++ 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 1f9a8096a1..17e300abad 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -14,6 +14,19 @@ #include "trace.h" #include "qemu/range.h" +#define PCI_ANY_ID (~0) + +/* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */ +static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device) +{ + PCIDevice *pdev = &vdev->pdev; + + return (vendor == PCI_ANY_ID || + vendor == pci_get_word(pdev->config + PCI_VENDOR_ID)) && + (device == PCI_ANY_ID || + device == pci_get_word(pdev->config + PCI_DEVICE_ID)); +} + /* * List of device ids/vendor ids for which to disable * option rom loading. This avoids the guest hangs during rom @@ -27,28 +40,25 @@ * your card/environment details and information that could * help in debugging to the bug tracking this issue */ -static const VFIORomBlacklistEntry romblacklist[] = { - /* Broadcom BCM 57810 */ - { 0x14e4, 0x168e } +static const struct { + uint32_t vendor; + uint32_t device; +} romblacklist[] = { + { 0x14e4, 0x168e }, /* Broadcom BCM 57810 */ }; bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; - uint16_t vendor_id, device_id; - int count = 0; + int i; - vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); - device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); - - while (count < ARRAY_SIZE(romblacklist)) { - if (romblacklist[count].vendor_id == vendor_id && - romblacklist[count].device_id == device_id) { - return true; + for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) { + if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) { + trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name, + romblacklist[i].vendor, + romblacklist[i].device); + return true; } - count++; } - return false; } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index ff94929b45..f6dbe7ff9e 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -150,11 +150,6 @@ typedef struct VFIOPCIDevice { bool no_kvm_msix; } VFIOPCIDevice; -typedef struct VFIORomBlacklistEntry { - uint16_t vendor_id; - uint16_t device_id; -} VFIORomBlacklistEntry; - uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len); diff --git a/trace-events b/trace-events index 545cec30ed..f783a6aff0 100644 --- a/trace-events +++ b/trace-events @@ -1585,6 +1585,9 @@ vfio_pci_reset(const char *name) " (%s)" vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET" vfio_pci_reset_pm(const char *name) "%s PCI PM Reset" +# hw/vfio/pci-quirks. +vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x" + # hw/vfio/vfio-common.c vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)" vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64 From 8c4f234853d9d438dc1733ca98674b1139a87c99 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:46 -0600 Subject: [PATCH 10/19] vfio/pci: Foundation for new quirk structure VFIOQuirk hosts a single memory region and a fixed set of data fields that try to handle all the quirk cases, but end up making those that don't exactly match really confusing. This patch introduces a struct intended to provide more flexibility and simpler code. VFIOQuirk is stripped to its basics, an opaque data pointer for quirk specific data and a pointer to an array of MemoryRegions with a counter. This still allows us to have common teardown routines, but adds much greater flexibility to support multiple memory regions and quirk specific data structures that are easier to maintain. The existing VFIOQuirk is transformed into VFIOLegacyQuirk, which further patches will eliminate entirely. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 235 ++++++++++++++++++++++++++----------------- hw/vfio/pci.h | 12 ++- 2 files changed, 150 insertions(+), 97 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 17e300abad..429fdad068 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -80,7 +80,7 @@ static bool vfio_flags_enabled(uint8_t flags, uint8_t mask) static uint64_t vfio_generic_window_quirk_read(void *opaque, hwaddr addr, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; uint64_t data; @@ -92,13 +92,13 @@ static uint64_t vfio_generic_window_quirk_read(void *opaque, if (!vfio_range_contained(addr, size, quirk->data.data_offset, quirk->data.data_size)) { hw_error("%s: window data read not fully contained: %s", - __func__, memory_region_name(&quirk->mem)); + __func__, memory_region_name(quirk->mem)); } data = vfio_pci_read_config(&vdev->pdev, quirk->data.address_val + offset, size); - trace_vfio_generic_window_quirk_read(memory_region_name(&quirk->mem), + trace_vfio_generic_window_quirk_read(memory_region_name(quirk->mem), vdev->vbasedev.name, quirk->data.bar, addr, size, data); @@ -113,7 +113,7 @@ static uint64_t vfio_generic_window_quirk_read(void *opaque, static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; if (ranges_overlap(addr, size, @@ -121,7 +121,7 @@ static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, if (addr != quirk->data.address_offset) { hw_error("%s: offset write into address window: %s", - __func__, memory_region_name(&quirk->mem)); + __func__, memory_region_name(quirk->mem)); } if ((data & ~quirk->data.address_mask) == quirk->data.address_match) { @@ -142,12 +142,12 @@ static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, if (!vfio_range_contained(addr, size, quirk->data.data_offset, quirk->data.data_size)) { hw_error("%s: window data write not fully contained: %s", - __func__, memory_region_name(&quirk->mem)); + __func__, memory_region_name(quirk->mem)); } vfio_pci_write_config(&vdev->pdev, quirk->data.address_val + offset, data, size); - trace_vfio_generic_window_quirk_write(memory_region_name(&quirk->mem), + trace_vfio_generic_window_quirk_write(memory_region_name(quirk->mem), vdev->vbasedev.name, quirk->data.bar, addr, data, size); @@ -167,7 +167,7 @@ static const MemoryRegionOps vfio_generic_window_quirk = { static uint64_t vfio_generic_quirk_read(void *opaque, hwaddr addr, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; @@ -178,12 +178,12 @@ static uint64_t vfio_generic_quirk_read(void *opaque, if (!vfio_range_contained(addr, size, offset, quirk->data.address_mask + 1)) { hw_error("%s: read not fully contained: %s", - __func__, memory_region_name(&quirk->mem)); + __func__, memory_region_name(quirk->mem)); } data = vfio_pci_read_config(&vdev->pdev, addr - offset, size); - trace_vfio_generic_quirk_read(memory_region_name(&quirk->mem), + trace_vfio_generic_quirk_read(memory_region_name(quirk->mem), vdev->vbasedev.name, quirk->data.bar, addr + base, size, data); } else { @@ -197,7 +197,7 @@ static uint64_t vfio_generic_quirk_read(void *opaque, static void vfio_generic_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; @@ -207,12 +207,12 @@ static void vfio_generic_quirk_write(void *opaque, hwaddr addr, if (!vfio_range_contained(addr, size, offset, quirk->data.address_mask + 1)) { hw_error("%s: write not fully contained: %s", - __func__, memory_region_name(&quirk->mem)); + __func__, memory_region_name(quirk->mem)); } vfio_pci_write_config(&vdev->pdev, addr - offset, data, size); - trace_vfio_generic_quirk_write(memory_region_name(&quirk->mem), + trace_vfio_generic_quirk_write(memory_region_name(quirk->mem), vdev->vbasedev.name, quirk->data.bar, addr + base, data, size); } else { @@ -242,7 +242,7 @@ static const MemoryRegionOps vfio_generic_quirk = { static uint64_t vfio_ati_3c3_quirk_read(void *opaque, hwaddr addr, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; uint64_t data = vfio_pci_read_config(&vdev->pdev, PCI_BASE_ADDRESS_0 + (4 * 4) + 1, @@ -261,6 +261,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; + VFIOLegacyQuirk *legacy; if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { return; @@ -275,12 +276,15 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) } quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; + legacy = quirk->data = g_malloc0(sizeof(*legacy)); + quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + quirk->nr_mem = 1; + legacy->vdev = vdev; - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, quirk, + memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, legacy, "vfio-ati-3c3-quirk", 1); memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, - 3 /* offset 3 bytes from 0x3c0 */, &quirk->mem); + 3 /* offset 3 bytes from 0x3c0 */, quirk->mem); QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); @@ -302,6 +306,7 @@ static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; + VFIOLegacyQuirk *legacy; if (!vdev->has_vga || nr != 4 || pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { @@ -309,20 +314,23 @@ static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.address_size = 4; - quirk->data.data_offset = 4; - quirk->data.data_size = 4; - quirk->data.address_match = 0x4000; - quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; - quirk->data.bar = nr; - quirk->data.read_flags = quirk->data.write_flags = 1; + quirk->data = legacy = g_malloc0(sizeof(*legacy)); + quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + quirk->nr_mem = 1; + legacy->vdev = vdev; + legacy->data.address_size = 4; + legacy->data.data_offset = 4; + legacy->data.data_size = 4; + legacy->data.address_match = 0x4000; + legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; + legacy->data.bar = nr; + legacy->data.read_flags = legacy->data.write_flags = 1; - memory_region_init_io(&quirk->mem, OBJECT(vdev), - &vfio_generic_window_quirk, quirk, + memory_region_init_io(quirk->mem, OBJECT(vdev), + &vfio_generic_window_quirk, legacy, "vfio-ati-bar4-window-quirk", 8); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - quirk->data.base_offset, &quirk->mem, 1); + legacy->data.base_offset, quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -336,6 +344,7 @@ static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; + VFIOLegacyQuirk *legacy; /* Only enable on newer devices where BAR2 is 64bit */ if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 || @@ -344,18 +353,21 @@ static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1; - quirk->data.address_match = 0x4000; - quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; - quirk->data.bar = nr; + quirk->data = legacy = g_malloc0(sizeof(*legacy)); + quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + quirk->nr_mem = 1; + legacy->vdev = vdev; + legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1; + legacy->data.address_match = 0x4000; + legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; + legacy->data.bar = nr; - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk, + memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy, "vfio-ati-bar2-4000-quirk", - TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); + TARGET_PAGE_ALIGN(legacy->data.address_mask + 1)); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - quirk->data.address_match & TARGET_PAGE_MASK, - &quirk->mem, 1); + legacy->data.address_match & TARGET_PAGE_MASK, + quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -397,7 +409,7 @@ enum { static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, hwaddr addr, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; PCIDevice *pdev = &vdev->pdev; uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], @@ -416,7 +428,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; PCIDevice *pdev = &vdev->pdev; @@ -468,6 +480,7 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; + VFIOLegacyQuirk *legacy; if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA || !vdev->bars[1].region.size) { @@ -475,19 +488,22 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) } quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.base_offset = 0x10; - quirk->data.address_offset = 4; - quirk->data.address_size = 2; - quirk->data.address_match = 0x1800; - quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; - quirk->data.data_offset = 0; - quirk->data.data_size = 4; + quirk->data = legacy = g_malloc0(sizeof(*legacy)); + quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + quirk->nr_mem = 1; + legacy->vdev = vdev; + legacy->data.base_offset = 0x10; + legacy->data.address_offset = 4; + legacy->data.address_size = 2; + legacy->data.address_match = 0x1800; + legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; + legacy->data.data_offset = 0; + legacy->data.data_size = 4; - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk, - quirk, "vfio-nvidia-3d0-quirk", 6); + memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk, + legacy, "vfio-nvidia-3d0-quirk", 6); memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, - quirk->data.base_offset, &quirk->mem); + legacy->data.base_offset, quirk->mem); QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); @@ -512,7 +528,7 @@ enum { static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; switch (addr) { case 0x0: @@ -558,6 +574,7 @@ static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; + VFIOLegacyQuirk *legacy; if (!vdev->has_vga || nr != 5 || pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { @@ -565,19 +582,22 @@ static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.read_flags = quirk->data.write_flags = NV_BAR5_VALID; - quirk->data.address_offset = 0x8; - quirk->data.address_size = 0; /* actually 4, but avoids generic code */ - quirk->data.data_offset = 0xc; - quirk->data.data_size = 4; - quirk->data.bar = nr; + quirk->data = legacy = g_malloc0(sizeof(*legacy)); + quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + quirk->nr_mem = 1; + legacy->vdev = vdev; + legacy->data.read_flags = legacy->data.write_flags = NV_BAR5_VALID; + legacy->data.address_offset = 0x8; + legacy->data.address_size = 0; /* actually 4, but avoids generic code */ + legacy->data.data_offset = 0xc; + legacy->data.data_size = 4; + legacy->data.bar = nr; - memory_region_init_io(&quirk->mem, OBJECT(vdev), - &vfio_nvidia_bar5_window_quirk, quirk, + memory_region_init_io(quirk->mem, OBJECT(vdev), + &vfio_nvidia_bar5_window_quirk, legacy, "vfio-nvidia-bar5-window-quirk", 16); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - 0, &quirk->mem, 1); + 0, quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -587,7 +607,7 @@ static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; PCIDevice *pdev = &vdev->pdev; hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; @@ -626,6 +646,7 @@ static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; + VFIOLegacyQuirk *legacy; uint16_t vendor, class; vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); @@ -637,18 +658,21 @@ static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1; - quirk->data.address_match = 0x88000; - quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; - quirk->data.bar = nr; + quirk->data = legacy = g_malloc0(sizeof(*legacy)); + quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + quirk->nr_mem = 1; + legacy->vdev = vdev; + legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1; + legacy->data.address_match = 0x88000; + legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; + legacy->data.bar = nr; - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk, - quirk, "vfio-nvidia-bar0-88000-quirk", - TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); + memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk, + legacy, "vfio-nvidia-bar0-88000-quirk", + TARGET_PAGE_ALIGN(legacy->data.address_mask + 1)); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - quirk->data.address_match & TARGET_PAGE_MASK, - &quirk->mem, 1); + legacy->data.address_match & TARGET_PAGE_MASK, + quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -662,6 +686,7 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; + VFIOLegacyQuirk *legacy; if (!vdev->has_vga || nr != 0 || pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { @@ -674,18 +699,21 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) & 0xff); quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1; - quirk->data.address_match = 0x1800; - quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; - quirk->data.bar = nr; + quirk->data = legacy = g_malloc0(sizeof(*legacy)); + quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + quirk->nr_mem = 1; + legacy->vdev = vdev; + legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1; + legacy->data.address_match = 0x1800; + legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; + legacy->data.bar = nr; - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk, + memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy, "vfio-nvidia-bar0-1800-quirk", - TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); + TARGET_PAGE_ALIGN(legacy->data.address_mask + 1)); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - quirk->data.address_match & TARGET_PAGE_MASK, - &quirk->mem, 1); + legacy->data.address_match & TARGET_PAGE_MASK, + quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -725,7 +753,7 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, hwaddr addr, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; uint64_t val = 0; @@ -755,7 +783,7 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { - VFIOQuirk *quirk = opaque; + VFIOLegacyQuirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; switch (addr) { @@ -809,6 +837,7 @@ static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; + VFIOLegacyQuirk *legacy; if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK || pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) { @@ -816,13 +845,16 @@ static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->vdev = vdev; - quirk->data.bar = nr; + quirk->data = legacy = g_malloc0(sizeof(*legacy)); + quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + quirk->nr_mem = 1; + legacy->vdev = vdev; + legacy->data.bar = nr; - memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk, - quirk, "vfio-rtl8168-window-quirk", 8); + memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk, + legacy, "vfio-rtl8168-window-quirk", 8); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - 0x70, &quirk->mem, 1); + 0x70, quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -841,24 +873,31 @@ void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev) { VFIOQuirk *quirk; - int i; + int i, j; for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) { - memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem); + for (j = 0; j < quirk->nr_mem; j++) { + memory_region_del_subregion(&vdev->vga.region[i].mem, + &quirk->mem[j]); + } } } } void vfio_vga_quirk_free(VFIOPCIDevice *vdev) { - int i; + int i, j; for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); - object_unparent(OBJECT(&quirk->mem)); QLIST_REMOVE(quirk, next); + for (j = 0; j < quirk->nr_mem; j++) { + object_unparent(OBJECT(&quirk->mem[j])); + } + g_free(quirk->mem); + g_free(quirk->data); g_free(quirk); } } @@ -878,20 +917,28 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; VFIOQuirk *quirk; + int i; QLIST_FOREACH(quirk, &bar->quirks, next) { - memory_region_del_subregion(&bar->region.mem, &quirk->mem); + for (i = 0; i < quirk->nr_mem; i++) { + memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]); + } } } void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; + int i; while (!QLIST_EMPTY(&bar->quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); - object_unparent(OBJECT(&quirk->mem)); QLIST_REMOVE(quirk, next); + for (i = 0; i < quirk->nr_mem; i++) { + object_unparent(OBJECT(&quirk->mem[i])); + } + g_free(quirk->mem); + g_free(quirk->data); g_free(quirk); } } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index f6dbe7ff9e..8696976293 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -22,10 +22,9 @@ struct VFIOPCIDevice; -typedef struct VFIOQuirk { - MemoryRegion mem; +typedef struct VFIOLegacyQuirk { struct VFIOPCIDevice *vdev; - QLIST_ENTRY(VFIOQuirk) next; + MemoryRegion *mem; struct { uint32_t base_offset:TARGET_PAGE_BITS; uint32_t address_offset:TARGET_PAGE_BITS; @@ -43,6 +42,13 @@ typedef struct VFIOQuirk { uint8_t read_flags; uint8_t write_flags; } data; +} VFIOLegacyQuirk; + +typedef struct VFIOQuirk { + QLIST_ENTRY(VFIOQuirk) next; + void *data; + int nr_mem; + MemoryRegion *mem; } VFIOQuirk; typedef struct VFIOBAR { From b946d286114e09a81c303c7ec8ec3f7b33dff9e8 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:47 -0600 Subject: [PATCH 11/19] vfio/pci: Cleanup ATI 0x3c3 quirk This is an easy quirk that really doesn't need a data structure if its own. We can pass vdev as the opaque data and access to the MemoryRegion isn't required. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 26 +++++++++----------------- trace-events | 4 ++-- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 429fdad068..44c7701870 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -242,12 +242,11 @@ static const MemoryRegionOps vfio_generic_quirk = { static uint64_t vfio_ati_3c3_quirk_read(void *opaque, hwaddr addr, unsigned size) { - VFIOLegacyQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = opaque; uint64_t data = vfio_pci_read_config(&vdev->pdev, - PCI_BASE_ADDRESS_0 + (4 * 4) + 1, - size); - trace_vfio_ati_3c3_quirk_read(data); + PCI_BASE_ADDRESS_4 + 1, size); + + trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data); return data; } @@ -259,29 +258,22 @@ static const MemoryRegionOps vfio_ati_3c3_quirk = { static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; - VFIOLegacyQuirk *legacy; - - if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { - return; - } /* * As long as the BAR is >= 256 bytes it will be aligned such that the * lower byte is always zero. Filter out anything else, if it exists. */ - if (!vdev->bars[4].ioport || vdev->bars[4].region.size < 256) { + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) || + !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) { return; } quirk = g_malloc0(sizeof(*quirk)); - legacy = quirk->data = g_malloc0(sizeof(*legacy)); - quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1); quirk->nr_mem = 1; - legacy->vdev = vdev; - memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, legacy, + memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev, "vfio-ati-3c3-quirk", 1); memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, 3 /* offset 3 bytes from 0x3c0 */, quirk->mem); @@ -289,7 +281,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); - trace_vfio_vga_probe_ati_3c3_quirk(vdev->vbasedev.name); + trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name); } /* diff --git a/trace-events b/trace-events index f783a6aff0..47391e610f 100644 --- a/trace-events +++ b/trace-events @@ -1550,8 +1550,6 @@ vfio_generic_window_quirk_write(const char * region_name, const char *name, int vfio_generic_quirk_read(const char * region_name, const char *name, int index, uint64_t addr, int size, uint64_t data) "%s read(%s:BAR%d+0x%"PRIx64", %d = 0x%"PRIx64 # remove ) vfio_generic_quirk_write(const char * region_name, const char *name, int index, uint64_t addr, uint64_t data, int size) "%s write(%s:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d" -vfio_ati_3c3_quirk_read(uint64_t data) " (0x3c3, 1) = 0x%"PRIx64 -vfio_vga_probe_ati_3c3_quirk(const char *name) "Enabled ATI/AMD quirk 0x3c3 BAR4for device %s" vfio_probe_ati_bar4_window_quirk(const char *name) "Enabled ATI/AMD BAR4 window quirk for device %s" #issue with ) vfio_rtl8168_quirk_read(const char *name, const char *type, uint64_t val) "%s [%s]: 0x%"PRIx64 @@ -1587,6 +1585,8 @@ vfio_pci_reset_pm(const char *name) "%s PCI PM Reset" # hw/vfio/pci-quirks. vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x" +vfio_quirk_ati_3c3_read(const char *name, uint64_t data) "%s 0x%"PRIx64 +vfio_quirk_ati_3c3_probe(const char *name) "%s" # hw/vfio/vfio-common.c vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)" From 6029a424be37e0d7949546af7593b9b604611480 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:47 -0600 Subject: [PATCH 12/19] vfio/pci: Cleanup Nvidia 0x3d0 quirk The Nvidia 0x3d0 quirk makes use of a two separate registers and gives us our first chance to make use of separate memory regions for each to simplify the code a bit. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 172 +++++++++++++++++++++++++++---------------- trace-events | 7 +- 2 files changed, 111 insertions(+), 68 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 44c7701870..438b08274b 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -390,76 +390,121 @@ static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) * through 0x3d0. This quirk doesn't seem to be necessary on newer cards * that use the I/O port BAR5 window but it doesn't hurt to leave it. */ -enum { - NV_3D0_NONE = 0, - NV_3D0_SELECT, - NV_3D0_WINDOW, - NV_3D0_READ, - NV_3D0_WRITE, +typedef enum {NONE = 0, SELECT, WINDOW, READ, WRITE} VFIONvidia3d0State; +static const char *nv3d0_states[] = { "NONE", "SELECT", + "WINDOW", "READ", "WRITE" }; + +typedef struct VFIONvidia3d0Quirk { + VFIOPCIDevice *vdev; + VFIONvidia3d0State state; + uint32_t offset; +} VFIONvidia3d0Quirk; + +static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIONvidia3d0Quirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + + quirk->state = NONE; + + return vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + addr + 0x14, size); +} + +static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIONvidia3d0Quirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + VFIONvidia3d0State old_state = quirk->state; + + quirk->state = NONE; + + switch (data) { + case 0x338: + if (old_state == NONE) { + quirk->state = SELECT; + trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name, + nv3d0_states[quirk->state]); + } + break; + case 0x538: + if (old_state == WINDOW) { + quirk->state = READ; + trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name, + nv3d0_states[quirk->state]); + } + break; + case 0x738: + if (old_state == WINDOW) { + quirk->state = WRITE; + trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name, + nv3d0_states[quirk->state]); + } + break; + } + + vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + addr + 0x14, data, size); +} + +static const MemoryRegionOps vfio_nvidia_3d4_quirk = { + .read = vfio_nvidia_3d4_quirk_read, + .write = vfio_nvidia_3d4_quirk_write, + .endianness = DEVICE_LITTLE_ENDIAN, }; static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, hwaddr addr, unsigned size) { - VFIOLegacyQuirk *quirk = opaque; + VFIONvidia3d0Quirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; - PCIDevice *pdev = &vdev->pdev; + VFIONvidia3d0State old_state = quirk->state; uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], - addr + quirk->data.base_offset, size); + addr + 0x10, size); - if (quirk->data.flags == NV_3D0_READ && addr == quirk->data.data_offset) { - data = vfio_pci_read_config(pdev, quirk->data.address_val, size); - trace_vfio_nvidia_3d0_quirk_read(size, data); + quirk->state = NONE; + + if (old_state == READ && + (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) { + uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1); + + data = vfio_pci_read_config(&vdev->pdev, offset, size); + trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name, + offset, size, data); } - quirk->data.flags = NV_3D0_NONE; - return data; } static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { - VFIOLegacyQuirk *quirk = opaque; + VFIONvidia3d0Quirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; - PCIDevice *pdev = &vdev->pdev; + VFIONvidia3d0State old_state = quirk->state; - switch (quirk->data.flags) { - case NV_3D0_NONE: - if (addr == quirk->data.address_offset && data == 0x338) { - quirk->data.flags = NV_3D0_SELECT; - } - break; - case NV_3D0_SELECT: - quirk->data.flags = NV_3D0_NONE; - if (addr == quirk->data.data_offset && - (data & ~quirk->data.address_mask) == quirk->data.address_match) { - quirk->data.flags = NV_3D0_WINDOW; - quirk->data.address_val = data & quirk->data.address_mask; - } - break; - case NV_3D0_WINDOW: - quirk->data.flags = NV_3D0_NONE; - if (addr == quirk->data.address_offset) { - if (data == 0x538) { - quirk->data.flags = NV_3D0_READ; - } else if (data == 0x738) { - quirk->data.flags = NV_3D0_WRITE; - } - } - break; - case NV_3D0_WRITE: - quirk->data.flags = NV_3D0_NONE; - if (addr == quirk->data.data_offset) { - vfio_pci_write_config(pdev, quirk->data.address_val, data, size); - trace_vfio_nvidia_3d0_quirk_write(data, size); + quirk->state = NONE; + + if (old_state == SELECT) { + quirk->offset = (uint32_t)data; + quirk->state = WINDOW; + trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name, + nv3d0_states[quirk->state]); + } else if (old_state == WRITE) { + if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) { + uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1); + + vfio_pci_write_config(&vdev->pdev, offset, data, size); + trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name, + offset, data, size); return; } - break; } vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], - addr + quirk->data.base_offset, data, size); + addr + 0x10, data, size); } static const MemoryRegionOps vfio_nvidia_3d0_quirk = { @@ -470,37 +515,34 @@ static const MemoryRegionOps vfio_nvidia_3d0_quirk = { static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; - VFIOLegacyQuirk *legacy; + VFIONvidia3d0Quirk *data; - if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA || + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || !vdev->bars[1].region.size) { return; } quirk = g_malloc0(sizeof(*quirk)); - quirk->data = legacy = g_malloc0(sizeof(*legacy)); - quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); - quirk->nr_mem = 1; - legacy->vdev = vdev; - legacy->data.base_offset = 0x10; - legacy->data.address_offset = 4; - legacy->data.address_size = 2; - legacy->data.address_match = 0x1800; - legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; - legacy->data.data_offset = 0; - legacy->data.data_size = 4; + quirk->data = data = g_malloc0(sizeof(*data)); + quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2); + quirk->nr_mem = 2; + data->vdev = vdev; - memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk, - legacy, "vfio-nvidia-3d0-quirk", 6); + memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk, + data, "vfio-nvidia-3d4-quirk", 2); memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, - legacy->data.base_offset, quirk->mem); + 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]); + + memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk, + data, "vfio-nvidia-3d0-quirk", 2); + memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]); QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); - trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->vbasedev.name); + trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name); } /* diff --git a/trace-events b/trace-events index 47391e610f..3020b9ae4f 100644 --- a/trace-events +++ b/trace-events @@ -1557,9 +1557,6 @@ vfio_rtl8168_quirk_write(const char *name, uint64_t val) "%s [address]: 0x%"PRIx vfio_rtl8168_quirk_msix(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table write[0x%x]: 0x%"PRIx64 vfio_rtl8168_quirk_enable(const char *name) "%s" vfio_probe_ati_bar2_4000_quirk(const char *name) "Enabled ATI/AMD BAR2 0x4000 quirk for device %s" -vfio_nvidia_3d0_quirk_read(int size, uint64_t data) " (0x3d0, %d) = 0x%"PRIx64 -vfio_nvidia_3d0_quirk_write(uint64_t data, int size) " (0x3d0, 0x%"PRIx64", %d)" -vfio_vga_probe_nvidia_3d0_quirk(const char *name) "Enabled NVIDIA VGA 0x3d0 quirk for device %s" vfio_probe_nvidia_bar5_window_quirk(const char *name) "Enabled NVIDIA BAR5 window quirk for device %s" vfio_probe_nvidia_bar0_88000_quirk(const char *name) "Enabled NVIDIA BAR0 0x88000 quirk for device %s" vfio_probe_nvidia_bar0_1800_quirk_id(int id) "Nvidia NV%02x" @@ -1587,6 +1584,10 @@ vfio_pci_reset_pm(const char *name) "%s PCI PM Reset" vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x" vfio_quirk_ati_3c3_read(const char *name, uint64_t data) "%s 0x%"PRIx64 vfio_quirk_ati_3c3_probe(const char *name) "%s" +vfio_quirk_nvidia_3d0_state(const char *name, const char *state) "%s %s" +vfio_quirk_nvidia_3d0_read(const char *name, uint8_t offset, unsigned size, uint64_t val) " (%s, @0x%x, len=0x%x) %"PRIx64 +vfio_quirk_nvidia_3d0_write(const char *name, uint8_t offset, uint64_t data, unsigned size) "(%s, @0x%x, 0x%"PRIx64", len=0x%x)" +vfio_quirk_nvidia_3d0_probe(const char *name) "%s" # hw/vfio/vfio-common.c vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)" From 954258a5f11b51abd1ceed7c96d1204d4cef1353 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:47 -0600 Subject: [PATCH 13/19] vfio/pci: Rework RTL8168 quirk Another rework of this quirk, this time to update to the new quirk structure. We can handle the address and data registers with separate MemoryRegions and a quirk specific data structure, making the code much more understandable. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 171 +++++++++++++++++++++++++------------------ trace-events | 8 +- 2 files changed, 102 insertions(+), 77 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 438b08274b..b3bb2f8f3a 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -784,81 +784,63 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete */ -static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, - hwaddr addr, unsigned size) +typedef struct VFIOrtl8168Quirk { + VFIOPCIDevice *vdev; + uint32_t addr; + uint32_t data; + bool enabled; +} VFIOrtl8168Quirk; + +static uint64_t vfio_rtl8168_quirk_address_read(void *opaque, + hwaddr addr, unsigned size) { - VFIOLegacyQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - uint64_t val = 0; + VFIOrtl8168Quirk *rtl = opaque; + VFIOPCIDevice *vdev = rtl->vdev; + uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size); - if (!quirk->data.flags) { /* Non-MSI-X table access */ - return vfio_region_read(&vdev->bars[quirk->data.bar].region, - addr + 0x70, size); + if (rtl->enabled) { + data = rtl->addr ^ 0x80000000U; /* latch/complete */ + trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data); } - switch (addr) { - case 4: /* address */ - val = quirk->data.address_match ^ 0x80000000U; /* latch/complete */ - break; - case 0: /* data */ - if ((vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { - memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, - (hwaddr)(quirk->data.address_match & 0xfff), - &val, size, MEMTXATTRS_UNSPECIFIED); - } - break; - } - - trace_vfio_rtl8168_quirk_read(vdev->vbasedev.name, - addr ? "address" : "data", val); - return val; + return data; } -static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) +static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) { - VFIOLegacyQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; + VFIOrtl8168Quirk *rtl = opaque; + VFIOPCIDevice *vdev = rtl->vdev; - switch (addr) { - case 4: /* address */ - if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */ - quirk->data.flags = 1; /* Activate reads */ - quirk->data.address_match = data; + rtl->enabled = false; - trace_vfio_rtl8168_quirk_write(vdev->vbasedev.name, data); + if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */ + rtl->enabled = true; + rtl->addr = (uint32_t)data; - if (data & 0x80000000U) { /* Do write */ - if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) { - hwaddr offset = data & 0xfff; - uint64_t val = quirk->data.address_mask; + if (data & 0x80000000U) { /* Do write */ + if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) { + hwaddr offset = data & 0xfff; + uint64_t val = rtl->data; - trace_vfio_rtl8168_quirk_msix(vdev->vbasedev.name, - (uint16_t)offset, val); + trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name, + (uint16_t)offset, val); - /* Write to the proper guest MSI-X table instead */ - memory_region_dispatch_write(&vdev->pdev.msix_table_mmio, - offset, val, size, - MEMTXATTRS_UNSPECIFIED); - } - return; /* Do not write guest MSI-X data to hardware */ + /* Write to the proper guest MSI-X table instead */ + memory_region_dispatch_write(&vdev->pdev.msix_table_mmio, + offset, val, size, + MEMTXATTRS_UNSPECIFIED); } - } else { - quirk->data.flags = 0; /* De-activate reads, non-MSI-X */ + return; /* Do not write guest MSI-X data to hardware */ } - break; - case 0: /* data */ - quirk->data.address_mask = data; - break; } - vfio_region_write(&vdev->bars[quirk->data.bar].region, - addr + 0x70, data, size); + vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size); } -static const MemoryRegionOps vfio_rtl8168_window_quirk = { - .read = vfio_rtl8168_window_quirk_read, - .write = vfio_rtl8168_window_quirk_write, +static const MemoryRegionOps vfio_rtl_address_quirk = { + .read = vfio_rtl8168_quirk_address_read, + .write = vfio_rtl8168_quirk_address_write, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -867,32 +849,75 @@ static const MemoryRegionOps vfio_rtl8168_window_quirk = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr) +static uint64_t vfio_rtl8168_quirk_data_read(void *opaque, + hwaddr addr, unsigned size) { - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - VFIOLegacyQuirk *legacy; + VFIOrtl8168Quirk *rtl = opaque; + VFIOPCIDevice *vdev = rtl->vdev; + uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size); - if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK || - pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) { + if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { + hwaddr offset = rtl->addr & 0xfff; + memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset, + &data, size, MEMTXATTRS_UNSPECIFIED); + trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data); + } + + return data; +} + +static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOrtl8168Quirk *rtl = opaque; + VFIOPCIDevice *vdev = rtl->vdev; + + rtl->data = (uint32_t)data; + + vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size); +} + +static const MemoryRegionOps vfio_rtl_data_quirk = { + .read = vfio_rtl8168_quirk_data_read, + .write = vfio_rtl8168_quirk_data_write, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + .unaligned = false, + }, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) +{ + VFIOQuirk *quirk; + VFIOrtl8168Quirk *rtl; + + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) { return; } quirk = g_malloc0(sizeof(*quirk)); - quirk->data = legacy = g_malloc0(sizeof(*legacy)); - quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); - quirk->nr_mem = 1; - legacy->vdev = vdev; - legacy->data.bar = nr; + quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2); + quirk->nr_mem = 2; + quirk->data = rtl = g_malloc0(sizeof(*rtl)); + rtl->vdev = vdev; - memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk, - legacy, "vfio-rtl8168-window-quirk", 8); + memory_region_init_io(&quirk->mem[0], OBJECT(vdev), + &vfio_rtl_address_quirk, rtl, + "vfio-rtl8168-window-address-quirk", 4); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - 0x70, quirk->mem, 1); + 0x74, &quirk->mem[0], 1); + + memory_region_init_io(&quirk->mem[1], OBJECT(vdev), + &vfio_rtl_data_quirk, rtl, + "vfio-rtl8168-window-data-quirk", 4); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + 0x70, &quirk->mem[1], 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_rtl8168_quirk_enable(vdev->vbasedev.name); + trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); } /* @@ -944,7 +969,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) vfio_probe_nvidia_bar5_window_quirk(vdev, nr); vfio_probe_nvidia_bar0_88000_quirk(vdev, nr); vfio_probe_nvidia_bar0_1800_quirk(vdev, nr); - vfio_probe_rtl8168_bar2_window_quirk(vdev, nr); + vfio_probe_rtl8168_bar2_quirk(vdev, nr); } void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) diff --git a/trace-events b/trace-events index 3020b9ae4f..b54411fe15 100644 --- a/trace-events +++ b/trace-events @@ -1552,10 +1552,6 @@ vfio_generic_quirk_read(const char * region_name, const char *name, int index, u vfio_generic_quirk_write(const char * region_name, const char *name, int index, uint64_t addr, uint64_t data, int size) "%s write(%s:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d" vfio_probe_ati_bar4_window_quirk(const char *name) "Enabled ATI/AMD BAR4 window quirk for device %s" #issue with ) -vfio_rtl8168_quirk_read(const char *name, const char *type, uint64_t val) "%s [%s]: 0x%"PRIx64 -vfio_rtl8168_quirk_write(const char *name, uint64_t val) "%s [address]: 0x%"PRIx64 -vfio_rtl8168_quirk_msix(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table write[0x%x]: 0x%"PRIx64 -vfio_rtl8168_quirk_enable(const char *name) "%s" vfio_probe_ati_bar2_4000_quirk(const char *name) "Enabled ATI/AMD BAR2 0x4000 quirk for device %s" vfio_probe_nvidia_bar5_window_quirk(const char *name) "Enabled NVIDIA BAR5 window quirk for device %s" vfio_probe_nvidia_bar0_88000_quirk(const char *name) "Enabled NVIDIA BAR0 0x88000 quirk for device %s" @@ -1588,6 +1584,10 @@ vfio_quirk_nvidia_3d0_state(const char *name, const char *state) "%s %s" vfio_quirk_nvidia_3d0_read(const char *name, uint8_t offset, unsigned size, uint64_t val) " (%s, @0x%x, len=0x%x) %"PRIx64 vfio_quirk_nvidia_3d0_write(const char *name, uint8_t offset, uint64_t data, unsigned size) "(%s, @0x%x, 0x%"PRIx64", len=0x%x)" vfio_quirk_nvidia_3d0_probe(const char *name) "%s" +vfio_quirk_rtl8168_fake_latch(const char *name, uint64_t val) "%s 0x%"PRIx64 +vfio_quirk_rtl8168_msix_write(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table write[0x%x]: 0x%"PRIx64 +vfio_quirk_rtl8168_msix_read(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table read[0x%x]: 0x%"PRIx64 +vfio_quirk_rtl8168_probe(const char *name) "%s" # hw/vfio/vfio-common.c vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)" From 0e54f24a5b4bb756715928058b60a7d5f70ccd7f Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:48 -0600 Subject: [PATCH 14/19] vfio/pci: Config window quirks Config windows make use of an address register and a data register. In VGA cards, these are often used to provide real mode code in the BIOS an easy way to access MMIO registers since the window often resides in an I/O port register. When the MMIO register has a mirror of PCI config space, we need to trap those accesses and redirect them to emulated config space. The previous version of this functionality made use of a single MemoryRegion and single match address. This version uses separate MemoryRegions for each of the address and data registers and allows for multiple match addresses. This is useful for Nvidia cards which have two ranges which index into PCI config space. The previous implementation is left for the follow-on patch for a more reviewable diff. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 362 ++++++++++++++++++++++++++++++++----------- trace-events | 8 +- 2 files changed, 278 insertions(+), 92 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index b3bb2f8f3a..89e81213dd 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -63,9 +63,127 @@ bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev) } /* - * Device specific quirks + * Device specific region quirks (mostly backdoors to PCI config space) */ +/* + * The generic window quirks operate on an address and data register, + * vfio_generic_window_address_quirk handles the address register and + * vfio_generic_window_data_quirk handles the data register. These ops + * pass reads and writes through to hardware until a value matching the + * stored address match/mask is written. When this occurs, the data + * register access emulated PCI config space for the device rather than + * passing through accesses. This enables devices where PCI config space + * is accessible behind a window register to maintain the virtualization + * provided through vfio. + */ +typedef struct VFIOConfigWindowMatch { + uint32_t match; + uint32_t mask; +} VFIOConfigWindowMatch; + +typedef struct VFIOConfigWindowQuirk { + struct VFIOPCIDevice *vdev; + + uint32_t address_val; + + uint32_t address_offset; + uint32_t data_offset; + + bool window_enabled; + uint8_t bar; + + MemoryRegion *addr_mem; + MemoryRegion *data_mem; + + uint32_t nr_matches; + VFIOConfigWindowMatch matches[]; +} VFIOConfigWindowQuirk; + +static uint64_t vfio_generic_window_quirk_address_read(void *opaque, + hwaddr addr, + unsigned size) +{ + VFIOConfigWindowQuirk *window = opaque; + VFIOPCIDevice *vdev = window->vdev; + + return vfio_region_read(&vdev->bars[window->bar].region, + addr + window->address_offset, size); +} + +static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr, + uint64_t data, + unsigned size) +{ + VFIOConfigWindowQuirk *window = opaque; + VFIOPCIDevice *vdev = window->vdev; + int i; + + window->window_enabled = false; + + vfio_region_write(&vdev->bars[window->bar].region, + addr + window->address_offset, data, size); + + for (i = 0; i < window->nr_matches; i++) { + if ((data & ~window->matches[i].mask) == window->matches[i].match) { + window->window_enabled = true; + window->address_val = data & window->matches[i].mask; + trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name, + memory_region_name(window->addr_mem), data); + break; + } + } +} + +static const MemoryRegionOps vfio_generic_window_address_quirk = { + .read = vfio_generic_window_quirk_address_read, + .write = vfio_generic_window_quirk_address_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t vfio_generic_window_quirk_data_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIOConfigWindowQuirk *window = opaque; + VFIOPCIDevice *vdev = window->vdev; + uint64_t data; + + /* Always read data reg, discard if window enabled */ + data = vfio_region_read(&vdev->bars[window->bar].region, + addr + window->data_offset, size); + + if (window->window_enabled) { + data = vfio_pci_read_config(&vdev->pdev, window->address_val, size); + trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name, + memory_region_name(window->data_mem), data); + } + + return data; +} + +static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOConfigWindowQuirk *window = opaque; + VFIOPCIDevice *vdev = window->vdev; + + if (window->window_enabled) { + vfio_pci_write_config(&vdev->pdev, window->address_val, data, size); + trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name, + memory_region_name(window->data_mem), data); + return; + } + + vfio_region_write(&vdev->bars[window->bar].region, + addr + window->data_offset, data, size); +} + +static const MemoryRegionOps vfio_generic_window_data_quirk = { + .read = vfio_generic_window_quirk_data_read, + .write = vfio_generic_window_quirk_data_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + /* Is range1 fully contained within range2? */ static bool vfio_range_contained(uint64_t first1, uint64_t len1, uint64_t first2, uint64_t len2) { @@ -285,48 +403,57 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) } /* - * Newer ATI/AMD devices, including HD5450 and HD7850, have a window to PCI + * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI * config space through MMIO BAR2 at offset 0x4000. Nothing seems to access * the MMIO space directly, but a window to this space is provided through * I/O port BAR4. Offset 0x0 is the address register and offset 0x4 is the * data register. When the address is programmed to a range of 0x4000-0x4fff * PCI configuration space is available. Experimentation seems to indicate - * that only read-only access is provided, but we drop writes when the window - * is enabled to config space nonetheless. + * that read-only may be provided by hardware. */ -static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr) +static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) { - PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; - VFIOLegacyQuirk *legacy; + VFIOConfigWindowQuirk *window; - if (!vdev->has_vga || nr != 4 || - pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { + /* This windows doesn't seem to be used except by legacy VGA code */ + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) || + !vdev->has_vga || nr != 4) { return; } quirk = g_malloc0(sizeof(*quirk)); - quirk->data = legacy = g_malloc0(sizeof(*legacy)); - quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); - quirk->nr_mem = 1; - legacy->vdev = vdev; - legacy->data.address_size = 4; - legacy->data.data_offset = 4; - legacy->data.data_size = 4; - legacy->data.address_match = 0x4000; - legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; - legacy->data.bar = nr; - legacy->data.read_flags = legacy->data.write_flags = 1; + quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2); + quirk->nr_mem = 2; + window = quirk->data = g_malloc0(sizeof(*window) + + sizeof(VFIOConfigWindowMatch)); + window->vdev = vdev; + window->address_offset = 0; + window->data_offset = 4; + window->nr_matches = 1; + window->matches[0].match = 0x4000; + window->matches[0].mask = PCIE_CONFIG_SPACE_SIZE - 1; + window->bar = nr; + window->addr_mem = &quirk->mem[0]; + window->data_mem = &quirk->mem[1]; - memory_region_init_io(quirk->mem, OBJECT(vdev), - &vfio_generic_window_quirk, legacy, - "vfio-ati-bar4-window-quirk", 8); + memory_region_init_io(window->addr_mem, OBJECT(vdev), + &vfio_generic_window_address_quirk, window, + "vfio-ati-bar4-window-address-quirk", 4); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - legacy->data.base_offset, quirk->mem, 1); + window->address_offset, + window->addr_mem, 1); + + memory_region_init_io(window->data_mem, OBJECT(vdev), + &vfio_generic_window_data_quirk, window, + "vfio-ati-bar4-window-data-quirk", 4); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + window->data_offset, + window->data_mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name); + trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name); } /* @@ -552,90 +679,145 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) * so we need to not only trap 256 bytes @0x1800, but all of PCI config * space, including extended space is available at the 4k @0x88000. */ -enum { - NV_BAR5_ADDRESS = 0x1, - NV_BAR5_ENABLE = 0x2, - NV_BAR5_MASTER = 0x4, - NV_BAR5_VALID = 0x7, -}; +typedef struct VFIONvidiaBAR5Quirk { + uint32_t master; + uint32_t enable; + MemoryRegion *addr_mem; + MemoryRegion *data_mem; + bool enabled; + VFIOConfigWindowQuirk window; /* last for match data */ +} VFIONvidiaBAR5Quirk; -static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) +static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5) { - VFIOLegacyQuirk *quirk = opaque; + VFIOPCIDevice *vdev = bar5->window.vdev; - switch (addr) { - case 0x0: - if (data & 0x1) { - quirk->data.flags |= NV_BAR5_MASTER; - } else { - quirk->data.flags &= ~NV_BAR5_MASTER; - } - break; - case 0x4: - if (data & 0x1) { - quirk->data.flags |= NV_BAR5_ENABLE; - } else { - quirk->data.flags &= ~NV_BAR5_ENABLE; - } - break; - case 0x8: - if (quirk->data.flags & NV_BAR5_MASTER) { - if ((data & ~0xfff) == 0x88000) { - quirk->data.flags |= NV_BAR5_ADDRESS; - quirk->data.address_val = data & 0xfff; - } else if ((data & ~0xff) == 0x1800) { - quirk->data.flags |= NV_BAR5_ADDRESS; - quirk->data.address_val = data & 0xff; - } else { - quirk->data.flags &= ~NV_BAR5_ADDRESS; - } - } - break; + if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) { + return; } - vfio_generic_window_quirk_write(opaque, addr, data, size); + bar5->enabled = !bar5->enabled; + trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name, + bar5->enabled ? "Enable" : "Disable"); + memory_region_set_enabled(bar5->addr_mem, bar5->enabled); + memory_region_set_enabled(bar5->data_mem, bar5->enabled); } -static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = { - .read = vfio_generic_window_quirk_read, - .write = vfio_nvidia_bar5_window_quirk_write, - .valid.min_access_size = 4, +static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIONvidiaBAR5Quirk *bar5 = opaque; + VFIOPCIDevice *vdev = bar5->window.vdev; + + return vfio_region_read(&vdev->bars[5].region, addr, size); +} + +static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIONvidiaBAR5Quirk *bar5 = opaque; + VFIOPCIDevice *vdev = bar5->window.vdev; + + vfio_region_write(&vdev->bars[5].region, addr, data, size); + + bar5->master = data; + vfio_nvidia_bar5_enable(bar5); +} + +static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = { + .read = vfio_nvidia_bar5_quirk_master_read, + .write = vfio_nvidia_bar5_quirk_master_write, .endianness = DEVICE_LITTLE_ENDIAN, }; -static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) +static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque, + hwaddr addr, unsigned size) { - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - VFIOLegacyQuirk *legacy; + VFIONvidiaBAR5Quirk *bar5 = opaque; + VFIOPCIDevice *vdev = bar5->window.vdev; - if (!vdev->has_vga || nr != 5 || - pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { + return vfio_region_read(&vdev->bars[5].region, addr + 4, size); +} + +static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIONvidiaBAR5Quirk *bar5 = opaque; + VFIOPCIDevice *vdev = bar5->window.vdev; + + vfio_region_write(&vdev->bars[5].region, addr + 4, data, size); + + bar5->enable = data; + vfio_nvidia_bar5_enable(bar5); +} + +static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = { + .read = vfio_nvidia_bar5_quirk_enable_read, + .write = vfio_nvidia_bar5_quirk_enable_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) +{ + VFIOQuirk *quirk; + VFIONvidiaBAR5Quirk *bar5; + VFIOConfigWindowQuirk *window; + + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || + !vdev->has_vga || nr != 5) { return; } quirk = g_malloc0(sizeof(*quirk)); - quirk->data = legacy = g_malloc0(sizeof(*legacy)); - quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); - quirk->nr_mem = 1; - legacy->vdev = vdev; - legacy->data.read_flags = legacy->data.write_flags = NV_BAR5_VALID; - legacy->data.address_offset = 0x8; - legacy->data.address_size = 0; /* actually 4, but avoids generic code */ - legacy->data.data_offset = 0xc; - legacy->data.data_size = 4; - legacy->data.bar = nr; + quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 4); + quirk->nr_mem = 4; + bar5 = quirk->data = g_malloc0(sizeof(*bar5) + + (sizeof(VFIOConfigWindowMatch) * 2)); + window = &bar5->window; - memory_region_init_io(quirk->mem, OBJECT(vdev), - &vfio_nvidia_bar5_window_quirk, legacy, - "vfio-nvidia-bar5-window-quirk", 16); + window->vdev = vdev; + window->address_offset = 0x8; + window->data_offset = 0xc; + window->nr_matches = 2; + window->matches[0].match = 0x1800; + window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1; + window->matches[1].match = 0x88000; + window->matches[1].mask = PCIE_CONFIG_SPACE_SIZE - 1; + window->bar = nr; + window->addr_mem = bar5->addr_mem = &quirk->mem[0]; + window->data_mem = bar5->data_mem = &quirk->mem[1]; + + memory_region_init_io(window->addr_mem, OBJECT(vdev), + &vfio_generic_window_address_quirk, window, + "vfio-nvidia-bar5-window-address-quirk", 4); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - 0, quirk->mem, 1); + window->address_offset, + window->addr_mem, 1); + memory_region_set_enabled(window->addr_mem, false); + + memory_region_init_io(window->data_mem, OBJECT(vdev), + &vfio_generic_window_data_quirk, window, + "vfio-nvidia-bar5-window-data-quirk", 4); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + window->data_offset, + window->data_mem, 1); + memory_region_set_enabled(window->data_mem, false); + + memory_region_init_io(&quirk->mem[2], OBJECT(vdev), + &vfio_nvidia_bar5_quirk_master, bar5, + "vfio-nvidia-bar5-master-quirk", 4); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + 0, &quirk->mem[2], 1); + + memory_region_init_io(&quirk->mem[3], OBJECT(vdev), + &vfio_nvidia_bar5_quirk_enable, bar5, + "vfio-nvidia-bar5-enable-quirk", 4); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + 4, &quirk->mem[3], 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name); + trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name); } static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr, @@ -964,9 +1146,9 @@ void vfio_vga_quirk_free(VFIOPCIDevice *vdev) void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) { - vfio_probe_ati_bar4_window_quirk(vdev, nr); + vfio_probe_ati_bar4_quirk(vdev, nr); vfio_probe_ati_bar2_4000_quirk(vdev, nr); - vfio_probe_nvidia_bar5_window_quirk(vdev, nr); + vfio_probe_nvidia_bar5_quirk(vdev, nr); vfio_probe_nvidia_bar0_88000_quirk(vdev, nr); vfio_probe_nvidia_bar0_1800_quirk(vdev, nr); vfio_probe_rtl8168_bar2_quirk(vdev, nr); diff --git a/trace-events b/trace-events index b54411fe15..5b80ee08be 100644 --- a/trace-events +++ b/trace-events @@ -1550,10 +1550,8 @@ vfio_generic_window_quirk_write(const char * region_name, const char *name, int vfio_generic_quirk_read(const char * region_name, const char *name, int index, uint64_t addr, int size, uint64_t data) "%s read(%s:BAR%d+0x%"PRIx64", %d = 0x%"PRIx64 # remove ) vfio_generic_quirk_write(const char * region_name, const char *name, int index, uint64_t addr, uint64_t data, int size) "%s write(%s:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d" -vfio_probe_ati_bar4_window_quirk(const char *name) "Enabled ATI/AMD BAR4 window quirk for device %s" #issue with ) vfio_probe_ati_bar2_4000_quirk(const char *name) "Enabled ATI/AMD BAR2 0x4000 quirk for device %s" -vfio_probe_nvidia_bar5_window_quirk(const char *name) "Enabled NVIDIA BAR5 window quirk for device %s" vfio_probe_nvidia_bar0_88000_quirk(const char *name) "Enabled NVIDIA BAR0 0x88000 quirk for device %s" vfio_probe_nvidia_bar0_1800_quirk_id(int id) "Nvidia NV%02x" vfio_probe_nvidia_bar0_1800_quirk(const char *name) "Enabled NVIDIA BAR0 0x1800 quirk for device %s" @@ -1578,12 +1576,18 @@ vfio_pci_reset_pm(const char *name) "%s PCI PM Reset" # hw/vfio/pci-quirks. vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x" +vfio_quirk_generic_window_address_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64 +vfio_quirk_generic_window_data_read(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64 +vfio_quirk_generic_window_data_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64 vfio_quirk_ati_3c3_read(const char *name, uint64_t data) "%s 0x%"PRIx64 vfio_quirk_ati_3c3_probe(const char *name) "%s" +vfio_quirk_ati_bar4_probe(const char *name) "%s" vfio_quirk_nvidia_3d0_state(const char *name, const char *state) "%s %s" vfio_quirk_nvidia_3d0_read(const char *name, uint8_t offset, unsigned size, uint64_t val) " (%s, @0x%x, len=0x%x) %"PRIx64 vfio_quirk_nvidia_3d0_write(const char *name, uint8_t offset, uint64_t data, unsigned size) "(%s, @0x%x, 0x%"PRIx64", len=0x%x)" vfio_quirk_nvidia_3d0_probe(const char *name) "%s" +vfio_quirk_nvidia_bar5_state(const char *name, const char *state) "%s %s" +vfio_quirk_nvidia_bar5_probe(const char *name) "%s" vfio_quirk_rtl8168_fake_latch(const char *name, uint64_t val) "%s 0x%"PRIx64 vfio_quirk_rtl8168_msix_write(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table write[0x%x]: 0x%"PRIx64 vfio_quirk_rtl8168_msix_read(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table read[0x%x]: 0x%"PRIx64 From 0d38fb1c5f921acc050d5f80a2ff4e627b565494 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:48 -0600 Subject: [PATCH 15/19] vfio/pci: Config mirror quirk Re-implement our mirror quirk using the new infrastructure. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 222 +++++++++++++++++++++++-------------------- trace-events | 9 +- 2 files changed, 125 insertions(+), 106 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 89e81213dd..961aa566a2 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -27,6 +27,14 @@ static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device) device == pci_get_word(pdev->config + PCI_DEVICE_ID)); } +static bool vfio_is_vga(VFIOPCIDevice *vdev) +{ + PCIDevice *pdev = &vdev->pdev; + uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE); + + return class == PCI_CLASS_DISPLAY_VGA; +} + /* * List of device ids/vendor ids for which to disable * option rom loading. This avoids the guest hangs during rom @@ -184,6 +192,55 @@ static const MemoryRegionOps vfio_generic_window_data_quirk = { .endianness = DEVICE_LITTLE_ENDIAN, }; +/* + * The generic mirror quirk handles devices which expose PCI config space + * through a region within a BAR. When enabled, reads and writes are + * redirected through to emulated PCI config space. XXX if PCI config space + * used memory regions, this could just be an alias. + */ +typedef struct VFIOConfigMirrorQuirk { + struct VFIOPCIDevice *vdev; + uint32_t offset; + uint8_t bar; + MemoryRegion *mem; +} VFIOConfigMirrorQuirk; + +static uint64_t vfio_generic_quirk_mirror_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIOConfigMirrorQuirk *mirror = opaque; + VFIOPCIDevice *vdev = mirror->vdev; + uint64_t data; + + /* Read and discard in case the hardware cares */ + (void)vfio_region_read(&vdev->bars[mirror->bar].region, + addr + mirror->offset, size); + + data = vfio_pci_read_config(&vdev->pdev, addr, size); + trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name, + memory_region_name(mirror->mem), + addr, data); + return data; +} + +static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOConfigMirrorQuirk *mirror = opaque; + VFIOPCIDevice *vdev = mirror->vdev; + + vfio_pci_write_config(&vdev->pdev, addr, data, size); + trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name, + memory_region_name(mirror->mem), + addr, data); +} + +static const MemoryRegionOps vfio_generic_mirror_quirk = { + .read = vfio_generic_quirk_mirror_read, + .write = vfio_generic_quirk_mirror_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + /* Is range1 fully contained within range2? */ static bool vfio_range_contained(uint64_t first1, uint64_t len1, uint64_t first2, uint64_t len2) { @@ -457,40 +514,36 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) } /* - * Trap the BAR2 MMIO window to config space as well. + * Trap the BAR2 MMIO mirror to config space as well. */ -static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) +static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr) { - PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; - VFIOLegacyQuirk *legacy; + VFIOConfigMirrorQuirk *mirror; /* Only enable on newer devices where BAR2 is 64bit */ - if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 || - pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) || + !vdev->has_vga || nr != 2 || !vdev->bars[2].mem64) { return; } quirk = g_malloc0(sizeof(*quirk)); - quirk->data = legacy = g_malloc0(sizeof(*legacy)); - quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + mirror = quirk->data = g_malloc0(sizeof(*mirror)); + mirror->mem = quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1); quirk->nr_mem = 1; - legacy->vdev = vdev; - legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1; - legacy->data.address_match = 0x4000; - legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; - legacy->data.bar = nr; + mirror->vdev = vdev; + mirror->offset = 0x4000; + mirror->bar = nr; - memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy, - "vfio-ati-bar2-4000-quirk", - TARGET_PAGE_ALIGN(legacy->data.address_mask + 1)); + memory_region_init_io(mirror->mem, OBJECT(vdev), + &vfio_generic_mirror_quirk, mirror, + "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - legacy->data.address_match & TARGET_PAGE_MASK, - quirk->mem, 1); + mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name); + trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name); } /* @@ -820,120 +873,86 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name); } -static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) +/* + * Finally, BAR0 itself. We want to redirect any accesses to either + * 0x1800 or 0x88000 through the PCI config space access functions. + */ +static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) { - VFIOLegacyQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; + VFIOConfigMirrorQuirk *mirror = opaque; + VFIOPCIDevice *vdev = mirror->vdev; PCIDevice *pdev = &vdev->pdev; - hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; - vfio_generic_quirk_write(opaque, addr, data, size); + vfio_generic_quirk_mirror_write(opaque, addr, data, size); /* * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the * MSI capability ID register. Both the ID and next register are * read-only, so we allow writes covering either of those to real hw. - * NB - only fixed for the 0x88000 MMIO window. */ if ((pdev->cap_present & QEMU_PCI_CAP_MSI) && vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) { - vfio_region_write(&vdev->bars[quirk->data.bar].region, - addr + base, data, size); + vfio_region_write(&vdev->bars[mirror->bar].region, + addr + mirror->offset, data, size); + trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name); } } -static const MemoryRegionOps vfio_nvidia_88000_quirk = { - .read = vfio_generic_quirk_read, - .write = vfio_nvidia_88000_quirk_write, +static const MemoryRegionOps vfio_nvidia_mirror_quirk = { + .read = vfio_generic_quirk_mirror_read, + .write = vfio_nvidia_quirk_mirror_write, .endianness = DEVICE_LITTLE_ENDIAN, }; -/* - * Finally, BAR0 itself. We want to redirect any accesses to either - * 0x1800 or 0x88000 through the PCI config space access functions. - * - * NB - quirk at a page granularity or else they don't seem to work when - * BARs are mmap'd - * - * Here's offset 0x88000... - */ -static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr) +static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) { - PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; - VFIOLegacyQuirk *legacy; - uint16_t vendor, class; + VFIOConfigMirrorQuirk *mirror; - vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); - class = pci_get_word(pdev->config + PCI_CLASS_DEVICE); - - if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA || - class != PCI_CLASS_DISPLAY_VGA) { + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || + !vfio_is_vga(vdev) || nr != 0) { return; } quirk = g_malloc0(sizeof(*quirk)); - quirk->data = legacy = g_malloc0(sizeof(*legacy)); - quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + mirror = quirk->data = g_malloc0(sizeof(*mirror)); + mirror->mem = quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1); quirk->nr_mem = 1; - legacy->vdev = vdev; - legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1; - legacy->data.address_match = 0x88000; - legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; - legacy->data.bar = nr; + mirror->vdev = vdev; + mirror->offset = 0x88000; + mirror->bar = nr; - memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk, - legacy, "vfio-nvidia-bar0-88000-quirk", - TARGET_PAGE_ALIGN(legacy->data.address_mask + 1)); + memory_region_init_io(mirror->mem, OBJECT(vdev), + &vfio_nvidia_mirror_quirk, mirror, + "vfio-nvidia-bar0-88000-mirror-quirk", + PCIE_CONFIG_SPACE_SIZE); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - legacy->data.address_match & TARGET_PAGE_MASK, - quirk->mem, 1); + mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name); -} + /* The 0x1800 offset mirror only seems to get used by legacy VGA */ + if (vdev->has_vga) { + quirk = g_malloc0(sizeof(*quirk)); + mirror = quirk->data = g_malloc0(sizeof(*mirror)); + mirror->mem = quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1); + quirk->nr_mem = 1; + mirror->vdev = vdev; + mirror->offset = 0x1800; + mirror->bar = nr; -/* - * And here's the same for BAR0 offset 0x1800... - */ -static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) -{ - PCIDevice *pdev = &vdev->pdev; - VFIOQuirk *quirk; - VFIOLegacyQuirk *legacy; + memory_region_init_io(mirror->mem, OBJECT(vdev), + &vfio_nvidia_mirror_quirk, mirror, + "vfio-nvidia-bar0-1800-mirror-quirk", + PCI_CONFIG_SPACE_SIZE); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + mirror->offset, mirror->mem, 1); - if (!vdev->has_vga || nr != 0 || - pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { - return; + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); } - /* Log the chipset ID */ - trace_vfio_probe_nvidia_bar0_1800_quirk_id( - (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20) - & 0xff); - - quirk = g_malloc0(sizeof(*quirk)); - quirk->data = legacy = g_malloc0(sizeof(*legacy)); - quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); - quirk->nr_mem = 1; - legacy->vdev = vdev; - legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1; - legacy->data.address_match = 0x1800; - legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; - legacy->data.bar = nr; - - memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy, - "vfio-nvidia-bar0-1800-quirk", - TARGET_PAGE_ALIGN(legacy->data.address_mask + 1)); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, - legacy->data.address_match & TARGET_PAGE_MASK, - quirk->mem, 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - - trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name); + trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name); } /* @@ -1147,10 +1166,9 @@ void vfio_vga_quirk_free(VFIOPCIDevice *vdev) void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) { vfio_probe_ati_bar4_quirk(vdev, nr); - vfio_probe_ati_bar2_4000_quirk(vdev, nr); + vfio_probe_ati_bar2_quirk(vdev, nr); vfio_probe_nvidia_bar5_quirk(vdev, nr); - vfio_probe_nvidia_bar0_88000_quirk(vdev, nr); - vfio_probe_nvidia_bar0_1800_quirk(vdev, nr); + vfio_probe_nvidia_bar0_quirk(vdev, nr); vfio_probe_rtl8168_bar2_quirk(vdev, nr); } diff --git a/trace-events b/trace-events index 5b80ee08be..adc6b34108 100644 --- a/trace-events +++ b/trace-events @@ -1551,10 +1551,6 @@ vfio_generic_quirk_read(const char * region_name, const char *name, int index, u # remove ) vfio_generic_quirk_write(const char * region_name, const char *name, int index, uint64_t addr, uint64_t data, int size) "%s write(%s:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d" #issue with ) -vfio_probe_ati_bar2_4000_quirk(const char *name) "Enabled ATI/AMD BAR2 0x4000 quirk for device %s" -vfio_probe_nvidia_bar0_88000_quirk(const char *name) "Enabled NVIDIA BAR0 0x88000 quirk for device %s" -vfio_probe_nvidia_bar0_1800_quirk_id(int id) "Nvidia NV%02x" -vfio_probe_nvidia_bar0_1800_quirk(const char *name) "Enabled NVIDIA BAR0 0x1800 quirk for device %s" vfio_pci_read_config(const char *name, int addr, int len, int val) " (%s, @0x%x, len=0x%x) %x" vfio_pci_write_config(const char *name, int addr, int val, int len) " (%s, @0x%x, 0x%x, len=0x%x)" vfio_msi_setup(const char *name, int pos) "%s PCI MSI CAP @0x%x" @@ -1579,15 +1575,20 @@ vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s %04 vfio_quirk_generic_window_address_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64 vfio_quirk_generic_window_data_read(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64 vfio_quirk_generic_window_data_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64 +vfio_quirk_generic_mirror_read(const char *name, const char * region_name, uint64_t addr, uint64_t data) "%s %s 0x%"PRIx64": 0x%"PRIx64 +vfio_quirk_generic_mirror_write(const char *name, const char * region_name, uint64_t addr, uint64_t data) "%s %s 0x%"PRIx64": 0x%"PRIx64 vfio_quirk_ati_3c3_read(const char *name, uint64_t data) "%s 0x%"PRIx64 vfio_quirk_ati_3c3_probe(const char *name) "%s" vfio_quirk_ati_bar4_probe(const char *name) "%s" +vfio_quirk_ati_bar2_probe(const char *name) "%s" vfio_quirk_nvidia_3d0_state(const char *name, const char *state) "%s %s" vfio_quirk_nvidia_3d0_read(const char *name, uint8_t offset, unsigned size, uint64_t val) " (%s, @0x%x, len=0x%x) %"PRIx64 vfio_quirk_nvidia_3d0_write(const char *name, uint8_t offset, uint64_t data, unsigned size) "(%s, @0x%x, 0x%"PRIx64", len=0x%x)" vfio_quirk_nvidia_3d0_probe(const char *name) "%s" vfio_quirk_nvidia_bar5_state(const char *name, const char *state) "%s %s" vfio_quirk_nvidia_bar5_probe(const char *name) "%s" +vfio_quirk_nvidia_bar0_msi_ack(const char *name) "%s" +vfio_quirk_nvidia_bar0_probe(const char *name) "%s" vfio_quirk_rtl8168_fake_latch(const char *name, uint64_t val) "%s 0x%"PRIx64 vfio_quirk_rtl8168_msix_write(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table write[0x%x]: 0x%"PRIx64 vfio_quirk_rtl8168_msix_read(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table read[0x%x]: 0x%"PRIx64 From 958d553405462e95b9d15e8ca6cfb602f7815277 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:48 -0600 Subject: [PATCH 16/19] vfio/pci: Remove old config window and mirror quirks These are now unused. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 155 ------------------------------------------- hw/vfio/pci.h | 22 ------ trace-events | 9 --- 3 files changed, 186 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 961aa566a2..c70c004e24 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -247,161 +247,6 @@ static bool vfio_range_contained(uint64_t first1, uint64_t len1, return (first1 >= first2 && first1 + len1 <= first2 + len2); } -static bool vfio_flags_enabled(uint8_t flags, uint8_t mask) -{ - return (mask && (flags & mask) == mask); -} - -static uint64_t vfio_generic_window_quirk_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIOLegacyQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - uint64_t data; - - if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) && - ranges_overlap(addr, size, - quirk->data.data_offset, quirk->data.data_size)) { - hwaddr offset = addr - quirk->data.data_offset; - - if (!vfio_range_contained(addr, size, quirk->data.data_offset, - quirk->data.data_size)) { - hw_error("%s: window data read not fully contained: %s", - __func__, memory_region_name(quirk->mem)); - } - - data = vfio_pci_read_config(&vdev->pdev, - quirk->data.address_val + offset, size); - - trace_vfio_generic_window_quirk_read(memory_region_name(quirk->mem), - vdev->vbasedev.name, - quirk->data.bar, - addr, size, data); - } else { - data = vfio_region_read(&vdev->bars[quirk->data.bar].region, - addr + quirk->data.base_offset, size); - } - - return data; -} - -static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIOLegacyQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - - if (ranges_overlap(addr, size, - quirk->data.address_offset, quirk->data.address_size)) { - - if (addr != quirk->data.address_offset) { - hw_error("%s: offset write into address window: %s", - __func__, memory_region_name(quirk->mem)); - } - - if ((data & ~quirk->data.address_mask) == quirk->data.address_match) { - quirk->data.flags |= quirk->data.write_flags | - quirk->data.read_flags; - quirk->data.address_val = data & quirk->data.address_mask; - } else { - quirk->data.flags &= ~(quirk->data.write_flags | - quirk->data.read_flags); - } - } - - if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) && - ranges_overlap(addr, size, - quirk->data.data_offset, quirk->data.data_size)) { - hwaddr offset = addr - quirk->data.data_offset; - - if (!vfio_range_contained(addr, size, quirk->data.data_offset, - quirk->data.data_size)) { - hw_error("%s: window data write not fully contained: %s", - __func__, memory_region_name(quirk->mem)); - } - - vfio_pci_write_config(&vdev->pdev, - quirk->data.address_val + offset, data, size); - trace_vfio_generic_window_quirk_write(memory_region_name(quirk->mem), - vdev->vbasedev.name, - quirk->data.bar, - addr, data, size); - return; - } - - vfio_region_write(&vdev->bars[quirk->data.bar].region, - addr + quirk->data.base_offset, data, size); -} - -static const MemoryRegionOps vfio_generic_window_quirk = { - .read = vfio_generic_window_quirk_read, - .write = vfio_generic_window_quirk_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static uint64_t vfio_generic_quirk_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIOLegacyQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; - hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; - uint64_t data; - - if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) && - ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) { - if (!vfio_range_contained(addr, size, offset, - quirk->data.address_mask + 1)) { - hw_error("%s: read not fully contained: %s", - __func__, memory_region_name(quirk->mem)); - } - - data = vfio_pci_read_config(&vdev->pdev, addr - offset, size); - - trace_vfio_generic_quirk_read(memory_region_name(quirk->mem), - vdev->vbasedev.name, quirk->data.bar, - addr + base, size, data); - } else { - data = vfio_region_read(&vdev->bars[quirk->data.bar].region, - addr + base, size); - } - - return data; -} - -static void vfio_generic_quirk_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIOLegacyQuirk *quirk = opaque; - VFIOPCIDevice *vdev = quirk->vdev; - hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; - hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; - - if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) && - ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) { - if (!vfio_range_contained(addr, size, offset, - quirk->data.address_mask + 1)) { - hw_error("%s: write not fully contained: %s", - __func__, memory_region_name(quirk->mem)); - } - - vfio_pci_write_config(&vdev->pdev, addr - offset, data, size); - - trace_vfio_generic_quirk_write(memory_region_name(quirk->mem), - vdev->vbasedev.name, quirk->data.bar, - addr + base, data, size); - } else { - vfio_region_write(&vdev->bars[quirk->data.bar].region, - addr + base, data, size); - } -} - -static const MemoryRegionOps vfio_generic_quirk = { - .read = vfio_generic_quirk_read, - .write = vfio_generic_quirk_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - #define PCI_VENDOR_ID_ATI 0x1002 /* diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 8696976293..ddaf3e9b95 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -22,28 +22,6 @@ struct VFIOPCIDevice; -typedef struct VFIOLegacyQuirk { - struct VFIOPCIDevice *vdev; - MemoryRegion *mem; - struct { - uint32_t base_offset:TARGET_PAGE_BITS; - uint32_t address_offset:TARGET_PAGE_BITS; - uint32_t address_size:3; - uint32_t bar:3; - - uint32_t address_match; - uint32_t address_mask; - - uint32_t address_val:TARGET_PAGE_BITS; - uint32_t data_offset:TARGET_PAGE_BITS; - uint32_t data_size:3; - - uint8_t flags; - uint8_t read_flags; - uint8_t write_flags; - } data; -} VFIOLegacyQuirk; - typedef struct VFIOQuirk { QLIST_ENTRY(VFIOQuirk) next; void *data; diff --git a/trace-events b/trace-events index adc6b34108..b3599988be 100644 --- a/trace-events +++ b/trace-events @@ -1542,15 +1542,6 @@ vfio_rom_read(const char *name, uint64_t addr, int size, uint64_t data) " (%s, 0 vfio_pci_size_rom(const char *name, int size) "%s ROM size 0x%x" vfio_vga_write(uint64_t addr, uint64_t data, int size) " (0x%"PRIx64", 0x%"PRIx64", %d)" vfio_vga_read(uint64_t addr, int size, uint64_t data) " (0x%"PRIx64", %d) = 0x%"PRIx64 -# remove ) = -vfio_generic_window_quirk_read(const char * region_name, const char *name, int index, uint64_t addr, int size, uint64_t data) "%s read(%s:BAR%d+0x%"PRIx64", %d = 0x%"PRIx64 -## remove ) -vfio_generic_window_quirk_write(const char * region_name, const char *name, int index, uint64_t addr, uint64_t data, int size) "%s write(%s:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d" -# remove ) = -vfio_generic_quirk_read(const char * region_name, const char *name, int index, uint64_t addr, int size, uint64_t data) "%s read(%s:BAR%d+0x%"PRIx64", %d = 0x%"PRIx64 -# remove ) -vfio_generic_quirk_write(const char * region_name, const char *name, int index, uint64_t addr, uint64_t data, int size) "%s write(%s:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d" -#issue with ) vfio_pci_read_config(const char *name, int addr, int len, int val) " (%s, @0x%x, len=0x%x) %x" vfio_pci_write_config(const char *name, int addr, int val, int len) " (%s, @0x%x, 0x%x, len=0x%x)" vfio_msi_setup(const char *name, int pos) "%s PCI MSI CAP @0x%x" From c9c5000991148383d628aac59f1593937be572e4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:49 -0600 Subject: [PATCH 17/19] vfio/pci: Move AMD device specific reset to quirks This is just another quirk, for reset rather than affecting memory regions. Move it to our new quirks file. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 168 +++++++++++++++++++++++++++++++++++++++++++ hw/vfio/pci.c | 158 +--------------------------------------- hw/vfio/pci.h | 1 + trace-events | 7 ++ 4 files changed, 177 insertions(+), 157 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index c70c004e24..9b51a64816 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1046,3 +1046,171 @@ void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr) g_free(quirk); } } + +/* + * Reset quirks + */ + +/* + * AMD Radeon PCI config reset, based on Linux: + * drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running() + * drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset + * drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc() + * drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock() + * IDs: include/drm/drm_pciids.h + * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0 + * + * Bonaire and Hawaii GPUs do not respond to a bus reset. This is a bug in the + * hardware that should be fixed on future ASICs. The symptom of this is that + * once the accerlated driver loads, Windows guests will bsod on subsequent + * attmpts to load the driver, such as after VM reset or shutdown/restart. To + * work around this, we do an AMD specific PCI config reset, followed by an SMC + * reset. The PCI config reset only works if SMC firmware is running, so we + * have a dependency on the state of the device as to whether this reset will + * be effective. There are still cases where we won't be able to kick the + * device into working, but this greatly improves the usability overall. The + * config reset magic is relatively common on AMD GPUs, but the setup and SMC + * poking is largely ASIC specific. + */ +static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev) +{ + uint32_t clk, pc_c; + + /* + * Registers 200h and 204h are index and data registers for accessing + * indirect configuration registers within the device. + */ + vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4); + clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4); + pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + + return (!(clk & 1) && (0x20100 <= pc_c)); +} + +/* + * The scope of a config reset is controlled by a mode bit in the misc register + * and a fuse, exposed as a bit in another register. The fuse is the default + * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula + * scope = !(misc ^ fuse), where the resulting scope is defined the same as + * the fuse. A truth table therefore tells us that if misc == fuse, we need + * to flip the value of the bit in the misc register. + */ +static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev) +{ + uint32_t misc, fuse; + bool a, b; + + vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4); + fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + b = fuse & 64; + + vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4); + misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + a = misc & 2; + + if (a == b) { + vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4); + vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */ + } +} + +static int vfio_radeon_reset(VFIOPCIDevice *vdev) +{ + PCIDevice *pdev = &vdev->pdev; + int i, ret = 0; + uint32_t data; + + /* Defer to a kernel implemented reset */ + if (vdev->vbasedev.reset_works) { + trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name); + return -ENODEV; + } + + /* Enable only memory BAR access */ + vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2); + + /* Reset only works if SMC firmware is loaded and running */ + if (!vfio_radeon_smc_is_running(vdev)) { + ret = -EINVAL; + trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name); + goto out; + } + + /* Make sure only the GFX function is reset */ + vfio_radeon_set_gfx_only_reset(vdev); + + /* AMD PCI config reset */ + vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4); + usleep(100); + + /* Read back the memory size to make sure we're out of reset */ + for (i = 0; i < 100000; i++) { + if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) { + goto reset_smc; + } + usleep(1); + } + + trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name); + +reset_smc: + /* Reset SMC */ + vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4); + data = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + data |= 1; + vfio_region_write(&vdev->bars[5].region, 0x204, data, 4); + + /* Disable SMC clock */ + vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4); + data = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + data |= 1; + vfio_region_write(&vdev->bars[5].region, 0x204, data, 4); + + trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name); + +out: + /* Restore PCI command register */ + vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2); + + return ret; +} + +void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev) +{ + PCIDevice *pdev = &vdev->pdev; + uint16_t vendor, device; + + vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); + device = pci_get_word(pdev->config + PCI_DEVICE_ID); + + switch (vendor) { + case 0x1002: + switch (device) { + /* Bonaire */ + case 0x6649: /* Bonaire [FirePro W5100] */ + case 0x6650: + case 0x6651: + case 0x6658: /* Bonaire XTX [Radeon R7 260X] */ + case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */ + case 0x665d: /* Bonaire [Radeon R7 200 Series] */ + /* Hawaii */ + case 0x67A0: /* Hawaii XT GL [FirePro W9100] */ + case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */ + case 0x67A2: + case 0x67A8: + case 0x67A9: + case 0x67AA: + case 0x67B0: /* Hawaii XT [Radeon R9 290X] */ + case 0x67B1: /* Hawaii PRO [Radeon R9 290] */ + case 0x67B8: + case 0x67B9: + case 0x67BA: + case 0x67BE: + vdev->resetfn = vfio_radeon_reset; + trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name); + break; + } + break; + } +} diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 6a0ec453b3..c60a6a7641 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2315,162 +2315,6 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) vdev->req_enabled = false; } -/* - * AMD Radeon PCI config reset, based on Linux: - * drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running() - * drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset - * drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc() - * drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock() - * IDs: include/drm/drm_pciids.h - * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0 - * - * Bonaire and Hawaii GPUs do not respond to a bus reset. This is a bug in the - * hardware that should be fixed on future ASICs. The symptom of this is that - * once the accerlated driver loads, Windows guests will bsod on subsequent - * attmpts to load the driver, such as after VM reset or shutdown/restart. To - * work around this, we do an AMD specific PCI config reset, followed by an SMC - * reset. The PCI config reset only works if SMC firmware is running, so we - * have a dependency on the state of the device as to whether this reset will - * be effective. There are still cases where we won't be able to kick the - * device into working, but this greatly improves the usability overall. The - * config reset magic is relatively common on AMD GPUs, but the setup and SMC - * poking is largely ASIC specific. - */ -static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev) -{ - uint32_t clk, pc_c; - - /* - * Registers 200h and 204h are index and data registers for accessing - * indirect configuration registers within the device. - */ - vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4); - clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4); - vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4); - pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4); - - return (!(clk & 1) && (0x20100 <= pc_c)); -} - -/* - * The scope of a config reset is controlled by a mode bit in the misc register - * and a fuse, exposed as a bit in another register. The fuse is the default - * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula - * scope = !(misc ^ fuse), where the resulting scope is defined the same as - * the fuse. A truth table therefore tells us that if misc == fuse, we need - * to flip the value of the bit in the misc register. - */ -static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev) -{ - uint32_t misc, fuse; - bool a, b; - - vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4); - fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4); - b = fuse & 64; - - vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4); - misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4); - a = misc & 2; - - if (a == b) { - vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4); - vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */ - } -} - -static int vfio_radeon_reset(VFIOPCIDevice *vdev) -{ - PCIDevice *pdev = &vdev->pdev; - int i, ret = 0; - uint32_t data; - - /* Defer to a kernel implemented reset */ - if (vdev->vbasedev.reset_works) { - return -ENODEV; - } - - /* Enable only memory BAR access */ - vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2); - - /* Reset only works if SMC firmware is loaded and running */ - if (!vfio_radeon_smc_is_running(vdev)) { - ret = -EINVAL; - goto out; - } - - /* Make sure only the GFX function is reset */ - vfio_radeon_set_gfx_only_reset(vdev); - - /* AMD PCI config reset */ - vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4); - usleep(100); - - /* Read back the memory size to make sure we're out of reset */ - for (i = 0; i < 100000; i++) { - if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) { - break; - } - usleep(1); - } - - /* Reset SMC */ - vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4); - data = vfio_region_read(&vdev->bars[5].region, 0x204, 4); - data |= 1; - vfio_region_write(&vdev->bars[5].region, 0x204, data, 4); - - /* Disable SMC clock */ - vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4); - data = vfio_region_read(&vdev->bars[5].region, 0x204, 4); - data |= 1; - vfio_region_write(&vdev->bars[5].region, 0x204, data, 4); - -out: - /* Restore PCI command register */ - vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2); - - return ret; -} - -static void vfio_setup_resetfn(VFIOPCIDevice *vdev) -{ - PCIDevice *pdev = &vdev->pdev; - uint16_t vendor, device; - - vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); - device = pci_get_word(pdev->config + PCI_DEVICE_ID); - - switch (vendor) { - case 0x1002: - switch (device) { - /* Bonaire */ - case 0x6649: /* Bonaire [FirePro W5100] */ - case 0x6650: - case 0x6651: - case 0x6658: /* Bonaire XTX [Radeon R7 260X] */ - case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */ - case 0x665d: /* Bonaire [Radeon R7 200 Series] */ - /* Hawaii */ - case 0x67A0: /* Hawaii XT GL [FirePro W9100] */ - case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */ - case 0x67A2: - case 0x67A8: - case 0x67A9: - case 0x67AA: - case 0x67B0: /* Hawaii XT [Radeon R9 290X] */ - case 0x67B1: /* Hawaii PRO [Radeon R9 290] */ - case 0x67B8: - case 0x67B9: - case 0x67BA: - case 0x67BE: - vdev->resetfn = vfio_radeon_reset; - break; - } - break; - } -} - static int vfio_initfn(PCIDevice *pdev) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); @@ -2619,7 +2463,7 @@ static int vfio_initfn(PCIDevice *pdev) vfio_register_err_notifier(vdev); vfio_register_req_notifier(vdev); - vfio_setup_resetfn(vdev); + vfio_setup_resetfn_quirk(vdev); return 0; diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index ddaf3e9b95..e6959529a9 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -148,5 +148,6 @@ void vfio_vga_quirk_free(VFIOPCIDevice *vdev); void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr); void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr); void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr); +void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev); #endif /* HW_VFIO_VFIO_PCI_H */ diff --git a/trace-events b/trace-events index b3599988be..bc55171c6e 100644 --- a/trace-events +++ b/trace-events @@ -1585,6 +1585,13 @@ vfio_quirk_rtl8168_msix_write(const char *name, uint16_t offset, uint64_t val) " vfio_quirk_rtl8168_msix_read(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table read[0x%x]: 0x%"PRIx64 vfio_quirk_rtl8168_probe(const char *name) "%s" +vfio_quirk_ati_bonaire_reset_skipped(const char *name) "%s" +vfio_quirk_ati_bonaire_reset_no_smc(const char *name) "%s" +vfio_quirk_ati_bonaire_reset_timeout(const char *name) "%s" +vfio_quirk_ati_bonaire_reset_done(const char *name) "%s" +vfio_quirk_ati_bonaire_reset(const char *name) "%s" + + # hw/vfio/vfio-common.c vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)" vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64 From ff635e3775447b7e797f1bad8cf33403199faba1 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:49 -0600 Subject: [PATCH 18/19] vfio/pci: Cache vendor and device ID Simplify access to commonly referenced PCI vendor and device ID by caching it on the VFIOPCIDevice struct. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 18 ++++-------------- hw/vfio/pci.c | 10 +++++----- hw/vfio/pci.h | 2 ++ 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 9b51a64816..3717e01a42 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -19,12 +19,8 @@ /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */ static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device) { - PCIDevice *pdev = &vdev->pdev; - - return (vendor == PCI_ANY_ID || - vendor == pci_get_word(pdev->config + PCI_VENDOR_ID)) && - (device == PCI_ANY_ID || - device == pci_get_word(pdev->config + PCI_DEVICE_ID)); + return (vendor == PCI_ANY_ID || vendor == vdev->vendor_id) && + (device == PCI_ANY_ID || device == vdev->device_id); } static bool vfio_is_vga(VFIOPCIDevice *vdev) @@ -1178,15 +1174,9 @@ out: void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; - uint16_t vendor, device; - - vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); - device = pci_get_word(pdev->config + PCI_DEVICE_ID); - - switch (vendor) { + switch (vdev->vendor_id) { case 0x1002: - switch (device) { + switch (vdev->device_id) { /* Bonaire */ case 0x6649: /* Bonaire [FirePro W5100] */ case 0x6650: diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index c60a6a7641..b944165333 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1221,17 +1221,14 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev) * specific quirk if the device is known or we have a broken configuration. */ if (msix->pba_offset >= vdev->bars[msix->pba_bar].region.size) { - PCIDevice *pdev = &vdev->pdev; - uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); - uint16_t device = pci_get_word(pdev->config + PCI_DEVICE_ID); - /* * Chelsio T5 Virtual Function devices are encoded as 0x58xx for T5 * adapters. The T5 hardware returns an incorrect value of 0x8000 for * the VF PBA offset while the BAR itself is only 8k. The correct value * is 0x1000, so we hard code that here. */ - if (vendor == PCI_VENDOR_ID_CHELSIO && (device & 0xff00) == 0x5800) { + if (vdev->vendor_id == PCI_VENDOR_ID_CHELSIO && + (vdev->device_id & 0xff00) == 0x5800) { msix->pba_offset = 0x1000; } else { error_report("vfio: Hardware reports invalid configuration, " @@ -2407,6 +2404,9 @@ static int vfio_initfn(PCIDevice *pdev) /* QEMU can choose to expose the ROM or not */ memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4); + vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); + vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); + /* QEMU can change multi-function devices to single function, or reverse */ vdev->emulated_config_bits[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_MULTI_FUNCTION; diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index e6959529a9..797e083ca3 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice { EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); + uint16_t vendor_id; + uint16_t device_id; uint32_t features; #define VFIO_FEATURE_ENABLE_VGA_BIT 0 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT) From 89dcccc5931cc8afc2ccc7cd378695165768148b Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 23 Sep 2015 13:04:49 -0600 Subject: [PATCH 19/19] vfio/pci: Add emulated PCI IDs Specifying an emulated PCI vendor/device ID can be useful for testing various quirk paths, even though the behavior and functionality of the device with bogus IDs is fully unsupportable. We need to use a uint32_t for the vendor/device IDs, even though the registers themselves are only 16-bit in order to be able to determine whether the value is valid and user set. The same support is added for subsystem vendor/device ID, though these have the possibility of being useful and supported for more than a testing tool. An emulated platform might want to impose their own subsystem IDs or at least hide the physical subsystem ID. Windows guests will often reinstall drivers due to a change in subsystem IDs, something that VM users may want to avoid. Of course careful attention would be required to ensure that guest drivers do not rely on the subsystem ID as a basis for device driver quirks. All of these options are added using the standard experimental option prefix and should not be considered stable. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 2 -- hw/vfio/pci.c | 55 ++++++++++++++++++++++++++++++++++++++++++-- hw/vfio/pci.h | 8 +++++-- trace-events | 4 ++++ 4 files changed, 63 insertions(+), 6 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 3717e01a42..2bdaef19f1 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -14,8 +14,6 @@ #include "trace.h" #include "qemu/range.h" -#define PCI_ANY_ID (~0) - /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */ static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device) { diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index b944165333..dcabb6d3b3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2404,8 +2404,53 @@ static int vfio_initfn(PCIDevice *pdev) /* QEMU can choose to expose the ROM or not */ memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4); - vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); - vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); + /* + * The PCI spec reserves vendor ID 0xffff as an invalid value. The + * device ID is managed by the vendor and need only be a 16-bit value. + * Allow any 16-bit value for subsystem so they can be hidden or changed. + */ + if (vdev->vendor_id != PCI_ANY_ID) { + if (vdev->vendor_id >= 0xffff) { + error_report("vfio: Invalid PCI vendor ID provided"); + return -EINVAL; + } + vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); + trace_vfio_pci_emulated_vendor_id(vdev->vbasedev.name, vdev->vendor_id); + } else { + vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); + } + + if (vdev->device_id != PCI_ANY_ID) { + if (vdev->device_id > 0xffff) { + error_report("vfio: Invalid PCI device ID provided"); + return -EINVAL; + } + vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); + trace_vfio_pci_emulated_device_id(vdev->vbasedev.name, vdev->device_id); + } else { + vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); + } + + if (vdev->sub_vendor_id != PCI_ANY_ID) { + if (vdev->sub_vendor_id > 0xffff) { + error_report("vfio: Invalid PCI subsystem vendor ID provided"); + return -EINVAL; + } + vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID, + vdev->sub_vendor_id, ~0); + trace_vfio_pci_emulated_sub_vendor_id(vdev->vbasedev.name, + vdev->sub_vendor_id); + } + + if (vdev->sub_device_id != PCI_ANY_ID) { + if (vdev->sub_device_id > 0xffff) { + error_report("vfio: Invalid PCI subsystem device ID provided"); + return -EINVAL; + } + vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0); + trace_vfio_pci_emulated_sub_device_id(vdev->vbasedev.name, + vdev->sub_device_id); + } /* QEMU can change multi-function devices to single function, or reverse */ vdev->emulated_config_bits[PCI_HEADER_TYPE] = @@ -2560,6 +2605,12 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false), DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false), DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID), + DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID), + DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice, + sub_vendor_id, PCI_ANY_ID), + DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, + sub_device_id, PCI_ANY_ID), /* * TODO - support passed fds... is this necessary? * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 797e083ca3..f004d52b69 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -20,6 +20,8 @@ #include "qemu/queue.h" #include "qemu/timer.h" +#define PCI_ANY_ID (~0) + struct VFIOPCIDevice; typedef struct VFIOQuirk { @@ -116,8 +118,10 @@ typedef struct VFIOPCIDevice { EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); - uint16_t vendor_id; - uint16_t device_id; + uint32_t vendor_id; + uint32_t device_id; + uint32_t sub_vendor_id; + uint32_t sub_device_id; uint32_t features; #define VFIO_FEATURE_ENABLE_VGA_BIT 0 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT) diff --git a/trace-events b/trace-events index bc55171c6e..25c53e0c7c 100644 --- a/trace-events +++ b/trace-events @@ -1560,6 +1560,10 @@ vfio_initfn(const char *name, int group_id) " (%s) group %d" vfio_pci_reset(const char *name) " (%s)" vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET" vfio_pci_reset_pm(const char *name) "%s PCI PM Reset" +vfio_pci_emulated_vendor_id(const char *name, uint16_t val) "%s %04x" +vfio_pci_emulated_device_id(const char *name, uint16_t val) "%s %04x" +vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t val) "%s %04x" +vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s %04x" # hw/vfio/pci-quirks. vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x"