From 385f57cf9ad7566aa4fc125370f14902ea62aae5 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 19 Dec 2014 14:40:06 -0700 Subject: [PATCH 01/14] vfio: migration to trace points This patch removes all DPRINTF calls and replaces them with trace points. A few DPRINTF calls used in error cases were converted to error_report. Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- hw/misc/vfio.c | 403 +++++++++++++++++++++++++------------------------ trace-events | 75 ++++++++- 2 files changed, 280 insertions(+), 198 deletions(-) diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index a315c3acdc..6c36c8b687 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -40,15 +40,7 @@ #include "sysemu/kvm.h" #include "sysemu/sysemu.h" #include "hw/misc/vfio.h" - -/* #define DEBUG_VFIO */ -#ifdef DEBUG_VFIO -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#endif +#include "trace.h" /* Extra debugging, trap acceleration paths for more logging */ #define VFIO_ALLOW_MMAP 1 @@ -365,9 +357,9 @@ static void vfio_intx_interrupt(void *opaque) return; } - DPRINTF("%s(%04x:%02x:%02x.%x) Pin %c\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, - 'A' + vdev->intx.pin); + trace_vfio_intx_interrupt(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + 'A' + vdev->intx.pin); vdev->intx.pending = true; pci_irq_assert(&vdev->pdev); @@ -384,8 +376,8 @@ static void vfio_eoi(VFIODevice *vdev) return; } - DPRINTF("%s(%04x:%02x:%02x.%x) EOI\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_eoi(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); @@ -454,9 +446,8 @@ static void vfio_enable_intx_kvm(VFIODevice *vdev) vdev->intx.kvm_accel = true; - DPRINTF("%s(%04x:%02x:%02x.%x) KVM INTx accel enabled\n", - __func__, 
vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_enable_intx_kvm(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); return; @@ -508,9 +499,8 @@ static void vfio_disable_intx_kvm(VFIODevice *vdev) /* If we've missed an event, let it re-fire through QEMU */ vfio_unmask_intx(vdev); - DPRINTF("%s(%04x:%02x:%02x.%x) KVM INTx accel disabled\n", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_disable_intx_kvm(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); #endif } @@ -529,9 +519,9 @@ static void vfio_update_irq(PCIDevice *pdev) return; /* Nothing changed */ } - DPRINTF("%s(%04x:%02x:%02x.%x) IRQ moved %d -> %d\n", __func__, - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, vdev->intx.route.irq, route.irq); + trace_vfio_update_irq(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + vdev->intx.route.irq, route.irq); vfio_disable_intx_kvm(vdev); @@ -606,8 +596,8 @@ static int vfio_enable_intx(VFIODevice *vdev) vdev->interrupt = VFIO_INT_INTx; - DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_enable_intx(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); return 0; } @@ -629,8 +619,8 @@ static void vfio_disable_intx(VFIODevice *vdev) vdev->interrupt = VFIO_INT_NONE; - DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_disable_intx(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); } /* @@ -657,9 +647,9 @@ static void vfio_msi_interrupt(void *opaque) abort(); } - DPRINTF("%s(%04x:%02x:%02x.%x) vector %d 0x%"PRIx64"/0x%x\n", __func__, - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, nr, msg.address, msg.data); + 
trace_vfio_msi_interrupt(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + nr, msg.address, msg.data); #endif if (vdev->interrupt == VFIO_INT_MSIX) { @@ -766,9 +756,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, VFIOMSIVector *vector; int ret; - DPRINTF("%s(%04x:%02x:%02x.%x) vector %d used\n", __func__, - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, nr); + trace_vfio_msix_vector_do_use(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + nr); vector = &vdev->msi_vectors[nr]; @@ -854,9 +844,9 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); VFIOMSIVector *vector = &vdev->msi_vectors[nr]; - DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__, - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, nr); + trace_vfio_msix_vector_release(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + nr); /* * There are still old guests that mask and unmask vectors on every @@ -919,8 +909,8 @@ static void vfio_enable_msix(VFIODevice *vdev) error_report("vfio: msix_set_vector_notifiers failed"); } - DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_enable_msix(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); } static void vfio_enable_msi(VFIODevice *vdev) @@ -996,9 +986,9 @@ retry: return; } - DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__, - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, vdev->nr_vectors); + trace_vfio_enable_msi(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + vdev->nr_vectors); } static void vfio_disable_msi_common(VFIODevice *vdev) @@ -1048,8 +1038,8 @@ static void vfio_disable_msix(VFIODevice *vdev) vfio_disable_msi_common(vdev); - 
DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_disable_msix(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); } static void vfio_disable_msi(VFIODevice *vdev) @@ -1057,8 +1047,8 @@ static void vfio_disable_msi(VFIODevice *vdev) vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX); vfio_disable_msi_common(vdev); - DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_disable_msi(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); } static void vfio_update_msi(VFIODevice *vdev) @@ -1116,10 +1106,9 @@ static void vfio_bar_write(void *opaque, hwaddr addr, { VFIODevice *vdev = container_of(bar, VFIODevice, bars[bar->nr]); - DPRINTF("%s(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx", 0x%"PRIx64 - ", %d)\n", __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, bar->nr, addr, - data, size); + trace_vfio_bar_write(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + region->nr, addr, data, size); } #endif @@ -1171,10 +1160,9 @@ static uint64_t vfio_bar_read(void *opaque, { VFIODevice *vdev = container_of(bar, VFIODevice, bars[bar->nr]); - DPRINTF("%s(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx - ", %d) = 0x%"PRIx64"\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, - bar->nr, addr, size, data); + trace_vfio_bar_read(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + region->nr, addr, size, data); } #endif @@ -1205,11 +1193,11 @@ static void vfio_pci_load_rom(VFIODevice *vdev) return; } - DPRINTF("Device %04x:%02x:%02x.%x ROM:\n", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); - DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n", - (unsigned long)reg_info.size, (unsigned long)reg_info.offset, - (unsigned 
long)reg_info.flags); + trace_vfio_pci_load_rom(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + (unsigned long)reg_info.size, + (unsigned long)reg_info.offset, + (unsigned long)reg_info.flags); vdev->rom_size = size = reg_info.size; vdev->rom_offset = reg_info.offset; @@ -1280,9 +1268,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) break; } - DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n", - __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, addr, size, data); + trace_vfio_rom_read(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + addr, size, data); return data; } @@ -1378,8 +1366,9 @@ static void vfio_pci_size_rom(VFIODevice *vdev) } } - DPRINTF("%04x:%02x:%02x.%x ROM size 0x%x\n", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, size); + trace_vfio_pci_size_rom(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + size); snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom", vdev->host.domain, vdev->host.bus, vdev->host.slot, @@ -1428,8 +1417,7 @@ static void vfio_vga_write(void *opaque, hwaddr addr, __func__, region->offset + addr, data, size); } - DPRINTF("%s(0x%"HWADDR_PRIx", 0x%"PRIx64", %d)\n", - __func__, region->offset + addr, data, size); + trace_vfio_vga_write(region->offset + addr, data, size); } static uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size) @@ -1466,8 +1454,7 @@ static uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size) break; } - DPRINTF("%s(0x%"HWADDR_PRIx", %d) = 0x%"PRIx64"\n", - __func__, region->offset + addr, size, data); + trace_vfio_vga_read(region->offset + addr, size, data); return data; } @@ -1514,10 +1501,13 @@ static uint64_t vfio_generic_window_quirk_read(void *opaque, data = vfio_pci_read_config(&vdev->pdev, quirk->data.address_val + offset, size); - DPRINTF("%s 
read(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx", %d) = 0x%" - PRIx64"\n", memory_region_name(&quirk->mem), vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, - quirk->data.bar, addr, size, data); + trace_vfio_generic_window_quirk_read(memory_region_name(&quirk->mem), + vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function, + quirk->data.bar, + addr, size, data); } else { data = vfio_bar_read(&vdev->bars[quirk->data.bar], addr + quirk->data.base_offset, size); @@ -1563,10 +1553,14 @@ static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, vfio_pci_write_config(&vdev->pdev, quirk->data.address_val + offset, data, size); - DPRINTF("%s write(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx", 0x%" - PRIx64", %d)\n", memory_region_name(&quirk->mem), - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, quirk->data.bar, addr, data, size); + + trace_vfio_generic_window_quirk_write(memory_region_name(&quirk->mem), + vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function, + quirk->data.bar, + addr, data, size); return; } @@ -1599,10 +1593,13 @@ static uint64_t vfio_generic_quirk_read(void *opaque, data = vfio_pci_read_config(&vdev->pdev, addr - offset, size); - DPRINTF("%s read(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx", %d) = 0x%" - PRIx64"\n", memory_region_name(&quirk->mem), vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, - quirk->data.bar, addr + base, size, data); + trace_vfio_generic_quirk_read(memory_region_name(&quirk->mem), + vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function, + quirk->data.bar, + addr + base, size, data); } else { data = vfio_bar_read(&vdev->bars[quirk->data.bar], addr + base, size); } @@ -1628,10 +1625,13 @@ static void vfio_generic_quirk_write(void *opaque, hwaddr addr, vfio_pci_write_config(&vdev->pdev, addr - offset, data, size); - DPRINTF("%s write(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx", 0x%" - 
PRIx64", %d)\n", memory_region_name(&quirk->mem), - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, quirk->data.bar, addr + base, data, size); + trace_vfio_generic_quirk_write(memory_region_name(&quirk->mem), + vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function, + quirk->data.bar, + addr + base, data, size); } else { vfio_bar_write(&vdev->bars[quirk->data.bar], addr + base, data, size); } @@ -1663,7 +1663,7 @@ static uint64_t vfio_ati_3c3_quirk_read(void *opaque, uint64_t data = vfio_pci_read_config(&vdev->pdev, PCI_BASE_ADDRESS_0 + (4 * 4) + 1, size); - DPRINTF("%s(0x3c3, 1) = 0x%"PRIx64"\n", __func__, data); + trace_vfio_ati_3c3_quirk_read(data); return data; } @@ -1701,9 +1701,8 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIODevice *vdev) QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); - DPRINTF("Enabled ATI/AMD quirk 0x3c3 BAR4for device %04x:%02x:%02x.%x\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_vga_probe_ati_3c3_quirk(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); } /* @@ -1744,9 +1743,10 @@ static void vfio_probe_ati_bar4_window_quirk(VFIODevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - DPRINTF("Enabled ATI/AMD BAR4 window quirk for device %04x:%02x:%02x.%x\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_probe_ati_bar4_window_quirk(vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function); } #define PCI_VENDOR_ID_REALTEK 0x10ec @@ -1783,9 +1783,10 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, switch (addr) { case 4: /* address */ if (quirk->data.flags) { - DPRINTF("%s fake read(%04x:%02x:%02x.%d)\n", - memory_region_name(&quirk->mem), vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_rtl8168_window_quirk_read_fake( + 
memory_region_name(&quirk->mem), + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); return quirk->data.address_match ^ 0x10000000U; } @@ -1794,9 +1795,11 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, if (quirk->data.flags) { uint64_t val; - DPRINTF("%s MSI-X table read(%04x:%02x:%02x.%d)\n", - memory_region_name(&quirk->mem), vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_rtl8168_window_quirk_read_table( + memory_region_name(&quirk->mem), + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function + ); if (!(vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { return 0; @@ -1809,9 +1812,10 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, } } - DPRINTF("%s direct read(%04x:%02x:%02x.%d)\n", - memory_region_name(&quirk->mem), vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_rtl8168_window_quirk_read_direct( + memory_region_name(&quirk->mem), + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); return vfio_bar_read(&vdev->bars[quirk->data.bar], addr + 0x70, size); } @@ -1828,9 +1832,10 @@ static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, if (data & 0x10000000U && vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) { - DPRINTF("%s MSI-X table write(%04x:%02x:%02x.%d)\n", - memory_region_name(&quirk->mem), vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_rtl8168_window_quirk_write_table( + memory_region_name(&quirk->mem), + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); io_mem_write(&vdev->pdev.msix_table_mmio, (hwaddr)(quirk->data.address_match & 0xfff), @@ -1849,9 +1854,10 @@ static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, break; } - DPRINTF("%s direct write(%04x:%02x:%02x.%d)\n", - memory_region_name(&quirk->mem), vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + 
trace_vfio_rtl8168_window_quirk_write_direct( + memory_region_name(&quirk->mem), + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); vfio_bar_write(&vdev->bars[quirk->data.bar], addr + 0x70, data, size); } @@ -1888,9 +1894,10 @@ static void vfio_probe_rtl8168_bar2_window_quirk(VFIODevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - DPRINTF("Enabled RTL8168 BAR2 window quirk for device %04x:%02x:%02x.%x\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_probe_rtl8168_bar2_window_quirk(vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function); } /* * Trap the BAR2 MMIO window to config space as well. @@ -1922,9 +1929,10 @@ static void vfio_probe_ati_bar2_4000_quirk(VFIODevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - DPRINTF("Enabled ATI/AMD BAR2 0x4000 quirk for device %04x:%02x:%02x.%x\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_probe_ati_bar2_4000_quirk(vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function); } /* @@ -1970,7 +1978,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, if (quirk->data.flags == NV_3D0_READ && addr == quirk->data.data_offset) { data = vfio_pci_read_config(pdev, quirk->data.address_val, size); - DPRINTF("%s(0x3d0, %d) = 0x%"PRIx64"\n", __func__, size, data); + trace_vfio_nvidia_3d0_quirk_read(size, data); } quirk->data.flags = NV_3D0_NONE; @@ -2013,7 +2021,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, quirk->data.flags = NV_3D0_NONE; if (addr == quirk->data.data_offset) { vfio_pci_write_config(pdev, quirk->data.address_val, data, size); - DPRINTF("%s(0x3d0, 0x%"PRIx64", %d)\n", __func__, data, size); + trace_vfio_nvidia_3d0_quirk_write(data, size); return; } break; @@ -2057,9 +2065,10 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIODevice *vdev) 
QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); - DPRINTF("Enabled NVIDIA VGA 0x3d0 quirk for device %04x:%02x:%02x.%x\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function); } /* @@ -2147,9 +2156,10 @@ static void vfio_probe_nvidia_bar5_window_quirk(VFIODevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - DPRINTF("Enabled NVIDIA BAR5 window quirk for device %04x:%02x:%02x.%x\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_probe_nvidia_bar5_window_quirk(vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function); } static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr, @@ -2219,9 +2229,10 @@ static void vfio_probe_nvidia_bar0_88000_quirk(VFIODevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - DPRINTF("Enabled NVIDIA BAR0 0x88000 quirk for device %04x:%02x:%02x.%x\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function); } /* @@ -2238,7 +2249,7 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIODevice *vdev, int nr) } /* Log the chipset ID */ - DPRINTF("Nvidia NV%02x\n", + trace_vfio_probe_nvidia_bar0_1800_quirk_id( (unsigned int)(vfio_bar_read(&vdev->bars[0], 0, 4) >> 20) & 0xff); quirk = g_malloc0(sizeof(*quirk)); @@ -2257,9 +2268,10 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIODevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - DPRINTF("Enabled NVIDIA BAR0 0x1800 quirk for device %04x:%02x:%02x.%x\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + 
vdev->host.function); } /* @@ -2345,9 +2357,9 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) val = (emu_val & emu_bits) | (phys_val & ~emu_bits); - DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__, - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, addr, len, val); + trace_vfio_pci_read_config(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + addr, len, val); return val; } @@ -2358,9 +2370,9 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); uint32_t val_le = cpu_to_le32(val); - DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, 0x%x, len=0x%x)\n", __func__, - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, addr, val, len); + trace_vfio_pci_write_config(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + addr, val, len); /* Write everything to VFIO, let it filter out what we can't write */ if (pwrite(vdev->fd, &val_le, len, vdev->config_offset + addr) != len) { @@ -2422,7 +2434,7 @@ static int vfio_dma_unmap(VFIOContainer *container, }; if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { - DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno); + error_report("VFIO_UNMAP_DMA: %d\n", -errno); return -errno; } @@ -2455,7 +2467,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, return 0; } - DPRINTF("VFIO_MAP_DMA: %d\n", -errno); + error_report("VFIO_MAP_DMA: %d\n", -errno); return -errno; } @@ -2483,8 +2495,8 @@ static void vfio_iommu_map_notify(Notifier *n, void *data) void *vaddr; int ret; - DPRINTF("iommu map @ %"HWADDR_PRIx" - %"HWADDR_PRIx"\n", - iotlb->iova, iotlb->iova + iotlb->addr_mask); + trace_vfio_iommu_map_notify(iotlb->iova, + iotlb->iova + iotlb->addr_mask); /* * The IOMMU TLB entry we have just covers translation through @@ -2495,7 +2507,7 @@ static void vfio_iommu_map_notify(Notifier *n, void *data) iotlb->translated_addr, &xlat, 
&len, iotlb->perm & IOMMU_WO); if (!memory_region_is_ram(mr)) { - DPRINTF("iommu map to non memory area %"HWADDR_PRIx"\n", + error_report("iommu map to non memory area %"HWADDR_PRIx"\n", xlat); return; } @@ -2504,7 +2516,7 @@ static void vfio_iommu_map_notify(Notifier *n, void *data) * check that it did not truncate too much. */ if (len & iotlb->addr_mask) { - DPRINTF("iommu has granularity incompatible with target AS\n"); + error_report("iommu has granularity incompatible with target AS\n"); return; } @@ -2542,7 +2554,7 @@ static void vfio_listener_region_add(MemoryListener *listener, int ret; if (vfio_listener_skipped_section(section)) { - DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n", + trace_vfio_listener_region_add_skip( section->offset_within_address_space, section->offset_within_address_space + int128_get64(int128_sub(section->size, int128_one()))); @@ -2569,8 +2581,8 @@ static void vfio_listener_region_add(MemoryListener *listener, if (memory_region_is_iommu(section->mr)) { VFIOGuestIOMMU *giommu; - DPRINTF("region_add [iommu] %"HWADDR_PRIx" - %"HWADDR_PRIx"\n", - iova, int128_get64(int128_sub(llend, int128_one()))); + trace_vfio_listener_region_add_iommu(iova, + int128_get64(int128_sub(llend, int128_one()))); /* * FIXME: We should do some checking to see if the * capabilities of the host VFIO IOMMU are adequate to model @@ -2612,8 +2624,7 @@ static void vfio_listener_region_add(MemoryListener *listener, section->offset_within_region + (iova - section->offset_within_address_space); - DPRINTF("region_add [ram] %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n", - iova, end - 1, vaddr); + trace_vfio_listener_region_add_ram(iova, end - 1, vaddr); ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly); if (ret) { @@ -2645,7 +2656,7 @@ static void vfio_listener_region_del(MemoryListener *listener, int ret; if (vfio_listener_skipped_section(section)) { - DPRINTF("SKIPPING region_del %"HWADDR_PRIx" - %"PRIx64"\n", + 
trace_vfio_listener_region_del_skip( section->offset_within_address_space, section->offset_within_address_space + int128_get64(int128_sub(section->size, int128_one()))); @@ -2687,8 +2698,7 @@ static void vfio_listener_region_del(MemoryListener *listener, return; } - DPRINTF("region_del %"HWADDR_PRIx" - %"HWADDR_PRIx"\n", - iova, end - 1); + trace_vfio_listener_region_del(iova, end - 1); ret = vfio_dma_unmap(container, iova, end - iova); memory_region_unref(section->mr); @@ -2743,8 +2753,8 @@ static int vfio_setup_msi(VFIODevice *vdev, int pos) msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); - DPRINTF("%04x:%02x:%02x.%x PCI MSI CAP @0x%x\n", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, pos); + trace_vfio_setup_msi(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, pos); ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit); if (ret < 0) { @@ -2804,11 +2814,11 @@ static int vfio_early_setup_msix(VFIODevice *vdev) vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; - DPRINTF("%04x:%02x:%02x.%x " - "PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, pos, vdev->msix->table_bar, - vdev->msix->table_offset, vdev->msix->entries); + trace_vfio_early_setup_msix(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + pos, vdev->msix->table_bar, + vdev->msix->table_offset, + vdev->msix->entries); return 0; } @@ -3177,9 +3187,8 @@ static void vfio_check_pcie_flr(VFIODevice *vdev, uint8_t pos) uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP); if (cap & PCI_EXP_DEVCAP_FLR) { - DPRINTF("%04x:%02x:%02x.%x Supports FLR via PCIe cap\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_check_pcie_flr(vdev->host.domain, vdev->host.bus, + 
vdev->host.slot, vdev->host.function); vdev->has_flr = true; } } @@ -3189,9 +3198,8 @@ static void vfio_check_pm_reset(VFIODevice *vdev, uint8_t pos) uint16_t csr = pci_get_word(vdev->pdev.config + pos + PCI_PM_CTRL); if (!(csr & PCI_PM_CTRL_NO_SOFT_RESET)) { - DPRINTF("%04x:%02x:%02x.%x Supports PM reset\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_check_pm_reset(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); vdev->has_pm_reset = true; } } @@ -3201,9 +3209,8 @@ static void vfio_check_af_flr(VFIODevice *vdev, uint8_t pos) uint8_t cap = pci_get_byte(vdev->pdev.config + pos + PCI_AF_CAP); if ((cap & PCI_AF_CAP_TP) && (cap & PCI_AF_CAP_FLR)) { - DPRINTF("%04x:%02x:%02x.%x Supports FLR via AF cap\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_check_af_flr(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); vdev->has_flr = true; } } @@ -3354,9 +3361,9 @@ static int vfio_pci_hot_reset(VFIODevice *vdev, bool single) int ret, i, count; bool multi = false; - DPRINTF("%s(%04x:%02x:%02x.%x) %s\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, - single ? "one" : "multi"); + trace_vfio_pci_hot_reset(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, + single ? 
"one" : "multi"); vfio_pci_pre_reset(vdev); vdev->needs_reset = false; @@ -3387,9 +3394,10 @@ static int vfio_pci_hot_reset(VFIODevice *vdev, bool single) goto out_single; } - DPRINTF("%04x:%02x:%02x.%x: hot reset dependent devices:\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + trace_vfio_pci_hot_reset_has_dep_devices(vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function); /* Verify that we have all the groups required */ for (i = 0; i < info->count; i++) { @@ -3401,7 +3409,7 @@ static int vfio_pci_hot_reset(VFIODevice *vdev, bool single) host.slot = PCI_SLOT(devices[i].devfn); host.function = PCI_FUNC(devices[i].devfn); - DPRINTF("\t%04x:%02x:%02x.%x group %d\n", host.domain, + trace_vfio_pci_hot_reset_dep_devices(host.domain, host.bus, host.slot, host.function, devices[i].group_id); if (vfio_pci_host_match(&host, &vdev->host)) { @@ -3429,7 +3437,7 @@ static int vfio_pci_hot_reset(VFIODevice *vdev, bool single) QLIST_FOREACH(tmp, &group->device_list, next) { if (vfio_pci_host_match(&host, &tmp->host)) { if (single) { - DPRINTF("vfio: found another in-use device " + error_report("vfio: found another in-use device " "%04x:%02x:%02x.%x\n", host.domain, host.bus, host.slot, host.function); ret = -EINVAL; @@ -3444,7 +3452,7 @@ static int vfio_pci_hot_reset(VFIODevice *vdev, bool single) } if (!single && !multi) { - DPRINTF("vfio: No other in-use devices for multi hot reset\n"); + error_report("vfio: No other in-use devices for multi hot reset\n"); ret = -EINVAL; goto out_single; } @@ -3478,9 +3486,11 @@ static int vfio_pci_hot_reset(VFIODevice *vdev, bool single) ret = ioctl(vdev->fd, VFIO_DEVICE_PCI_HOT_RESET, reset); g_free(reset); - DPRINTF("%04x:%02x:%02x.%x hot reset: %s\n", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, - ret ? "%m" : "Success"); + trace_vfio_pci_hot_reset_result(vdev->host.domain, + vdev->host.bus, + vdev->host.slot, + vdev->host.function, + ret ? 
"%m" : "Success"); out: /* Re-enable INTx on affected devices */ @@ -3587,7 +3597,7 @@ static void vfio_kvm_device_add_group(VFIOGroup *group) }; if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) { - DPRINTF("KVM_CREATE_DEVICE: %m\n"); + error_report("KVM_CREATE_DEVICE: %m\n"); return; } @@ -3794,7 +3804,7 @@ static void vfio_disconnect_container(VFIOGroup *group) container->iommu_data.release(container); } QLIST_REMOVE(container, next); - DPRINTF("vfio_disconnect_container: close container->fd\n"); + trace_vfio_disconnect_container(container->fd); close(container->fd); g_free(container); @@ -3878,7 +3888,7 @@ static void vfio_put_group(VFIOGroup *group) vfio_kvm_device_del_group(group); vfio_disconnect_container(group); QLIST_REMOVE(group, next); - DPRINTF("vfio_put_group: close group->fd\n"); + trace_vfio_put_group(group->fd); close(group->fd); g_free(group); @@ -3914,8 +3924,8 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev) goto error; } - DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name, - dev_info.flags, dev_info.num_regions, dev_info.num_irqs); + trace_vfio_get_device_irq(name, dev_info.flags, + dev_info.num_regions, dev_info.num_irqs); if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) { error_report("vfio: Um, this isn't a PCI device"); @@ -3944,10 +3954,10 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev) goto error; } - DPRINTF("Device %s region %d:\n", name, i); - DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n", - (unsigned long)reg_info.size, (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + trace_vfio_get_device_region(name, i, + (unsigned long)reg_info.size, + (unsigned long)reg_info.offset, + (unsigned long)reg_info.flags); vdev->bars[i].flags = reg_info.flags; vdev->bars[i].size = reg_info.size; @@ -3965,10 +3975,9 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev) goto error; } - DPRINTF("Device %s config:\n", 
name); - DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n", - (unsigned long)reg_info.size, (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + trace_vfio_get_device_config(name, (unsigned long)reg_info.size, + (unsigned long)reg_info.offset, + (unsigned long)reg_info.flags); vdev->config_size = reg_info.size; if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) { @@ -4021,7 +4030,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev) ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ - DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure: %m\n"); + trace_vfio_get_device_get_irq_info_failure(); ret = 0; } else if (irq_info.count == 1) { vdev->pci_aer = true; @@ -4045,7 +4054,7 @@ static void vfio_put_device(VFIODevice *vdev) { QLIST_REMOVE(vdev, next); vdev->group = NULL; - DPRINTF("vfio_put_device: close vdev->fd\n"); + trace_vfio_put_device(vdev->fd); close(vdev->fd); if (vdev->msix) { g_free(vdev->msix); @@ -4194,8 +4203,8 @@ static int vfio_initfn(PCIDevice *pdev) return -errno; } - DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, groupid); + trace_vfio_initfn(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function, groupid); group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev)); if (!group) { @@ -4335,15 +4344,15 @@ static void vfio_pci_reset(DeviceState *dev) PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev); VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); - DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_pci_reset(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); vfio_pci_pre_reset(vdev); if (vdev->reset_works && (vdev->has_flr || !vdev->has_pm_reset) && !ioctl(vdev->fd, VFIO_DEVICE_RESET)) { - DPRINTF("%04x:%02x:%02x.%x 
FLR/VFIO_DEVICE_RESET\n", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_pci_reset_flr(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); goto post_reset; } @@ -4355,8 +4364,8 @@ static void vfio_pci_reset(DeviceState *dev) /* If nothing else works and the device supports PM reset, use it */ if (vdev->reset_works && vdev->has_pm_reset && !ioctl(vdev->fd, VFIO_DEVICE_RESET)) { - DPRINTF("%04x:%02x:%02x.%x PCI PM Reset\n", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + trace_vfio_pci_reset_pm(vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); goto post_reset; } diff --git a/trace-events b/trace-events index b5722ea8ae..cfe2db4255 100644 --- a/trace-events +++ b/trace-events @@ -1351,7 +1351,80 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x" pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x" -# hw/acpi/memory_hotplug.c +# hw/vfio/vfio-pci.c +vfio_intx_interrupt(int domain, int bus, int slot, int fn, char line) "(%04x:%02x:%02x.%x) Pin %c" +vfio_eoi(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x) EOI" +vfio_enable_intx_kvm(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x) KVM INTx accel enabled" +vfio_disable_intx_kvm(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x) KVM INTx accel disabled" +vfio_update_irq(int domain, int bus, int slot, int fn, int new_irq, int target_irq) " (%04x:%02x:%02x.%x) IRQ moved %d -> %d" +vfio_enable_intx(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x)" +vfio_disable_intx(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x)" +vfio_msi_interrupt(int domain, int bus, int slot, int fn, int index, uint64_t addr, int data) "(%04x:%02x:%02x.%x) vector %d 
0x%"PRIx64"/0x%x" +vfio_msix_vector_do_use(int domain, int bus, int slot, int fn, int index) "(%04x:%02x:%02x.%x) vector %d used" +vfio_msix_vector_release(int domain, int bus, int slot, int fn, int index) "(%04x:%02x:%02x.%x) vector %d released" +vfio_enable_msix(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x)" +vfio_enable_msi(int domain, int bus, int slot, int fn, int nr_vectors) "(%04x:%02x:%02x.%x) Enabled %d MSI vectors" +vfio_disable_msix(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x)" +vfio_disable_msi(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x)" +vfio_pci_load_rom(int domain, int bus, int slot, int fn, unsigned long size, unsigned long offset, unsigned long flags) "Device %04x:%02x:%02x.%x ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" +vfio_rom_read(int domain, int bus, int slot, int fn, uint64_t addr, int size, uint64_t data) "(%04x:%02x:%02x.%x, 0x%"PRIx64", 0x%x) = 0x%"PRIx64 +vfio_pci_size_rom(int domain, int bus, int slot, int fn, int size) "%04x:%02x:%02x.%x ROM size 0x%x" +vfio_vga_write(uint64_t addr, uint64_t data, int size) "(0x%"PRIx64", 0x%"PRIx64", %d)" +vfio_vga_read(uint64_t addr, int size, uint64_t data) "(0x%"PRIx64", %d) = 0x%"PRIx64 +vfio_generic_window_quirk_read(const char * region_name, int domain, int bus, int slot, int fn, int index, uint64_t addr, int size, uint64_t data) "%s read(%04x:%02x:%02x.%x:BAR%d+0x%"PRIx64", %d) = 0x%"PRIx64 +vfio_generic_window_quirk_write(const char * region_name, int domain, int bus, int slot, int fn, int index, uint64_t addr, uint64_t data, int size) "%s write(%04x:%02x:%02x.%x:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d)" +vfio_generic_quirk_read(const char * region_name, int domain, int bus, int slot, int fn, int index, uint64_t addr, int size, uint64_t data) "%s read(%04x:%02x:%02x.%x:BAR%d+0x%"PRIx64", %d) = 0x%"PRIx64 +vfio_generic_quirk_write(const char * region_name, int domain, int bus, int slot, int fn, int index, uint64_t addr, uint64_t data, int size) "%s 
write(%04x:%02x:%02x.%x:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d)" +vfio_ati_3c3_quirk_read(uint64_t data) " (0x3c3, 1) = 0x%"PRIx64 +vfio_vga_probe_ati_3c3_quirk(int domain, int bus, int slot, int fn) "Enabled ATI/AMD quirk 0x3c3 BAR4 for device %04x:%02x:%02x.%x" +vfio_probe_ati_bar4_window_quirk(int domain, int bus, int slot, int fn) "Enabled ATI/AMD BAR4 window quirk for device %04x:%02x:%02x.%x" +vfio_rtl8168_window_quirk_read_fake(const char *region_name, int domain, int bus, int slot, int fn) "%s fake read(%04x:%02x:%02x.%d)" +vfio_rtl8168_window_quirk_read_table(const char *region_name, int domain, int bus, int slot, int fn) "%s MSI-X table read(%04x:%02x:%02x.%d)" +vfio_rtl8168_window_quirk_read_direct(const char *region_name, int domain, int bus, int slot, int fn) "%s direct read(%04x:%02x:%02x.%d)" +vfio_rtl8168_window_quirk_write_table(const char *region_name, int domain, int bus, int slot, int fn) "%s MSI-X table write(%04x:%02x:%02x.%d)" +vfio_rtl8168_window_quirk_write_direct(const char *region_name, int domain, int bus, int slot, int fn) "%s direct write(%04x:%02x:%02x.%d)" +vfio_probe_rtl8168_bar2_window_quirk(int domain, int bus, int slot, int fn) "Enabled RTL8168 BAR2 window quirk for device %04x:%02x:%02x.%x" +vfio_probe_ati_bar2_4000_quirk(int domain, int bus, int slot, int fn) "Enabled ATI/AMD BAR2 0x4000 quirk for device %04x:%02x:%02x.%x" +vfio_nvidia_3d0_quirk_read(int size, uint64_t data) " (0x3d0, %d) = 0x%"PRIx64 +vfio_nvidia_3d0_quirk_write(uint64_t data, int size) " (0x3d0, 0x%"PRIx64", %d)" +vfio_vga_probe_nvidia_3d0_quirk(int domain, int bus, int slot, int fn) "Enabled NVIDIA VGA 0x3d0 quirk for device %04x:%02x:%02x.%x" +vfio_probe_nvidia_bar5_window_quirk(int domain, int bus, int slot, int fn) "Enabled NVIDIA BAR5 window quirk for device %04x:%02x:%02x.%x" +vfio_probe_nvidia_bar0_88000_quirk(int domain, int bus, int slot, int fn) "Enabled NVIDIA BAR0 0x88000 quirk for device %04x:%02x:%02x.%x" +vfio_probe_nvidia_bar0_1800_quirk_id(int 
id) "Nvidia NV%02x" +vfio_probe_nvidia_bar0_1800_quirk(int domain, int bus, int slot, int fn) "Enabled NVIDIA BAR0 0x1800 quirk for device %04x:%02x:%02x.%x" +vfio_pci_read_config(int domain, int bus, int slot, int fn, int addr, int len, int val) " (%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x" +vfio_pci_write_config(int domain, int bus, int slot, int fn, int addr, int val, int len) " (%04x:%02x:%02x.%x, @0x%x, 0x%x, len=0x%x)" +vfio_setup_msi(int domain, int bus, int slot, int fn, int pos) "%04x:%02x:%02x.%x PCI MSI CAP @0x%x" +vfio_early_setup_msix(int domain, int bus, int slot, int fn, int pos, int table_bar, int offset, int entries) "%04x:%02x:%02x.%x PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d" +vfio_check_pcie_flr(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x Supports FLR via PCIe cap" +vfio_check_pm_reset(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x Supports PM reset" +vfio_check_af_flr(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x Supports FLR via AF cap" +vfio_pci_hot_reset(int domain, int bus, int slot, int fn, const char *type) " (%04x:%02x:%02x.%x) %s" +vfio_pci_hot_reset_has_dep_devices(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x: hot reset dependent devices:" +vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" +vfio_pci_hot_reset_result(int domain, int bus, int slot, int fn, const char *result) "%04x:%02x:%02x.%x hot reset: %s" +vfio_get_device_region(const char *region_name, int index, unsigned long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" +vfio_get_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" +vfio_get_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" +vfio_get_device_irq(const char *name, unsigned flags, unsigned 
num_regions, unsigned num_irqs) "Device %s flags: %u, regions: %u, irqs: %u" +vfio_initfn(int domain, int bus, int slot, int fn, int group_id) " (%04x:%02x:%02x.%x) group %d" +vfio_pci_reset(int domain, int bus, int slot, int fn) " (%04x:%02x:%02x.%x)" +vfio_pci_reset_flr(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x FLR/VFIO_DEVICE_RESET" +vfio_pci_reset_pm(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x PCI PM Reset" + +vfio_bar_write(int domain, int bus, int slot, int fn, int index, uint64_t addr, uint64_t data, unsigned size) " (%04x:%02x:%02x.%x:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)" +vfio_bar_read(int domain, int bus, int slot, int fn, int index, uint64_t addr, unsigned size, uint64_t data) " (%04x:%02x:%02x.%x:region%d+0x%"PRIx64", %d) = 0x%"PRIx64 +vfio_iommu_map_notify(uint64_t iova_start, uint64_t iova_end) "iommu map @ %"PRIx64" - %"PRIx64 +vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add %"PRIx64" - %"PRIx64 +vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] %"PRIx64" - %"PRIx64 +vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] %"PRIx64" - %"PRIx64" [%p]" +vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del %"PRIx64" - %"PRIx64 +vfio_listener_region_del(uint64_t start, uint64_t end) "region_del %"PRIx64" - %"PRIx64 +vfio_disconnect_container(int fd) "close container->fd=%d" +vfio_put_group(int fd) "close group->fd=%d" +vfio_put_device(int fd) "close vdev->fd=%d" + +#hw/acpi/memory_hotplug.c mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32 mhp_acpi_read_addr_lo(uint32_t slot, uint32_t addr) "slot[0x%"PRIx32"] addr lo: 0x%"PRIx32 mhp_acpi_read_addr_hi(uint32_t slot, uint32_t addr) "slot[0x%"PRIx32"] addr hi: 0x%"PRIx32 From c67676711c7202f48bc43d2f80125eaea355755e Mon Sep 17 00:00:00 2001 From: Frank Blaschka Date: Fri, 19 Dec 2014 14:40:06 -0700 Subject: [PATCH 02/14] vfio: fix
adding memory listener to the right address space Depending on the device, container->space->as contains the valid AddressSpace. Using address_space_memory breaks devices sitting behind an iommu (and using a separate address space). Signed-off-by: Frank Blaschka Signed-off-by: Alex Williamson --- hw/misc/vfio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 6c36c8b687..51844cf70d 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -3713,7 +3713,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) container->iommu_data.release = vfio_listener_release; memory_listener_register(&container->iommu_data.type1.listener, - &address_space_memory); + container->space->as); if (container->iommu_data.type1.error) { ret = container->iommu_data.type1.error; From cf7087db10e2dc112e02782f1d1eb56ec42c728b Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 19 Dec 2014 15:24:06 -0700 Subject: [PATCH 03/14] vfio: move hw/misc/vfio.c to hw/vfio/pci.c Move vfio.h into include/hw/vfio This is done in preparation for the addition of VFIO platform device support. Signed-off-by: Kim Phillips Signed-off-by: Alex Williamson --- LICENSE | 2 +- MAINTAINERS | 2 +- hw/Makefile.objs | 1 + hw/misc/Makefile.objs | 1 - hw/ppc/spapr_pci_vfio.c | 2 +- hw/vfio/Makefile.objs | 3 +++ hw/{misc/vfio.c => vfio/pci.c} | 2 +- include/hw/{misc => vfio}/vfio.h | 0 8 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 hw/vfio/Makefile.objs rename hw/{misc/vfio.c => vfio/pci.c} (99%) rename include/hw/{misc => vfio}/vfio.h (100%) diff --git a/LICENSE b/LICENSE index da70e94932..0e0b4b9553 100644 --- a/LICENSE +++ b/LICENSE @@ -11,7 +11,7 @@ option) any later version. As of July 2013, contributions under version 2 of the GNU General Public License (and no later version) are only accepted for the following files -or directories: bsd-user/, linux-user/, hw/misc/vfio.c, hw/xen/xen_pt*. 
+or directories: bsd-user/, linux-user/, hw/vfio/, hw/xen/xen_pt*. 3) The Tiny Code Generator (TCG) is released under the BSD license (see license headers in files). diff --git a/MAINTAINERS b/MAINTAINERS index d72d6e37d0..01cfb05b76 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -657,7 +657,7 @@ F: hw/usb/dev-serial.c VFIO M: Alex Williamson S: Supported -F: hw/misc/vfio.c +F: hw/vfio/* vhost M: Michael S. Tsirkin diff --git a/hw/Makefile.objs b/hw/Makefile.objs index 52a1464051..73afa41b32 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -26,6 +26,7 @@ devices-dirs-$(CONFIG_SOFTMMU) += ssi/ devices-dirs-$(CONFIG_SOFTMMU) += timer/ devices-dirs-$(CONFIG_TPM) += tpm/ devices-dirs-$(CONFIG_SOFTMMU) += usb/ +devices-dirs-$(CONFIG_SOFTMMU) += vfio/ devices-dirs-$(CONFIG_VIRTIO) += virtio/ devices-dirs-$(CONFIG_SOFTMMU) += watchdog/ devices-dirs-$(CONFIG_SOFTMMU) += xen/ diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 979e532fdf..e47fea8530 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -21,7 +21,6 @@ common-obj-$(CONFIG_MACIO) += macio/ ifeq ($(CONFIG_PCI), y) obj-$(CONFIG_KVM) += ivshmem.o -obj-$(CONFIG_LINUX) += vfio.o endif obj-$(CONFIG_REALVIEW) += arm_sysctl.o diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index d3bddf2887..144912bf54 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -20,7 +20,7 @@ #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" #include "linux/vfio.h" -#include "hw/misc/vfio.h" +#include "hw/vfio/vfio.h" static Property spapr_phb_vfio_properties[] = { DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1), diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs new file mode 100644 index 0000000000..31c7dabb02 --- /dev/null +++ b/hw/vfio/Makefile.objs @@ -0,0 +1,3 @@ +ifeq ($(CONFIG_LINUX), y) +obj-$(CONFIG_PCI) += pci.o +endif diff --git a/hw/misc/vfio.c b/hw/vfio/pci.c similarity index 99% rename from hw/misc/vfio.c rename to hw/vfio/pci.c index 
51844cf70d..41244811f3 100644 --- a/hw/misc/vfio.c +++ b/hw/vfio/pci.c @@ -39,8 +39,8 @@ #include "qemu/range.h" #include "sysemu/kvm.h" #include "sysemu/sysemu.h" -#include "hw/misc/vfio.h" #include "trace.h" +#include "hw/vfio/vfio.h" /* Extra debugging, trap acceleration paths for more logging */ #define VFIO_ALLOW_MMAP 1 diff --git a/include/hw/misc/vfio.h b/include/hw/vfio/vfio.h similarity index 100% rename from include/hw/misc/vfio.h rename to include/hw/vfio/vfio.h From 9ee27d7381c2d540ee976c7cbae941c66bb66e70 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 19 Dec 2014 15:24:15 -0700 Subject: [PATCH 04/14] hw/vfio/pci: Rename VFIODevice into VFIOPCIDevice This prepares for the introduction of VFIOPlatformDevice Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 210 +++++++++++++++++++++++++------------------------- 1 file changed, 106 insertions(+), 104 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 41244811f3..f882dc70a4 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -48,11 +48,11 @@ #define VFIO_ALLOW_KVM_MSI 1 #define VFIO_ALLOW_KVM_MSIX 1 -struct VFIODevice; +struct VFIOPCIDevice; typedef struct VFIOQuirk { MemoryRegion mem; - struct VFIODevice *vdev; + struct VFIOPCIDevice *vdev; QLIST_ENTRY(VFIOQuirk) next; struct { uint32_t base_offset:TARGET_PAGE_BITS; @@ -123,7 +123,7 @@ typedef struct VFIOMSIVector { */ EventNotifier interrupt; EventNotifier kvm_interrupt; - struct VFIODevice *vdev; /* back pointer to device */ + struct VFIOPCIDevice *vdev; /* back pointer to device */ int virq; bool use; } VFIOMSIVector; @@ -185,7 +185,7 @@ typedef struct VFIOMSIXInfo { void *mmap; } VFIOMSIXInfo; -typedef struct VFIODevice { +typedef struct VFIOPCIDevice { PCIDevice pdev; int fd; VFIOINTx intx; @@ -203,7 +203,7 @@ typedef struct VFIODevice { VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */ VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */ PCIHostDeviceAddress host; - QLIST_ENTRY(VFIODevice) next; + 
QLIST_ENTRY(VFIOPCIDevice) next; struct VFIOGroup *group; EventNotifier err_notifier; uint32_t features; @@ -218,13 +218,13 @@ typedef struct VFIODevice { bool has_pm_reset; bool needs_reset; bool rom_read_failed; -} VFIODevice; +} VFIOPCIDevice; typedef struct VFIOGroup { int fd; int groupid; VFIOContainer *container; - QLIST_HEAD(, VFIODevice) device_list; + QLIST_HEAD(, VFIOPCIDevice) device_list; QLIST_ENTRY(VFIOGroup) next; QLIST_ENTRY(VFIOGroup) container_next; } VFIOGroup; @@ -268,16 +268,16 @@ static QLIST_HEAD(, VFIOGroup) static int vfio_kvm_device_fd = -1; #endif -static void vfio_disable_interrupts(VFIODevice *vdev); +static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len); -static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled); +static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); /* * Common VFIO interrupt disable */ -static void vfio_disable_irqindex(VFIODevice *vdev, int index) +static void vfio_disable_irqindex(VFIOPCIDevice *vdev, int index) { struct vfio_irq_set irq_set = { .argsz = sizeof(irq_set), @@ -293,7 +293,7 @@ static void vfio_disable_irqindex(VFIODevice *vdev, int index) /* * INTx */ -static void vfio_unmask_intx(VFIODevice *vdev) +static void vfio_unmask_intx(VFIOPCIDevice *vdev) { struct vfio_irq_set irq_set = { .argsz = sizeof(irq_set), @@ -307,7 +307,7 @@ static void vfio_unmask_intx(VFIODevice *vdev) } #ifdef CONFIG_KVM /* Unused outside of CONFIG_KVM code */ -static void vfio_mask_intx(VFIODevice *vdev) +static void vfio_mask_intx(VFIOPCIDevice *vdev) { struct vfio_irq_set irq_set = { .argsz = sizeof(irq_set), @@ -338,7 +338,7 @@ static void vfio_mask_intx(VFIODevice *vdev) */ static void vfio_intx_mmap_enable(void *opaque) { - VFIODevice *vdev = opaque; + VFIOPCIDevice *vdev = opaque; if (vdev->intx.pending) { 
timer_mod(vdev->intx.mmap_timer, @@ -351,7 +351,7 @@ static void vfio_intx_mmap_enable(void *opaque) static void vfio_intx_interrupt(void *opaque) { - VFIODevice *vdev = opaque; + VFIOPCIDevice *vdev = opaque; if (!event_notifier_test_and_clear(&vdev->intx.interrupt)) { return; @@ -370,7 +370,7 @@ static void vfio_intx_interrupt(void *opaque) } } -static void vfio_eoi(VFIODevice *vdev) +static void vfio_eoi(VFIOPCIDevice *vdev) { if (!vdev->intx.pending) { return; @@ -384,7 +384,7 @@ static void vfio_eoi(VFIODevice *vdev) vfio_unmask_intx(vdev); } -static void vfio_enable_intx_kvm(VFIODevice *vdev) +static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) { #ifdef CONFIG_KVM struct kvm_irqfd irqfd = { @@ -462,7 +462,7 @@ fail: #endif } -static void vfio_disable_intx_kvm(VFIODevice *vdev) +static void vfio_disable_intx_kvm(VFIOPCIDevice *vdev) { #ifdef CONFIG_KVM struct kvm_irqfd irqfd = { @@ -506,7 +506,7 @@ static void vfio_disable_intx_kvm(VFIODevice *vdev) static void vfio_update_irq(PCIDevice *pdev) { - VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); PCIINTxRoute route; if (vdev->interrupt != VFIO_INT_INTx) { @@ -537,7 +537,7 @@ static void vfio_update_irq(PCIDevice *pdev) vfio_eoi(vdev); } -static int vfio_enable_intx(VFIODevice *vdev) +static int vfio_enable_intx(VFIOPCIDevice *vdev) { uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); int ret, argsz; @@ -602,7 +602,7 @@ static int vfio_enable_intx(VFIODevice *vdev) return 0; } -static void vfio_disable_intx(VFIODevice *vdev) +static void vfio_disable_intx(VFIOPCIDevice *vdev) { int fd; @@ -629,7 +629,7 @@ static void vfio_disable_intx(VFIODevice *vdev) static void vfio_msi_interrupt(void *opaque) { VFIOMSIVector *vector = opaque; - VFIODevice *vdev = vector->vdev; + VFIOPCIDevice *vdev = vector->vdev; int nr = vector - vdev->msi_vectors; if (!event_notifier_test_and_clear(&vector->interrupt)) { @@ -661,7 +661,7 @@ static 
void vfio_msi_interrupt(void *opaque) } } -static int vfio_enable_vectors(VFIODevice *vdev, bool msix) +static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) { struct vfio_irq_set *irq_set; int ret = 0, i, argsz; @@ -752,7 +752,7 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg) static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, MSIMessage *msg, IOHandler *handler) { - VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIOMSIVector *vector; int ret; @@ -841,7 +841,7 @@ static int vfio_msix_vector_use(PCIDevice *pdev, static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) { - VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIOMSIVector *vector = &vdev->msi_vectors[nr]; trace_vfio_msix_vector_release(vdev->host.domain, vdev->host.bus, @@ -880,7 +880,7 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) } } -static void vfio_enable_msix(VFIODevice *vdev) +static void vfio_enable_msix(VFIOPCIDevice *vdev) { vfio_disable_interrupts(vdev); @@ -913,7 +913,7 @@ static void vfio_enable_msix(VFIODevice *vdev) vdev->host.slot, vdev->host.function); } -static void vfio_enable_msi(VFIODevice *vdev) +static void vfio_enable_msi(VFIOPCIDevice *vdev) { int ret, i; @@ -991,7 +991,7 @@ retry: vdev->nr_vectors); } -static void vfio_disable_msi_common(VFIODevice *vdev) +static void vfio_disable_msi_common(VFIOPCIDevice *vdev) { int i; @@ -1015,7 +1015,7 @@ static void vfio_disable_msi_common(VFIODevice *vdev) vfio_enable_intx(vdev); } -static void vfio_disable_msix(VFIODevice *vdev) +static void vfio_disable_msix(VFIOPCIDevice *vdev) { int i; @@ -1042,7 +1042,7 @@ static void vfio_disable_msix(VFIODevice *vdev) vdev->host.slot, vdev->host.function); } -static void vfio_disable_msi(VFIODevice *vdev) +static void vfio_disable_msi(VFIOPCIDevice *vdev) { 
vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX); vfio_disable_msi_common(vdev); @@ -1051,7 +1051,7 @@ static void vfio_disable_msi(VFIODevice *vdev) vdev->host.slot, vdev->host.function); } -static void vfio_update_msi(VFIODevice *vdev) +static void vfio_update_msi(VFIOPCIDevice *vdev) { int i; @@ -1104,7 +1104,7 @@ static void vfio_bar_write(void *opaque, hwaddr addr, #ifdef DEBUG_VFIO { - VFIODevice *vdev = container_of(bar, VFIODevice, bars[bar->nr]); + VFIOPCIDevice *vdev = container_of(bar, VFIOPCIDevice, bars[bar->nr]); trace_vfio_bar_write(vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function, @@ -1120,7 +1120,7 @@ static void vfio_bar_write(void *opaque, hwaddr addr, * which access will service the interrupt, so we're potentially * getting quite a few host interrupts per guest interrupt. */ - vfio_eoi(container_of(bar, VFIODevice, bars[bar->nr])); + vfio_eoi(container_of(bar, VFIOPCIDevice, bars[bar->nr])); } static uint64_t vfio_bar_read(void *opaque, @@ -1158,7 +1158,7 @@ static uint64_t vfio_bar_read(void *opaque, #ifdef DEBUG_VFIO { - VFIODevice *vdev = container_of(bar, VFIODevice, bars[bar->nr]); + VFIOPCIDevice *vdev = container_of(bar, VFIOPCIDevice, bars[bar->nr]); trace_vfio_bar_read(vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function, @@ -1167,7 +1167,7 @@ static uint64_t vfio_bar_read(void *opaque, #endif /* Same as write above */ - vfio_eoi(container_of(bar, VFIODevice, bars[bar->nr])); + vfio_eoi(container_of(bar, VFIOPCIDevice, bars[bar->nr])); return data; } @@ -1178,7 +1178,7 @@ static const MemoryRegionOps vfio_bar_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static void vfio_pci_load_rom(VFIODevice *vdev) +static void vfio_pci_load_rom(VFIOPCIDevice *vdev) { struct vfio_region_info reg_info = { .argsz = sizeof(reg_info), @@ -1236,7 +1236,7 @@ static void vfio_pci_load_rom(VFIODevice *vdev) static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) { - VFIODevice *vdev = opaque; + 
VFIOPCIDevice *vdev = opaque; union { uint8_t byte; uint16_t word; @@ -1286,7 +1286,7 @@ static const MemoryRegionOps vfio_rom_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static bool vfio_blacklist_opt_rom(VFIODevice *vdev) +static bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev) { PCIDevice *pdev = &vdev->pdev; uint16_t vendor_id, device_id; @@ -1306,7 +1306,7 @@ static bool vfio_blacklist_opt_rom(VFIODevice *vdev) return false; } -static void vfio_pci_size_rom(VFIODevice *vdev) +static void vfio_pci_size_rom(VFIOPCIDevice *vdev) { uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; @@ -1484,7 +1484,7 @@ static uint64_t vfio_generic_window_quirk_read(void *opaque, hwaddr addr, unsigned size) { VFIOQuirk *quirk = opaque; - VFIODevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = quirk->vdev; uint64_t data; if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) && @@ -1520,7 +1520,7 @@ static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { VFIOQuirk *quirk = opaque; - VFIODevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = quirk->vdev; if (ranges_overlap(addr, size, quirk->data.address_offset, quirk->data.address_size)) { @@ -1578,7 +1578,7 @@ static uint64_t vfio_generic_quirk_read(void *opaque, hwaddr addr, unsigned size) { VFIOQuirk *quirk = opaque; - VFIODevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = quirk->vdev; hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; uint64_t data; @@ -1611,7 +1611,7 @@ static void vfio_generic_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { VFIOQuirk *quirk = opaque; - VFIODevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = quirk->vdev; hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; @@ -1659,7 +1659,7 @@ static uint64_t 
vfio_ati_3c3_quirk_read(void *opaque, hwaddr addr, unsigned size) { VFIOQuirk *quirk = opaque; - VFIODevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = quirk->vdev; uint64_t data = vfio_pci_read_config(&vdev->pdev, PCI_BASE_ADDRESS_0 + (4 * 4) + 1, size); @@ -1673,7 +1673,7 @@ static const MemoryRegionOps vfio_ati_3c3_quirk = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static void vfio_vga_probe_ati_3c3_quirk(VFIODevice *vdev) +static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; @@ -1715,7 +1715,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIODevice *vdev) * that only read-only access is provided, but we drop writes when the window * is enabled to config space nonetheless. */ -static void vfio_probe_ati_bar4_window_quirk(VFIODevice *vdev, int nr) +static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; @@ -1778,7 +1778,7 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, hwaddr addr, unsigned size) { VFIOQuirk *quirk = opaque; - VFIODevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = quirk->vdev; switch (addr) { case 4: /* address */ @@ -1824,7 +1824,7 @@ static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { VFIOQuirk *quirk = opaque; - VFIODevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = quirk->vdev; switch (addr) { case 4: /* address */ @@ -1873,7 +1873,7 @@ static const MemoryRegionOps vfio_rtl8168_window_quirk = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static void vfio_probe_rtl8168_bar2_window_quirk(VFIODevice *vdev, int nr) +static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; @@ -1902,7 +1902,7 @@ static void vfio_probe_rtl8168_bar2_window_quirk(VFIODevice *vdev, int nr) /* * Trap the BAR2 MMIO window to config space as well. 
*/ -static void vfio_probe_ati_bar2_4000_quirk(VFIODevice *vdev, int nr) +static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; @@ -1971,7 +1971,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, hwaddr addr, unsigned size) { VFIOQuirk *quirk = opaque; - VFIODevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = quirk->vdev; PCIDevice *pdev = &vdev->pdev; uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], addr + quirk->data.base_offset, size); @@ -1990,7 +1990,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { VFIOQuirk *quirk = opaque; - VFIODevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = quirk->vdev; PCIDevice *pdev = &vdev->pdev; switch (quirk->data.flags) { @@ -2037,7 +2037,7 @@ static const MemoryRegionOps vfio_nvidia_3d0_quirk = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static void vfio_vga_probe_nvidia_3d0_quirk(VFIODevice *vdev) +static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; @@ -2130,7 +2130,7 @@ static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static void vfio_probe_nvidia_bar5_window_quirk(VFIODevice *vdev, int nr) +static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; @@ -2166,7 +2166,7 @@ static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { VFIOQuirk *quirk = opaque; - VFIODevice *vdev = quirk->vdev; + VFIOPCIDevice *vdev = quirk->vdev; PCIDevice *pdev = &vdev->pdev; hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; @@ -2199,7 +2199,7 @@ static const MemoryRegionOps vfio_nvidia_88000_quirk = { * * Here's offset 0x88000... 
*/ -static void vfio_probe_nvidia_bar0_88000_quirk(VFIODevice *vdev, int nr) +static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; @@ -2238,7 +2238,7 @@ static void vfio_probe_nvidia_bar0_88000_quirk(VFIODevice *vdev, int nr) /* * And here's the same for BAR0 offset 0x1800... */ -static void vfio_probe_nvidia_bar0_1800_quirk(VFIODevice *vdev, int nr) +static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) { PCIDevice *pdev = &vdev->pdev; VFIOQuirk *quirk; @@ -2283,13 +2283,13 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIODevice *vdev, int nr) /* * Common quirk probe entry points. */ -static void vfio_vga_quirk_setup(VFIODevice *vdev) +static void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) { vfio_vga_probe_ati_3c3_quirk(vdev); vfio_vga_probe_nvidia_3d0_quirk(vdev); } -static void vfio_vga_quirk_teardown(VFIODevice *vdev) +static void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev) { int i; @@ -2304,7 +2304,7 @@ static void vfio_vga_quirk_teardown(VFIODevice *vdev) } } -static void vfio_bar_quirk_setup(VFIODevice *vdev, int nr) +static void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) { vfio_probe_ati_bar4_window_quirk(vdev, nr); vfio_probe_ati_bar2_4000_quirk(vdev, nr); @@ -2314,7 +2314,7 @@ static void vfio_bar_quirk_setup(VFIODevice *vdev, int nr) vfio_probe_rtl8168_bar2_window_quirk(vdev, nr); } -static void vfio_bar_quirk_teardown(VFIODevice *vdev, int nr) +static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; @@ -2332,7 +2332,7 @@ static void vfio_bar_quirk_teardown(VFIODevice *vdev, int nr) */ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) { - VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val; memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); @@ -2367,7 
+2367,7 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len) { - VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); uint32_t val_le = cpu_to_le32(val); trace_vfio_pci_write_config(vdev->host.domain, vdev->host.bus, @@ -2722,7 +2722,7 @@ static void vfio_listener_release(VFIOContainer *container) /* * Interrupt setup */ -static void vfio_disable_interrupts(VFIODevice *vdev) +static void vfio_disable_interrupts(VFIOPCIDevice *vdev) { switch (vdev->interrupt) { case VFIO_INT_INTx: @@ -2737,7 +2737,7 @@ static void vfio_disable_interrupts(VFIODevice *vdev) } } -static int vfio_setup_msi(VFIODevice *vdev, int pos) +static int vfio_setup_msi(VFIOPCIDevice *vdev, int pos) { uint16_t ctrl; bool msi_64bit, msi_maskbit; @@ -2777,7 +2777,7 @@ static int vfio_setup_msi(VFIODevice *vdev, int pos) * need to first look for where the MSI-X table lives. So we * unfortunately split MSI-X setup across two functions. 
*/ -static int vfio_early_setup_msix(VFIODevice *vdev) +static int vfio_early_setup_msix(VFIOPCIDevice *vdev) { uint8_t pos; uint16_t ctrl; @@ -2823,7 +2823,7 @@ static int vfio_early_setup_msix(VFIODevice *vdev) return 0; } -static int vfio_setup_msix(VFIODevice *vdev, int pos) +static int vfio_setup_msix(VFIOPCIDevice *vdev, int pos) { int ret; @@ -2843,7 +2843,7 @@ static int vfio_setup_msix(VFIODevice *vdev, int pos) return 0; } -static void vfio_teardown_msi(VFIODevice *vdev) +static void vfio_teardown_msi(VFIOPCIDevice *vdev) { msi_uninit(&vdev->pdev); @@ -2856,7 +2856,7 @@ static void vfio_teardown_msi(VFIODevice *vdev) /* * Resource setup */ -static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled) +static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled) { int i; @@ -2874,7 +2874,7 @@ static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled) } } -static void vfio_unmap_bar(VFIODevice *vdev, int nr) +static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; @@ -2893,7 +2893,7 @@ static void vfio_unmap_bar(VFIODevice *vdev, int nr) } } -static int vfio_mmap_bar(VFIODevice *vdev, VFIOBAR *bar, +static int vfio_mmap_bar(VFIOPCIDevice *vdev, VFIOBAR *bar, MemoryRegion *mem, MemoryRegion *submem, void **map, size_t size, off_t offset, const char *name) @@ -2932,7 +2932,7 @@ empty_region: return ret; } -static void vfio_map_bar(VFIODevice *vdev, int nr) +static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; unsigned size = bar->size; @@ -3001,7 +3001,7 @@ static void vfio_map_bar(VFIODevice *vdev, int nr) vfio_bar_quirk_setup(vdev, nr); } -static void vfio_map_bars(VFIODevice *vdev) +static void vfio_map_bars(VFIOPCIDevice *vdev) { int i; @@ -3033,7 +3033,7 @@ static void vfio_map_bars(VFIODevice *vdev) } } -static void vfio_unmap_bars(VFIODevice *vdev) +static void vfio_unmap_bars(VFIOPCIDevice *vdev) { int i; @@ -3069,7 +3069,7 @@ static void 
vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask) pci_set_word(buf, (pci_get_word(buf) & ~mask) | val); } -static void vfio_add_emulated_word(VFIODevice *vdev, int pos, +static void vfio_add_emulated_word(VFIOPCIDevice *vdev, int pos, uint16_t val, uint16_t mask) { vfio_set_word_bits(vdev->pdev.config + pos, val, mask); @@ -3082,7 +3082,7 @@ static void vfio_set_long_bits(uint8_t *buf, uint32_t val, uint32_t mask) pci_set_long(buf, (pci_get_long(buf) & ~mask) | val); } -static void vfio_add_emulated_long(VFIODevice *vdev, int pos, +static void vfio_add_emulated_long(VFIOPCIDevice *vdev, int pos, uint32_t val, uint32_t mask) { vfio_set_long_bits(vdev->pdev.config + pos, val, mask); @@ -3090,7 +3090,7 @@ static void vfio_add_emulated_long(VFIODevice *vdev, int pos, vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask); } -static int vfio_setup_pcie_cap(VFIODevice *vdev, int pos, uint8_t size) +static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size) { uint16_t flags; uint8_t type; @@ -3182,7 +3182,7 @@ static int vfio_setup_pcie_cap(VFIODevice *vdev, int pos, uint8_t size) return pos; } -static void vfio_check_pcie_flr(VFIODevice *vdev, uint8_t pos) +static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos) { uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP); @@ -3193,7 +3193,7 @@ static void vfio_check_pcie_flr(VFIODevice *vdev, uint8_t pos) } } -static void vfio_check_pm_reset(VFIODevice *vdev, uint8_t pos) +static void vfio_check_pm_reset(VFIOPCIDevice *vdev, uint8_t pos) { uint16_t csr = pci_get_word(vdev->pdev.config + pos + PCI_PM_CTRL); @@ -3204,7 +3204,7 @@ static void vfio_check_pm_reset(VFIODevice *vdev, uint8_t pos) } } -static void vfio_check_af_flr(VFIODevice *vdev, uint8_t pos) +static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos) { uint8_t cap = pci_get_byte(vdev->pdev.config + pos + PCI_AF_CAP); @@ -3215,7 +3215,7 @@ static void vfio_check_af_flr(VFIODevice *vdev, uint8_t 
pos) } } -static int vfio_add_std_cap(VFIODevice *vdev, uint8_t pos) +static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) { PCIDevice *pdev = &vdev->pdev; uint8_t cap_id, next, size; @@ -3290,7 +3290,7 @@ static int vfio_add_std_cap(VFIODevice *vdev, uint8_t pos) return 0; } -static int vfio_add_capabilities(VFIODevice *vdev) +static int vfio_add_capabilities(VFIOPCIDevice *vdev) { PCIDevice *pdev = &vdev->pdev; @@ -3302,7 +3302,7 @@ static int vfio_add_capabilities(VFIODevice *vdev) return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]); } -static void vfio_pci_pre_reset(VFIODevice *vdev) +static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) { PCIDevice *pdev = &vdev->pdev; uint16_t cmd; @@ -3339,7 +3339,7 @@ static void vfio_pci_pre_reset(VFIODevice *vdev) vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); } -static void vfio_pci_post_reset(VFIODevice *vdev) +static void vfio_pci_post_reset(VFIOPCIDevice *vdev) { vfio_enable_intx(vdev); } @@ -3351,7 +3351,7 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, host1->slot == host2->slot && host1->function == host2->function); } -static int vfio_pci_hot_reset(VFIODevice *vdev, bool single) +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) { VFIOGroup *group; struct vfio_pci_hot_reset_info *info; @@ -3402,7 +3402,7 @@ static int vfio_pci_hot_reset(VFIODevice *vdev, bool single) /* Verify that we have all the groups required */ for (i = 0; i < info->count; i++) { PCIHostDeviceAddress host; - VFIODevice *tmp; + VFIOPCIDevice *tmp; host.domain = devices[i].segment; host.bus = devices[i].bus; @@ -3496,7 +3496,7 @@ out: /* Re-enable INTx on affected devices */ for (i = 0; i < info->count; i++) { PCIHostDeviceAddress host; - VFIODevice *tmp; + VFIOPCIDevice *tmp; host.domain = devices[i].segment; host.bus = devices[i].bus; @@ -3546,12 +3546,12 @@ out_single: * _one() will only do a hot reset for the one in-use devices case, calling * _multi() will do nothing if a _one() would 
have been sufficient. */ -static int vfio_pci_hot_reset_one(VFIODevice *vdev) +static int vfio_pci_hot_reset_one(VFIOPCIDevice *vdev) { return vfio_pci_hot_reset(vdev, true); } -static int vfio_pci_hot_reset_multi(VFIODevice *vdev) +static int vfio_pci_hot_reset_multi(VFIOPCIDevice *vdev) { return vfio_pci_hot_reset(vdev, false); } @@ -3559,7 +3559,7 @@ static int vfio_pci_hot_reset_multi(VFIODevice *vdev) static void vfio_pci_reset_handler(void *opaque) { VFIOGroup *group; - VFIODevice *vdev; + VFIOPCIDevice *vdev; QLIST_FOREACH(group, &group_list, next) { QLIST_FOREACH(vdev, &group->device_list, next) { @@ -3897,7 +3897,8 @@ static void vfio_put_group(VFIOGroup *group) } } -static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev) +static int vfio_get_device(VFIOGroup *group, const char *name, + VFIOPCIDevice *vdev) { struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; @@ -4050,7 +4051,7 @@ error: return ret; } -static void vfio_put_device(VFIODevice *vdev) +static void vfio_put_device(VFIOPCIDevice *vdev) { QLIST_REMOVE(vdev, next); vdev->group = NULL; @@ -4064,7 +4065,7 @@ static void vfio_put_device(VFIODevice *vdev) static void vfio_err_notifier_handler(void *opaque) { - VFIODevice *vdev = opaque; + VFIOPCIDevice *vdev = opaque; if (!event_notifier_test_and_clear(&vdev->err_notifier)) { return; @@ -4093,7 +4094,7 @@ static void vfio_err_notifier_handler(void *opaque) * and continue after disabling error recovery support for the * device. 
*/ -static void vfio_register_err_notifier(VFIODevice *vdev) +static void vfio_register_err_notifier(VFIOPCIDevice *vdev) { int ret; int argsz; @@ -4134,7 +4135,7 @@ static void vfio_register_err_notifier(VFIODevice *vdev) g_free(irq_set); } -static void vfio_unregister_err_notifier(VFIODevice *vdev) +static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev) { int argsz; struct vfio_irq_set *irq_set; @@ -4169,7 +4170,7 @@ static void vfio_unregister_err_notifier(VFIODevice *vdev) static int vfio_initfn(PCIDevice *pdev) { - VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev); + VFIOPCIDevice *pvdev, *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIOGroup *group; char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; ssize_t len; @@ -4322,7 +4323,7 @@ out_put: static void vfio_exitfn(PCIDevice *pdev) { - VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIOGroup *group = vdev->group; vfio_unregister_err_notifier(vdev); @@ -4342,7 +4343,7 @@ static void vfio_exitfn(PCIDevice *pdev) static void vfio_pci_reset(DeviceState *dev) { PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev); - VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); trace_vfio_pci_reset(vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); @@ -4376,7 +4377,7 @@ post_reset: static void vfio_instance_init(Object *obj) { PCIDevice *pci_dev = PCI_DEVICE(obj); - VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, PCI_DEVICE(obj)); + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(obj)); device_add_bootindex_property(obj, &vdev->bootindex, "bootindex", NULL, @@ -4384,15 +4385,16 @@ static void vfio_instance_init(Object *obj) } static Property vfio_pci_dev_properties[] = { - DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host), - DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIODevice, + DEFINE_PROP_PCI_HOST_DEVADDR("host", 
VFIOPCIDevice, host), + DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice, intx.mmap_timeout, 1100), - DEFINE_PROP_BIT("x-vga", VFIODevice, features, + DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_VGA_BIT, false), + DEFINE_PROP_INT32("bootindex", VFIOPCIDevice, bootindex, -1), /* * TODO - support passed fds... is this necessary? - * DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name), - * DEFINE_PROP_STRING("vfiogroupfd, VFIODevice, vfiogroupfd_name), + * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), + * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), */ DEFINE_PROP_END_OF_LIST(), }; @@ -4422,7 +4424,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) static const TypeInfo vfio_pci_dev_info = { .name = "vfio-pci", .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(VFIODevice), + .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_dev_class_init, .instance_init = vfio_instance_init, }; From 079eb19cbb3079536788dfd58832824804815e48 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 19 Dec 2014 15:24:24 -0700 Subject: [PATCH 05/14] hw/vfio/pci: generalize mask/unmask to any IRQ index To prepare for platform device introduction, rename vfio_mask_intx and vfio_unmask_intx to vfio_mask_single_irqindex and vfio_unmask_single_irqindex, respectively. Also add a new index parameter. With that name and prototype the functions will be usable for indexes other than VFIO_PCI_INTX_IRQ_INDEX.
Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index f882dc70a4..7083151300 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -293,12 +293,12 @@ static void vfio_disable_irqindex(VFIOPCIDevice *vdev, int index) /* * INTx */ -static void vfio_unmask_intx(VFIOPCIDevice *vdev) +static void vfio_unmask_single_irqindex(VFIOPCIDevice *vdev, int index) { struct vfio_irq_set irq_set = { .argsz = sizeof(irq_set), .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK, - .index = VFIO_PCI_INTX_IRQ_INDEX, + .index = index, .start = 0, .count = 1, }; @@ -307,12 +307,12 @@ static void vfio_unmask_intx(VFIOPCIDevice *vdev) } #ifdef CONFIG_KVM /* Unused outside of CONFIG_KVM code */ -static void vfio_mask_intx(VFIOPCIDevice *vdev) +static void vfio_mask_single_irqindex(VFIOPCIDevice *vdev, int index) { struct vfio_irq_set irq_set = { .argsz = sizeof(irq_set), .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK, - .index = VFIO_PCI_INTX_IRQ_INDEX, + .index = index, .start = 0, .count = 1, }; @@ -381,7 +381,7 @@ static void vfio_eoi(VFIOPCIDevice *vdev) vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); - vfio_unmask_intx(vdev); + vfio_unmask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); } static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) @@ -404,7 +404,7 @@ static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) /* Get to a known interrupt state */ qemu_set_fd_handler(irqfd.fd, NULL, NULL, vdev); - vfio_mask_intx(vdev); + vfio_mask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); @@ -442,7 +442,7 @@ static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) } /* Let'em rip */ - vfio_unmask_intx(vdev); + vfio_unmask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.kvm_accel = true; @@ -458,7 +458,7 @@ fail_irqfd: 
event_notifier_cleanup(&vdev->intx.unmask); fail: qemu_set_fd_handler(irqfd.fd, vfio_intx_interrupt, NULL, vdev); - vfio_unmask_intx(vdev); + vfio_unmask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); #endif } @@ -479,7 +479,7 @@ static void vfio_disable_intx_kvm(VFIOPCIDevice *vdev) * Get to a known state, hardware masked, QEMU ready to accept new * interrupts, QEMU IRQ de-asserted. */ - vfio_mask_intx(vdev); + vfio_mask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); @@ -497,7 +497,7 @@ static void vfio_disable_intx_kvm(VFIOPCIDevice *vdev) vdev->intx.kvm_accel = false; /* If we've missed an event, let it re-fire through QEMU */ - vfio_unmask_intx(vdev); + vfio_unmask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); trace_vfio_disable_intx_kvm(vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); From 5546a621a8801351601537b311539486b9b3ee79 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 19 Dec 2014 15:24:31 -0700 Subject: [PATCH 06/14] hw/vfio/pci: introduce minimalist VFIODevice with fd Introduce a new base VFIODevice struct that will be used by both PCI and Platform VFIO devices. Move VFIOPCIDevice fd field there. Obviously other fields from VFIOPCIDevice will be moved there but this patch file is introduced to ease the review. Also vfio_mask_single_irqindex, vfio_unmask_single_irqindex, vfio_disable_irqindex now take a VFIODevice handle as argument.
Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 117 +++++++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 54 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 7083151300..22d0c85fea 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -185,9 +185,13 @@ typedef struct VFIOMSIXInfo { void *mmap; } VFIOMSIXInfo; +typedef struct VFIODevice { + int fd; +} VFIODevice; + typedef struct VFIOPCIDevice { PCIDevice pdev; - int fd; + VFIODevice vbasedev; VFIOINTx intx; unsigned int config_size; uint8_t *emulated_config_bits; /* QEMU emulated bits, little-endian */ @@ -277,7 +281,7 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); /* * Common VFIO interrupt disable */ -static void vfio_disable_irqindex(VFIOPCIDevice *vdev, int index) +static void vfio_disable_irqindex(VFIODevice *vbasedev, int index) { struct vfio_irq_set irq_set = { .argsz = sizeof(irq_set), @@ -287,13 +291,13 @@ static void vfio_disable_irqindex(VFIOPCIDevice *vdev, int index) .count = 0, }; - ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); } /* * INTx */ -static void vfio_unmask_single_irqindex(VFIOPCIDevice *vdev, int index) +static void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) { struct vfio_irq_set irq_set = { .argsz = sizeof(irq_set), @@ -303,11 +307,11 @@ static void vfio_unmask_single_irqindex(VFIOPCIDevice *vdev, int index) .count = 1, }; - ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); } #ifdef CONFIG_KVM /* Unused outside of CONFIG_KVM code */ -static void vfio_mask_single_irqindex(VFIOPCIDevice *vdev, int index) +static void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) { struct vfio_irq_set irq_set = { .argsz = sizeof(irq_set), @@ -317,7 +321,7 @@ static void vfio_mask_single_irqindex(VFIOPCIDevice *vdev, int index) .count = 1, }; - ioctl(vdev->fd, 
VFIO_DEVICE_SET_IRQS, &irq_set); + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); } #endif @@ -381,7 +385,7 @@ static void vfio_eoi(VFIOPCIDevice *vdev) vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); - vfio_unmask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); + vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); } static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) @@ -404,7 +408,7 @@ static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) /* Get to a known interrupt state */ qemu_set_fd_handler(irqfd.fd, NULL, NULL, vdev); - vfio_mask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); + vfio_mask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); @@ -434,7 +438,7 @@ static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) *pfd = irqfd.resamplefd; - ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); g_free(irq_set); if (ret) { error_report("vfio: Error: Failed to setup INTx unmask fd: %m"); @@ -442,7 +446,7 @@ static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) } /* Let'em rip */ - vfio_unmask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); + vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.kvm_accel = true; @@ -458,7 +462,7 @@ fail_irqfd: event_notifier_cleanup(&vdev->intx.unmask); fail: qemu_set_fd_handler(irqfd.fd, vfio_intx_interrupt, NULL, vdev); - vfio_unmask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); + vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); #endif } @@ -479,7 +483,7 @@ static void vfio_disable_intx_kvm(VFIOPCIDevice *vdev) * Get to a known state, hardware masked, QEMU ready to accept new * interrupts, QEMU IRQ de-asserted. 
*/ - vfio_mask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); + vfio_mask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); @@ -497,7 +501,7 @@ static void vfio_disable_intx_kvm(VFIOPCIDevice *vdev) vdev->intx.kvm_accel = false; /* If we've missed an event, let it re-fire through QEMU */ - vfio_unmask_single_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); + vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); trace_vfio_disable_intx_kvm(vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); @@ -583,7 +587,7 @@ static int vfio_enable_intx(VFIOPCIDevice *vdev) *pfd = event_notifier_get_fd(&vdev->intx.interrupt); qemu_set_fd_handler(*pfd, vfio_intx_interrupt, NULL, vdev); - ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); g_free(irq_set); if (ret) { error_report("vfio: Error: Failed to setup INTx fd: %m"); @@ -608,7 +612,7 @@ static void vfio_disable_intx(VFIOPCIDevice *vdev) timer_del(vdev->intx.mmap_timer); vfio_disable_intx_kvm(vdev); - vfio_disable_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX); + vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); vfio_mmap_set_enabled(vdev, true); @@ -698,7 +702,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) fds[i] = fd; } - ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); g_free(irq_set); @@ -795,7 +799,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, * increase them as needed. 
*/ if (vdev->nr_vectors < nr + 1) { - vfio_disable_irqindex(vdev, VFIO_PCI_MSIX_IRQ_INDEX); + vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); vdev->nr_vectors = nr + 1; ret = vfio_enable_vectors(vdev, true); if (ret) { @@ -823,7 +827,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, *pfd = event_notifier_get_fd(&vector->interrupt); } - ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); g_free(irq_set); if (ret) { error_report("vfio: failed to modify vector, %d", ret); @@ -874,7 +878,7 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) *pfd = event_notifier_get_fd(&vector->interrupt); - ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); g_free(irq_set); } @@ -1033,7 +1037,7 @@ static void vfio_disable_msix(VFIOPCIDevice *vdev) } if (vdev->nr_vectors) { - vfio_disable_irqindex(vdev, VFIO_PCI_MSIX_IRQ_INDEX); + vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); } vfio_disable_msi_common(vdev); @@ -1044,7 +1048,7 @@ static void vfio_disable_msix(VFIOPCIDevice *vdev) static void vfio_disable_msi(VFIOPCIDevice *vdev) { - vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX); + vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSI_IRQ_INDEX); vfio_disable_msi_common(vdev); trace_vfio_disable_msi(vdev->host.domain, vdev->host.bus, @@ -1188,7 +1192,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) off_t off = 0; size_t bytes; - if (ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { + if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { error_report("vfio: Error getting ROM info: %m"); return; } @@ -1218,7 +1222,8 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) memset(vdev->rom, 0xff, size); while (size) { - bytes = pread(vdev->fd, vdev->rom + off, size, vdev->rom_offset + off); + bytes = pread(vdev->vbasedev.fd, vdev->rom + off, + size, 
vdev->rom_offset + off); if (bytes == 0) { break; } else if (bytes > 0) { @@ -1312,6 +1317,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; DeviceState *dev = DEVICE(vdev); char name[32]; + int fd = vdev->vbasedev.fd; if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ @@ -1330,10 +1336,10 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) * Use the same size ROM BAR as the physical device. The contents * will get filled in later when the guest tries to read it. */ - if (pread(vdev->fd, &orig, 4, offset) != 4 || - pwrite(vdev->fd, &size, 4, offset) != 4 || - pread(vdev->fd, &size, 4, offset) != 4 || - pwrite(vdev->fd, &orig, 4, offset) != 4) { + if (pread(fd, &orig, 4, offset) != 4 || + pwrite(fd, &size, 4, offset) != 4 || + pread(fd, &size, 4, offset) != 4 || + pwrite(fd, &orig, 4, offset) != 4) { error_report("%s(%04x:%02x:%02x.%x) failed: %m", __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); @@ -2345,7 +2351,8 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) if (~emu_bits & (0xffffffffU >> (32 - len * 8))) { ssize_t ret; - ret = pread(vdev->fd, &phys_val, len, vdev->config_offset + addr); + ret = pread(vdev->vbasedev.fd, &phys_val, len, + vdev->config_offset + addr); if (ret != len) { error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m", __func__, vdev->host.domain, vdev->host.bus, @@ -2375,7 +2382,8 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, addr, val, len); /* Write everything to VFIO, let it filter out what we can't write */ - if (pwrite(vdev->fd, &val_le, len, vdev->config_offset + addr) != len) { + if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr) + != len) { error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m", __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function, addr, 
val, len); @@ -2743,7 +2751,7 @@ static int vfio_setup_msi(VFIOPCIDevice *vdev, int pos) bool msi_64bit, msi_maskbit; int ret, entries; - if (pread(vdev->fd, &ctrl, sizeof(ctrl), + if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { return -errno; } @@ -2782,23 +2790,24 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) uint8_t pos; uint16_t ctrl; uint32_t table, pba; + int fd = vdev->vbasedev.fd; pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX); if (!pos) { return 0; } - if (pread(vdev->fd, &ctrl, sizeof(ctrl), + if (pread(fd, &ctrl, sizeof(ctrl), vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { return -errno; } - if (pread(vdev->fd, &table, sizeof(table), + if (pread(fd, &table, sizeof(table), vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) { return -errno; } - if (pread(vdev->fd, &pba, sizeof(pba), + if (pread(fd, &pba, sizeof(pba), vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) { return -errno; } @@ -2951,7 +2960,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) vdev->host.function, nr); /* Determine what type of BAR this is for registration */ - ret = pread(vdev->fd, &pci_bar, sizeof(pci_bar), + ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr)); if (ret != sizeof(pci_bar)) { error_report("vfio: Failed to read BAR %d (%m)", nr); @@ -3371,7 +3380,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) info = g_malloc0(sizeof(*info)); info->argsz = sizeof(*info); - ret = ioctl(vdev->fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); if (ret && errno != ENOSPC) { ret = -errno; if (!vdev->has_pm_reset) { @@ -3387,7 +3396,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) info->argsz = sizeof(*info) + (count * sizeof(*devices)); devices = &info->devices[0]; - ret = ioctl(vdev->fd, 
VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); if (ret) { ret = -errno; error_report("vfio: hot reset info failed: %m"); @@ -3483,7 +3492,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) } /* Bus reset! */ - ret = ioctl(vdev->fd, VFIO_DEVICE_PCI_HOT_RESET, reset); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); g_free(reset); trace_vfio_pci_hot_reset_result(vdev->host.domain, @@ -3914,12 +3923,12 @@ static int vfio_get_device(VFIOGroup *group, const char *name, return ret; } - vdev->fd = ret; + vdev->vbasedev.fd = ret; vdev->group = group; QLIST_INSERT_HEAD(&group->device_list, vdev, next); /* Sanity check device */ - ret = ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &dev_info); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_INFO, &dev_info); if (ret) { error_report("vfio: error getting device info: %m"); goto error; @@ -3949,7 +3958,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) { reg_info.index = i; - ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto error; @@ -3963,14 +3972,14 @@ static int vfio_get_device(VFIOGroup *group, const char *name, vdev->bars[i].flags = reg_info.flags; vdev->bars[i].size = reg_info.size; vdev->bars[i].fd_offset = reg_info.offset; - vdev->bars[i].fd = vdev->fd; + vdev->bars[i].fd = vdev->vbasedev.fd; vdev->bars[i].nr = i; QLIST_INIT(&vdev->bars[i].quirks); } reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX; - ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); if (ret) { error_report("vfio: Error getting config info: %m"); goto error; @@ -3993,7 +4002,7 @@ static int vfio_get_device(VFIOGroup *group, const char 
*name, .index = VFIO_PCI_VGA_REGION_INDEX, }; - ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &vga_info); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &vga_info); if (ret) { error_report( "vfio: Device does not support requested feature x-vga"); @@ -4010,7 +4019,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, } vdev->vga.fd_offset = vga_info.offset; - vdev->vga.fd = vdev->fd; + vdev->vga.fd = vdev->vbasedev.fd; vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; @@ -4028,7 +4037,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, } irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; - ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ trace_vfio_get_device_get_irq_info_failure(); @@ -4046,7 +4055,7 @@ error: if (ret) { QLIST_REMOVE(vdev, next); vdev->group = NULL; - close(vdev->fd); + close(vdev->vbasedev.fd); } return ret; } @@ -4055,8 +4064,8 @@ static void vfio_put_device(VFIOPCIDevice *vdev) { QLIST_REMOVE(vdev, next); vdev->group = NULL; - trace_vfio_put_device(vdev->fd); - close(vdev->fd); + trace_vfio_put_device(vdev->vbasedev.fd); + close(vdev->vbasedev.fd); if (vdev->msix) { g_free(vdev->msix); vdev->msix = NULL; @@ -4125,7 +4134,7 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev) *pfd = event_notifier_get_fd(&vdev->err_notifier); qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev); - ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { error_report("vfio: Failed to set up error notification"); qemu_set_fd_handler(*pfd, NULL, NULL, vdev); @@ -4158,7 +4167,7 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev) pfd = (int32_t *)&irq_set->data; *pfd = -1; - ret = ioctl(vdev->fd, 
VFIO_DEVICE_SET_IRQS, irq_set); + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { error_report("vfio: Failed to de-assign error fd: %m"); } @@ -4237,7 +4246,7 @@ static int vfio_initfn(PCIDevice *pdev) } /* Get a copy of config space */ - ret = pread(vdev->fd, vdev->pdev.config, + ret = pread(vdev->vbasedev.fd, vdev->pdev.config, MIN(pci_config_size(&vdev->pdev), vdev->config_size), vdev->config_offset); if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { @@ -4351,7 +4360,7 @@ static void vfio_pci_reset(DeviceState *dev) vfio_pci_pre_reset(vdev); if (vdev->reset_works && (vdev->has_flr || !vdev->has_pm_reset) && - !ioctl(vdev->fd, VFIO_DEVICE_RESET)) { + !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) { trace_vfio_pci_reset_flr(vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); goto post_reset; @@ -4364,7 +4373,7 @@ static void vfio_pci_reset(DeviceState *dev) /* If nothing else works and the device supports PM reset, use it */ if (vdev->reset_works && vdev->has_pm_reset && - !ioctl(vdev->fd, VFIO_DEVICE_RESET)) { + !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) { trace_vfio_pci_reset_pm(vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); goto post_reset; From 462037c9e85b27149e71d7f5c7f41375ca6e47d5 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 22 Dec 2014 09:54:31 -0700 Subject: [PATCH 07/14] hw/vfio/pci: add type, name and group fields in VFIODevice Add 3 new fields in the VFIODevice struct. Type is set to VFIO_DEVICE_TYPE_PCI. The type enum value will later be used to discriminate between VFIO PCI and platform devices. The name is set to domain:bus:slot:function. Currently used to test whether the device already is attached to the group. Later on, the name will be used to simplify all traces. The group is simply moved from VFIOPCIDevice to VFIODevice. 
Signed-off-by: Eric Auger [Fix g_strdup_printf() usage] Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 22d0c85fea..be7b8ffc12 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -48,6 +48,10 @@ #define VFIO_ALLOW_KVM_MSI 1 #define VFIO_ALLOW_KVM_MSIX 1 +enum { + VFIO_DEVICE_TYPE_PCI = 0, +}; + struct VFIOPCIDevice; typedef struct VFIOQuirk { @@ -186,7 +190,10 @@ typedef struct VFIOMSIXInfo { } VFIOMSIXInfo; typedef struct VFIODevice { + struct VFIOGroup *group; + char *name; int fd; + int type; } VFIODevice; typedef struct VFIOPCIDevice { @@ -208,7 +215,6 @@ typedef struct VFIOPCIDevice { VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */ PCIHostDeviceAddress host; QLIST_ENTRY(VFIOPCIDevice) next; - struct VFIOGroup *group; EventNotifier err_notifier; uint32_t features; #define VFIO_FEATURE_ENABLE_VGA_BIT 0 @@ -3924,7 +3930,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, } vdev->vbasedev.fd = ret; - vdev->group = group; + vdev->vbasedev.group = group; QLIST_INSERT_HEAD(&group->device_list, vdev, next); /* Sanity check device */ @@ -4054,7 +4060,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, error: if (ret) { QLIST_REMOVE(vdev, next); - vdev->group = NULL; + vdev->vbasedev.group = NULL; close(vdev->vbasedev.fd); } return ret; @@ -4063,9 +4069,10 @@ error: static void vfio_put_device(VFIOPCIDevice *vdev) { QLIST_REMOVE(vdev, next); - vdev->group = NULL; + vdev->vbasedev.group = NULL; trace_vfio_put_device(vdev->vbasedev.fd); close(vdev->vbasedev.fd); + g_free(vdev->vbasedev.name); if (vdev->msix) { g_free(vdev->msix); vdev->msix = NULL; @@ -4197,6 +4204,11 @@ static int vfio_initfn(PCIDevice *pdev) return -errno; } + vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI; + vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x", + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); + 
strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1); len = readlink(path, iommu_group_path, sizeof(path)); @@ -4227,10 +4239,7 @@ static int vfio_initfn(PCIDevice *pdev) vdev->host.function); QLIST_FOREACH(pvdev, &group->device_list, next) { - if (pvdev->host.domain == vdev->host.domain && - pvdev->host.bus == vdev->host.bus && - pvdev->host.slot == vdev->host.slot && - pvdev->host.function == vdev->host.function) { + if (strcmp(pvdev->vbasedev.name, vdev->vbasedev.name) == 0) { error_report("vfio: error: device %s is already attached", path); vfio_put_group(group); @@ -4333,7 +4342,7 @@ out_put: static void vfio_exitfn(PCIDevice *pdev) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); - VFIOGroup *group = vdev->group; + VFIOGroup *group = vdev->vbasedev.group; vfio_unregister_err_notifier(vdev); pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); From b47d8efa9f430c332bf96ce6eede169eb48422ad Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 22 Dec 2014 09:54:35 -0700 Subject: [PATCH 08/14] hw/vfio/pci: handle reset at VFIODevice Since we can potentially have both PCI and platform devices in the same VFIO group, the group now owns a list of VFIODevices. A unified reset handler, vfio_reset_handler, is registered, looping through this VFIODevice list. 2 specialized operations are introduced (vfio_compute_needs_reset and vfio_hot_reset_multi): they allow type-specific behavior to be implemented. Also, the reset_works and needs_reset fields are moved from VFIOPCIDevice into VFIODevice.
Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 93 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 30 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index be7b8ffc12..4ac5ad6e50 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -189,13 +189,24 @@ typedef struct VFIOMSIXInfo { void *mmap; } VFIOMSIXInfo; +typedef struct VFIODeviceOps VFIODeviceOps; + typedef struct VFIODevice { + QLIST_ENTRY(VFIODevice) next; struct VFIOGroup *group; char *name; int fd; int type; + bool reset_works; + bool needs_reset; + VFIODeviceOps *ops; } VFIODevice; +struct VFIODeviceOps { + void (*vfio_compute_needs_reset)(VFIODevice *vdev); + int (*vfio_hot_reset_multi)(VFIODevice *vdev); +}; + typedef struct VFIOPCIDevice { PCIDevice pdev; VFIODevice vbasedev; @@ -214,19 +225,16 @@ typedef struct VFIOPCIDevice { VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */ VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */ PCIHostDeviceAddress host; - QLIST_ENTRY(VFIOPCIDevice) next; EventNotifier err_notifier; uint32_t features; #define VFIO_FEATURE_ENABLE_VGA_BIT 0 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT) int32_t bootindex; uint8_t pm_cap; - bool reset_works; bool has_vga; bool pci_aer; bool has_flr; bool has_pm_reset; - bool needs_reset; bool rom_read_failed; } VFIOPCIDevice; @@ -234,7 +242,7 @@ typedef struct VFIOGroup { int fd; int groupid; VFIOContainer *container; - QLIST_HEAD(, VFIOPCIDevice) device_list; + QLIST_HEAD(, VFIODevice) device_list; QLIST_ENTRY(VFIOGroup) next; QLIST_ENTRY(VFIOGroup) container_next; } VFIOGroup; @@ -3381,7 +3389,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) single ? 
"one" : "multi"); vfio_pci_pre_reset(vdev); - vdev->needs_reset = false; + vdev->vbasedev.needs_reset = false; info = g_malloc0(sizeof(*info)); info->argsz = sizeof(*info); @@ -3418,6 +3426,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) for (i = 0; i < info->count; i++) { PCIHostDeviceAddress host; VFIOPCIDevice *tmp; + VFIODevice *vbasedev_iter; host.domain = devices[i].segment; host.bus = devices[i].bus; @@ -3449,7 +3458,11 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) } /* Prep dependent devices for reset and clear our marker. */ - QLIST_FOREACH(tmp, &group->device_list, next) { + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { + continue; + } + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); if (vfio_pci_host_match(&host, &tmp->host)) { if (single) { error_report("vfio: found another in-use device " @@ -3459,7 +3472,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) goto out_single; } vfio_pci_pre_reset(tmp); - tmp->needs_reset = false; + tmp->vbasedev.needs_reset = false; multi = true; break; } @@ -3512,6 +3525,7 @@ out: for (i = 0; i < info->count; i++) { PCIHostDeviceAddress host; VFIOPCIDevice *tmp; + VFIODevice *vbasedev_iter; host.domain = devices[i].segment; host.bus = devices[i].bus; @@ -3532,7 +3546,11 @@ out: break; } - QLIST_FOREACH(tmp, &group->device_list, next) { + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { + continue; + } + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); if (vfio_pci_host_match(&host, &tmp->host)) { vfio_pci_post_reset(tmp); break; @@ -3566,28 +3584,40 @@ static int vfio_pci_hot_reset_one(VFIOPCIDevice *vdev) return vfio_pci_hot_reset(vdev, true); } -static int vfio_pci_hot_reset_multi(VFIOPCIDevice *vdev) +static int vfio_pci_hot_reset_multi(VFIODevice *vbasedev) { + VFIOPCIDevice *vdev = container_of(vbasedev, 
VFIOPCIDevice, vbasedev); return vfio_pci_hot_reset(vdev, false); } -static void vfio_pci_reset_handler(void *opaque) +static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev) +{ + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); + if (!vbasedev->reset_works || (!vdev->has_flr && vdev->has_pm_reset)) { + vbasedev->needs_reset = true; + } +} + +static VFIODeviceOps vfio_pci_ops = { + .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, + .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, +}; + +static void vfio_reset_handler(void *opaque) { VFIOGroup *group; - VFIOPCIDevice *vdev; + VFIODevice *vbasedev; QLIST_FOREACH(group, &group_list, next) { - QLIST_FOREACH(vdev, &group->device_list, next) { - if (!vdev->reset_works || (!vdev->has_flr && vdev->has_pm_reset)) { - vdev->needs_reset = true; - } + QLIST_FOREACH(vbasedev, &group->device_list, next) { + vbasedev->ops->vfio_compute_needs_reset(vbasedev); } } QLIST_FOREACH(group, &group_list, next) { - QLIST_FOREACH(vdev, &group->device_list, next) { - if (vdev->needs_reset) { - vfio_pci_hot_reset_multi(vdev); + QLIST_FOREACH(vbasedev, &group->device_list, next) { + if (vbasedev->needs_reset) { + vbasedev->ops->vfio_hot_reset_multi(vbasedev); } } } @@ -3876,7 +3906,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as) } if (QLIST_EMPTY(&group_list)) { - qemu_register_reset(vfio_pci_reset_handler, NULL); + qemu_register_reset(vfio_reset_handler, NULL); } QLIST_INSERT_HEAD(&group_list, group, next); @@ -3908,7 +3938,7 @@ static void vfio_put_group(VFIOGroup *group) g_free(group); if (QLIST_EMPTY(&group_list)) { - qemu_unregister_reset(vfio_pci_reset_handler, NULL); + qemu_unregister_reset(vfio_reset_handler, NULL); } } @@ -3931,7 +3961,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, vdev->vbasedev.fd = ret; vdev->vbasedev.group = group; - QLIST_INSERT_HEAD(&group->device_list, vdev, next); + QLIST_INSERT_HEAD(&group->device_list, &vdev->vbasedev, next); /* 
Sanity check device */ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_INFO, &dev_info); @@ -3948,7 +3978,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, goto error; } - vdev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); + vdev->vbasedev.reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); if (dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { error_report("vfio: unexpected number of io regions %u", @@ -4059,7 +4089,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, error: if (ret) { - QLIST_REMOVE(vdev, next); + QLIST_REMOVE(&vdev->vbasedev, next); vdev->vbasedev.group = NULL; close(vdev->vbasedev.fd); } @@ -4068,7 +4098,7 @@ error: static void vfio_put_device(VFIOPCIDevice *vdev) { - QLIST_REMOVE(vdev, next); + QLIST_REMOVE(&vdev->vbasedev, next); vdev->vbasedev.group = NULL; trace_vfio_put_device(vdev->vbasedev.fd); close(vdev->vbasedev.fd); @@ -4186,7 +4216,8 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev) static int vfio_initfn(PCIDevice *pdev) { - VFIOPCIDevice *pvdev, *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIODevice *vbasedev_iter; VFIOGroup *group; char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; ssize_t len; @@ -4204,6 +4235,8 @@ static int vfio_initfn(PCIDevice *pdev) return -errno; } + vdev->vbasedev.ops = &vfio_pci_ops; + vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI; vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x", vdev->host.domain, vdev->host.bus, @@ -4238,9 +4271,8 @@ static int vfio_initfn(PCIDevice *pdev) vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); - QLIST_FOREACH(pvdev, &group->device_list, next) { - if (strcmp(pvdev->vbasedev.name, vdev->vbasedev.name) == 0) { - + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) { error_report("vfio: error: device %s is already attached", 
path); vfio_put_group(group); return -EBUSY; @@ -4368,7 +4400,8 @@ static void vfio_pci_reset(DeviceState *dev) vfio_pci_pre_reset(vdev); - if (vdev->reset_works && (vdev->has_flr || !vdev->has_pm_reset) && + if (vdev->vbasedev.reset_works && + (vdev->has_flr || !vdev->has_pm_reset) && !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) { trace_vfio_pci_reset_flr(vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); @@ -4381,7 +4414,7 @@ static void vfio_pci_reset(DeviceState *dev) } /* If nothing else works and the device supports PM reset, use it */ - if (vdev->reset_works && vdev->has_pm_reset && + if (vdev->vbasedev.reset_works && vdev->has_pm_reset && !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) { trace_vfio_pci_reset_pm(vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); From a664477db8dac84cc046e9d79701eefda1d58703 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 22 Dec 2014 09:54:37 -0700 Subject: [PATCH 09/14] hw/vfio/pci: Introduce VFIORegion This structure is going to be shared by VFIOPCIDevice and VFIOPlatformDevice. VFIOBAR includes it. vfio_eoi becomes a VFIODevice op, specialized by the parent device. This makes it possible to transform vfio_bar_write/read into generic vfio_region_write/read that will be used by VFIOPlatformDevice too.
vfio_mmap_bar becomes vfio_mmap_region Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 193 ++++++++++++++++++++++++++------------------------ trace-events | 4 +- 2 files changed, 103 insertions(+), 94 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 4ac5ad6e50..6456348cbb 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -77,15 +77,19 @@ typedef struct VFIOQuirk { } data; } VFIOQuirk; -typedef struct VFIOBAR { - off_t fd_offset; /* offset of BAR within device fd */ - int fd; /* device fd, allows us to pass VFIOBAR as opaque data */ +typedef struct VFIORegion { + struct VFIODevice *vbasedev; + off_t fd_offset; /* offset of region within device fd */ MemoryRegion mem; /* slow, read/write access */ MemoryRegion mmap_mem; /* direct mapped access */ void *mmap; size_t size; uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ - uint8_t nr; /* cache the BAR number for debug */ + uint8_t nr; /* cache the region number for debug */ +} VFIORegion; + +typedef struct VFIOBAR { + VFIORegion region; bool ioport; bool mem64; QLIST_HEAD(, VFIOQuirk) quirks; @@ -205,6 +209,7 @@ typedef struct VFIODevice { struct VFIODeviceOps { void (*vfio_compute_needs_reset)(VFIODevice *vdev); int (*vfio_hot_reset_multi)(VFIODevice *vdev); + void (*vfio_eoi)(VFIODevice *vdev); }; typedef struct VFIOPCIDevice { @@ -388,8 +393,10 @@ static void vfio_intx_interrupt(void *opaque) } } -static void vfio_eoi(VFIOPCIDevice *vdev) +static void vfio_eoi(VFIODevice *vbasedev) { + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); + if (!vdev->intx.pending) { return; } @@ -399,7 +406,7 @@ static void vfio_eoi(VFIOPCIDevice *vdev) vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); - vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); + vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX); } static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) @@ -552,7 +559,7 @@ static void vfio_update_irq(PCIDevice *pdev)
vfio_enable_intx_kvm(vdev); /* Re-enable the interrupt in cased we missed an EOI */ - vfio_eoi(vdev); + vfio_eoi(&vdev->vbasedev); } static int vfio_enable_intx(VFIOPCIDevice *vdev) @@ -1089,10 +1096,11 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) /* * IO Port/MMIO - Beware of the endians, VFIO is always little endian */ -static void vfio_bar_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) +static void vfio_region_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) { - VFIOBAR *bar = opaque; + VFIORegion *region = opaque; + VFIODevice *vbasedev = region->vbasedev; union { uint8_t byte; uint16_t word; @@ -1115,20 +1123,14 @@ static void vfio_bar_write(void *opaque, hwaddr addr, break; } - if (pwrite(bar->fd, &buf, size, bar->fd_offset + addr) != size) { - error_report("%s(,0x%"HWADDR_PRIx", 0x%"PRIx64", %d) failed: %m", - __func__, addr, data, size); + if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 + ",%d) failed: %m", + __func__, vbasedev->name, region->nr, + addr, data, size); } -#ifdef DEBUG_VFIO - { - VFIOPCIDevice *vdev = container_of(bar, VFIOPCIDevice, bars[bar->nr]); - - trace_vfio_bar_write(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - region->nr, addr, data, size); - } -#endif + trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size); /* * A read or write to a BAR always signals an INTx EOI. This will @@ -1138,13 +1140,14 @@ static void vfio_bar_write(void *opaque, hwaddr addr, * which access will service the interrupt, so we're potentially * getting quite a few host interrupts per guest interrupt. 
*/ - vfio_eoi(container_of(bar, VFIOPCIDevice, bars[bar->nr])); + vbasedev->ops->vfio_eoi(vbasedev); } -static uint64_t vfio_bar_read(void *opaque, - hwaddr addr, unsigned size) +static uint64_t vfio_region_read(void *opaque, + hwaddr addr, unsigned size) { - VFIOBAR *bar = opaque; + VFIORegion *region = opaque; + VFIODevice *vbasedev = region->vbasedev; union { uint8_t byte; uint16_t word; @@ -1153,9 +1156,10 @@ static uint64_t vfio_bar_read(void *opaque, } buf; uint64_t data = 0; - if (pread(bar->fd, &buf, size, bar->fd_offset + addr) != size) { - error_report("%s(,0x%"HWADDR_PRIx", %d) failed: %m", - __func__, addr, size); + if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", + __func__, vbasedev->name, region->nr, + addr, size); return (uint64_t)-1; } @@ -1174,25 +1178,17 @@ static uint64_t vfio_bar_read(void *opaque, break; } -#ifdef DEBUG_VFIO - { - VFIOPCIDevice *vdev = container_of(bar, VFIOPCIDevice, bars[bar->nr]); - - trace_vfio_bar_read(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - region->nr, addr, size, data); - } -#endif + trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data); /* Same as write above */ - vfio_eoi(container_of(bar, VFIOPCIDevice, bars[bar->nr])); + vbasedev->ops->vfio_eoi(vbasedev); return data; } -static const MemoryRegionOps vfio_bar_ops = { - .read = vfio_bar_read, - .write = vfio_bar_write, +static const MemoryRegionOps vfio_region_ops = { + .read = vfio_region_read, + .write = vfio_region_write, .endianness = DEVICE_LITTLE_ENDIAN, }; @@ -1529,8 +1525,8 @@ static uint64_t vfio_generic_window_quirk_read(void *opaque, quirk->data.bar, addr, size, data); } else { - data = vfio_bar_read(&vdev->bars[quirk->data.bar], - addr + quirk->data.base_offset, size); + data = vfio_region_read(&vdev->bars[quirk->data.bar].region, + addr + quirk->data.base_offset, size); } return data; @@ -1584,7 +1580,7 @@ static 
void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, return; } - vfio_bar_write(&vdev->bars[quirk->data.bar], + vfio_region_write(&vdev->bars[quirk->data.bar].region, addr + quirk->data.base_offset, data, size); } @@ -1621,7 +1617,8 @@ static uint64_t vfio_generic_quirk_read(void *opaque, quirk->data.bar, addr + base, size, data); } else { - data = vfio_bar_read(&vdev->bars[quirk->data.bar], addr + base, size); + data = vfio_region_read(&vdev->bars[quirk->data.bar].region, + addr + base, size); } return data; @@ -1653,7 +1650,8 @@ static void vfio_generic_quirk_write(void *opaque, hwaddr addr, quirk->data.bar, addr + base, data, size); } else { - vfio_bar_write(&vdev->bars[quirk->data.bar], addr + base, data, size); + vfio_region_write(&vdev->bars[quirk->data.bar].region, + addr + base, data, size); } } @@ -1706,7 +1704,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) * As long as the BAR is >= 256 bytes it will be aligned such that the * lower byte is always zero. Filter out anything else, if it exists. 
*/ - if (!vdev->bars[4].ioport || vdev->bars[4].size < 256) { + if (!vdev->bars[4].ioport || vdev->bars[4].region.size < 256) { return; } @@ -1758,7 +1756,7 @@ static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_window_quirk, quirk, "vfio-ati-bar4-window-quirk", 8); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, quirk->data.base_offset, &quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -1837,7 +1835,8 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); - return vfio_bar_read(&vdev->bars[quirk->data.bar], addr + 0x70, size); + return vfio_region_read(&vdev->bars[quirk->data.bar].region, + addr + 0x70, size); } static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, @@ -1879,7 +1878,8 @@ static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.function); - vfio_bar_write(&vdev->bars[quirk->data.bar], addr + 0x70, data, size); + vfio_region_write(&vdev->bars[quirk->data.bar].region, + addr + 0x70, data, size); } static const MemoryRegionOps vfio_rtl8168_window_quirk = { @@ -1909,7 +1909,7 @@ static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk, quirk, "vfio-rtl8168-window-quirk", 8); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, 0x70, &quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -1943,7 +1943,7 @@ static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk, "vfio-ati-bar2-4000-quirk", 
TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, quirk->data.address_match & TARGET_PAGE_MASK, &quirk->mem, 1); @@ -2063,7 +2063,7 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) VFIOQuirk *quirk; if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA || - !vdev->bars[1].size) { + !vdev->bars[1].region.size) { return; } @@ -2172,7 +2172,8 @@ static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_bar5_window_quirk, quirk, "vfio-nvidia-bar5-window-quirk", 16); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, 0, &quirk->mem, 1); + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + 0, &quirk->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -2200,7 +2201,8 @@ static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr, */ if ((pdev->cap_present & QEMU_PCI_CAP_MSI) && vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) { - vfio_bar_write(&vdev->bars[quirk->data.bar], addr + base, data, size); + vfio_region_write(&vdev->bars[quirk->data.bar].region, + addr + base, data, size); } } @@ -2243,7 +2245,7 @@ static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk, quirk, "vfio-nvidia-bar0-88000-quirk", TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, quirk->data.address_match & TARGET_PAGE_MASK, &quirk->mem, 1); @@ -2270,7 +2272,8 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) /* Log the chipset ID */ trace_vfio_probe_nvidia_bar0_1800_quirk_id( - (unsigned int)(vfio_bar_read(&vdev->bars[0], 0, 4) >> 20) & 0xff); + (unsigned 
int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20) + & 0xff); quirk = g_malloc0(sizeof(*quirk)); quirk->vdev = vdev; @@ -2282,7 +2285,7 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk, "vfio-nvidia-bar0-1800-quirk", TARGET_PAGE_ALIGN(quirk->data.address_mask + 1)); - memory_region_add_subregion_overlap(&vdev->bars[nr].mem, + memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, quirk->data.address_match & TARGET_PAGE_MASK, &quirk->mem, 1); @@ -2340,7 +2343,7 @@ static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) while (!QLIST_EMPTY(&bar->quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); - memory_region_del_subregion(&bar->mem, &quirk->mem); + memory_region_del_subregion(&bar->region.mem, &quirk->mem); object_unparent(OBJECT(&quirk->mem)); QLIST_REMOVE(quirk, next); g_free(quirk); @@ -2851,9 +2854,9 @@ static int vfio_setup_msix(VFIOPCIDevice *vdev, int pos) int ret; ret = msix_init(&vdev->pdev, vdev->msix->entries, - &vdev->bars[vdev->msix->table_bar].mem, + &vdev->bars[vdev->msix->table_bar].region.mem, vdev->msix->table_bar, vdev->msix->table_offset, - &vdev->bars[vdev->msix->pba_bar].mem, + &vdev->bars[vdev->msix->pba_bar].region.mem, vdev->msix->pba_bar, vdev->msix->pba_offset, pos); if (ret < 0) { if (ret == -ENOTSUP) { @@ -2871,8 +2874,9 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev) msi_uninit(&vdev->pdev); if (vdev->msix) { - msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].mem, - &vdev->bars[vdev->msix->pba_bar].mem); + msix_uninit(&vdev->pdev, + &vdev->bars[vdev->msix->table_bar].region.mem, + &vdev->bars[vdev->msix->pba_bar].region.mem); } } @@ -2886,11 +2890,11 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled) for (i = 0; i < PCI_ROM_SLOT; i++) { VFIOBAR *bar = &vdev->bars[i]; - if (!bar->size) { + if (!bar->region.size) { continue; } - 
memory_region_set_enabled(&bar->mmap_mem, enabled); + memory_region_set_enabled(&bar->region.mmap_mem, enabled); if (vdev->msix && vdev->msix->table_bar == i) { memory_region_set_enabled(&vdev->msix->mmap_mem, enabled); } @@ -2901,53 +2905,55 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; - if (!bar->size) { + if (!bar->region.size) { return; } vfio_bar_quirk_teardown(vdev, nr); - memory_region_del_subregion(&bar->mem, &bar->mmap_mem); - munmap(bar->mmap, memory_region_size(&bar->mmap_mem)); + memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem); + munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem)); if (vdev->msix && vdev->msix->table_bar == nr) { - memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem); + memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem); munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); } } -static int vfio_mmap_bar(VFIOPCIDevice *vdev, VFIOBAR *bar, - MemoryRegion *mem, MemoryRegion *submem, - void **map, size_t size, off_t offset, - const char *name) +static int vfio_mmap_region(Object *obj, VFIORegion *region, + MemoryRegion *mem, MemoryRegion *submem, + void **map, size_t size, off_t offset, + const char *name) { int ret = 0; + VFIODevice *vbasedev = region->vbasedev; - if (VFIO_ALLOW_MMAP && size && bar->flags & VFIO_REGION_INFO_FLAG_MMAP) { + if (VFIO_ALLOW_MMAP && size && region->flags & + VFIO_REGION_INFO_FLAG_MMAP) { int prot = 0; - if (bar->flags & VFIO_REGION_INFO_FLAG_READ) { + if (region->flags & VFIO_REGION_INFO_FLAG_READ) { prot |= PROT_READ; } - if (bar->flags & VFIO_REGION_INFO_FLAG_WRITE) { + if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) { prot |= PROT_WRITE; } *map = mmap(NULL, size, prot, MAP_SHARED, - bar->fd, bar->fd_offset + offset); + vbasedev->fd, region->fd_offset + offset); if (*map == MAP_FAILED) { *map = NULL; ret = -errno; goto empty_region; } - memory_region_init_ram_ptr(submem, 
OBJECT(vdev), name, size, *map); + memory_region_init_ram_ptr(submem, obj, name, size, *map); memory_region_set_skip_dump(submem); } else { empty_region: /* Create a zero sized sub-region to make cleanup easy. */ - memory_region_init(submem, OBJECT(vdev), name, 0); + memory_region_init(submem, obj, name, 0); } memory_region_add_subregion(mem, offset, submem); @@ -2958,7 +2964,7 @@ empty_region: static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; - unsigned size = bar->size; + unsigned size = bar->region.size; char name[64]; uint32_t pci_bar; uint8_t type; @@ -2988,9 +2994,9 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) ~PCI_BASE_ADDRESS_MEM_MASK); /* A "slow" read/write mapping underlies all BARs */ - memory_region_init_io(&bar->mem, OBJECT(vdev), &vfio_bar_ops, + memory_region_init_io(&bar->region.mem, OBJECT(vdev), &vfio_region_ops, bar, name, size); - pci_register_bar(&vdev->pdev, nr, type, &bar->mem); + pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem); /* * We can't mmap areas overlapping the MSIX vector table, so we @@ -3001,8 +3007,9 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) } strncat(name, " mmap", sizeof(name) - strlen(name) - 1); - if (vfio_mmap_bar(vdev, bar, &bar->mem, - &bar->mmap_mem, &bar->mmap, size, 0, name)) { + if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, + &bar->region.mmap_mem, &bar->region.mmap, + size, 0, name)) { error_report("%s unsupported. Performance may be slow", name); } @@ -3012,10 +3019,11 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) start = HOST_PAGE_ALIGN(vdev->msix->table_offset + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); - size = start < bar->size ? bar->size - start : 0; + size = start < bar->region.size ? 
bar->region.size - start : 0; strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); /* VFIOMSIXInfo contains another MemoryRegion for this mapping */ - if (vfio_mmap_bar(vdev, bar, &bar->mem, &vdev->msix->mmap_mem, + if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, + &vdev->msix->mmap_mem, &vdev->msix->mmap, size, start, name)) { error_report("%s unsupported. Performance may be slow", name); } @@ -3601,6 +3609,7 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev) static VFIODeviceOps vfio_pci_ops = { .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, + .vfio_eoi = vfio_eoi, }; static void vfio_reset_handler(void *opaque) @@ -4005,11 +4014,11 @@ static int vfio_get_device(VFIOGroup *group, const char *name, (unsigned long)reg_info.offset, (unsigned long)reg_info.flags); - vdev->bars[i].flags = reg_info.flags; - vdev->bars[i].size = reg_info.size; - vdev->bars[i].fd_offset = reg_info.offset; - vdev->bars[i].fd = vdev->vbasedev.fd; - vdev->bars[i].nr = i; + vdev->bars[i].region.vbasedev = &vdev->vbasedev; + vdev->bars[i].region.flags = reg_info.flags; + vdev->bars[i].region.size = reg_info.size; + vdev->bars[i].region.fd_offset = reg_info.offset; + vdev->bars[i].region.nr = i; QLIST_INIT(&vdev->bars[i].quirks); } diff --git a/trace-events b/trace-events index cfe2db4255..7a931781ea 100644 --- a/trace-events +++ b/trace-events @@ -1412,8 +1412,8 @@ vfio_pci_reset(int domain, int bus, int slot, int fn) " (%04x:%02x:%02x.%x)" vfio_pci_reset_flr(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x FLR/VFIO_DEVICE_RESET" vfio_pci_reset_pm(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x PCI PM Reset" -vfio_bar_write(int domain, int bus, int slot, int fn, int index, uint64_t addr, uint64_t data, unsigned size) " (%04x:%02x:%02x.%x:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)" -vfio_bar_read(int domain, int bus, int slot, int fn, int index, uint64_t addr, unsigned size, 
uint64_t data) " (%04x:%02x:%02x.%x:region%d+0x%"PRIx64", %d) = 0x%"PRIx64 +vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)" +vfio_region_read(const char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64 vfio_iommu_map_notify(uint64_t iova_start, uint64_t iova_end) "iommu map @ %"PRIx64" - %"PRIx64 vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add %"PRIx64" - %"PRIx64 vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] %"PRIx64" - %"PRIx64 From d13dd2d7a957eacbb9e669c43dd9e19db969755b Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 22 Dec 2014 09:54:38 -0700 Subject: [PATCH 10/14] hw/vfio/pci: split vfio_get_device vfio_get_device now takes a VFIODevice as argument. The function is split into 2 parts: vfio_get_device which is generic and vfio_populate_device which is bus specific. 3 new fields are introduced in VFIODevice to store dev_info. vfio_put_base_device is created. 
Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 130 ++++++++++++++++++++++++++++++-------------------- trace-events | 10 ++-- 2 files changed, 83 insertions(+), 57 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 6456348cbb..e965f3e5b8 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -204,12 +204,16 @@ typedef struct VFIODevice { bool reset_works; bool needs_reset; VFIODeviceOps *ops; + unsigned int num_irqs; + unsigned int num_regions; + unsigned int flags; } VFIODevice; struct VFIODeviceOps { void (*vfio_compute_needs_reset)(VFIODevice *vdev); int (*vfio_hot_reset_multi)(VFIODevice *vdev); void (*vfio_eoi)(VFIODevice *vdev); + int (*vfio_populate_device)(VFIODevice *vdev); }; typedef struct VFIOPCIDevice { @@ -296,6 +300,8 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); +static void vfio_put_base_device(VFIODevice *vbasedev); +static int vfio_populate_device(VFIODevice *vbasedev); /* * Common VFIO interrupt disable @@ -3610,6 +3616,7 @@ static VFIODeviceOps vfio_pci_ops = { .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, .vfio_eoi = vfio_eoi, + .vfio_populate_device = vfio_populate_device, }; static void vfio_reset_handler(void *opaque) @@ -3951,70 +3958,45 @@ static void vfio_put_group(VFIOGroup *group) } } -static int vfio_get_device(VFIOGroup *group, const char *name, - VFIOPCIDevice *vdev) +static int vfio_populate_device(VFIODevice *vbasedev) { - struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; - int ret, i; - - ret = ioctl(group->fd, 
VFIO_GROUP_GET_DEVICE_FD, name); - if (ret < 0) { - error_report("vfio: error getting device %s from group %d: %m", - name, group->groupid); - error_printf("Verify all devices in group %d are bound to vfio-pci " - "or pci-stub and not already in use\n", group->groupid); - return ret; - } - - vdev->vbasedev.fd = ret; - vdev->vbasedev.group = group; - QLIST_INSERT_HEAD(&group->device_list, &vdev->vbasedev, next); + int i, ret = -1; /* Sanity check device */ - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_INFO, &dev_info); - if (ret) { - error_report("vfio: error getting device info: %m"); - goto error; - } - - trace_vfio_get_device_irq(name, dev_info.flags, - dev_info.num_regions, dev_info.num_irqs); - - if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) { + if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PCI)) { error_report("vfio: Um, this isn't a PCI device"); goto error; } - vdev->vbasedev.reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); - - if (dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { + if (vbasedev->num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { error_report("vfio: unexpected number of io regions %u", - dev_info.num_regions); + vbasedev->num_regions); goto error; } - if (dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) { - error_report("vfio: unexpected number of irqs %u", dev_info.num_irqs); + if (vbasedev->num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) { + error_report("vfio: unexpected number of irqs %u", vbasedev->num_irqs); goto error; } for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) { reg_info.index = i; - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info); + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info); if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto error; } - trace_vfio_get_device_region(name, i, - (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + trace_vfio_populate_device_region(vbasedev->name, i, +
(unsigned long)reg_info.size, + (unsigned long)reg_info.offset, + (unsigned long)reg_info.flags); - vdev->bars[i].region.vbasedev = &vdev->vbasedev; + vdev->bars[i].region.vbasedev = vbasedev; vdev->bars[i].region.flags = reg_info.flags; vdev->bars[i].region.size = reg_info.size; vdev->bars[i].region.fd_offset = reg_info.offset; @@ -4030,9 +4012,10 @@ static int vfio_get_device(VFIOGroup *group, const char *name, goto error; } - trace_vfio_get_device_config(name, (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + trace_vfio_populate_device_config(vdev->vbasedev.name, + (unsigned long)reg_info.size, + (unsigned long)reg_info.offset, + (unsigned long)reg_info.flags); vdev->config_size = reg_info.size; if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) { @@ -4041,7 +4024,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, vdev->config_offset = reg_info.offset; if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) && - dev_info.num_regions > VFIO_PCI_VGA_REGION_INDEX) { + vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { struct vfio_region_info vga_info = { .argsz = sizeof(vga_info), .index = VFIO_PCI_VGA_REGION_INDEX, @@ -4085,7 +4068,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ - trace_vfio_get_device_get_irq_info_failure(); + trace_vfio_populate_device_get_irq_info_failure(); ret = 0; } else if (irq_info.count == 1) { vdev->pci_aer = true; @@ -4097,25 +4080,68 @@ static int vfio_get_device(VFIOGroup *group, const char *name, } error: + return ret; +} + +static int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev) +{ + struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; + int ret; + + ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + if (ret < 0) { + error_report("vfio: error getting device %s from group 
%d: %m", + name, group->groupid); + error_printf("Verify all devices in group %d are bound to vfio-<bus> " + "or pci-stub and not already in use\n", group->groupid); + return ret; + } + + vbasedev->fd = ret; + vbasedev->group = group; + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_INFO, &dev_info); if (ret) { - QLIST_REMOVE(&vdev->vbasedev, next); - vdev->vbasedev.group = NULL; - close(vdev->vbasedev.fd); + error_report("vfio: error getting device info: %m"); + goto error; + } + + vbasedev->num_irqs = dev_info.num_irqs; + vbasedev->num_regions = dev_info.num_regions; + vbasedev->flags = dev_info.flags; + + trace_vfio_get_device(name, dev_info.flags, + dev_info.num_regions, dev_info.num_irqs); + + vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); + + ret = vbasedev->ops->vfio_populate_device(vbasedev); + +error: + if (ret) { + vfio_put_base_device(vbasedev); } return ret; } +void vfio_put_base_device(VFIODevice *vbasedev) +{ + QLIST_REMOVE(vbasedev, next); + vbasedev->group = NULL; + trace_vfio_put_base_device(vbasedev->fd); + close(vbasedev->fd); +} + static void vfio_put_device(VFIOPCIDevice *vdev) { - QLIST_REMOVE(&vdev->vbasedev, next); - vdev->vbasedev.group = NULL; - trace_vfio_put_device(vdev->vbasedev.fd); - close(vdev->vbasedev.fd); g_free(vdev->vbasedev.name); if (vdev->msix) { g_free(vdev->msix); vdev->msix = NULL; } + vfio_put_base_device(&vdev->vbasedev); } static void vfio_err_notifier_handler(void *opaque) @@ -4288,7 +4314,7 @@ static int vfio_initfn(PCIDevice *pdev) } } - ret = vfio_get_device(group, path, vdev); + ret = vfio_get_device(group, path, &vdev->vbasedev); if (ret) { error_report("vfio: failed to get device %s", path); vfio_put_group(group); diff --git a/trace-events b/trace-events index 7a931781ea..55a559bbf5 100644 --- a/trace-events +++ b/trace-events @@ -1403,10 +1403,10 @@ vfio_pci_hot_reset(int domain, int bus, int slot, int fn, const char *type) " (%
vfio_pci_hot_reset_has_dep_devices(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x: hot reset dependent devices:" vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" vfio_pci_hot_reset_result(int domain, int bus, int slot, int fn, const char *result) "%04x:%02x:%02x.%x hot reset: %s" -vfio_get_device_region(const char *region_name, int index, unsigned long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" -vfio_get_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" -vfio_get_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" -vfio_get_device_irq(const char *name, unsigned flags, unsigned num_regions, unsigned num_irqs) "Device %s flags: %u, regions: %u, irgs: %u" +vfio_populate_device_region(const char *region_name, int index, unsigned long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" +vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" +vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" +vfio_get_device(const char *name, unsigned flags, unsigned num_regions, unsigned num_irqs) "Device %s flags: %u, regions: %u, irgs: %u" vfio_initfn(int domain, int bus, int slot, int fn, int group_id) " (%04x:%02x:%02x.%x) group %d" vfio_pci_reset(int domain, int bus, int slot, int fn) " (%04x:%02x:%02x.%x)" vfio_pci_reset_flr(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x FLR/VFIO_DEVICE_RESET" @@ -1422,7 +1422,7 @@ vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del vfio_listener_region_del(uint64_t start, uint64_t end) "region_del %"PRIx64" - 
%"PRIx64 vfio_disconnect_container(int fd) "close container->fd=%d" vfio_put_group(int fd) "close group->fd=%d" -vfio_put_device(int fd) "close vdev->fd=%d" +vfio_put_base_device(int fd) "close vdev->fd=%d" #hw/acpi/memory_hotplug.c mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32 From 62356b729281e7d6672193a98a072b1c7de04dd8 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 22 Dec 2014 09:54:46 -0700 Subject: [PATCH 11/14] hw/vfio/pci: rename group_list into vfio_group_list better fit in the rest of the namespace Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index e965f3e5b8..48387bc556 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -282,7 +282,7 @@ static const VFIORomBlacklistEntry romblacklist[] = { #define MSIX_CAP_LENGTH 12 static QLIST_HEAD(, VFIOGroup) - group_list = QLIST_HEAD_INITIALIZER(group_list); + vfio_group_list = QLIST_HEAD_INITIALIZER(vfio_group_list); #ifdef CONFIG_KVM /* @@ -3454,7 +3454,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) continue; } - QLIST_FOREACH(group, &group_list, next) { + QLIST_FOREACH(group, &vfio_group_list, next) { if (group->groupid == devices[i].group_id) { break; } @@ -3501,7 +3501,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) /* Determine how many group fds need to be passed */ count = 0; - QLIST_FOREACH(group, &group_list, next) { + QLIST_FOREACH(group, &vfio_group_list, next) { for (i = 0; i < info->count; i++) { if (group->groupid == devices[i].group_id) { count++; @@ -3515,7 +3515,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) fds = &reset->group_fds[0]; /* Fill in group fds */ - QLIST_FOREACH(group, &group_list, next) { + QLIST_FOREACH(group, &vfio_group_list, next) { for (i = 0; i < info->count; i++) { if (group->groupid == devices[i].group_id) { fds[reset->count++] = group->fd; @@ 
-3550,7 +3550,7 @@ out: continue; } - QLIST_FOREACH(group, &group_list, next) { + QLIST_FOREACH(group, &vfio_group_list, next) { if (group->groupid == devices[i].group_id) { break; } @@ -3624,13 +3624,13 @@ static void vfio_reset_handler(void *opaque) VFIOGroup *group; VFIODevice *vbasedev; - QLIST_FOREACH(group, &group_list, next) { + QLIST_FOREACH(group, &vfio_group_list, next) { QLIST_FOREACH(vbasedev, &group->device_list, next) { vbasedev->ops->vfio_compute_needs_reset(vbasedev); } } - QLIST_FOREACH(group, &group_list, next) { + QLIST_FOREACH(group, &vfio_group_list, next) { QLIST_FOREACH(vbasedev, &group->device_list, next) { if (vbasedev->needs_reset) { vbasedev->ops->vfio_hot_reset_multi(vbasedev); @@ -3879,7 +3879,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as) char path[32]; struct vfio_group_status status = { .argsz = sizeof(status) }; - QLIST_FOREACH(group, &group_list, next) { + QLIST_FOREACH(group, &vfio_group_list, next) { if (group->groupid == groupid) { /* Found it. Now is it already in the right context? 
*/ if (group->container->space->as == as) { @@ -3921,11 +3921,11 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as) goto close_fd_exit; } - if (QLIST_EMPTY(&group_list)) { + if (QLIST_EMPTY(&vfio_group_list)) { qemu_register_reset(vfio_reset_handler, NULL); } - QLIST_INSERT_HEAD(&group_list, group, next); + QLIST_INSERT_HEAD(&vfio_group_list, group, next); vfio_kvm_device_add_group(group); @@ -3953,7 +3953,7 @@ static void vfio_put_group(VFIOGroup *group) close(group->fd); g_free(group); - if (QLIST_EMPTY(&group_list)) { + if (QLIST_EMPTY(&vfio_group_list)) { qemu_unregister_reset(vfio_reset_handler, NULL); } } From df92ee444884ba66b5cc95e3ff3d5579f89ed4aa Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 22 Dec 2014 09:54:49 -0700 Subject: [PATCH 12/14] hw/vfio/pci: use name field in format strings Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 213 ++++++++++++++------------------------------------ trace-events | 109 ++++++++++++++------------ 2 files changed, 116 insertions(+), 206 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 48387bc556..423d9bb9da 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -386,9 +386,7 @@ static void vfio_intx_interrupt(void *opaque) return; } - trace_vfio_intx_interrupt(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - 'A' + vdev->intx.pin); + trace_vfio_intx_interrupt(vdev->vbasedev.name, 'A' + vdev->intx.pin); vdev->intx.pending = true; pci_irq_assert(&vdev->pdev); @@ -407,8 +405,7 @@ static void vfio_eoi(VFIODevice *vbasedev) return; } - trace_vfio_eoi(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_eoi(vbasedev->name); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); @@ -477,8 +474,7 @@ static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev) vdev->intx.kvm_accel = true; - trace_vfio_enable_intx_kvm(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + 
trace_vfio_enable_intx_kvm(vdev->vbasedev.name); return; @@ -530,8 +526,7 @@ static void vfio_disable_intx_kvm(VFIOPCIDevice *vdev) /* If we've missed an event, let it re-fire through QEMU */ vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); - trace_vfio_disable_intx_kvm(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_disable_intx_kvm(vdev->vbasedev.name); #endif } @@ -550,8 +545,7 @@ static void vfio_update_irq(PCIDevice *pdev) return; /* Nothing changed */ } - trace_vfio_update_irq(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, + trace_vfio_update_irq(vdev->vbasedev.name, vdev->intx.route.irq, route.irq); vfio_disable_intx_kvm(vdev); @@ -627,8 +621,7 @@ static int vfio_enable_intx(VFIOPCIDevice *vdev) vdev->interrupt = VFIO_INT_INTx; - trace_vfio_enable_intx(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_enable_intx(vdev->vbasedev.name); return 0; } @@ -650,8 +643,7 @@ static void vfio_disable_intx(VFIOPCIDevice *vdev) vdev->interrupt = VFIO_INT_NONE; - trace_vfio_disable_intx(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_disable_intx(vdev->vbasedev.name); } /* @@ -678,9 +670,7 @@ static void vfio_msi_interrupt(void *opaque) abort(); } - trace_vfio_msi_interrupt(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - nr, msg.address, msg.data); + trace_vfio_msi_interrupt(vbasedev->name, nr, msg.address, msg.data); #endif if (vdev->interrupt == VFIO_INT_MSIX) { @@ -787,9 +777,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, VFIOMSIVector *vector; int ret; - trace_vfio_msix_vector_do_use(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - nr); + trace_vfio_msix_vector_do_use(vdev->vbasedev.name, nr); vector = &vdev->msi_vectors[nr]; @@ -875,9 +863,7 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) 
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIOMSIVector *vector = &vdev->msi_vectors[nr]; - trace_vfio_msix_vector_release(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - nr); + trace_vfio_msix_vector_release(vdev->vbasedev.name, nr); /* * There are still old guests that mask and unmask vectors on every @@ -940,8 +926,7 @@ static void vfio_enable_msix(VFIOPCIDevice *vdev) error_report("vfio: msix_set_vector_notifiers failed"); } - trace_vfio_enable_msix(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_enable_msix(vdev->vbasedev.name); } static void vfio_enable_msi(VFIOPCIDevice *vdev) @@ -1017,9 +1002,7 @@ retry: return; } - trace_vfio_enable_msi(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - vdev->nr_vectors); + trace_vfio_enable_msi(vdev->vbasedev.name, vdev->nr_vectors); } static void vfio_disable_msi_common(VFIOPCIDevice *vdev) @@ -1069,8 +1052,7 @@ static void vfio_disable_msix(VFIOPCIDevice *vdev) vfio_disable_msi_common(vdev); - trace_vfio_disable_msix(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_disable_msix(vdev->vbasedev.name); } static void vfio_disable_msi(VFIOPCIDevice *vdev) @@ -1078,8 +1060,7 @@ static void vfio_disable_msi(VFIOPCIDevice *vdev) vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSI_IRQ_INDEX); vfio_disable_msi_common(vdev); - trace_vfio_disable_msi(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_disable_msi(vdev->vbasedev.name); } static void vfio_update_msi(VFIOPCIDevice *vdev) @@ -1213,9 +1194,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) return; } - trace_vfio_pci_load_rom(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - (unsigned long)reg_info.size, + trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info.size, (unsigned long)reg_info.offset, (unsigned long)reg_info.flags); @@ 
-1225,9 +1204,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) if (!vdev->rom_size) { vdev->rom_read_failed = true; error_report("vfio-pci: Cannot read device rom at " - "%04x:%02x:%02x.%x", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + "%s", vdev->vbasedev.name); error_printf("Device option ROM contents are probably invalid " "(check dmesg).\nSkip option ROM probe with rombar=0, " "or load from file with romfile=\n"); @@ -1289,9 +1266,7 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) break; } - trace_vfio_rom_read(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - addr, size, data); + trace_vfio_rom_read(vdev->vbasedev.name, addr, size, data); return data; } @@ -1388,9 +1363,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) } } - trace_vfio_pci_size_rom(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - size); + trace_vfio_pci_size_rom(vdev->vbasedev.name, size); snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom", vdev->host.domain, vdev->host.bus, vdev->host.slot, @@ -1524,10 +1497,7 @@ static uint64_t vfio_generic_window_quirk_read(void *opaque, quirk->data.address_val + offset, size); trace_vfio_generic_window_quirk_read(memory_region_name(&quirk->mem), - vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function, + vdev->vbasedev.name, quirk->data.bar, addr, size, data); } else { @@ -1575,14 +1545,10 @@ static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, vfio_pci_write_config(&vdev->pdev, quirk->data.address_val + offset, data, size); - trace_vfio_generic_window_quirk_write(memory_region_name(&quirk->mem), - vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function, - quirk->data.bar, - addr, data, size); + vdev->vbasedev.name, + quirk->data.bar, + addr, data, size); return; } @@ -1616,11 +1582,7 @@ static uint64_t vfio_generic_quirk_read(void *opaque, data = 
vfio_pci_read_config(&vdev->pdev, addr - offset, size); trace_vfio_generic_quirk_read(memory_region_name(&quirk->mem), - vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function, - quirk->data.bar, + vdev->vbasedev.name, quirk->data.bar, addr + base, size, data); } else { data = vfio_region_read(&vdev->bars[quirk->data.bar].region, @@ -1649,11 +1611,7 @@ static void vfio_generic_quirk_write(void *opaque, hwaddr addr, vfio_pci_write_config(&vdev->pdev, addr - offset, data, size); trace_vfio_generic_quirk_write(memory_region_name(&quirk->mem), - vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function, - quirk->data.bar, + vdev->vbasedev.name, quirk->data.bar, addr + base, data, size); } else { vfio_region_write(&vdev->bars[quirk->data.bar].region, @@ -1725,8 +1683,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); - trace_vfio_vga_probe_ati_3c3_quirk(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_vga_probe_ati_3c3_quirk(vdev->vbasedev.name); } /* @@ -1767,10 +1724,7 @@ static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_ati_bar4_window_quirk(vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function); + trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name); } #define PCI_VENDOR_ID_REALTEK 0x10ec @@ -1809,8 +1763,7 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, if (quirk->data.flags) { trace_vfio_rtl8168_window_quirk_read_fake( memory_region_name(&quirk->mem), - vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + vdev->vbasedev.name); return quirk->data.address_match ^ 0x10000000U; } @@ -1821,9 +1774,7 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, trace_vfio_rtl8168_window_quirk_read_table( 
memory_region_name(&quirk->mem), - vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function - ); + vdev->vbasedev.name); if (!(vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { return 0; @@ -1836,10 +1787,8 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, } } - trace_vfio_rtl8168_window_quirk_read_direct( - memory_region_name(&quirk->mem), - vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_rtl8168_window_quirk_read_direct(memory_region_name(&quirk->mem), + vdev->vbasedev.name); return vfio_region_read(&vdev->bars[quirk->data.bar].region, addr + 0x70, size); @@ -1859,8 +1808,7 @@ static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, trace_vfio_rtl8168_window_quirk_write_table( memory_region_name(&quirk->mem), - vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + vdev->vbasedev.name); io_mem_write(&vdev->pdev.msix_table_mmio, (hwaddr)(quirk->data.address_match & 0xfff), @@ -1881,8 +1829,7 @@ static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, trace_vfio_rtl8168_window_quirk_write_direct( memory_region_name(&quirk->mem), - vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + vdev->vbasedev.name); vfio_region_write(&vdev->bars[quirk->data.bar].region, addr + 0x70, data, size); @@ -1920,10 +1867,7 @@ static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_rtl8168_bar2_window_quirk(vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function); + trace_vfio_probe_rtl8168_bar2_window_quirk(vdev->vbasedev.name); } /* * Trap the BAR2 MMIO window to config space as well. 
@@ -1955,10 +1899,7 @@ static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_ati_bar2_4000_quirk(vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function); + trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name); } /* @@ -2091,10 +2032,7 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); - trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function); + trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->vbasedev.name); } /* @@ -2183,10 +2121,7 @@ static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_nvidia_bar5_window_quirk(vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function); + trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name); } static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr, @@ -2257,10 +2192,7 @@ static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function); + trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name); } /* @@ -2297,10 +2229,7 @@ static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); - trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function); + trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name); } /* @@ -2387,9 +2316,7 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) val = (emu_val & emu_bits) | (phys_val & ~emu_bits); - trace_vfio_pci_read_config(vdev->host.domain, 
vdev->host.bus, - vdev->host.slot, vdev->host.function, - addr, len, val); + trace_vfio_pci_read_config(vdev->vbasedev.name, addr, len, val); return val; } @@ -2400,9 +2327,7 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); uint32_t val_le = cpu_to_le32(val); - trace_vfio_pci_write_config(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - addr, val, len); + trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len); /* Write everything to VFIO, let it filter out what we can't write */ if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr) @@ -2539,7 +2464,7 @@ static void vfio_iommu_map_notify(Notifier *n, void *data) &xlat, &len, iotlb->perm & IOMMU_WO); if (!memory_region_is_ram(mr)) { error_report("iommu map to non memory area %"HWADDR_PRIx"\n", - xlat); + xlat); return; } /* @@ -2784,8 +2709,7 @@ static int vfio_setup_msi(VFIOPCIDevice *vdev, int pos) msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); - trace_vfio_setup_msi(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, pos); + trace_vfio_setup_msi(vdev->vbasedev.name, pos); ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit); if (ret < 0) { @@ -2846,9 +2770,8 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; - trace_vfio_early_setup_msix(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - pos, vdev->msix->table_bar, + trace_vfio_early_setup_msix(vdev->vbasedev.name, pos, + vdev->msix->table_bar, vdev->msix->table_offset, vdev->msix->entries); @@ -3224,8 +3147,7 @@ static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos) uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP); if (cap & PCI_EXP_DEVCAP_FLR) { - 
trace_vfio_check_pcie_flr(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_check_pcie_flr(vdev->vbasedev.name); vdev->has_flr = true; } } @@ -3235,8 +3157,7 @@ static void vfio_check_pm_reset(VFIOPCIDevice *vdev, uint8_t pos) uint16_t csr = pci_get_word(vdev->pdev.config + pos + PCI_PM_CTRL); if (!(csr & PCI_PM_CTRL_NO_SOFT_RESET)) { - trace_vfio_check_pm_reset(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_check_pm_reset(vdev->vbasedev.name); vdev->has_pm_reset = true; } } @@ -3246,8 +3167,7 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos) uint8_t cap = pci_get_byte(vdev->pdev.config + pos + PCI_AF_CAP); if ((cap & PCI_AF_CAP_TP) && (cap & PCI_AF_CAP_FLR)) { - trace_vfio_check_af_flr(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_check_af_flr(vdev->vbasedev.name); vdev->has_flr = true; } } @@ -3398,9 +3318,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) int ret, i, count; bool multi = false; - trace_vfio_pci_hot_reset(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, - single ? "one" : "multi"); + trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); vfio_pci_pre_reset(vdev); vdev->vbasedev.needs_reset = false; @@ -3431,10 +3349,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) goto out_single; } - trace_vfio_pci_hot_reset_has_dep_devices(vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function); + trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); /* Verify that we have all the groups required */ for (i = 0; i < info->count; i++) { @@ -3462,10 +3377,9 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) if (!group) { if (!vdev->has_pm_reset) { - error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, " + error_report("vfio: Cannot reset device %s, " "depends on group %d which is not owned.", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, devices[i].group_id); + vdev->vbasedev.name, devices[i].group_id); } ret = -EPERM; goto out; @@ -3480,8 +3394,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) if (vfio_pci_host_match(&host, &tmp->host)) { if (single) { error_report("vfio: found another in-use device " - "%04x:%02x:%02x.%x\n", host.domain, host.bus, - host.slot, host.function); + "%s\n", vbasedev_iter->name); ret = -EINVAL; goto out_single; } @@ -3528,10 +3441,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); g_free(reset); - trace_vfio_pci_hot_reset_result(vdev->host.domain, - vdev->host.bus, - vdev->host.slot, - vdev->host.function, + trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, ret ? 
"%m" : "Success"); out: @@ -4073,10 +3983,9 @@ static int vfio_populate_device(VFIODevice *vbasedev) } else if (irq_info.count == 1) { vdev->pci_aer = true; } else { - error_report("vfio: %04x:%02x:%02x.%x " + error_report("vfio: %s " "Could not enable error recovery for the device", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + vbasedev->name); } error: @@ -4293,8 +4202,7 @@ static int vfio_initfn(PCIDevice *pdev) return -errno; } - trace_vfio_initfn(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, groupid); + trace_vfio_initfn(vdev->vbasedev.name, groupid); group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev)); if (!group) { @@ -4430,16 +4338,14 @@ static void vfio_pci_reset(DeviceState *dev) PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev); VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); - trace_vfio_pci_reset(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_pci_reset(vdev->vbasedev.name); vfio_pci_pre_reset(vdev); if (vdev->vbasedev.reset_works && (vdev->has_flr || !vdev->has_pm_reset) && !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) { - trace_vfio_pci_reset_flr(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_pci_reset_flr(vdev->vbasedev.name); goto post_reset; } @@ -4451,8 +4357,7 @@ static void vfio_pci_reset(DeviceState *dev) /* If nothing else works and the device supports PM reset, use it */ if (vdev->vbasedev.reset_works && vdev->has_pm_reset && !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) { - trace_vfio_pci_reset_pm(vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + trace_vfio_pci_reset_pm(vdev->vbasedev.name); goto post_reset; } diff --git a/trace-events b/trace-events index 55a559bbf5..0e7aa53407 100644 --- a/trace-events +++ b/trace-events @@ -1352,68 +1352,72 @@ pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsi 
pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x" # hw/vfio/vfio-pci.c -vfio_intx_interrupt(int domain, int bus, int slot, int fn, char line) "(%04x:%02x:%02x.%x) Pin %c" -vfio_eoi(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x) EOI" -vfio_enable_intx_kvm(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x) KVM INTx accel enabled" -vfio_disable_intx_kvm(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x) KVM INTx accel disabled" -vfio_update_irq(int domain, int bus, int slot, int fn, int new_irq, int target_irq) " (%04x:%02x:%02x.%x) IRQ moved %d -> %d" -vfio_enable_intx(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x)" -vfio_disable_intx(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x)" -vfio_msi_interrupt(int domain, int bus, int slot, int fn, int index, uint64_t addr, int data) "(%04x:%02x:%02x.%x) vector %d 0x%"PRIx64"/0x%x" -vfio_msix_vector_do_use(int domain, int bus, int slot, int fn, int index) "(%04x:%02x:%02x.%x) vector %d used" -vfio_msix_vector_release(int domain, int bus, int slot, int fn, int index) "(%04x:%02x:%02x.%x) vector %d released" -vfio_enable_msix(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x)" -vfio_enable_msi(int domain, int bus, int slot, int fn, int nr_vectors) "(%04x:%02x:%02x.%x) Enabled %d MSI vectors" -vfio_disable_msix(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x)" -vfio_disable_msi(int domain, int bus, int slot, int fn) "(%04x:%02x:%02x.%x)" -vfio_pci_load_rom(int domain, int bus, int slot, int fn, unsigned long size, unsigned long offset, unsigned long flags) "Device %04x:%02x:%02x.%x ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" -vfio_rom_read(int domain, int bus, int slot, int fn, uint64_t addr, int size, uint64_t data) "(%04x:%02x:%02x.%x, 0x%"PRIx64", 0x%x) = 0x%"PRIx64 -vfio_pci_size_rom(int domain, int bus, int slot, int fn, int size) "%04x:%02x:%02x.%x ROM size 0x%x" 
-vfio_vga_write(uint64_t addr, uint64_t data, int size) "(0x%"PRIx64", 0x%"PRIx64", %d)" -vfio_vga_read(uint64_t addr, int size, uint64_t data) "(0x%"PRIx64", %d) = 0x%"PRIx64 -vfio_generic_window_quirk_read(const char * region_name, int domain, int bus, int slot, int fn, int index, uint64_t addr, int size, uint64_t data) "%s read(%04x:%02x:%02x.%x:BAR%d+0x%"PRIx64", %d) = 0x%"PRIx64 -vfio_generic_window_quirk_write(const char * region_name, int domain, int bus, int slot, int fn, int index, uint64_t addr, uint64_t data, int size) "%s write(%04x:%02x:%02x.%x:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d)" -vfio_generic_quirk_read(const char * region_name, int domain, int bus, int slot, int fn, int index, uint64_t addr, int size, uint64_t data) "%s read(%04x:%02x:%02x.%x:BAR%d+0x%"PRIx64", %d) = 0x%"PRIx64 -vfio_generic_quirk_write(const char * region_name, int domain, int bus, int slot, int fn, int index, uint64_t addr, uint64_t data, int size) "%s write(%04x:%02x:%02x.%x:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d)" +vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c" +vfio_eoi(const char *name) " (%s) EOI" +vfio_enable_intx_kvm(const char *name) " (%s) KVM INTx accel enabled" +vfio_disable_intx_kvm(const char *name) " (%s) KVM INTx accel disabled" +vfio_update_irq(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d" +vfio_enable_intx(const char *name) " (%s)" +vfio_disable_intx(const char *name) " (%s)" +vfio_msi_interrupt(const char *name, int index, uint64_t addr, int data) " (%s) vector %d 0x%"PRIx64"/0x%x" +vfio_msix_vector_do_use(const char *name, int index) " (%s) vector %d used" +vfio_msix_vector_release(const char *name, int index) " (%s) vector %d released" +vfio_enable_msix(const char *name) " (%s)" +vfio_enable_msi(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors" +vfio_disable_msix(const char *name) " (%s)" +vfio_disable_msi(const char *name) " (%s)" +vfio_pci_load_rom(const char *name, unsigned long size, unsigned long 
offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" +vfio_rom_read(const char *name, uint64_t addr, int size, uint64_t data) " (%s, 0x%"PRIx64", 0x%x) = 0x%"PRIx64 +vfio_pci_size_rom(const char *name, int size) "%s ROM size 0x%x" +vfio_vga_write(uint64_t addr, uint64_t data, int size) " (0x%"PRIx64", 0x%"PRIx64", %d)" +vfio_vga_read(uint64_t addr, int size, uint64_t data) " (0x%"PRIx64", %d) = 0x%"PRIx64 +# remove ) = +vfio_generic_window_quirk_read(const char * region_name, const char *name, int index, uint64_t addr, int size, uint64_t data) "%s read(%s:BAR%d+0x%"PRIx64", %d = 0x%"PRIx64 +## remove ) +vfio_generic_window_quirk_write(const char * region_name, const char *name, int index, uint64_t addr, uint64_t data, int size) "%s write(%s:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d" +# remove ) = +vfio_generic_quirk_read(const char * region_name, const char *name, int index, uint64_t addr, int size, uint64_t data) "%s read(%s:BAR%d+0x%"PRIx64", %d = 0x%"PRIx64 +# remove ) +vfio_generic_quirk_write(const char * region_name, const char *name, int index, uint64_t addr, uint64_t data, int size) "%s write(%s:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d" vfio_ati_3c3_quirk_read(uint64_t data) " (0x3c3, 1) = 0x%"PRIx64 -vfio_vga_probe_ati_3c3_quirk(int domain, int bus, int slot, int fn) "Enabled ATI/AMD quirk 0x3c3 BAR4 for device %04x:%02x:%02x.%x" -vfio_probe_ati_bar4_window_quirk(int domain, int bus, int slot, int fn) "Enabled ATI/AMD BAR4 window quirk for device %04x:%02x:%02x.%x" -vfio_rtl8168_window_quirk_read_fake(const char *region_name, int domain, int bus, int slot, int fn) "%s fake read(%04x:%02x:%02x.%d)" -vfio_rtl8168_window_quirk_read_table(const char *region_name, int domain, int bus, int slot, int fn) "%s MSI-X table read(%04x:%02x:%02x.%d)" -vfio_rtl8168_window_quirk_read_direct(const char *region_name, int domain, int bus, int slot, int fn) "%s direct read(%04x:%02x:%02x.%d)" -vfio_rtl8168_window_quirk_write_table(const char 
*region_name, int domain, int bus, int slot, int fn) "%s MSI-X table write(%04x:%02x:%02x.%d)" -vfio_rtl8168_window_quirk_write_direct(const char *region_name, int domain, int bus, int slot, int fn) "%s direct write(%04x:%02x:%02x.%d)" -vfio_probe_rtl8168_bar2_window_quirk(int domain, int bus, int slot, int fn) "Enabled RTL8168 BAR2 window quirk for device %04x:%02x:%02x.%x" -vfio_probe_ati_bar2_4000_quirk(int domain, int bus, int slot, int fn) "Enabled ATI/AMD BAR2 0x4000 quirk for device %04x:%02x:%02x.%x" +vfio_vga_probe_ati_3c3_quirk(const char *name) "Enabled ATI/AMD quirk 0x3c3 BAR4for device %s" +vfio_probe_ati_bar4_window_quirk(const char *name) "Enabled ATI/AMD BAR4 window quirk for device %s" +#issue with ) +vfio_rtl8168_window_quirk_read_fake(const char *region_name, const char *name) "%s fake read(%s" +vfio_rtl8168_window_quirk_read_table(const char *region_name, const char *name) "%s MSI-X table read(%s" +vfio_rtl8168_window_quirk_read_direct(const char *region_name, const char *name) "%s direct read(%s" +vfio_rtl8168_window_quirk_write_table(const char *region_name, const char *name) "%s MSI-X table write(%s" +vfio_rtl8168_window_quirk_write_direct(const char *region_name, const char *name) "%s direct write(%s" +vfio_probe_rtl8168_bar2_window_quirk(const char *name) "Enabled RTL8168 BAR2 window quirk for device %s" +vfio_probe_ati_bar2_4000_quirk(const char *name) "Enabled ATI/AMD BAR2 0x4000 quirk for device %s" vfio_nvidia_3d0_quirk_read(int size, uint64_t data) " (0x3d0, %d) = 0x%"PRIx64 vfio_nvidia_3d0_quirk_write(uint64_t data, int size) " (0x3d0, 0x%"PRIx64", %d)" -vfio_vga_probe_nvidia_3d0_quirk(int domain, int bus, int slot, int fn) "Enabled NVIDIA VGA 0x3d0 quirk for device %04x:%02x:%02x.%x" -vfio_probe_nvidia_bar5_window_quirk(int domain, int bus, int slot, int fn) "Enabled NVIDIA BAR5 window quirk for device %04x:%02x:%02x.%x" -vfio_probe_nvidia_bar0_88000_quirk(int domain, int bus, int slot, int fn) "Enabled NVIDIA BAR0 0x88000 quirk for 
device %04x:%02x:%02x.%x" +vfio_vga_probe_nvidia_3d0_quirk(const char *name) "Enabled NVIDIA VGA 0x3d0 quirk for device %s" +vfio_probe_nvidia_bar5_window_quirk(const char *name) "Enabled NVIDIA BAR5 window quirk for device %s" +vfio_probe_nvidia_bar0_88000_quirk(const char *name) "Enabled NVIDIA BAR0 0x88000 quirk for device %s" vfio_probe_nvidia_bar0_1800_quirk_id(int id) "Nvidia NV%02x" -vfio_probe_nvidia_bar0_1800_quirk(int domain, int bus, int slot, int fn) "Enabled NVIDIA BAR0 0x1800 quirk for device %04x:%02x:%02x.%x" -vfio_pci_read_config(int domain, int bus, int slot, int fn, int addr, int len, int val) " (%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x" -vfio_pci_write_config(int domain, int bus, int slot, int fn, int addr, int val, int len) " (%04x:%02x:%02x.%x, @0x%x, 0x%x, len=0x%x)" -vfio_setup_msi(int domain, int bus, int slot, int fn, int pos) "%04x:%02x:%02x.%x PCI MSI CAP @0x%x" -vfio_early_setup_msix(int domain, int bus, int slot, int fn, int pos, int table_bar, int offset, int entries) "%04x:%02x:%02x.%x PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d" -vfio_check_pcie_flr(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x Supports FLR via PCIe cap" -vfio_check_pm_reset(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x Supports PM reset" -vfio_check_af_flr(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x Supports FLR via AF cap" -vfio_pci_hot_reset(int domain, int bus, int slot, int fn, const char *type) " (%04x:%02x:%02x.%x) %s" -vfio_pci_hot_reset_has_dep_devices(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x: hot reset dependent devices:" +vfio_probe_nvidia_bar0_1800_quirk(const char *name) "Enabled NVIDIA BAR0 0x1800 quirk for device %s" +vfio_pci_read_config(const char *name, int addr, int len, int val) " (%s, @0x%x, len=0x%x) %x" +vfio_pci_write_config(const char *name, int addr, int val, int len) " (%s, @0x%x, 0x%x, len=0x%x)" +vfio_setup_msi(const char *name, int pos) "%s PCI MSI CAP @0x%x" 
+vfio_early_setup_msix(const char *name, int pos, int table_bar, int offset, int entries) "%s PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d" +vfio_check_pcie_flr(const char *name) "%s Supports FLR via PCIe cap" +vfio_check_pm_reset(const char *name) "%s Supports PM reset" +vfio_check_af_flr(const char *name) "%s Supports FLR via AF cap" +vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s" +vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:" vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" -vfio_pci_hot_reset_result(int domain, int bus, int slot, int fn, const char *result) "%04x:%02x:%02x.%x hot reset: %s" +vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" vfio_populate_device_region(const char *region_name, int index, unsigned long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" -vfio_get_device(const char *name, unsigned flags, unsigned num_regions, unsigned num_irqs) "Device %s flags: %u, regions: %u, irgs: %u" -vfio_initfn(int domain, int bus, int slot, int fn, int group_id) " (%04x:%02x:%02x.%x) group %d" -vfio_pci_reset(int domain, int bus, int slot, int fn) " (%04x:%02x:%02x.%x)" -vfio_pci_reset_flr(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x FLR/VFIO_DEVICE_RESET" -vfio_pci_reset_pm(int domain, int bus, int slot, int fn) "%04x:%02x:%02x.%x PCI PM Reset" +vfio_initfn(const char *name, int group_id) " (%s) group %d" +vfio_pci_reset(const char *name) " (%s)" +vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET" +vfio_pci_reset_pm(const char *name) "%s PCI 
PM Reset" vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)" -vfio_region_read(const char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64 +vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64 vfio_iommu_map_notify(uint64_t iova_start, uint64_t iova_end) "iommu map @ %"PRIx64" - %"PRIx64 vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add %"PRIx64" - %"PRIx64 vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] %"PRIx64" - %"PRIx64 @@ -1422,6 +1426,7 @@ vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del vfio_listener_region_del(uint64_t start, uint64_t end) "region_del %"PRIx64" - %"PRIx64 vfio_disconnect_container(int fd) "close container->fd=%d" vfio_put_group(int fd) "close group->fd=%d" +vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u" vfio_put_base_device(int fd) "close vdev->fd=%d" #hw/acpi/memory_hotplug.c From e2c7d025ada047a3f0225f89ff36626d1bd46e47 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 22 Dec 2014 09:54:51 -0700 Subject: [PATCH 13/14] hw/vfio: create common module A new common module is created. It implements all functions that have no device specificity (PCI, Platform). 
This patch consists only of code movement (no functional changes). Signed-off-by: Kim Phillips Signed-off-by: Eric Auger Signed-off-by: Alex Williamson --- hw/vfio/Makefile.objs | 1 + hw/vfio/common.c | 959 ++++++++++++++++++++++++++++++ hw/vfio/pci.c | 1028 +-------------------------------- include/hw/vfio/vfio-common.h | 151 +++++ trace-events | 1 + 5 files changed, 1113 insertions(+), 1027 deletions(-) create mode 100644 hw/vfio/common.c create mode 100644 include/hw/vfio/vfio-common.h diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs index 31c7dabb02..e31f30ec09 100644 --- a/hw/vfio/Makefile.objs +++ b/hw/vfio/Makefile.objs @@ -1,3 +1,4 @@ ifeq ($(CONFIG_LINUX), y) +obj-$(CONFIG_SOFTMMU) += common.o obj-$(CONFIG_PCI) += pci.o endif diff --git a/hw/vfio/common.c b/hw/vfio/common.c new file mode 100644 index 0000000000..4f15c86b80 --- /dev/null +++ b/hw/vfio/common.c @@ -0,0 +1,959 @@ +/* + * generic functions used by VFIO devices + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Alex Williamson + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Based on qemu-kvm device-assignment: + * Adapted for KVM by Qumranet.
+ * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + */ + +#include +#include +#include + +#include "hw/vfio/vfio-common.h" +#include "hw/vfio/vfio.h" +#include "exec/address-spaces.h" +#include "exec/memory.h" +#include "hw/hw.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "trace.h" + +struct vfio_group_head vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_address_spaces); +struct vfio_as_head vfio_address_spaces = + QLIST_HEAD_INITIALIZER(vfio_address_spaces); + +#ifdef CONFIG_KVM +/* + * We have a single VFIO pseudo device per KVM VM. Once created it lives + * for the life of the VM. Closing the file descriptor only drops our + * reference to it and the device's reference to kvm. Therefore once + * initialized, this file descriptor is only released on QEMU exit and + * we'll re-use it should another vfio device be attached before then. 
+ */ +static int vfio_kvm_device_fd = -1; +#endif + +/* + * Common VFIO interrupt disable + */ +void vfio_disable_irqindex(VFIODevice *vbasedev, int index) +{ + struct vfio_irq_set irq_set = { + .argsz = sizeof(irq_set), + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, + .index = index, + .start = 0, + .count = 0, + }; + + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); +} + +void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) +{ + struct vfio_irq_set irq_set = { + .argsz = sizeof(irq_set), + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK, + .index = index, + .start = 0, + .count = 1, + }; + + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); +} + +void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) +{ + struct vfio_irq_set irq_set = { + .argsz = sizeof(irq_set), + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK, + .index = index, + .start = 0, + .count = 1, + }; + + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); +} + +/* + * IO Port/MMIO - Beware of the endians, VFIO is always little endian + */ +void vfio_region_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIORegion *region = opaque; + VFIODevice *vbasedev = region->vbasedev; + union { + uint8_t byte; + uint16_t word; + uint32_t dword; + uint64_t qword; + } buf; + + switch (size) { + case 1: + buf.byte = data; + break; + case 2: + buf.word = cpu_to_le16(data); + break; + case 4: + buf.dword = cpu_to_le32(data); + break; + default: + hw_error("vfio: unsupported write size, %d bytes", size); + break; + } + + if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 + ",%d) failed: %m", + __func__, vbasedev->name, region->nr, + addr, data, size); + } + + trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size); + + /* + * A read or write to a BAR always signals an INTx EOI. 
This will + * do nothing if not pending (including not in INTx mode). We assume + * that a BAR access is in response to an interrupt and that BAR + * accesses will service the interrupt. Unfortunately, we don't know + * which access will service the interrupt, so we're potentially + * getting quite a few host interrupts per guest interrupt. + */ + vbasedev->ops->vfio_eoi(vbasedev); +} + +uint64_t vfio_region_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIORegion *region = opaque; + VFIODevice *vbasedev = region->vbasedev; + union { + uint8_t byte; + uint16_t word; + uint32_t dword; + uint64_t qword; + } buf; + uint64_t data = 0; + + if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", + __func__, vbasedev->name, region->nr, + addr, size); + return (uint64_t)-1; + } + switch (size) { + case 1: + data = buf.byte; + break; + case 2: + data = le16_to_cpu(buf.word); + break; + case 4: + data = le32_to_cpu(buf.dword); + break; + default: + hw_error("vfio: unsupported read size, %d bytes", size); + break; + } + + trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data); + + /* Same as write above */ + vbasedev->ops->vfio_eoi(vbasedev); + + return data; +} + +const MemoryRegionOps vfio_region_ops = { + .read = vfio_region_read, + .write = vfio_region_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +/* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +static int vfio_dma_unmap(VFIOContainer *container, + hwaddr iova, ram_addr_t size) +{ + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = 0, + .iova = iova, + .size = size, + }; + + if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + error_report("VFIO_UNMAP_DMA: %d\n", -errno); + return -errno; + } + + return 0; +} + +static int vfio_dma_map(VFIOContainer *container, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) +{ + struct 
vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_READ, + .vaddr = (__u64)(uintptr_t)vaddr, + .iova = iova, + .size = size, + }; + + if (!readonly) { + map.flags |= VFIO_DMA_MAP_FLAG_WRITE; + } + + /* + * Try the mapping, if it fails with EBUSY, unmap the region and try + * again. This shouldn't be necessary, but we sometimes see it in + * the VGA ROM space. + */ + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || + (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 && + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { + return 0; + } + + error_report("VFIO_MAP_DMA: %d\n", -errno); + return -errno; +} + +static bool vfio_listener_skipped_section(MemoryRegionSection *section) +{ + return (!memory_region_is_ram(section->mr) && + !memory_region_is_iommu(section->mr)) || + /* + * Sizing an enabled 64-bit BAR can cause spurious mappings to + * addresses in the upper part of the 64-bit address space. These + * are never accessed by the CPU and beyond the address width of + * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width. + */ + section->offset_within_address_space & (1ULL << 63); +} + +static void vfio_iommu_map_notify(Notifier *n, void *data) +{ + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); + VFIOContainer *container = giommu->container; + IOMMUTLBEntry *iotlb = data; + MemoryRegion *mr; + hwaddr xlat; + hwaddr len = iotlb->addr_mask + 1; + void *vaddr; + int ret; + + trace_vfio_iommu_map_notify(iotlb->iova, + iotlb->iova + iotlb->addr_mask); + + /* + * The IOMMU TLB entry we have just covers translation through + * this IOMMU to its immediate target. We need to translate + * it the rest of the way through to memory.
+ */ + mr = address_space_translate(&address_space_memory, + iotlb->translated_addr, + &xlat, &len, iotlb->perm & IOMMU_WO); + if (!memory_region_is_ram(mr)) { + error_report("iommu map to non memory area %"HWADDR_PRIx"\n", + xlat); + return; + } + /* + * Translation truncates length to the IOMMU page size, + * check that it did not truncate too much. + */ + if (len & iotlb->addr_mask) { + error_report("iommu has granularity incompatible with target AS\n"); + return; + } + + if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { + vaddr = memory_region_get_ram_ptr(mr) + xlat; + ret = vfio_dma_map(container, iotlb->iova, + iotlb->addr_mask + 1, vaddr, + !(iotlb->perm & IOMMU_WO) || mr->readonly); + if (ret) { + error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%m)", + container, iotlb->iova, + iotlb->addr_mask + 1, vaddr, ret); + } + } else { + ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1); + if (ret) { + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%m)", + container, iotlb->iova, + iotlb->addr_mask + 1, ret); + } + } +} + +static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + hwaddr iova, end; + Int128 llend; + void *vaddr; + int ret; + + if (vfio_listener_skipped_section(section)) { + trace_vfio_listener_region_add_skip( + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(int128_sub(section->size, int128_one()))); + return; + } + + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != + (section->offset_within_region & ~TARGET_PAGE_MASK))) { + error_report("%s received unaligned region", __func__); + return; + } + + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, 
section->size); + llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + + if (int128_ge(int128_make64(iova), llend)) { + return; + } + + memory_region_ref(section->mr); + + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + + trace_vfio_listener_region_add_iommu(iova, + int128_get64(int128_sub(llend, int128_one()))); + /* + * FIXME: We should do some checking to see if the + * capabilities of the host VFIO IOMMU are adequate to model + * the guest IOMMU + * + * FIXME: For VFIO iommu types which have KVM acceleration to + * avoid bouncing all map/unmaps through qemu this way, this + * would be the right place to wire that up (tell the KVM + * device emulation the VFIO iommu handles to use). + */ + /* + * This assumes that the guest IOMMU is empty of + * mappings at this point. + * + * One way of doing this is: + * 1. Avoid sharing IOMMUs between emulated devices or different + * IOMMU groups. + * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if + * there are some mappings in IOMMU. + * + * VFIO on SPAPR does that. Other IOMMU models may do that differently; + * they must make sure there are no existing mappings or + * loop through existing mappings to map them into VFIO.
+ */ + giommu = g_malloc0(sizeof(*giommu)); + giommu->iommu = section->mr; + giommu->container = container; + giommu->n.notify = vfio_iommu_map_notify; + QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); + memory_region_register_iommu_notifier(giommu->iommu, &giommu->n); + + return; + } + + /* Here we assume that memory_region_is_ram(section->mr)==true */ + + end = int128_get64(llend); + vaddr = memory_region_get_ram_ptr(section->mr) + + section->offset_within_region + + (iova - section->offset_within_address_space); + + trace_vfio_listener_region_add_ram(iova, end - 1, vaddr); + + ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly); + if (ret) { + error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%m)", + container, iova, end - iova, vaddr, ret); + + /* + * On the initfn path, store the first error in the container so we + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. 
+ */ + if (!container->iommu_data.type1.initialized) { + if (!container->iommu_data.type1.error) { + container->iommu_data.type1.error = ret; + } + } else { + hw_error("vfio: DMA mapping failed, unable to continue"); + } + } +} + +static void vfio_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + iommu_data.type1.listener); + hwaddr iova, end; + int ret; + + if (vfio_listener_skipped_section(section)) { + trace_vfio_listener_region_del_skip( + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(int128_sub(section->size, int128_one()))); + return; + } + + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != + (section->offset_within_region & ~TARGET_PAGE_MASK))) { + error_report("%s received unaligned region", __func__); + return; + } + + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + + QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { + if (giommu->iommu == section->mr) { + memory_region_unregister_iommu_notifier(&giommu->n); + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + break; + } + } + + /* + * FIXME: We assume the one big unmap below is adequate to + * remove any individual page mappings in the IOMMU which + * might have been copied into VFIO. This works for a page table + * based IOMMU where a big unmap flattens a large range of IO-PTEs. + * That may not be true for all IOMMU types. 
+ */ + } + + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + end = (section->offset_within_address_space + int128_get64(section->size)) & + TARGET_PAGE_MASK; + + if (iova >= end) { + return; + } + + trace_vfio_listener_region_del(iova, end - 1); + + ret = vfio_dma_unmap(container, iova, end - iova); + memory_region_unref(section->mr); + if (ret) { + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%m)", + container, iova, end - iova, ret); + } +} + +const MemoryListener vfio_memory_listener = { + .region_add = vfio_listener_region_add, + .region_del = vfio_listener_region_del, +}; + +void vfio_listener_release(VFIOContainer *container) +{ + memory_listener_unregister(&container->iommu_data.type1.listener); +} + +int vfio_mmap_region(Object *obj, VFIORegion *region, + MemoryRegion *mem, MemoryRegion *submem, + void **map, size_t size, off_t offset, + const char *name) +{ + int ret = 0; + VFIODevice *vbasedev = region->vbasedev; + + if (VFIO_ALLOW_MMAP && size && region->flags & + VFIO_REGION_INFO_FLAG_MMAP) { + int prot = 0; + + if (region->flags & VFIO_REGION_INFO_FLAG_READ) { + prot |= PROT_READ; + } + + if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) { + prot |= PROT_WRITE; + } + + *map = mmap(NULL, size, prot, MAP_SHARED, + vbasedev->fd, + region->fd_offset + offset); + if (*map == MAP_FAILED) { + *map = NULL; + ret = -errno; + goto empty_region; + } + + memory_region_init_ram_ptr(submem, obj, name, size, *map); + memory_region_set_skip_dump(submem); + } else { +empty_region: + /* Create a zero sized sub-region to make cleanup easy. 
*/ + memory_region_init(submem, obj, name, 0); + } + + memory_region_add_subregion(mem, offset, submem); + + return ret; +} + +void vfio_reset_handler(void *opaque) +{ + VFIOGroup *group; + VFIODevice *vbasedev; + + QLIST_FOREACH(group, &vfio_group_list, next) { + QLIST_FOREACH(vbasedev, &group->device_list, next) { + vbasedev->ops->vfio_compute_needs_reset(vbasedev); + } + } + + QLIST_FOREACH(group, &vfio_group_list, next) { + QLIST_FOREACH(vbasedev, &group->device_list, next) { + if (vbasedev->needs_reset) { + vbasedev->ops->vfio_hot_reset_multi(vbasedev); + } + } + } +} + +static void vfio_kvm_device_add_group(VFIOGroup *group) +{ +#ifdef CONFIG_KVM + struct kvm_device_attr attr = { + .group = KVM_DEV_VFIO_GROUP, + .attr = KVM_DEV_VFIO_GROUP_ADD, + .addr = (uint64_t)(unsigned long)&group->fd, + }; + + if (!kvm_enabled()) { + return; + } + + if (vfio_kvm_device_fd < 0) { + struct kvm_create_device cd = { + .type = KVM_DEV_TYPE_VFIO, + }; + + if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) { + error_report("KVM_CREATE_DEVICE: %m\n"); + return; + } + + vfio_kvm_device_fd = cd.fd; + } + + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { + error_report("Failed to add group %d to KVM VFIO device: %m", + group->groupid); + } +#endif +} + +static void vfio_kvm_device_del_group(VFIOGroup *group) +{ +#ifdef CONFIG_KVM + struct kvm_device_attr attr = { + .group = KVM_DEV_VFIO_GROUP, + .attr = KVM_DEV_VFIO_GROUP_DEL, + .addr = (uint64_t)(unsigned long)&group->fd, + }; + + if (vfio_kvm_device_fd < 0) { + return; + } + + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { + error_report("Failed to remove group %d from KVM VFIO device: %m", + group->groupid); + } +#endif +} + +static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as) +{ + VFIOAddressSpace *space; + + QLIST_FOREACH(space, &vfio_address_spaces, list) { + if (space->as == as) { + return space; + } + } + + /* No suitable VFIOAddressSpace, create a new one */ + space = 
g_malloc0(sizeof(*space)); + space->as = as; + QLIST_INIT(&space->containers); + + QLIST_INSERT_HEAD(&vfio_address_spaces, space, list); + + return space; +} + +static void vfio_put_address_space(VFIOAddressSpace *space) +{ + if (QLIST_EMPTY(&space->containers)) { + QLIST_REMOVE(space, list); + g_free(space); + } +} + +static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) +{ + VFIOContainer *container; + int ret, fd; + VFIOAddressSpace *space; + + space = vfio_get_address_space(as); + + QLIST_FOREACH(container, &space->containers, next) { + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + return 0; + } + } + + fd = qemu_open("/dev/vfio/vfio", O_RDWR); + if (fd < 0) { + error_report("vfio: failed to open /dev/vfio/vfio: %m"); + ret = -errno; + goto put_space_exit; + } + + ret = ioctl(fd, VFIO_GET_API_VERSION); + if (ret != VFIO_API_VERSION) { + error_report("vfio: supported vfio version: %d, " + "reported version: %d", VFIO_API_VERSION, ret); + ret = -EINVAL; + goto close_fd_exit; + } + + container = g_malloc0(sizeof(*container)); + container->space = space; + container->fd = fd; + if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) { + ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); + if (ret) { + error_report("vfio: failed to set group container: %m"); + ret = -errno; + goto free_container_exit; + } + + ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); + if (ret) { + error_report("vfio: failed to set iommu for container: %m"); + ret = -errno; + goto free_container_exit; + } + + container->iommu_data.type1.listener = vfio_memory_listener; + container->iommu_data.release = vfio_listener_release; + + memory_listener_register(&container->iommu_data.type1.listener, + container->space->as); + + if (container->iommu_data.type1.error) { + ret = container->iommu_data.type1.error; + error_report("vfio: memory listener 
initialization failed for container"); + goto listener_release_exit; + } + + container->iommu_data.type1.initialized = true; + + } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) { + ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); + if (ret) { + error_report("vfio: failed to set group container: %m"); + ret = -errno; + goto free_container_exit; + } + ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU); + if (ret) { + error_report("vfio: failed to set iommu for container: %m"); + ret = -errno; + goto free_container_exit; + } + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called + * when container fd is closed so we do not call it explicitly + * in this file. + */ + ret = ioctl(fd, VFIO_IOMMU_ENABLE); + if (ret) { + error_report("vfio: failed to enable container: %m"); + ret = -errno; + goto free_container_exit; + } + + container->iommu_data.type1.listener = vfio_memory_listener; + container->iommu_data.release = vfio_listener_release; + + memory_listener_register(&container->iommu_data.type1.listener, + container->space->as); + + } else { + error_report("vfio: No available IOMMU models"); + ret = -EINVAL; + goto free_container_exit; + } + + QLIST_INIT(&container->group_list); + QLIST_INSERT_HEAD(&space->containers, container, next); + + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + + return 0; +listener_release_exit: + vfio_listener_release(container); + +free_container_exit: + g_free(container); + +close_fd_exit: + close(fd); + +put_space_exit: + vfio_put_address_space(space); + + return ret; +} + +static void vfio_disconnect_container(VFIOGroup *group) +{ + VFIOContainer *container = group->container; + + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { + error_report("vfio: error disconnecting group %d from container", + group->groupid); + } + + QLIST_REMOVE(group, container_next); + group->container = NULL; + + if 
(QLIST_EMPTY(&container->group_list)) { + VFIOAddressSpace *space = container->space; + + if (container->iommu_data.release) { + container->iommu_data.release(container); + } + QLIST_REMOVE(container, next); + trace_vfio_disconnect_container(container->fd); + close(container->fd); + g_free(container); + + vfio_put_address_space(space); + } +} + +VFIOGroup *vfio_get_group(int groupid, AddressSpace *as) +{ + VFIOGroup *group; + char path[32]; + struct vfio_group_status status = { .argsz = sizeof(status) }; + + QLIST_FOREACH(group, &vfio_group_list, next) { + if (group->groupid == groupid) { + /* Found it. Now is it already in the right context? */ + if (group->container->space->as == as) { + return group; + } else { + error_report("vfio: group %d used in multiple address spaces", + group->groupid); + return NULL; + } + } + } + + group = g_malloc0(sizeof(*group)); + + snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); + group->fd = qemu_open(path, O_RDWR); + if (group->fd < 0) { + error_report("vfio: error opening %s: %m", path); + goto free_group_exit; + } + + if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { + error_report("vfio: error getting group status: %m"); + goto close_fd_exit; + } + + if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { + error_report("vfio: error, group %d is not viable, please ensure " + "all devices within the iommu_group are bound to their " + "vfio bus driver.", groupid); + goto close_fd_exit; + } + + group->groupid = groupid; + QLIST_INIT(&group->device_list); + + if (vfio_connect_container(group, as)) { + error_report("vfio: failed to setup container for group %d", groupid); + goto close_fd_exit; + } + + if (QLIST_EMPTY(&vfio_group_list)) { + qemu_register_reset(vfio_reset_handler, NULL); + } + + QLIST_INSERT_HEAD(&vfio_group_list, group, next); + + vfio_kvm_device_add_group(group); + + return group; + +close_fd_exit: + close(group->fd); + +free_group_exit: + g_free(group); + + return NULL; +} + +void vfio_put_group(VFIOGroup 
*group) +{ + if (!QLIST_EMPTY(&group->device_list)) { + return; + } + + vfio_kvm_device_del_group(group); + vfio_disconnect_container(group); + QLIST_REMOVE(group, next); + trace_vfio_put_group(group->fd); + close(group->fd); + g_free(group); + + if (QLIST_EMPTY(&vfio_group_list)) { + qemu_unregister_reset(vfio_reset_handler, NULL); + } +} + +int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev) +{ + struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; + int ret; + + ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + if (ret < 0) { + error_report("vfio: error getting device %s from group %d: %m", + name, group->groupid); + error_printf("Verify all devices in group %d are bound to vfio- " + "or pci-stub and not already in use\n", group->groupid); + return ret; + } + + vbasedev->fd = ret; + vbasedev->group = group; + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_INFO, &dev_info); + if (ret) { + error_report("vfio: error getting device info: %m"); + goto error; + } + + vbasedev->num_irqs = dev_info.num_irqs; + vbasedev->num_regions = dev_info.num_regions; + vbasedev->flags = dev_info.flags; + + trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions, + dev_info.num_irqs); + + vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); + + ret = vbasedev->ops->vfio_populate_device(vbasedev); + +error: + if (ret) { + vfio_put_base_device(vbasedev); + } + return ret; +} + +void vfio_put_base_device(VFIODevice *vbasedev) +{ + QLIST_REMOVE(vbasedev, next); + vbasedev->group = NULL; + trace_vfio_put_base_device(vbasedev->fd); + close(vbasedev->fd); +} + +static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, + int req, void *param) +{ + VFIOGroup *group; + VFIOContainer *container; + int ret = -1; + + group = vfio_get_group(groupid, as); + if (!group) { + error_report("vfio: group %d not registered", groupid); + return ret; + } + + 
container = group->container; + if (group->container) { + ret = ioctl(container->fd, req, param); + if (ret < 0) { + error_report("vfio: failed to ioctl container: ret=%d, %s", + ret, strerror(errno)); + } + } + + vfio_put_group(group); + + return ret; +} + +int vfio_container_ioctl(AddressSpace *as, int32_t groupid, + int req, void *param) +{ + /* We allow only certain ioctls to the container */ + switch (req) { + case VFIO_CHECK_EXTENSION: + case VFIO_IOMMU_SPAPR_TCE_GET_INFO: + break; + default: + /* Return an error on unknown requests */ + error_report("vfio: unsupported ioctl %X", req); + return -1; + } + + return vfio_container_do_ioctl(as, groupid, req, param); +} diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 423d9bb9da..e380959dab 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -41,16 +41,7 @@ #include "sysemu/sysemu.h" #include "trace.h" #include "hw/vfio/vfio.h" - -/* Extra debugging, trap acceleration paths for more logging */ -#define VFIO_ALLOW_MMAP 1 -#define VFIO_ALLOW_KVM_INTX 1 -#define VFIO_ALLOW_KVM_MSI 1 -#define VFIO_ALLOW_KVM_MSIX 1 - -enum { - VFIO_DEVICE_TYPE_PCI = 0, -}; +#include "hw/vfio/vfio-common.h" struct VFIOPCIDevice; @@ -77,17 +68,6 @@ typedef struct VFIOQuirk { } data; } VFIOQuirk; -typedef struct VFIORegion { - struct VFIODevice *vbasedev; - off_t fd_offset; /* offset of region within device fd */ - MemoryRegion mem; /* slow, read/write access */ - MemoryRegion mmap_mem; /* direct mapped access */ - void *mmap; - size_t size; - uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ - uint8_t nr; /* cache the region number for debug */ -} VFIORegion; - typedef struct VFIOBAR { VFIORegion region; bool ioport; @@ -143,45 +123,6 @@ enum { VFIO_INT_MSIX = 3, }; -typedef struct VFIOAddressSpace { - AddressSpace *as; - QLIST_HEAD(, VFIOContainer) containers; - QLIST_ENTRY(VFIOAddressSpace) list; -} VFIOAddressSpace; - -static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces = - QLIST_HEAD_INITIALIZER(vfio_address_spaces); - -struct 
VFIOGroup; - -typedef struct VFIOType1 { - MemoryListener listener; - int error; - bool initialized; -} VFIOType1; - -typedef struct VFIOContainer { - VFIOAddressSpace *space; - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ - struct { - /* enable abstraction to support various iommu backends */ - union { - VFIOType1 type1; - }; - void (*release)(struct VFIOContainer *); - } iommu_data; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_ENTRY(VFIOContainer) next; -} VFIOContainer; - -typedef struct VFIOGuestIOMMU { - VFIOContainer *container; - MemoryRegion *iommu; - Notifier n; - QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; -} VFIOGuestIOMMU; - /* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */ typedef struct VFIOMSIXInfo { uint8_t table_bar; @@ -193,29 +134,6 @@ typedef struct VFIOMSIXInfo { void *mmap; } VFIOMSIXInfo; -typedef struct VFIODeviceOps VFIODeviceOps; - -typedef struct VFIODevice { - QLIST_ENTRY(VFIODevice) next; - struct VFIOGroup *group; - char *name; - int fd; - int type; - bool reset_works; - bool needs_reset; - VFIODeviceOps *ops; - unsigned int num_irqs; - unsigned int num_regions; - unsigned int flags; -} VFIODevice; - -struct VFIODeviceOps { - void (*vfio_compute_needs_reset)(VFIODevice *vdev); - int (*vfio_hot_reset_multi)(VFIODevice *vdev); - void (*vfio_eoi)(VFIODevice *vdev); - int (*vfio_populate_device)(VFIODevice *vdev); -}; - typedef struct VFIOPCIDevice { PCIDevice pdev; VFIODevice vbasedev; @@ -247,15 +165,6 @@ typedef struct VFIOPCIDevice { bool rom_read_failed; } VFIOPCIDevice; -typedef struct VFIOGroup { - int fd; - int groupid; - VFIOContainer *container; - QLIST_HEAD(, VFIODevice) device_list; - QLIST_ENTRY(VFIOGroup) next; - QLIST_ENTRY(VFIOGroup) container_next; -} VFIOGroup; - typedef struct VFIORomBlacklistEntry { uint16_t vendor_id; uint16_t device_id; @@ -281,75 +190,13 @@ static const VFIORomBlacklistEntry romblacklist[] = { #define 
MSIX_CAP_LENGTH 12 -static QLIST_HEAD(, VFIOGroup) - vfio_group_list = QLIST_HEAD_INITIALIZER(vfio_group_list); - -#ifdef CONFIG_KVM -/* - * We have a single VFIO pseudo device per KVM VM. Once created it lives - * for the life of the VM. Closing the file descriptor only drops our - * reference to it and the device's reference to kvm. Therefore once - * initialized, this file descriptor is only released on QEMU exit and - * we'll re-use it should another vfio device be attached before then. - */ -static int vfio_kvm_device_fd = -1; -#endif - static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -static void vfio_put_base_device(VFIODevice *vbasedev); static int vfio_populate_device(VFIODevice *vbasedev); -/* - * Common VFIO interrupt disable - */ -static void vfio_disable_irqindex(VFIODevice *vbasedev, int index) -{ - struct vfio_irq_set irq_set = { - .argsz = sizeof(irq_set), - .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, - .index = index, - .start = 0, - .count = 0, - }; - - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); -} - -/* - * INTx - */ -static void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) -{ - struct vfio_irq_set irq_set = { - .argsz = sizeof(irq_set), - .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK, - .index = index, - .start = 0, - .count = 1, - }; - - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); -} - -#ifdef CONFIG_KVM /* Unused outside of CONFIG_KVM code */ -static void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) -{ - struct vfio_irq_set irq_set = { - .argsz = sizeof(irq_set), - .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK, - .index = index, - .start = 0, - .count = 1, - }; - - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, 
&irq_set); -} -#endif - /* * Disabling BAR mmaping can be slow, but toggling it around INTx can * also be a huge overhead. We try to get the best of both worlds by @@ -1080,105 +927,6 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) } } -/* - * IO Port/MMIO - Beware of the endians, VFIO is always little endian - */ -static void vfio_region_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIORegion *region = opaque; - VFIODevice *vbasedev = region->vbasedev; - union { - uint8_t byte; - uint16_t word; - uint32_t dword; - uint64_t qword; - } buf; - - switch (size) { - case 1: - buf.byte = data; - break; - case 2: - buf.word = cpu_to_le16(data); - break; - case 4: - buf.dword = cpu_to_le32(data); - break; - default: - hw_error("vfio: unsupported write size, %d bytes", size); - break; - } - - if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { - error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 - ",%d) failed: %m", - __func__, vbasedev->name, region->nr, - addr, data, size); - } - - trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size); - - /* - * A read or write to a BAR always signals an INTx EOI. This will - * do nothing if not pending (including not in INTx mode). We assume - * that a BAR access is in response to an interrupt and that BAR - * accesses will service the interrupt. Unfortunately, we don't know - * which access will service the interrupt, so we're potentially - * getting quite a few host interrupts per guest interrupt. 
- */ - vbasedev->ops->vfio_eoi(vbasedev); -} - -static uint64_t vfio_region_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIORegion *region = opaque; - VFIODevice *vbasedev = region->vbasedev; - union { - uint8_t byte; - uint16_t word; - uint32_t dword; - uint64_t qword; - } buf; - uint64_t data = 0; - - if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { - error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", - __func__, vbasedev->name, region->nr, - addr, size); - return (uint64_t)-1; - } - - switch (size) { - case 1: - data = buf.byte; - break; - case 2: - data = le16_to_cpu(buf.word); - break; - case 4: - data = le32_to_cpu(buf.dword); - break; - default: - hw_error("vfio: unsupported read size, %d bytes", size); - break; - } - - trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data); - - /* Same as write above */ - vbasedev->ops->vfio_eoi(vbasedev); - - return data; -} - -static const MemoryRegionOps vfio_region_ops = { - .read = vfio_region_read, - .write = vfio_region_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - static void vfio_pci_load_rom(VFIOPCIDevice *vdev) { struct vfio_region_info reg_info = { @@ -2376,305 +2124,6 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, } } -/* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ -static int vfio_dma_unmap(VFIOContainer *container, - hwaddr iova, ram_addr_t size) -{ - struct vfio_iommu_type1_dma_unmap unmap = { - .argsz = sizeof(unmap), - .flags = 0, - .iova = iova, - .size = size, - }; - - if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { - error_report("VFIO_UNMAP_DMA: %d\n", -errno); - return -errno; - } - - return 0; -} - -static int vfio_dma_map(VFIOContainer *container, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) -{ - struct vfio_iommu_type1_dma_map map = { - .argsz = sizeof(map), - .flags = VFIO_DMA_MAP_FLAG_READ, - .vaddr = (__u64)(uintptr_t)vaddr, - .iova = iova, - .size 
= size, - }; - - if (!readonly) { - map.flags |= VFIO_DMA_MAP_FLAG_WRITE; - } - - /* - * Try the mapping, if it fails with EBUSY, unmap the region and try - * again. This shouldn't be necessary, but we sometimes see it in - * the the VGA ROM space. - */ - if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || - (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 && - ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { - return 0; - } - - error_report("VFIO_MAP_DMA: %d\n", -errno); - return -errno; -} - -static bool vfio_listener_skipped_section(MemoryRegionSection *section) -{ - return (!memory_region_is_ram(section->mr) && - !memory_region_is_iommu(section->mr)) || - /* - * Sizing an enabled 64-bit BAR can cause spurious mappings to - * addresses in the upper part of the 64-bit address space. These - * are never accessed by the CPU and beyond the address width of - * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width. - */ - section->offset_within_address_space & (1ULL << 63); -} - -static void vfio_iommu_map_notify(Notifier *n, void *data) -{ - VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); - VFIOContainer *container = giommu->container; - IOMMUTLBEntry *iotlb = data; - MemoryRegion *mr; - hwaddr xlat; - hwaddr len = iotlb->addr_mask + 1; - void *vaddr; - int ret; - - trace_vfio_iommu_map_notify(iotlb->iova, - iotlb->iova + iotlb->addr_mask); - - /* - * The IOMMU TLB entry we have just covers translation through - * this IOMMU to its immediate target. We need to translate - * it the rest of the way through to memory. - */ - mr = address_space_translate(&address_space_memory, - iotlb->translated_addr, - &xlat, &len, iotlb->perm & IOMMU_WO); - if (!memory_region_is_ram(mr)) { - error_report("iommu map to non memory area %"HWADDR_PRIx"\n", - xlat); - return; - } - /* - * Translation truncates length to the IOMMU page size, - * check that it did not truncate too much. 
- */ - if (len & iotlb->addr_mask) { - error_report("iommu has granularity incompatible with target AS\n"); - return; - } - - if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { - vaddr = memory_region_get_ram_ptr(mr) + xlat; - - ret = vfio_dma_map(container, iotlb->iova, - iotlb->addr_mask + 1, vaddr, - !(iotlb->perm & IOMMU_WO) || mr->readonly); - if (ret) { - error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx", %p) = %d (%m)", - container, iotlb->iova, - iotlb->addr_mask + 1, vaddr, ret); - } - } else { - ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1); - if (ret) { - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%m)", - container, iotlb->iova, - iotlb->addr_mask + 1, ret); - } - } -} - -static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.type1.listener); - hwaddr iova, end; - Int128 llend; - void *vaddr; - int ret; - - if (vfio_listener_skipped_section(section)) { - trace_vfio_listener_region_add_skip( - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(int128_sub(section->size, int128_one()))); - return; - } - - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { - error_report("%s received unaligned region", __func__); - return; - } - - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - llend = int128_make64(section->offset_within_address_space); - llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); - - if (int128_ge(int128_make64(iova), llend)) { - return; - } - - memory_region_ref(section->mr); - - if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - - trace_vfio_listener_region_add_iommu(iova, - int128_get64(int128_sub(llend, int128_one()))); - /* - * FIXME: We 
should do some checking to see if the - * capabilities of the host VFIO IOMMU are adequate to model - * the guest IOMMU - * - * FIXME: For VFIO iommu types which have KVM acceleration to - * avoid bouncing all map/unmaps through qemu this way, this - * would be the right place to wire that up (tell the KVM - * device emulation the VFIO iommu handles to use). - */ - /* - * This assumes that the guest IOMMU is empty of - * mappings at this point. - * - * One way of doing this is: - * 1. Avoid sharing IOMMUs between emulated devices or different - * IOMMU groups. - * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if - * there are some mappings in IOMMU. - * - * VFIO on SPAPR does that. Other IOMMU models may do that different, - * they must make sure there are no existing mappings or - * loop through existing mappings to map them into VFIO. - */ - giommu = g_malloc0(sizeof(*giommu)); - giommu->iommu = section->mr; - giommu->container = container; - giommu->n.notify = vfio_iommu_map_notify; - QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); - memory_region_register_iommu_notifier(giommu->iommu, &giommu->n); - - return; - } - - /* Here we assume that memory_region_is_ram(section->mr)==true */ - - end = int128_get64(llend); - vaddr = memory_region_get_ram_ptr(section->mr) + - section->offset_within_region + - (iova - section->offset_within_address_space); - - trace_vfio_listener_region_add_ram(iova, end - 1, vaddr); - - ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly); - if (ret) { - error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx", %p) = %d (%m)", - container, iova, end - iova, vaddr, ret); - - /* - * On the initfn path, store the first error in the container so we - * can gracefully fail. Runtime, there's not much we can do other - * than throw a hardware error. 
- */ - if (!container->iommu_data.type1.initialized) { - if (!container->iommu_data.type1.error) { - container->iommu_data.type1.error = ret; - } - } else { - hw_error("vfio: DMA mapping failed, unable to continue"); - } - } -} - -static void vfio_listener_region_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.type1.listener); - hwaddr iova, end; - int ret; - - if (vfio_listener_skipped_section(section)) { - trace_vfio_listener_region_del_skip( - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(int128_sub(section->size, int128_one()))); - return; - } - - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { - error_report("%s received unaligned region", __func__); - return; - } - - if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - - QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { - if (giommu->iommu == section->mr) { - memory_region_unregister_iommu_notifier(&giommu->n); - QLIST_REMOVE(giommu, giommu_next); - g_free(giommu); - break; - } - } - - /* - * FIXME: We assume the one big unmap below is adequate to - * remove any individual page mappings in the IOMMU which - * might have been copied into VFIO. This works for a page table - * based IOMMU where a big unmap flattens a large range of IO-PTEs. - * That may not be true for all IOMMU types. 
- */ - } - - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - end = (section->offset_within_address_space + int128_get64(section->size)) & - TARGET_PAGE_MASK; - - if (iova >= end) { - return; - } - - trace_vfio_listener_region_del(iova, end - 1); - - ret = vfio_dma_unmap(container, iova, end - iova); - memory_region_unref(section->mr); - if (ret) { - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%m)", - container, iova, end - iova, ret); - } -} - -static MemoryListener vfio_memory_listener = { - .region_add = vfio_listener_region_add, - .region_del = vfio_listener_region_del, -}; - -static void vfio_listener_release(VFIOContainer *container) -{ - memory_listener_unregister(&container->iommu_data.type1.listener); -} - /* * Interrupt setup */ @@ -2849,47 +2298,6 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) } } -static int vfio_mmap_region(Object *obj, VFIORegion *region, - MemoryRegion *mem, MemoryRegion *submem, - void **map, size_t size, off_t offset, - const char *name) -{ - int ret = 0; - VFIODevice *vbasedev = region->vbasedev; - - if (VFIO_ALLOW_MMAP && size && region->flags & - VFIO_REGION_INFO_FLAG_MMAP) { - int prot = 0; - - if (region->flags & VFIO_REGION_INFO_FLAG_READ) { - prot |= PROT_READ; - } - - if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) { - prot |= PROT_WRITE; - } - - *map = mmap(NULL, size, prot, MAP_SHARED, - vbasedev->fd, region->fd_offset + offset); - if (*map == MAP_FAILED) { - *map = NULL; - ret = -errno; - goto empty_region; - } - - memory_region_init_ram_ptr(submem, obj, name, size, *map); - memory_region_set_skip_dump(submem); - } else { -empty_region: - /* Create a zero sized sub-region to make cleanup easy. 
*/ - memory_region_init(submem, obj, name, 0); - } - - memory_region_add_subregion(mem, offset, submem); - - return ret; -} - static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; @@ -3529,345 +2937,6 @@ static VFIODeviceOps vfio_pci_ops = { .vfio_populate_device = vfio_populate_device, }; -static void vfio_reset_handler(void *opaque) -{ - VFIOGroup *group; - VFIODevice *vbasedev; - - QLIST_FOREACH(group, &vfio_group_list, next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - vbasedev->ops->vfio_compute_needs_reset(vbasedev); - } - } - - QLIST_FOREACH(group, &vfio_group_list, next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - if (vbasedev->needs_reset) { - vbasedev->ops->vfio_hot_reset_multi(vbasedev); - } - } - } -} - -static void vfio_kvm_device_add_group(VFIOGroup *group) -{ -#ifdef CONFIG_KVM - struct kvm_device_attr attr = { - .group = KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_ADD, - .addr = (uint64_t)(unsigned long)&group->fd, - }; - - if (!kvm_enabled()) { - return; - } - - if (vfio_kvm_device_fd < 0) { - struct kvm_create_device cd = { - .type = KVM_DEV_TYPE_VFIO, - }; - - if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) { - error_report("KVM_CREATE_DEVICE: %m\n"); - return; - } - - vfio_kvm_device_fd = cd.fd; - } - - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_report("Failed to add group %d to KVM VFIO device: %m", - group->groupid); - } -#endif -} - -static void vfio_kvm_device_del_group(VFIOGroup *group) -{ -#ifdef CONFIG_KVM - struct kvm_device_attr attr = { - .group = KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_DEL, - .addr = (uint64_t)(unsigned long)&group->fd, - }; - - if (vfio_kvm_device_fd < 0) { - return; - } - - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_report("Failed to remove group %d from KVM VFIO device: %m", - group->groupid); - } -#endif -} - -static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as) -{ - 
VFIOAddressSpace *space; - - QLIST_FOREACH(space, &vfio_address_spaces, list) { - if (space->as == as) { - return space; - } - } - - /* No suitable VFIOAddressSpace, create a new one */ - space = g_malloc0(sizeof(*space)); - space->as = as; - QLIST_INIT(&space->containers); - - QLIST_INSERT_HEAD(&vfio_address_spaces, space, list); - - return space; -} - -static void vfio_put_address_space(VFIOAddressSpace *space) -{ - if (QLIST_EMPTY(&space->containers)) { - QLIST_REMOVE(space, list); - g_free(space); - } -} - -static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) -{ - VFIOContainer *container; - int ret, fd; - VFIOAddressSpace *space; - - space = vfio_get_address_space(as); - - QLIST_FOREACH(container, &space->containers, next) { - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - return 0; - } - } - - fd = qemu_open("/dev/vfio/vfio", O_RDWR); - if (fd < 0) { - error_report("vfio: failed to open /dev/vfio/vfio: %m"); - ret = -errno; - goto put_space_exit; - } - - ret = ioctl(fd, VFIO_GET_API_VERSION); - if (ret != VFIO_API_VERSION) { - error_report("vfio: supported vfio version: %d, " - "reported version: %d", VFIO_API_VERSION, ret); - ret = -EINVAL; - goto close_fd_exit; - } - - container = g_malloc0(sizeof(*container)); - container->space = space; - container->fd = fd; - - if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) { - ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); - if (ret) { - error_report("vfio: failed to set group container: %m"); - ret = -errno; - goto free_container_exit; - } - - ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); - if (ret) { - error_report("vfio: failed to set iommu for container: %m"); - ret = -errno; - goto free_container_exit; - } - - container->iommu_data.type1.listener = vfio_memory_listener; - container->iommu_data.release = vfio_listener_release; - - 
memory_listener_register(&container->iommu_data.type1.listener, - container->space->as); - - if (container->iommu_data.type1.error) { - ret = container->iommu_data.type1.error; - error_report("vfio: memory listener initialization failed for container"); - goto listener_release_exit; - } - - container->iommu_data.type1.initialized = true; - - } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) { - ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); - if (ret) { - error_report("vfio: failed to set group container: %m"); - ret = -errno; - goto free_container_exit; - } - - ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU); - if (ret) { - error_report("vfio: failed to set iommu for container: %m"); - ret = -errno; - goto free_container_exit; - } - - /* - * The host kernel code implementing VFIO_IOMMU_DISABLE is called - * when container fd is closed so we do not call it explicitly - * in this file. - */ - ret = ioctl(fd, VFIO_IOMMU_ENABLE); - if (ret) { - error_report("vfio: failed to enable container: %m"); - ret = -errno; - goto free_container_exit; - } - - container->iommu_data.type1.listener = vfio_memory_listener; - container->iommu_data.release = vfio_listener_release; - - memory_listener_register(&container->iommu_data.type1.listener, - container->space->as); - - } else { - error_report("vfio: No available IOMMU models"); - ret = -EINVAL; - goto free_container_exit; - } - - QLIST_INIT(&container->group_list); - QLIST_INSERT_HEAD(&space->containers, container, next); - - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - - return 0; - -listener_release_exit: - vfio_listener_release(container); - -free_container_exit: - g_free(container); - -close_fd_exit: - close(fd); - -put_space_exit: - vfio_put_address_space(space); - - return ret; -} - -static void vfio_disconnect_container(VFIOGroup *group) -{ - VFIOContainer *container = group->container; - - if (ioctl(group->fd, 
VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { - error_report("vfio: error disconnecting group %d from container", - group->groupid); - } - - QLIST_REMOVE(group, container_next); - group->container = NULL; - - if (QLIST_EMPTY(&container->group_list)) { - VFIOAddressSpace *space = container->space; - - if (container->iommu_data.release) { - container->iommu_data.release(container); - } - QLIST_REMOVE(container, next); - trace_vfio_disconnect_container(container->fd); - close(container->fd); - g_free(container); - - vfio_put_address_space(space); - } -} - -static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as) -{ - VFIOGroup *group; - char path[32]; - struct vfio_group_status status = { .argsz = sizeof(status) }; - - QLIST_FOREACH(group, &vfio_group_list, next) { - if (group->groupid == groupid) { - /* Found it. Now is it already in the right context? */ - if (group->container->space->as == as) { - return group; - } else { - error_report("vfio: group %d used in multiple address spaces", - group->groupid); - return NULL; - } - } - } - - group = g_malloc0(sizeof(*group)); - - snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); - group->fd = qemu_open(path, O_RDWR); - if (group->fd < 0) { - error_report("vfio: error opening %s: %m", path); - goto free_group_exit; - } - - if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { - error_report("vfio: error getting group status: %m"); - goto close_fd_exit; - } - - if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { - error_report("vfio: error, group %d is not viable, please ensure " - "all devices within the iommu_group are bound to their " - "vfio bus driver.", groupid); - goto close_fd_exit; - } - - group->groupid = groupid; - QLIST_INIT(&group->device_list); - - if (vfio_connect_container(group, as)) { - error_report("vfio: failed to setup container for group %d", groupid); - goto close_fd_exit; - } - - if (QLIST_EMPTY(&vfio_group_list)) { - qemu_register_reset(vfio_reset_handler, NULL); - } - - 
QLIST_INSERT_HEAD(&vfio_group_list, group, next); - - vfio_kvm_device_add_group(group); - - return group; - -close_fd_exit: - close(group->fd); - -free_group_exit: - g_free(group); - - return NULL; -} - -static void vfio_put_group(VFIOGroup *group) -{ - if (!QLIST_EMPTY(&group->device_list)) { - return; - } - - vfio_kvm_device_del_group(group); - vfio_disconnect_container(group); - QLIST_REMOVE(group, next); - trace_vfio_put_group(group->fd); - close(group->fd); - g_free(group); - - if (QLIST_EMPTY(&vfio_group_list)) { - qemu_unregister_reset(vfio_reset_handler, NULL); - } -} - static int vfio_populate_device(VFIODevice *vbasedev) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); @@ -3992,57 +3061,6 @@ error: return ret; } -static int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev) -{ - struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; - int ret; - - ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); - if (ret < 0) { - error_report("vfio: error getting device %s from group %d: %m", - name, group->groupid); - error_printf("Verify all devices in group %d are bound to vfio- " - "or pci-stub and not already in use\n", group->groupid); - return ret; - } - - vbasedev->fd = ret; - vbasedev->group = group; - QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); - - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_INFO, &dev_info); - if (ret) { - error_report("vfio: error getting device info: %m"); - goto error; - } - - vbasedev->num_irqs = dev_info.num_irqs; - vbasedev->num_regions = dev_info.num_regions; - vbasedev->flags = dev_info.flags; - - trace_vfio_get_device(name, dev_info.flags, - dev_info.num_regions, dev_info.num_irqs); - - vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); - - ret = vbasedev->ops->vfio_populate_device(vbasedev); - -error: - if (ret) { - vfio_put_base_device(vbasedev); - } - return ret; -} - -void vfio_put_base_device(VFIODevice *vbasedev) -{ - 
QLIST_REMOVE(vbasedev, next); - vbasedev->group = NULL; - trace_vfio_put_base_device(vbasedev->fd); - close(vbasedev->fd); -} - static void vfio_put_device(VFIOPCIDevice *vdev) { g_free(vdev->vbasedev.name); @@ -4426,47 +3444,3 @@ static void register_vfio_pci_dev_type(void) } type_init(register_vfio_pci_dev_type) - -static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, - int req, void *param) -{ - VFIOGroup *group; - VFIOContainer *container; - int ret = -1; - - group = vfio_get_group(groupid, as); - if (!group) { - error_report("vfio: group %d not registered", groupid); - return ret; - } - - container = group->container; - if (group->container) { - ret = ioctl(container->fd, req, param); - if (ret < 0) { - error_report("vfio: failed to ioctl container: ret=%d, %s", - ret, strerror(errno)); - } - } - - vfio_put_group(group); - - return ret; -} - -int vfio_container_ioctl(AddressSpace *as, int32_t groupid, - int req, void *param) -{ - /* We allow only certain ioctls to the container */ - switch (req) { - case VFIO_CHECK_EXTENSION: - case VFIO_IOMMU_SPAPR_TCE_GET_INFO: - break; - default: - /* Return an error on unknown requests */ - error_report("vfio: unsupported ioctl %X", req); - return -1; - } - - return vfio_container_do_ioctl(as, groupid, req, param); -} diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h new file mode 100644 index 0000000000..1d5bfe8fcb --- /dev/null +++ b/include/hw/vfio/vfio-common.h @@ -0,0 +1,151 @@ +/* + * common header for vfio based device assignment support + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Alex Williamson + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Based on qemu-kvm device-assignment: + * Adapted for KVM by Qumranet. 
+ * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + */ +#ifndef HW_VFIO_VFIO_COMMON_H +#define HW_VFIO_VFIO_COMMON_H + +#include "qemu-common.h" +#include "exec/address-spaces.h" +#include "exec/memory.h" +#include "qemu/queue.h" +#include "qemu/notify.h" + +/*#define DEBUG_VFIO*/ +#ifdef DEBUG_VFIO +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +/* Extra debugging, trap acceleration paths for more logging */ +#define VFIO_ALLOW_MMAP 1 +#define VFIO_ALLOW_KVM_INTX 1 +#define VFIO_ALLOW_KVM_MSI 1 +#define VFIO_ALLOW_KVM_MSIX 1 + +enum { + VFIO_DEVICE_TYPE_PCI = 0, +}; + +typedef struct VFIORegion { + struct VFIODevice *vbasedev; + off_t fd_offset; /* offset of region within device fd */ + MemoryRegion mem; /* slow, read/write access */ + MemoryRegion mmap_mem; /* direct mapped access */ + void *mmap; + size_t size; + uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ + uint8_t nr; /* cache the region number for debug */ +} VFIORegion; + +typedef struct VFIOAddressSpace { + AddressSpace *as; + QLIST_HEAD(, VFIOContainer) containers; + QLIST_ENTRY(VFIOAddressSpace) list; +} VFIOAddressSpace; + +struct VFIOGroup; + +typedef struct VFIOType1 { + MemoryListener listener; + int error; + bool initialized; +} VFIOType1; + +typedef struct VFIOContainer { + VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + struct { + /* enable abstraction to support various iommu backends */ + union { + VFIOType1 type1; + }; + void (*release)(struct VFIOContainer *); + } iommu_data; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_HEAD(, VFIOGroup) group_list; + 
QLIST_ENTRY(VFIOContainer) next; +} VFIOContainer; + +typedef struct VFIOGuestIOMMU { + VFIOContainer *container; + MemoryRegion *iommu; + Notifier n; + QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; +} VFIOGuestIOMMU; + +typedef struct VFIODeviceOps VFIODeviceOps; + +typedef struct VFIODevice { + QLIST_ENTRY(VFIODevice) next; + struct VFIOGroup *group; + char *name; + int fd; + int type; + bool reset_works; + bool needs_reset; + VFIODeviceOps *ops; + unsigned int num_irqs; + unsigned int num_regions; + unsigned int flags; +} VFIODevice; + +struct VFIODeviceOps { + void (*vfio_compute_needs_reset)(VFIODevice *vdev); + int (*vfio_hot_reset_multi)(VFIODevice *vdev); + void (*vfio_eoi)(VFIODevice *vdev); + int (*vfio_populate_device)(VFIODevice *vdev); +}; + +typedef struct VFIOGroup { + int fd; + int groupid; + VFIOContainer *container; + QLIST_HEAD(, VFIODevice) device_list; + QLIST_ENTRY(VFIOGroup) next; + QLIST_ENTRY(VFIOGroup) container_next; +} VFIOGroup; + +void vfio_put_base_device(VFIODevice *vbasedev); +void vfio_disable_irqindex(VFIODevice *vbasedev, int index); +void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); +void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index); +void vfio_region_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size); +uint64_t vfio_region_read(void *opaque, + hwaddr addr, unsigned size); +void vfio_listener_release(VFIOContainer *container); +int vfio_mmap_region(Object *vdev, VFIORegion *region, + MemoryRegion *mem, MemoryRegion *submem, + void **map, size_t size, off_t offset, + const char *name); +void vfio_reset_handler(void *opaque); +VFIOGroup *vfio_get_group(int groupid, AddressSpace *as); +void vfio_put_group(VFIOGroup *group); +int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev); + +extern const MemoryRegionOps vfio_region_ops; +extern const MemoryListener vfio_memory_listener; +extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list; +extern 
QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces; + +#endif /* !HW_VFIO_VFIO_COMMON_H */ diff --git a/trace-events b/trace-events index 0e7aa53407..8acbcce0f0 100644 --- a/trace-events +++ b/trace-events @@ -1416,6 +1416,7 @@ vfio_pci_reset(const char *name) " (%s)" vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET" vfio_pci_reset_pm(const char *name) "%s PCI PM Reset" +# hw/vfio/vfio-common.c vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)" vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64 vfio_iommu_map_notify(uint64_t iova_start, uint64_t iova_end) "iommu map @ %"PRIx64" - %"PRIx64 From dcbfc5cefb22e9219f8253dba87de33104ca73fe Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Dec 2014 10:37:27 -0700 Subject: [PATCH 14/14] vfio: Cleanup error_report()s With the conversion to tracepoints, a couple previous DPRINTFs are now quite a bit more visible and are really just informational. Remove these and add a bit more description to another.
Signed-off-by: Alex Williamson --- hw/vfio/common.c | 2 +- hw/vfio/pci.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 4f15c86b80..cf483fffa9 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -566,7 +566,7 @@ static void vfio_kvm_device_add_group(VFIOGroup *group) }; if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) { - error_report("KVM_CREATE_DEVICE: %m\n"); + error_report("Failed to create KVM VFIO device: %m\n"); return; } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index e380959dab..b4e73d1f35 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2801,8 +2801,6 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); if (vfio_pci_host_match(&host, &tmp->host)) { if (single) { - error_report("vfio: found another in-use device " - "%s\n", vbasedev_iter->name); ret = -EINVAL; goto out_single; } @@ -2815,7 +2813,6 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) } if (!single && !multi) { - error_report("vfio: No other in-use devices for multi hot reset\n"); ret = -EINVAL; goto out_single; }