From 4bd683d6f19dfe05a628af5deca2009e2a1783bf Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 15 Aug 2024 14:52:45 +0100 Subject: [PATCH 1/8] hw/vfio/pci.c: Use correct type in trace_vfio_msix_early_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tracepoint trace_vfio_msix_early_setup() uses "int" for the type of the table_bar argument, but we use this to print a uint32_t. Coverity warns that this means that we could end up treating it as a negative number. We only use this in printing the value in the tracepoint, so mishandling it as a negative number would be harmless, but it's better to use the right type in the tracepoint. Use uint64_t to match how we print the table_offset in the vfio_msix_relo() tracepoint. Resolves: Coverity CID 1547690 Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Cédric Le Goater --- hw/vfio/trace-events | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 98bd4dccea..c475c273fd 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -27,7 +27,7 @@ vfio_vga_read(uint64_t addr, int size, uint64_t data) " (0x%"PRIx64", %d) = 0x%" vfio_pci_read_config(const char *name, int addr, int len, int val) " (%s, @0x%x, len=0x%x) 0x%x" vfio_pci_write_config(const char *name, int addr, int val, int len) " (%s, @0x%x, 0x%x, len=0x%x)" vfio_msi_setup(const char *name, int pos) "%s PCI MSI CAP @0x%x" -vfio_msix_early_setup(const char *name, int pos, int table_bar, int offset, int entries, bool noresize) "%s PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d, noresize %d" +vfio_msix_early_setup(const char *name, int pos, int table_bar, uint64_t offset, int entries, bool noresize) "%s PCI MSI-X CAP @0x%x, BAR %d, offset 0x%"PRIx64", entries %d, noresize %d" vfio_check_pcie_flr(const char *name) "%s Supports FLR via PCIe cap" vfio_check_pm_reset(const char *name) "%s Supports PM reset" vfio_check_af_flr(const char *name) "%s Supports FLR via AF cap" From e433f208973ffee4ca9159ebc7c8f6ab520358fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corvin=20K=C3=B6hne?= Date: Wed, 28 Aug 2024 15:43:22 +0200 Subject: [PATCH 2/8] vfio/igd: return an invalid generation for unknown devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel changes it's specification quite often e.g. the location and size of the BDSM register has change for gen 11 devices and later. This causes our emulation to fail on those devices. So, it's impossible for us to use a suitable default value for unknown devices. Instead of returning a random generation value and hoping that everthing works fine, we should verify that different devices are working and add them to our list of known devices. Signed-off-by: Corvin Köhne Reviewed-by: Alex Williamson --- hw/vfio/igd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index d320d032a7..650a323dda 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -90,7 +90,11 @@ static int igd_gen(VFIOPCIDevice *vdev) return 8; } - return 8; /* Assume newer is compatible */ + /* + * Unfortunately, Intel changes it's specification quite often. This makes + * it impossible to use a suitable default value for unknown devices. + */ + return -1; } typedef struct VFIOIGDQuirk { From abd9dda914eac1fdcf21ce9b3ddd79ebe0172db2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corvin=20K=C3=B6hne?= Date: Wed, 28 Aug 2024 15:43:23 +0200 Subject: [PATCH 3/8] vfio/igd: support legacy mode for all known generations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're soon going to add support for legacy mode to ElkhartLake and TigerLake devices. Those are gen 11 and 12 devices. At the moment, all devices identified by our igd_gen function do support legacy mode. This won't change when adding our new devices of gen 11 and 12. Therefore, it makes more sense to accept legacy mode for all known devices instead of maintaining a long list of known good generations. If we add a new generation to igd_gen which doesn't support legacy mode for some reason, it'll be easy to advance the check to reject legacy mode for this specific generation. Signed-off-by: Corvin Köhne Reviewed-by: Alex Williamson --- hw/vfio/igd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index 650a323dda..d5e57656a8 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -416,7 +416,7 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) * devices maintain compatibility with generation 8. */ gen = igd_gen(vdev); - if (gen != 6 && gen != 8) { + if (gen == -1) { error_report("IGD device %s is unsupported in legacy mode, " "try SandyBridge or newer", vdev->vbasedev.name); return; From 7bafcd17672934284d59b82a2e2a876566c3f6f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corvin=20K=C3=B6hne?= Date: Wed, 28 Aug 2024 15:43:24 +0200 Subject: [PATCH 4/8] vfio/igd: use new BDSM register location and size for gen 11 and later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel changed the location and size of the BDSM register for gen 11 devices and later. We have to adjust our emulation for these devices to properly support them. Signed-off-by: Corvin Köhne Reviewed-by: Alex Williamson --- hw/vfio/igd.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index d5e57656a8..0b6533bbf7 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -100,11 +100,12 @@ static int igd_gen(VFIOPCIDevice *vdev) typedef struct VFIOIGDQuirk { struct VFIOPCIDevice *vdev; uint32_t index; - uint32_t bdsm; + uint64_t bdsm; } VFIOIGDQuirk; #define IGD_GMCH 0x50 /* Graphics Control Register */ #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ +#define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ /* @@ -313,9 +314,13 @@ static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr, */ if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) { if (gen < 8 || (igd->index % 8 == 1)) { - uint32_t base; + uint64_t base; - base = pci_get_long(vdev->pdev.config + IGD_BDSM); + if (gen < 11) { + base = pci_get_long(vdev->pdev.config + IGD_BDSM); + } else { + base = pci_get_quad(vdev->pdev.config + IGD_BDSM_GEN11); + } if (!base) { hw_error("vfio-igd: Guest attempted to program IGD GTT before " "BIOS reserved stolen memory. Unsupported BIOS?"); @@ -519,7 +524,13 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) igd = quirk->data = g_malloc0(sizeof(*igd)); igd->vdev = vdev; igd->index = ~0; - igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4); + if (gen < 11) { + igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4); + } else { + igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11, 4); + igd->bdsm |= + (uint64_t)vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11 + 4, 4) << 32; + } igd->bdsm &= ~((1 * MiB) - 1); /* 1MB aligned */ memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk, @@ -577,9 +588,15 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); /* BDSM is read-write, emulated. The BIOS needs to be able to write it */ - pci_set_long(vdev->pdev.config + IGD_BDSM, 0); - pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); - pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0); + if (gen < 11) { + pci_set_long(vdev->pdev.config + IGD_BDSM, 0); + pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); + pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0); + } else { + pci_set_quad(vdev->pdev.config + IGD_BDSM_GEN11, 0); + pci_set_quad(vdev->pdev.wmask + IGD_BDSM_GEN11, ~0); + pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); + } /* * This IOBAR gives us access to GTTADR, which allows us to write to From 11b5ce95beecfd51d1b17858d23fe9cbb0b5783f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corvin=20K=C3=B6hne?= Date: Wed, 28 Aug 2024 15:43:25 +0200 Subject: [PATCH 5/8] vfio/igd: add new bar0 quirk to emulate BDSM mirror MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BDSM register is mirrored into MMIO space at least for gen 11 and later devices. Unfortunately, the Windows driver reads the register value from MMIO space instead of PCI config space for those devices [1]. Therefore, we either have to keep a 1:1 mapping for the host and guest address or we have to emulate the MMIO register too. Using the igd in legacy mode is already hard due to it's many constraints. Keeping a 1:1 mapping may not work in all cases and makes it even harder to use. An MMIO emulation has to trap the whole MMIO page. This makes accesses to this page slower compared to using second level address translation. Nevertheless, it doesn't have any constraints and I haven't noticed any performance degradation yet making it a better solution. [1] https://github.com/projectacrn/acrn-hypervisor/blob/5c351bee0f6ae46250eefc07f44b4a31e770f3cf/devicemodel/hw/pci/passthrough.c#L650-L653 Signed-off-by: Corvin Köhne Reviewed-by: Alex Williamson --- hw/vfio/igd.c | 98 ++++++++++++++++++++++++++++++++++++++++++++ hw/vfio/pci-quirks.c | 1 + hw/vfio/pci.h | 1 + 3 files changed, 100 insertions(+) diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index 0b6533bbf7..0d68c6a451 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -374,6 +374,104 @@ static const MemoryRegionOps vfio_igd_index_quirk = { .endianness = DEVICE_LITTLE_ENDIAN, }; +#define IGD_BDSM_MMIO_OFFSET 0x1080C0 + +static uint64_t vfio_igd_quirk_bdsm_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIOPCIDevice *vdev = opaque; + uint64_t offset; + + offset = IGD_BDSM_GEN11 + addr; + + switch (size) { + case 1: + return pci_get_byte(vdev->pdev.config + offset); + case 2: + return pci_get_word(vdev->pdev.config + offset); + case 4: + return pci_get_long(vdev->pdev.config + offset); + case 8: + return pci_get_quad(vdev->pdev.config + offset); + default: + hw_error("igd: unsupported read size, %u bytes", size); + break; + } + + return 0; +} + +static void vfio_igd_quirk_bdsm_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOPCIDevice *vdev = opaque; + uint64_t offset; + + offset = IGD_BDSM_GEN11 + addr; + + switch (size) { + case 1: + pci_set_byte(vdev->pdev.config + offset, data); + break; + case 2: + pci_set_word(vdev->pdev.config + offset, data); + break; + case 4: + pci_set_long(vdev->pdev.config + offset, data); + break; + case 8: + pci_set_quad(vdev->pdev.config + offset, data); + break; + default: + hw_error("igd: unsupported read size, %u bytes", size); + break; + } +} + +static const MemoryRegionOps vfio_igd_bdsm_quirk = { + .read = vfio_igd_quirk_bdsm_read, + .write = vfio_igd_quirk_bdsm_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) +{ + VFIOQuirk *quirk; + int gen; + + /* + * This must be an Intel VGA device at address 00:02.0 for us to even + * consider enabling legacy mode. Some driver have dependencies on the PCI + * bus address. + */ + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || + !vfio_is_vga(vdev) || nr != 0 || + &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), + 0, PCI_DEVFN(0x2, 0))) { + return; + } + + /* + * Only on IGD devices of gen 11 and above, the BDSM register is mirrored + * into MMIO space and read from MMIO space by the Windows driver. + */ + gen = igd_gen(vdev); + if (gen < 11) { + return; + } + + quirk = vfio_quirk_alloc(1); + quirk->data = vdev; + + memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_bdsm_quirk, + vdev, "vfio-igd-bdsm-quirk", 8); + memory_region_add_subregion_overlap(vdev->bars[0].region.mem, + IGD_BDSM_MMIO_OFFSET, &quirk->mem[0], + 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); +} + void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) { g_autofree struct vfio_region_info *rom = NULL; diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 39dae72497..d37f722cce 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1259,6 +1259,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) vfio_probe_nvidia_bar0_quirk(vdev, nr); vfio_probe_rtl8168_bar2_quirk(vdev, nr); #ifdef CONFIG_VFIO_IGD + vfio_probe_igd_bar0_quirk(vdev, nr); vfio_probe_igd_bar4_quirk(vdev, nr); #endif } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index bf67df2fbc..5ad090a229 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -215,6 +215,7 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev); bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp); void vfio_quirk_reset(VFIOPCIDevice *vdev); VFIOQuirk *vfio_quirk_alloc(int nr_mem); +void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr); void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); extern const PropertyInfo qdev_prop_nv_gpudirect_clique; From 9c86b9fb43cf724c4adcf234dc4c6d20d83ee370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corvin=20K=C3=B6hne?= Date: Wed, 28 Aug 2024 15:43:26 +0200 Subject: [PATCH 6/8] vfio/igd: add ID's for ElkhartLake and TigerLake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ElkhartLake and TigerLake devices were tested in legacy mode with Linux and Windows VMs. Both are working properly. It's likely that other Intel GPUs of gen 11 and 12 like IceLake device are working too. However, we're only adding known good devices for now. Signed-off-by: Corvin Köhne Reviewed-by: Alex Williamson --- hw/vfio/igd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index 0d68c6a451..8a41b16421 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -88,6 +88,12 @@ static int igd_gen(VFIOPCIDevice *vdev) case 0x2200: case 0x5900: return 8; + /* ElkhartLake */ + case 0x4500: + return 11; + /* TigerLake */ + case 0x9A00: + return 12; } /* From 971ca22f041b8a1e67314a777caf3ce6f2832034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corvin=20K=C3=B6hne?= Date: Wed, 28 Aug 2024 15:43:27 +0200 Subject: [PATCH 7/8] vfio/igd: don't set stolen memory size to zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stolen memory is required for the GOP (EFI) driver and the Windows driver. While the GOP driver seems to work with any stolen memory size, the Windows driver will crash if the size doesn't match the size allocated by the host BIOS. For that reason, it doesn't make sense to overwrite the stolen memory size. It's true that this wastes some VM memory. In the worst case, the stolen memory can take up more than a GB. However, that's uncommon. Additionally, it's likely that a bunch of RAM is assigned to VMs making use of GPU passthrough. Signed-off-by: Corvin Köhne Reviewed-by: Alex Williamson --- hw/vfio/igd.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index 8a41b16421..0751c43eae 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -478,6 +478,23 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); } +static int igd_get_stolen_mb(int gen, uint32_t gmch) +{ + int gms; + + if (gen < 8) { + gms = (gmch >> 3) & 0x1f; + } else { + gms = (gmch >> 8) & 0xff; + } + + if (gms > 0x10) { + error_report("Unsupported IGD GMS value 0x%x", gms); + return 0; + } + return gms * 32; +} + void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) { g_autofree struct vfio_region_info *rom = NULL; @@ -655,23 +672,7 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) ggms_mb = 1 << ggms_mb; } - /* - * Assume we have no GMS memory, but allow it to be overridden by device - * option (experimental). The spec doesn't actually allow zero GMS when - * when IVD (IGD VGA Disable) is clear, but the claim is that it's unused, - * so let's not waste VM memory for it. - */ - gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8)); - - if (vdev->igd_gms) { - if (vdev->igd_gms <= 0x10) { - gms_mb = vdev->igd_gms * 32; - gmch |= vdev->igd_gms << (gen < 8 ? 3 : 8); - } else { - error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms); - vdev->igd_gms = 0; - } - } + gms_mb = igd_get_stolen_mb(gen, gmch); /* * Request reserved memory for stolen memory via fw_cfg. VM firmware From 8719224166832ff8230d7dd8599f42bd60e2eb96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corvin=20K=C3=B6hne?= Date: Wed, 28 Aug 2024 15:43:28 +0200 Subject: [PATCH 8/8] vfio/igd: correctly calculate stolen memory size for gen 9 and later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have to update the calculation of the stolen memory size because we've seen devices using values of 0xf0 and above for the graphics mode select field. The new calculation was taken from the linux kernel [1]. [1] https://github.com/torvalds/linux/blob/7c626ce4bae1ac14f60076d00eafe71af30450ba/arch/x86/kernel/early-quirks.c#L455-L460 Signed-off-by: Corvin Köhne Reviewed-by: Alex Williamson --- hw/vfio/igd.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index 0751c43eae..a95d441f68 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -488,11 +488,18 @@ static int igd_get_stolen_mb(int gen, uint32_t gmch) gms = (gmch >> 8) & 0xff; } - if (gms > 0x10) { - error_report("Unsupported IGD GMS value 0x%x", gms); - return 0; + if (gen < 9) { + if (gms > 0x10) { + error_report("Unsupported IGD GMS value 0x%x", gms); + return 0; + } + return gms * 32; + } else { + if (gms < 0xf0) + return gms * 32; + else + return gms * 4 + 4; } - return gms * 32; } void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)