diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index b9e606e364..913796f437 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -27,6 +27,7 @@ #include "trace.h" #include "qapi/error.h" #include "qemu/error-report.h" +#include "qemu/units.h" #include "monitor/monitor.h" /* @@ -406,8 +407,35 @@ int vfio_region_mmap(VFIORegion *region) prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0; for (i = 0; i < region->nr_mmaps; i++) { - region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, - MAP_SHARED, region->vbasedev->fd, + size_t align = MIN(1ULL << ctz64(region->mmaps[i].size), 1 * GiB); + void *map_base, *map_align; + + /* + * Align the mmap for more efficient mapping in the kernel. Ideally + * we'd know the PMD and PUD mapping sizes to use as discrete alignment + * intervals, but we don't. As of Linux v6.12, the largest PUD size + * supporting huge pfnmap is 1GiB (ARCH_SUPPORTS_PUD_PFNMAP is only set + * on x86_64). Align by power-of-two size, capped at 1GiB. + * + * NB. qemu_memalign() and friends actually allocate memory, whereas + * the region size here can exceed host memory, therefore we manually + * create an oversized anonymous mapping and clean it up for alignment. + */ + map_base = mmap(0, region->mmaps[i].size + align, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (map_base == MAP_FAILED) { + ret = -errno; + goto no_mmap; + } + + map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align); + munmap(map_base, map_align - map_base); + munmap(map_align + region->mmaps[i].size, + align - (map_align - map_base)); + + region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot, + MAP_SHARED | MAP_FIXED, + region->vbasedev->fd, region->fd_offset + region->mmaps[i].offset); if (region->mmaps[i].mmap == MAP_FAILED) {