mirror of https://github.com/xemu-project/xemu.git
VFIO update 2018-08-17
- Enhance balloon inhibitor for multiple users and use around vfio device assignment (Alex Williamson) -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.14 (GNU/Linux) iQIcBAABAgAGBQJbdvs5AAoJECObm247sIsioA4QAKWqKWnqOu4AwIdADzOEKTv+ jSG+M6S6uEBbhk6hC+scnIlyA6MwiTmh8uJYDOTPy/4S9kv4+Bj/LykoJxk1T3UI jREP+EHMUe13YfG//5yIb0HLIBEI7VJeBteyksboZ8KTO4kMt4RCnk1lpfARxIjB ouMIngGq9ltOWAann9tcjGaPXfKjB9LSSYyhQMlqSzl7LckOsGAgmdFU7uurlTrq a5kB2oRNP7CSbF/fDTsWd8nsCGy357Cou42KpsTRQN+TeLmqExQXguCQLszBup3H VYwS0FzpR1ix7jOuafHLcBh9VgSEFYWf71oznMoPDiv2vVjKGsDG3Tzu5Xqz0vsI ynKXCXNbXZhpazZzw8ThXoLs8mWLWSrtqvjQItDmrl0kboY6ZwOZc8m+PQmIqtHQ RDcVlCT0TecwYXBZ5G930JpcEsUFBKfZrla+v2W0ON6HVA6eVBweSata5r08XvHK P2rbHgO6NSDZTMZUOborh/sbINb61PNW7h7inigHzpDJxjseIz/vO+mVjfsCZMed dSu/0VYCbA0cS2IK21zZFsR4LNlL4yzFzhONdBqbZD7zhiZL25IcZV/TVzHJE+zS su7uDSLNihJYOXAXO7s0Ne2vO5lZzpYykVFOAmQQicxcYZz+u8LxXY+NlTjKJhEv x3fobaflymrG7xkRy3xq =JQ6k -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20180817.0' into staging VFIO update 2018-08-17 - Enhance balloon inhibitor for multiple users and use around vfio device assignment (Alex Williamson) # gpg: Signature made Fri 17 Aug 2018 17:43:37 BST # gpg: using RSA key 239B9B6E3BB08B22 # gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>" # gpg: aka "Alex Williamson <alex@shazbot.org>" # gpg: aka "Alex Williamson <alwillia@redhat.com>" # gpg: aka "Alex Williamson <alex.l.williamson@gmail.com>" # Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B 8A90 239B 9B6E 3BB0 8B22 * remotes/awilliam/tags/vfio-update-20180817.0: vfio/ccw/pci: Allow devices to opt-in for ballooning vfio: Inhibit ballooning based on group attachment to a container kvm: Use inhibit to prevent ballooning without synchronous mmu balloon: Allow multiple inhibit users Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
a544c9110d
|
@ -39,6 +39,7 @@
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
#include "hw/irq.h"
|
#include "hw/irq.h"
|
||||||
#include "sysemu/sev.h"
|
#include "sysemu/sev.h"
|
||||||
|
#include "sysemu/balloon.h"
|
||||||
|
|
||||||
#include "hw/boards.h"
|
#include "hw/boards.h"
|
||||||
|
|
||||||
|
@ -1698,6 +1699,9 @@ static int kvm_init(MachineState *ms)
|
||||||
s->many_ioeventfds = kvm_check_many_ioeventfds();
|
s->many_ioeventfds = kvm_check_many_ioeventfds();
|
||||||
|
|
||||||
s->sync_mmu = !!kvm_vm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
|
s->sync_mmu = !!kvm_vm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
|
||||||
|
if (!s->sync_mmu) {
|
||||||
|
qemu_balloon_inhibit(true);
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
|
13
balloon.c
13
balloon.c
|
@ -26,6 +26,7 @@
|
||||||
|
|
||||||
#include "qemu/osdep.h"
|
#include "qemu/osdep.h"
|
||||||
#include "qemu-common.h"
|
#include "qemu-common.h"
|
||||||
|
#include "qemu/atomic.h"
|
||||||
#include "exec/cpu-common.h"
|
#include "exec/cpu-common.h"
|
||||||
#include "sysemu/kvm.h"
|
#include "sysemu/kvm.h"
|
||||||
#include "sysemu/balloon.h"
|
#include "sysemu/balloon.h"
|
||||||
|
@ -37,16 +38,22 @@
|
||||||
static QEMUBalloonEvent *balloon_event_fn;
|
static QEMUBalloonEvent *balloon_event_fn;
|
||||||
static QEMUBalloonStatus *balloon_stat_fn;
|
static QEMUBalloonStatus *balloon_stat_fn;
|
||||||
static void *balloon_opaque;
|
static void *balloon_opaque;
|
||||||
static bool balloon_inhibited;
|
static int balloon_inhibit_count;
|
||||||
|
|
||||||
bool qemu_balloon_is_inhibited(void)
|
bool qemu_balloon_is_inhibited(void)
|
||||||
{
|
{
|
||||||
return balloon_inhibited;
|
return atomic_read(&balloon_inhibit_count) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void qemu_balloon_inhibit(bool state)
|
void qemu_balloon_inhibit(bool state)
|
||||||
{
|
{
|
||||||
balloon_inhibited = state;
|
if (state) {
|
||||||
|
atomic_inc(&balloon_inhibit_count);
|
||||||
|
} else {
|
||||||
|
atomic_dec(&balloon_inhibit_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(atomic_read(&balloon_inhibit_count) >= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool have_balloon(Error **errp)
|
static bool have_balloon(Error **errp)
|
||||||
|
|
|
@ -349,6 +349,15 @@ static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All vfio-ccw devices are believed to operate in a way compatible with
|
||||||
|
* memory ballooning, i.e. pages pinned in the host are in the current
|
||||||
|
* working set of the guest driver and therefore never overlap with pages
|
||||||
|
* available to the guest balloon driver. This needs to be set before
|
||||||
|
* vfio_get_device() for vfio common to handle the balloon inhibitor.
|
||||||
|
*/
|
||||||
|
vcdev->vdev.balloon_allowed = true;
|
||||||
|
|
||||||
if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
|
if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
|
||||||
goto out_err;
|
goto out_err;
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,7 @@
|
||||||
#include "hw/hw.h"
|
#include "hw/hw.h"
|
||||||
#include "qemu/error-report.h"
|
#include "qemu/error-report.h"
|
||||||
#include "qemu/range.h"
|
#include "qemu/range.h"
|
||||||
|
#include "sysemu/balloon.h"
|
||||||
#include "sysemu/kvm.h"
|
#include "sysemu/kvm.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
#include "qapi/error.h"
|
#include "qapi/error.h"
|
||||||
|
@ -1044,6 +1045,33 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
|
||||||
space = vfio_get_address_space(as);
|
space = vfio_get_address_space(as);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* VFIO is currently incompatible with memory ballooning insofar as the
|
||||||
|
* madvise to purge (zap) the page from QEMU's address space does not
|
||||||
|
* interact with the memory API and therefore leaves stale virtual to
|
||||||
|
* physical mappings in the IOMMU if the page was previously pinned. We
|
||||||
|
* therefore add a balloon inhibit for each group added to a container,
|
||||||
|
* whether the container is used individually or shared. This provides
|
||||||
|
* us with options to allow devices within a group to opt-in and allow
|
||||||
|
* ballooning, so long as it is done consistently for a group (for instance
|
||||||
|
* if the device is an mdev device where it is known that the host vendor
|
||||||
|
* driver will never pin pages outside of the working set of the guest
|
||||||
|
* driver, which would thus not be ballooning candidates).
|
||||||
|
*
|
||||||
|
* The first opportunity to induce pinning occurs here where we attempt to
|
||||||
|
* attach the group to existing containers within the AddressSpace. If any
|
||||||
|
* pages are already zapped from the virtual address space, such as from a
|
||||||
|
* previous ballooning opt-in, new pinning will cause valid mappings to be
|
||||||
|
* re-established. Likewise, when the overall MemoryListener for a new
|
||||||
|
* container is registered, a replay of mappings within the AddressSpace
|
||||||
|
* will occur, re-establishing any previously zapped pages as well.
|
||||||
|
*
|
||||||
|
* NB. Balloon inhibiting does not currently block operation of the
|
||||||
|
* balloon driver or revoke previously pinned pages; it only prevents
|
||||||
|
* calling madvise to modify the virtual mapping of ballooned pages.
|
||||||
|
*/
|
||||||
|
qemu_balloon_inhibit(true);
|
||||||
|
|
||||||
QLIST_FOREACH(container, &space->containers, next) {
|
QLIST_FOREACH(container, &space->containers, next) {
|
||||||
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
|
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
|
||||||
group->container = container;
|
group->container = container;
|
||||||
|
@ -1232,6 +1260,7 @@ close_fd_exit:
|
||||||
close(fd);
|
close(fd);
|
||||||
|
|
||||||
put_space_exit:
|
put_space_exit:
|
||||||
|
qemu_balloon_inhibit(false);
|
||||||
vfio_put_address_space(space);
|
vfio_put_address_space(space);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -1352,6 +1381,9 @@ void vfio_put_group(VFIOGroup *group)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!group->balloon_allowed) {
|
||||||
|
qemu_balloon_inhibit(false);
|
||||||
|
}
|
||||||
vfio_kvm_device_del_group(group);
|
vfio_kvm_device_del_group(group);
|
||||||
vfio_disconnect_container(group);
|
vfio_disconnect_container(group);
|
||||||
QLIST_REMOVE(group, next);
|
QLIST_REMOVE(group, next);
|
||||||
|
@ -1387,6 +1419,25 @@ int vfio_get_device(VFIOGroup *group, const char *name,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Clear the balloon inhibitor for this group if the driver knows the
|
||||||
|
* device operates compatibly with ballooning. Setting must be consistent
|
||||||
|
* per group, but since compatibility is really only possible with mdev
|
||||||
|
* currently, we expect singleton groups.
|
||||||
|
*/
|
||||||
|
if (vbasedev->balloon_allowed != group->balloon_allowed) {
|
||||||
|
if (!QLIST_EMPTY(&group->device_list)) {
|
||||||
|
error_setg(errp,
|
||||||
|
"Inconsistent device balloon setting within group");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!group->balloon_allowed) {
|
||||||
|
group->balloon_allowed = true;
|
||||||
|
qemu_balloon_inhibit(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
vbasedev->fd = fd;
|
vbasedev->fd = fd;
|
||||||
vbasedev->group = group;
|
vbasedev->group = group;
|
||||||
QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
|
QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
|
||||||
|
|
|
@ -2804,12 +2804,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||||
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
|
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
|
||||||
VFIODevice *vbasedev_iter;
|
VFIODevice *vbasedev_iter;
|
||||||
VFIOGroup *group;
|
VFIOGroup *group;
|
||||||
char *tmp, group_path[PATH_MAX], *group_name;
|
char *tmp, *subsys, group_path[PATH_MAX], *group_name;
|
||||||
Error *err = NULL;
|
Error *err = NULL;
|
||||||
ssize_t len;
|
ssize_t len;
|
||||||
struct stat st;
|
struct stat st;
|
||||||
int groupid;
|
int groupid;
|
||||||
int i, ret;
|
int i, ret;
|
||||||
|
bool is_mdev;
|
||||||
|
|
||||||
if (!vdev->vbasedev.sysfsdev) {
|
if (!vdev->vbasedev.sysfsdev) {
|
||||||
if (!(~vdev->host.domain || ~vdev->host.bus ||
|
if (!(~vdev->host.domain || ~vdev->host.bus ||
|
||||||
|
@ -2869,6 +2870,27 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mediated devices *might* operate compatibly with memory ballooning, but
|
||||||
|
* we cannot know for certain, it depends on whether the mdev vendor driver
|
||||||
|
* stays in sync with the active working set of the guest driver. Prevent
|
||||||
|
* the x-balloon-allowed option unless this is minimally an mdev device.
|
||||||
|
*/
|
||||||
|
tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev);
|
||||||
|
subsys = realpath(tmp, NULL);
|
||||||
|
g_free(tmp);
|
||||||
|
is_mdev = (strcmp(subsys, "/sys/bus/mdev") == 0);
|
||||||
|
free(subsys);
|
||||||
|
|
||||||
|
trace_vfio_mdev(vdev->vbasedev.name, is_mdev);
|
||||||
|
|
||||||
|
if (vdev->vbasedev.balloon_allowed && !is_mdev) {
|
||||||
|
error_setg(errp, "x-balloon-allowed only potentially compatible "
|
||||||
|
"with mdev devices");
|
||||||
|
vfio_put_group(group);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
|
ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
vfio_put_group(group);
|
vfio_put_group(group);
|
||||||
|
@ -3170,6 +3192,8 @@ static Property vfio_pci_dev_properties[] = {
|
||||||
DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
|
DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
|
||||||
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
|
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
|
||||||
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
|
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
|
||||||
|
DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
|
||||||
|
vbasedev.balloon_allowed, false),
|
||||||
DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
|
DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
|
||||||
DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
|
DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
|
||||||
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
|
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
|
||||||
|
|
|
@ -39,6 +39,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %
|
||||||
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
|
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
|
||||||
vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
|
vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
|
||||||
vfio_realize(const char *name, int group_id) " (%s) group %d"
|
vfio_realize(const char *name, int group_id) " (%s) group %d"
|
||||||
|
vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
|
||||||
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
|
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
|
||||||
vfio_pci_reset(const char *name) " (%s)"
|
vfio_pci_reset(const char *name) " (%s)"
|
||||||
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
|
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
|
||||||
|
|
|
@ -21,7 +21,6 @@
|
||||||
#include "hw/mem/pc-dimm.h"
|
#include "hw/mem/pc-dimm.h"
|
||||||
#include "sysemu/balloon.h"
|
#include "sysemu/balloon.h"
|
||||||
#include "hw/virtio/virtio-balloon.h"
|
#include "hw/virtio/virtio-balloon.h"
|
||||||
#include "sysemu/kvm.h"
|
|
||||||
#include "exec/address-spaces.h"
|
#include "exec/address-spaces.h"
|
||||||
#include "qapi/error.h"
|
#include "qapi/error.h"
|
||||||
#include "qapi/qapi-events-misc.h"
|
#include "qapi/qapi-events-misc.h"
|
||||||
|
@ -36,8 +35,7 @@
|
||||||
|
|
||||||
static void balloon_page(void *addr, int deflate)
|
static void balloon_page(void *addr, int deflate)
|
||||||
{
|
{
|
||||||
if (!qemu_balloon_is_inhibited() && (!kvm_enabled() ||
|
if (!qemu_balloon_is_inhibited()) {
|
||||||
kvm_has_sync_mmu())) {
|
|
||||||
qemu_madvise(addr, BALLOON_PAGE_SIZE,
|
qemu_madvise(addr, BALLOON_PAGE_SIZE,
|
||||||
deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
|
deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
|
||||||
}
|
}
|
||||||
|
|
|
@ -112,6 +112,7 @@ typedef struct VFIODevice {
|
||||||
bool reset_works;
|
bool reset_works;
|
||||||
bool needs_reset;
|
bool needs_reset;
|
||||||
bool no_mmap;
|
bool no_mmap;
|
||||||
|
bool balloon_allowed;
|
||||||
VFIODeviceOps *ops;
|
VFIODeviceOps *ops;
|
||||||
unsigned int num_irqs;
|
unsigned int num_irqs;
|
||||||
unsigned int num_regions;
|
unsigned int num_regions;
|
||||||
|
@ -131,6 +132,7 @@ typedef struct VFIOGroup {
|
||||||
QLIST_HEAD(, VFIODevice) device_list;
|
QLIST_HEAD(, VFIODevice) device_list;
|
||||||
QLIST_ENTRY(VFIOGroup) next;
|
QLIST_ENTRY(VFIOGroup) next;
|
||||||
QLIST_ENTRY(VFIOGroup) container_next;
|
QLIST_ENTRY(VFIOGroup) container_next;
|
||||||
|
bool balloon_allowed;
|
||||||
} VFIOGroup;
|
} VFIOGroup;
|
||||||
|
|
||||||
typedef struct VFIODMABuf {
|
typedef struct VFIODMABuf {
|
||||||
|
|
Loading…
Reference in New Issue