mirror of https://github.com/xemu-project/xemu.git
Migration Pull request
Merge tag 'migration-20230206-pull-request' of https://gitlab.com/juan.quintela/qemu into staging

In this try:
- rebase to latest upstream
- same as the previous try
- fix compilation on non-Linux (userfaultfd.h) (me)
- query-migrationthreads (jiang)
- fix race on reading MultiFDPages_t.block (zhenzhong)
- fix flush of zero copy page send request (zhenzhong)

Please apply.

The previous try included:
- David Hildenbrand's fixes for virtio-mem
- David Gilbert's canary to detect problems
- fix for rdma return values (Fiona)
- Peter Xu's uffd_open fixes
- Peter Xu's "show right downtime for postcopy"
- manish.mishra's msg fix fixes
- my vfio changes

Please apply.

-----BEGIN PGP SIGNATURE-----

iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmPhobYACgkQ9IfvGFhy
1yMNaA/9EHDPqrI1HL/VkJG4nNOOsQR7RbburXEberZOzvLjnqpjUD3Ls9qV6rx+
ieHa5T4imYJFk72Wa5vx4r1/dCjtJD2W6jg5+/0nTvYAHrs1U1VRqpuTr0HiXdbJ
ZLLCnW5eDyO3eMaOX0MUkgHgL0FNkc/Lq5ViCTFsMu9O9xMuDLLdAC3cdvslKuOu
X1gKByr9jT817Y9e36amYmRaJKC6Cr/PIekNVFu12HBW79pPusLX8KWEf4RBw4HR
sPwTvMCR/BwZ0+2Lppan60G5rt/ZxDu40oU7y+RHlfWqevl4hDM84/nhjMvEgzc5
a4Ahe2ERGLwwnC8z3l7v9+pEzSGzDoPcnRGvZcpUpk68wTDtxd5Bdq8CwmNUfL07
VzWcYpH0yvmwjBba9jfn9fAVgnG5rVp558XcYLIII3wEToty3UDtm43wSdj2CGr6
cu+IPAp+n/I5G9SRYBTU9ozJz45ttnEe0hxUtZ4I3MuhzHi1VEDAqTWM/X0LyS41
TB3Y5B2KKpJYbPyZEH4nyTeetR2k7alTFzahCgKqVfOgL0nJx54petjS1K+B1P72
g6lhP9WnQ33W+M8S7J/aGEaDJd1lFyFB2Rdjn2ZZnASH/fR9j0mFmXWvulXtjFNp
Sfim3887+Iv4Uzw4VWEe3mM5Ypi/Ba2CmuTjy/pM08Ey8X1Qs5o=
=ZQbR
-----END PGP SIGNATURE-----

gpg: Signature made Tue 07 Feb 2023 00:56:22 GMT
gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
gpg: aka "Juan Quintela <quintela@trasno.org>" [full]
Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723

* tag 'migration-20230206-pull-request' of https://gitlab.com/juan.quintela/qemu: (30 commits)
  migration: save/delete migration thread info
  migration: Introduce interface query-migrationthreads
  multifd: Fix flush of zero copy page send request
  multifd: Fix a race on reading MultiFDPages_t.block
  migration: check magic value for deciding the mapping of channels
  io: Add support for MSG_PEEK for socket channel
  migration/dirtyrate: Show sample pages only in page-sampling mode
  migration: Perform vmsd structure check during tests
  migration: Add canary to VMSTATE_END_OF_LIST
  migration/rdma: fix return value for qio_channel_rdma_{readv,writev}
  migration: Show downtime during postcopy phase
  virtio-mem: Proper support for preallocation with migration
  virtio-mem: Migrate immutable properties early
  virtio-mem: Fail if a memory backend with "prealloc=on" is specified
  migration/ram: Factor out check for advised postcopy
  migration/vmstate: Introduce VMSTATE_WITH_TMP_TEST() and VMSTATE_BITMAP_TEST()
  migration/savevm: Allow immutable device state to be migrated early (i.e., before RAM)
  migration/savevm: Prepare vmdesc json writer in qemu_savevm_state_setup()
  migration/savevm: Move more savevm handling into vmstate_save()
  migration/ram: Optimize ram_write_tracking_start() for RamDiscardManager
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit b86307ecef
@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
     if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
         ret = qio_channel_readv_full(s->ioc, &iov, 1,
                                      &msgfds, &msgfds_num,
-                                     NULL);
+                                     0, NULL);
     } else {
         ret = qio_channel_readv_full(s->ioc, &iov, 1,
                                      NULL, NULL,
-                                     NULL);
+                                     0, NULL);
     }

     if (msgfds_num) {
@@ -0,0 +1,7 @@
+# Boards:
+#
+CONFIG_ISAPC=n
+CONFIG_I440FX=n
+CONFIG_Q35=n
+CONFIG_MICROVM=y
+
@@ -0,0 +1,6 @@
+# Boards:
+#
+CONFIG_ISAPC=y
+CONFIG_I440FX=y
+CONFIG_Q35=y
+CONFIG_MICROVM=y
@@ -482,15 +482,17 @@ An iterative device must provide:
 - A ``load_setup`` function that initialises the data structures on the
   destination.

-- A ``save_live_pending`` function that is called repeatedly and must
-  indicate how much more data the iterative data must save. The core
-  migration code will use this to determine when to pause the CPUs
-  and complete the migration.
+- A ``state_pending_exact`` function that indicates how much more
+  data we must save. The core migration code will use this to
+  determine when to pause the CPUs and complete the migration.

-- A ``save_live_iterate`` function (called after ``save_live_pending``
-  when there is significant data still to be sent). It should send
-  a chunk of data until the point that stream bandwidth limits tell it
-  to stop. Each call generates one section.
+- A ``state_pending_estimate`` function that indicates how much more
+  data we must save. When the estimated amount is smaller than the
+  threshold, we call ``state_pending_exact``.
+
+- A ``save_live_iterate`` function should send a chunk of data until
+  the point that stream bandwidth limits tell it to stop. Each call
+  generates one section.

 - A ``save_live_complete_precopy`` function that must transmit the
   last section for the device containing any remaining data.
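The split between the cheap estimate and the exact query is easiest to see in how a device wires up its handlers. The following is a hedged sketch, not code from this pull request: every mydev_*/MyDevState name is hypothetical, and only the callback signatures and SaveVMHandlers field names are taken from the patches further down.

typedef struct MyDevState {
    uint64_t approx_pending_bytes;   /* hypothetical cached dirty counter */
} MyDevState;

static void mydev_state_pending_estimate(void *opaque,
                                         uint64_t *res_precopy_only,
                                         uint64_t *res_compatible,
                                         uint64_t *res_postcopy_only)
{
    MyDevState *s = opaque;

    /* Cheap guess from a cached counter; called on every iteration. */
    *res_precopy_only += s->approx_pending_bytes;
}

static void mydev_state_pending_exact(void *opaque,
                                      uint64_t *res_precopy_only,
                                      uint64_t *res_compatible,
                                      uint64_t *res_postcopy_only)
{
    MyDevState *s = opaque;

    /*
     * Expensive, precise count; only called once the estimate drops
     * below the threshold and stopping the CPUs is being considered.
     */
    *res_precopy_only += s->approx_pending_bytes; /* stand-in for a real sync */
}

static SaveVMHandlers savevm_mydev_handlers = {
    .save_setup = mydev_save_setup,                    /* assumed elsewhere */
    .save_live_iterate = mydev_save_iterate,           /* assumed elsewhere */
    .save_live_complete_precopy = mydev_save_complete, /* assumed elsewhere */
    .state_pending_estimate = mydev_state_pending_estimate,
    .state_pending_exact = mydev_state_pending_exact,
};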
@@ -28,7 +28,7 @@ VFIO implements the device hooks for the iterative approach as follows:
 * A ``load_setup`` function that sets up the migration region on the
   destination and sets _RESUMING flag in the VFIO device state.

-* A ``save_live_pending`` function that reads pending_bytes from the vendor
+* A ``state_pending_exact`` function that reads pending_bytes from the vendor
   driver, which indicates the amount of data that the vendor driver has yet to
   save for the VFIO device.

@@ -114,7 +114,7 @@ Live migration save path
                   (RUNNING, _SETUP, _RUNNING|_SAVING)
                                   |
                   (RUNNING, _ACTIVE, _RUNNING|_SAVING)
-      If device is active, get pending_bytes by .save_live_pending()
+      If device is active, get pending_bytes by .state_pending_exact()
       If total pending_bytes >= threshold_size, call .save_live_iterate()
           Data of VFIO device for pre-copy phase is copied
         Iterate till total pending bytes converge and are less than threshold
@@ -41,7 +41,9 @@
 #include "hw/virtio/virtio-pci.h"
 #include "qom/object_interfaces.h"

-GlobalProperty hw_compat_7_2[] = {};
+GlobalProperty hw_compat_7_2[] = {
+    { "virtio-mem", "x-early-migration", "false" },
+};
 const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);

 GlobalProperty hw_compat_7_1[] = {
@@ -182,10 +182,10 @@ static int cmma_save_setup(QEMUFile *f, void *opaque)
     return 0;
 }

-static void cmma_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
-                              uint64_t *res_precopy_only,
-                              uint64_t *res_compatible,
-                              uint64_t *res_postcopy_only)
+static void cmma_state_pending(void *opaque,
+                               uint64_t *res_precopy_only,
+                               uint64_t *res_compatible,
+                               uint64_t *res_postcopy_only)
 {
     S390StAttribState *sas = S390_STATTRIB(opaque);
     S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
@@ -371,7 +371,8 @@ static SaveVMHandlers savevm_s390_stattrib_handlers = {
     .save_setup = cmma_save_setup,
     .save_live_iterate = cmma_save_iterate,
     .save_live_complete_precopy = cmma_save_complete,
-    .save_live_pending = cmma_save_pending,
+    .state_pending_exact = cmma_state_pending,
+    .state_pending_estimate = cmma_state_pending,
     .save_cleanup = cmma_save_cleanup,
     .load_state = cmma_load,
     .is_active = cmma_active,
@@ -456,11 +456,10 @@ static void vfio_save_cleanup(void *opaque)
     trace_vfio_save_cleanup(vbasedev->name);
 }

-static void vfio_save_pending(QEMUFile *f, void *opaque,
-                              uint64_t threshold_size,
-                              uint64_t *res_precopy_only,
-                              uint64_t *res_compatible,
-                              uint64_t *res_postcopy_only)
+static void vfio_state_pending(void *opaque,
+                               uint64_t *res_precopy_only,
+                               uint64_t *res_compatible,
+                               uint64_t *res_postcopy_only)
 {
     VFIODevice *vbasedev = opaque;
     VFIOMigration *migration = vbasedev->migration;
@@ -473,7 +472,7 @@ static void vfio_save_pending(QEMUFile *f, void *opaque,

     *res_precopy_only += migration->pending_bytes;

-    trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
+    trace_vfio_state_pending(vbasedev->name, *res_precopy_only,
                             *res_postcopy_only, *res_compatible);
 }

@@ -515,9 +514,9 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
     }

     /*
-     * Reset pending_bytes as .save_live_pending is not called during savevm or
-     * snapshot case, in such case vfio_update_pending() at the start of this
-     * function updates pending_bytes.
+     * Reset pending_bytes as state_pending* are not called during
+     * savevm or snapshot case, in such case vfio_update_pending() at
+     * the start of this function updates pending_bytes.
      */
     migration->pending_bytes = 0;
     trace_vfio_save_iterate(vbasedev->name, data_size);
@@ -685,7 +684,8 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
 static SaveVMHandlers savevm_vfio_handlers = {
     .save_setup = vfio_save_setup,
     .save_cleanup = vfio_save_cleanup,
-    .save_live_pending = vfio_save_pending,
+    .state_pending_exact = vfio_state_pending,
+    .state_pending_estimate = vfio_state_pending,
     .save_live_iterate = vfio_save_iterate,
     .save_live_complete_precopy = vfio_save_complete_precopy,
     .save_state = vfio_save_state,
@@ -157,7 +157,7 @@ vfio_save_cleanup(const char *name) " (%s)"
 vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64
 vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64
 vfio_save_device_config_state(const char *name) " (%s)"
-vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
+vfio_state_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
 vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
 vfio_save_complete_precopy(const char *name) " (%s)"
 vfio_load_device_config_state(const char *name) " (%s)"
@@ -31,6 +31,8 @@
 #include CONFIG_DEVICES
 #include "trace.h"

+static const VMStateDescription vmstate_virtio_mem_device_early;
+
 /*
  * We only had legacy x86 guests that did not support
  * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
@@ -202,6 +204,30 @@ static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
     return ret;
 }

+static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg,
+                                             virtio_mem_range_cb cb)
+{
+    unsigned long first_bit, last_bit;
+    uint64_t offset, size;
+    int ret = 0;
+
+    first_bit = find_first_bit(vmem->bitmap, vmem->bitmap_size);
+    while (first_bit < vmem->bitmap_size) {
+        offset = first_bit * vmem->block_size;
+        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+                                      first_bit + 1) - 1;
+        size = (last_bit - first_bit + 1) * vmem->block_size;
+
+        ret = cb(vmem, arg, offset, size);
+        if (ret) {
+            break;
+        }
+        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+                                  last_bit + 2);
+    }
+    return ret;
+}
+
 /*
  * Adjust the memory section to cover the intersection with the given range.
  *
@@ -772,6 +798,12 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
         error_setg(errp, "'%s' property specifies an unsupported memdev",
                    VIRTIO_MEM_MEMDEV_PROP);
         return;
+    } else if (vmem->memdev->prealloc) {
+        error_setg(errp, "'%s' property specifies a memdev with preallocation"
+                   " enabled: %s. Instead, specify 'prealloc=on' for the"
+                   " virtio-mem device. ", VIRTIO_MEM_MEMDEV_PROP,
+                   object_get_canonical_path_component(OBJECT(vmem->memdev)));
+        return;
     }

     if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
@@ -872,6 +904,10 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)

     host_memory_backend_set_mapped(vmem->memdev, true);
     vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
+    if (vmem->early_migration) {
+        vmstate_register(VMSTATE_IF(vmem), VMSTATE_INSTANCE_ID_ANY,
+                         &vmstate_virtio_mem_device_early, vmem);
+    }
     qemu_register_reset(virtio_mem_system_reset, vmem);

     /*
@@ -893,6 +929,10 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
      */
     memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
     qemu_unregister_reset(virtio_mem_system_reset, vmem);
+    if (vmem->early_migration) {
+        vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early,
+                           vmem);
+    }
     vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
     host_memory_backend_set_mapped(vmem->memdev, false);
     virtio_del_queue(vdev, 0);
@@ -922,6 +962,10 @@ static int virtio_mem_post_load(void *opaque, int version_id)
     RamDiscardListener *rdl;
     int ret;

+    if (vmem->prealloc && !vmem->early_migration) {
+        warn_report("Proper preallocation with migration requires a newer QEMU machine");
+    }
+
     /*
      * We started out with all memory discarded and our memory region is mapped
      * into an address space. Replay, now that we updated the bitmap.
@@ -941,6 +985,64 @@ static int virtio_mem_post_load(void *opaque, int version_id)
     return virtio_mem_restore_unplugged(vmem);
 }

+static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg,
+                                        uint64_t offset, uint64_t size)
+{
+    void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
+    int fd = memory_region_get_fd(&vmem->memdev->mr);
+    Error *local_err = NULL;
+
+    qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return -ENOMEM;
+    }
+    return 0;
+}
+
+static int virtio_mem_post_load_early(void *opaque, int version_id)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+    RAMBlock *rb = vmem->memdev->mr.ram_block;
+    int ret;
+
+    if (!vmem->prealloc) {
+        return 0;
+    }
+
+    /*
+     * We restored the bitmap and verified that the basic properties
+     * match on source and destination, so we can go ahead and preallocate
+     * memory for all plugged memory blocks, before actual RAM migration starts
+     * touching this memory.
+     */
+    ret = virtio_mem_for_each_plugged_range(vmem, NULL,
+                                            virtio_mem_prealloc_range_cb);
+    if (ret) {
+        return ret;
+    }
+
+    /*
+     * This is tricky: postcopy wants to start with a clean slate. On
+     * POSTCOPY_INCOMING_ADVISE, postcopy code discards all (ordinarily
+     * preallocated) RAM such that postcopy will work as expected later.
+     *
+     * However, we run after POSTCOPY_INCOMING_ADVISE -- but before actual
+     * RAM migration. So let's discard all memory again. This looks like an
+     * expensive NOP, but actually serves a purpose: we made sure that we
+     * were able to allocate all required backend memory once. We cannot
+     * guarantee that the backend memory we will free will remain free
+     * until we need it during postcopy, but at least we can catch the
+     * obvious setup issues this way.
+     */
+    if (migration_incoming_postcopy_advised()) {
+        if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
+            return -EBUSY;
+        }
+    }
+    return 0;
+}
+
 typedef struct VirtIOMEMMigSanityChecks {
     VirtIOMEM *parent;
     uint64_t addr;
@@ -1009,18 +1111,54 @@ static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
     },
 };

+static bool virtio_mem_vmstate_field_exists(void *opaque, int version_id)
+{
+    const VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+
+    /* With early migration, these fields were already migrated. */
+    return !vmem->early_migration;
+}
+
 static const VMStateDescription vmstate_virtio_mem_device = {
     .name = "virtio-mem-device",
     .minimum_version_id = 1,
     .version_id = 1,
     .priority = MIG_PRI_VIRTIO_MEM,
     .post_load = virtio_mem_post_load,
     .fields = (VMStateField[]) {
-        VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
-                         vmstate_virtio_mem_sanity_checks),
+        VMSTATE_WITH_TMP_TEST(VirtIOMEM, virtio_mem_vmstate_field_exists,
+                              VirtIOMEMMigSanityChecks,
+                              vmstate_virtio_mem_sanity_checks),
         VMSTATE_UINT64(usable_region_size, VirtIOMEM),
-        VMSTATE_UINT64(size, VirtIOMEM),
+        VMSTATE_UINT64_TEST(size, VirtIOMEM, virtio_mem_vmstate_field_exists),
         VMSTATE_UINT64(requested_size, VirtIOMEM),
-        VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
+        VMSTATE_BITMAP_TEST(bitmap, VirtIOMEM, virtio_mem_vmstate_field_exists,
+                            0, bitmap_size),
         VMSTATE_END_OF_LIST()
     },
 };

+/*
+ * Transfer properties that are immutable while migration is active early,
+ * such that we have have this information around before migrating any RAM
+ * content.
+ *
+ * Note that virtio_mem_is_busy() makes sure these properties can no longer
+ * change on the migration source until migration completed.
+ *
+ * With QEMU compat machines, we transmit these properties later, via
+ * vmstate_virtio_mem_device instead -- see virtio_mem_vmstate_field_exists().
+ */
+static const VMStateDescription vmstate_virtio_mem_device_early = {
+    .name = "virtio-mem-device-early",
+    .minimum_version_id = 1,
+    .version_id = 1,
+    .early_setup = true,
+    .post_load = virtio_mem_post_load_early,
+    .fields = (VMStateField[]) {
+        VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
+                         vmstate_virtio_mem_sanity_checks),
+        VMSTATE_UINT64(usable_region_size, VirtIOMEM),
+        VMSTATE_UINT64(size, VirtIOMEM),
+        VMSTATE_UINT64(requested_size, VirtIOMEM),
+        VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
@@ -1205,6 +1343,8 @@ static Property virtio_mem_properties[] = {
     DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
                             unplugged_inaccessible, ON_OFF_AUTO_AUTO),
 #endif
+    DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM,
+                     early_migration, true),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -31,6 +31,7 @@ OBJECT_DECLARE_TYPE(VirtIOMEM, VirtIOMEMClass,
 #define VIRTIO_MEM_BLOCK_SIZE_PROP "block-size"
 #define VIRTIO_MEM_ADDR_PROP "memaddr"
 #define VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "unplugged-inaccessible"
+#define VIRTIO_MEM_EARLY_MIGRATION_PROP "x-early-migration"
 #define VIRTIO_MEM_PREALLOC_PROP "prealloc"

 struct VirtIOMEM {
@@ -74,6 +75,13 @@ struct VirtIOMEM {
     /* whether to prealloc memory when plugging new blocks */
     bool prealloc;

+    /*
+     * Whether we migrate properties that are immutable while migration is
+     * active early, before state of other devices and especially, before
+     * migrating any RAM content.
+     */
+    bool early_migration;
+
     /* notifiers to notify when "size" changes */
     NotifierList size_change_notifiers;
@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,

 #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1

+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
+
 typedef enum QIOChannelFeature QIOChannelFeature;

 enum QIOChannelFeature {
@@ -41,6 +43,7 @@ enum QIOChannelFeature {
     QIO_CHANNEL_FEATURE_SHUTDOWN,
     QIO_CHANNEL_FEATURE_LISTEN,
     QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
+    QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
 };

@@ -114,6 +117,7 @@ struct QIOChannelClass {
                         size_t niov,
                         int **fds,
                         size_t *nfds,
+                        int flags,
                         Error **errp);
     int (*io_close)(QIOChannel *ioc,
                     Error **errp);
@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc,
  * @niov: the length of the @iov array
  * @fds: pointer to an array that will received file handles
  * @nfds: pointer filled with number of elements in @fds on return
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
  * @errp: pointer to a NULL-initialized error object
  *
  * Read data from the IO channel, storing it in the
@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
                                size_t niov,
                                int **fds,
                                size_t *nfds,
+                               int flags,
                                Error **errp);
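For illustration, a minimal sketch of a peek read with the new flag, assuming a socket-backed channel ioc (only sockets advertise the feature, per the io/channel-socket.c changes below); this snippet is not part of the patch:

    uint32_t magic = 0;
    struct iovec iov = { .iov_base = &magic, .iov_len = sizeof(magic) };
    Error *err = NULL;
    ssize_t len;

    /* Read the first four bytes without consuming them from the stream. */
    len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL,
                                 QIO_CHANNEL_READ_FLAG_MSG_PEEK, &err);
    if (len < 0 && len != QIO_CHANNEL_ERR_BLOCK) {
        error_report_err(err);
    }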
@@ -67,8 +67,10 @@ bool migration_has_failed(MigrationState *);
 /* ...and after the device transmission */
 bool migration_in_postcopy_after_devices(MigrationState *);
 void migration_global_dump(Monitor *mon);
-/* True if incomming migration entered POSTCOPY_INCOMING_DISCARD */
+/* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */
 bool migration_in_incoming_postcopy(void);
+/* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */
+bool migration_incoming_postcopy_advised(void);
 /* True if background snapshot is active */
 bool migration_in_bg_snapshot(void);
@@ -46,11 +46,6 @@ typedef struct SaveVMHandlers {

     /* This runs outside the iothread lock! */
     int (*save_setup)(QEMUFile *f, void *opaque);
-    void (*save_live_pending)(QEMUFile *f, void *opaque,
-                              uint64_t threshold_size,
-                              uint64_t *res_precopy_only,
-                              uint64_t *res_compatible,
-                              uint64_t *res_postcopy_only);
     /* Note for save_live_pending:
      * - res_precopy_only is for data which must be migrated in precopy phase
      * or in stopped state, in other words - before target vm start
@@ -61,8 +56,16 @@ typedef struct SaveVMHandlers {
      * Sum of res_postcopy_only, res_compatible and res_postcopy_only is the
      * whole amount of pending data.
      */

+    /* This estimates the remaining data to transfer */
+    void (*state_pending_estimate)(void *opaque,
+                                   uint64_t *res_precopy_only,
+                                   uint64_t *res_compatible,
+                                   uint64_t *res_postcopy_only);
+    /* This calculate the exact remaining data to transfer */
+    void (*state_pending_exact)(void *opaque,
+                                uint64_t *res_precopy_only,
+                                uint64_t *res_compatible,
+                                uint64_t *res_postcopy_only);
     LoadStateHandler *load_state;
     int (*load_setup)(QEMUFile *f, void *opaque);
     int (*load_cleanup)(void *opaque);
@@ -147,6 +147,9 @@ enum VMStateFlags {
      * VMStateField.struct_version_id to tell which version of the
      * structure we are referencing to use. */
     VMS_VSTRUCT = 0x8000,
+
+    /* Marker for end of list */
+    VMS_END = 0x10000
 };

 typedef enum {
@@ -178,7 +181,21 @@ struct VMStateField {

 struct VMStateDescription {
     const char *name;
-    int unmigratable;
+    bool unmigratable;
+    /*
+     * This VMSD describes something that should be sent during setup phase
+     * of migration. It plays similar role as save_setup() for explicitly
+     * registered vmstate entries, so it can be seen as a way to describe
+     * save_setup() in VMSD structures.
+     *
+     * Note that for now, a SaveStateEntry cannot have a VMSD and
+     * operations (e.g., save_setup()) set at the same time. Consequently,
+     * save_setup() and a VMSD with early_setup set to true are mutually
+     * exclusive. For this reason, also early_setup VMSDs are migrated in a
+     * QEMU_VM_SECTION_FULL section, while save_setup() data is migrated in
+     * a QEMU_VM_SECTION_START section.
+     */
+    bool early_setup;
     int version_id;
     int minimum_version_id;
     MigrationPriority priority;
@@ -705,8 +722,9 @@ extern const VMStateInfo vmstate_info_qlist;
  * '_state' type
  * That the pointer is right at the start of _tmp_type.
  */
-#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) {                \
+#define VMSTATE_WITH_TMP_TEST(_state, _test, _tmp_type, _vmsd) {    \
     .name         = "tmp",                                          \
+    .field_exists = (_test),                                        \
     .size         = sizeof(_tmp_type) +                             \
                     QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \
                     type_check_pointer(_state,                      \
@@ -715,6 +733,9 @@ extern const VMStateInfo vmstate_info_qlist;
     .info         = &vmstate_info_tmp,                              \
 }

+#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \
+    VMSTATE_WITH_TMP_TEST(_state, NULL, _tmp_type, _vmsd)
+
 #define VMSTATE_UNUSED_BUFFER(_test, _version, _size) {             \
     .name         = "unused",                                       \
     .field_exists = (_test),                                        \
@@ -738,8 +759,9 @@ extern const VMStateInfo vmstate_info_qlist;
 /* _field_size should be a int32_t field in the _state struct giving the
  * size of the bitmap _field in bits.
  */
-#define VMSTATE_BITMAP(_field, _state, _version, _field_size) {     \
+#define VMSTATE_BITMAP_TEST(_field, _state, _test, _version, _field_size) { \
     .name         = (stringify(_field)),                            \
+    .field_exists = (_test),                                        \
     .version_id   = (_version),                                     \
     .size_offset  = vmstate_offset_value(_state, _field_size, int32_t),\
     .info         = &vmstate_info_bitmap,                           \
@@ -747,6 +769,9 @@ extern const VMStateInfo vmstate_info_qlist;
     .offset       = offsetof(_state, _field),                       \
 }

+#define VMSTATE_BITMAP(_field, _state, _version, _field_size) \
+    VMSTATE_BITMAP_TEST(_field, _state, NULL, _version, _field_size)
+
 /* For migrating a QTAILQ.
  * Target QTAILQ needs be properly initialized.
  * _type: type of QTAILQ element
@@ -1161,7 +1186,9 @@ extern const VMStateInfo vmstate_info_qlist;
     VMSTATE_UNUSED_BUFFER(_test, 0, _size)

 #define VMSTATE_END_OF_LIST()                                        \
-    {}
+    {                                                                \
+        .flags = VMS_END,                                            \
+    }

 int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
                        void *opaque, int version_id);
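To illustrate the new conditional variants, here is a hedged sketch of a VMSD that skips both a TMP section and a bitmap when a device property says they were already migrated early. The mydev_*/MyDev* names are hypothetical; the pattern mirrors how virtio-mem uses these macros elsewhere in this series.

static bool mydev_late_fields_exist(void *opaque, int version_id)
{
    /* Skip these fields when they were already sent in the early VMSD. */
    return !MYDEV(opaque)->early_migration;   /* MYDEV() is hypothetical */
}

static const VMStateDescription vmstate_mydev = {
    .name = "mydev",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_WITH_TMP_TEST(MyDevState, mydev_late_fields_exist,
                              MyDevTmp, vmstate_mydev_tmp),
        VMSTATE_BITMAP_TEST(bitmap, MyDevState, mydev_late_fields_exist,
                            0, bitmap_size),
        VMSTATE_END_OF_LIST()
    },
};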
@@ -13,10 +13,20 @@
 #ifndef USERFAULTFD_H
 #define USERFAULTFD_H

+#ifdef CONFIG_LINUX
+
 #include "qemu/osdep.h"
 #include "exec/hwaddr.h"
 #include <linux/userfaultfd.h>

+/**
+ * uffd_open(): Open an userfaultfd handle for current context.
+ *
+ * @flags: The flags we want to pass in when creating the handle.
+ *
+ * Returns: the uffd handle if >=0, or <0 if error happens.
+ */
+int uffd_open(int flags);
 int uffd_query_features(uint64_t *features);
 int uffd_create_fd(uint64_t features, bool non_blocking);
 void uffd_close_fd(int uffd_fd);
@@ -32,4 +42,6 @@ int uffd_wakeup(int uffd_fd, void *addr, uint64_t length);
 int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count);
 bool uffd_poll_events(int uffd_fd, int tmo);

+#endif /* CONFIG_LINUX */
+
 #endif /* USERFAULTFD_H */
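The header only declares uffd_open(); its body is not shown on this page. As a rough sketch of what such a wrapper presumably reduces to (an assumption, not the actual implementation), given that the callers below replace raw syscall(__NR_userfaultfd, ...) invocations with it:

int uffd_open(int flags)
{
#if defined(__linux__) && defined(__NR_userfaultfd)
    /* Assumption: a thin wrapper around the raw userfaultfd syscall. */
    return syscall(__NR_userfaultfd, flags);
#else
    errno = ENOSYS;
    return -1;
#endif
}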
@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
                                          size_t niov,
                                          int **fds,
                                          size_t *nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
                                       size_t niov,
                                       int **fds,
                                       size_t *nfds,
+                                      int flags,
                                       Error **errp)
 {
     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc,
                        size_t niov,
                        int **fds G_GNUC_UNUSED,
                        size_t *nfds G_GNUC_UNUSED,
+                       int flags,
                        Error **errp)
 {
     QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
     }
 #endif

+    qio_channel_set_feature(QIO_CHANNEL(ioc),
+                            QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
+
     return 0;
 }

@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
     }
 #endif /* WIN32 */

+    qio_channel_set_feature(QIO_CHANNEL(cioc),
+                            QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
+
     trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
     return cioc;

@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,

     }

+    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+        sflags |= MSG_PEEK;
+    }
+
  retry:
     ret = recvmsg(sioc->fd, &msg, sflags);
     if (ret < 0) {
@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
     ssize_t done = 0;
     ssize_t i;
+    int sflags = 0;
+
+    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+        sflags |= MSG_PEEK;
+    }

     for (i = 0; i < niov; i++) {
         ssize_t ret;
@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
         ret = recv(sioc->fd,
                    iov[i].iov_base,
                    iov[i].iov_len,
-                   0);
+                   sflags);
         if (ret < 0) {
             if (errno == EAGAIN) {
                 if (done) {
@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
                                      size_t niov,
                                      int **fds,
                                      size_t *nfds,
+                                     int flags,
                                      Error **errp)
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc,
                                          size_t niov,
                                          int **fds,
                                          size_t *nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
io/channel.c (16 changed lines):

@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
                                size_t niov,
                                int **fds,
                                size_t *nfds,
+                               int flags,
                                Error **errp)
 {
     QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
         return -1;
     }

-    return klass->io_readv(ioc, iov, niov, fds, nfds, errp);
+    if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) &&
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+        error_setg_errno(errp, EINVAL,
+                         "Channel does not support peek read");
+        return -1;
+    }
+
+    return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp);
 }

@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc,
     while ((nlocal_iov > 0) || local_fds) {
         ssize_t len;
         len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
-                                     local_nfds, errp);
+                                     local_nfds, 0, errp);
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
                 qio_channel_yield(ioc, G_IO_IN);
@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc,
                           size_t niov,
                           Error **errp)
 {
-    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp);
+    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp);
 }

@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc,
                          Error **errp)
 {
     struct iovec iov = { .iov_base = buf, .iov_len = buflen };
-    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp);
+    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp);
 }
@@ -762,11 +762,10 @@ static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
     return 0;
 }

-static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
-                                      uint64_t max_size,
-                                      uint64_t *res_precopy_only,
-                                      uint64_t *res_compatible,
-                                      uint64_t *res_postcopy_only)
+static void dirty_bitmap_state_pending(void *opaque,
+                                       uint64_t *res_precopy_only,
+                                       uint64_t *res_compatible,
+                                       uint64_t *res_postcopy_only)
 {
     DBMSaveState *s = &((DBMState *)opaque)->save;
     SaveBitmapState *dbms;
@@ -784,7 +783,7 @@ static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,

     qemu_mutex_unlock_iothread();

-    trace_dirty_bitmap_save_pending(pending, max_size);
+    trace_dirty_bitmap_state_pending(pending);

     *res_postcopy_only += pending;
 }
@@ -1253,7 +1252,8 @@ static SaveVMHandlers savevm_dirty_bitmap_handlers = {
     .save_live_complete_postcopy = dirty_bitmap_save_complete,
     .save_live_complete_precopy = dirty_bitmap_save_complete,
     .has_postcopy = dirty_bitmap_has_postcopy,
-    .save_live_pending = dirty_bitmap_save_pending,
+    .state_pending_exact = dirty_bitmap_state_pending,
+    .state_pending_estimate = dirty_bitmap_state_pending,
     .save_live_iterate = dirty_bitmap_save_iterate,
     .is_active_iterate = dirty_bitmap_is_active_iterate,
     .load_state = dirty_bitmap_load,
@@ -863,10 +863,10 @@ static int block_save_complete(QEMUFile *f, void *opaque)
     return 0;
 }

-static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
-                               uint64_t *res_precopy_only,
-                               uint64_t *res_compatible,
-                               uint64_t *res_postcopy_only)
+static void block_state_pending(void *opaque,
+                                uint64_t *res_precopy_only,
+                                uint64_t *res_compatible,
+                                uint64_t *res_postcopy_only)
 {
     /* Estimate pending number of bytes to send */
     uint64_t pending;
@@ -885,7 +885,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
         pending = BLK_MIG_BLOCK_SIZE;
     }

-    trace_migration_block_save_pending(pending);
+    trace_migration_block_state_pending(pending);
     /* We don't do postcopy */
     *res_precopy_only += pending;
 }
@@ -1020,7 +1020,8 @@ static SaveVMHandlers savevm_block_handlers = {
     .save_setup = block_save_setup,
     .save_live_iterate = block_save_iterate,
     .save_live_complete_precopy = block_save_complete,
-    .save_live_pending = block_save_pending,
+    .state_pending_exact = block_state_pending,
+    .state_pending_estimate = block_state_pending,
     .load_state = block_load,
     .save_cleanup = block_migration_cleanup,
     .is_active = block_is_active,
@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc,
                         size_t niov,
                         int **fds,
                         size_t *nfds,
+                        int flags,
                         Error **errp)
 {
     QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc);
@@ -92,3 +92,48 @@ void migration_channel_connect(MigrationState *s,
     migrate_fd_connect(s, error);
     error_free(error);
 }
+
+
+/**
+ * @migration_channel_read_peek - Peek at migration channel, without
+ *     actually removing it from channel buffer.
+ *
+ * @ioc: the channel object
+ * @buf: the memory region to read data into
+ * @buflen: the number of bytes to read in @buf
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Returns 0 if successful, returns -1 and sets @errp if fails.
+ */
+int migration_channel_read_peek(QIOChannel *ioc,
+                                const char *buf,
+                                const size_t buflen,
+                                Error **errp)
+{
+    ssize_t len = 0;
+    struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
+
+    while (true) {
+        len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL,
+                                     QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
+
+        if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) {
+            error_setg(errp,
+                       "Failed to peek at channel");
+            return -1;
+        }
+
+        if (len == buflen) {
+            break;
+        }
+
+        /* 1ms sleep. */
+        if (qemu_in_coroutine()) {
+            qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
+        } else {
+            g_usleep(1000);
+        }
+    }
+
+    return 0;
+}
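A usage sketch of this helper (the real caller appears in the migration.c hunks below): peeking the stream magic to classify an incoming channel without consuming any bytes, given an incoming QIOChannel *ioc and an Error **errp from the surrounding function.

    uint32_t channel_magic = 0;
    Error *local_err = NULL;

    /* Peek four bytes; the data stays queued for the real reader. */
    if (migration_channel_read_peek(ioc, (void *)&channel_magic,
                                    sizeof(channel_magic), &local_err)) {
        error_propagate(errp, local_err);
        return;
    }
    bool default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));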
@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s,
                                QIOChannel *ioc,
                                const char *hostname,
                                Error *error_in);
+
+int migration_channel_read_peek(QIOChannel *ioc,
+                                const char *buf,
+                                const size_t buflen,
+                                Error **errp);
 #endif
@@ -714,8 +714,8 @@ void qmp_calc_dirty_rate(int64_t calc_time,
         mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
     }

-    if (has_sample_pages && mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
-        error_setg(errp, "either sample-pages or dirty-ring can be specified.");
+    if (has_sample_pages && mode != DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
+        error_setg(errp, "sample-pages is used only in page-sampling mode");
         return;
     }

@@ -785,8 +785,10 @@ void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
                    DirtyRateStatus_str(info->status));
     monitor_printf(mon, "Start Time: %"PRIi64" (ms)\n",
                    info->start_time);
-    monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
-                   info->sample_pages);
+    if (info->mode == DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
+        monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
+                       info->sample_pages);
+    }
     monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
                    info->calc_time);
     monitor_printf(mon, "Mode: %s\n",
@@ -26,6 +26,7 @@ softmmu_ss.add(files(
   'savevm.c',
   'socket.c',
   'tls.c',
+  'threadinfo.c',
 ), gnutls)

 softmmu_ss.add(when: rdma, if_true: files('rdma.c'))
@@ -31,6 +31,7 @@
 #include "migration.h"
 #include "savevm.h"
 #include "qemu-file.h"
+#include "channel.h"
 #include "migration/vmstate.h"
 #include "block/block.h"
 #include "qapi/error.h"
@@ -57,6 +58,7 @@
 #include "net/announce.h"
 #include "qemu/queue.h"
 #include "multifd.h"
+#include "threadinfo.h"
 #include "qemu/yank.h"
 #include "sysemu/cpus.h"
 #include "yank_functions.h"
@@ -664,10 +666,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();

-    if (multifd_load_setup(errp) != 0) {
-        return false;
-    }
-
     if (!mis->from_src_file) {
         mis->from_src_file = f;
     }
@@ -734,31 +732,56 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
     Error *local_err = NULL;
-    bool start_migration;
     QEMUFile *f;
+    bool default_channel = true;
+    uint32_t channel_magic = 0;
+    int ret = 0;

-    if (!mis->from_src_file) {
-        /* The first connection (multifd may have multiple) */
+    if (migrate_use_multifd() && !migrate_postcopy_ram() &&
+        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+        /*
+         * With multiple channels, it is possible that we receive channels
+         * out of order on destination side, causing incorrect mapping of
+         * source channels on destination side. Check channel MAGIC to
+         * decide type of channel. Please note this is best effort, postcopy
+         * preempt channel does not send any magic number so avoid it for
+         * postcopy live migration. Also tls live migration already does
+         * tls handshake while initializing main channel so with tls this
+         * issue is not possible.
+         */
+        ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
+                                          sizeof(channel_magic), &local_err);
+
+        if (ret != 0) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
+    } else {
+        default_channel = !mis->from_src_file;
+    }
+
+    if (multifd_load_setup(errp) != 0) {
+        error_setg(errp, "Failed to setup multifd channels");
+        return;
+    }
+
+    if (default_channel) {
         f = qemu_file_new_input(ioc);

         if (!migration_incoming_setup(f, errp)) {
             return;
         }
-
-        /*
-         * Common migration only needs one channel, so we can start
-         * right now. Some features need more than one channel, we wait.
-         */
-        start_migration = !migration_needs_multiple_sockets();
     } else {
         /* Multiple connections */
         assert(migration_needs_multiple_sockets());
         if (migrate_use_multifd()) {
-            start_migration = multifd_recv_new_channel(ioc, &local_err);
+            multifd_recv_new_channel(ioc, &local_err);
         } else {
             assert(migrate_postcopy_preempt());
             f = qemu_file_new_input(ioc);
-            start_migration = postcopy_preempt_new_channel(mis, f);
+            postcopy_preempt_new_channel(mis, f);
         }
         if (local_err) {
             error_propagate(errp, local_err);
@@ -766,7 +789,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
         }
     }

-    if (start_migration) {
+    if (migration_has_all_channels()) {
         /* If it's a recovery, we're done */
         if (postcopy_try_recover()) {
             return;
@@ -1051,20 +1074,30 @@ bool migration_is_running(int state)
     }
 }

+static bool migrate_show_downtime(MigrationState *s)
+{
+    return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
+}
+
 static void populate_time_info(MigrationInfo *info, MigrationState *s)
 {
     info->has_status = true;
     info->has_setup_time = true;
     info->setup_time = s->setup_time;

     if (s->state == MIGRATION_STATUS_COMPLETED) {
         info->has_total_time = true;
         info->total_time = s->total_time;
-        info->has_downtime = true;
-        info->downtime = s->downtime;
     } else {
         info->has_total_time = true;
         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
                            s->start_time;
+    }
+
+    if (migrate_show_downtime(s)) {
+        info->has_downtime = true;
+        info->downtime = s->downtime;
+    } else {
         info->has_expected_downtime = true;
         info->expected_downtime = s->expected_downtime;
     }
@@ -1933,6 +1966,8 @@ static void migrate_fd_cleanup(MigrationState *s)

     g_free(s->hostname);
     s->hostname = NULL;
+    json_writer_free(s->vmdesc);
+    s->vmdesc = NULL;

     qemu_savevm_state_cleanup();

@@ -2124,6 +2159,13 @@ bool migration_in_incoming_postcopy(void)
     return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
 }

+bool migration_incoming_postcopy_advised(void)
+{
+    PostcopyState ps = postcopy_state_get();
+
+    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
+}
+
 bool migration_in_bg_snapshot(void)
 {
     MigrationState *s = migrate_get_current();
@@ -3778,33 +3820,39 @@ typedef enum {
  */
 static MigIterateState migration_iteration_run(MigrationState *s)
 {
-    uint64_t pending_size, pend_pre, pend_compat, pend_post;
+    uint64_t pend_pre, pend_compat, pend_post;
     bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;

-    qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre,
-                              &pend_compat, &pend_post);
-    pending_size = pend_pre + pend_compat + pend_post;
+    qemu_savevm_state_pending_estimate(&pend_pre, &pend_compat, &pend_post);
+    uint64_t pending_size = pend_pre + pend_compat + pend_post;

-    trace_migrate_pending(pending_size, s->threshold_size,
-                          pend_pre, pend_compat, pend_post);
+    trace_migrate_pending_estimate(pending_size,
+                                   pend_pre, pend_compat, pend_post);

-    if (pending_size && pending_size >= s->threshold_size) {
-        /* Still a significant amount to transfer */
-        if (!in_postcopy && pend_pre <= s->threshold_size &&
-            qatomic_read(&s->start_postcopy)) {
-            if (postcopy_start(s)) {
-                error_report("%s: postcopy failed to start", __func__);
-            }
-            return MIG_ITERATE_SKIP;
-        }
-        /* Just another iteration step */
-        qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
-    } else {
+    if (pend_pre + pend_compat <= s->threshold_size) {
+        qemu_savevm_state_pending_exact(&pend_pre, &pend_compat, &pend_post);
+        pending_size = pend_pre + pend_compat + pend_post;
+        trace_migrate_pending_exact(pending_size,
+                                    pend_pre, pend_compat, pend_post);
+    }
+
+    if (!pending_size || pending_size < s->threshold_size) {
         trace_migration_thread_low_pending(pending_size);
         migration_completion(s);
         return MIG_ITERATE_BREAK;
     }

+    /* Still a significant amount to transfer */
+    if (!in_postcopy && pend_pre <= s->threshold_size &&
+        qatomic_read(&s->start_postcopy)) {
+        if (postcopy_start(s)) {
+            error_report("%s: postcopy failed to start", __func__);
+        }
+        return MIG_ITERATE_SKIP;
+    }
+
+    /* Just another iteration step */
+    qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
     return MIG_ITERATE_RESUME;
 }

@@ -3981,10 +4029,13 @@ static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
 static void *migration_thread(void *opaque)
 {
     MigrationState *s = opaque;
+    MigrationThread *thread = NULL;
     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     MigThrError thr_error;
     bool urgent = false;

+    thread = MigrationThreadAdd("live_migration", qemu_get_thread_id());
+
     rcu_register_thread();

     object_ref(OBJECT(s));
@@ -4061,6 +4112,7 @@ static void *migration_thread(void *opaque)
     migration_iteration_finish(s);
     object_unref(OBJECT(s));
     rcu_unregister_thread();
+    MigrationThreadDel(thread);
     return NULL;
 }
@@ -17,6 +17,7 @@
 #include "exec/cpu-common.h"
 #include "hw/qdev-core.h"
 #include "qapi/qapi-types-migration.h"
+#include "qapi/qmp/json-writer.h"
 #include "qemu/thread.h"
 #include "qemu/coroutine_int.h"
 #include "io/channel.h"
@@ -366,6 +367,9 @@ struct MigrationState {
      * This save hostname when out-going migration starts
      */
     char *hostname;
+
+    /* QEMU_VM_VMDESCRIPTION content filled for all non-iterable devices. */
+    JSONWriter *vmdesc;
 };

 void migrate_set_state(int *state, int old_state, int new_state);
@@ -24,6 +24,7 @@
 #include "qemu-file.h"
 #include "trace.h"
 #include "multifd.h"
+#include "threadinfo.h"

 #include "qemu/yank.h"
 #include "io/channel-socket.h"
@@ -442,6 +443,7 @@ static int multifd_send_pages(QEMUFile *f)
 int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
 {
     MultiFDPages_t *pages = multifd_send_state->pages;
+    bool changed = false;

     if (!pages->block) {
         pages->block = block;
@@ -454,14 +456,16 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
         if (pages->num < pages->allocated) {
             return 1;
         }
+    } else {
+        changed = true;
     }

     if (multifd_send_pages(f) < 0) {
         return -1;
     }

-    if (pages->block != block) {
-        return multifd_queue_page(f, block, offset);
+    if (changed) {
+        return multifd_queue_page(f, block, offset);
     }

     return 1;
@@ -627,16 +631,16 @@ int multifd_send_sync_main(QEMUFile *f)
         stat64_add(&ram_atomic_counters.transferred, p->packet_len);
         qemu_mutex_unlock(&p->mutex);
         qemu_sem_post(&p->sem);
-
-        if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
-            return -1;
-        }
     }
     for (i = 0; i < migrate_multifd_channels(); i++) {
         MultiFDSendParams *p = &multifd_send_state->params[i];

         trace_multifd_send_sync_main_wait(p->id);
         qemu_sem_wait(&p->sem_sync);
+
+        if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
+            return -1;
+        }
     }
     trace_multifd_send_sync_main(multifd_send_state->packet_num);

@@ -646,10 +650,13 @@ int multifd_send_sync_main(QEMUFile *f)
 static void *multifd_send_thread(void *opaque)
 {
     MultiFDSendParams *p = opaque;
+    MigrationThread *thread = NULL;
     Error *local_err = NULL;
     int ret = 0;
     bool use_zero_copy_send = migrate_use_zero_copy_send();

+    thread = MigrationThreadAdd(p->name, qemu_get_thread_id());
+
     trace_multifd_send_thread_start(p->id);
     rcu_register_thread();

@@ -759,6 +766,7 @@ out:
     qemu_mutex_unlock(&p->mutex);

     rcu_unregister_thread();
+    MigrationThreadDel(thread);
     trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);

     return NULL;
@@ -1164,9 +1172,14 @@ int multifd_load_setup(Error **errp)
     uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
     uint8_t i;

-    if (!migrate_use_multifd()) {
+    /*
+     * Return successfully if multiFD recv state is already initialised
+     * or multiFD is not enabled.
+     */
+    if (multifd_recv_state || !migrate_use_multifd()) {
         return 0;
     }
+
     if (!migrate_multi_channels_is_allowed()) {
         error_setg(errp, "multifd is not supported by current protocol");
         return -1;
@@ -1227,11 +1240,9 @@ bool multifd_recv_all_channels_created(void)

 /*
  * Try to receive all multifd channels to get ready for the migration.
- * - Return true and do not set @errp when correctly receiving all channels;
- * - Return false and do not set @errp when correctly receiving the current one;
- * - Return false and set @errp when failing to receive the current channel.
+ * Sets @errp when failing to receive the current channel.
  */
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
 {
     MultiFDRecvParams *p;
     Error *local_err = NULL;
@@ -1244,7 +1255,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
                        "failed to receive packet"
                        " via multifd channel %d: ",
                        qatomic_read(&multifd_recv_state->count));
-        return false;
+        return;
     }
     trace_multifd_recv_new_channel(id);

@@ -1254,7 +1265,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
                    id);
         multifd_recv_terminate_threads(local_err);
         error_propagate(errp, local_err);
-        return false;
+        return;
     }
     p->c = ioc;
     object_ref(OBJECT(ioc));
@@ -1265,6 +1276,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
     qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                        QEMU_THREAD_JOINABLE);
     qatomic_inc(&multifd_recv_state->count);
-    return qatomic_read(&multifd_recv_state->count) ==
-           migrate_multifd_channels();
 }
(File diff suppressed because it is too large.)
@@ -18,7 +18,7 @@ void multifd_save_cleanup(void);
 int multifd_load_setup(Error **errp);
 int multifd_load_cleanup(Error **errp);
 bool multifd_recv_all_channels_created(void);
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
 void multifd_recv_sync_main(void);
 int multifd_send_sync_main(QEMUFile *f);
 int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
@@ -37,6 +37,7 @@
 #include "qemu-file.h"
 #include "yank_functions.h"
 #include "tls.h"
+#include "qemu/userfaultfd.h"

 /* Arbitrary limit on size of each discard command,
  * keeps them around ~200 bytes
@@ -226,11 +227,9 @@ static bool receive_ufd_features(uint64_t *features)
     int ufd;
     bool ret = true;

-    /* if we are here __NR_userfaultfd should exists */
-    ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+    ufd = uffd_open(O_CLOEXEC);
     if (ufd == -1) {
-        error_report("%s: syscall __NR_userfaultfd failed: %s", __func__,
-                     strerror(errno));
+        error_report("%s: uffd_open() failed: %s", __func__, strerror(errno));
         return false;
     }

@@ -375,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
         goto out;
     }

-    ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+    ufd = uffd_open(O_CLOEXEC);
     if (ufd == -1) {
         error_report("%s: userfaultfd not available: %s", __func__,
                      strerror(errno));
@@ -1160,7 +1159,7 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis)
 int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
 {
     /* Open the fd for the kernel to give us userfaults */
-    mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+    mis->userfault_fd = uffd_open(O_CLOEXEC | O_NONBLOCK);
     if (mis->userfault_fd == -1) {
         error_report("%s: Failed to open userfault fd: %s", __func__,
                      strerror(errno));
@@ -1539,7 +1538,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
     }
 }

-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
 {
     /*
      * The new loading channel has its own threads, so it needs to be
@@ -1548,9 +1547,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
     qemu_file_set_blocking(file, true);
     mis->postcopy_qemufile_dst = file;
     trace_postcopy_preempt_new_channel();
-
-    /* Start the migration immediately */
-    return true;
 }

 /*
migration/postcopy-ram.h
@@ -190,7 +190,7 @@ enum PostcopyChannels {
     RAM_CHANNEL_MAX,
 };
 
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
 int postcopy_preempt_setup(MigrationState *s, Error **errp);
 int postcopy_preempt_wait_channel(MigrationState *s);
migration/ram.c (120 changed lines)
@@ -1774,13 +1774,15 @@ out:
 static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
                                        ram_addr_t size)
 {
+    const ram_addr_t end = offset + size;
+
     /*
      * We read one byte of each page; this will preallocate page tables if
      * required and populate the shared zeropage on MAP_PRIVATE anonymous memory
      * where no page was populated yet. This might require adaption when
      * supporting other mappings, like shmem.
      */
-    for (; offset < size; offset += block->page_size) {
+    for (; offset < end; offset += block->page_size) {
         char tmp = *((char *)block->host + offset);
 
         /* Don't optimize the read out */
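The rewritten loop fixes an off-by-offset bug: the old condition compared the running offset against size alone, so any range with offset >= size was skipped entirely. Worked example with 4 KiB pages: for offset = 0x100000 and size = 0x2000, the old test 0x100000 < 0x2000 fails immediately and nothing is populated, while the new bound end = 0x102000 makes the loop touch exactly the two intended pages.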
@@ -1863,6 +1865,39 @@ void ram_write_tracking_prepare(void)
     }
 }
 
+static inline int uffd_protect_section(MemoryRegionSection *section,
+                                       void *opaque)
+{
+    const hwaddr size = int128_get64(section->size);
+    const hwaddr offset = section->offset_within_region;
+    RAMBlock *rb = section->mr->ram_block;
+    int uffd_fd = (uintptr_t)opaque;
+
+    return uffd_change_protection(uffd_fd, rb->host + offset, size, true,
+                                  false);
+}
+
+static int ram_block_uffd_protect(RAMBlock *rb, int uffd_fd)
+{
+    assert(rb->flags & RAM_UF_WRITEPROTECT);
+
+    /* See ram_block_populate_read() */
+    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
+        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        MemoryRegionSection section = {
+            .mr = rb->mr,
+            .offset_within_region = 0,
+            .size = rb->mr->size,
+        };
+
+        return ram_discard_manager_replay_populated(rdm, &section,
+                                                    uffd_protect_section,
+                                                    (void *)(uintptr_t)uffd_fd);
+    }
+    return uffd_change_protection(uffd_fd, rb->host,
+                                  rb->used_length, true, false);
+}
+
 /*
  * ram_write_tracking_start: start UFFD-WP memory tracking
  *
@@ -1894,14 +1929,14 @@ int ram_write_tracking_start(void)
                                  block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
             goto fail;
         }
-        /* Apply UFFD write protection to the block memory range */
-        if (uffd_change_protection(rs->uffdio_fd, block->host,
-                                   block->max_length, true, false)) {
-            goto fail;
-        }
         block->flags |= RAM_UF_WRITEPROTECT;
         memory_region_ref(block->mr);
 
+        /* Apply UFFD write protection to the block memory range */
+        if (ram_block_uffd_protect(block, uffd_fd)) {
+            goto fail;
+        }
+
         trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
                                                 block->host, block->max_length);
     }
@@ -1915,12 +1950,6 @@ fail:
         if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
             continue;
         }
-        /*
-         * In case some memory block failed to be write-protected
-         * remove protection and unregister all succeeded RAM blocks
-         */
-        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
-                               false, false);
         uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
         /* Cleanup flags and remove reference */
         block->flags &= ~RAM_UF_WRITEPROTECT;
@@ -1946,9 +1975,6 @@ void ram_write_tracking_stop(void)
         if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
         }
-        /* Remove protection and unregister all affected RAM blocks */
-        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
-                               false, false);
         uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
 
         trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
@@ -2319,8 +2345,25 @@ static void pss_host_page_prepare(PageSearchStatus *pss)
     size_t guest_pfns = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
 
     pss->host_page_sending = true;
-    pss->host_page_start = ROUND_DOWN(pss->page, guest_pfns);
-    pss->host_page_end = ROUND_UP(pss->page + 1, guest_pfns);
+    if (guest_pfns <= 1) {
+        /*
+         * This covers both when guest psize == host psize, or when guest
+         * has larger psize than the host (guest_pfns==0).
+         *
+         * For the latter, we always send one whole guest page per
+         * iteration of the host page (example: an Alpha VM on x86 host
+         * will have guest psize 8K while host psize 4K).
+         */
+        pss->host_page_start = pss->page;
+        pss->host_page_end = pss->page + 1;
+    } else {
+        /*
+         * The host page spans over multiple guest pages, we send them
+         * within the same host page iteration.
+         */
+        pss->host_page_start = ROUND_DOWN(pss->page, guest_pfns);
+        pss->host_page_end = ROUND_UP(pss->page + 1, guest_pfns);
+    }
 }
 
 /*
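To make the two branches concrete: guest_pfns is the number of guest pages per host backing page, computed from the block's host page size and the guest page shift. An Alpha guest (8 KiB pages, TARGET_PAGE_BITS = 13) on a 4 KiB x86-64 host gives guest_pfns = 4096 >> 13 = 0, taking the one-guest-page branch; a 4 KiB guest on 2 MiB hugetlb backing gives guest_pfns = 2097152 >> 12 = 512, so one host-page iteration spans 512 guest pages.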
@@ -3392,19 +3435,35 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
     return 0;
 }
 
-static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
-                             uint64_t *res_precopy_only,
-                             uint64_t *res_compatible,
-                             uint64_t *res_postcopy_only)
+static void ram_state_pending_estimate(void *opaque,
+                                       uint64_t *res_precopy_only,
+                                       uint64_t *res_compatible,
+                                       uint64_t *res_postcopy_only)
 {
     RAMState **temp = opaque;
     RAMState *rs = *temp;
-    uint64_t remaining_size;
 
-    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
+    uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
 
-    if (!migration_in_postcopy() &&
-        remaining_size < max_size) {
+    if (migrate_postcopy_ram()) {
+        /* We can do postcopy, and all the data is postcopiable */
+        *res_postcopy_only += remaining_size;
+    } else {
+        *res_precopy_only += remaining_size;
+    }
+}
+
+static void ram_state_pending_exact(void *opaque,
+                                    uint64_t *res_precopy_only,
+                                    uint64_t *res_compatible,
+                                    uint64_t *res_postcopy_only)
+{
+    RAMState **temp = opaque;
+    RAMState *rs = *temp;
+
+    uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
+
+    if (!migration_in_postcopy()) {
         qemu_mutex_lock_iothread();
         WITH_RCU_READ_LOCK_GUARD() {
             migration_bitmap_sync_precopy(rs);
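The split separates a cheap estimate from an expensive count: ram_state_pending_estimate() reuses the current migration_dirty_pages value, while ram_state_pending_exact() first syncs the dirty bitmap under the iothread lock. A hedged sketch of the intended calling pattern, using the wrappers declared in savevm.h further below (the real caller is in migration.c, whose diff is suppressed; the threshold handling here is illustrative):

    uint64_t pre = 0, compat = 0, post = 0;

    /* Cheap every-iteration check: no dirty-bitmap sync */
    qemu_savevm_state_pending_estimate(&pre, &compat, &post);
    if (pre + compat + post <= threshold_size) {
        /* Only now pay for the bitmap sync to confirm convergence */
        qemu_savevm_state_pending_exact(&pre, &compat, &post);
    }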
@@ -4091,12 +4150,6 @@ int ram_load_postcopy(QEMUFile *f, int channel)
     return ret;
 }
 
-static bool postcopy_is_advised(void)
-{
-    PostcopyState ps = postcopy_state_get();
-    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
-}
-
 static bool postcopy_is_running(void)
 {
     PostcopyState ps = postcopy_state_get();
@@ -4167,7 +4220,7 @@ static int ram_load_precopy(QEMUFile *f)
     MigrationIncomingState *mis = migration_incoming_get_current();
     int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
     /* ADVISE is earlier, it shows the source has the postcopy capability on */
-    bool postcopy_advised = postcopy_is_advised();
+    bool postcopy_advised = migration_incoming_postcopy_advised();
     if (!migrate_use_compression()) {
         invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
     }
@@ -4560,7 +4613,8 @@ static SaveVMHandlers savevm_ram_handlers = {
     .save_live_complete_postcopy = ram_save_complete,
     .save_live_complete_precopy = ram_save_complete,
     .has_postcopy = ram_has_postcopy,
-    .save_live_pending = ram_save_pending,
+    .state_pending_exact = ram_state_pending_exact,
+    .state_pending_estimate = ram_state_pending_estimate,
     .load_state = ram_load,
     .save_cleanup = ram_save_cleanup,
     .load_setup = ram_load_setup,
migration/rdma.c
@@ -2785,7 +2785,8 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
     rdma = qatomic_rcu_read(&rioc->rdmaout);
 
     if (!rdma) {
-        return -EIO;
+        error_setg(errp, "RDMA control channel output is not set");
+        return -1;
     }
 
     CHECK_ERROR_STATE();
@@ -2797,7 +2798,8 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
     ret = qemu_rdma_write_flush(f, rdma);
     if (ret < 0) {
         rdma->error_state = ret;
-        return ret;
+        error_setg(errp, "qemu_rdma_write_flush returned %d", ret);
+        return -1;
     }
 
     for (i = 0; i < niov; i++) {
@@ -2816,7 +2818,8 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
 
         if (ret < 0) {
             rdma->error_state = ret;
-            return ret;
+            error_setg(errp, "qemu_rdma_exchange_send returned %d", ret);
+            return -1;
         }
 
         data += len;
@@ -2854,6 +2857,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
                                       size_t niov,
                                       int **fds,
                                       size_t *nfds,
+                                      int flags,
                                       Error **errp)
 {
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
@@ -2867,7 +2871,8 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
     rdma = qatomic_rcu_read(&rioc->rdmain);
 
     if (!rdma) {
-        return -EIO;
+        error_setg(errp, "RDMA control channel input is not set");
+        return -1;
     }
 
     CHECK_ERROR_STATE();
@@ -2903,7 +2908,8 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
 
         if (ret < 0) {
             rdma->error_state = ret;
-            return ret;
+            error_setg(errp, "qemu_rdma_exchange_recv returned %d", ret);
+            return -1;
         }
 
         /*
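These hunks switch qio_channel_rdma_{readv,writev} from leaking raw negative errno values to the QIOChannel convention: return -1 and describe the failure in *errp. A generic caller-side sketch of that convention (not RDMA-specific):

    Error *err = NULL;
    ssize_t n = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, &err);

    if (n == QIO_CHANNEL_ERR_BLOCK) {
        /* non-blocking channel would block: wait for G_IO_IN and retry */
    } else if (n < 0) {
        error_report_err(err);  /* err was filled in by the channel */
    }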
migration/savevm.c
@@ -42,7 +42,6 @@
 #include "postcopy-ram.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-migration.h"
-#include "qapi/qmp/json-writer.h"
 #include "qapi/clone-visitor.h"
 #include "qapi/qapi-builtin-visit.h"
 #include "qapi/qmp/qerror.h"
@@ -67,6 +66,7 @@
 #include "net/announce.h"
 #include "qemu/yank.h"
 #include "yank_functions.h"
+#include "sysemu/qtest.h"
 
 const unsigned int postcopy_ram_discard_version;
 
@@ -586,6 +586,7 @@ static void dump_vmstate_vmsd(FILE *out_file,
             field++;
             first = false;
         }
+        assert(field->flags == VMS_END);
         fprintf(out_file, "\n%*s]", indent, "");
     }
     if (vmsd->subsections != NULL) {
@@ -804,6 +805,42 @@ void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque)
     }
 }
 
+/*
+ * Perform some basic checks on vmsd's at registration
+ * time.
+ */
+static void vmstate_check(const VMStateDescription *vmsd)
+{
+    const VMStateField *field = vmsd->fields;
+    const VMStateDescription **subsection = vmsd->subsections;
+
+    if (field) {
+        while (field->name) {
+            if (field->flags & (VMS_STRUCT | VMS_VSTRUCT)) {
+                /* Recurse to sub structures */
+                vmstate_check(field->vmsd);
+            }
+            /* Carry on */
+            field++;
+        }
+        /* Check for the end of field list canary */
+        if (field->flags != VMS_END) {
+            error_report("VMSTATE not ending with VMS_END: %s", vmsd->name);
+            g_assert_not_reached();
+        }
+    }
+
+    while (subsection && *subsection) {
+        /*
+         * The name of a subsection should start with the name of the
+         * current object.
+         */
+        assert(!strncmp(vmsd->name, (*subsection)->name, strlen(vmsd->name)));
+        vmstate_check(*subsection);
+        subsection++;
+    }
+}
+
 int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
                                    const VMStateDescription *vmsd,
                                    void *opaque, int alias_id,
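vmstate_check() depends on every field array ending in a VMS_END entry, which is what VMSTATE_END_OF_LIST() now expands to (the "canary" mentioned in this pull's cover letter). A minimal illustrative vmsd (hypothetical device, shown only to make the canary concrete):

    typedef struct FooState {
        uint32_t bar;
    } FooState;

    static const VMStateDescription vmstate_foo = {
        .name = "foo",
        .version_id = 1,
        .minimum_version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_UINT32(bar, FooState),
            VMSTATE_END_OF_LIST()  /* plants the VMS_END canary checked above */
        }
    };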
@@ -849,6 +886,11 @@ int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
     } else {
         se->instance_id = instance_id;
     }
+
+    /* Perform a recursive sanity check during the test runs */
+    if (qtest_enabled()) {
+        vmstate_check(vmsd);
+    }
     assert(!se->compat || se->instance_id == 0);
     savevm_state_handler_insert(se);
     return 0;
@@ -898,17 +940,6 @@ static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
     }
 }
 
-static int vmstate_save(QEMUFile *f, SaveStateEntry *se,
-                        JSONWriter *vmdesc)
-{
-    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
-    if (!se->vmsd) {
-        vmstate_save_old_style(f, se, vmdesc);
-        return 0;
-    }
-    return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
-}
-
 /*
  * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
  */
@@ -942,6 +973,43 @@ static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
     }
 }
 
+static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc)
+{
+    int ret;
+
+    if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
+        return 0;
+    }
+    if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+        trace_savevm_section_skip(se->idstr, se->section_id);
+        return 0;
+    }
+
+    trace_savevm_section_start(se->idstr, se->section_id);
+    save_section_header(f, se, QEMU_VM_SECTION_FULL);
+    if (vmdesc) {
+        json_writer_start_object(vmdesc, NULL);
+        json_writer_str(vmdesc, "name", se->idstr);
+        json_writer_int64(vmdesc, "instance_id", se->instance_id);
+    }
+
+    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
+    if (!se->vmsd) {
+        vmstate_save_old_style(f, se, vmdesc);
+    } else {
+        ret = vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    trace_savevm_section_end(se->idstr, se->section_id, 0);
+    save_section_footer(f, se);
+    if (vmdesc) {
+        json_writer_end_object(vmdesc);
+    }
+    return 0;
+}
+
 /**
  * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
  * command and associated data.
@@ -1164,12 +1232,27 @@ bool qemu_savevm_state_guest_unplug_pending(void)
 
 void qemu_savevm_state_setup(QEMUFile *f)
 {
+    MigrationState *ms = migrate_get_current();
     SaveStateEntry *se;
     Error *local_err = NULL;
     int ret;
 
+    ms->vmdesc = json_writer_new(false);
+    json_writer_start_object(ms->vmdesc, NULL);
+    json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
+    json_writer_start_array(ms->vmdesc, "devices");
+
     trace_savevm_state_setup();
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (se->vmsd && se->vmsd->early_setup) {
+            ret = vmstate_save(f, se, ms->vmdesc);
+            if (ret) {
+                qemu_file_set_error(f, ret);
+                break;
+            }
+            continue;
+        }
+
         if (!se->ops || !se->ops->save_setup) {
             continue;
         }
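early_setup is the new VMStateDescription flag that lets immutable device state be written already from qemu_savevm_state_setup(), i.e. before RAM, as the virtio-mem patches in this pull do. A hedged sketch of opting in (device and field names are invented for illustration):

    static const VMStateDescription vmstate_mydev_early = {
        .name = "mydev/early",
        .version_id = 1,
        .early_setup = true,  /* saved during qemu_savevm_state_setup() */
        .fields = (VMStateField[]) {
            VMSTATE_UINT64(immutable_prop, MyDevState),
            VMSTATE_END_OF_LIST()
        }
    };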
@@ -1365,41 +1448,23 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                     bool in_postcopy,
                                                     bool inactivate_disks)
 {
-    g_autoptr(JSONWriter) vmdesc = NULL;
+    MigrationState *ms = migrate_get_current();
+    JSONWriter *vmdesc = ms->vmdesc;
     int vmdesc_len;
     SaveStateEntry *se;
     int ret;
 
-    vmdesc = json_writer_new(false);
-    json_writer_start_object(vmdesc, NULL);
-    json_writer_int64(vmdesc, "page_size", qemu_target_page_size());
-    json_writer_start_array(vmdesc, "devices");
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-
-        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
-            continue;
-        }
-        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
-            trace_savevm_section_skip(se->idstr, se->section_id);
+        if (se->vmsd && se->vmsd->early_setup) {
+            /* Already saved during qemu_savevm_state_setup(). */
             continue;
         }
 
-        trace_savevm_section_start(se->idstr, se->section_id);
-
-        json_writer_start_object(vmdesc, NULL);
-        json_writer_str(vmdesc, "name", se->idstr);
-        json_writer_int64(vmdesc, "instance_id", se->instance_id);
-
-        save_section_header(f, se, QEMU_VM_SECTION_FULL);
         ret = vmstate_save(f, se, vmdesc);
         if (ret) {
             qemu_file_set_error(f, ret);
             return ret;
         }
-        trace_savevm_section_end(se->idstr, se->section_id, 0);
-        save_section_footer(f, se);
-
-        json_writer_end_object(vmdesc);
     }
 
     if (inactivate_disks) {
@@ -1428,6 +1493,10 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
         qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len);
     }
 
+    /* Free it now to detect any inconsistencies. */
+    json_writer_free(vmdesc);
+    ms->vmdesc = NULL;
+
     return 0;
 }
 
@@ -1472,10 +1541,9 @@ flush:
  * the result is split into the amount for units that can and
  * for units that can't do postcopy.
  */
-void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
-                               uint64_t *res_precopy_only,
-                               uint64_t *res_compatible,
-                               uint64_t *res_postcopy_only)
+void qemu_savevm_state_pending_estimate(uint64_t *res_precopy_only,
+                                        uint64_t *res_compatible,
+                                        uint64_t *res_postcopy_only)
 {
     SaveStateEntry *se;
 
@@ -1483,9 +1551,8 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
     *res_compatible = 0;
     *res_postcopy_only = 0;
 
-
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-        if (!se->ops || !se->ops->save_live_pending) {
+        if (!se->ops || !se->ops->state_pending_exact) {
             continue;
         }
         if (se->ops->is_active) {
@@ -1493,9 +1560,34 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
                 continue;
             }
         }
-        se->ops->save_live_pending(f, se->opaque, threshold_size,
-                                   res_precopy_only, res_compatible,
-                                   res_postcopy_only);
+        se->ops->state_pending_exact(se->opaque,
+                                     res_precopy_only, res_compatible,
+                                     res_postcopy_only);
+    }
+}
+
+void qemu_savevm_state_pending_exact(uint64_t *res_precopy_only,
+                                     uint64_t *res_compatible,
+                                     uint64_t *res_postcopy_only)
+{
+    SaveStateEntry *se;
+
+    *res_precopy_only = 0;
+    *res_compatible = 0;
+    *res_postcopy_only = 0;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->ops || !se->ops->state_pending_estimate) {
+            continue;
+        }
+        if (se->ops->is_active) {
+            if (!se->ops->is_active(se->opaque)) {
+                continue;
+            }
+        }
+        se->ops->state_pending_estimate(se->opaque,
+                                        res_precopy_only, res_compatible,
+                                        res_postcopy_only);
     }
 }
 
@@ -1595,21 +1687,10 @@ int qemu_save_device_state(QEMUFile *f)
         if (se->is_ram) {
             continue;
         }
-        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
-            continue;
-        }
-        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
-            continue;
-        }
-
-        save_section_header(f, se, QEMU_VM_SECTION_FULL);
-
         ret = vmstate_save(f, se, NULL);
         if (ret) {
             return ret;
         }
-
-        save_section_footer(f, se);
     }
 
     qemu_put_byte(f, QEMU_VM_EOF);
migration/savevm.h
@@ -40,10 +40,12 @@ void qemu_savevm_state_cleanup(void);
 void qemu_savevm_state_complete_postcopy(QEMUFile *f);
 int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
                                        bool inactivate_disks);
-void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
-                               uint64_t *res_precopy_only,
-                               uint64_t *res_compatible,
-                               uint64_t *res_postcopy_only);
+void qemu_savevm_state_pending_exact(uint64_t *res_precopy_only,
+                                     uint64_t *res_compatible,
+                                     uint64_t *res_postcopy_only);
+void qemu_savevm_state_pending_estimate(uint64_t *res_precopy_only,
+                                        uint64_t *res_compatible,
+                                        uint64_t *res_postcopy_only);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
 int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
migration/threadinfo.c (new file)
@@ -0,0 +1,51 @@
+/*
+ * Migration Threads info
+ *
+ * Copyright (c) 2022 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *  Jiang Jiacheng <jiangjiacheng@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "threadinfo.h"
+
+static QLIST_HEAD(, MigrationThread) migration_threads;
+
+MigrationThread *MigrationThreadAdd(const char *name, int thread_id)
+{
+    MigrationThread *thread = g_new0(MigrationThread, 1);
+    thread->name = name;
+    thread->thread_id = thread_id;
+
+    QLIST_INSERT_HEAD(&migration_threads, thread, node);
+
+    return thread;
+}
+
+void MigrationThreadDel(MigrationThread *thread)
+{
+    if (thread) {
+        QLIST_REMOVE(thread, node);
+        g_free(thread);
+    }
+}
+
+MigrationThreadInfoList *qmp_query_migrationthreads(Error **errp)
+{
+    MigrationThreadInfoList *head = NULL;
+    MigrationThreadInfoList **tail = &head;
+    MigrationThread *thread = NULL;
+
+    QLIST_FOREACH(thread, &migration_threads, node) {
+        MigrationThreadInfo *info = g_new0(MigrationThreadInfo, 1);
+        info->name = g_strdup(thread->name);
+        info->thread_id = thread->thread_id;
+
+        QAPI_LIST_APPEND(tail, info);
+    }
+
+    return head;
+}
migration/threadinfo.h (new file)
@@ -0,0 +1,28 @@
+/*
+ * Migration Threads info
+ *
+ * Copyright (c) 2022 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *  Jiang Jiacheng <jiangjiacheng@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/queue.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-migration.h"
+
+typedef struct MigrationThread MigrationThread;
+
+struct MigrationThread {
+    const char *name; /* the name of migration thread */
+    int thread_id; /* ID of the underlying host thread */
+    QLIST_ENTRY(MigrationThread) node;
+};
+
+MigrationThread *MigrationThreadAdd(const char *name, int thread_id);
+
+void MigrationThreadDel(MigrationThread *info);
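Per the "save/delete migration thread info" commit in this pull, threads register themselves on start and unregister on exit; the actual call sites live in migration.c, whose diff is suppressed above. A hedged sketch of the pattern:

    static void *migration_thread(void *opaque)
    {
        /* qemu_get_thread_id() yields the host TID that
         * query-migrationthreads later reports as thread-id */
        MigrationThread *thread = MigrationThreadAdd("live_migration",
                                                     qemu_get_thread_id());

        /* ... actual migration work ... */

        MigrationThreadDel(thread);
        return NULL;
    }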
migration/trace-events
@@ -150,7 +150,8 @@ migrate_fd_cleanup(void) ""
 migrate_fd_error(const char *error_desc) "error=%s"
 migrate_fd_cancel(void) ""
 migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at 0x%zx len 0x%zx"
-migrate_pending(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint64_t post) "pending size %" PRIu64 " max %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
+migrate_pending_exact(uint64_t size, uint64_t pre, uint64_t compat, uint64_t post) "exact pending size %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
+migrate_pending_estimate(uint64_t size, uint64_t pre, uint64_t compat, uint64_t post) "estimate pending size %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
 migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
 migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi64
 migration_completion_file_err(void) ""
@@ -330,7 +331,7 @@ send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uin
 dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
 dirty_bitmap_save_complete_enter(void) ""
 dirty_bitmap_save_complete_finish(void) ""
-dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
+dirty_bitmap_state_pending(uint64_t pending) "pending %" PRIu64
 dirty_bitmap_load_complete(void) ""
 dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
 dirty_bitmap_load_bits_zeroes(void) ""
@@ -355,7 +356,7 @@ migration_block_save_device_dirty(int64_t sector) "Error reading sector %" PRId6
 migration_block_flush_blks(const char *action, int submitted, int read_done, int transferred) "%s submitted %d read_done %d transferred %d"
 migration_block_save(const char *mig_stage, int submitted, int transferred) "Enter save live %s submitted %d transferred %d"
 migration_block_save_complete(void) "Block migration completed"
-migration_block_save_pending(uint64_t pending) "Enter save live pending %" PRIu64
+migration_block_state_pending(uint64_t pending) "Enter save live pending %" PRIu64
 
 # page_cache.c
 migration_pagecache_init(int64_t max_num_items) "Setting cache buckets to %" PRId64
migration/vmstate.c
@@ -154,6 +154,7 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
         }
         field++;
     }
+    assert(field->flags == VMS_END);
     ret = vmstate_subsection_load(f, vmsd, opaque);
     if (ret != 0) {
         return ret;
@@ -408,6 +409,7 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
         }
         field++;
     }
+    assert(field->flags == VMS_END);
 
     if (vmdesc) {
         json_writer_end_array(vmdesc);
qapi/migration.json
@@ -1958,6 +1958,35 @@
 { 'command': 'query-vcpu-dirty-limit',
   'returns': [ 'DirtyLimitInfo' ] }
 
+##
+# @MigrationThreadInfo:
+#
+# Information about migrationthreads
+#
+# @name: the name of migration thread
+#
+# @thread-id: ID of the underlying host thread
+#
+# Since: 7.2
+##
+{ 'struct': 'MigrationThreadInfo',
+  'data': {'name': 'str',
+           'thread-id': 'int'} }
+
+##
+# @query-migrationthreads:
+#
+# Returns information of migration threads
+#
+# data: migration thread name
+#
+# returns: information about migration threads
+#
+# Since: 7.2
+##
+{ 'command': 'query-migrationthreads',
+  'returns': ['MigrationThreadInfo'] }
+
 ##
 # @snapshot-save:
 #
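An illustrative QMP exchange for the new command (thread name and TID are example values; the name is whatever was passed to MigrationThreadAdd()):

    -> { "execute": "query-migrationthreads" }
    <- { "return": [ { "name": "live_migration", "thread-id": 12345 } ] }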
scsi/qemu-pr-helper.c
@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
         iov.iov_base = buf;
         iov.iov_len = sz;
         n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
-                                        &fds, &nfds, errp);
+                                        &fds, &nfds, 0, errp);
 
         if (n_read == QIO_CHANNEL_ERR_BLOCK) {
             qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
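The extra 0 threaded through this and the following callers is the new read-flags argument that the MSG_PEEK commit in this pull adds to qio_channel_readv_full(). A hedged sketch of the flag's use, assuming the QIO_CHANNEL_READ_FLAG_MSG_PEEK name from that commit; this is what lets migration peek at a channel's magic bytes to map channels without consuming data:

    char magic[4];
    struct iovec iov = { .iov_base = magic, .iov_len = sizeof(magic) };
    Error *err = NULL;

    /* Inspect the first bytes without draining them from the socket */
    ssize_t n = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL,
                                       QIO_CHANNEL_READ_FLAG_MSG_PEEK, &err);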
tests/qtest/migration-test.c
@@ -61,14 +61,14 @@ static bool uffd_feature_thread_id;
 #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
 #include <sys/eventfd.h>
 #include <sys/ioctl.h>
-#include <linux/userfaultfd.h>
+#include "qemu/userfaultfd.h"
 
 static bool ufd_version_check(void)
 {
     struct uffdio_api api_struct;
     uint64_t ioctl_mask;
 
-    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+    int ufd = uffd_open(O_CLOEXEC);
 
     if (ufd == -1) {
         g_test_message("Skipping test: userfaultfd not available");
tests/qtest/tpm-emu.c
@@ -115,7 +115,7 @@ void *tpm_emu_ctrl_thread(void *data)
         int *pfd = NULL;
         size_t nfd = 0;
 
-        qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort);
+        qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort);
         cmd = be32_to_cpu(cmd);
         g_assert_cmpint(cmd, ==, CMD_SET_DATAFD);
         g_assert_cmpint(nfd, ==, 1);
tests/unit/test-io-channel-socket.c
@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void)
                                G_N_ELEMENTS(iorecv),
                                &fdrecv,
                                &nfdrecv,
+                               0,
                                &error_abort);
 
     g_assert(nfdrecv == G_N_ELEMENTS(fdsend));
util/userfaultfd.c
@@ -19,6 +19,15 @@
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
 
+int uffd_open(int flags)
+{
+#if defined(__NR_userfaultfd)
+    return syscall(__NR_userfaultfd, flags);
+#else
+    return -EINVAL;
+#endif
+}
+
 /**
  * uffd_query_features: query UFFD features
  *
@@ -32,7 +41,7 @@ int uffd_query_features(uint64_t *features)
     struct uffdio_api api_struct = { 0 };
     int ret = -1;
 
-    uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC);
+    uffd_fd = uffd_open(O_CLOEXEC);
     if (uffd_fd < 0) {
         trace_uffd_query_features_nosys(errno);
         return -1;
@@ -69,7 +78,7 @@ int uffd_create_fd(uint64_t features, bool non_blocking)
     uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
 
     flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
-    uffd_fd = syscall(__NR_userfaultfd, flags);
+    uffd_fd = uffd_open(flags);
     if (uffd_fd < 0) {
         trace_uffd_create_fd_nosys(errno);
         return -1;
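uffd_open() centralizes the userfaultfd syscall so that non-Linux builds keep compiling (the "fix compilation on non linux" item in this pull's cover letter): without __NR_userfaultfd it returns -EINVAL instead of invoking the syscall. Usage mirrors the converted call sites above:

    int uffd = uffd_open(O_CLOEXEC | O_NONBLOCK);

    if (uffd < 0) {
        /* Linux: -1 with errno set (ENOSYS, EPERM, ...);
         * non-Linux stub: plain -EINVAL with errno untouched */
        error_report("userfaultfd not available: %s", strerror(errno));
        return -1;
    }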
util/vhost-user-server.c
@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
          * qio_channel_readv_full may have short reads, keeping calling it
          * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
          */
-        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
+        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
         if (rc < 0) {
             if (rc == QIO_CHANNEL_ERR_BLOCK) {
                 assert(local_err == NULL);