Migration pull request

- Avihai's fix to allow vmstate iterators to not starve for VFIO
 - Maksim's fix on additional check on precopy load error
 - Fabiano's fix on fdatasync() hang in mapped-ram
 - Jonathan's fix on vring cached access over MMIO regions
 - Cedric's cleanup patches 1-4 out of his error report series
 - Yu's fix for RDMA migration (which used to be broken even for 8.2)
 - Anthony's small cleanup/fix on err message
 - Steve's patches on privatize migration.h
 - Xiang's patchset to enable zero page detections in multifd threads
 -----BEGIN PGP SIGNATURE-----
 
 iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZe9+uBIccGV0ZXJ4QHJl
 ZGhhdC5jb20ACgkQO1/MzfOr1wamaQD/SvmpMEcuRndT9LPSxzXowAGDZTBpYUfv
 5XAbx80dS9IBAO8PJJgQJIBHBeacyLBjHP9CsdVtgw5/VW+wCsbfV4AB
 =xavb
 -----END PGP SIGNATURE-----

Merge tag 'migration-20240311-pull-request' of https://gitlab.com/peterx/qemu into staging

Migration pull request

- Avihai's fix to allow vmstate iterators to not starve for VFIO
- Maksim's fix on additional check on precopy load error
- Fabiano's fix on fdatasync() hang in mapped-ram
- Jonathan's fix on vring cached access over MMIO regions
- Cedric's cleanup patches 1-4 out of his error report series
- Yu's fix for RDMA migration (which used to be broken even for 8.2)
- Anthony's small cleanup/fix on err message
- Steve's patches on privatize migration.h
- Xiang's patchset to enable zero page detections in multifd threads

# -----BEGIN PGP SIGNATURE-----
#
# iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZe9+uBIccGV0ZXJ4QHJl
# ZGhhdC5jb20ACgkQO1/MzfOr1wamaQD/SvmpMEcuRndT9LPSxzXowAGDZTBpYUfv
# 5XAbx80dS9IBAO8PJJgQJIBHBeacyLBjHP9CsdVtgw5/VW+wCsbfV4AB
# =xavb
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 11 Mar 2024 21:59:20 GMT
# gpg:                using EDDSA key B9184DC20CC457DACF7DD1A93B5FCCCDF3ABD706
# gpg:                issuer "peterx@redhat.com"
# gpg: Good signature from "Peter Xu <xzpeter@gmail.com>" [marginal]
# gpg:                 aka "Peter Xu <peterx@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: B918 4DC2 0CC4 57DA CF7D  D1A9 3B5F CCCD F3AB D706

* tag 'migration-20240311-pull-request' of https://gitlab.com/peterx/qemu: (34 commits)
  migration/multifd: Add new migration test cases for legacy zero page checking.
  migration/multifd: Enable multifd zero page checking by default.
  migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page.
  migration/multifd: Implement zero page transmission on the multifd thread.
  migration/multifd: Add new migration option zero-page-detection.
  migration/multifd: Allow clearing of the file_bmap from multifd
  migration/multifd: Allow zero pages in file migration
  migration: purge MigrationState from public interface
  migration: delete unused accessors
  migration: privatize colo interfaces
  migration: migration_file_set_error
  migration: migration_is_device
  migration: migration_thread_is_self
  migration: export vcpu_dirty_limit_period
  migration: export migration_is_running
  migration: export migration_is_active
  migration: export migration_is_setup_or_active
  migration: remove migration.h references
  migration: export fewer options
  migration: Fix format in error message
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2024-03-12 11:35:41 +00:00
commit 8f3f329f5e
44 changed files with 979 additions and 295 deletions

View File

@ -44,7 +44,8 @@ over any transport.
- file migration: do the migration using a file that is passed to QEMU - file migration: do the migration using a file that is passed to QEMU
by path. A file offset option is supported to allow a management by path. A file offset option is supported to allow a management
application to add its own metadata to the start of the file without application to add its own metadata to the start of the file without
QEMU interference. QEMU interference. Note that QEMU does not flush cached file
data/metadata at the end of migration.
In addition, support is included for migration using RDMA, which In addition, support is included for migration using RDMA, which
transports the page data using ``RDMA``, where the hardware takes care of transports the page data using ``RDMA``, where the hardware takes care of

View File

@ -32,7 +32,9 @@
#include "hw/virtio/virtio-net.h" #include "hw/virtio/virtio-net.h"
#include "audio/audio.h" #include "audio/audio.h"
GlobalProperty hw_compat_8_2[] = {}; GlobalProperty hw_compat_8_2[] = {
{ "migration", "zero-page-detection", "legacy"},
};
const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2); const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
GlobalProperty hw_compat_8_1[] = { GlobalProperty hw_compat_8_1[] = {

View File

@ -693,6 +693,16 @@ const PropertyInfo qdev_prop_granule_mode = {
.set_default_value = qdev_propinfo_set_default_value_enum, .set_default_value = qdev_propinfo_set_default_value_enum,
}; };
const PropertyInfo qdev_prop_zero_page_detection = {
.name = "ZeroPageDetection",
.description = "zero_page_detection values, "
"none,legacy,multifd",
.enum_table = &ZeroPageDetection_lookup,
.get = qdev_propinfo_get_enum,
.set = qdev_propinfo_set_enum,
.set_default_value = qdev_propinfo_set_default_value_enum,
};
/* --- Reserved Region --- */ /* --- Reserved Region --- */
/* /*

View File

@ -39,7 +39,6 @@
#include "sysemu/runstate.h" #include "sysemu/runstate.h"
#include "trace.h" #include "trace.h"
#include "qapi/error.h" #include "qapi/error.h"
#include "migration/migration.h"
#include "migration/misc.h" #include "migration/misc.h"
#include "migration/blocker.h" #include "migration/blocker.h"
#include "migration/qemu-file.h" #include "migration/qemu-file.h"
@ -150,14 +149,8 @@ bool vfio_viommu_preset(VFIODevice *vbasedev)
static void vfio_set_migration_error(int err) static void vfio_set_migration_error(int err)
{ {
MigrationState *ms = migrate_get_current(); if (migration_is_setup_or_active()) {
migration_file_set_error(err);
if (migration_is_setup_or_active(ms->state)) {
WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
if (ms->to_dst_file) {
qemu_file_set_error(ms->to_dst_file, err);
}
}
} }
} }
@ -180,10 +173,8 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer)
{ {
VFIODevice *vbasedev; VFIODevice *vbasedev;
MigrationState *ms = migrate_get_current();
if (ms->state != MIGRATION_STATUS_ACTIVE && if (!migration_is_active() && !migration_is_device()) {
ms->state != MIGRATION_STATUS_DEVICE) {
return false; return false;
} }
@ -225,7 +216,7 @@ vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer)
{ {
VFIODevice *vbasedev; VFIODevice *vbasedev;
if (!migration_is_active(migrate_get_current())) { if (!migration_is_active()) {
return false; return false;
} }

View File

@ -32,7 +32,6 @@
#include "sysemu/reset.h" #include "sysemu/reset.h"
#include "trace.h" #include "trace.h"
#include "qapi/error.h" #include "qapi/error.h"
#include "migration/migration.h"
#include "pci.h" #include "pci.h"
VFIOGroupList vfio_group_list = VFIOGroupList vfio_group_list =

View File

@ -17,14 +17,12 @@
#include "sysemu/runstate.h" #include "sysemu/runstate.h"
#include "hw/vfio/vfio-common.h" #include "hw/vfio/vfio-common.h"
#include "migration/migration.h" #include "migration/misc.h"
#include "migration/options.h"
#include "migration/savevm.h" #include "migration/savevm.h"
#include "migration/vmstate.h" #include "migration/vmstate.h"
#include "migration/qemu-file.h" #include "migration/qemu-file.h"
#include "migration/register.h" #include "migration/register.h"
#include "migration/blocker.h" #include "migration/blocker.h"
#include "migration/misc.h"
#include "qapi/error.h" #include "qapi/error.h"
#include "exec/ramlist.h" #include "exec/ramlist.h"
#include "exec/ram_addr.h" #include "exec/ram_addr.h"
@ -505,6 +503,12 @@ static bool vfio_is_active_iterate(void *opaque)
return vfio_device_state_is_precopy(vbasedev); return vfio_device_state_is_precopy(vbasedev);
} }
/*
* Note about migration rate limiting: the VFIO migration buffer size is
* currently limited to 1MB, so there is no need to check whether the migration
* rate limit has been exceeded (in the worst case it is exceeded by 1MB).
* However, if the buffer size is later increased, the migration rate limit
* should be enforced here.
*/
static int vfio_save_iterate(QEMUFile *f, void *opaque) static int vfio_save_iterate(QEMUFile *f, void *opaque)
{ {
VFIODevice *vbasedev = opaque; VFIODevice *vbasedev = opaque;
@ -529,11 +533,7 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size,
migration->precopy_dirty_size); migration->precopy_dirty_size);
/* return !migration->precopy_init_size && !migration->precopy_dirty_size;
* A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero.
* Return 1 so following handlers will not be potentially blocked.
*/
return 1;
} }
static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
@ -713,9 +713,7 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running,
* Migration should be aborted in this case, but vm_state_notify() * Migration should be aborted in this case, but vm_state_notify()
* currently does not support reporting failures. * currently does not support reporting failures.
*/ */
if (migrate_get_current()->to_dst_file) { migration_file_set_error(ret);
qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
}
} }
trace_vfio_vmstate_change_prepare(vbasedev->name, running, trace_vfio_vmstate_change_prepare(vbasedev->name, running,
@ -745,9 +743,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state)
* Migration should be aborted in this case, but vm_state_notify() * Migration should be aborted in this case, but vm_state_notify()
* currently does not support reporting failures. * currently does not support reporting failures.
*/ */
if (migrate_get_current()->to_dst_file) { migration_file_set_error(ret);
qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
}
} }
trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),

View File

@ -26,7 +26,6 @@
#include "qemu/sockets.h" #include "qemu/sockets.h"
#include "sysemu/runstate.h" #include "sysemu/runstate.h"
#include "sysemu/cryptodev.h" #include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h" #include "migration/postcopy-ram.h"
#include "trace.h" #include "trace.h"
#include "exec/ramblock.h" #include "exec/ramblock.h"

View File

@ -31,8 +31,6 @@
#include "trace.h" #include "trace.h"
#include "qemu/error-report.h" #include "qemu/error-report.h"
#include "migration/misc.h" #include "migration/misc.h"
#include "migration/migration.h"
#include "migration/options.h"
#include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-access.h"

View File

@ -9,6 +9,7 @@ extern const PropertyInfo qdev_prop_reserved_region;
extern const PropertyInfo qdev_prop_multifd_compression; extern const PropertyInfo qdev_prop_multifd_compression;
extern const PropertyInfo qdev_prop_mig_mode; extern const PropertyInfo qdev_prop_mig_mode;
extern const PropertyInfo qdev_prop_granule_mode; extern const PropertyInfo qdev_prop_granule_mode;
extern const PropertyInfo qdev_prop_zero_page_detection;
extern const PropertyInfo qdev_prop_losttickpolicy; extern const PropertyInfo qdev_prop_losttickpolicy;
extern const PropertyInfo qdev_prop_blockdev_on_error; extern const PropertyInfo qdev_prop_blockdev_on_error;
extern const PropertyInfo qdev_prop_bios_chs_trans; extern const PropertyInfo qdev_prop_bios_chs_trans;
@ -50,6 +51,9 @@ extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
MigMode) MigMode)
#define DEFINE_PROP_GRANULE_MODE(_n, _s, _f, _d) \ #define DEFINE_PROP_GRANULE_MODE(_n, _s, _f, _d) \
DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_granule_mode, GranuleMode) DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_granule_mode, GranuleMode)
#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \
DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \
ZeroPageDetection)
#define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
LostTickPolicy) LostTickPolicy)

View File

@ -0,0 +1,25 @@
/*
* QEMU public migration capabilities
*
* Copyright (c) 2012-2023 Red Hat Inc
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef QEMU_MIGRATION_CLIENT_OPTIONS_H
#define QEMU_MIGRATION_CLIENT_OPTIONS_H
/* capabilities */
bool migrate_background_snapshot(void);
bool migrate_dirty_limit(void);
bool migrate_postcopy_ram(void);
bool migrate_switchover_ack(void);
/* parameters */
MigMode migrate_mode(void);
uint64_t migrate_vcpu_dirty_limit_period(void);
#endif

View File

@ -17,6 +17,7 @@
#include "qemu/notify.h" #include "qemu/notify.h"
#include "qapi/qapi-types-migration.h" #include "qapi/qapi-types-migration.h"
#include "qapi/qapi-types-net.h" #include "qapi/qapi-types-net.h"
#include "migration/client-options.h"
/* migration/ram.c */ /* migration/ram.c */
@ -59,8 +60,10 @@ void dump_vmstate_json_to_file(FILE *out_fp);
void migration_object_init(void); void migration_object_init(void);
void migration_shutdown(void); void migration_shutdown(void);
bool migration_is_idle(void); bool migration_is_idle(void);
bool migration_is_active(MigrationState *); bool migration_is_active(void);
bool migrate_mode_is_cpr(MigrationState *); bool migration_is_device(void);
bool migration_thread_is_self(void);
bool migration_is_setup_or_active(void);
typedef enum MigrationEventType { typedef enum MigrationEventType {
MIG_EVENT_PRECOPY_SETUP, MIG_EVENT_PRECOPY_SETUP,
@ -99,16 +102,15 @@ void migration_add_notifier_mode(NotifierWithReturn *notify,
MigrationNotifyFunc func, MigMode mode); MigrationNotifyFunc func, MigMode mode);
void migration_remove_notifier(NotifierWithReturn *notify); void migration_remove_notifier(NotifierWithReturn *notify);
int migration_call_notifiers(MigrationState *s, MigrationEventType type, bool migration_is_running(void);
Error **errp); void migration_file_set_error(int err);
bool migration_in_setup(MigrationState *);
bool migration_has_finished(MigrationState *);
bool migration_has_failed(MigrationState *);
/* ...and after the device transmission */
/* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */ /* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */
bool migration_in_incoming_postcopy(void); bool migration_in_incoming_postcopy(void);
/* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */ /* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */
bool migration_incoming_postcopy_advised(void); bool migration_incoming_postcopy_advised(void);
/* True if background snapshot is active */ /* True if background snapshot is active */
bool migration_in_bg_snapshot(void); bool migration_in_bg_snapshot(void);

View File

@ -16,30 +16,130 @@
#include "hw/vmstate-if.h" #include "hw/vmstate-if.h"
/**
* struct SaveVMHandlers: handler structure to finely control
* migration of complex subsystems and devices, such as RAM, block and
* VFIO.
*/
typedef struct SaveVMHandlers { typedef struct SaveVMHandlers {
/* This runs inside the BQL. */
SaveStateHandler *save_state;
/* /* The following handlers run inside the BQL. */
* save_prepare is called early, even before migration starts, and can be
* used to perform early checks. /**
* @save_state
*
* Saves state section on the source using the latest state format
* version.
*
* Legacy method. Should be deprecated when all users are ported
* to VMStateDescription.
*
* @f: QEMUFile where to send the data
* @opaque: data pointer passed to register_savevm_live()
*/
void (*save_state)(QEMUFile *f, void *opaque);
/**
* @save_prepare
*
* Called early, even before migration starts, and can be used to
* perform early checks.
*
* @opaque: data pointer passed to register_savevm_live()
* @errp: pointer to Error*, to store an error if it happens.
*
* Returns zero to indicate success and negative for error
*/ */
int (*save_prepare)(void *opaque, Error **errp); int (*save_prepare)(void *opaque, Error **errp);
/**
* @save_setup
*
* Initializes the data structures on the source and transmits
* first section containing information on the device
*
* @f: QEMUFile where to send the data
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*save_setup)(QEMUFile *f, void *opaque); int (*save_setup)(QEMUFile *f, void *opaque);
/**
* @save_cleanup
*
* Uninitializes the data structures on the source
*
* @opaque: data pointer passed to register_savevm_live()
*/
void (*save_cleanup)(void *opaque); void (*save_cleanup)(void *opaque);
/**
* @save_live_complete_postcopy
*
* Called at the end of postcopy for all postcopyable devices.
*
* @f: QEMUFile where to send the data
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque); int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
/**
* @save_live_complete_precopy
*
* Transmits the last section for the device containing any
* remaining data at the end of a precopy phase. When postcopy is
* enabled, devices that support postcopy will skip this step,
* where the final data will be flushed at the end of postcopy via
* @save_live_complete_postcopy instead.
*
* @f: QEMUFile where to send the data
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*save_live_complete_precopy)(QEMUFile *f, void *opaque); int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
/* This runs both outside and inside the BQL. */ /* This runs both outside and inside the BQL. */
/**
* @is_active
*
* Will skip a state section if not active
*
* @opaque: data pointer passed to register_savevm_live()
*
* Returns true if state section is active else false
*/
bool (*is_active)(void *opaque); bool (*is_active)(void *opaque);
/**
* @has_postcopy
*
* Checks if a device supports postcopy
*
* @opaque: data pointer passed to register_savevm_live()
*
* Returns true for postcopy support else false
*/
bool (*has_postcopy)(void *opaque); bool (*has_postcopy)(void *opaque);
/* is_active_iterate /**
* If it is not NULL then qemu_savevm_state_iterate will skip iteration if * @is_active_iterate
* it returns false. For example, it is needed for only-postcopy-states, *
* which needs to be handled by qemu_savevm_state_setup and * As #SaveVMHandlers.is_active(), will skip an inactive state
* qemu_savevm_state_pending, but do not need iterations until not in * section in qemu_savevm_state_iterate.
* postcopy stage. *
* For example, it is needed for only-postcopy-states, which needs
* to be handled by qemu_savevm_state_setup() and
* qemu_savevm_state_pending(), but do not need iterations until
* not in postcopy stage.
*
* @opaque: data pointer passed to register_savevm_live()
*
* Returns true if state section is active else false
*/ */
bool (*is_active_iterate)(void *opaque); bool (*is_active_iterate)(void *opaque);
@ -48,44 +148,155 @@ typedef struct SaveVMHandlers {
* use data that is local to the migration thread or protected * use data that is local to the migration thread or protected
* by other locks. * by other locks.
*/ */
/**
* @save_live_iterate
*
* Should send a chunk of data until the point that stream
* bandwidth limits tell it to stop. Each call generates one
* section.
*
* @f: QEMUFile where to send the data
* @opaque: data pointer passed to register_savevm_live()
*
* Returns 0 to indicate that there is still more data to send,
* 1 that there is no more data to send and
* negative to indicate an error.
*/
int (*save_live_iterate)(QEMUFile *f, void *opaque); int (*save_live_iterate)(QEMUFile *f, void *opaque);
/* This runs outside the BQL! */ /* This runs outside the BQL! */
/* Note for save_live_pending:
* must_precopy: /**
* - must be migrated in precopy or in stopped state * @state_pending_estimate
* - i.e. must be migrated before target start
* *
* can_postcopy: * This estimates the remaining data to transfer
* - can migrate in postcopy or in stopped state
* - i.e. can migrate after target start
* - some can also be migrated during precopy (RAM)
* - some must be migrated after source stops (block-dirty-bitmap)
* *
* Sum of can_postcopy and must_postcopy is the whole amount of * Sum of @must_precopy and @can_postcopy is the whole amount of
* pending data. * pending data.
*
* @opaque: data pointer passed to register_savevm_live()
* @must_precopy: amount of data that must be migrated in precopy
* or in stopped state, i.e. that must be migrated
* before target start.
* @can_postcopy: amount of data that can be migrated in postcopy
* or in stopped state, i.e. after target start.
* Some can also be migrated during precopy (RAM).
* Some must be migrated after source stops
* (block-dirty-bitmap)
*/ */
/* This estimates the remaining data to transfer */
void (*state_pending_estimate)(void *opaque, uint64_t *must_precopy, void (*state_pending_estimate)(void *opaque, uint64_t *must_precopy,
uint64_t *can_postcopy); uint64_t *can_postcopy);
/* This calculate the exact remaining data to transfer */
/**
* @state_pending_exact
*
* This calculates the exact remaining data to transfer
*
* Sum of @must_precopy and @can_postcopy is the whole amount of
* pending data.
*
* @opaque: data pointer passed to register_savevm_live()
* @must_precopy: amount of data that must be migrated in precopy
* or in stopped state, i.e. that must be migrated
* before target start.
* @can_postcopy: amount of data that can be migrated in postcopy
* or in stopped state, i.e. after target start.
* Some can also be migrated during precopy (RAM).
* Some must be migrated after source stops
* (block-dirty-bitmap)
*/
void (*state_pending_exact)(void *opaque, uint64_t *must_precopy, void (*state_pending_exact)(void *opaque, uint64_t *must_precopy,
uint64_t *can_postcopy); uint64_t *can_postcopy);
LoadStateHandler *load_state;
/**
* @load_state
*
* Load sections generated by any of the save functions that
* generate sections.
*
* Legacy method. Should be deprecated when all users are ported
* to VMStateDescription.
*
* @f: QEMUFile where to receive the data
* @opaque: data pointer passed to register_savevm_live()
* @version_id: the maximum version_id supported
*
* Returns zero to indicate success and negative for error
*/
int (*load_state)(QEMUFile *f, void *opaque, int version_id);
/**
* @load_setup
*
* Initializes the data structures on the destination.
*
* @f: QEMUFile where to receive the data
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*load_setup)(QEMUFile *f, void *opaque); int (*load_setup)(QEMUFile *f, void *opaque);
/**
* @load_cleanup
*
* Uninitializes the data structures on the destination.
*
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*load_cleanup)(void *opaque); int (*load_cleanup)(void *opaque);
/* Called when postcopy migration wants to resume from failure */
/**
* @resume_prepare
*
* Called when postcopy migration wants to resume from failure
*
* @s: Current migration state
* @opaque: data pointer passed to register_savevm_live()
*
* Returns zero to indicate success and negative for error
*/
int (*resume_prepare)(MigrationState *s, void *opaque); int (*resume_prepare)(MigrationState *s, void *opaque);
/* Checks if switchover ack should be used. Called only in dest */
/**
* @switchover_ack_needed
*
* Checks if switchover ack should be used. Called only on
* destination.
*
* @opaque: data pointer passed to register_savevm_live()
*
* Returns true if switchover ack should be used and false
* otherwise
*/
bool (*switchover_ack_needed)(void *opaque); bool (*switchover_ack_needed)(void *opaque);
} SaveVMHandlers; } SaveVMHandlers;
/**
* register_savevm_live: Register a set of custom migration handlers
*
* @idstr: state section identifier
* @instance_id: instance id
* @version_id: version id supported
* @ops: SaveVMHandlers structure
* @opaque: data pointer passed to SaveVMHandlers handlers
*/
int register_savevm_live(const char *idstr, int register_savevm_live(const char *idstr,
uint32_t instance_id, uint32_t instance_id,
int version_id, int version_id,
const SaveVMHandlers *ops, const SaveVMHandlers *ops,
void *opaque); void *opaque);
/**
* unregister_savevm: Unregister custom migration handlers
*
* @obj: object associated with state section
* @idstr: state section identifier
* @opaque: data pointer passed to register_savevm_live()
*/
void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque); void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque);
#endif #endif

View File

@ -151,8 +151,6 @@ typedef struct IRQState *qemu_irq;
/* /*
* Function types * Function types
*/ */
typedef void SaveStateHandler(QEMUFile *f, void *opaque);
typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
typedef void (*qemu_irq_handler)(void *opaque, int n, int level); typedef void (*qemu_irq_handler)(void *opaque, int n, int level);
#endif /* QEMU_TYPEDEFS_H */ #endif /* QEMU_TYPEDEFS_H */

View File

@ -242,11 +242,6 @@ static int qio_channel_file_close(QIOChannel *ioc,
{ {
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
if (qemu_fdatasync(fioc->fd) < 0) {
error_setg_errno(errp, errno,
"Unable to synchronize file data with storage device");
return -1;
}
if (qemu_close(fioc->fd) < 0) { if (qemu_close(fioc->fd) < 0) {
error_setg_errno(errp, errno, error_setg_errno(errp, errno,
"Unable to close file"); "Unable to close file");

View File

@ -63,9 +63,9 @@ static bool colo_runstate_is_stopped(void)
return runstate_check(RUN_STATE_COLO) || !runstate_is_running(); return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
} }
static void colo_checkpoint_notify(void *opaque) static void colo_checkpoint_notify(void)
{ {
MigrationState *s = opaque; MigrationState *s = migrate_get_current();
int64_t next_notify_time; int64_t next_notify_time;
qemu_event_set(&s->colo_checkpoint_event); qemu_event_set(&s->colo_checkpoint_event);
@ -74,10 +74,15 @@ static void colo_checkpoint_notify(void *opaque)
timer_mod(s->colo_delay_timer, next_notify_time); timer_mod(s->colo_delay_timer, next_notify_time);
} }
static void colo_checkpoint_notify_timer(void *opaque)
{
colo_checkpoint_notify();
}
void colo_checkpoint_delay_set(void) void colo_checkpoint_delay_set(void)
{ {
if (migration_in_colo_state()) { if (migration_in_colo_state()) {
colo_checkpoint_notify(migrate_get_current()); colo_checkpoint_notify();
} }
} }
@ -162,7 +167,7 @@ static void primary_vm_do_failover(void)
* kick COLO thread which might wait at * kick COLO thread which might wait at
* qemu_sem_wait(&s->colo_checkpoint_sem). * qemu_sem_wait(&s->colo_checkpoint_sem).
*/ */
colo_checkpoint_notify(s); colo_checkpoint_notify();
/* /*
* Wake up COLO thread which may blocked in recv() or send(), * Wake up COLO thread which may blocked in recv() or send(),
@ -518,7 +523,7 @@ out:
static void colo_compare_notify_checkpoint(Notifier *notifier, void *data) static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
{ {
colo_checkpoint_notify(data); colo_checkpoint_notify();
} }
static void colo_process_checkpoint(MigrationState *s) static void colo_process_checkpoint(MigrationState *s)
@ -642,7 +647,7 @@ void migrate_start_colo_process(MigrationState *s)
bql_unlock(); bql_unlock();
qemu_event_init(&s->colo_checkpoint_event, false); qemu_event_init(&s->colo_checkpoint_event, false);
s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST, s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
colo_checkpoint_notify, s); colo_checkpoint_notify_timer, NULL);
qemu_sem_init(&s->colo_exit_sem, 0); qemu_sem_init(&s->colo_exit_sem, 0);
colo_process_checkpoint(s); colo_process_checkpoint(s);

View File

@ -159,7 +159,7 @@ void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp)
int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov, int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov,
int niov, RAMBlock *block, Error **errp) int niov, RAMBlock *block, Error **errp)
{ {
ssize_t ret = -1; ssize_t ret = 0;
int i, slice_idx, slice_num; int i, slice_idx, slice_num;
uintptr_t base, next, offset; uintptr_t base, next, offset;
size_t len; size_t len;
@ -191,7 +191,7 @@ int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov,
*/ */
offset = (uintptr_t) iov[slice_idx].iov_base - (uintptr_t) block->host; offset = (uintptr_t) iov[slice_idx].iov_base - (uintptr_t) block->host;
if (offset >= block->used_length) { if (offset >= block->used_length) {
error_setg(errp, "offset " RAM_ADDR_FMT error_setg(errp, "offset %" PRIxPTR
"outside of ramblock %s range", offset, block->idstr); "outside of ramblock %s range", offset, block->idstr);
ret = -1; ret = -1;
break; break;

View File

@ -22,6 +22,7 @@ system_ss.add(files(
'migration.c', 'migration.c',
'multifd.c', 'multifd.c',
'multifd-zlib.c', 'multifd-zlib.c',
'multifd-zero-page.c',
'ram-compress.c', 'ram-compress.c',
'options.c', 'options.c',
'postcopy-ram.c', 'postcopy-ram.c',

View File

@ -344,6 +344,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "%s: %s\n", monitor_printf(mon, "%s: %s\n",
MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION), MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
MultiFDCompression_str(params->multifd_compression)); MultiFDCompression_str(params->multifd_compression));
assert(params->has_zero_page_detection);
monitor_printf(mon, "%s: %s\n",
MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION),
qapi_enum_lookup(&ZeroPageDetection_lookup,
params->zero_page_detection));
monitor_printf(mon, "%s: %" PRIu64 " bytes\n", monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE), MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
params->xbzrle_cache_size); params->xbzrle_cache_size);
@ -634,6 +639,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_multifd_zstd_level = true; p->has_multifd_zstd_level = true;
visit_type_uint8(v, param, &p->multifd_zstd_level, &err); visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
break; break;
case MIGRATION_PARAMETER_ZERO_PAGE_DETECTION:
p->has_zero_page_detection = true;
visit_type_ZeroPageDetection(v, param, &p->zero_page_detection, &err);
break;
case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
p->has_xbzrle_cache_size = true; p->has_xbzrle_cache_size = true;
if (!visit_type_size(v, param, &cache_size, &err)) { if (!visit_type_size(v, param, &cache_size, &err)) {

View File

@ -1081,9 +1081,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
* Return true if we're already in the middle of a migration * Return true if we're already in the middle of a migration
* (i.e. any of the active or setup states) * (i.e. any of the active or setup states)
*/ */
bool migration_is_setup_or_active(int state) bool migration_is_setup_or_active(void)
{ {
switch (state) { MigrationState *s = current_migration;
switch (s->state) {
case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED: case MIGRATION_STATUS_POSTCOPY_PAUSED:
@ -1101,9 +1103,11 @@ bool migration_is_setup_or_active(int state)
} }
} }
bool migration_is_running(int state) bool migration_is_running(void)
{ {
switch (state) { MigrationState *s = current_migration;
switch (s->state) {
case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_PAUSED: case MIGRATION_STATUS_POSTCOPY_PAUSED:
@ -1404,7 +1408,7 @@ static void migrate_fd_cleanup(MigrationState *s)
qemu_fclose(tmp); qemu_fclose(tmp);
} }
assert(!migration_is_active(s)); assert(!migration_is_active());
if (s->state == MIGRATION_STATUS_CANCELLING) { if (s->state == MIGRATION_STATUS_CANCELLING) {
migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
@ -1475,7 +1479,7 @@ static void migrate_fd_cancel(MigrationState *s)
do { do {
old_state = s->state; old_state = s->state;
if (!migration_is_running(old_state)) { if (!migration_is_running()) {
break; break;
} }
/* If the migration is paused, kick it out of the pause */ /* If the migration is paused, kick it out of the pause */
@ -1544,16 +1548,6 @@ int migration_call_notifiers(MigrationState *s, MigrationEventType type,
return ret; return ret;
} }
bool migration_in_setup(MigrationState *s)
{
return s->state == MIGRATION_STATUS_SETUP;
}
bool migration_has_finished(MigrationState *s)
{
return s->state == MIGRATION_STATUS_COMPLETED;
}
bool migration_has_failed(MigrationState *s) bool migration_has_failed(MigrationState *s)
{ {
return (s->state == MIGRATION_STATUS_CANCELLED || return (s->state == MIGRATION_STATUS_CANCELLED ||
@ -1601,10 +1595,8 @@ bool migration_incoming_postcopy_advised(void)
bool migration_in_bg_snapshot(void) bool migration_in_bg_snapshot(void)
{ {
MigrationState *s = migrate_get_current();
return migrate_background_snapshot() && return migrate_background_snapshot() &&
migration_is_setup_or_active(s->state); migration_is_setup_or_active();
} }
bool migration_is_idle(void) bool migration_is_idle(void)
@ -1637,12 +1629,28 @@ bool migration_is_idle(void)
return false; return false;
} }
bool migration_is_active(MigrationState *s) bool migration_is_active(void)
{ {
MigrationState *s = current_migration;
return (s->state == MIGRATION_STATUS_ACTIVE || return (s->state == MIGRATION_STATUS_ACTIVE ||
s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
} }
/*
 * Return true when the current migration's state is
 * MIGRATION_STATUS_DEVICE.
 */
bool migration_is_device(void)
{
    return current_migration->state == MIGRATION_STATUS_DEVICE;
}
/*
 * Return the result of qemu_thread_is_self() applied to the thread
 * recorded in the current migration state.
 */
bool migration_thread_is_self(void)
{
    return qemu_thread_is_self(&current_migration->thread);
}
bool migrate_mode_is_cpr(MigrationState *s) bool migrate_mode_is_cpr(MigrationState *s)
{ {
return s->parameters.mode == MIG_MODE_CPR_REBOOT; return s->parameters.mode == MIG_MODE_CPR_REBOOT;
@ -1960,7 +1968,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return true; return true;
} }
if (migration_is_running(s->state)) { if (migration_is_running()) {
error_setg(errp, QERR_MIGRATION_ACTIVE); error_setg(errp, QERR_MIGRATION_ACTIVE);
return false; return false;
} }
@ -2297,7 +2305,7 @@ static void *source_return_path_thread(void *opaque)
trace_source_return_path_thread_entry(); trace_source_return_path_thread_entry();
rcu_register_thread(); rcu_register_thread();
while (migration_is_setup_or_active(ms->state)) { while (migration_is_setup_or_active()) {
trace_source_return_path_thread_loop_top(); trace_source_return_path_thread_loop_top();
header_type = qemu_get_be16(rp); header_type = qemu_get_be16(rp);
@ -3020,6 +3028,17 @@ static MigThrError postcopy_pause(MigrationState *s)
} }
} }
/*
 * Record @err on the current migration's to_dst_file.
 *
 * The file pointer is checked and used while qemu_file_lock is held;
 * nothing is done when to_dst_file is NULL.
 */
void migration_file_set_error(int err)
{
    MigrationState *ms = current_migration;

    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        if (ms->to_dst_file != NULL) {
            qemu_file_set_error(ms->to_dst_file, err);
        }
    }
}
static MigThrError migration_detect_error(MigrationState *s) static MigThrError migration_detect_error(MigrationState *s)
{ {
int ret; int ret;
@ -3461,7 +3480,7 @@ static void *migration_thread(void *opaque)
trace_migration_thread_setup_complete(); trace_migration_thread_setup_complete();
while (migration_is_active(s)) { while (migration_is_active()) {
if (urgent || !migration_rate_exceeded(s->to_dst_file)) { if (urgent || !migration_rate_exceeded(s->to_dst_file)) {
MigIterateState iter_state = migration_iteration_run(s); MigIterateState iter_state = migration_iteration_run(s);
if (iter_state == MIG_ITERATE_SKIP) { if (iter_state == MIG_ITERATE_SKIP) {
@ -3607,7 +3626,7 @@ static void *bg_migration_thread(void *opaque)
migration_bh_schedule(bg_migration_vm_start_bh, s); migration_bh_schedule(bg_migration_vm_start_bh, s);
bql_unlock(); bql_unlock();
while (migration_is_active(s)) { while (migration_is_active()) {
MigIterateState iter_state = bg_migration_iteration_run(s); MigIterateState iter_state = bg_migration_iteration_run(s);
if (iter_state == MIG_ITERATE_SKIP) { if (iter_state == MIG_ITERATE_SKIP) {
continue; continue;

View File

@ -26,6 +26,7 @@
#include "qom/object.h" #include "qom/object.h"
#include "postcopy-ram.h" #include "postcopy-ram.h"
#include "sysemu/runstate.h" #include "sysemu/runstate.h"
#include "migration/misc.h"
struct PostcopyBlocktimeContext; struct PostcopyBlocktimeContext;
@ -479,8 +480,8 @@ bool migrate_has_error(MigrationState *s);
void migrate_fd_connect(MigrationState *s, Error *error_in); void migrate_fd_connect(MigrationState *s, Error *error_in);
bool migration_is_setup_or_active(int state); int migration_call_notifiers(MigrationState *s, MigrationEventType type,
bool migration_is_running(int state); Error **errp);
int migrate_init(MigrationState *s, Error **errp); int migrate_init(MigrationState *s, Error **errp);
bool migration_is_blocked(Error **errp); bool migration_is_blocked(Error **errp);
@ -488,6 +489,8 @@ bool migration_is_blocked(Error **errp);
bool migration_in_postcopy(void); bool migration_in_postcopy(void);
bool migration_postcopy_is_alive(int state); bool migration_postcopy_is_alive(int state);
MigrationState *migrate_get_current(void); MigrationState *migrate_get_current(void);
bool migration_has_failed(MigrationState *);
bool migrate_mode_is_cpr(MigrationState *);
uint64_t ram_get_total_transferred_pages(void); uint64_t ram_get_total_transferred_pages(void);

View File

@ -0,0 +1,87 @@
/*
* Multifd zero page detection implementation.
*
* Copyright (c) 2024 Bytedance Inc
*
* Authors:
* Hao Xiang <hao.xiang@bytedance.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "exec/ramblock.h"
#include "migration.h"
#include "multifd.h"
#include "options.h"
#include "ram.h"
static bool multifd_zero_page_enabled(void)
{
return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD;
}
/*
 * Exchange the entries at indexes @a and @b of @pages_offset.
 * Nothing is touched when both indexes are equal.
 */
static void swap_page_offset(ram_addr_t *pages_offset, int a, int b)
{
    if (a != b) {
        ram_addr_t saved = pages_offset[b];

        pages_offset[b] = pages_offset[a];
        pages_offset[a] = saved;
    }
}
/**
 * multifd_send_zero_page_detect: Perform zero page detection on all pages.
 *
 * Partitions p->pages->offset so that the offsets of non-zero ("normal")
 * pages come first and the offsets of zero pages follow, then stores the
 * number of normal pages in p->pages->normal_num.
 *
 * When multifd zero page detection is not enabled, every queued page is
 * counted as normal and the array is left untouched.
 *
 * @param p A pointer to the send params.
 */
void multifd_send_zero_page_detect(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = p->pages;
    RAMBlock *rb = pages->block;
    int head = 0;
    int tail = pages->num - 1;

    if (!multifd_zero_page_enabled()) {
        pages->normal_num = pages->num;
        return;
    }

    /*
     * Two-index partition: 'head' walks forward over pages already known
     * to be normal, 'tail' marks the last slot not yet claimed by a zero
     * page.  A zero page found at 'head' is swapped to 'tail'; 'head' is
     * not advanced then, because the entry swapped in is still unchecked.
     */
    while (head <= tail) {
        uint64_t offset = pages->offset[head];

        if (buffer_is_zero(rb->host + offset, p->page_size)) {
            swap_page_offset(pages->offset, head, tail);
            ram_release_page(rb->idstr, offset);
            tail--;
        } else {
            head++;
        }
    }

    pages->normal_num = head;
}
/*
 * Make sure every page listed in p->zero[] reads as zero on the
 * receiving side.
 *
 * Each of the p->zero_num offsets is relative to p->host.  A page is
 * only written (memset to 0) when buffer_is_zero() reports that it is
 * not already all zeroes.
 */
void multifd_recv_zero_page_process(MultiFDRecvParams *p)
{
    for (int idx = 0; idx < p->zero_num; idx++) {
        void *page = p->host + p->zero[idx];

        if (buffer_is_zero(page, p->page_size)) {
            continue;
        }
        memset(page, 0, p->page_size);
    }
}

View File

@ -123,13 +123,15 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
int ret; int ret;
uint32_t i; uint32_t i;
multifd_send_prepare_header(p); if (!multifd_send_prepare_common(p)) {
goto out;
}
for (i = 0; i < pages->num; i++) { for (i = 0; i < pages->normal_num; i++) {
uint32_t available = z->zbuff_len - out_size; uint32_t available = z->zbuff_len - out_size;
int flush = Z_NO_FLUSH; int flush = Z_NO_FLUSH;
if (i == pages->num - 1) { if (i == pages->normal_num - 1) {
flush = Z_SYNC_FLUSH; flush = Z_SYNC_FLUSH;
} }
@ -172,10 +174,10 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
p->iov[p->iovs_num].iov_len = out_size; p->iov[p->iovs_num].iov_len = out_size;
p->iovs_num++; p->iovs_num++;
p->next_packet_size = out_size; p->next_packet_size = out_size;
out:
p->flags |= MULTIFD_FLAG_ZLIB; p->flags |= MULTIFD_FLAG_ZLIB;
multifd_send_fill_packet(p); multifd_send_fill_packet(p);
return 0; return 0;
} }
@ -261,6 +263,14 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp)
p->id, flags, MULTIFD_FLAG_ZLIB); p->id, flags, MULTIFD_FLAG_ZLIB);
return -1; return -1;
} }
multifd_recv_zero_page_process(p);
if (!p->normal_num) {
assert(in_size == 0);
return 0;
}
ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp); ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp);
if (ret != 0) { if (ret != 0) {
@ -310,6 +320,7 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp)
p->id, out_size, expected_size); p->id, out_size, expected_size);
return -1; return -1;
} }
return 0; return 0;
} }

View File

@ -118,16 +118,18 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
int ret; int ret;
uint32_t i; uint32_t i;
multifd_send_prepare_header(p); if (!multifd_send_prepare_common(p)) {
goto out;
}
z->out.dst = z->zbuff; z->out.dst = z->zbuff;
z->out.size = z->zbuff_len; z->out.size = z->zbuff_len;
z->out.pos = 0; z->out.pos = 0;
for (i = 0; i < pages->num; i++) { for (i = 0; i < pages->normal_num; i++) {
ZSTD_EndDirective flush = ZSTD_e_continue; ZSTD_EndDirective flush = ZSTD_e_continue;
if (i == pages->num - 1) { if (i == pages->normal_num - 1) {
flush = ZSTD_e_flush; flush = ZSTD_e_flush;
} }
z->in.src = p->pages->block->host + pages->offset[i]; z->in.src = p->pages->block->host + pages->offset[i];
@ -161,10 +163,10 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
p->iov[p->iovs_num].iov_len = z->out.pos; p->iov[p->iovs_num].iov_len = z->out.pos;
p->iovs_num++; p->iovs_num++;
p->next_packet_size = z->out.pos; p->next_packet_size = z->out.pos;
out:
p->flags |= MULTIFD_FLAG_ZSTD; p->flags |= MULTIFD_FLAG_ZSTD;
multifd_send_fill_packet(p); multifd_send_fill_packet(p);
return 0; return 0;
} }
@ -257,6 +259,14 @@ static int zstd_recv(MultiFDRecvParams *p, Error **errp)
p->id, flags, MULTIFD_FLAG_ZSTD); p->id, flags, MULTIFD_FLAG_ZSTD);
return -1; return -1;
} }
multifd_recv_zero_page_process(p);
if (!p->normal_num) {
assert(in_size == 0);
return 0;
}
ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp); ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp);
if (ret != 0) { if (ret != 0) {

View File

@ -11,6 +11,7 @@
*/ */
#include "qemu/osdep.h" #include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/rcu.h" #include "qemu/rcu.h"
#include "exec/target_page.h" #include "exec/target_page.h"
#include "sysemu/sysemu.h" #include "sysemu/sysemu.h"
@ -111,11 +112,16 @@ void multifd_send_channel_created(void)
/*
 * Update the RAM block's file bitmap for every page queued in p->pages.
 *
 * pages->offset[] holds the normal pages in [0, normal_num) followed by
 * the zero pages in [normal_num, num).  ramblock_set_file_bmap_atomic()
 * is called with 'true' for each normal page and with 'false' for each
 * zero page.
 */
static void multifd_set_file_bitmap(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = p->pages;

    assert(pages->block);

    for (int i = 0; i < pages->normal_num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true);
    }

    /*
     * Zero pages start right after the normal ones.  Iterating from
     * pages->num up to the zero-page count (as was done before) never
     * executes, because that count cannot exceed pages->num.
     */
    for (int i = pages->normal_num; i < pages->num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false);
    }
}
@ -153,13 +159,13 @@ static void multifd_send_prepare_iovs(MultiFDSendParams *p)
{ {
MultiFDPages_t *pages = p->pages; MultiFDPages_t *pages = p->pages;
for (int i = 0; i < pages->num; i++) { for (int i = 0; i < pages->normal_num; i++) {
p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
p->iov[p->iovs_num].iov_len = p->page_size; p->iov[p->iovs_num].iov_len = p->page_size;
p->iovs_num++; p->iovs_num++;
} }
p->next_packet_size = pages->num * p->page_size; p->next_packet_size = pages->normal_num * p->page_size;
} }
/** /**
@ -178,6 +184,8 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
bool use_zero_copy_send = migrate_zero_copy_send(); bool use_zero_copy_send = migrate_zero_copy_send();
int ret; int ret;
multifd_send_zero_page_detect(p);
if (!multifd_use_packets()) { if (!multifd_use_packets()) {
multifd_send_prepare_iovs(p); multifd_send_prepare_iovs(p);
multifd_set_file_bitmap(p); multifd_set_file_bitmap(p);
@ -261,6 +269,13 @@ static int nocomp_recv(MultiFDRecvParams *p, Error **errp)
p->id, flags, MULTIFD_FLAG_NOCOMP); p->id, flags, MULTIFD_FLAG_NOCOMP);
return -1; return -1;
} }
multifd_recv_zero_page_process(p);
if (!p->normal_num) {
return 0;
}
for (int i = 0; i < p->normal_num; i++) { for (int i = 0; i < p->normal_num; i++) {
p->iov[i].iov_base = p->host + p->normal[i]; p->iov[i].iov_base = p->host + p->normal[i];
p->iov[i].iov_len = p->page_size; p->iov[i].iov_len = p->page_size;
@ -295,6 +310,7 @@ static void multifd_pages_reset(MultiFDPages_t *pages)
* overwritten later when reused. * overwritten later when reused.
*/ */
pages->num = 0; pages->num = 0;
pages->normal_num = 0;
pages->block = NULL; pages->block = NULL;
} }
@ -386,11 +402,13 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
MultiFDPacket_t *packet = p->packet; MultiFDPacket_t *packet = p->packet;
MultiFDPages_t *pages = p->pages; MultiFDPages_t *pages = p->pages;
uint64_t packet_num; uint64_t packet_num;
uint32_t zero_num = pages->num - pages->normal_num;
int i; int i;
packet->flags = cpu_to_be32(p->flags); packet->flags = cpu_to_be32(p->flags);
packet->pages_alloc = cpu_to_be32(p->pages->allocated); packet->pages_alloc = cpu_to_be32(p->pages->allocated);
packet->normal_pages = cpu_to_be32(pages->num); packet->normal_pages = cpu_to_be32(pages->normal_num);
packet->zero_pages = cpu_to_be32(zero_num);
packet->next_packet_size = cpu_to_be32(p->next_packet_size); packet->next_packet_size = cpu_to_be32(p->next_packet_size);
packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num); packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num);
@ -408,10 +426,11 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
} }
p->packets_sent++; p->packets_sent++;
p->total_normal_pages += pages->num; p->total_normal_pages += pages->normal_num;
p->total_zero_pages += zero_num;
trace_multifd_send(p->id, packet_num, pages->num, p->flags, trace_multifd_send(p->id, packet_num, pages->normal_num, zero_num,
p->next_packet_size); p->flags, p->next_packet_size);
} }
static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
@ -452,20 +471,29 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
p->normal_num = be32_to_cpu(packet->normal_pages); p->normal_num = be32_to_cpu(packet->normal_pages);
if (p->normal_num > packet->pages_alloc) { if (p->normal_num > packet->pages_alloc) {
error_setg(errp, "multifd: received packet " error_setg(errp, "multifd: received packet "
"with %u pages and expected maximum pages are %u", "with %u normal pages and expected maximum pages are %u",
p->normal_num, packet->pages_alloc) ; p->normal_num, packet->pages_alloc) ;
return -1; return -1;
} }
p->zero_num = be32_to_cpu(packet->zero_pages);
if (p->zero_num > packet->pages_alloc - p->normal_num) {
error_setg(errp, "multifd: received packet "
"with %u zero pages and expected maximum zero pages are %u",
p->zero_num, packet->pages_alloc - p->normal_num) ;
return -1;
}
p->next_packet_size = be32_to_cpu(packet->next_packet_size); p->next_packet_size = be32_to_cpu(packet->next_packet_size);
p->packet_num = be64_to_cpu(packet->packet_num); p->packet_num = be64_to_cpu(packet->packet_num);
p->packets_recved++; p->packets_recved++;
p->total_normal_pages += p->normal_num; p->total_normal_pages += p->normal_num;
p->total_zero_pages += p->zero_num;
trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags, trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->zero_num,
p->next_packet_size); p->flags, p->next_packet_size);
if (p->normal_num == 0) { if (p->normal_num == 0 && p->zero_num == 0) {
return 0; return 0;
} }
@ -491,6 +519,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
p->normal[i] = offset; p->normal[i] = offset;
} }
for (i = 0; i < p->zero_num; i++) {
uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
if (offset > (p->block->used_length - p->page_size)) {
error_setg(errp, "multifd: offset too long %" PRIu64
" (max " RAM_ADDR_FMT ")",
offset, p->block->used_length);
return -1;
}
p->zero[i] = offset;
}
return 0; return 0;
} }
@ -710,16 +750,26 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
if (p->c) { if (p->c) {
migration_ioc_unregister_yank(p->c); migration_ioc_unregister_yank(p->c);
/* /*
* An explicit close() on the channel here is normally not * The object_unref() cannot guarantee the fd will always be
* required, but can be helpful for "file:" iochannels, where it * released because finalize() of the iochannel is only
* will include fdatasync() to make sure the data is flushed to the * triggered on the last reference and it's not guaranteed
* disk backend. * that we always hold the last refcount when reaching here.
* *
* The object_unref() cannot guarantee that because: (1) finalize() * Closing the fd explicitly has the benefit that if there is any
* of the iochannel is only triggered on the last reference, and * registered I/O handler callbacks on such fd, that will get a
* it's not guaranteed that we always hold the last refcount when * POLLNVAL event and will further trigger the cleanup to finally
* reaching here, and, (2) even if finalize() is invoked, it only * release the IOC.
* does a close(fd) without data flush. *
* FIXME: It should logically be guaranteed that all multifd
* channels have no I/O handler callback registered when reaching
* here, because migration thread will wait for all multifd channel
* establishments to complete during setup. Since
* migrate_fd_cleanup() will be scheduled in main thread too, all
* previous callbacks should guarantee to be completed when
* reaching here. See multifd_send_state.channels_created and its
* usage. In the future, we could replace this with an assert
* making sure we're the last reference, or simply drop it if above
* is more clear to be justified.
*/ */
qio_channel_close(p->c, &error_abort); qio_channel_close(p->c, &error_abort);
object_unref(OBJECT(p->c)); object_unref(OBJECT(p->c));
@ -908,6 +958,8 @@ static void *multifd_send_thread(void *opaque)
stat64_add(&mig_stats.multifd_bytes, stat64_add(&mig_stats.multifd_bytes,
p->next_packet_size + p->packet_len); p->next_packet_size + p->packet_len);
stat64_add(&mig_stats.normal_pages, pages->normal_num);
stat64_add(&mig_stats.zero_pages, pages->num - pages->normal_num);
multifd_pages_reset(p->pages); multifd_pages_reset(p->pages);
p->next_packet_size = 0; p->next_packet_size = 0;
@ -955,7 +1007,8 @@ out:
rcu_unregister_thread(); rcu_unregister_thread();
migration_threads_remove(thread); migration_threads_remove(thread);
trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages); trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages,
p->total_zero_pages);
return NULL; return NULL;
} }
@ -1306,6 +1359,8 @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p)
p->iov = NULL; p->iov = NULL;
g_free(p->normal); g_free(p->normal);
p->normal = NULL; p->normal = NULL;
g_free(p->zero);
p->zero = NULL;
multifd_recv_state->ops->recv_cleanup(p); multifd_recv_state->ops->recv_cleanup(p);
} }
@ -1439,7 +1494,7 @@ static void *multifd_recv_thread(void *opaque)
flags = p->flags; flags = p->flags;
/* recv methods don't know how to handle the SYNC flag */ /* recv methods don't know how to handle the SYNC flag */
p->flags &= ~MULTIFD_FLAG_SYNC; p->flags &= ~MULTIFD_FLAG_SYNC;
has_data = !!p->normal_num; has_data = p->normal_num || p->zero_num;
qemu_mutex_unlock(&p->mutex); qemu_mutex_unlock(&p->mutex);
} else { } else {
/* /*
@ -1497,7 +1552,9 @@ static void *multifd_recv_thread(void *opaque)
} }
rcu_unregister_thread(); rcu_unregister_thread();
trace_multifd_recv_thread_end(p->id, p->packets_recved, p->total_normal_pages); trace_multifd_recv_thread_end(p->id, p->packets_recved,
p->total_normal_pages,
p->total_zero_pages);
return NULL; return NULL;
} }
@ -1549,6 +1606,7 @@ int multifd_recv_setup(Error **errp)
p->name = g_strdup_printf("multifdrecv_%d", i); p->name = g_strdup_printf("multifdrecv_%d", i);
p->iov = g_new0(struct iovec, page_count); p->iov = g_new0(struct iovec, page_count);
p->normal = g_new0(ram_addr_t, page_count); p->normal = g_new0(ram_addr_t, page_count);
p->zero = g_new0(ram_addr_t, page_count);
p->page_count = page_count; p->page_count = page_count;
p->page_size = qemu_target_page_size(); p->page_size = qemu_target_page_size();
} }
@ -1623,3 +1681,17 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
QEMU_THREAD_JOINABLE); QEMU_THREAD_JOINABLE);
qatomic_inc(&multifd_recv_state->count); qatomic_inc(&multifd_recv_state->count);
} }
/*
 * Common first step of the send_prepare hooks.
 *
 * Runs zero page detection on the queued pages.  If at least one normal
 * page remains, the packet header is prepared and true is returned.
 * Otherwise p->next_packet_size is set to 0 and false is returned, with
 * no header prepared.
 */
bool multifd_send_prepare_common(MultiFDSendParams *p)
{
    multifd_send_zero_page_detect(p);

    if (p->pages->normal_num) {
        multifd_send_prepare_header(p);
        return true;
    }

    p->next_packet_size = 0;
    return false;
}

View File

@ -55,14 +55,24 @@ typedef struct {
/* size of the next packet that contains pages */ /* size of the next packet that contains pages */
uint32_t next_packet_size; uint32_t next_packet_size;
uint64_t packet_num; uint64_t packet_num;
uint64_t unused[4]; /* Reserved for future use */ /* zero pages */
uint32_t zero_pages;
uint32_t unused32[1]; /* Reserved for future use */
uint64_t unused64[3]; /* Reserved for future use */
char ramblock[256]; char ramblock[256];
/*
* This array contains the pointers to:
* - normal pages (initial normal_pages entries)
* - zero pages (following zero_pages entries)
*/
uint64_t offset[]; uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t; } __attribute__((packed)) MultiFDPacket_t;
typedef struct { typedef struct {
/* number of used pages */ /* number of used pages */
uint32_t num; uint32_t num;
/* number of normal pages */
uint32_t normal_num;
/* number of allocated pages */ /* number of allocated pages */
uint32_t allocated; uint32_t allocated;
/* offset of each page */ /* offset of each page */
@ -136,6 +146,8 @@ typedef struct {
uint64_t packets_sent; uint64_t packets_sent;
/* non zero pages sent through this channel */ /* non zero pages sent through this channel */
uint64_t total_normal_pages; uint64_t total_normal_pages;
/* zero pages sent through this channel */
uint64_t total_zero_pages;
/* buffers to send */ /* buffers to send */
struct iovec *iov; struct iovec *iov;
/* number of iovs used */ /* number of iovs used */
@ -194,12 +206,18 @@ typedef struct {
uint8_t *host; uint8_t *host;
/* non zero pages recv through this channel */ /* non zero pages recv through this channel */
uint64_t total_normal_pages; uint64_t total_normal_pages;
/* zero pages recv through this channel */
uint64_t total_zero_pages;
/* buffers to recv */ /* buffers to recv */
struct iovec *iov; struct iovec *iov;
/* Pages that are not zero */ /* Pages that are not zero */
ram_addr_t *normal; ram_addr_t *normal;
/* num of non zero pages */ /* num of non zero pages */
uint32_t normal_num; uint32_t normal_num;
/* Pages that are zero */
ram_addr_t *zero;
/* num of zero pages */
uint32_t zero_num;
/* used for de-compression methods */ /* used for de-compression methods */
void *compress_data; void *compress_data;
} MultiFDRecvParams; } MultiFDRecvParams;
@ -221,6 +239,9 @@ typedef struct {
void multifd_register_ops(int method, MultiFDMethods *ops); void multifd_register_ops(int method, MultiFDMethods *ops);
void multifd_send_fill_packet(MultiFDSendParams *p); void multifd_send_fill_packet(MultiFDSendParams *p);
bool multifd_send_prepare_common(MultiFDSendParams *p);
void multifd_send_zero_page_detect(MultiFDSendParams *p);
void multifd_recv_zero_page_process(MultiFDRecvParams *p);
static inline void multifd_send_prepare_header(MultiFDSendParams *p) static inline void multifd_send_prepare_header(MultiFDSendParams *p)
{ {

View File

@ -179,6 +179,9 @@ Property migration_properties[] = {
DEFINE_PROP_MIG_MODE("mode", MigrationState, DEFINE_PROP_MIG_MODE("mode", MigrationState,
parameters.mode, parameters.mode,
MIG_MODE_NORMAL), MIG_MODE_NORMAL),
DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
parameters.zero_page_detection,
ZERO_PAGE_DETECTION_MULTIFD),
/* Migration capabilities */ /* Migration capabilities */
DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@ -681,7 +684,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp)
MigrationState *s = migrate_get_current(); MigrationState *s = migrate_get_current();
bool new_caps[MIGRATION_CAPABILITY__MAX]; bool new_caps[MIGRATION_CAPABILITY__MAX];
if (migration_is_running(s->state)) { if (migration_is_running()) {
error_setg(errp, QERR_MIGRATION_ACTIVE); error_setg(errp, QERR_MIGRATION_ACTIVE);
return false; return false;
} }
@ -725,7 +728,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
MigrationCapabilityStatusList *cap; MigrationCapabilityStatusList *cap;
bool new_caps[MIGRATION_CAPABILITY__MAX]; bool new_caps[MIGRATION_CAPABILITY__MAX];
if (migration_is_running(s->state) || migration_in_colo_state()) { if (migration_is_running() || migration_in_colo_state()) {
error_setg(errp, QERR_MIGRATION_ACTIVE); error_setg(errp, QERR_MIGRATION_ACTIVE);
return; return;
} }
@ -924,6 +927,13 @@ const char *migrate_tls_hostname(void)
return s->parameters.tls_hostname; return s->parameters.tls_hostname;
} }
uint64_t migrate_vcpu_dirty_limit_period(void)
{
MigrationState *s = migrate_get_current();
return s->parameters.x_vcpu_dirty_limit_period;
}
uint64_t migrate_xbzrle_cache_size(void) uint64_t migrate_xbzrle_cache_size(void)
{ {
MigrationState *s = migrate_get_current(); MigrationState *s = migrate_get_current();
@ -931,6 +941,13 @@ uint64_t migrate_xbzrle_cache_size(void)
return s->parameters.xbzrle_cache_size; return s->parameters.xbzrle_cache_size;
} }
ZeroPageDetection migrate_zero_page_detection(void)
{
MigrationState *s = migrate_get_current();
return s->parameters.zero_page_detection;
}
/* parameter setters */ /* parameter setters */
void migrate_set_block_incremental(bool value) void migrate_set_block_incremental(bool value)
@ -1041,6 +1058,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit; params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit;
params->has_mode = true; params->has_mode = true;
params->mode = s->parameters.mode; params->mode = s->parameters.mode;
params->has_zero_page_detection = true;
params->zero_page_detection = s->parameters.zero_page_detection;
return params; return params;
} }
@ -1077,6 +1096,7 @@ void migrate_params_init(MigrationParameters *params)
params->has_x_vcpu_dirty_limit_period = true; params->has_x_vcpu_dirty_limit_period = true;
params->has_vcpu_dirty_limit = true; params->has_vcpu_dirty_limit = true;
params->has_mode = true; params->has_mode = true;
params->has_zero_page_detection = true;
} }
/* /*
@ -1391,6 +1411,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
if (params->has_mode) { if (params->has_mode) {
dest->mode = params->mode; dest->mode = params->mode;
} }
if (params->has_zero_page_detection) {
dest->zero_page_detection = params->zero_page_detection;
}
} }
static void migrate_params_apply(MigrateSetParameters *params, Error **errp) static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@ -1541,6 +1565,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
if (params->has_mode) { if (params->has_mode) {
s->parameters.mode = params->mode; s->parameters.mode = params->mode;
} }
if (params->has_zero_page_detection) {
s->parameters.zero_page_detection = params->zero_page_detection;
}
} }
void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)

View File

@ -16,6 +16,7 @@
#include "hw/qdev-properties.h" #include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h" #include "hw/qdev-properties-system.h"
#include "migration/client-options.h"
/* migration properties */ /* migration properties */
@ -24,12 +25,10 @@ extern Property migration_properties[];
/* capabilities */ /* capabilities */
bool migrate_auto_converge(void); bool migrate_auto_converge(void);
bool migrate_background_snapshot(void);
bool migrate_block(void); bool migrate_block(void);
bool migrate_colo(void); bool migrate_colo(void);
bool migrate_compress(void); bool migrate_compress(void);
bool migrate_dirty_bitmaps(void); bool migrate_dirty_bitmaps(void);
bool migrate_dirty_limit(void);
bool migrate_events(void); bool migrate_events(void);
bool migrate_mapped_ram(void); bool migrate_mapped_ram(void);
bool migrate_ignore_shared(void); bool migrate_ignore_shared(void);
@ -38,11 +37,9 @@ bool migrate_multifd(void);
bool migrate_pause_before_switchover(void); bool migrate_pause_before_switchover(void);
bool migrate_postcopy_blocktime(void); bool migrate_postcopy_blocktime(void);
bool migrate_postcopy_preempt(void); bool migrate_postcopy_preempt(void);
bool migrate_postcopy_ram(void);
bool migrate_rdma_pin_all(void); bool migrate_rdma_pin_all(void);
bool migrate_release_ram(void); bool migrate_release_ram(void);
bool migrate_return_path(void); bool migrate_return_path(void);
bool migrate_switchover_ack(void);
bool migrate_validate_uuid(void); bool migrate_validate_uuid(void);
bool migrate_xbzrle(void); bool migrate_xbzrle(void);
bool migrate_zero_blocks(void); bool migrate_zero_blocks(void);
@ -84,7 +81,6 @@ uint8_t migrate_max_cpu_throttle(void);
uint64_t migrate_max_bandwidth(void); uint64_t migrate_max_bandwidth(void);
uint64_t migrate_avail_switchover_bandwidth(void); uint64_t migrate_avail_switchover_bandwidth(void);
uint64_t migrate_max_postcopy_bandwidth(void); uint64_t migrate_max_postcopy_bandwidth(void);
MigMode migrate_mode(void);
int migrate_multifd_channels(void); int migrate_multifd_channels(void);
MultiFDCompression migrate_multifd_compression(void); MultiFDCompression migrate_multifd_compression(void);
int migrate_multifd_zlib_level(void); int migrate_multifd_zlib_level(void);
@ -94,6 +90,7 @@ const char *migrate_tls_authz(void);
const char *migrate_tls_creds(void); const char *migrate_tls_creds(void);
const char *migrate_tls_hostname(void); const char *migrate_tls_hostname(void);
uint64_t migrate_xbzrle_cache_size(void); uint64_t migrate_xbzrle_cache_size(void);
ZeroPageDetection migrate_zero_page_detection(void);
/* parameters setters */ /* parameters setters */

View File

@ -63,6 +63,8 @@ struct QEMUFile {
*/ */
int qemu_file_shutdown(QEMUFile *f) int qemu_file_shutdown(QEMUFile *f)
{ {
Error *err = NULL;
/* /*
* We must set qemufile error before the real shutdown(), otherwise * We must set qemufile error before the real shutdown(), otherwise
* there can be a race window where we thought IO all went though * there can be a race window where we thought IO all went though
@ -91,7 +93,8 @@ int qemu_file_shutdown(QEMUFile *f)
return -ENOSYS; return -ENOSYS;
} }
if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL) < 0) { if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, &err) < 0) {
error_report_err(err);
return -EIO; return -EIO;
} }

View File

@ -1140,6 +1140,10 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
QEMUFile *file = pss->pss_channel; QEMUFile *file = pss->pss_channel;
int len = 0; int len = 0;
if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
return 0;
}
if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) { if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
return 0; return 0;
} }
@ -1284,7 +1288,6 @@ static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
if (!multifd_queue_page(block, offset)) { if (!multifd_queue_page(block, offset)) {
return -1; return -1;
} }
stat64_add(&mig_stats.normal_pages, 1);
return 1; return 1;
} }
@ -2076,7 +2079,6 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
*/ */
static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
{ {
RAMBlock *block = pss->block;
ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
int res; int res;
@ -2092,17 +2094,33 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
return 1; return 1;
} }
return ram_save_page(rs, pss);
}
/**
* ram_save_target_page_multifd: send one target page to multifd workers
*
* Returns 1 if the page was queued, -1 otherwise.
*
* @rs: current RAM state
* @pss: data about the page we want to send
*/
static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
{
RAMBlock *block = pss->block;
ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
/* /*
* Do not use multifd in postcopy as one whole host page should be * While using multifd live migration, we still need to handle zero
* placed. Meanwhile postcopy requires atomic update of pages, so even * page checking on the migration main thread.
* if host page size == guest page size the dest guest during run may
* still see partially copied pages which is data corruption.
*/ */
if (migrate_multifd() && !migration_in_postcopy()) { if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
return ram_save_multifd_page(block, offset); if (save_zero_page(rs, pss, offset)) {
return 1;
}
} }
return ram_save_page(rs, pss); return ram_save_multifd_page(block, offset);
} }
/* Should be called before sending a host page */ /* Should be called before sending a host page */
@ -2909,10 +2927,9 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
RAMBlock *block; RAMBlock *block;
ram_addr_t offset; ram_addr_t offset;
size_t used_len, start, npages; size_t used_len, start, npages;
MigrationState *s = migrate_get_current();
/* This function is currently expected to be used during live migration */ /* This function is currently expected to be used during live migration */
if (!migration_is_setup_or_active(s->state)) { if (!migration_is_setup_or_active()) {
return; return;
} }
@ -3110,7 +3127,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
} }
migration_ops = g_malloc0(sizeof(MigrationOps)); migration_ops = g_malloc0(sizeof(MigrationOps));
migration_ops->ram_save_target_page = ram_save_target_page_legacy;
if (migrate_multifd()) {
migration_ops->ram_save_target_page = ram_save_target_page_multifd;
} else {
migration_ops->ram_save_target_page = ram_save_target_page_legacy;
}
bql_unlock(); bql_unlock();
ret = multifd_send_sync_main(); ret = multifd_send_sync_main();
@ -3150,9 +3172,13 @@ static void ram_save_file_bmap(QEMUFile *f)
} }
} }
void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset) void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset, bool set)
{ {
set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap); if (set) {
set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
} else {
clear_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
}
} }
/** /**
@ -3263,7 +3289,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
out: out:
if (ret >= 0 if (ret >= 0
&& migration_is_setup_or_active(migrate_get_current()->state)) { && migration_is_setup_or_active()) {
if (migrate_multifd() && migrate_multifd_flush_after_each_section() && if (migrate_multifd() && migrate_multifd_flush_after_each_section() &&
!migrate_mapped_ram()) { !migrate_mapped_ram()) {
ret = multifd_send_sync_main(); ret = multifd_send_sync_main();
@ -4214,6 +4240,12 @@ static int ram_load_precopy(QEMUFile *f)
i++; i++;
addr = qemu_get_be64(f); addr = qemu_get_be64(f);
ret = qemu_file_get_error(f);
if (ret) {
error_report("Getting RAM address failed");
break;
}
flags = addr & ~TARGET_PAGE_MASK; flags = addr & ~TARGET_PAGE_MASK;
addr &= TARGET_PAGE_MASK; addr &= TARGET_PAGE_MASK;

View File

@ -75,7 +75,8 @@ bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp);
bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start); bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start);
void postcopy_preempt_shutdown_file(MigrationState *s); void postcopy_preempt_shutdown_file(MigrationState *s);
void *postcopy_preempt_thread(void *opaque); void *postcopy_preempt_thread(void *opaque);
void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset); void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset,
bool set);
/* ram cache */ /* ram cache */
int colo_init_ram_cache(void); int colo_init_ram_cache(void);

View File

@ -3357,7 +3357,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
goto err_rdma_dest_wait; goto err_rdma_dest_wait;
} }
isock->host = rdma->host; isock->host = g_strdup(rdma->host);
isock->port = g_strdup_printf("%d", rdma->port); isock->port = g_strdup_printf("%d", rdma->port);
/* /*

View File

@ -1317,7 +1317,7 @@ void qemu_savevm_state_setup(QEMUFile *f)
MigrationState *ms = migrate_get_current(); MigrationState *ms = migrate_get_current();
SaveStateEntry *se; SaveStateEntry *se;
Error *local_err = NULL; Error *local_err = NULL;
int ret; int ret = 0;
json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size()); json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
json_writer_start_array(ms->vmdesc, "devices"); json_writer_start_array(ms->vmdesc, "devices");
@ -1351,6 +1351,10 @@ void qemu_savevm_state_setup(QEMUFile *f)
} }
} }
if (ret) {
return;
}
if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) { if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
error_report_err(local_err); error_report_err(local_err);
} }
@ -1390,7 +1394,8 @@ int qemu_savevm_state_resume_prepare(MigrationState *s)
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
{ {
SaveStateEntry *se; SaveStateEntry *se;
int ret = 1; bool all_finished = true;
int ret;
trace_savevm_state_iterate(); trace_savevm_state_iterate();
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
@ -1431,16 +1436,12 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
"%d(%s): %d", "%d(%s): %d",
se->section_id, se->idstr, ret); se->section_id, se->idstr, ret);
qemu_file_set_error(f, ret); qemu_file_set_error(f, ret);
} return ret;
if (ret <= 0) { } else if (!ret) {
/* Do not proceed to the next vmstate before this one reported all_finished = false;
completion of the current stage. This serializes the migration
and reduces the probability that a faster changing state is
synchronized over and over again. */
break;
} }
} }
return ret; return all_finished;
} }
static bool should_send_vmdesc(void) static bool should_send_vmdesc(void)
@ -1705,7 +1706,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
MigrationState *ms = migrate_get_current(); MigrationState *ms = migrate_get_current();
MigrationStatus status; MigrationStatus status;
if (migration_is_running(ms->state)) { if (migration_is_running()) {
error_setg(errp, QERR_MIGRATION_ACTIVE); error_setg(errp, QERR_MIGRATION_ACTIVE);
return -EINVAL; return -EINVAL;
} }

View File

@ -128,21 +128,21 @@ postcopy_preempt_reset_channel(void) ""
# multifd.c # multifd.c
multifd_new_send_channel_async(uint8_t id) "channel %u" multifd_new_send_channel_async(uint8_t id) "channel %u"
multifd_new_send_channel_async_error(uint8_t id, void *err) "channel=%u err=%p" multifd_new_send_channel_async_error(uint8_t id, void *err) "channel=%u err=%p"
multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" multifd_recv(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t zero, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u"
multifd_recv_new_channel(uint8_t id) "channel %u" multifd_recv_new_channel(uint8_t id) "channel %u"
multifd_recv_sync_main(long packet_num) "packet num %ld" multifd_recv_sync_main(long packet_num) "packet num %ld"
multifd_recv_sync_main_signal(uint8_t id) "channel %u" multifd_recv_sync_main_signal(uint8_t id) "channel %u"
multifd_recv_sync_main_wait(uint8_t id) "iter %u" multifd_recv_sync_main_wait(uint8_t id) "iter %u"
multifd_recv_terminate_threads(bool error) "error %d" multifd_recv_terminate_threads(bool error) "error %d"
multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages, uint64_t zero_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 " zero pages %" PRIu64
multifd_recv_thread_start(uint8_t id) "%u" multifd_recv_thread_start(uint8_t id) "%u"
multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u flags 0x%x next packet size %u" multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal_pages, uint32_t zero_pages, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u"
multifd_send_error(uint8_t id) "channel %u" multifd_send_error(uint8_t id) "channel %u"
multifd_send_sync_main(long packet_num) "packet num %ld" multifd_send_sync_main(long packet_num) "packet num %ld"
multifd_send_sync_main_signal(uint8_t id) "channel %u" multifd_send_sync_main_signal(uint8_t id) "channel %u"
multifd_send_sync_main_wait(uint8_t id) "channel %u" multifd_send_sync_main_wait(uint8_t id) "channel %u"
multifd_send_terminate_threads(void) "" multifd_send_terminate_threads(void) ""
multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages, uint64_t zero_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 " zero pages %" PRIu64
multifd_send_thread_start(uint8_t id) "%u" multifd_send_thread_start(uint8_t id) "%u"
multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s"
multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s"

View File

@ -28,7 +28,6 @@
#include "sysemu/iothread.h" #include "sysemu/iothread.h"
#include "net/colo-compare.h" #include "net/colo-compare.h"
#include "migration/colo.h" #include "migration/colo.h"
#include "migration/migration.h"
#include "util.h" #include "util.h"
#include "block/aio-wait.h" #include "block/aio-wait.h"
@ -189,7 +188,7 @@ static void colo_compare_inconsistency_notify(CompareState *s)
notify_remote_frame(s); notify_remote_frame(s);
} else { } else {
notifier_list_notify(&colo_compare_notifiers, notifier_list_notify(&colo_compare_notifiers,
migrate_get_current()); NULL);
} }
} }

View File

@ -26,7 +26,6 @@
#include <err.h> #include <err.h>
#include "standard-headers/linux/virtio_net.h" #include "standard-headers/linux/virtio_net.h"
#include "monitor/monitor.h" #include "monitor/monitor.h"
#include "migration/migration.h"
#include "migration/misc.h" #include "migration/misc.h"
#include "hw/virtio/vhost.h" #include "hw/virtio/vhost.h"
@ -355,7 +354,7 @@ static int vhost_vdpa_net_data_start(NetClientState *nc)
assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
if (s->always_svq || if (s->always_svq ||
migration_is_setup_or_active(migrate_get_current()->state)) { migration_is_setup_or_active()) {
v->shadow_vqs_enabled = true; v->shadow_vqs_enabled = true;
} else { } else {
v->shadow_vqs_enabled = false; v->shadow_vqs_enabled = false;

View File

@ -670,6 +670,23 @@
{ 'enum': 'MigMode', { 'enum': 'MigMode',
'data': [ 'normal', 'cpr-reboot' ] } 'data': [ 'normal', 'cpr-reboot' ] }
##
# @ZeroPageDetection:
#
# @none: Do not perform zero page checking.
#
# @legacy: Perform zero page checking in main migration thread.
#
# @multifd: Perform zero page checking in multifd sender thread if
# multifd migration is enabled, else in the main migration
# thread as for @legacy.
#
# Since: 9.0
#
##
{ 'enum': 'ZeroPageDetection',
'data': [ 'none', 'legacy', 'multifd' ] }
## ##
# @BitmapMigrationBitmapAliasTransform: # @BitmapMigrationBitmapAliasTransform:
# #
@ -891,6 +908,10 @@
# @mode: Migration mode. See description in @MigMode. Default is 'normal'. # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
# (Since 8.2) # (Since 8.2)
# #
# @zero-page-detection: Whether and how to detect zero pages.
# See description in @ZeroPageDetection. Default is 'multifd'.
# (since 9.0)
#
# Features: # Features:
# #
# @deprecated: Member @block-incremental is deprecated. Use # @deprecated: Member @block-incremental is deprecated. Use
@ -924,7 +945,8 @@
'block-bitmap-mapping', 'block-bitmap-mapping',
{ 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
'vcpu-dirty-limit', 'vcpu-dirty-limit',
'mode'] } 'mode',
'zero-page-detection'] }
## ##
# @MigrateSetParameters: # @MigrateSetParameters:
@ -1083,6 +1105,10 @@
# @mode: Migration mode. See description in @MigMode. Default is 'normal'. # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
# (Since 8.2) # (Since 8.2)
# #
# @zero-page-detection: Whether and how to detect zero pages.
# See description in @ZeroPageDetection. Default is 'multifd'.
# (since 9.0)
#
# Features: # Features:
# #
# @deprecated: Member @block-incremental is deprecated. Use # @deprecated: Member @block-incremental is deprecated. Use
@ -1136,7 +1162,8 @@
'*x-vcpu-dirty-limit-period': { 'type': 'uint64', '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
'features': [ 'unstable' ] }, 'features': [ 'unstable' ] },
'*vcpu-dirty-limit': 'uint64', '*vcpu-dirty-limit': 'uint64',
'*mode': 'MigMode'} } '*mode': 'MigMode',
'*zero-page-detection': 'ZeroPageDetection'} }
## ##
# @migrate-set-parameters: # @migrate-set-parameters:
@ -1311,6 +1338,10 @@
# @mode: Migration mode. See description in @MigMode. Default is 'normal'. # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
# (Since 8.2) # (Since 8.2)
# #
# @zero-page-detection: Whether and how to detect zero pages.
# See description in @ZeroPageDetection. Default is 'multifd'.
# (since 9.0)
#
# Features: # Features:
# #
# @deprecated: Member @block-incremental is deprecated. Use # @deprecated: Member @block-incremental is deprecated. Use
@ -1361,7 +1392,8 @@
'*x-vcpu-dirty-limit-period': { 'type': 'uint64', '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
'features': [ 'unstable' ] }, 'features': [ 'unstable' ] },
'*vcpu-dirty-limit': 'uint64', '*vcpu-dirty-limit': 'uint64',
'*mode': 'MigMode'} } '*mode': 'MigMode',
'*zero-page-detection': 'ZeroPageDetection'} }
## ##
# @query-migrate-parameters: # @query-migrate-parameters:

View File

@ -2,7 +2,6 @@
#include "qemu/notify.h" #include "qemu/notify.h"
#include "net/colo-compare.h" #include "net/colo-compare.h"
#include "migration/colo.h" #include "migration/colo.h"
#include "migration/migration.h"
#include "qemu/error-report.h" #include "qemu/error-report.h"
#include "qapi/qapi-commands-migration.h" #include "qapi/qapi-commands-migration.h"

View File

@ -25,8 +25,6 @@
#include "sysemu/kvm.h" #include "sysemu/kvm.h"
#include "trace.h" #include "trace.h"
#include "migration/misc.h" #include "migration/misc.h"
#include "migration/migration.h"
#include "migration/options.h"
/* /*
* Dirtylimit stop working if dirty page rate error * Dirtylimit stop working if dirty page rate error
@ -78,14 +76,13 @@ static bool dirtylimit_quit;
static void vcpu_dirty_rate_stat_collect(void) static void vcpu_dirty_rate_stat_collect(void)
{ {
MigrationState *s = migrate_get_current();
VcpuStat stat; VcpuStat stat;
int i = 0; int i = 0;
int64_t period = DIRTYLIMIT_CALC_TIME_MS; int64_t period = DIRTYLIMIT_CALC_TIME_MS;
if (migrate_dirty_limit() && if (migrate_dirty_limit() &&
migration_is_active(s)) { migration_is_active()) {
period = s->parameters.x_vcpu_dirty_limit_period; period = migrate_vcpu_dirty_limit_period();
} }
/* calculate vcpu dirtyrate */ /* calculate vcpu dirtyrate */
@ -450,10 +447,8 @@ static void dirtylimit_cleanup(void)
*/ */
static bool dirtylimit_is_allowed(void) static bool dirtylimit_is_allowed(void)
{ {
MigrationState *ms = migrate_get_current(); if (migration_is_running() &&
!migration_thread_is_self() &&
if (migration_is_running(ms->state) &&
(!qemu_thread_is_self(&ms->thread)) &&
migrate_dirty_limit() && migrate_dirty_limit() &&
dirtylimit_in_service()) { dirtylimit_in_service()) {
return false; return false;

View File

@ -2681,53 +2681,69 @@ static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs,
return false; return false;
} }
static MemTxResult flatview_write_continue_step(MemTxAttrs attrs,
const uint8_t *buf,
hwaddr len, hwaddr mr_addr,
hwaddr *l, MemoryRegion *mr)
{
if (!flatview_access_allowed(mr, attrs, mr_addr, *l)) {
return MEMTX_ACCESS_ERROR;
}
if (!memory_access_is_direct(mr, true)) {
uint64_t val;
MemTxResult result;
bool release_lock = prepare_mmio_access(mr);
*l = memory_access_size(mr, *l, mr_addr);
/*
* XXX: could force current_cpu to NULL to avoid
* potential bugs
*/
/*
* Assure Coverity (and ourselves) that we are not going to OVERRUN
* the buffer by following ldn_he_p().
*/
#ifdef QEMU_STATIC_ANALYSIS
assert((*l == 1 && len >= 1) ||
(*l == 2 && len >= 2) ||
(*l == 4 && len >= 4) ||
(*l == 8 && len >= 8));
#endif
val = ldn_he_p(buf, *l);
result = memory_region_dispatch_write(mr, mr_addr, val,
size_memop(*l), attrs);
if (release_lock) {
bql_unlock();
}
return result;
} else {
/* RAM case */
uint8_t *ram_ptr = qemu_ram_ptr_length(mr->ram_block, mr_addr, l,
false);
memmove(ram_ptr, buf, *l);
invalidate_and_set_dirty(mr, mr_addr, *l);
return MEMTX_OK;
}
}
/* Called within RCU critical section. */ /* Called within RCU critical section. */
static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
MemTxAttrs attrs, MemTxAttrs attrs,
const void *ptr, const void *ptr,
hwaddr len, hwaddr addr1, hwaddr len, hwaddr mr_addr,
hwaddr l, MemoryRegion *mr) hwaddr l, MemoryRegion *mr)
{ {
uint8_t *ram_ptr;
uint64_t val;
MemTxResult result = MEMTX_OK; MemTxResult result = MEMTX_OK;
bool release_lock = false;
const uint8_t *buf = ptr; const uint8_t *buf = ptr;
for (;;) { for (;;) {
if (!flatview_access_allowed(mr, attrs, addr1, l)) { result |= flatview_write_continue_step(attrs, buf, len, mr_addr, &l,
result |= MEMTX_ACCESS_ERROR; mr);
/* Keep going. */
} else if (!memory_access_is_direct(mr, true)) {
release_lock |= prepare_mmio_access(mr);
l = memory_access_size(mr, l, addr1);
/* XXX: could force current_cpu to NULL to avoid
potential bugs */
/*
* Assure Coverity (and ourselves) that we are not going to OVERRUN
* the buffer by following ldn_he_p().
*/
#ifdef QEMU_STATIC_ANALYSIS
assert((l == 1 && len >= 1) ||
(l == 2 && len >= 2) ||
(l == 4 && len >= 4) ||
(l == 8 && len >= 8));
#endif
val = ldn_he_p(buf, l);
result |= memory_region_dispatch_write(mr, addr1, val,
size_memop(l), attrs);
} else {
/* RAM case */
ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
memmove(ram_ptr, buf, l);
invalidate_and_set_dirty(mr, addr1, l);
}
if (release_lock) {
bql_unlock();
release_lock = false;
}
len -= l; len -= l;
buf += l; buf += l;
@ -2738,7 +2754,7 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
} }
l = len; l = len;
mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); mr = flatview_translate(fv, addr, &mr_addr, &l, true, attrs);
} }
return result; return result;
@ -2749,63 +2765,76 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
const void *buf, hwaddr len) const void *buf, hwaddr len)
{ {
hwaddr l; hwaddr l;
hwaddr addr1; hwaddr mr_addr;
MemoryRegion *mr; MemoryRegion *mr;
l = len; l = len;
mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); mr = flatview_translate(fv, addr, &mr_addr, &l, true, attrs);
if (!flatview_access_allowed(mr, attrs, addr, len)) { if (!flatview_access_allowed(mr, attrs, addr, len)) {
return MEMTX_ACCESS_ERROR; return MEMTX_ACCESS_ERROR;
} }
return flatview_write_continue(fv, addr, attrs, buf, len, return flatview_write_continue(fv, addr, attrs, buf, len,
addr1, l, mr); mr_addr, l, mr);
}
static MemTxResult flatview_read_continue_step(MemTxAttrs attrs, uint8_t *buf,
hwaddr len, hwaddr mr_addr,
hwaddr *l,
MemoryRegion *mr)
{
if (!flatview_access_allowed(mr, attrs, mr_addr, *l)) {
return MEMTX_ACCESS_ERROR;
}
if (!memory_access_is_direct(mr, false)) {
/* I/O case */
uint64_t val;
MemTxResult result;
bool release_lock = prepare_mmio_access(mr);
*l = memory_access_size(mr, *l, mr_addr);
result = memory_region_dispatch_read(mr, mr_addr, &val, size_memop(*l),
attrs);
/*
* Assure Coverity (and ourselves) that we are not going to OVERRUN
* the buffer by following stn_he_p().
*/
#ifdef QEMU_STATIC_ANALYSIS
assert((*l == 1 && len >= 1) ||
(*l == 2 && len >= 2) ||
(*l == 4 && len >= 4) ||
(*l == 8 && len >= 8));
#endif
stn_he_p(buf, *l, val);
if (release_lock) {
bql_unlock();
}
return result;
} else {
/* RAM case */
uint8_t *ram_ptr = qemu_ram_ptr_length(mr->ram_block, mr_addr, l,
false);
memcpy(buf, ram_ptr, *l);
return MEMTX_OK;
}
} }
/* Called within RCU critical section. */ /* Called within RCU critical section. */
MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
MemTxAttrs attrs, void *ptr, MemTxAttrs attrs, void *ptr,
hwaddr len, hwaddr addr1, hwaddr l, hwaddr len, hwaddr mr_addr, hwaddr l,
MemoryRegion *mr) MemoryRegion *mr)
{ {
uint8_t *ram_ptr;
uint64_t val;
MemTxResult result = MEMTX_OK; MemTxResult result = MEMTX_OK;
bool release_lock = false;
uint8_t *buf = ptr; uint8_t *buf = ptr;
fuzz_dma_read_cb(addr, len, mr); fuzz_dma_read_cb(addr, len, mr);
for (;;) { for (;;) {
if (!flatview_access_allowed(mr, attrs, addr1, l)) { result |= flatview_read_continue_step(attrs, buf, len, mr_addr, &l, mr);
result |= MEMTX_ACCESS_ERROR;
/* Keep going. */
} else if (!memory_access_is_direct(mr, false)) {
/* I/O case */
release_lock |= prepare_mmio_access(mr);
l = memory_access_size(mr, l, addr1);
result |= memory_region_dispatch_read(mr, addr1, &val,
size_memop(l), attrs);
/*
* Assure Coverity (and ourselves) that we are not going to OVERRUN
* the buffer by following stn_he_p().
*/
#ifdef QEMU_STATIC_ANALYSIS
assert((l == 1 && len >= 1) ||
(l == 2 && len >= 2) ||
(l == 4 && len >= 4) ||
(l == 8 && len >= 8));
#endif
stn_he_p(buf, l, val);
} else {
/* RAM case */
ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
memcpy(buf, ram_ptr, l);
}
if (release_lock) {
bql_unlock();
release_lock = false;
}
len -= l; len -= l;
buf += l; buf += l;
@ -2816,7 +2845,7 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
} }
l = len; l = len;
mr = flatview_translate(fv, addr, &addr1, &l, false, attrs); mr = flatview_translate(fv, addr, &mr_addr, &l, false, attrs);
} }
return result; return result;
@ -2827,16 +2856,16 @@ static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
MemTxAttrs attrs, void *buf, hwaddr len) MemTxAttrs attrs, void *buf, hwaddr len)
{ {
hwaddr l; hwaddr l;
hwaddr addr1; hwaddr mr_addr;
MemoryRegion *mr; MemoryRegion *mr;
l = len; l = len;
mr = flatview_translate(fv, addr, &addr1, &l, false, attrs); mr = flatview_translate(fv, addr, &mr_addr, &l, false, attrs);
if (!flatview_access_allowed(mr, attrs, addr, len)) { if (!flatview_access_allowed(mr, attrs, addr, len)) {
return MEMTX_ACCESS_ERROR; return MEMTX_ACCESS_ERROR;
} }
return flatview_read_continue(fv, addr, attrs, buf, len, return flatview_read_continue(fv, addr, attrs, buf, len,
addr1, l, mr); mr_addr, l, mr);
} }
MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr, MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
@ -3341,6 +3370,59 @@ static inline MemoryRegion *address_space_translate_cached(
return section.mr; return section.mr;
} }
/* Called within RCU critical section. */
static MemTxResult address_space_write_continue_cached(MemTxAttrs attrs,
const void *ptr,
hwaddr len,
hwaddr mr_addr,
hwaddr l,
MemoryRegion *mr)
{
MemTxResult result = MEMTX_OK;
const uint8_t *buf = ptr;
for (;;) {
result |= flatview_write_continue_step(attrs, buf, len, mr_addr, &l,
mr);
len -= l;
buf += l;
mr_addr += l;
if (!len) {
break;
}
l = len;
}
return result;
}
/* Called within RCU critical section. */
static MemTxResult address_space_read_continue_cached(MemTxAttrs attrs,
void *ptr, hwaddr len,
hwaddr mr_addr, hwaddr l,
MemoryRegion *mr)
{
MemTxResult result = MEMTX_OK;
uint8_t *buf = ptr;
for (;;) {
result |= flatview_read_continue_step(attrs, buf, len, mr_addr, &l, mr);
len -= l;
buf += l;
mr_addr += l;
if (!len) {
break;
}
l = len;
}
return result;
}
/* Called from RCU critical section. address_space_read_cached uses this /* Called from RCU critical section. address_space_read_cached uses this
* out of line function when the target is an MMIO or IOMMU region. * out of line function when the target is an MMIO or IOMMU region.
*/ */
@ -3348,15 +3430,14 @@ MemTxResult
address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr, address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr,
void *buf, hwaddr len) void *buf, hwaddr len)
{ {
hwaddr addr1, l; hwaddr mr_addr, l;
MemoryRegion *mr; MemoryRegion *mr;
l = len; l = len;
mr = address_space_translate_cached(cache, addr, &addr1, &l, false, mr = address_space_translate_cached(cache, addr, &mr_addr, &l, false,
MEMTXATTRS_UNSPECIFIED); MEMTXATTRS_UNSPECIFIED);
return flatview_read_continue(cache->fv, return address_space_read_continue_cached(MEMTXATTRS_UNSPECIFIED,
addr, MEMTXATTRS_UNSPECIFIED, buf, len, buf, len, mr_addr, l, mr);
addr1, l, mr);
} }
/* Called from RCU critical section. address_space_write_cached uses this /* Called from RCU critical section. address_space_write_cached uses this
@ -3366,15 +3447,14 @@ MemTxResult
address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr, address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr,
const void *buf, hwaddr len) const void *buf, hwaddr len)
{ {
hwaddr addr1, l; hwaddr mr_addr, l;
MemoryRegion *mr; MemoryRegion *mr;
l = len; l = len;
mr = address_space_translate_cached(cache, addr, &addr1, &l, true, mr = address_space_translate_cached(cache, addr, &mr_addr, &l, true,
MEMTXATTRS_UNSPECIFIED); MEMTXATTRS_UNSPECIFIED);
return flatview_write_continue(cache->fv, return address_space_write_continue_cached(MEMTXATTRS_UNSPECIFIED,
addr, MEMTXATTRS_UNSPECIFIED, buf, len, buf, len, mr_addr, l, mr);
addr1, l, mr);
} }
#define ARG1_DECL MemoryRegionCache *cache #define ARG1_DECL MemoryRegionCache *cache

View File

@ -38,7 +38,6 @@
#include "qemu/option_int.h" #include "qemu/option_int.h"
#include "sysemu/block-backend.h" #include "sysemu/block-backend.h"
#include "migration/misc.h" #include "migration/misc.h"
#include "migration/migration.h"
#include "qemu/cutils.h" #include "qemu/cutils.h"
#include "hw/qdev-properties.h" #include "hw/qdev-properties.h"
#include "hw/clock.h" #include "hw/clock.h"

View File

@ -22,7 +22,6 @@
#include "hw/irq.h" #include "hw/irq.h"
#include "qemu/log.h" #include "qemu/log.h"
#include "hw/loader.h" #include "hw/loader.h"
#include "migration/migration.h"
#include "sysemu/runstate.h" #include "sysemu/runstate.h"
#include "cpu-csr.h" #include "cpu-csr.h"
#include "kvm_loongarch.h" #include "kvm_loongarch.h"

View File

@ -44,7 +44,7 @@
#include "kvm_riscv.h" #include "kvm_riscv.h"
#include "sbi_ecall_interface.h" #include "sbi_ecall_interface.h"
#include "chardev/char-fe.h" #include "chardev/char-fe.h"
#include "migration/migration.h" #include "migration/misc.h"
#include "sysemu/runstate.h" #include "sysemu/runstate.h"
#include "hw/riscv/numa.h" #include "hw/riscv/numa.h"
@ -729,7 +729,7 @@ static void kvm_riscv_put_regs_timer(CPUState *cs)
* frequency. Therefore, we should check whether they are the same here * frequency. Therefore, we should check whether they are the same here
* during the migration. * during the migration.
*/ */
if (migration_is_running(migrate_get_current()->state)) { if (migration_is_running()) {
KVM_RISCV_GET_TIMER(cs, frequency, reg); KVM_RISCV_GET_TIMER(cs, frequency, reg);
if (reg != env->kvm_timer_frequency) { if (reg != env->kvm_timer_frequency) {
error_report("Dst Hosts timer frequency != Src Hosts"); error_report("Dst Hosts timer frequency != Src Hosts");

View File

@ -2771,6 +2771,24 @@ test_migrate_precopy_tcp_multifd_start(QTestState *from,
return test_migrate_precopy_tcp_multifd_start_common(from, to, "none"); return test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
} }
static void *
test_migrate_precopy_tcp_multifd_start_zero_page_legacy(QTestState *from,
QTestState *to)
{
test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
migrate_set_parameter_str(from, "zero-page-detection", "legacy");
return NULL;
}
static void *
test_migration_precopy_tcp_multifd_start_no_zero_page(QTestState *from,
QTestState *to)
{
test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
migrate_set_parameter_str(from, "zero-page-detection", "none");
return NULL;
}
static void * static void *
test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from, test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from,
QTestState *to) QTestState *to)
@ -2812,6 +2830,36 @@ static void test_multifd_tcp_none(void)
test_precopy_common(&args); test_precopy_common(&args);
} }
static void test_multifd_tcp_zero_page_legacy(void)
{
MigrateCommon args = {
.listen_uri = "defer",
.start_hook = test_migrate_precopy_tcp_multifd_start_zero_page_legacy,
/*
* Multifd is more complicated than most of the features, it
* directly takes guest page buffers when sending, make sure
* everything will work alright even if guest page is changing.
*/
.live = true,
};
test_precopy_common(&args);
}
static void test_multifd_tcp_no_zero_page(void)
{
MigrateCommon args = {
.listen_uri = "defer",
.start_hook = test_migration_precopy_tcp_multifd_start_no_zero_page,
/*
* Multifd is more complicated than most of the features, it
* directly takes guest page buffers when sending, make sure
* everything will work alright even if guest page is changing.
*/
.live = true,
};
test_precopy_common(&args);
}
static void test_multifd_tcp_zlib(void) static void test_multifd_tcp_zlib(void)
{ {
MigrateCommon args = { MigrateCommon args = {
@ -3729,6 +3777,10 @@ int main(int argc, char **argv)
} }
migration_test_add("/migration/multifd/tcp/plain/none", migration_test_add("/migration/multifd/tcp/plain/none",
test_multifd_tcp_none); test_multifd_tcp_none);
migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy",
test_multifd_tcp_zero_page_legacy);
migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
test_multifd_tcp_no_zero_page);
migration_test_add("/migration/multifd/tcp/plain/cancel", migration_test_add("/migration/multifd/tcp/plain/cancel",
test_multifd_tcp_cancel); test_multifd_tcp_cancel);
migration_test_add("/migration/multifd/tcp/plain/zlib", migration_test_add("/migration/multifd/tcp/plain/zlib",

View File

@ -24,7 +24,6 @@
#include "qemu/osdep.h" #include "qemu/osdep.h"
#include "../migration/migration.h"
#include "migration/vmstate.h" #include "migration/vmstate.h"
#include "migration/qemu-file-types.h" #include "migration/qemu-file-types.h"
#include "../migration/qemu-file.h" #include "../migration/qemu-file.h"