From 73b4987858cd2e0f5bd9ec626f2f95cce6cf84a1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 17 Jan 2024 17:03:13 +0100 Subject: [PATCH 01/14] userfaultfd: use 1ULL to build ioctl masks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to use the Linux-internal __u64 type, 1ULL is guaranteed to be wide enough. Signed-off-by: Paolo Bonzini Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20240117160313.175609-1-pbonzini@redhat.com Signed-off-by: Peter Xu --- migration/postcopy-ram.c | 16 +++++++--------- subprojects/libvhost-user/libvhost-user.c | 2 +- tests/qtest/migration-test.c | 4 ++-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 5408e028c6..893ec8fa89 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -102,11 +102,9 @@ void postcopy_thread_create(MigrationIncomingState *mis, * are target OS specific. */ #if defined(__linux__) - #include #include #include -#include /* for __u64 */ #endif #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD) @@ -272,8 +270,8 @@ static bool request_ufd_features(int ufd, uint64_t features) return false; } - ioctl_mask = (__u64)1 << _UFFDIO_REGISTER | - (__u64)1 << _UFFDIO_UNREGISTER; + ioctl_mask = 1ULL << _UFFDIO_REGISTER | + 1ULL << _UFFDIO_UNREGISTER; if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) { error_report("Missing userfault features: %" PRIx64, (uint64_t)(~api_struct.ioctls & ioctl_mask)); @@ -462,9 +460,9 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) goto out; } - feature_mask = (__u64)1 << _UFFDIO_WAKE | - (__u64)1 << _UFFDIO_COPY | - (__u64)1 << _UFFDIO_ZEROPAGE; + feature_mask = 1ULL << _UFFDIO_WAKE | + 1ULL << _UFFDIO_COPY | + 1ULL << _UFFDIO_ZEROPAGE; if ((reg_struct.ioctls & feature_mask) != feature_mask) { error_setg(errp, "Missing userfault map features: %" PRIx64, (uint64_t)(~reg_struct.ioctls & feature_mask)); @@ -733,11 +731,11 @@ static int ram_block_enable_notify(RAMBlock *rb, void *opaque) error_report("%s userfault register: %s", __func__, strerror(errno)); return -1; } - if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { + if (!(reg_struct.ioctls & (1ULL << _UFFDIO_COPY))) { error_report("%s userfault: Region doesn't support COPY", __func__); return -1; } - if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) { + if (reg_struct.ioctls & (1ULL << _UFFDIO_ZEROPAGE)) { qemu_ram_set_uf_zeroable(rb); } diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 6684057370..a3b158c671 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -684,7 +684,7 @@ generate_faults(VuDev *dev) { dev->postcopy_ufd, strerror(errno)); return false; } - if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { + if (!(reg_struct.ioctls & (1ULL << _UFFDIO_COPY))) { vu_panic(dev, "%s Region (%d) doesn't support COPY", __func__, i); return false; diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index d3066e119f..7675519cfa 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -104,8 +104,8 @@ static bool ufd_version_check(void) } uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID; - ioctl_mask = (__u64)1 << _UFFDIO_REGISTER | - (__u64)1 << _UFFDIO_UNREGISTER; + ioctl_mask = 1ULL << _UFFDIO_REGISTER | + 1ULL << _UFFDIO_UNREGISTER; if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) { g_test_message("Skipping test: Missing userfault feature"); return false; From 918f620d30a9b0095b7824b8d77a2d6059a439d9 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Wed, 17 Jan 2024 15:07:22 +0100 Subject: [PATCH 02/14] migration: Plug memory leak on HMP migrate error path hmp_migrate() leaks @caps when qmp_migrate() fails. Plug the leak with g_autoptr(). Fixes: 967f2de5c9ec (migration: Implement MigrateChannelList to hmp migration flow.) v8.2.0-rc0 Fixes: CID 1533125 Signed-off-by: Markus Armbruster Link: https://lore.kernel.org/r/20240117140722.3979657-1-armbru@redhat.com [peterx: fix CID number as reported by Peter Maydell] Signed-off-by: Peter Xu --- migration/migration-hmp-cmds.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index 740a219aa4..99b49df5dd 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -764,7 +764,7 @@ void hmp_migrate(Monitor *mon, const QDict *qdict) bool resume = qdict_get_try_bool(qdict, "resume", false); const char *uri = qdict_get_str(qdict, "uri"); Error *err = NULL; - MigrationChannelList *caps = NULL; + g_autoptr(MigrationChannelList) caps = NULL; g_autoptr(MigrationChannel) channel = NULL; if (inc) { @@ -789,8 +789,6 @@ void hmp_migrate(Monitor *mon, const QDict *qdict) return; } - qapi_free_MigrationChannelList(caps); - if (!detach) { HMPMigrationStatus *status; From a8629e0c2f6778a96f161f15215060cfab5ef2a4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 17 Jan 2024 15:58:46 +0800 Subject: [PATCH 03/14] migration: Make threshold_size an uint64_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's always used to compare against another uint64_t. Make it always clear that it's never a negative. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240117075848.139045-2-peterx@redhat.com Signed-off-by: Peter Xu --- migration/migration.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/migration.h b/migration/migration.h index 17972dac34..a589ae8650 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -296,7 +296,7 @@ struct MigrationState { * this threshold; it's calculated from the requested downtime and * measured bandwidth, or avail-switchover-bandwidth if specified. */ - int64_t threshold_size; + uint64_t threshold_size; /* params from 'migrate-set-parameters' */ MigrationParameters parameters; From b0504edd401dc77ddf09070bb0b22ad7573b4c77 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 17 Jan 2024 15:58:47 +0800 Subject: [PATCH 04/14] migration: Drop unnecessary check in ram's pending_exact() When the migration frameworks fetches the exact pending sizes, it means this check: remaining_size < s->threshold_size Must have been done already, actually at migration_iteration_run(): if (must_precopy <= s->threshold_size) { qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy); That should be after one round of ram_state_pending_estimate(). It makes the 2nd check meaningless and can be dropped. To say it in another way, when reaching ->state_pending_exact(), we unconditionally sync dirty bits for precopy. Then we can drop migrate_get_current() there too. Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240117075848.139045-3-peterx@redhat.com Signed-off-by: Peter Xu --- migration/ram.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index c0cdcccb75..d5b7cd5ac2 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3213,21 +3213,20 @@ static void ram_state_pending_estimate(void *opaque, uint64_t *must_precopy, static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy, uint64_t *can_postcopy) { - MigrationState *s = migrate_get_current(); RAMState **temp = opaque; RAMState *rs = *temp; + uint64_t remaining_size; - uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; - - if (!migration_in_postcopy() && remaining_size < s->threshold_size) { + if (!migration_in_postcopy()) { bql_lock(); WITH_RCU_READ_LOCK_GUARD() { migration_bitmap_sync_precopy(rs, false); } bql_unlock(); - remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; } + remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; + if (migrate_postcopy_ram()) { /* We can do postcopy, and all the data is postcopiable */ *can_postcopy += remaining_size; From 434b8adcf34dd43f19a4ec851eab33c8722d4877 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 17 Jan 2024 15:58:48 +0800 Subject: [PATCH 05/14] analyze-migration.py: Remove trick on parsing ramblocks RAM_SAVE_FLAG_MEM_SIZE contains the total length of ramblock idstr to know whether scanning of ramblocks is complete. Drop the trick. Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240117075848.139045-4-peterx@redhat.com Signed-off-by: Peter Xu --- scripts/analyze-migration.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py index a39dfb8766..8a254a5b6a 100755 --- a/scripts/analyze-migration.py +++ b/scripts/analyze-migration.py @@ -151,17 +151,12 @@ class RamSection(object): addr &= ~(self.TARGET_PAGE_SIZE - 1) if flags & self.RAM_SAVE_FLAG_MEM_SIZE: - while True: + total_length = addr + while total_length > 0: namelen = self.file.read8() - # We assume that no RAM chunk is big enough to ever - # hit the first byte of the address, so when we see - # a zero here we know it has to be an address, not the - # length of the next block. - if namelen == 0: - self.file.file.seek(-1, 1) - break self.name = self.file.readstr(len = namelen) len = self.file.read64() + total_length -= len self.sizeinfo[self.name] = '0x%016x' % len if self.write_memory: print(self.name) From 94766edb35946e36cd6c9a070ae013cc09e411f0 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 18 Jan 2024 13:49:50 -0300 Subject: [PATCH 06/14] ci: Add a migration compatibility test job The migration tests have support for being passed two QEMU binaries to test migration compatibility. Add a CI job that builds the lastest release of QEMU and another job that uses that version plus an already present build of the current version and run the migration tests with the two, both as source and destination. I.e.: old QEMU (n-1) -> current QEMU (development tree) current QEMU (development tree) -> old QEMU (n-1) The purpose of this CI job is to ensure the code we're about to merge will not cause a migration compatibility problem when migrating the next release (which will contain that code) to/from the previous release. The version of migration-test used will be the one matching the older QEMU. That way we can avoid special-casing new tests that wouldn't be compatible with the older QEMU. Note: for user forks, the version tags need to be pushed to gitlab otherwise it won't be able to checkout a different version. Signed-off-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240118164951.30350-3-farosas@suse.de Signed-off-by: Peter Xu --- .gitlab-ci.d/buildtest.yml | 60 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index e1c7801598..f0b0edc634 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -167,6 +167,66 @@ build-system-centos: x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu MAKE_CHECK_ARGS: check-build +# Previous QEMU release. Used for cross-version migration tests. +build-previous-qemu: + extends: .native_build_job_template + artifacts: + when: on_success + expire_in: 2 days + paths: + - build-previous + exclude: + - build-previous/**/*.p + - build-previous/**/*.a.p + - build-previous/**/*.fa.p + - build-previous/**/*.c.o + - build-previous/**/*.c.o.d + - build-previous/**/*.fa + needs: + job: amd64-opensuse-leap-container + variables: + IMAGE: opensuse-leap + TARGETS: x86_64-softmmu aarch64-softmmu + before_script: + - export QEMU_PREV_VERSION="$(sed 's/\([0-9.]*\)\.[0-9]*/v\1.0/' VERSION)" + - git checkout $QEMU_PREV_VERSION + after_script: + - mv build build-previous + +.migration-compat-common: + extends: .common_test_job_template + needs: + - job: build-previous-qemu + - job: build-system-opensuse + # The old QEMU could have bugs unrelated to migration that are + # already fixed in the current development branch, so this test + # might fail. + allow_failure: true + variables: + IMAGE: opensuse-leap + MAKE_CHECK_ARGS: check-build + script: + # Use the migration-tests from the older QEMU tree. This avoids + # testing an old QEMU against new features/tests that it is not + # compatible with. + - cd build-previous + # old to new + - QTEST_QEMU_BINARY_SRC=./qemu-system-${TARGET} + QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test + # new to old + - QTEST_QEMU_BINARY_DST=./qemu-system-${TARGET} + QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test + +migration-compat-aarch64: + extends: .migration-compat-common + variables: + TARGET: aarch64 + +migration-compat-x86_64: + extends: .migration-compat-common + variables: + TARGET: x86_64 + check-system-centos: extends: .native_test_job_template needs: From 079687e4182242c65055c5ac438ce4f61c20553e Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 18 Jan 2024 13:49:51 -0300 Subject: [PATCH 07/14] ci: Disable migration compatibility tests for aarch64 Until 9.0 is out, we need to keep the aarch64 job disabled because the tests always use the n-1 version of migration-test. That happens to be broken for aarch64 in 8.2. Once 9.0 is out, it will become the n-1 version and it will bring the fixed tests. We can revert this patch when 9.0 releases. Signed-off-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240118164951.30350-4-farosas@suse.de [peterx: use _SKIPPED rather than _OPTIONAL] Signed-off-by: Peter Xu --- .gitlab-ci.d/buildtest.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index f0b0edc634..79bbc8585b 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -217,10 +217,14 @@ build-previous-qemu: - QTEST_QEMU_BINARY_DST=./qemu-system-${TARGET} QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test +# This job is disabled until we release 9.0. The existing +# migration-test in 8.2 is broken on aarch64. The fix was already +# commited, but it will only take effect once 9.0 is out. migration-compat-aarch64: extends: .migration-compat-common variables: TARGET: aarch64 + QEMU_JOB_SKIPPED: 1 migration-compat-x86_64: extends: .migration-compat-common From 0a5d1108aba6308752a82201a441f957e5937211 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Mon, 11 Sep 2023 14:13:18 -0300 Subject: [PATCH 08/14] migration/yank: Use channel features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop using outside knowledge about the io channels when registering yank functions. Query for features instead. The yank method for all channels used with migration code currently is to call the qio_channel_shutdown() function, so query for QIO_CHANNEL_FEATURE_SHUTDOWN. We could add a separate feature in the future for indicating whether a channel supports yanking, but that seems overkill at the moment. Signed-off-by: Fabiano Rosas Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/20230911171320.24372-9-farosas@suse.de Signed-off-by: Peter Xu --- migration/yank_functions.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/migration/yank_functions.c b/migration/yank_functions.c index d5a710a3f2..979e60c762 100644 --- a/migration/yank_functions.c +++ b/migration/yank_functions.c @@ -8,12 +8,9 @@ */ #include "qemu/osdep.h" -#include "qapi/error.h" #include "io/channel.h" #include "yank_functions.h" #include "qemu/yank.h" -#include "io/channel-socket.h" -#include "io/channel-tls.h" #include "qemu-file.h" void migration_yank_iochannel(void *opaque) @@ -26,8 +23,7 @@ void migration_yank_iochannel(void *opaque) /* Return whether yank is supported on this ioc */ static bool migration_ioc_yank_supported(QIOChannel *ioc) { - return object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS); + return qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); } void migration_ioc_register_yank(QIOChannel *ioc) From 27eb8499edb2bc952c29ddae0bdac9fc959bf7b1 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 19 Jan 2024 20:39:18 -0300 Subject: [PATCH 09/14] migration: Fix use-after-free of migration state object We're currently allowing the process_incoming_migration_bh bottom-half to run without holding a reference to the 'current_migration' object, which leads to a segmentation fault if the BH is still live after migration_shutdown() has dropped the last reference to current_migration. In my system the bug manifests as migrate_multifd() returning true when it shouldn't and multifd_load_shutdown() calling multifd_recv_terminate_threads() which crashes due to an uninitialized multifd_recv_state. Fix the issue by holding a reference to the object when scheduling the BH and dropping it before returning from the BH. The same is already done for the cleanup_bh at migrate_fd_cleanup_schedule(). Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1969 Signed-off-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240119233922.32588-2-farosas@suse.de Signed-off-by: Peter Xu --- migration/migration.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index 219447dea1..cf17b68e57 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -648,6 +648,7 @@ static void process_incoming_migration_bh(void *opaque) MIGRATION_STATUS_COMPLETED); qemu_bh_delete(mis->bh); migration_incoming_state_destroy(); + object_unref(OBJECT(migrate_get_current())); } static void coroutine_fn @@ -713,6 +714,7 @@ process_incoming_migration_co(void *opaque) } mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); + object_ref(OBJECT(migrate_get_current())); qemu_bh_schedule(mis->bh); return; fail: From 59094cfa7aa3bcca85cf5ab2110431124da24d84 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 19 Jan 2024 20:39:19 -0300 Subject: [PATCH 10/14] migration: Take reference to migration state around bg_migration_vm_start_bh We need to hold a reference to the current_migration object around async calls to avoid it been freed while still in use. Signed-off-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240119233922.32588-3-farosas@suse.de Signed-off-by: Peter Xu --- migration/migration.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index cf17b68e57..b1213b59ce 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3382,6 +3382,7 @@ static void bg_migration_vm_start_bh(void *opaque) vm_resume(s->vm_old_state); migration_downtime_end(s); + object_unref(OBJECT(s)); } /** @@ -3486,6 +3487,7 @@ static void *bg_migration_thread(void *opaque) * writes to virtio VQs memory which is in write-protected region. */ s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s); + object_ref(OBJECT(s)); qemu_bh_schedule(s->vm_start_bh); bql_unlock(); From 9cf268965dba03b718c514adc06c2a5d444778e1 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 19 Jan 2024 20:39:20 -0300 Subject: [PATCH 11/14] migration: Reference migration state around loadvm_postcopy_handle_run_bh We need to hold a reference to the current_migration object around async calls to avoid it been freed while still in use. Even on this load-side function, we might still use the MigrationState, e.g to check for capabilities. Signed-off-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240119233922.32588-4-farosas@suse.de Signed-off-by: Peter Xu --- migration/savevm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migration/savevm.c b/migration/savevm.c index 6410705ebe..93387350c7 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2174,6 +2174,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) qemu_bh_delete(mis->bh); trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started"); + object_unref(OBJECT(migration_get_current())); } /* After all discards we can start running and asking for pages */ @@ -2189,6 +2190,7 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) postcopy_state_set(POSTCOPY_INCOMING_RUNNING); mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis); + object_ref(OBJECT(migration_get_current())); qemu_bh_schedule(mis->bh); /* We need to finish reading the stream from the package From 699d9476a065881117004def5c052fbed21f862b Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 19 Jan 2024 20:39:21 -0300 Subject: [PATCH 12/14] migration: Add a wrapper to qemu_bh_schedule Wrap qemu_bh_schedule() to ensure we always hold a reference to the current_migration object. Signed-off-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240119233922.32588-5-farosas@suse.de Signed-off-by: Peter Xu --- migration/migration.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index b1213b59ce..0e7f101d64 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -199,6 +199,16 @@ void migration_object_init(void) dirty_bitmap_mig_init(); } +static void migration_bh_schedule(MigrationState *s, QEMUBH *bh) +{ + /* + * Ref the state for bh, because it may be called when + * there're already no other refs + */ + object_ref(OBJECT(s)); + qemu_bh_schedule(bh); +} + void migration_cancel(const Error *error) { if (error) { @@ -714,8 +724,7 @@ process_incoming_migration_co(void *opaque) } mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); - object_ref(OBJECT(migrate_get_current())); - qemu_bh_schedule(mis->bh); + migration_bh_schedule(migrate_get_current(), mis->bh); return; fail: migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, @@ -1332,16 +1341,6 @@ static void migrate_fd_cleanup(MigrationState *s) yank_unregister_instance(MIGRATION_YANK_INSTANCE); } -static void migrate_fd_cleanup_schedule(MigrationState *s) -{ - /* - * Ref the state for bh, because it may be called when - * there're already no other refs - */ - object_ref(OBJECT(s)); - qemu_bh_schedule(s->cleanup_bh); -} - static void migrate_fd_cleanup_bh(void *opaque) { MigrationState *s = opaque; @@ -3140,7 +3139,7 @@ static void migration_iteration_finish(MigrationState *s) error_report("%s: Unknown ending state %d", __func__, s->state); break; } - migrate_fd_cleanup_schedule(s); + migration_bh_schedule(s, s->cleanup_bh); bql_unlock(); } @@ -3171,7 +3170,7 @@ static void bg_migration_iteration_finish(MigrationState *s) break; } - migrate_fd_cleanup_schedule(s); + migration_bh_schedule(s, s->cleanup_bh); bql_unlock(); } @@ -3487,9 +3486,7 @@ static void *bg_migration_thread(void *opaque) * writes to virtio VQs memory which is in write-protected region. */ s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s); - object_ref(OBJECT(s)); - qemu_bh_schedule(s->vm_start_bh); - + migration_bh_schedule(s, s->vm_start_bh); bql_unlock(); while (migration_is_active(s)) { From 44d0d456d7c40c2f8900b958c01c1a2ef6acf302 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 19 Jan 2024 20:39:22 -0300 Subject: [PATCH 13/14] migration: Centralize BH creation and dispatch Now that the migration state reference counting is correct, further wrap the bottom half dispatch process to avoid future issues. Move BH creation and scheduling together and wrap the dispatch with an intermediary function that will ensure we always keep the ref/unref balanced. Also move the responsibility of deleting the BH into the wrapper and remove the now unnecessary pointers. Signed-off-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240119233922.32588-6-farosas@suse.de Signed-off-by: Peter Xu --- migration/migration.c | 65 +++++++++++++++++++++++++------------------ migration/migration.h | 5 +--- migration/savevm.c | 7 +---- 3 files changed, 40 insertions(+), 37 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 0e7f101d64..d5f705ceef 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -199,8 +199,39 @@ void migration_object_init(void) dirty_bitmap_mig_init(); } -static void migration_bh_schedule(MigrationState *s, QEMUBH *bh) +typedef struct { + QEMUBH *bh; + QEMUBHFunc *cb; + void *opaque; +} MigrationBH; + +static void migration_bh_dispatch_bh(void *opaque) { + MigrationState *s = migrate_get_current(); + MigrationBH *migbh = opaque; + + /* cleanup this BH */ + qemu_bh_delete(migbh->bh); + migbh->bh = NULL; + + /* dispatch the other one */ + migbh->cb(migbh->opaque); + object_unref(OBJECT(s)); + + g_free(migbh); +} + +void migration_bh_schedule(QEMUBHFunc *cb, void *opaque) +{ + MigrationState *s = migrate_get_current(); + MigrationBH *migbh = g_new0(MigrationBH, 1); + QEMUBH *bh = qemu_bh_new(migration_bh_dispatch_bh, migbh); + + /* Store these to dispatch when the BH runs */ + migbh->bh = bh; + migbh->cb = cb; + migbh->opaque = opaque; + /* * Ref the state for bh, because it may be called when * there're already no other refs @@ -656,9 +687,7 @@ static void process_incoming_migration_bh(void *opaque) */ migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED); - qemu_bh_delete(mis->bh); migration_incoming_state_destroy(); - object_unref(OBJECT(migrate_get_current())); } static void coroutine_fn @@ -723,8 +752,7 @@ process_incoming_migration_co(void *opaque) goto fail; } - mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); - migration_bh_schedule(migrate_get_current(), mis->bh); + migration_bh_schedule(process_incoming_migration_bh, mis); return; fail: migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, @@ -1285,9 +1313,6 @@ void migrate_set_state(int *state, int old_state, int new_state) static void migrate_fd_cleanup(MigrationState *s) { - qemu_bh_delete(s->cleanup_bh); - s->cleanup_bh = NULL; - g_free(s->hostname); s->hostname = NULL; json_writer_free(s->vmdesc); @@ -1343,9 +1368,7 @@ static void migrate_fd_cleanup(MigrationState *s) static void migrate_fd_cleanup_bh(void *opaque) { - MigrationState *s = opaque; - migrate_fd_cleanup(s); - object_unref(OBJECT(s)); + migrate_fd_cleanup(opaque); } void migrate_set_error(MigrationState *s, const Error *error) @@ -1568,8 +1591,6 @@ int migrate_init(MigrationState *s, Error **errp) * parameters/capabilities that the user set, and * locks. */ - s->cleanup_bh = 0; - s->vm_start_bh = 0; s->to_dst_file = NULL; s->state = MIGRATION_STATUS_NONE; s->rp_state.from_dst_file = NULL; @@ -3139,7 +3160,8 @@ static void migration_iteration_finish(MigrationState *s) error_report("%s: Unknown ending state %d", __func__, s->state); break; } - migration_bh_schedule(s, s->cleanup_bh); + + migration_bh_schedule(migrate_fd_cleanup_bh, s); bql_unlock(); } @@ -3170,7 +3192,7 @@ static void bg_migration_iteration_finish(MigrationState *s) break; } - migration_bh_schedule(s, s->cleanup_bh); + migration_bh_schedule(migrate_fd_cleanup_bh, s); bql_unlock(); } @@ -3376,12 +3398,8 @@ static void bg_migration_vm_start_bh(void *opaque) { MigrationState *s = opaque; - qemu_bh_delete(s->vm_start_bh); - s->vm_start_bh = NULL; - vm_resume(s->vm_old_state); migration_downtime_end(s); - object_unref(OBJECT(s)); } /** @@ -3485,8 +3503,7 @@ static void *bg_migration_thread(void *opaque) * calling VM state change notifiers from vm_start() would initiate * writes to virtio VQs memory which is in write-protected region. */ - s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s); - migration_bh_schedule(s, s->vm_start_bh); + migration_bh_schedule(bg_migration_vm_start_bh, s); bql_unlock(); while (migration_is_active(s)) { @@ -3542,12 +3559,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) migrate_error_free(s); s->expected_downtime = migrate_downtime_limit(); - if (resume) { - assert(s->cleanup_bh); - } else { - assert(!s->cleanup_bh); - s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); - } if (error_in) { migrate_fd_error(s, error_in); if (resume) { diff --git a/migration/migration.h b/migration/migration.h index a589ae8650..f2c8b8f286 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -159,8 +159,6 @@ struct MigrationIncomingState { /* PostCopyFD's for external userfaultfds & handlers of shared memory */ GArray *postcopy_remote_fds; - QEMUBH *bh; - int state; /* @@ -255,8 +253,6 @@ struct MigrationState { /*< public >*/ QemuThread thread; - QEMUBH *vm_start_bh; - QEMUBH *cleanup_bh; /* Protected by qemu_file_lock */ QEMUFile *to_dst_file; /* Postcopy specific transfer channel */ @@ -528,6 +524,7 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); void migration_make_urgent_request(void); void migration_consume_urgent_request(void); bool migration_rate_limit(void); +void migration_bh_schedule(QEMUBHFunc *cb, void *opaque); void migration_cancel(const Error *error); void migration_populate_vfio_info(MigrationInfo *info); diff --git a/migration/savevm.c b/migration/savevm.c index 93387350c7..d612c8a902 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2171,10 +2171,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) runstate_set(RUN_STATE_PAUSED); } - qemu_bh_delete(mis->bh); - trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started"); - object_unref(OBJECT(migration_get_current())); } /* After all discards we can start running and asking for pages */ @@ -2189,9 +2186,7 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) } postcopy_state_set(POSTCOPY_INCOMING_RUNNING); - mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis); - object_ref(OBJECT(migration_get_current())); - qemu_bh_schedule(mis->bh); + migration_bh_schedule(loadvm_postcopy_handle_run_bh, mis); /* We need to finish reading the stream from the package * and also stop reading anything more from the stream that loaded the From 57fd4b4e10756448acd6c90ce041ba8dc9313efc Mon Sep 17 00:00:00 2001 From: Het Gala Date: Tue, 23 Jan 2024 06:42:19 +0000 Subject: [PATCH 14/14] Make 'uri' optional for migrate QAPI 'uri' argument should be optional, as 'uri' and 'channels' arguments are mutally exclusive in nature. Fixes: 074dbce5fcce (migration: New migrate and migrate-incoming argument 'channels') Signed-off-by: Het Gala Link: https://lore.kernel.org/r/20240123064219.40514-1-het.gala@nutanix.com Signed-off-by: Peter Xu --- qapi/migration.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qapi/migration.json b/qapi/migration.json index 489b591c23..d3e2b864c5 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1757,7 +1757,7 @@ # ## { 'command': 'migrate', - 'data': {'uri': 'str', + 'data': {'*uri': 'str', '*channels': [ 'MigrationChannel' ], '*blk': { 'type': 'bool', 'features': [ 'deprecated' ] }, '*inc': { 'type': 'bool', 'features': [ 'deprecated' ] },