From a17ec44dba741de97e63efcda28852e73fca06dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 10 Mar 2022 17:18:05 +0000 Subject: [PATCH 01/18] tests: improve error message when saving TLS PSK file fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Peter Xu Signed-off-by: Daniel P. Berrangé Message-Id: <20220310171821.3724080-3-berrange@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tests/unit/crypto-tls-psk-helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/crypto-tls-psk-helpers.c b/tests/unit/crypto-tls-psk-helpers.c index 7f8a488961..4bea7c6fa2 100644 --- a/tests/unit/crypto-tls-psk-helpers.c +++ b/tests/unit/crypto-tls-psk-helpers.c @@ -30,7 +30,7 @@ void test_tls_psk_init(const char *pskfile) fp = fopen(pskfile, "w"); if (fp == NULL) { - g_critical("Failed to create pskfile %s", pskfile); + g_critical("Failed to create pskfile %s: %s", pskfile, strerror(errno)); abort(); } /* Don't do this in real applications! Use psktool. */ From dcd23e9cae61f3156155db93ae699d30340158d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 10 Mar 2022 17:18:06 +0000 Subject: [PATCH 02/18] tests: support QTEST_TRACE env variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When debugging failing qtests it is useful to be able to turn on trace output to stderr. The QTEST_TRACE env variable contents get injected as a '-trace ' command line arg Reviewed-by: Peter Xu Reviewed-by: Thomas Huth Reviewed-by: Juan Quintela Signed-off-by: Daniel P. Berrangé Message-Id: <20220310171821.3724080-4-berrange@redhat.com> Signed-off-by: Dr. 
David Alan Gilbert --- tests/qtest/libqtest.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c index 2b9bdb947d..7db7ef3a68 100644 --- a/tests/qtest/libqtest.c +++ b/tests/qtest/libqtest.c @@ -259,6 +259,9 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) gchar *qmp_socket_path; gchar *command; const char *qemu_binary = qtest_qemu_binary(); + const char *trace = g_getenv("QTEST_TRACE"); + g_autofree char *tracearg = trace ? + g_strdup_printf("-trace %s ", trace) : g_strdup(""); s = g_new(QTestState, 1); @@ -281,14 +284,15 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) qtest_add_abrt_handler(kill_qemu_hook_func, s); - command = g_strdup_printf("exec %s " + command = g_strdup_printf("exec %s %s" "-qtest unix:%s " "-qtest-log %s " "-chardev socket,path=%s,id=char0 " "-mon chardev=char0,mode=control " "-display none " "%s" - " -accel qtest", qemu_binary, socket_path, + " -accel qtest", + qemu_binary, tracearg, socket_path, getenv("QTEST_LOG") ? "/dev/fd/2" : "/dev/null", qmp_socket_path, extra_args ?: ""); From 0c2b6c85c99d8d2a957e6637c7cc0fd33f2b0540 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 10 Mar 2022 17:18:07 +0000 Subject: [PATCH 03/18] tests: print newline after QMP response in qtest logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The QMP commands have a trailing newline, but the response does not. This makes the qtest logs hard to follow as the next QMP command appears in the same line as the previous QMP response. Reviewed-by: Thomas Huth Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Daniel P. Berrangé Message-Id: <20220310171821.3724080-5-berrange@redhat.com> Signed-off-by: Dr. 
David Alan Gilbert --- tests/qtest/libqtest.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c index 7db7ef3a68..5098d71485 100644 --- a/tests/qtest/libqtest.c +++ b/tests/qtest/libqtest.c @@ -612,10 +612,13 @@ QDict *qmp_fd_receive(int fd) } if (log) { - len = write(2, &c, 1); + g_assert(write(2, &c, 1) == 1); } json_message_parser_feed(&qmp.parser, &c, 1); } + if (log) { + g_assert(write(2, "\n", 1) == 1); + } json_message_parser_destroy(&qmp.parser); return qmp.response; From 4b2bbca7a0b58475d7ffb2fea05adbe08cde57f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 10 Mar 2022 17:18:09 +0000 Subject: [PATCH 04/18] migration: fix use of TLS PSK credentials with a UNIX socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The migration TLS code has a check mandating that a hostname be available when starting a TLS session. This is expected when using x509 credentials, but is bogus for PSK and anonymous credentials as neither involve hostname validation. The TLS credentials object gained suitable error reporting in the case of TLS with x509 credentials, so there is no longer any need for the migration code to do its own (incorrect) validation. Signed-off-by: Daniel P. Berrangé Message-Id: <20220310171821.3724080-7-berrange@redhat.com> Reviewed-by: Juan Quintela Signed-off-by: Dr. 
David Alan Gilbert --- migration/tls.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/migration/tls.c b/migration/tls.c index ca1ea3bbdd..32c384a8b6 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -137,10 +137,6 @@ QIOChannelTLS *migration_tls_client_create(MigrationState *s, if (s->parameters.tls_hostname && *s->parameters.tls_hostname) { hostname = s->parameters.tls_hostname; } - if (!hostname) { - error_setg(errp, "No hostname available for TLS"); - return NULL; - } tioc = qio_channel_tls_new_client( ioc, creds, hostname, errp); From 19da6edfe87615462e469e77062d841c27514599 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 10 Mar 2022 17:18:10 +0000 Subject: [PATCH 05/18] tests: switch MigrateStart struct to be stack allocated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no compelling reason why the MigrateStart struct needs to be heap allocated. Using stack allocation and static initializers is simpler. Signed-off-by: Daniel P. Berrangé Message-Id: <20220310171821.3724080-8-berrange@redhat.com> Reviewed-by: Peter Xu Signed-off-by: Dr. 
David Alan Gilbert --- tests/qtest/migration-test.c | 132 +++++++++++++++-------------------- 1 file changed, 55 insertions(+), 77 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 0870656d82..36e5408702 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -474,28 +474,12 @@ typedef struct { bool only_target; /* Use dirty ring if true; dirty logging otherwise */ bool use_dirty_ring; - char *opts_source; - char *opts_target; + const char *opts_source; + const char *opts_target; } MigrateStart; -static MigrateStart *migrate_start_new(void) -{ - MigrateStart *args = g_new0(MigrateStart, 1); - - args->opts_source = g_strdup(""); - args->opts_target = g_strdup(""); - return args; -} - -static void migrate_start_destroy(MigrateStart *args) -{ - g_free(args->opts_source); - g_free(args->opts_target); - g_free(args); -} - static int test_migrate_start(QTestState **from, QTestState **to, - const char *uri, MigrateStart **pargs) + const char *uri, MigrateStart *args) { g_autofree gchar *arch_source = NULL; g_autofree gchar *arch_target = NULL; @@ -507,15 +491,12 @@ static int test_migrate_start(QTestState **from, QTestState **to, g_autofree char *shmem_path = NULL; const char *arch = qtest_get_arch(); const char *machine_opts = NULL; - MigrateStart *args = *pargs; const char *memory_size; - int ret = 0; if (args->use_shmem) { if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) { g_test_skip("/dev/shm is not supported"); - ret = -1; - goto out; + return -1; } } @@ -591,7 +572,8 @@ static int test_migrate_start(QTestState **from, QTestState **to, machine_opts ? " -machine " : "", machine_opts ? machine_opts : "", memory_size, tmpfs, - arch_source, shmem_opts, args->opts_source, + arch_source, shmem_opts, + args->opts_source ? 
args->opts_source : "", ignore_stderr); if (!args->only_target) { *from = qtest_init(cmd_source); @@ -609,7 +591,8 @@ static int test_migrate_start(QTestState **from, QTestState **to, machine_opts ? machine_opts : "", memory_size, tmpfs, uri, arch_target, shmem_opts, - args->opts_target, ignore_stderr); + args->opts_target ? args->opts_target : "", + ignore_stderr); *to = qtest_init(cmd_target); /* @@ -620,11 +603,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, unlink(shmem_path); } -out: - migrate_start_destroy(args); - /* This tells the caller that this structure is gone */ - *pargs = NULL; - return ret; + return 0; } static void test_migrate_end(QTestState *from, QTestState *to, bool test_dest) @@ -668,7 +647,7 @@ static int migrate_postcopy_prepare(QTestState **from_ptr, g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); QTestState *from, *to; - if (test_migrate_start(&from, &to, uri, &args)) { + if (test_migrate_start(&from, &to, uri, args)) { return -1; } @@ -712,10 +691,10 @@ static void migrate_postcopy_complete(QTestState *from, QTestState *to) static void test_postcopy(void) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = {}; QTestState *from, *to; - if (migrate_postcopy_prepare(&from, &to, args)) { + if (migrate_postcopy_prepare(&from, &to, &args)) { return; } migrate_postcopy_start(from, to); @@ -724,13 +703,13 @@ static void test_postcopy(void) static void test_postcopy_recovery(void) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = { + .hide_stderr = true, + }; QTestState *from, *to; g_autofree char *uri = NULL; - args->hide_stderr = true; - - if (migrate_postcopy_prepare(&from, &to, args)) { + if (migrate_postcopy_prepare(&from, &to, &args)) { return; } @@ -786,11 +765,11 @@ static void test_postcopy_recovery(void) static void test_baddest(void) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = { + .hide_stderr = true + }; QTestState *from, *to; - 
args->hide_stderr = true; - if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", &args)) { return; } @@ -802,11 +781,11 @@ static void test_baddest(void) static void test_precopy_unix_common(bool dirty_ring) { g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); - MigrateStart *args = migrate_start_new(); + MigrateStart args = { + .use_dirty_ring = dirty_ring, + }; QTestState *from, *to; - args->use_dirty_ring = dirty_ring; - if (test_migrate_start(&from, &to, uri, &args)) { return; } @@ -892,7 +871,7 @@ static void test_ignore_shared(void) static void test_xbzrle(const char *uri) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = {}; QTestState *from, *to; if (test_migrate_start(&from, &to, uri, &args)) { @@ -945,7 +924,7 @@ static void test_xbzrle_unix(void) static void test_precopy_tcp(void) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = {}; g_autofree char *uri = NULL; QTestState *from, *to; @@ -987,7 +966,7 @@ static void test_precopy_tcp(void) static void test_migrate_fd_proto(void) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = {}; QTestState *from, *to; int ret; int pair[2]; @@ -1074,7 +1053,7 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail) g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); QTestState *from, *to; - if (test_migrate_start(&from, &to, uri, &args)) { + if (test_migrate_start(&from, &to, uri, args)) { return; } @@ -1103,51 +1082,49 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail) static void test_validate_uuid(void) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = { + .opts_source = "-uuid 11111111-1111-1111-1111-111111111111", + .opts_target = "-uuid 11111111-1111-1111-1111-111111111111", + }; - g_free(args->opts_source); - g_free(args->opts_target); - args->opts_source = g_strdup("-uuid 11111111-1111-1111-1111-111111111111"); - args->opts_target = g_strdup("-uuid 
11111111-1111-1111-1111-111111111111"); - do_test_validate_uuid(args, false); + do_test_validate_uuid(&args, false); } static void test_validate_uuid_error(void) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = { + .opts_source = "-uuid 11111111-1111-1111-1111-111111111111", + .opts_target = "-uuid 22222222-2222-2222-2222-222222222222", + .hide_stderr = true, + }; - g_free(args->opts_source); - g_free(args->opts_target); - args->opts_source = g_strdup("-uuid 11111111-1111-1111-1111-111111111111"); - args->opts_target = g_strdup("-uuid 22222222-2222-2222-2222-222222222222"); - args->hide_stderr = true; - do_test_validate_uuid(args, true); + do_test_validate_uuid(&args, true); } static void test_validate_uuid_src_not_set(void) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = { + .opts_target = "-uuid 22222222-2222-2222-2222-222222222222", + .hide_stderr = true, + }; - g_free(args->opts_target); - args->opts_target = g_strdup("-uuid 22222222-2222-2222-2222-222222222222"); - args->hide_stderr = true; - do_test_validate_uuid(args, false); + do_test_validate_uuid(&args, false); } static void test_validate_uuid_dst_not_set(void) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = { + .opts_source = "-uuid 11111111-1111-1111-1111-111111111111", + .hide_stderr = true, + }; - g_free(args->opts_source); - args->opts_source = g_strdup("-uuid 11111111-1111-1111-1111-111111111111"); - args->hide_stderr = true; - do_test_validate_uuid(args, false); + do_test_validate_uuid(&args, false); } static void test_migrate_auto_converge(void) { g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); - MigrateStart *args = migrate_start_new(); + MigrateStart args = {}; QTestState *from, *to; int64_t remaining, percentage; @@ -1230,7 +1207,7 @@ static void test_migrate_auto_converge(void) static void test_multifd_tcp(const char *method) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = {}; QTestState *from, 
*to; QDict *rsp; g_autofree char *uri = NULL; @@ -1314,13 +1291,13 @@ static void test_multifd_tcp_zstd(void) */ static void test_multifd_tcp_cancel(void) { - MigrateStart *args = migrate_start_new(); + MigrateStart args = { + .hide_stderr = true, + }; QTestState *from, *to, *to2; QDict *rsp; g_autofree char *uri = NULL; - args->hide_stderr = true; - if (test_migrate_start(&from, &to, "defer", &args)) { return; } @@ -1357,8 +1334,9 @@ static void test_multifd_tcp_cancel(void) migrate_cancel(from); - args = migrate_start_new(); - args->only_target = true; + args = (MigrateStart){ + .only_target = true, + }; if (test_migrate_start(&from, &to2, "defer", &args)) { return; From ffed54f6e51db2685d44f34bb2437aac10314e00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 10 Mar 2022 17:18:11 +0000 Subject: [PATCH 06/18] tests: merge code for UNIX and TCP migration pre-copy tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test cases differ only in the URI they provide to the migration commands, and the ability to set the dirty_ring mode. This code is trivially merged into a common helper. Signed-off-by: Daniel P. Berrangé Message-Id: <20220310171821.3724080-9-berrange@redhat.com> Reviewed-by: Peter Xu Signed-off-by: Dr. 
David Alan Gilbert --- tests/qtest/migration-test.c | 98 ++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 36e5408702..b62869b3af 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -778,19 +778,32 @@ static void test_baddest(void) test_migrate_end(from, to, false); } -static void test_precopy_unix_common(bool dirty_ring) +typedef struct { + /* Optional: fine tune start parameters */ + MigrateStart start; + + /* Required: the URI for the dst QEMU to listen on */ + const char *listen_uri; + + /* + * Optional: the URI for the src QEMU to connect to + * If NULL, then it will query the dst QEMU for its actual + * listening address and use that as the connect address. + * This allows for dynamically picking a free TCP port. + */ + const char *connect_uri; +} MigrateCommon; + +static void test_precopy_common(MigrateCommon *args) { - g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); - MigrateStart args = { - .use_dirty_ring = dirty_ring, - }; QTestState *from, *to; - if (test_migrate_start(&from, &to, uri, &args)) { + if (test_migrate_start(&from, &to, args->listen_uri, &args->start)) { return; } - /* We want to pick a speed slow enough that the test completes + /* + * We want to pick a speed slow enough that the test completes * quickly, but that it doesn't complete precopy even on a slow * machine, so also set the downtime. 
*/ @@ -802,7 +815,14 @@ static void test_precopy_unix_common(bool dirty_ring) /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); - migrate_qmp(from, uri, "{}"); + if (!args->connect_uri) { + g_autofree char *local_connect_uri = + migrate_get_socket_address(to, "socket-address"); + migrate_qmp(from, local_connect_uri, "{}"); + } else { + migrate_qmp(from, args->connect_uri, "{}"); + } + wait_for_migration_pass(from); @@ -822,14 +842,27 @@ static void test_precopy_unix_common(bool dirty_ring) static void test_precopy_unix(void) { - /* Using default dirty logging */ - test_precopy_unix_common(false); + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .listen_uri = uri, + .connect_uri = uri, + }; + + test_precopy_common(&args); } static void test_precopy_unix_dirty_ring(void) { - /* Using dirty ring tracking */ - test_precopy_unix_common(true); + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .start = { + .use_dirty_ring = true, + }, + .listen_uri = uri, + .connect_uri = uri, + }; + + test_precopy_common(&args); } #if 0 @@ -924,44 +957,11 @@ static void test_xbzrle_unix(void) static void test_precopy_tcp(void) { - MigrateStart args = {}; - g_autofree char *uri = NULL; - QTestState *from, *to; + MigrateCommon args = { + .listen_uri = "tcp:127.0.0.1:0", + }; - if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", &args)) { - return; - } - - /* - * We want to pick a speed slow enough that the test completes - * quickly, but that it doesn't complete precopy even on a slow - * machine, so also set the downtime. 
- */ - /* 1 ms should make it not converge*/ - migrate_set_parameter_int(from, "downtime-limit", 1); - /* 1GB/s */ - migrate_set_parameter_int(from, "max-bandwidth", 1000000000); - - /* Wait for the first serial output from the source */ - wait_for_serial("src_serial"); - - uri = migrate_get_socket_address(to, "socket-address"); - - migrate_qmp(from, uri, "{}"); - - wait_for_migration_pass(from); - - migrate_set_parameter_int(from, "downtime-limit", CONVERGE_DOWNTIME); - - if (!got_stop) { - qtest_qmp_eventwait(from, "STOP"); - } - qtest_qmp_eventwait(to, "RESUME"); - - wait_for_serial("dest_serial"); - wait_for_migration_complete(from); - - test_migrate_end(from, to, true); + test_precopy_common(&args); } static void test_migrate_fd_proto(void) From b3caa7b55e1ba9d3c02d50baa425f601d091f4cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 10 Mar 2022 17:18:12 +0000 Subject: [PATCH 07/18] tests: introduce ability to provide hooks for migration precopy test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a lot of different scenarios to test with migration due to the wide number of parameters and capabilities available. To enable sharing of the basic precopy test scenario, we need to be able to set arbitrary parameters and capabilities before the migration is initiated, but don't want to have all this logic in the common helper function. Solve this by defining two hooks that can be provided by the test case, one before migration starts and one after migration finishes. Signed-off-by: Daniel P. Berrangé Message-Id: <20220310171821.3724080-10-berrange@redhat.com> Reviewed-by: Peter Xu Signed-off-by: Dr. 
David Alan Gilbert --- tests/qtest/migration-test.c | 38 ++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index b62869b3af..ae40429798 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -778,6 +778,30 @@ static void test_baddest(void) test_migrate_end(from, to, false); } +/* + * A hook that runs after the src and dst QEMUs have been + * created, but before the migration is started. This can + * be used to set migration parameters and capabilities. + * + * Returns: NULL, or a pointer to opaque state to be + * later passed to the TestMigrateFinishHook + */ +typedef void * (*TestMigrateStartHook)(QTestState *from, + QTestState *to); + +/* + * A hook that runs after the migration has finished, + * regardless of whether it succeeded or failed, but + * before QEMU has terminated (unless it self-terminated + * due to migration error) + * + * @opaque is a pointer to state previously returned + * by the TestMigrateStartHook if any, or NULL. + */ +typedef void (*TestMigrateFinishHook)(QTestState *from, + QTestState *to, + void *opaque); + typedef struct { /* Optional: fine tune start parameters */ MigrateStart start; @@ -792,11 +816,17 @@ typedef struct { * This allows for dynamically picking a free TCP port. 
*/ const char *connect_uri; + + /* Optional: callback to run at start to set migration parameters */ + TestMigrateStartHook start_hook; + /* Optional: callback to run at finish to cleanup */ + TestMigrateFinishHook finish_hook; } MigrateCommon; static void test_precopy_common(MigrateCommon *args) { QTestState *from, *to; + void *data_hook = NULL; if (test_migrate_start(&from, &to, args->listen_uri, &args->start)) { return; @@ -812,6 +842,10 @@ static void test_precopy_common(MigrateCommon *args) /* 1GB/s */ migrate_set_parameter_int(from, "max-bandwidth", 1000000000); + if (args->start_hook) { + data_hook = args->start_hook(from, to); + } + /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); @@ -837,6 +871,10 @@ static void test_precopy_common(MigrateCommon *args) wait_for_serial("dest_serial"); wait_for_migration_complete(from); + if (args->finish_hook) { + args->finish_hook(from, to, data_hook); + } + test_migrate_end(from, to, true); } From 243e006686f51f076536b5e61efbefa8f2e92ab6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 10 Mar 2022 17:18:13 +0000 Subject: [PATCH 08/18] tests: switch migration FD passing test to use common precopy helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The combination of the start and finish hooks allows the FD passing code to use the precopy helper. Signed-off-by: Daniel P. Berrangé Message-Id: <20220310171821.3724080-11-berrange@redhat.com> Reviewed-by: Peter Xu Signed-off-by: Dr. 
David Alan Gilbert --- tests/qtest/migration-test.c | 57 +++++++++++++----------------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index ae40429798..04f749aaa1 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -1002,31 +1002,12 @@ static void test_precopy_tcp(void) test_precopy_common(&args); } -static void test_migrate_fd_proto(void) +static void *test_migrate_fd_start_hook(QTestState *from, + QTestState *to) { - MigrateStart args = {}; - QTestState *from, *to; + QDict *rsp; int ret; int pair[2]; - QDict *rsp; - const char *error_desc; - - if (test_migrate_start(&from, &to, "defer", &args)) { - return; - } - - /* - * We want to pick a speed slow enough that the test completes - * quickly, but that it doesn't complete precopy even on a slow - * machine, so also set the downtime. - */ - /* 1 ms should make it not converge */ - migrate_set_parameter_int(from, "downtime-limit", 1); - /* 1GB/s */ - migrate_set_parameter_int(from, "max-bandwidth", 1000000000); - - /* Wait for the first serial output from the source */ - wait_for_serial("src_serial"); /* Create two connected sockets for migration */ ret = socketpair(PF_LOCAL, SOCK_STREAM, 0, pair); @@ -1051,17 +1032,15 @@ static void test_migrate_fd_proto(void) qobject_unref(rsp); close(pair[1]); - /* Start migration to the 2nd socket*/ - migrate_qmp(from, "fd:fd-mig", "{}"); + return NULL; +} - wait_for_migration_pass(from); - - migrate_set_parameter_int(from, "downtime-limit", CONVERGE_DOWNTIME); - - if (!got_stop) { - qtest_qmp_eventwait(from, "STOP"); - } - qtest_qmp_eventwait(to, "RESUME"); +static void test_migrate_fd_finish_hook(QTestState *from, + QTestState *to, + void *opaque) +{ + QDict *rsp; + const char *error_desc; /* Test closing fds */ /* We assume, that QEMU removes named fd from its list, @@ -1079,11 +1058,17 @@ static void test_migrate_fd_proto(void) error_desc = 
qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); qobject_unref(rsp); +} - /* Complete migration */ - wait_for_serial("dest_serial"); - wait_for_migration_complete(from); - test_migrate_end(from, to, true); +static void test_migrate_fd_proto(void) +{ + MigrateCommon args = { + .listen_uri = "defer", + .connect_uri = "fd:fd-mig", + .start_hook = test_migrate_fd_start_hook, + .finish_hook = test_migrate_fd_finish_hook + }; + test_precopy_common(&args); } static void do_test_validate_uuid(MigrateStart *args, bool should_fail) From 00fbe7f6add0ac58556e9fe3354d300294e6c3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 10 Mar 2022 17:18:14 +0000 Subject: [PATCH 09/18] tests: expand the migration precopy helper to support failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The migration precopy testing helper function always expects the migration to run to a completion state. There will be test scenarios for TLS where we expect either the client or server to fail the migration. This expands the helper to cope with these scenarios. Signed-off-by: Daniel P. Berrangé Message-Id: <20220310171821.3724080-12-berrange@redhat.com> Reviewed-by: Peter Xu Signed-off-by: Dr. David Alan Gilbert --- tests/qtest/migration-test.c | 53 +++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 04f749aaa1..2af36c16a3 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -821,6 +821,30 @@ typedef struct { TestMigrateStartHook start_hook; /* Optional: callback to run at finish to cleanup */ TestMigrateFinishHook finish_hook; + + /* + * Optional: normally we expect the migration process to complete. + * + * There can be a variety of reasons and stages in which failure + * can happen during tests. 
+ * + * If a failure is expected to happen at time of establishing + * the connection, then MIG_TEST_FAIL will indicate that the dst + * QEMU is expected to stay running and accept future migration + * connections. + * + * If a failure is expected to happen while processing the + * migration stream, then MIG_TEST_FAIL_DEST_QUIT_ERR will indicate + * that the dst QEMU is expected to quit with non-zero exit status + */ + enum { + /* This test should succeed, the default */ + MIG_TEST_SUCCEED = 0, + /* This test should fail, dest qemu should keep alive */ + MIG_TEST_FAIL, + /* This test should fail, dest qemu should fail with abnormal status */ + MIG_TEST_FAIL_DEST_QUIT_ERR, + } result; } MigrateCommon; static void test_precopy_common(MigrateCommon *args) @@ -858,24 +882,33 @@ static void test_precopy_common(MigrateCommon *args) } - wait_for_migration_pass(from); + if (args->result != MIG_TEST_SUCCEED) { + bool allow_active = args->result == MIG_TEST_FAIL; + wait_for_migration_fail(from, allow_active); - migrate_set_parameter_int(from, "downtime-limit", CONVERGE_DOWNTIME); + if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) { + qtest_set_expected_status(to, 1); + } + } else { + wait_for_migration_pass(from); - if (!got_stop) { - qtest_qmp_eventwait(from, "STOP"); + migrate_set_parameter_int(from, "downtime-limit", CONVERGE_DOWNTIME); + + if (!got_stop) { + qtest_qmp_eventwait(from, "STOP"); + } + + qtest_qmp_eventwait(to, "RESUME"); + + wait_for_serial("dest_serial"); + wait_for_migration_complete(from); } - qtest_qmp_eventwait(to, "RESUME"); - - wait_for_serial("dest_serial"); - wait_for_migration_complete(from); - if (args->finish_hook) { args->finish_hook(from, to, data_hook); } - test_migrate_end(from, to, true); + test_migrate_end(from, to, args->result == MIG_TEST_SUCCEED); } static void test_precopy_unix(void) From 83174765da3563794a56fbe24216908f3a5db194 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 31 Mar 2022 11:08:39 -0400 Subject: [PATCH 10/18] 
migration: Postpone releasing MigrationState.hostname MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to release it right after migrate_fd_connect(). That's not good enough when more than one socket pair is required, because it'll be needed to establish the TLS connection for the remaining channels. One example is multifd, where we copied over the hostname for each channel but that's actually not needed. Keep the hostname until the cleanup phase of migration. Cc: Daniel P. Berrange Signed-off-by: Peter Xu Message-Id: <20220331150857.74406-2-peterx@redhat.com> Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert dgilbert: Fixup checkpatch error; don't need to check for NULL around g_free --- migration/channel.c | 1 - migration/migration.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/migration/channel.c b/migration/channel.c index c4fc000a1a..c6a8dcf1d7 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -96,6 +96,5 @@ void migration_channel_connect(MigrationState *s, } } migrate_fd_connect(s, error); - g_free(s->hostname); error_free(error); } diff --git a/migration/migration.c b/migration/migration.c index 4dcb511bb6..3f61a08752 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1809,6 +1809,9 @@ static void migrate_fd_cleanup(MigrationState *s) qemu_bh_delete(s->cleanup_bh); s->cleanup_bh = NULL; + g_free(s->hostname); + s->hostname = NULL; + qemu_savevm_state_cleanup(); if (s->to_dst_file) { From 7f692ec79a211d187a4edefa01396bd8ea2d02ef Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 31 Mar 2022 11:08:40 -0400 Subject: [PATCH 11/18] migration: Drop multifd tls_hostname cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hostname is cached N times, N equals to the multifd channels. 
Drop that cache because after previous patch we've got s->hostname being alive for the whole lifecycle of migration procedure. Cc: Juan Quintela Cc: Daniel P. Berrange Signed-off-by: Peter Xu Message-Id: <20220331150857.74406-3-peterx@redhat.com> Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- migration/multifd.c | 10 +++------- migration/multifd.h | 2 -- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/migration/multifd.c b/migration/multifd.c index 76b57a7177..1be4ab5d17 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -542,8 +542,6 @@ void multifd_save_cleanup(void) qemu_sem_destroy(&p->sem_sync); g_free(p->name); p->name = NULL; - g_free(p->tls_hostname); - p->tls_hostname = NULL; multifd_pages_clear(p->pages); p->pages = NULL; p->packet_len = 0; @@ -763,7 +761,7 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, Error **errp) { MigrationState *s = migrate_get_current(); - const char *hostname = p->tls_hostname; + const char *hostname = s->hostname; QIOChannelTLS *tioc; tioc = migration_tls_client_create(s, ioc, hostname, errp); @@ -787,7 +785,8 @@ static bool multifd_channel_connect(MultiFDSendParams *p, MigrationState *s = migrate_get_current(); trace_multifd_set_outgoing_channel( - ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); + ioc, object_get_typename(OBJECT(ioc)), + migrate_get_current()->hostname, error); if (!error) { if (s->parameters.tls_creds && @@ -874,7 +873,6 @@ int multifd_save_setup(Error **errp) int thread_count; uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); uint8_t i; - MigrationState *s; if (!migrate_use_multifd()) { return 0; @@ -884,7 +882,6 @@ int multifd_save_setup(Error **errp) return -1; } - s = migrate_get_current(); thread_count = migrate_multifd_channels(); multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); @@ 
-909,7 +906,6 @@ int multifd_save_setup(Error **errp) p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); p->packet->version = cpu_to_be32(MULTIFD_VERSION); p->name = g_strdup_printf("multifdsend_%d", i); - p->tls_hostname = g_strdup(s->hostname); /* We need one extra place for the packet header */ p->iov = g_new0(struct iovec, page_count + 1); p->normal = g_new0(ram_addr_t, page_count); diff --git a/migration/multifd.h b/migration/multifd.h index 4dda900a0b..3d577b98b7 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -72,8 +72,6 @@ typedef struct { uint8_t id; /* channel thread name */ char *name; - /* tls hostname */ - char *tls_hostname; /* channel thread id */ QemuThread thread; /* communication channel */ From ea2faf0c35f0f9d1f3d7c9c88637b9014575e02b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 31 Mar 2022 11:08:41 -0400 Subject: [PATCH 12/18] migration: Add pss.postcopy_requested status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This boolean flag shows whether the current page during migration is triggered by postcopy or not. Then in ram_save_host_page() and deeper stack we'll be able to have a reference on the priority of this page. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Message-Id: <20220331150857.74406-4-peterx@redhat.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. 
David Alan Gilbert --- migration/ram.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/migration/ram.c b/migration/ram.c index 3532f64ecb..bfcd45a36e 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -414,6 +414,8 @@ struct PageSearchStatus { unsigned long page; /* Set once we wrap around */ bool complete_round; + /* Whether current page is explicitly requested by postcopy */ + bool postcopy_requested; }; typedef struct PageSearchStatus PageSearchStatus; @@ -1487,6 +1489,9 @@ retry: */ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) { + /* This is not a postcopy requested page */ + pss->postcopy_requested = false; + pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); if (pss->complete_round && pss->block == rs->last_seen_block && pss->page >= rs->last_page) { @@ -1981,6 +1986,7 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) * really rare. */ pss->complete_round = false; + pss->postcopy_requested = true; } return !!block; From f444eeda715d2307d45890eb488f309eede99250 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 31 Mar 2022 11:08:42 -0400 Subject: [PATCH 13/18] migration: Move migrate_allow_multifd and helpers into migration.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This variable, along with its helpers, is used to detect whether multiple channels are supported for migration. In follow-up patches, other capabilities will also require multiple channels. Hence move it out of the multifd-specific code and make it public. Meanwhile rename it from "multifd" to "multi_channels" to show its real meaning. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Message-Id: <20220331150857.74406-5-peterx@redhat.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr.
David Alan Gilbert --- migration/migration.c | 22 +++++++++++++++++----- migration/migration.h | 3 +++ migration/multifd.c | 19 ++++--------------- migration/multifd.h | 2 -- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 3f61a08752..d86fb0c332 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -180,6 +180,18 @@ static int migration_maybe_pause(MigrationState *s, int new_state); static void migrate_fd_cancel(MigrationState *s); +static bool migrate_allow_multi_channels = true; + +void migrate_protocol_allow_multi_channels(bool allow) +{ + migrate_allow_multi_channels = allow; +} + +bool migrate_multi_channels_is_allowed(void) +{ + return migrate_allow_multi_channels; +} + static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) { uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp; @@ -469,12 +481,12 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p = NULL; - migrate_protocol_allow_multifd(false); /* reset it anyway */ + migrate_protocol_allow_multi_channels(false); /* reset it anyway */ qapi_event_send_migration(MIGRATION_STATUS_SETUP); if (strstart(uri, "tcp:", &p) || strstart(uri, "unix:", NULL) || strstart(uri, "vsock:", NULL)) { - migrate_protocol_allow_multifd(true); + migrate_protocol_allow_multi_channels(true); socket_start_incoming_migration(p ? 
p : uri, errp); #ifdef CONFIG_RDMA } else if (strstart(uri, "rdma:", &p)) { @@ -1261,7 +1273,7 @@ static bool migrate_caps_check(bool *cap_list, /* incoming side only */ if (runstate_check(RUN_STATE_INMIGRATE) && - !migrate_multifd_is_allowed() && + !migrate_multi_channels_is_allowed() && cap_list[MIGRATION_CAPABILITY_MULTIFD]) { error_setg(errp, "multifd is not supported by current protocol"); return false; @@ -2322,11 +2334,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, } } - migrate_protocol_allow_multifd(false); + migrate_protocol_allow_multi_channels(false); if (strstart(uri, "tcp:", &p) || strstart(uri, "unix:", NULL) || strstart(uri, "vsock:", NULL)) { - migrate_protocol_allow_multifd(true); + migrate_protocol_allow_multi_channels(true); socket_start_outgoing_migration(s, p ? p : uri, &local_err); #ifdef CONFIG_RDMA } else if (strstart(uri, "rdma:", &p)) { diff --git a/migration/migration.h b/migration/migration.h index 2de861df01..f17ccc657c 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -430,4 +430,7 @@ void migration_cancel(const Error *error); void populate_vfio_info(MigrationInfo *info); void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); +bool migrate_multi_channels_is_allowed(void); +void migrate_protocol_allow_multi_channels(bool allow); + #endif diff --git a/migration/multifd.c b/migration/multifd.c index 1be4ab5d17..9ea4f581e2 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -517,7 +517,7 @@ void multifd_save_cleanup(void) { int i; - if (!migrate_use_multifd() || !migrate_multifd_is_allowed()) { + if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) { return; } multifd_send_terminate_threads(NULL); @@ -857,17 +857,6 @@ cleanup: multifd_new_send_channel_cleanup(p, sioc, local_err); } -static bool migrate_allow_multifd = true; -void migrate_protocol_allow_multifd(bool allow) -{ - migrate_allow_multifd = allow; -} - -bool migrate_multifd_is_allowed(void) -{ - return 
migrate_allow_multifd; -} - int multifd_save_setup(Error **errp) { int thread_count; @@ -877,7 +866,7 @@ int multifd_save_setup(Error **errp) if (!migrate_use_multifd()) { return 0; } - if (!migrate_multifd_is_allowed()) { + if (!migrate_multi_channels_is_allowed()) { error_setg(errp, "multifd is not supported by current protocol"); return -1; } @@ -976,7 +965,7 @@ int multifd_load_cleanup(Error **errp) { int i; - if (!migrate_use_multifd() || !migrate_multifd_is_allowed()) { + if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) { return 0; } multifd_recv_terminate_threads(NULL); @@ -1125,7 +1114,7 @@ int multifd_load_setup(Error **errp) if (!migrate_use_multifd()) { return 0; } - if (!migrate_multifd_is_allowed()) { + if (!migrate_multi_channels_is_allowed()) { error_setg(errp, "multifd is not supported by current protocol"); return -1; } diff --git a/migration/multifd.h b/migration/multifd.h index 3d577b98b7..7d0effcb03 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -13,8 +13,6 @@ #ifndef QEMU_MIGRATION_MULTIFD_H #define QEMU_MIGRATION_MULTIFD_H -bool migrate_multifd_is_allowed(void); -void migrate_protocol_allow_multifd(bool allow); int multifd_save_setup(Error **errp); void multifd_save_cleanup(void); int multifd_load_setup(Error **errp); From 929068ec2f2fc1e3d8585e29709f5c5ef8f9317b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 31 Mar 2022 11:08:43 -0400 Subject: [PATCH 14/18] migration: Export ram_load_postcopy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be reused in postcopy fast load thread. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Message-Id: <20220331150857.74406-6-peterx@redhat.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. 
David Alan Gilbert --- migration/ram.c | 2 +- migration/ram.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index bfcd45a36e..253fe4b756 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3645,7 +3645,7 @@ int ram_postcopy_incoming_init(MigrationIncomingState *mis) * * @f: QEMUFile where to send the data */ -static int ram_load_postcopy(QEMUFile *f) +int ram_load_postcopy(QEMUFile *f) { int flags = 0, ret = 0; bool place_needed = false; diff --git a/migration/ram.h b/migration/ram.h index 2c6dc3675d..ded0a3a086 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -61,6 +61,7 @@ void ram_postcopy_send_discard_bitmap(MigrationState *ms); /* For incoming postcopy discard */ int ram_discard_range(const char *block_name, uint64_t start, size_t length); int ram_postcopy_incoming_init(MigrationIncomingState *mis); +int ram_load_postcopy(QEMUFile *f); void ram_handle_compressed(void *host, uint8_t ch, uint64_t size); From a39e933962314c9949d71c25f234e8a3dddc4b25 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 31 Mar 2022 11:08:44 -0400 Subject: [PATCH 15/18] migration: Move channel setup out of postcopy_try_recover() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to use postcopy_try_recover() to replace migration_incoming_setup() to set up incoming channels. That's fine for the old world, but in the new world there can be more than one channel that needs setup. Better move the channel setup out of it so that postcopy_try_recover() only handles the last phase of switching to the recovery phase. To do that in migration_fd_process_incoming(), move the postcopy_try_recover() call to be after migration_incoming_setup(), which will set up the channels. While in migration_ioc_process_incoming(), postpone the recover() routine until right before we jump into migration_incoming_process(). A side benefit is we don't need to pass in QEMUFile* to postcopy_try_recover() anymore.
Remove it. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Message-Id: <20220331150857.74406-7-peterx@redhat.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index d86fb0c332..b1659b7092 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -671,19 +671,20 @@ void migration_incoming_process(void) } /* Returns true if recovered from a paused migration, otherwise false */ -static bool postcopy_try_recover(QEMUFile *f) +static bool postcopy_try_recover(void) { MigrationIncomingState *mis = migration_incoming_get_current(); if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { /* Resumed from a paused postcopy migration */ - mis->from_src_file = f; + /* This should be set already in migration_incoming_setup() */ + assert(mis->from_src_file); /* Postcopy has standalone thread to do vm load */ - qemu_file_set_blocking(f, true); + qemu_file_set_blocking(mis->from_src_file, true); /* Re-configure the return path */ - mis->to_src_file = qemu_file_get_return_path(f); + mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED, MIGRATION_STATUS_POSTCOPY_RECOVER); @@ -704,11 +705,10 @@ static bool postcopy_try_recover(QEMUFile *f) void migration_fd_process_incoming(QEMUFile *f, Error **errp) { - if (postcopy_try_recover(f)) { + if (!migration_incoming_setup(f, errp)) { return; } - - if (!migration_incoming_setup(f, errp)) { + if (postcopy_try_recover()) { return; } migration_incoming_process(); @@ -724,11 +724,6 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) /* The first connection (multifd may have multiple) */ QEMUFile *f = qemu_fopen_channel_input(ioc); - /* If it's a recovery, we're done */ - if (postcopy_try_recover(f)) { - return; - } - if 
(!migration_incoming_setup(f, errp)) { return; } @@ -749,6 +744,10 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) } if (start_migration) { + /* If it's a recovery, we're done */ + if (postcopy_try_recover()) { + return; + } migration_incoming_process(); } } From 08401c0426bc1a5ce4609afd1cda5dd39abbf9fa Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 31 Mar 2022 11:08:45 -0400 Subject: [PATCH 16/18] migration: Allow migrate-recover to run multiple times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously migration didn't have an easy way to clean up the listening transport, so migrate recovery was only allowed to execute once. That was done with a trick flag, postcopy_recover_triggered. Now the facility is already there. Drop postcopy_recover_triggered and instead allow a new migrate-recover to release the previous listener transport. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Message-Id: <20220331150857.74406-8-peterx@redhat.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 13 ++----------- migration/migration.h | 1 - migration/savevm.c | 3 --- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index b1659b7092..cc00b344a9 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2162,11 +2162,8 @@ void qmp_migrate_recover(const char *uri, Error **errp) return; } - if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, - false, true) == true) { - error_setg(errp, "Migrate recovery is triggered already"); - return; - } + /* If there's an existing transport, release it */ + migration_incoming_transport_cleanup(mis); /* * Note that this call will never start a real migration; it will @@ -2174,12 +2171,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) * to continue using that newly established channel.
*/ qemu_start_incoming_migration(uri, errp); - - /* Safe to dereference with the assert above */ - if (*errp) { - /* Reset the flag so user could still retry */ - qatomic_set(&mis->postcopy_recover_triggered, false); - } } void qmp_migrate_pause(Error **errp) diff --git a/migration/migration.h b/migration/migration.h index f17ccc657c..a863032b71 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -139,7 +139,6 @@ struct MigrationIncomingState { struct PostcopyBlocktimeContext *blocktime_ctx; /* notify PAUSED postcopy incoming migrations to try to continue */ - bool postcopy_recover_triggered; QemuSemaphore postcopy_pause_sem_dst; QemuSemaphore postcopy_pause_sem_fault; diff --git a/migration/savevm.c b/migration/savevm.c index 02ed94c180..d9076897b8 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2589,9 +2589,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) assert(migrate_postcopy_ram()); - /* Clear the triggered bit to allow one recovery */ - mis->postcopy_recover_triggered = false; - /* * Unregister yank with either from/to src would work, since ioc behind it * is the same From f912ec5b2d65644116ff496b58d7c9145c19e4c0 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 6 Apr 2022 11:25:15 +0100 Subject: [PATCH 17/18] migration: Fix operator type Clang spotted an & that should have been an &&; fix it. Reported by: David Binderman / https://gitlab.com/dcb Fixes: 65dacaa04fa ("migration: introduce save_normal_page()") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/963 Signed-off-by: Dr. David Alan Gilbert Message-Id: <20220406102515.96320-1-dgilbert@redhat.com> Reviewed-by: Peter Maydell Reviewed-by: Peter Xu Signed-off-by: Dr. 
David Alan Gilbert --- migration/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 253fe4b756..a2489a2699 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1291,7 +1291,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, offset | RAM_SAVE_FLAG_PAGE)); if (async) { qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, - migrate_release_ram() & + migrate_release_ram() && migration_in_postcopy()); } else { qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); From 552de79bfdd5e9e53847eb3c6d6e4cd898a4370e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 13 Apr 2022 12:33:29 +0100 Subject: [PATCH 18/18] migration: Read state once The 'status' field for the migration is updated normally using an atomic operation from the migration thread. Most readers of it aren't that careful, and in most cases it doesn't matter. In query_migrate->fill_source_migration_info the 'state' is read twice; the first time to decide which state fields to fill in, and then secondly to copy the state to the status field; that can end up with a status that's inconsistent; e.g. setting up the fields for 'setup' and then having an 'active' status. In that case libvirt gets upset by the lack of ram info. The symptom is: libvirt.libvirtError: internal error: migration was active, but no RAM info was set Read the state exactly once in fill_source_migration_info. This is a possible fix for: https://bugzilla.redhat.com/show_bug.cgi?id=2074205 Signed-off-by: Dr. David Alan Gilbert Message-Id: <20220413113329.103696-1-dgilbert@redhat.com> Reviewed-by: Juan Quintela Reviewed-by: Peter Xu Signed-off-by: Dr. 
David Alan Gilbert --- migration/migration.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index cc00b344a9..5a31b23bd6 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1084,6 +1084,7 @@ static void populate_disk_info(MigrationInfo *info) static void fill_source_migration_info(MigrationInfo *info) { MigrationState *s = migrate_get_current(); + int state = qatomic_read(&s->state); GSList *cur_blocker = migration_blockers; info->blocked_reasons = NULL; @@ -1103,7 +1104,7 @@ static void fill_source_migration_info(MigrationInfo *info) } info->has_blocked_reasons = info->blocked_reasons != NULL; - switch (s->state) { + switch (state) { case MIGRATION_STATUS_NONE: /* no migration has happened ever */ /* do not overwrite destination migration status */ @@ -1148,7 +1149,7 @@ static void fill_source_migration_info(MigrationInfo *info) info->has_status = true; break; } - info->status = s->state; + info->status = state; } typedef enum WriteTrackingSupport {