From 5e7577a101d8db8015c7dbc3ab977a41d5b3afdc Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 9 Oct 2017 18:07:56 +0200 Subject: [PATCH 01/21] migration: Fix migrate_test_apply for multifd parameters They were missing when introduced on the tree Signed-off-by: Juan Quintela Reviewed-by: Peter Xu --- migration/migration.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index 98429dc843..fb62a639d8 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -865,6 +865,12 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_block_incremental) { dest->block_incremental = params->block_incremental; } + if (params->has_x_multifd_channels) { + dest->x_multifd_channels = params->x_multifd_channels; + } + if (params->has_x_multifd_page_count) { + dest->x_multifd_page_count = params->x_multifd_page_count; + } } static void migrate_params_apply(MigrateSetParameters *params) From ceaaecb49ff99b871aac64329c1a0708abc696a4 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 6 Oct 2017 18:07:42 +0200 Subject: [PATCH 02/21] migratiom: Remove max_item_age parameter It was not used at all since commit: 27af7d6ea5015e5ef1f7985eab94a8a218267a2b which replaced its use by the dirty sync count. Signed-off-by: Juan Quintela Reviewed-by: Peter Xu --- migration/page_cache.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/migration/page_cache.c b/migration/page_cache.c index ba984c4858..381e555ddb 100644 --- a/migration/page_cache.c +++ b/migration/page_cache.c @@ -41,7 +41,6 @@ struct PageCache { CacheItem *page_cache; unsigned int page_size; int64_t max_num_items; - uint64_t max_item_age; int64_t num_items; }; @@ -69,7 +68,6 @@ PageCache *cache_init(int64_t num_pages, unsigned int page_size) } cache->page_size = page_size; cache->num_items = 0; - cache->max_item_age = 0; cache->max_num_items = num_pages; DPRINTF("Setting cache buckets to %" PRId64 "\n", cache->max_num_items); From 9ca3f963949c88ac58f81de5e1e391bfcd7dbaa3 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 6 Oct 2017 18:17:41 +0200 Subject: [PATCH 03/21] migration: Make cache size elements use the right types Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert --- migration/page_cache.c | 8 ++++---- migration/page_cache.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/migration/page_cache.c b/migration/page_cache.c index 381e555ddb..6b2dd77cf0 100644 --- a/migration/page_cache.c +++ b/migration/page_cache.c @@ -39,12 +39,12 @@ struct CacheItem { struct PageCache { CacheItem *page_cache; - unsigned int page_size; - int64_t max_num_items; - int64_t num_items; + size_t page_size; + size_t max_num_items; + size_t num_items; }; -PageCache *cache_init(int64_t num_pages, unsigned int page_size) +PageCache *cache_init(size_t num_pages, size_t page_size) { int64_t i; diff --git a/migration/page_cache.h b/migration/page_cache.h index 4fadd0c501..931868b857 100644 --- a/migration/page_cache.h +++ b/migration/page_cache.h @@ -28,7 +28,7 @@ typedef struct PageCache PageCache; * @num_pages: cache maximal number of cached pages * @page_size: cache page size */ -PageCache *cache_init(int64_t num_pages, unsigned int page_size); +PageCache *cache_init(size_t num_pages, size_t page_size); /** * cache_fini: free all cache resources From 8acabf69ea36a5d8c09b4015b350afb3fc3bd12f Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 5 Oct 2017 22:00:31 +0200 Subject: [PATCH 04/21] migration: Move xbzrle cache resize error handling to xbzrle_cache_resize Signed-off-by: Juan Quintela Reviewed-by: Peter Xu --- migration/migration.c | 18 +----------------- migration/ram.c | 22 ++++++++++++++++++++-- migration/ram.h | 2 +- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index fb62a639d8..3feffb5e26 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1373,24 +1373,8 @@ void qmp_migrate_set_cache_size(int64_t value, Error **errp) MigrationState *s = migrate_get_current(); int64_t new_size; - /* Check for truncation */ - if (value != (size_t)value) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", - "exceeding address space"); - return; - } - - /* Cache should not be larger than guest ram size */ - if (value > ram_bytes_total()) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", - "exceeds guest ram size "); - return; - } - - new_size = xbzrle_cache_resize(value); + new_size = xbzrle_cache_resize(value, errp); if (new_size < 0) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", - "is smaller than page size"); return; } diff --git a/migration/ram.c b/migration/ram.c index b83f8977c5..7c3acad029 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -42,6 +42,7 @@ #include "postcopy-ram.h" #include "migration/page_cache.h" #include "qemu/error-report.h" +#include "qapi/qmp/qerror.h" #include "trace.h" #include "exec/ram_addr.h" #include "qemu/rcu_queue.h" @@ -113,13 +114,30 @@ static void XBZRLE_cache_unlock(void) * Returns the new_size or negative in case of error. * * @new_size: new cache size + * @errp: set *errp if the check failed, with reason */ -int64_t xbzrle_cache_resize(int64_t new_size) +int64_t xbzrle_cache_resize(int64_t new_size, Error **errp) { PageCache *new_cache; int64_t ret; + /* Check for truncation */ + if (new_size != (size_t)new_size) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "exceeding address space"); + return -1; + } + + /* Cache should not be larger than guest ram size */ + if (new_size > ram_bytes_total()) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "exceeds guest ram size"); + return -1; + } + if (new_size < TARGET_PAGE_SIZE) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "is smaller than one target page size"); return -1; } @@ -132,7 +150,7 @@ int64_t xbzrle_cache_resize(int64_t new_size) new_cache = cache_init(new_size / TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); if (!new_cache) { - error_report("Error creating cache"); + error_setg(errp, "Error creating cache"); ret = -1; goto out; } diff --git a/migration/ram.h b/migration/ram.h index 4a72d66503..511b3dc582 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -35,7 +35,7 @@ extern MigrationStats ram_counters; extern XBZRLECacheStats xbzrle_counters; -int64_t xbzrle_cache_resize(int64_t new_size); +int64_t xbzrle_cache_resize(int64_t new_size, Error **errp); uint64_t ram_bytes_remaining(void); uint64_t ram_bytes_total(void); From 80f8dfde97e89739d7b9edcf0afceaed3f7f2aad Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 6 Oct 2017 22:30:45 +0200 Subject: [PATCH 05/21] migration: Make cache_init() take an error parameter Once there, take a total size instead of the size of the pages. We move the check that the new_size is bigger than one page from xbzrle_cache_resize(). Signed-off-by: Juan Quintela Reviewed-by: Peter Xu -- Fix typo spotted by Peter Xu --- migration/page_cache.c | 17 +++++++++++------ migration/page_cache.h | 7 +++---- migration/ram.c | 18 +++++------------- 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/migration/page_cache.c b/migration/page_cache.c index 6b2dd77cf0..9a9d13d6a2 100644 --- a/migration/page_cache.c +++ b/migration/page_cache.c @@ -14,6 +14,8 @@ #include "qemu/osdep.h" +#include "qapi/qmp/qerror.h" +#include "qapi/error.h" #include "qemu-common.h" #include "qemu/host-utils.h" #include "migration/page_cache.h" @@ -44,21 +46,23 @@ struct PageCache { size_t num_items; }; -PageCache *cache_init(size_t num_pages, size_t page_size) +PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp) { int64_t i; - + size_t num_pages = new_size / page_size; PageCache *cache; - if (num_pages <= 0) { - DPRINTF("invalid number of pages\n"); + if (new_size < page_size) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "is smaller than one target page size"); return NULL; } /* We prefer not to abort if there is no memory */ cache = g_try_malloc(sizeof(*cache)); if (!cache) { - DPRINTF("Failed to allocate cache\n"); + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "Failed to allocate cache"); return NULL; } /* round down to the nearest power of 2 */ @@ -76,7 +80,8 @@ PageCache *cache_init(size_t num_pages, size_t page_size) cache->page_cache = g_try_malloc((cache->max_num_items) * sizeof(*cache->page_cache)); if (!cache->page_cache) { - DPRINTF("Failed to allocate cache->page_cache\n"); + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "Failed to allocate page cache"); g_free(cache); return NULL; } diff --git a/migration/page_cache.h b/migration/page_cache.h index 931868b857..0cb94498a0 100644 --- a/migration/page_cache.h +++ b/migration/page_cache.h @@ -24,12 +24,11 @@ typedef struct PageCache PageCache; * * Returns new allocated cache or NULL on error * - * @cache pointer to the PageCache struct - * @num_pages: cache maximal number of cached pages + * @cache_size: cache size in bytes * @page_size: cache page size + * @errp: set *errp if the check failed, with reason */ -PageCache *cache_init(size_t num_pages, size_t page_size); - +PageCache *cache_init(int64_t cache_size, size_t page_size, Error **errp); /** * cache_fini: free all cache resources * @cache pointer to the PageCache struct diff --git a/migration/ram.c b/migration/ram.c index 7c3acad029..47501460c8 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -135,22 +135,14 @@ int64_t xbzrle_cache_resize(int64_t new_size, Error **errp) return -1; } - if (new_size < TARGET_PAGE_SIZE) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", - "is smaller than one target page size"); - return -1; - } - XBZRLE_cache_lock(); if (XBZRLE.cache != NULL) { if (pow2floor(new_size) == migrate_xbzrle_cache_size()) { goto out_new_size; } - new_cache = cache_init(new_size / TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE); + new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp); if (!new_cache) { - error_setg(errp, "Error creating cache"); ret = -1; goto out; } @@ -2028,6 +2020,7 @@ err: static int ram_state_init(RAMState **rsp) { *rsp = g_new0(RAMState, 1); + Error *local_err = NULL; qemu_mutex_init(&(*rsp)->bitmap_mutex); qemu_mutex_init(&(*rsp)->src_page_req_mutex); @@ -2036,12 +2029,11 @@ static int ram_state_init(RAMState **rsp) if (migrate_use_xbzrle()) { XBZRLE_cache_lock(); XBZRLE.zero_target_page = g_malloc0(TARGET_PAGE_SIZE); - XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / - TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE); + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(), + TARGET_PAGE_SIZE, &local_err); if (!XBZRLE.cache) { XBZRLE_cache_unlock(); - error_report("Error creating cache"); + error_report_err(local_err); g_free(*rsp); *rsp = NULL; return -1; From 93fbd0314ec060ffaf90169a06d5737fa97ffb25 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Oct 2017 10:05:50 +0100 Subject: [PATCH 06/21] migration: Add 'pause-before-switchover' capability When 'pause-before-switchover' is enabled, the outgoing migration will pause before invalidating the block devices and serializing the device state. At this point the management layer gets the chance to clean up any device jobs or other device users before the migration completes. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 10 ++++++++++ migration/migration.h | 1 + qapi/migration.json | 5 ++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 3feffb5e26..c041ec7aed 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1511,6 +1511,16 @@ bool migrate_use_multifd(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD]; } +bool migrate_pause_before_switchover(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[ + MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; +} + int migrate_multifd_channels(void) { MigrationState *s; diff --git a/migration/migration.h b/migration/migration.h index b83cceadc4..969866303e 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -177,6 +177,7 @@ bool migrate_zero_blocks(void); bool migrate_auto_converge(void); bool migrate_use_multifd(void); +bool migrate_pause_before_switchover(void); int migrate_multifd_channels(void); int migrate_multifd_page_count(void); diff --git a/qapi/migration.json b/qapi/migration.json index f8b365e3f5..4960231ba2 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -341,6 +341,9 @@ # @return-path: If enabled, migration will use the return path even # for precopy. (since 2.10) # +# @pause-before-switchover: Pause outgoing migration before serialising device +# state and before disabling block IO (since 2.11) +# # @x-multifd: Use more than one fd for migration (since 2.11) # # Since: 1.2 @@ -348,7 +351,7 @@ { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', - 'block', 'return-path', 'x-multifd' ] } + 'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] } ## # @MigrationCapabilityStatus: From 31e060774cf5c3b9945f6f16d6c18d6eae18e4d9 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Oct 2017 10:05:51 +0100 Subject: [PATCH 07/21] migration: Add 'pre-switchover' and 'device' statuses Add two statuses for use when the 'pause-before-switchover' capability is enabled. 'pre-switchover' is the state that we wait in for management to allow us to continue. 'device' is the state we enter while serialising the devices after management gives us the OK. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 6 ++++++ qapi/migration.json | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index c041ec7aed..f15372e007 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -526,6 +526,8 @@ static bool migration_is_setup_or_active(int state) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_SETUP: + case MIGRATION_STATUS_PRE_SWITCHOVER: + case MIGRATION_STATUS_DEVICE: return true; default: @@ -600,6 +602,8 @@ MigrationInfo *qmp_query_migrate(Error **errp) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_CANCELLING: case MIGRATION_STATUS_POSTCOPY_ACTIVE: + case MIGRATION_STATUS_PRE_SWITCHOVER: + case MIGRATION_STATUS_DEVICE: /* TODO add some postcopy stats */ info->has_status = true; info->has_total_time = true; @@ -1189,6 +1193,8 @@ bool migration_is_idle(void) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_COLO: + case MIGRATION_STATUS_PRE_SWITCHOVER: + case MIGRATION_STATUS_DEVICE: return false; case MIGRATION_STATUS__MAX: g_assert_not_reached(); diff --git a/qapi/migration.json b/qapi/migration.json index 4960231ba2..b56f95db64 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -96,12 +96,18 @@ # @colo: VM is in the process of fault tolerance, VM can not get into this # state unless colo capability is enabled for migration. (since 2.8) # +# @pre-switchover: Paused before device serialisation. (since 2.11) +# +# @device: During device serialisation when pause-before-switchover is enabled +# (since 2.11) +# # Since: 2.3 # ## { 'enum': 'MigrationStatus', 'data': [ 'none', 'setup', 'cancelling', 'cancelled', - 'active', 'postcopy-active', 'completed', 'failed', 'colo' ] } + 'active', 'postcopy-active', 'completed', 'failed', 'colo', + 'pre-switchover', 'device' ] } ## # @MigrationInfo: From e91d8951d59d483f085f7650381b8e55a1a55e4c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Oct 2017 10:05:52 +0100 Subject: [PATCH 08/21] migration: Wait for semaphore before completing migration Wait for a semaphore before completing the migration, if the previously added capability was enabled. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 38 ++++++++++++++++++++++++++++++++++++++ migration/migration.h | 3 +++ 2 files changed, 41 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index f15372e007..ef84d2c1fb 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1957,6 +1957,39 @@ fail: return -1; } +/** + * migration_maybe_pause: Pause if required to by + * migrate_pause_before_switchover called with the iothread locked + * Returns: 0 on success + */ +static int migration_maybe_pause(MigrationState *s, int *current_active_state) +{ + if (!migrate_pause_before_switchover()) { + return 0; + } + + /* Since leaving this state is not atomic with posting the semaphore + * it's possible that someone could have issued multiple migrate_continue + * and the semaphore is incorrectly positive at this point; + * the docs say it's undefined to reinit a semaphore that's already + * init'd, so use timedwait to eat up any existing posts. + */ + while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { + /* This block intentionally left blank */ + } + + qemu_mutex_unlock_iothread(); + migrate_set_state(&s->state, *current_active_state, + MIGRATION_STATUS_PRE_SWITCHOVER); + qemu_sem_wait(&s->pause_sem); + migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, + MIGRATION_STATUS_DEVICE); + *current_active_state = MIGRATION_STATUS_DEVICE; + qemu_mutex_lock_iothread(); + + return s->state == MIGRATION_STATUS_DEVICE ? 0 : -EINVAL; +} + /** * migration_completion: Used by migration_thread when there's not much left. * The caller 'breaks' the loop when this returns. @@ -1982,6 +2015,9 @@ static void migration_completion(MigrationState *s, int current_active_state, if (!ret) { bool inactivate = !migrate_colo_enabled(); ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + if (ret >= 0) { + ret = migration_maybe_pause(s, ¤t_active_state); + } if (ret >= 0) { qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, @@ -2363,6 +2399,7 @@ static void migration_instance_finalize(Object *obj) g_free(params->tls_hostname); g_free(params->tls_creds); + qemu_sem_destroy(&ms->pause_sem); } static void migration_instance_init(Object *obj) @@ -2373,6 +2410,7 @@ static void migration_instance_init(Object *obj) ms->state = MIGRATION_STATUS_NONE; ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE; ms->mbps = -1; + qemu_sem_init(&ms->pause_sem, 0); params->tls_hostname = g_strdup(""); params->tls_creds = g_strdup(""); diff --git a/migration/migration.h b/migration/migration.h index 969866303e..cd988a99b9 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -121,6 +121,9 @@ struct MigrationState /* Flag set once the migration thread called bdrv_inactivate_all */ bool block_inactive; + /* Migration is paused due to pause-before-switchover */ + QemuSemaphore pause_sem; + /* The semaphore is used to notify COLO thread that failover is finished */ QemuSemaphore colo_exit_sem; From 89cfc02cb6e3fdaf8ae246493ea51e75be2818c1 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Oct 2017 10:05:53 +0100 Subject: [PATCH 09/21] migration: migrate-continue A new qmp command allows the caller to continue from a given paused state. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 11 +++++++++++ qapi/migration.json | 17 +++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index ef84d2c1fb..90bfdc3a7c 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1374,6 +1374,17 @@ void qmp_migrate_cancel(Error **errp) migrate_fd_cancel(migrate_get_current()); } +void qmp_migrate_continue(MigrationStatus state, Error **errp) +{ + MigrationState *s = migrate_get_current(); + if (s->state != state) { + error_setg(errp, "Migration not in expected state: %s", + MigrationStatus_str(s->state)); + return; + } + qemu_sem_post(&s->pause_sem); +} + void qmp_migrate_set_cache_size(int64_t value, Error **errp) { MigrationState *s = migrate_get_current(); diff --git a/qapi/migration.json b/qapi/migration.json index b56f95db64..272f191551 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -876,6 +876,23 @@ ## { 'command': 'migrate_cancel' } +## +# @migrate-continue: +# +# Continue migration when it's in a paused state. +# +# @state: The state the migration is currently expected to be in +# +# Returns: nothing on success +# Since: 2.11 +# Example: +# +# -> { "execute": "migrate-continue" , "arguments": +# { "state": "pre-switchover" } } +# <- { "return": {} } +## +{ 'command': 'migrate-continue', 'data': {'state': 'MigrationStatus'} } + ## # @migrate_set_downtime: # From 94ae12cba4f18253e3cf5f9a70335e22870053b4 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Oct 2017 10:05:54 +0100 Subject: [PATCH 10/21] migrate: HMP migate_continue HMP equivalent to the just added migrate-continue Unpause a migrate paused at a given state. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- hmp-commands.hx | 12 ++++++++++++ hmp.c | 13 +++++++++++++ hmp.h | 1 + 3 files changed, 26 insertions(+) diff --git a/hmp-commands.hx b/hmp-commands.hx index 1941e19932..4afd57cf5f 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -959,7 +959,19 @@ STEXI @item migrate_cancel @findex migrate_cancel Cancel the current VM migration. +ETEXI + { + .name = "migrate_continue", + .args_type = "state:s", + .params = "state", + .help = "Continue migration from the given paused state", + .cmd = hmp_migrate_continue, + }, +STEXI +@item migrate_continue @var{state} +@findex migrate_continue +Continue migration from the paused state @var{state} ETEXI { diff --git a/hmp.c b/hmp.c index ec61329ebb..41fcce6f5a 100644 --- a/hmp.c +++ b/hmp.c @@ -1495,6 +1495,19 @@ void hmp_migrate_cancel(Monitor *mon, const QDict *qdict) qmp_migrate_cancel(NULL); } +void hmp_migrate_continue(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + const char *state = qdict_get_str(qdict, "state"); + int val = qapi_enum_parse(&MigrationStatus_lookup, state, -1, &err); + + if (val >= 0) { + qmp_migrate_continue(val, &err); + } + + hmp_handle_error(mon, &err); +} + void hmp_migrate_incoming(Monitor *mon, const QDict *qdict) { Error *err = NULL; diff --git a/hmp.h b/hmp.h index 3605003e4c..a6f56b1f29 100644 --- a/hmp.h +++ b/hmp.h @@ -68,6 +68,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict); void hmp_delvm(Monitor *mon, const QDict *qdict); void hmp_info_snapshots(Monitor *mon, const QDict *qdict); void hmp_migrate_cancel(Monitor *mon, const QDict *qdict); +void hmp_migrate_continue(Monitor *mon, const QDict *qdict); void hmp_migrate_incoming(Monitor *mon, const QDict *qdict); void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict); void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict); From a7b36b486dd58d8f44f788a2a2efa6a4fa3b1223 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Oct 2017 10:05:55 +0100 Subject: [PATCH 11/21] migration: allow cancel to unpause If a migration_cancel is issued during the new paused state, kick the pause_sem to get to unpause so it can cancel. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index 90bfdc3a7c..b523d8f215 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1114,6 +1114,10 @@ static void migrate_fd_cancel(MigrationState *s) if (!migration_is_setup_or_active(old_state)) { break; } + /* If the migration is paused, kick it out of the pause */ + if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) { + qemu_sem_post(&s->pause_sem); + } migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING); } while (s->state != MIGRATION_STATUS_CANCELLING); From 0331c8cabf6168aa263aa0b25f5e135b328606ac Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Oct 2017 10:05:56 +0100 Subject: [PATCH 12/21] migration: pause-before-switchover for postcopy Add pause-before-switchover support for postcopy. After starting postcopy it will transition active->pre-switchover->postcopy_active Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index b523d8f215..a058f8b46d 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -104,6 +104,9 @@ enum mig_rp_message_type { static MigrationState *current_migration; static bool migration_object_check(MigrationState *ms, Error **errp); +static int migration_maybe_pause(MigrationState *s, + int *current_active_state, + int new_state); void migration_object_init(void) { @@ -1820,8 +1823,11 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) QEMUFile *fb; int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); bool restart_block = false; - migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_POSTCOPY_ACTIVE); + int cur_state = MIGRATION_STATUS_ACTIVE; + if (!migrate_pause_before_switchover()) { + migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_POSTCOPY_ACTIVE); + } trace_postcopy_start(); qemu_mutex_lock_iothread(); @@ -1835,6 +1841,12 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) goto fail; } + ret = migration_maybe_pause(ms, &cur_state, + MIGRATION_STATUS_POSTCOPY_ACTIVE); + if (ret < 0) { + goto fail; + } + ret = bdrv_inactivate_all(); if (ret < 0) { goto fail; @@ -1977,7 +1989,9 @@ fail: * migrate_pause_before_switchover called with the iothread locked * Returns: 0 on success */ -static int migration_maybe_pause(MigrationState *s, int *current_active_state) +static int migration_maybe_pause(MigrationState *s, + int *current_active_state, + int new_state) { if (!migrate_pause_before_switchover()) { return 0; @@ -1998,11 +2012,11 @@ static int migration_maybe_pause(MigrationState *s, int *current_active_state) MIGRATION_STATUS_PRE_SWITCHOVER); qemu_sem_wait(&s->pause_sem); migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, - MIGRATION_STATUS_DEVICE); - *current_active_state = MIGRATION_STATUS_DEVICE; + new_state); + *current_active_state = new_state; qemu_mutex_lock_iothread(); - return s->state == MIGRATION_STATUS_DEVICE ? 0 : -EINVAL; + return s->state == new_state ? 0 : -EINVAL; } /** @@ -2031,7 +2045,8 @@ static void migration_completion(MigrationState *s, int current_active_state, bool inactivate = !migrate_colo_enabled(); ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); if (ret >= 0) { - ret = migration_maybe_pause(s, ¤t_active_state); + ret = migration_maybe_pause(s, ¤t_active_state, + MIGRATION_STATUS_DEVICE); } if (ret >= 0) { qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); From 7d00ee6ad6c2dbeeb68e8d5343ec77bc5873d8f8 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 19 Oct 2017 14:31:57 +0800 Subject: [PATCH 13/21] migration: provide ram_state_init() The old ram_state_init() is not really initializing the RAMState only, but including lots of other stuff that is RAM-related. Renaming it to ram_init_all(). Instead, provide a real ram_state_init(). Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 47501460c8..13f4b3d101 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2019,13 +2019,36 @@ err: static int ram_state_init(RAMState **rsp) { - *rsp = g_new0(RAMState, 1); - Error *local_err = NULL; + *rsp = g_try_new0(RAMState, 1); + + if (!*rsp) { + error_report("%s: Init ramstate fail", __func__); + return -1; + } qemu_mutex_init(&(*rsp)->bitmap_mutex); qemu_mutex_init(&(*rsp)->src_page_req_mutex); QSIMPLEQ_INIT(&(*rsp)->src_page_requests); + /* + * Count the total number of pages used by ram blocks not including any + * gaps due to alignment or unplugs. + */ + (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; + + ram_state_reset(*rsp); + + return 0; +} + +static int ram_init_all(RAMState **rsp) +{ + Error *local_err = NULL; + + if (ram_state_init(rsp)) { + return -1; + } + if (migrate_use_xbzrle()) { XBZRLE_cache_lock(); XBZRLE.zero_target_page = g_malloc0(TARGET_PAGE_SIZE); @@ -2065,7 +2088,6 @@ static int ram_state_init(RAMState **rsp) qemu_mutex_lock_ramlist(); rcu_read_lock(); - ram_state_reset(*rsp); /* Skip setting bitmap if there is no RAM */ if (ram_bytes_total()) { @@ -2083,12 +2105,6 @@ static int ram_state_init(RAMState **rsp) } } - /* - * Count the total number of pages used by ram blocks not including any - * gaps due to alignment or unplugs. - */ - (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; - memory_global_dirty_log_start(); migration_bitmap_sync(*rsp); qemu_mutex_unlock_ramlist(); @@ -2120,7 +2136,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) /* migration has already setup the bitmap, reuse it. */ if (!migration_in_colo_state()) { - if (ram_state_init(rsp) != 0) { + if (ram_init_all(rsp) != 0) { return -1; } } From 7d7c96be7b25f285b3759ec0545bbe82dd0d8076 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 19 Oct 2017 14:31:58 +0800 Subject: [PATCH 14/21] migration: provide ram_state_cleanup There are two Mutexes that are created but not yet destroyed for RAMState. Fix that. Since we are at it, provide helper function to clean up RAMState. Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 13f4b3d101..d91e8787ae 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1576,6 +1576,15 @@ static void xbzrle_load_cleanup(void) XBZRLE.decoded_buf = NULL; } +static void ram_state_cleanup(RAMState **rsp) +{ + migration_page_queue_free(*rsp); + qemu_mutex_destroy(&(*rsp)->bitmap_mutex); + qemu_mutex_destroy(&(*rsp)->src_page_req_mutex); + g_free(*rsp); + *rsp = NULL; +} + static void ram_save_cleanup(void *opaque) { RAMState **rsp = opaque; @@ -1605,10 +1614,8 @@ static void ram_save_cleanup(void *opaque) XBZRLE.zero_target_page = NULL; } XBZRLE_cache_unlock(); - migration_page_queue_free(*rsp); compress_threads_save_cleanup(); - g_free(*rsp); - *rsp = NULL; + ram_state_cleanup(rsp); } static void ram_state_reset(RAMState *rs) From 84593a0807004d852132eaa56edf24d55793d480 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 19 Oct 2017 14:31:59 +0800 Subject: [PATCH 15/21] migration: clean up xbzrle cache init/destroy Let's further simplify ram_init_all() and ram_save_cleanup() by abstract all the XBZRLE related codes into their own functions. When allocating xbzrle cache, we are always very careful on -ENOMEM; which makes sense. Replacing the last g_malloc0() with g_try_malloc0(), then refactor the logic a bit. This patch should be fixing some memory leaks when some memory allocation failed for XBZRLE in the past. Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 123 ++++++++++++++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 46 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index d91e8787ae..60f9cfe8be 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1585,6 +1585,22 @@ static void ram_state_cleanup(RAMState **rsp) *rsp = NULL; } +static void xbzrle_cleanup(void) +{ + XBZRLE_cache_lock(); + if (XBZRLE.cache) { + cache_fini(XBZRLE.cache); + g_free(XBZRLE.encoded_buf); + g_free(XBZRLE.current_buf); + g_free(XBZRLE.zero_target_page); + XBZRLE.cache = NULL; + XBZRLE.encoded_buf = NULL; + XBZRLE.current_buf = NULL; + XBZRLE.zero_target_page = NULL; + } + XBZRLE_cache_unlock(); +} + static void ram_save_cleanup(void *opaque) { RAMState **rsp = opaque; @@ -1602,18 +1618,7 @@ static void ram_save_cleanup(void *opaque) block->unsentmap = NULL; } - XBZRLE_cache_lock(); - if (XBZRLE.cache) { - cache_fini(XBZRLE.cache); - g_free(XBZRLE.encoded_buf); - g_free(XBZRLE.current_buf); - g_free(XBZRLE.zero_target_page); - XBZRLE.cache = NULL; - XBZRLE.encoded_buf = NULL; - XBZRLE.current_buf = NULL; - XBZRLE.zero_target_page = NULL; - } - XBZRLE_cache_unlock(); + xbzrle_cleanup(); compress_threads_save_cleanup(); ram_state_cleanup(rsp); } @@ -2024,6 +2029,63 @@ err: return ret; } +/* + * For every allocation, we will try not to crash the VM if the + * allocation failed. + */ +static int xbzrle_init(void) +{ + Error *local_err = NULL; + + if (!migrate_use_xbzrle()) { + return 0; + } + + XBZRLE_cache_lock(); + + XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE); + if (!XBZRLE.zero_target_page) { + error_report("%s: Error allocating zero page", __func__); + goto err_out; + } + + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(), + TARGET_PAGE_SIZE, &local_err); + if (!XBZRLE.cache) { + error_report_err(local_err); + goto free_zero_page; + } + + XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); + if (!XBZRLE.encoded_buf) { + error_report("%s: Error allocating encoded_buf", __func__); + goto free_cache; + } + + XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); + if (!XBZRLE.current_buf) { + error_report("%s: Error allocating current_buf", __func__); + goto free_encoded_buf; + } + + /* We are all good */ + XBZRLE_cache_unlock(); + return 0; + +free_encoded_buf: + g_free(XBZRLE.encoded_buf); + XBZRLE.encoded_buf = NULL; +free_cache: + cache_fini(XBZRLE.cache); + XBZRLE.cache = NULL; +free_zero_page: + g_free(XBZRLE.zero_target_page); + XBZRLE.zero_target_page = NULL; +err_out: + XBZRLE_cache_unlock(); + return -ENOMEM; +} + static int ram_state_init(RAMState **rsp) { *rsp = g_try_new0(RAMState, 1); @@ -2050,44 +2112,13 @@ static int ram_state_init(RAMState **rsp) static int ram_init_all(RAMState **rsp) { - Error *local_err = NULL; - if (ram_state_init(rsp)) { return -1; } - if (migrate_use_xbzrle()) { - XBZRLE_cache_lock(); - XBZRLE.zero_target_page = g_malloc0(TARGET_PAGE_SIZE); - XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(), - TARGET_PAGE_SIZE, &local_err); - if (!XBZRLE.cache) { - XBZRLE_cache_unlock(); - error_report_err(local_err); - g_free(*rsp); - *rsp = NULL; - return -1; - } - XBZRLE_cache_unlock(); - - /* We prefer not to abort if there is no memory */ - XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); - if (!XBZRLE.encoded_buf) { - error_report("Error allocating encoded_buf"); - g_free(*rsp); - *rsp = NULL; - return -1; - } - - XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); - if (!XBZRLE.current_buf) { - error_report("Error allocating current_buf"); - g_free(XBZRLE.encoded_buf); - XBZRLE.encoded_buf = NULL; - g_free(*rsp); - *rsp = NULL; - return -1; - } + if (xbzrle_init()) { + ram_state_cleanup(rsp); + return -1; } /* For memory_global_dirty_log_start below. */ From d6eff5d75d6b8d2fb18dca4ebd9f02a16d8e7f3b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 19 Oct 2017 14:32:00 +0800 Subject: [PATCH 16/21] migration: new ram_init_bitmaps() Rearrange the bitmap initialization and the first sync. Since at it, make sure the locks are taken/released in correct order (I moved RCU unlock upper - though it may not affect much). Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 63 ++++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 60f9cfe8be..1b19a899c9 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2110,6 +2110,41 @@ static int ram_state_init(RAMState **rsp) return 0; } +static void ram_list_init_bitmaps(void) +{ + RAMBlock *block; + unsigned long pages; + + /* Skip setting bitmap if there is no RAM */ + if (ram_bytes_total()) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + pages = block->max_length >> TARGET_PAGE_BITS; + block->bmap = bitmap_new(pages); + bitmap_set(block->bmap, 0, pages); + if (migrate_postcopy_ram()) { + block->unsentmap = bitmap_new(pages); + bitmap_set(block->unsentmap, 0, pages); + } + } + } +} + +static void ram_init_bitmaps(RAMState *rs) +{ + /* For memory_global_dirty_log_start below. */ + qemu_mutex_lock_iothread(); + qemu_mutex_lock_ramlist(); + rcu_read_lock(); + + ram_list_init_bitmaps(); + memory_global_dirty_log_start(); + migration_bitmap_sync(rs); + + rcu_read_unlock(); + qemu_mutex_unlock_ramlist(); + qemu_mutex_unlock_iothread(); +} + static int ram_init_all(RAMState **rsp) { if (ram_state_init(rsp)) { @@ -2121,33 +2156,7 @@ static int ram_init_all(RAMState **rsp) return -1; } - /* For memory_global_dirty_log_start below. */ - qemu_mutex_lock_iothread(); - - qemu_mutex_lock_ramlist(); - rcu_read_lock(); - - /* Skip setting bitmap if there is no RAM */ - if (ram_bytes_total()) { - RAMBlock *block; - - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - unsigned long pages = block->max_length >> TARGET_PAGE_BITS; - - block->bmap = bitmap_new(pages); - bitmap_set(block->bmap, 0, pages); - if (migrate_postcopy_ram()) { - block->unsentmap = bitmap_new(pages); - bitmap_set(block->unsentmap, 0, pages); - } - } - } - - memory_global_dirty_log_start(); - migration_bitmap_sync(*rsp); - qemu_mutex_unlock_ramlist(); - qemu_mutex_unlock_iothread(); - rcu_read_unlock(); + ram_init_bitmaps(*rsp); return 0; } From 8be4620be237356e22905d9aa2356ced492970c8 Mon Sep 17 00:00:00 2001 From: Alexey Perevalov Date: Thu, 5 Oct 2017 14:13:18 +0300 Subject: [PATCH 17/21] migration: postcopy_place_page factoring out Need to mark copied pages as closer as possible to the place where it tracks down. That will be necessary in futher patch. Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Alexey Perevalov Signed-off-by: Juan Quintela --- migration/postcopy-ram.c | 13 +++++++------ migration/postcopy-ram.h | 4 ++-- migration/ram.c | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 0de68e8b25..d3073b93b5 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -646,9 +646,10 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, - size_t pagesize) + RAMBlock *rb) { struct uffdio_copy copy_struct; + size_t pagesize = qemu_ram_pagesize(rb); copy_struct.dst = (uint64_t)(uintptr_t)host; copy_struct.src = (uint64_t)(uintptr_t)from; @@ -677,11 +678,11 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize) + RAMBlock *rb) { trace_postcopy_place_page_zero(host); - if (pagesize == getpagesize()) { + if (qemu_ram_pagesize(rb) == getpagesize()) { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host; zero_struct.range.len = getpagesize(); @@ -711,7 +712,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); } return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, - pagesize); + rb); } return 0; @@ -774,14 +775,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, - size_t pagesize) + RAMBlock *rb) { assert(0); return -1; } int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize) + RAMBlock *rb) { assert(0); return -1; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 587a8b86a7..77ea0fd264 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms, * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, - size_t pagesize); + RAMBlock *rb); /* * Place a zero page at (host) atomically * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize); + RAMBlock *rb); /* The current postcopy state is read/set by postcopy_state_get/set * which update it atomically. diff --git a/migration/ram.c b/migration/ram.c index 1b19a899c9..bd4fc5ceb4 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2753,10 +2753,10 @@ static int ram_load_postcopy(QEMUFile *f) if (all_zero) { ret = postcopy_place_page_zero(mis, place_dest, - block->page_size); + block); } else { ret = postcopy_place_page(mis, place_dest, - place_source, block->page_size); + place_source, block); } } if (!ret) { From 727b9d7e4926755e14d9ac2b09777c51cccb9b80 Mon Sep 17 00:00:00 2001 From: Alexey Perevalov Date: Thu, 5 Oct 2017 14:13:19 +0300 Subject: [PATCH 18/21] migration: introduce qemu_ufd_copy_ioctl helper Just for placing auxilary operations inside helper, auxilary operations like: track received pages, notify about copying operation in futher patches. Reviewed-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Signed-off-by: Alexey Perevalov Signed-off-by: Juan Quintela --- migration/postcopy-ram.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index d3073b93b5..8bf6432567 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -641,6 +641,25 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return 0; } +static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, + void *from_addr, uint64_t pagesize) +{ + if (from_addr) { + struct uffdio_copy copy_struct; + copy_struct.dst = (uint64_t)(uintptr_t)host_addr; + copy_struct.src = (uint64_t)(uintptr_t)from_addr; + copy_struct.len = pagesize; + copy_struct.mode = 0; + return ioctl(userfault_fd, UFFDIO_COPY, ©_struct); + } else { + struct uffdio_zeropage zero_struct; + zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; + zero_struct.range.len = pagesize; + zero_struct.mode = 0; + return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct); + } +} + /* * Place a host page (from) at (host) atomically * returns 0 on success @@ -648,20 +667,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, RAMBlock *rb) { - struct uffdio_copy copy_struct; size_t pagesize = qemu_ram_pagesize(rb); - copy_struct.dst = (uint64_t)(uintptr_t)host; - copy_struct.src = (uint64_t)(uintptr_t)from; - copy_struct.len = pagesize; - copy_struct.mode = 0; - /* copy also acks to the kernel waking the stalled thread up * TODO: We can inhibit that ack and only do it if it was requested * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. */ - if (ioctl(mis->userfault_fd, UFFDIO_COPY, ©_struct)) { + if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -683,12 +696,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { - struct uffdio_zeropage zero_struct; - zero_struct.range.start = (uint64_t)(uintptr_t)host; - zero_struct.range.len = getpagesize(); - zero_struct.mode = 0; - - if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { + if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); From f9494614898f46e59bc2243de6fb11ebbfc9cda6 Mon Sep 17 00:00:00 2001 From: Alexey Perevalov Date: Thu, 5 Oct 2017 14:13:20 +0300 Subject: [PATCH 19/21] migration: add bitmap for received page This patch adds ability to track down already received pages, it's necessary for calculation vCPU block time in postcopy migration feature, and for recovery after postcopy migration failure. Also it's necessary to solve shared memory issue in postcopy livemigration. Information about received pages will be transferred to the software virtual bridge (e.g. OVS-VSWITCHD), to avoid fallocate (unmap) for already received pages. fallocate syscall is required for remmaped shared memory, due to remmaping itself blocks ioctl(UFFDIO_COPY, ioctl in this case will end with EEXIT error (struct page is exists after remmap). Bitmap is placed into RAMBlock as another postcopy/precopy related bitmaps. Reviewed-by: Peter Xu Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Signed-off-by: Alexey Perevalov Signed-off-by: Juan Quintela --- include/exec/ram_addr.h | 10 ++++++++++ migration/postcopy-ram.c | 17 ++++++++++++----- migration/ram.c | 40 ++++++++++++++++++++++++++++++++++++++++ migration/ram.h | 5 +++++ 4 files changed, 67 insertions(+), 5 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index d017639f7e..6cbc02aa0f 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -47,6 +47,8 @@ struct RAMBlock { * of the postcopy phase */ unsigned long *unsentmap; + /* bitmap of already received pages in postcopy */ + unsigned long *receivedmap; }; static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) @@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) return (char *)block->host + offset; } +static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, + RAMBlock *rb) +{ + uint64_t host_addr_offset = + (uint64_t)(uintptr_t)(host_addr - (void *)rb->host); + return host_addr_offset >> TARGET_PAGE_BITS; +} + long qemu_getrampagesize(void); unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 8bf6432567..bec6c2c66b 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -642,22 +642,28 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, - void *from_addr, uint64_t pagesize) + void *from_addr, uint64_t pagesize, RAMBlock *rb) { + int ret; if (from_addr) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; copy_struct.src = (uint64_t)(uintptr_t)from_addr; copy_struct.len = pagesize; copy_struct.mode = 0; - return ioctl(userfault_fd, UFFDIO_COPY, ©_struct); + ret = ioctl(userfault_fd, UFFDIO_COPY, ©_struct); } else { struct uffdio_zeropage zero_struct; zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; zero_struct.range.len = pagesize; zero_struct.mode = 0; - return ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct); + ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct); } + if (!ret) { + ramblock_recv_bitmap_set_range(rb, host_addr, + pagesize / qemu_target_page_size()); + } + return ret; } /* @@ -674,7 +680,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. */ - if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize)) { + if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -696,7 +702,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, trace_postcopy_place_page_zero(host); if (qemu_ram_pagesize(rb) == getpagesize()) { - if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize())) { + if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(), + rb)) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); diff --git a/migration/ram.c b/migration/ram.c index bd4fc5ceb4..7f6327f708 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -45,6 +45,7 @@ #include "qapi/qmp/qerror.h" #include "trace.h" #include "exec/ram_addr.h" +#include "exec/target_page.h" #include "qemu/rcu_queue.h" #include "migration/colo.h" #include "migration/block.h" @@ -158,6 +159,35 @@ out: return ret; } +static void ramblock_recv_map_init(void) +{ + RAMBlock *rb; + + RAMBLOCK_FOREACH(rb) { + assert(!rb->receivedmap); + rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits()); + } +} + +int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr) +{ + return test_bit(ramblock_recv_bitmap_offset(host_addr, rb), + rb->receivedmap); +} + +void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr) +{ + set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap); +} + +void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, + size_t nr) +{ + bitmap_set_atomic(rb->receivedmap, + ramblock_recv_bitmap_offset(host_addr, rb), + nr); +} + /* * An outstanding page request, on the source, having been received * and queued @@ -2021,6 +2051,8 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) goto err; } + bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(), + length >> qemu_target_page_bits()); ret = ram_block_discard_range(rb, start, length); err: @@ -2607,13 +2639,20 @@ static int ram_load_setup(QEMUFile *f, void *opaque) { xbzrle_load_setup(); compress_threads_load_setup(); + ramblock_recv_map_init(); return 0; } static int ram_load_cleanup(void *opaque) { + RAMBlock *rb; xbzrle_load_cleanup(); compress_threads_load_cleanup(); + + RAMBLOCK_FOREACH(rb) { + g_free(rb->receivedmap); + rb->receivedmap = NULL; + } return 0; } @@ -2828,6 +2867,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; break; } + ramblock_recv_bitmap_set(block, host); trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); } diff --git a/migration/ram.h b/migration/ram.h index 511b3dc582..f9f7eef894 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -57,4 +57,9 @@ int ram_discard_range(const char *block_name, uint64_t start, size_t length); int ram_postcopy_incoming_init(MigrationIncomingState *mis); void ram_handle_compressed(void *host, uint8_t ch, uint64_t size); + +int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr); +void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); +void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr); + #endif From 40a5532f820e26f98d081a49aff9283cd63bd5fa Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 27 Sep 2017 10:52:11 +0200 Subject: [PATCH 20/21] qapi: Fix grammar in x-multifd-page-count descriptions Reported-by: Eric Blake Signed-off-by: Juan Quintela --- qapi/migration.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qapi/migration.json b/qapi/migration.json index 272f191551..6ae866e1aa 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -480,7 +480,7 @@ # number of sockets used for migration. The # default value is 2 (since 2.11) # -# @x-multifd-page-count: Number of pages sent together to a thread +# @x-multifd-page-count: Number of pages sent together to a thread. # The default value is 16 (since 2.11) # # Since: 2.4 @@ -551,7 +551,7 @@ # number of sockets used for migration. The # default value is 2 (since 2.11) # -# @x-multifd-page-count: Number of pages sent together to a thread +# @x-multifd-page-count: Number of pages sent together to a thread. # The default value is 16 (since 2.11) # # Since: 2.4 @@ -647,7 +647,7 @@ # number of sockets used for migration. # The default value is 2 (since 2.11) # -# @x-multifd-page-count: Number of pages sent together to a thread +# @x-multifd-page-count: Number of pages sent together to a thread. # The default value is 16 (since 2.11) # # Since: 2.4 From 87db1a7d89677e3dbc8b3763e417b9376009bdbb Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Tue, 5 Sep 2017 12:50:22 +0200 Subject: [PATCH 21/21] migration: Improve migration thread error handling We now report errors also when we finish migration, not only on info migrate. We plan to use this error from several places, and we want the first error to happen to win, so we add an mutex to order it. Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert --- migration/migration.c | 19 ++++++++++++++++--- migration/migration.h | 7 ++++++- migration/tls.c | 1 - 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index a058f8b46d..62761d5705 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1084,19 +1084,30 @@ static void migrate_fd_cleanup(void *opaque) MIGRATION_STATUS_CANCELLED); } + if (s->error) { + /* It is used on info migrate. We can't free it */ + error_report_err(error_copy(s->error)); + } notifier_list_notify(&migration_state_notifiers, s); block_cleanup_parameters(s); } +void migrate_set_error(MigrationState *s, const Error *error) +{ + qemu_mutex_lock(&s->error_mutex); + if (!s->error) { + s->error = error_copy(error); + } + qemu_mutex_unlock(&s->error_mutex); +} + void migrate_fd_error(MigrationState *s, const Error *error) { trace_migrate_fd_error(error_get_pretty(error)); assert(s->to_dst_file == NULL); migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); - if (!s->error) { - s->error = error_copy(error); - } + migrate_set_error(s, error); notifier_list_notify(&migration_state_notifiers, s); block_cleanup_parameters(s); } @@ -2427,6 +2438,7 @@ static void migration_instance_finalize(Object *obj) MigrationState *ms = MIGRATION_OBJ(obj); MigrationParameters *params = &ms->parameters; + qemu_mutex_destroy(&ms->error_mutex); g_free(params->tls_hostname); g_free(params->tls_creds); qemu_sem_destroy(&ms->pause_sem); @@ -2441,6 +2453,7 @@ static void migration_instance_init(Object *obj) ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE; ms->mbps = -1; qemu_sem_init(&ms->pause_sem, 0); + qemu_mutex_init(&ms->error_mutex); params->tls_hostname = g_strdup(""); params->tls_creds = g_strdup(""); diff --git a/migration/migration.h b/migration/migration.h index cd988a99b9..8ccdd7a577 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -132,8 +132,12 @@ struct MigrationState int64_t colo_checkpoint_time; QEMUTimer *colo_delay_timer; - /* The last error that occurred */ + /* The first error that has occurred. + We used the mutex to be able to return the 1st error message */ Error *error; + /* mutex to protect errp */ + QemuMutex error_mutex; + /* Do we have to clean up -b/-i from old migrate parameters */ /* This feature is deprecated and will be removed */ bool must_remove_block_options; @@ -162,6 +166,7 @@ bool migration_has_all_channels(void); uint64_t migrate_max_downtime(void); +void migrate_set_error(MigrationState *s, const Error *error); void migrate_fd_error(MigrationState *s, const Error *error); void migrate_fd_connect(MigrationState *s); diff --git a/migration/tls.c b/migration/tls.c index 596e8790bd..026a008667 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -119,7 +119,6 @@ static void migration_tls_outgoing_handshake(QIOTask *task, if (qio_task_propagate_error(task, &err)) { trace_migration_tls_outgoing_handshake_error(error_get_pretty(err)); migrate_fd_error(s, err); - error_free(err); } else { trace_migration_tls_outgoing_handshake_complete(); migration_channel_connect(s, ioc, NULL);