From bdf46d6478df86d5810048459582dc8f5a15b064 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 4 Feb 2016 22:50:30 +0000 Subject: [PATCH 1/6] migration: reorder code to make it symmetric In qemu_savevm_state_complete_precopy(), it iterates on each device to add a json object and transfer related status to destination, while the order of the last two steps could be refined. Current order: json_start_object() save_section_header() vmstate_save() json_end_object() save_section_footer() After the change: json_start_object() save_section_header() vmstate_save() save_section_footer() json_end_object() This patch reorder the code to to make it symmetric. No functional change. Signed-off-by: Wei Yang Reviewed-by: Amit Shah Message-Id: <1454626230-16334-1-git-send-email-richard.weiyang@gmail.com> Signed-off-by: Amit Shah --- migration/savevm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 94f2894243..02e8487441 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1088,12 +1088,11 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only) json_prop_int(vmdesc, "instance_id", se->instance_id); save_section_header(f, se, QEMU_VM_SECTION_FULL); - vmstate_save(f, se, vmdesc); - - json_end_object(vmdesc); trace_savevm_section_end(se->idstr, se->section_id, 0); save_section_footer(f, se); + + json_end_object(vmdesc); } if (!in_postcopy) { From d8b9d7719cf6fb3186ecc817b3c04005eb1a1f01 Mon Sep 17 00:00:00 2001 From: Matthew Fortune Date: Tue, 23 Feb 2016 16:09:15 +0000 Subject: [PATCH 2/6] migration/postcopy-ram: Guard use of sys/eventfd.h with CONFIG_EVENTFD sys/eventfd.h was being guarded only by a check for linux but does not exist on older distributions like CentOS 5. Move the include into the code that uses it and add an appropriate guard. Signed-off-by: Matthew Fortune Reviewed-by: Juan Quintela Message-Id: <6D39441BF12EF246A7ABCE6654B023536BB85DEB@hhmail02.hh.imgtec.org> Signed-off-by: Amit Shah --- migration/postcopy-ram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 254c629d48..fbd0064fce 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -52,14 +52,14 @@ struct PostcopyDiscardState { #if defined(__linux__) #include -#include #include #include #include #include /* for __u64 */ #endif -#if defined(__linux__) && defined(__NR_userfaultfd) +#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD) +#include #include static bool ufd_version_check(int ufd) From a609ad8b6924c0476e3c3d4b8e9079974d3c6ecf Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 25 Feb 2016 10:47:49 +0100 Subject: [PATCH 3/6] MAINTAINERS: Add docs/migration.txt to the "Migration" section Signed-off-by: Thomas Huth Reviewed-by: Amit Shah Message-Id: <1456393669-20678-1-git-send-email-thuth@redhat.com> Signed-off-by: Amit Shah --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 606d9c08b5..5fafa815ad 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1241,6 +1241,7 @@ F: include/migration/ F: migration/ F: scripts/vmstate-static-checker.py F: tests/vmstate-static-checker-data/ +F: docs/migration.txt Seccomp M: Eduardo Otubo From 8da5ef579f956857ca1887ce0bc5e189dd9de0c4 Mon Sep 17 00:00:00 2001 From: Sascha Silbe Date: Fri, 26 Feb 2016 09:18:13 +0100 Subject: [PATCH 4/6] migration/vmstate: document VMStateFlags The VMState API is rather sparsely documented. Start by describing the meaning of all VMStateFlags. Reviewed-by: Amit Shah Reviewed-by: Juan Quintela Signed-off-by: Sascha Silbe Message-Id: <1456474693-11662-1-git-send-email-silbe@linux.vnet.ibm.com> Signed-off-by: Amit Shah --- include/migration/vmstate.h | 100 ++++++++++++++++++++++++++++++++---- 1 file changed, 90 insertions(+), 10 deletions(-) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 7246f29afe..84ee355ceb 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -88,21 +88,101 @@ struct VMStateInfo { }; enum VMStateFlags { + /* Ignored */ VMS_SINGLE = 0x001, + + /* The struct member at opaque + VMStateField.offset is a pointer + * to the actual field (e.g. struct a { uint8_t *b; + * }). Dereference the pointer before using it as basis for + * further pointer arithmetic (see e.g. VMS_ARRAY). Does not + * affect the meaning of VMStateField.num_offset or + * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for + * those. */ VMS_POINTER = 0x002, + + /* The field is an array of fixed size. VMStateField.num contains + * the number of entries in the array. The size of each entry is + * given by VMStateField.size and / or opaque + + * VMStateField.size_offset; see VMS_VBUFFER and + * VMS_MULTIPLY. Each array entry will be processed individually + * (VMStateField.info.get()/put() if VMS_STRUCT is not set, + * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not + * be combined with VMS_VARRAY*. */ VMS_ARRAY = 0x004, + + /* The field is itself a struct, containing one or more + * fields. Recurse into VMStateField.vmsd. Most useful in + * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each + * array entry. */ VMS_STRUCT = 0x008, - VMS_VARRAY_INT32 = 0x010, /* Array with size in int32_t field*/ - VMS_BUFFER = 0x020, /* static sized buffer */ + + /* The field is an array of variable size. The int32_t at opaque + + * VMStateField.num_offset contains the number of entries in the + * array. See the VMS_ARRAY description regarding array handling + * in general. May not be combined with VMS_ARRAY or any other + * VMS_VARRAY*. */ + VMS_VARRAY_INT32 = 0x010, + + /* Ignored */ + VMS_BUFFER = 0x020, + + /* The field is a (fixed-size or variable-size) array of pointers + * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry + * before using it. Note: Does not imply any one of VMS_ARRAY / + * VMS_VARRAY*; these need to be set explicitly. */ VMS_ARRAY_OF_POINTER = 0x040, - VMS_VARRAY_UINT16 = 0x080, /* Array with size in uint16_t field */ - VMS_VBUFFER = 0x100, /* Buffer with size in int32_t field */ - VMS_MULTIPLY = 0x200, /* multiply "size" field by field_size */ - VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/ - VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/ - VMS_MUST_EXIST = 0x1000, /* Field must exist in input */ - VMS_ALLOC = 0x2000, /* Alloc a buffer on the destination */ - VMS_MULTIPLY_ELEMENTS = 0x4000, /* multiply varray size by field->num */ + + /* The field is an array of variable size. The uint16_t at opaque + * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT16 = 0x080, + + /* The size of the individual entries (a single array entry if + * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if + * neither is set) is variable (i.e. not known at compile-time), + * but the same for all entries. Use the int32_t at opaque + + * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine + * the size of each (and every) entry. */ + VMS_VBUFFER = 0x100, + + /* Multiply the entry size given by the int32_t at opaque + + * VMStateField.size_offset (see VMS_VBUFFER description) with + * VMStateField.size to determine the number of bytes to be + * allocated. Only valid in combination with VMS_VBUFFER. */ + VMS_MULTIPLY = 0x200, + + /* The field is an array of variable size. The uint8_t at opaque + + * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT8 = 0x400, + + /* The field is an array of variable size. The uint32_t at opaque + * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT32 = 0x800, + + /* Fail loading the serialised VM state if this field is missing + * from the input. */ + VMS_MUST_EXIST = 0x1000, + + /* When loading serialised VM state, allocate memory for the + * (entire) field. Only valid in combination with + * VMS_POINTER. Note: Not all combinations with other flags are + * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't + * cause the individual entries to be allocated. */ + VMS_ALLOC = 0x2000, + + /* Multiply the number of entries given by the integer at opaque + + * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num + * to determine the number of entries in the array. Only valid in + * combination with one of VMS_VARRAY*. */ + VMS_MULTIPLY_ELEMENTS = 0x4000, }; typedef struct { From 0aa6aefc9c93db1f64e3ba406ee5234da75b545b Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 24 Feb 2016 11:53:38 +0300 Subject: [PATCH 5/6] migration (ordinary): move bdrv_invalidate_cache_all of of coroutine context There is a possibility to hit an assert in qcow2_get_specific_info that s->qcow_version is undefined. This happens when VM in starting from suspended state, i.e. it processes incoming migration, and in the same time 'info block' is called. The problem is that qcow2_invalidate_cache() closes the image and memset()s BDRVQcowState in the middle. The patch moves processing of bdrv_invalidate_cache_all out of coroutine context for standard migration to avoid that. Signed-off-by: Denis V. Lunev Reviewed-by: Fam Zheng CC: Paolo Bonzini CC: Juan Quintela CC: Amit Shah Message-Id: <1456304019-10507-2-git-send-email-den@openvz.org> [Amit: Fix a use-after-free bug] Signed-off-by: Amit Shah --- include/migration/migration.h | 2 + migration/migration.c | 89 +++++++++++++++++++---------------- 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/include/migration/migration.h b/include/migration/migration.h index 85b6026d10..ac2c12c2a5 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -104,6 +104,8 @@ struct MigrationIncomingState { QemuMutex rp_mutex; /* We send replies from multiple threads */ void *postcopy_tmp_page; + QEMUBH *bh; + int state; /* See savevm.c */ LoadStateEntry_Head loadvm_handlers; diff --git a/migration/migration.c b/migration/migration.c index fc5e50b0be..0129d9f420 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -323,10 +323,56 @@ void qemu_start_incoming_migration(const char *uri, Error **errp) } } +static void process_incoming_migration_bh(void *opaque) +{ + Error *local_err = NULL; + MigrationIncomingState *mis = opaque; + + /* Make sure all file formats flush their mutable metadata */ + bdrv_invalidate_cache_all(&local_err); + if (local_err) { + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_FAILED); + error_report_err(local_err); + migrate_decompress_threads_join(); + exit(EXIT_FAILURE); + } + + /* + * This must happen after all error conditions are dealt with and + * we're sure the VM is going to be running on this host. + */ + qemu_announce_self(); + + /* If global state section was not received or we are in running + state, we need to obey autostart. Any other state is set with + runstate_set. */ + + if (!global_state_received() || + global_state_get_runstate() == RUN_STATE_RUNNING) { + if (autostart) { + vm_start(); + } else { + runstate_set(RUN_STATE_PAUSED); + } + } else { + runstate_set(global_state_get_runstate()); + } + migrate_decompress_threads_join(); + /* + * This must happen after any state changes since as soon as an external + * observer sees this event they might start to prod at the VM assuming + * it's ready to use. + */ + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_COMPLETED); + qemu_bh_delete(mis->bh); + migration_incoming_state_destroy(); +} + static void process_incoming_migration_co(void *opaque) { QEMUFile *f = opaque; - Error *local_err = NULL; MigrationIncomingState *mis; PostcopyState ps; int ret; @@ -369,45 +415,8 @@ static void process_incoming_migration_co(void *opaque) exit(EXIT_FAILURE); } - /* Make sure all file formats flush their mutable metadata */ - bdrv_invalidate_cache_all(&local_err); - if (local_err) { - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_FAILED); - error_report_err(local_err); - migrate_decompress_threads_join(); - exit(EXIT_FAILURE); - } - - /* - * This must happen after all error conditions are dealt with and - * we're sure the VM is going to be running on this host. - */ - qemu_announce_self(); - - /* If global state section was not received or we are in running - state, we need to obey autostart. Any other state is set with - runstate_set. */ - - if (!global_state_received() || - global_state_get_runstate() == RUN_STATE_RUNNING) { - if (autostart) { - vm_start(); - } else { - runstate_set(RUN_STATE_PAUSED); - } - } else { - runstate_set(global_state_get_runstate()); - } - migrate_decompress_threads_join(); - /* - * This must happen after any state changes since as soon as an external - * observer sees this event they might start to prod at the VM assuming - * it's ready to use. - */ - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_COMPLETED); - migration_incoming_state_destroy(); + mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); + qemu_bh_schedule(mis->bh); } void process_incoming_migration(QEMUFile *f) From ea6a55bcc0d144ac5086cebf7f84afa7071afe90 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 24 Feb 2016 11:53:39 +0300 Subject: [PATCH 6/6] migration (postcopy): move bdrv_invalidate_cache_all of of coroutine context There is a possibility to hit an assert in qcow2_get_specific_info that s->qcow_version is undefined. This happens when VM in starting from suspended state, i.e. it processes incoming migration, and in the same time 'info block' is called. The problem is that qcow2_invalidate_cache() closes the image and memset()s BDRVQcowState in the middle. The patch moves processing of bdrv_invalidate_cache_all out of coroutine context for postcopy migration to avoid that. This function is called with the following stack: process_incoming_migration_co qemu_loadvm_state qemu_loadvm_state_main loadvm_process_command loadvm_postcopy_handle_run Signed-off-by: Denis V. Lunev Tested-by: Dr. David Alan Gilbert Reviewed-by: Fam Zheng CC: Paolo Bonzini CC: Juan Quintela CC: Amit Shah Message-Id: <1456304019-10507-3-git-send-email-den@openvz.org> Signed-off-by: Amit Shah --- migration/savevm.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 02e8487441..b45915612f 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1495,17 +1495,10 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) return 0; } -/* After all discards we can start running and asking for pages */ -static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) +static void loadvm_postcopy_handle_run_bh(void *opaque) { - PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING); Error *local_err = NULL; - - trace_loadvm_postcopy_handle_run(); - if (ps != POSTCOPY_INCOMING_LISTENING) { - error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps); - return -1; - } + MigrationIncomingState *mis = opaque; /* TODO we should move all of this lot into postcopy_ram.c or a shared code * in migration.c @@ -1518,7 +1511,6 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) bdrv_invalidate_cache_all(&local_err); if (local_err) { error_report_err(local_err); - return -1; } trace_loadvm_postcopy_handle_run_cpu_sync(); @@ -1534,6 +1526,23 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) runstate_set(RUN_STATE_PAUSED); } + qemu_bh_delete(mis->bh); +} + +/* After all discards we can start running and asking for pages */ +static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) +{ + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING); + + trace_loadvm_postcopy_handle_run(); + if (ps != POSTCOPY_INCOMING_LISTENING) { + error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps); + return -1; + } + + mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, NULL); + qemu_bh_schedule(mis->bh); + /* We need to finish reading the stream from the package * and also stop reading anything more from the stream that loaded the * package (since it's now being read by the listener thread).