From ed1f3e0090069dcb9458aa9e450df12bf8eba0b0 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert"
Date: Tue, 13 Oct 2015 12:21:27 +0100
Subject: [PATCH 1/3] Migration: Generate the completed event only when we complete

The current migration-completed event is generated a bit too early,
which means that an eager libvirt that's ready to go as soon as it
sees the event ends up racing with the actual end of migration.

This corresponds to RH bug:
https://bugzilla.redhat.com/show_bug.cgi?id=1271145

Signed-off-by: Dr. David Alan Gilbert
Reviewed-by: Juan Quintela
Reviewed-by: Amit Shah
Signed-off-by: Juan Quintela
---
 migration/migration.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index b7de9b7b3f..3d40f24556 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -294,12 +294,12 @@ static void process_incoming_migration_co(void *opaque)
         migrate_decompress_threads_join();
         exit(EXIT_FAILURE);
     }
-    migrate_generate_event(MIGRATION_STATUS_COMPLETED);
     qemu_announce_self();
 
     /* Make sure all file formats flush their mutable metadata */
     bdrv_invalidate_cache_all(&local_err);
     if (local_err) {
+        migrate_generate_event(MIGRATION_STATUS_FAILED);
         error_report_err(local_err);
         migrate_decompress_threads_join();
         exit(EXIT_FAILURE);
@@ -320,6 +320,12 @@ static void process_incoming_migration_co(void *opaque)
         runstate_set(global_state_get_runstate());
     }
     migrate_decompress_threads_join();
+    /*
+     * This must happen after any state changes since as soon as an external
+     * observer sees this event they might start to prod at the VM assuming
+     * it's ready to use.
+     */
+    migrate_generate_event(MIGRATION_STATUS_COMPLETED);
 }
 
 void process_incoming_migration(QEMUFile *f)

From 92e3762237475407fe03e1ccac6e30612ab96caf Mon Sep 17 00:00:00 2001
From: Amit Shah
Date: Wed, 14 Oct 2015 17:37:19 +0530
Subject: [PATCH 2/3] migration: announce VM's new home just before VM is runnable

We were announcing the dest host's IP as our new IP a bit too soon --
if errors were detected after this announcement was done, the
migration failed and the VM could continue running on the src host,
causing problems later.

Move the qemu_announce_self() call so it's done just before the VM is
runnable.

Signed-off-by: Amit Shah
Reviewed-by: Juan Quintela
Reviewed-by: Michael S. Tsirkin
Signed-off-by: Juan Quintela
---
 migration/migration.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index 3d40f24556..b092f386b4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -294,7 +294,6 @@ static void process_incoming_migration_co(void *opaque)
         migrate_decompress_threads_join();
         exit(EXIT_FAILURE);
     }
-    qemu_announce_self();
 
     /* Make sure all file formats flush their mutable metadata */
     bdrv_invalidate_cache_all(&local_err);
@@ -305,6 +304,12 @@ static void process_incoming_migration_co(void *opaque)
         exit(EXIT_FAILURE);
     }
 
+    /*
+     * This must happen after all error conditions are dealt with and
+     * we're sure the VM is going to be running on this host.
+     */
+    qemu_announce_self();
+
     /* If global state section was not received or we are in running
        state, we need to obey autostart. Any other state is set with
        runstate_set. */
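Taken together, these two patches pin down the externally visible
ordering at the end of an incoming migration: fail with an explicit
event, announce the new home only once success is certain, and report
completion last. The following condensed sketch (an editorial
illustration, not the literal upstream function; unrelated statements
and the decompress-thread cleanup on the error path are elided) shows
the resulting shape of process_incoming_migration_co():

    /* Condensed sketch of process_incoming_migration_co() after
     * patches 1 and 2; only the ordering matters here. */
    static void process_incoming_migration_co(void *opaque)
    {
        /* ... load the incoming stream; any failure exits before a
         * single event or announcement has been emitted ... */

        /* Make sure all file formats flush their mutable metadata */
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            migrate_generate_event(MIGRATION_STATUS_FAILED); /* patch 1 */
            error_report_err(local_err);
            exit(EXIT_FAILURE);
        }

        /* Patch 2: announce only after every error path has been
         * passed, so the network never learns a new location for a
         * VM that in fact stays on the source. */
        qemu_announce_self();

        /* ... set the run state, join the decompress threads ... */

        /* Patch 1: emit COMPLETED last; an eager management layer
         * may start prodding the VM the moment it sees this event. */
        migrate_generate_event(MIGRATION_STATUS_COMPLETED);
    }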
From 60be6340796e66b5ac8aac2d98dde5c79336a89c Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev"
Date: Mon, 28 Sep 2015 14:41:58 +0300
Subject: [PATCH 3/3] migration: fix deadlock

Release the QEMU global mutex before calling synchronize_rcu().
synchronize_rcu() waits for all readers to finish their critical
sections. There is at least one critical section in which we try to
take the QGM: it is in address_space_rw(), where prepare_mmio_access()
tries to acquire the QGM.

Both functions (migration_end() and migration_bitmap_extend()) are
called from the main thread, which holds the QGM. Thus there is a
race condition that ends in deadlock:

 main thread                     working thread
 Lock QGM                            |
     |                     Call KVM_EXIT_IO handler
     |                               |
     |               Open rcu reader's critical section
 Migration cleanup bh                |
     |                               |
 synchronize_rcu() is                |
 waiting for readers                 |
     |                prepare_mmio_access() is waiting for QGM
      \                             /
                 deadlock

The patch changes the bitmap freeing from a direct g_free() after
synchronize_rcu() to freeing inside call_rcu().

Signed-off-by: Denis V. Lunev
Reported-by: Igor Redko
Tested-by: Igor Redko
Reviewed-by: Paolo Bonzini
Reviewed-by: Juan Quintela
Signed-off-by: Juan Quintela
CC: Anna Melekhova
CC: Juan Quintela
CC: Amit Shah
CC: Paolo Bonzini
CC: Wen Congyang
---
 migration/ram.c | 44 +++++++++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 2d1d0b99e4..a25bcc7db1 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -219,7 +219,6 @@ static RAMBlock *last_seen_block;
 /* This is the last block from where we have sent data */
 static RAMBlock *last_sent_block;
 static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
 static QemuMutex migration_bitmap_mutex;
 static uint64_t migration_dirty_pages;
 static uint32_t last_version;
@@ -236,6 +235,11 @@ struct PageSearchStatus {
 };
 typedef struct PageSearchStatus PageSearchStatus;
 
+static struct BitmapRcu {
+    struct rcu_head rcu;
+    unsigned long *bmap;
+} *migration_bitmap_rcu;
+
 struct CompressParam {
     bool start;
     bool done;
@@ -540,7 +544,7 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb,
     unsigned long *bitmap;
     unsigned long next;
 
-    bitmap = atomic_rcu_read(&migration_bitmap);
+    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
     if (ram_bulk_stage && nr > base) {
         next = nr + 1;
     } else {
@@ -558,7 +562,7 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb,
 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
 {
     unsigned long *bitmap;
-    bitmap = atomic_rcu_read(&migration_bitmap);
+    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
     migration_dirty_pages +=
             cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
 }
@@ -1090,17 +1094,22 @@ void free_xbzrle_decoded_buf(void)
     xbzrle_decoded_buf = NULL;
 }
 
+static void migration_bitmap_free(struct BitmapRcu *bmap)
+{
+    g_free(bmap->bmap);
+    g_free(bmap);
+}
+
 static void migration_end(void)
 {
     /* caller have hold iothread lock or is in a bh, so there is
      * no writing race against this migration_bitmap
      */
-    unsigned long *bitmap = migration_bitmap;
-    atomic_rcu_set(&migration_bitmap, NULL);
+    struct BitmapRcu *bitmap = migration_bitmap_rcu;
+    atomic_rcu_set(&migration_bitmap_rcu, NULL);
     if (bitmap) {
         memory_global_dirty_log_stop();
-        synchronize_rcu();
-        g_free(bitmap);
+        call_rcu(bitmap, migration_bitmap_free, rcu);
     }
 
     XBZRLE_cache_lock();
@@ -1136,9 +1145,10 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
     /* called in qemu main thread, so there is
      * no writing race against this migration_bitmap
      */
-    if (migration_bitmap) {
-        unsigned long *old_bitmap = migration_bitmap, *bitmap;
-        bitmap = bitmap_new(new);
+    if (migration_bitmap_rcu) {
+        struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
+        bitmap = g_new(struct BitmapRcu, 1);
+        bitmap->bmap = bitmap_new(new);
 
         /* prevent migration_bitmap content from being set bit
          * by migration_bitmap_sync_range() at the same time.
@@ -1146,13 +1156,12 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
          * at the same time.
          */
         qemu_mutex_lock(&migration_bitmap_mutex);
-        bitmap_copy(bitmap, old_bitmap, old);
-        bitmap_set(bitmap, old, new - old);
-        atomic_rcu_set(&migration_bitmap, bitmap);
+        bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
+        bitmap_set(bitmap->bmap, old, new - old);
+        atomic_rcu_set(&migration_bitmap_rcu, bitmap);
         qemu_mutex_unlock(&migration_bitmap_mutex);
         migration_dirty_pages += new - old;
-        synchronize_rcu();
-        g_free(old_bitmap);
+        call_rcu(old_bitmap, migration_bitmap_free, rcu);
     }
 }
 
@@ -1210,8 +1219,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     reset_ram_globals();
 
     ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
-    migration_bitmap = bitmap_new(ram_bitmap_pages);
-    bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
+    migration_bitmap_rcu = g_new(struct BitmapRcu, 1);
+    migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
+    bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
 
     /*
      * Count the total number of pages used by ram blocks not including any
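The shape of this fix is the standard RCU deferred-free
(publish/retire) idiom. The sketch below is an editorial illustration
rather than part of the patch: it reuses the type and field names from
the diff, assumes QEMU's call_rcu() macro from include/qemu/rcu.h
(container pointer, destructor, name of the embedded rcu_head field),
and wraps the writer side in a made-up helper, retire_bitmap(), that
does not exist upstream.

    #include "qemu/osdep.h"
    #include "qemu/rcu.h"

    struct BitmapRcu {
        struct rcu_head rcu;     /* embedded head that call_rcu() queues */
        unsigned long *bmap;
    };

    static struct BitmapRcu *migration_bitmap_rcu;

    static void migration_bitmap_free(struct BitmapRcu *bmap)
    {
        g_free(bmap->bmap);
        g_free(bmap);
    }

    /* Hypothetical wrapper, for illustration only. */
    static void retire_bitmap(void)
    {
        struct BitmapRcu *old = migration_bitmap_rcu;

        /* Unpublish first, so no new reader can pick up the pointer. */
        atomic_rcu_set(&migration_bitmap_rcu, NULL);
        if (old) {
            /* call_rcu() returns immediately; migration_bitmap_free()
             * runs only after every reader that might still hold the
             * old pointer has left its read-side critical section.
             * synchronize_rcu() instead blocks the caller until that
             * point, which deadlocks here because the caller holds
             * the QGM that a reader (prepare_mmio_access()) is
             * waiting for. */
            call_rcu(old, migration_bitmap_free, rcu);
        }
    }

The one cost of this idiom is that the rcu_head must be embedded in
the object being freed, which is why the patch wraps the bare bitmap
pointer in struct BitmapRcu instead of keeping a plain unsigned long
array.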