From 56e93d26b85bac76b93211393163c2ebcdee9481 Mon Sep 17 00:00:00 2001
From: Juan Quintela <quintela@redhat.com>
Date: Thu, 7 May 2015 19:33:31 +0200
Subject: [PATCH 01/21] migration: move ram stuff to migration/ram

For historical reasons, RAM migration has lived in arch_init.c.  Just
split it out into migration/ram.c, the same way it was done for block.c.

This is code movement only; there are no other changes.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 MAINTAINERS                   |    1 -
 Makefile.target               |    1 +
 arch_init.c                   | 1588 --------------------------------
 include/migration/migration.h |    2 +
 include/sysemu/arch_init.h    |    1 -
 migration/ram.c               | 1639 +++++++++++++++++++++++++++++++++
 trace-events                  |    2 +-
 7 files changed, 1643 insertions(+), 1591 deletions(-)
 create mode 100644 migration/ram.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 4ed82154ce..b1833959d6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1015,7 +1015,6 @@ S: Maintained
 F: include/migration/
 F: migration/
 F: savevm.c
-F: arch_init.c
 F: scripts/vmstate-static-checker.py
 F: tests/vmstate-static-checker-data/
 
diff --git a/Makefile.target b/Makefile.target
index ec5b92cb60..27209a7f0a 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -135,6 +135,7 @@ obj-$(CONFIG_KVM) += kvm-all.o
 obj-y += memory.o savevm.o cputlb.o
 obj-y += memory_mapping.o
 obj-y += dump.o
+obj-y += migration/ram.o
 LIBS := $(libs_softmmu) $(LIBS)
 
 # xen support
diff --git a/arch_init.c b/arch_init.c
index d29447497b..63c44d379a 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -55,14 +55,6 @@
 #include "qemu/host-utils.h"
 #include "qemu/rcu_queue.h"
 
-#ifdef DEBUG_ARCH_INIT
-#define DPRINTF(fmt, ...) \
-    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
-
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
 int graphic_height = 768;
@@ -111,24 +103,6 @@ int graphic_depth = 32;
 #endif
 
 const uint32_t arch_type = QEMU_ARCH;
-static bool mig_throttle_on;
-static int dirty_rate_high_cnt;
-static void check_guest_throttling(void);
-
-static uint64_t bitmap_sync_count;
-
-/***********************************************************/
-/* ram save/restore */
-
-#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
-#define RAM_SAVE_FLAG_COMPRESS 0x02
-#define RAM_SAVE_FLAG_MEM_SIZE 0x04
-#define RAM_SAVE_FLAG_PAGE     0x08
-#define RAM_SAVE_FLAG_EOS      0x10
-#define RAM_SAVE_FLAG_CONTINUE 0x20
-#define RAM_SAVE_FLAG_XBZRLE   0x40
-/* 0x80 is reserved in migration.h start with 0x100 next */
-#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
 
 static struct defconfig_file {
     const char *filename;
@@ -139,8 +113,6 @@ static struct defconfig_file {
     { NULL }, /* end of list */
 };
 
-static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
-
 int qemu_read_default_config_files(bool userconfig)
 {
     int ret;
@@ -159,1517 +131,6 @@ int qemu_read_default_config_files(bool userconfig)
     return 0;
 }
 
-static inline bool is_zero_range(uint8_t *p, uint64_t size)
-{
-    return buffer_find_nonzero_offset(p, size) == size;
-}
-
-/* struct contains XBZRLE cache and a static page
-   used by the compression */
-static struct {
-    /* buffer used for XBZRLE encoding */
-    uint8_t *encoded_buf;
-    /* buffer for storing page content */
-    uint8_t *current_buf;
-    /* Cache for XBZRLE, Protected by lock. */
-    PageCache *cache;
-    QemuMutex lock;
-} XBZRLE;
-
-/* buffer used for XBZRLE decoding */
-static uint8_t *xbzrle_decoded_buf;
-
-static void XBZRLE_cache_lock(void)
-{
-    if (migrate_use_xbzrle())
-        qemu_mutex_lock(&XBZRLE.lock);
-}
-
-static void XBZRLE_cache_unlock(void)
-{
-    if (migrate_use_xbzrle())
-        qemu_mutex_unlock(&XBZRLE.lock);
-}
-
-/*
- * called from qmp_migrate_set_cache_size in main thread, possibly while
- * a migration is in progress.
- * A running migration maybe using the cache and might finish during this
- * call, hence changes to the cache are protected by XBZRLE.lock().
- */
-int64_t xbzrle_cache_resize(int64_t new_size)
-{
-    PageCache *new_cache;
-    int64_t ret;
-
-    if (new_size < TARGET_PAGE_SIZE) {
-        return -1;
-    }
-
-    XBZRLE_cache_lock();
-
-    if (XBZRLE.cache != NULL) {
-        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
-            goto out_new_size;
-        }
-        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
-                                        TARGET_PAGE_SIZE);
-        if (!new_cache) {
-            error_report("Error creating cache");
-            ret = -1;
-            goto out;
-        }
-
-        cache_fini(XBZRLE.cache);
-        XBZRLE.cache = new_cache;
-    }
-
-out_new_size:
-    ret = pow2floor(new_size);
-out:
-    XBZRLE_cache_unlock();
-    return ret;
-}
-
-/* accounting for migration statistics */
-typedef struct AccountingInfo {
-    uint64_t dup_pages;
-    uint64_t skipped_pages;
-    uint64_t norm_pages;
-    uint64_t iterations;
-    uint64_t xbzrle_bytes;
-    uint64_t xbzrle_pages;
-    uint64_t xbzrle_cache_miss;
-    double xbzrle_cache_miss_rate;
-    uint64_t xbzrle_overflows;
-} AccountingInfo;
-
-static AccountingInfo acct_info;
-
-static void acct_clear(void)
-{
-    memset(&acct_info, 0, sizeof(acct_info));
-}
-
-uint64_t dup_mig_bytes_transferred(void)
-{
-    return acct_info.dup_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t dup_mig_pages_transferred(void)
-{
-    return acct_info.dup_pages;
-}
-
-uint64_t skipped_mig_bytes_transferred(void)
-{
-    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t skipped_mig_pages_transferred(void)
-{
-    return acct_info.skipped_pages;
-}
-
-uint64_t norm_mig_bytes_transferred(void)
-{
-    return acct_info.norm_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t norm_mig_pages_transferred(void)
-{
-    return acct_info.norm_pages;
-}
-
-uint64_t xbzrle_mig_bytes_transferred(void)
-{
-    return acct_info.xbzrle_bytes;
-}
-
-uint64_t xbzrle_mig_pages_transferred(void)
-{
-    return acct_info.xbzrle_pages;
-}
-
-uint64_t xbzrle_mig_pages_cache_miss(void)
-{
-    return acct_info.xbzrle_cache_miss;
-}
-
-double xbzrle_mig_cache_miss_rate(void)
-{
-    return acct_info.xbzrle_cache_miss_rate;
-}
-
-uint64_t xbzrle_mig_pages_overflow(void)
-{
-    return acct_info.xbzrle_overflows;
-}
-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
-struct CompressParam {
-    bool start;
-    bool done;
-    QEMUFile *file;
-    QemuMutex mutex;
-    QemuCond cond;
-    RAMBlock *block;
-    ram_addr_t offset;
-};
-typedef struct CompressParam CompressParam;
-
-struct DecompressParam {
-    bool start;
-    QemuMutex mutex;
-    QemuCond cond;
-    void *des;
-    uint8 *compbuf;
-    int len;
-};
-typedef struct DecompressParam DecompressParam;
-
-static CompressParam *comp_param;
-static QemuThread *compress_threads;
-/* comp_done_cond is used to wake up the migration thread when
- * one of the compression threads has finished the compression.
- * comp_done_lock is used to co-work with comp_done_cond.
- */
-static QemuMutex *comp_done_lock;
-static QemuCond *comp_done_cond;
-/* The empty QEMUFileOps will be used by file in CompressParam */
-static const QEMUFileOps empty_ops = { };
-
-static bool compression_switch;
-static bool quit_comp_thread;
-static bool quit_decomp_thread;
-static DecompressParam *decomp_param;
-static QemuThread *decompress_threads;
-static uint8_t *compressed_data_buf;
-
-static int do_compress_ram_page(CompressParam *param);
-
-static void *do_data_compress(void *opaque)
-{
-    CompressParam *param = opaque;
-
-    while (!quit_comp_thread) {
-        qemu_mutex_lock(&param->mutex);
-        /* Re-check the quit_comp_thread in case of
-         * terminate_compression_threads is called just before
-         * qemu_mutex_lock(&param->mutex) and after
-         * while(!quit_comp_thread), re-check it here can make
-         * sure the compression thread terminate as expected.
-         */
-        while (!param->start && !quit_comp_thread) {
-            qemu_cond_wait(&param->cond, &param->mutex);
-        }
-        if (!quit_comp_thread) {
-            do_compress_ram_page(param);
-        }
-        param->start = false;
-        qemu_mutex_unlock(&param->mutex);
-
-        qemu_mutex_lock(comp_done_lock);
-        param->done = true;
-        qemu_cond_signal(comp_done_cond);
-        qemu_mutex_unlock(comp_done_lock);
-    }
-
-    return NULL;
-}
-
-static inline void terminate_compression_threads(void)
-{
-    int idx, thread_count;
-
-    thread_count = migrate_compress_threads();
-    quit_comp_thread = true;
-    for (idx = 0; idx < thread_count; idx++) {
-        qemu_mutex_lock(&comp_param[idx].mutex);
-        qemu_cond_signal(&comp_param[idx].cond);
-        qemu_mutex_unlock(&comp_param[idx].mutex);
-    }
-}
-
-void migrate_compress_threads_join(void)
-{
-    int i, thread_count;
-
-    if (!migrate_use_compression()) {
-        return;
-    }
-    terminate_compression_threads();
-    thread_count = migrate_compress_threads();
-    for (i = 0; i < thread_count; i++) {
-        qemu_thread_join(compress_threads + i);
-        qemu_fclose(comp_param[i].file);
-        qemu_mutex_destroy(&comp_param[i].mutex);
-        qemu_cond_destroy(&comp_param[i].cond);
-    }
-    qemu_mutex_destroy(comp_done_lock);
-    qemu_cond_destroy(comp_done_cond);
-    g_free(compress_threads);
-    g_free(comp_param);
-    g_free(comp_done_cond);
-    g_free(comp_done_lock);
-    compress_threads = NULL;
-    comp_param = NULL;
-    comp_done_cond = NULL;
-    comp_done_lock = NULL;
-}
-
-void migrate_compress_threads_create(void)
-{
-    int i, thread_count;
-
-    if (!migrate_use_compression()) {
-        return;
-    }
-    quit_comp_thread = false;
-    compression_switch = true;
-    thread_count = migrate_compress_threads();
-    compress_threads = g_new0(QemuThread, thread_count);
-    comp_param = g_new0(CompressParam, thread_count);
-    comp_done_cond = g_new0(QemuCond, 1);
-    comp_done_lock = g_new0(QemuMutex, 1);
-    qemu_cond_init(comp_done_cond);
-    qemu_mutex_init(comp_done_lock);
-    for (i = 0; i < thread_count; i++) {
-        /* com_param[i].file is just used as a dummy buffer to save data, set
-         * it's ops to empty.
-         */
-        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
-        comp_param[i].done = true;
-        qemu_mutex_init(&comp_param[i].mutex);
-        qemu_cond_init(&comp_param[i].cond);
-        qemu_thread_create(compress_threads + i, "compress",
-                           do_data_compress, comp_param + i,
-                           QEMU_THREAD_JOINABLE);
-    }
-}
-
-/**
- * save_page_header: Write page header to wire
- *
- * If this is the 1st block, it also writes the block identification
- *
- * Returns: Number of bytes written
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- *          in the lower bits, it contains flags
- */
-static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
-{
-    size_t size;
-
-    qemu_put_be64(f, offset);
-    size = 8;
-
-    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
-        qemu_put_byte(f, strlen(block->idstr));
-        qemu_put_buffer(f, (uint8_t *)block->idstr,
-                        strlen(block->idstr));
-        size += 1 + strlen(block->idstr);
-    }
-    return size;
-}
-
-/* Update the xbzrle cache to reflect a page that's been sent as all 0.
- * The important thing is that a stale (not-yet-0'd) page be replaced
- * by the new data.
- * As a bonus, if the page wasn't in the cache it gets added so that
- * when a small write is made into the 0'd page it gets XBZRLE sent
- */
-static void xbzrle_cache_zero_page(ram_addr_t current_addr)
-{
-    if (ram_bulk_stage || !migrate_use_xbzrle()) {
-        return;
-    }
-
-    /* We don't care if this fails to allocate a new cache page
-     * as long as it updated an old one */
-    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
-                 bitmap_sync_count);
-}
-
-#define ENCODING_FLAG_XBZRLE 0x1
-
-/**
- * save_xbzrle_page: compress and send current page
- *
- * Returns: 1 means that we wrote the page
- *          0 means that page is identical to the one already sent
- *          -1 means that xbzrle would be longer than normal
- *
- * @f: QEMUFile where to send the data
- * @current_data:
- * @current_addr:
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
-                            ram_addr_t current_addr, RAMBlock *block,
-                            ram_addr_t offset, bool last_stage,
-                            uint64_t *bytes_transferred)
-{
-    int encoded_len = 0, bytes_xbzrle;
-    uint8_t *prev_cached_page;
-
-    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
-        acct_info.xbzrle_cache_miss++;
-        if (!last_stage) {
-            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
-                             bitmap_sync_count) == -1) {
-                return -1;
-            } else {
-                /* update *current_data when the page has been
-                   inserted into cache */
-                *current_data = get_cached_data(XBZRLE.cache, current_addr);
-            }
-        }
-        return -1;
-    }
-
-    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
-
-    /* save current buffer into memory */
-    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
-
-    /* XBZRLE encoding (if there is no overflow) */
-    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
-                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
-                                       TARGET_PAGE_SIZE);
-    if (encoded_len == 0) {
-        DPRINTF("Skipping unmodified page\n");
-        return 0;
-    } else if (encoded_len == -1) {
-        DPRINTF("Overflow\n");
-        acct_info.xbzrle_overflows++;
-        /* update data in the cache */
-        if (!last_stage) {
-            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
-            *current_data = prev_cached_page;
-        }
-        return -1;
-    }
-
-    /* we need to update the data in the cache, in order to get the same data */
-    if (!last_stage) {
-        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
-    }
-
-    /* Send XBZRLE based compressed page */
-    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
-    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
-    qemu_put_be16(f, encoded_len);
-    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
-    bytes_xbzrle += encoded_len + 1 + 2;
-    acct_info.xbzrle_pages++;
-    acct_info.xbzrle_bytes += bytes_xbzrle;
-    *bytes_transferred += bytes_xbzrle;
-
-    return 1;
-}
-
-static inline
-ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
-                                                 ram_addr_t start)
-{
-    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
-    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
-    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
-    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
-
-    unsigned long next;
-
-    if (ram_bulk_stage && nr > base) {
-        next = nr + 1;
-    } else {
-        next = find_next_bit(migration_bitmap, size, nr);
-    }
-
-    if (next < size) {
-        clear_bit(next, migration_bitmap);
-        migration_dirty_pages--;
-    }
-    return (next - base) << TARGET_PAGE_BITS;
-}
-
-static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
-{
-    migration_dirty_pages +=
-        cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
-}
-
-
-/* Fix me: there are too many global variables used in migration process. */
-static int64_t start_time;
-static int64_t bytes_xfer_prev;
-static int64_t num_dirty_pages_period;
-static uint64_t xbzrle_cache_miss_prev;
-static uint64_t iterations_prev;
-
-static void migration_bitmap_sync_init(void)
-{
-    start_time = 0;
-    bytes_xfer_prev = 0;
-    num_dirty_pages_period = 0;
-    xbzrle_cache_miss_prev = 0;
-    iterations_prev = 0;
-}
-
-/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
-static void migration_bitmap_sync(void)
-{
-    RAMBlock *block;
-    uint64_t num_dirty_pages_init = migration_dirty_pages;
-    MigrationState *s = migrate_get_current();
-    int64_t end_time;
-    int64_t bytes_xfer_now;
-
-    bitmap_sync_count++;
-
-    if (!bytes_xfer_prev) {
-        bytes_xfer_prev = ram_bytes_transferred();
-    }
-
-    if (!start_time) {
-        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-    }
-
-    trace_migration_bitmap_sync_start();
-    address_space_sync_dirty_bitmap(&address_space_memory);
-
-    rcu_read_lock();
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-        migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
-    }
-    rcu_read_unlock();
-
-    trace_migration_bitmap_sync_end(migration_dirty_pages
-                                    - num_dirty_pages_init);
-    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
-    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-
-    /* more than 1 second = 1000 millisecons */
-    if (end_time > start_time + 1000) {
-        if (migrate_auto_converge()) {
-            /* The following detection logic can be refined later. For now:
-               Check to see if the dirtied bytes is 50% more than the approx.
-               amount of bytes that just got transferred since the last time we
-               were in this routine. If that happens >N times (for now N==4)
-               we turn on the throttle down logic */
-            bytes_xfer_now = ram_bytes_transferred();
-            if (s->dirty_pages_rate &&
-               (num_dirty_pages_period * TARGET_PAGE_SIZE >
-                   (bytes_xfer_now - bytes_xfer_prev)/2) &&
-               (dirty_rate_high_cnt++ > 4)) {
-                    trace_migration_throttle();
-                    mig_throttle_on = true;
-                    dirty_rate_high_cnt = 0;
-             }
-             bytes_xfer_prev = bytes_xfer_now;
-        } else {
-             mig_throttle_on = false;
-        }
-        if (migrate_use_xbzrle()) {
-            if (iterations_prev != acct_info.iterations) {
-                acct_info.xbzrle_cache_miss_rate =
-                   (double)(acct_info.xbzrle_cache_miss -
-                            xbzrle_cache_miss_prev) /
-                   (acct_info.iterations - iterations_prev);
-            }
-            iterations_prev = acct_info.iterations;
-            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
-        }
-        s->dirty_pages_rate = num_dirty_pages_period * 1000
-            / (end_time - start_time);
-        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
-        start_time = end_time;
-        num_dirty_pages_period = 0;
-    }
-    s->dirty_sync_count = bitmap_sync_count;
-}
-
-/**
- * save_zero_page: Send the zero page to the stream
- *
- * Returns: Number of pages written.
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @p: pointer to the page
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
-                          uint8_t *p, uint64_t *bytes_transferred)
-{
-    int pages = -1;
-
-    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
-        acct_info.dup_pages++;
-        *bytes_transferred += save_page_header(f, block,
-                                               offset | RAM_SAVE_FLAG_COMPRESS);
-        qemu_put_byte(f, 0);
-        *bytes_transferred += 1;
-        pages = 1;
-    }
-
-    return pages;
-}
-
-/**
- * ram_save_page: Send the given page to the stream
- *
- * Returns: Number of pages written.
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
-                         bool last_stage, uint64_t *bytes_transferred)
-{
-    int pages = -1;
-    uint64_t bytes_xmit;
-    ram_addr_t current_addr;
-    MemoryRegion *mr = block->mr;
-    uint8_t *p;
-    int ret;
-    bool send_async = true;
-
-    p = memory_region_get_ram_ptr(mr) + offset;
-
-    /* In doubt sent page as normal */
-    bytes_xmit = 0;
-    ret = ram_control_save_page(f, block->offset,
-                           offset, TARGET_PAGE_SIZE, &bytes_xmit);
-    if (bytes_xmit) {
-        *bytes_transferred += bytes_xmit;
-        pages = 1;
-    }
-
-    XBZRLE_cache_lock();
-
-    current_addr = block->offset + offset;
-
-    if (block == last_sent_block) {
-        offset |= RAM_SAVE_FLAG_CONTINUE;
-    }
-    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
-        if (ret != RAM_SAVE_CONTROL_DELAYED) {
-            if (bytes_xmit > 0) {
-                acct_info.norm_pages++;
-            } else if (bytes_xmit == 0) {
-                acct_info.dup_pages++;
-            }
-        }
-    } else {
-        pages = save_zero_page(f, block, offset, p, bytes_transferred);
-        if (pages > 0) {
-            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
-             * page would be stale
-             */
-            xbzrle_cache_zero_page(current_addr);
-        } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
-            pages = save_xbzrle_page(f, &p, current_addr, block,
-                                     offset, last_stage, bytes_transferred);
-            if (!last_stage) {
-                /* Can't send this cached data async, since the cache page
-                 * might get updated before it gets to the wire
-                 */
-                send_async = false;
-            }
-        }
-    }
-
-    /* XBZRLE overflow or normal page */
-    if (pages == -1) {
-        *bytes_transferred += save_page_header(f, block,
-                                               offset | RAM_SAVE_FLAG_PAGE);
-        if (send_async) {
-            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
-        } else {
-            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
-        }
-        *bytes_transferred += TARGET_PAGE_SIZE;
-        pages = 1;
-        acct_info.norm_pages++;
-    }
-
-    XBZRLE_cache_unlock();
-
-    return pages;
-}
-
-static int do_compress_ram_page(CompressParam *param)
-{
-    int bytes_sent, blen;
-    uint8_t *p;
-    RAMBlock *block = param->block;
-    ram_addr_t offset = param->offset;
-
-    p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK);
-
-    bytes_sent = save_page_header(param->file, block, offset |
-                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
-    blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
-                                     migrate_compress_level());
-    bytes_sent += blen;
-
-    return bytes_sent;
-}
-
-static inline void start_compression(CompressParam *param)
-{
-    param->done = false;
-    qemu_mutex_lock(&param->mutex);
-    param->start = true;
-    qemu_cond_signal(&param->cond);
-    qemu_mutex_unlock(&param->mutex);
-}
-
-static inline void start_decompression(DecompressParam *param)
-{
-    qemu_mutex_lock(&param->mutex);
-    param->start = true;
-    qemu_cond_signal(&param->cond);
-    qemu_mutex_unlock(&param->mutex);
-}
-
-static uint64_t bytes_transferred;
-
-static void flush_compressed_data(QEMUFile *f)
-{
-    int idx, len, thread_count;
-
-    if (!migrate_use_compression()) {
-        return;
-    }
-    thread_count = migrate_compress_threads();
-    for (idx = 0; idx < thread_count; idx++) {
-        if (!comp_param[idx].done) {
-            qemu_mutex_lock(comp_done_lock);
-            while (!comp_param[idx].done && !quit_comp_thread) {
-                qemu_cond_wait(comp_done_cond, comp_done_lock);
-            }
-            qemu_mutex_unlock(comp_done_lock);
-        }
-        if (!quit_comp_thread) {
-            len = qemu_put_qemu_file(f, comp_param[idx].file);
-            bytes_transferred += len;
-        }
-    }
-}
-
-static inline void set_compress_params(CompressParam *param, RAMBlock *block,
-                                       ram_addr_t offset)
-{
-    param->block = block;
-    param->offset = offset;
-}
-
-static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
-                                           ram_addr_t offset,
-                                           uint64_t *bytes_transferred)
-{
-    int idx, thread_count, bytes_xmit = -1, pages = -1;
-
-    thread_count = migrate_compress_threads();
-    qemu_mutex_lock(comp_done_lock);
-    while (true) {
-        for (idx = 0; idx < thread_count; idx++) {
-            if (comp_param[idx].done) {
-                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
-                set_compress_params(&comp_param[idx], block, offset);
-                start_compression(&comp_param[idx]);
-                pages = 1;
-                acct_info.norm_pages++;
-                *bytes_transferred += bytes_xmit;
-                break;
-            }
-        }
-        if (pages > 0) {
-            break;
-        } else {
-            qemu_cond_wait(comp_done_cond, comp_done_lock);
-        }
-    }
-    qemu_mutex_unlock(comp_done_lock);
-
-    return pages;
-}
-
-/**
- * ram_save_compressed_page: compress the given page and send it to the stream
- *
- * Returns: Number of pages written.
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
-                                    ram_addr_t offset, bool last_stage,
-                                    uint64_t *bytes_transferred)
-{
-    int pages = -1;
-    uint64_t bytes_xmit;
-    MemoryRegion *mr = block->mr;
-    uint8_t *p;
-    int ret;
-
-    p = memory_region_get_ram_ptr(mr) + offset;
-
-    bytes_xmit = 0;
-    ret = ram_control_save_page(f, block->offset,
-                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
-    if (bytes_xmit) {
-        *bytes_transferred += bytes_xmit;
-        pages = 1;
-    }
-    if (block == last_sent_block) {
-        offset |= RAM_SAVE_FLAG_CONTINUE;
-    }
-    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
-        if (ret != RAM_SAVE_CONTROL_DELAYED) {
-            if (bytes_xmit > 0) {
-                acct_info.norm_pages++;
-            } else if (bytes_xmit == 0) {
-                acct_info.dup_pages++;
-            }
-        }
-    } else {
-        /* When starting the process of a new block, the first page of
-         * the block should be sent out before other pages in the same
-         * block, and all the pages in last block should have been sent
-         * out, keeping this order is important, because the 'cont' flag
-         * is used to avoid resending the block name.
-         */
-        if (block != last_sent_block) {
-            flush_compressed_data(f);
-            pages = save_zero_page(f, block, offset, p, bytes_transferred);
-            if (pages == -1) {
-                set_compress_params(&comp_param[0], block, offset);
-                /* Use the qemu thread to compress the data to make sure the
-                 * first page is sent out before other pages
-                 */
-                bytes_xmit = do_compress_ram_page(&comp_param[0]);
-                acct_info.norm_pages++;
-                qemu_put_qemu_file(f, comp_param[0].file);
-                *bytes_transferred += bytes_xmit;
-                pages = 1;
-            }
-        } else {
-            pages = save_zero_page(f, block, offset, p, bytes_transferred);
-            if (pages == -1) {
-                pages = compress_page_with_multi_thread(f, block, offset,
-                                                        bytes_transferred);
-            }
-        }
-    }
-
-    return pages;
-}
-
-/**
- * ram_find_and_save_block: Finds a dirty page and sends it to f
- *
- * Called within an RCU critical section.
- *
- * Returns:  The number of pages written
- *           0 means no dirty pages
- *
- * @f: QEMUFile where to send the data
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-
-static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
-                                   uint64_t *bytes_transferred)
-{
-    RAMBlock *block = last_seen_block;
-    ram_addr_t offset = last_offset;
-    bool complete_round = false;
-    int pages = 0;
-    MemoryRegion *mr;
-
-    if (!block)
-        block = QLIST_FIRST_RCU(&ram_list.blocks);
-
-    while (true) {
-        mr = block->mr;
-        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
-        if (complete_round && block == last_seen_block &&
-            offset >= last_offset) {
-            break;
-        }
-        if (offset >= block->used_length) {
-            offset = 0;
-            block = QLIST_NEXT_RCU(block, next);
-            if (!block) {
-                block = QLIST_FIRST_RCU(&ram_list.blocks);
-                complete_round = true;
-                ram_bulk_stage = false;
-                if (migrate_use_xbzrle()) {
-                    /* If xbzrle is on, stop using the data compression at this
-                     * point. In theory, xbzrle can do better than compression.
-                     */
-                    flush_compressed_data(f);
-                    compression_switch = false;
-                }
-            }
-        } else {
-            if (compression_switch && migrate_use_compression()) {
-                pages = ram_save_compressed_page(f, block, offset, last_stage,
-                                                 bytes_transferred);
-            } else {
-                pages = ram_save_page(f, block, offset, last_stage,
-                                      bytes_transferred);
-            }
-
-            /* if page is unmodified, continue to the next */
-            if (pages > 0) {
-                last_sent_block = block;
-                break;
-            }
-        }
-    }
-
-    last_seen_block = block;
-    last_offset = offset;
-
-    return pages;
-}
-
-void acct_update_position(QEMUFile *f, size_t size, bool zero)
-{
-    uint64_t pages = size / TARGET_PAGE_SIZE;
-    if (zero) {
-        acct_info.dup_pages += pages;
-    } else {
-        acct_info.norm_pages += pages;
-        bytes_transferred += size;
-        qemu_update_position(f, size);
-    }
-}
-
-static ram_addr_t ram_save_remaining(void)
-{
-    return migration_dirty_pages;
-}
-
-uint64_t ram_bytes_remaining(void)
-{
-    return ram_save_remaining() * TARGET_PAGE_SIZE;
-}
-
-uint64_t ram_bytes_transferred(void)
-{
-    return bytes_transferred;
-}
-
-uint64_t ram_bytes_total(void)
-{
-    RAMBlock *block;
-    uint64_t total = 0;
-
-    rcu_read_lock();
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
-        total += block->used_length;
-    rcu_read_unlock();
-    return total;
-}
-
-void free_xbzrle_decoded_buf(void)
-{
-    g_free(xbzrle_decoded_buf);
-    xbzrle_decoded_buf = NULL;
-}
-
-static void migration_end(void)
-{
-    if (migration_bitmap) {
-        memory_global_dirty_log_stop();
-        g_free(migration_bitmap);
-        migration_bitmap = NULL;
-    }
-
-    XBZRLE_cache_lock();
-    if (XBZRLE.cache) {
-        cache_fini(XBZRLE.cache);
-        g_free(XBZRLE.encoded_buf);
-        g_free(XBZRLE.current_buf);
-        XBZRLE.cache = NULL;
-        XBZRLE.encoded_buf = NULL;
-        XBZRLE.current_buf = NULL;
-    }
-    XBZRLE_cache_unlock();
-}
-
-static void ram_migration_cancel(void *opaque)
-{
-    migration_end();
-}
-
-static void reset_ram_globals(void)
-{
-    last_seen_block = NULL;
-    last_sent_block = NULL;
-    last_offset = 0;
-    last_version = ram_list.version;
-    ram_bulk_stage = true;
-}
-
-#define MAX_WAIT 50 /* ms, half buffered_file limit */
-
-
-/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
- * long-running RCU critical section.  When rcu-reclaims in the code
- * start to become numerous it will be necessary to reduce the
- * granularity of these critical sections.
- */
-
-static int ram_save_setup(QEMUFile *f, void *opaque)
-{
-    RAMBlock *block;
-    int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
-
-    mig_throttle_on = false;
-    dirty_rate_high_cnt = 0;
-    bitmap_sync_count = 0;
-    migration_bitmap_sync_init();
-
-    if (migrate_use_xbzrle()) {
-        XBZRLE_cache_lock();
-        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
-                                  TARGET_PAGE_SIZE,
-                                  TARGET_PAGE_SIZE);
-        if (!XBZRLE.cache) {
-            XBZRLE_cache_unlock();
-            error_report("Error creating cache");
-            return -1;
-        }
-        XBZRLE_cache_unlock();
-
-        /* We prefer not to abort if there is no memory */
-        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
-        if (!XBZRLE.encoded_buf) {
-            error_report("Error allocating encoded_buf");
-            return -1;
-        }
-
-        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
-        if (!XBZRLE.current_buf) {
-            error_report("Error allocating current_buf");
-            g_free(XBZRLE.encoded_buf);
-            XBZRLE.encoded_buf = NULL;
-            return -1;
-        }
-
-        acct_clear();
-    }
-
-    /* iothread lock needed for ram_list.dirty_memory[] */
-    qemu_mutex_lock_iothread();
-    qemu_mutex_lock_ramlist();
-    rcu_read_lock();
-    bytes_transferred = 0;
-    reset_ram_globals();
-
-    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
-    migration_bitmap = bitmap_new(ram_bitmap_pages);
-    bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
-
-    /*
-     * Count the total number of pages used by ram blocks not including any
-     * gaps due to alignment or unplugs.
-     */
-    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
-
-    memory_global_dirty_log_start();
-    migration_bitmap_sync();
-    qemu_mutex_unlock_ramlist();
-    qemu_mutex_unlock_iothread();
-
-    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
-
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-        qemu_put_byte(f, strlen(block->idstr));
-        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
-        qemu_put_be64(f, block->used_length);
-    }
-
-    rcu_read_unlock();
-
-    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
-    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
-
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
-    return 0;
-}
-
-static int ram_save_iterate(QEMUFile *f, void *opaque)
-{
-    int ret;
-    int i;
-    int64_t t0;
-    int pages_sent = 0;
-
-    rcu_read_lock();
-    if (ram_list.version != last_version) {
-        reset_ram_globals();
-    }
-
-    /* Read version before ram_list.blocks */
-    smp_rmb();
-
-    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
-
-    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    i = 0;
-    while ((ret = qemu_file_rate_limit(f)) == 0) {
-        int pages;
-
-        pages = ram_find_and_save_block(f, false, &bytes_transferred);
-        /* no more pages to sent */
-        if (pages == 0) {
-            break;
-        }
-        pages_sent += pages;
-        acct_info.iterations++;
-        check_guest_throttling();
-        /* we want to check in the 1st loop, just in case it was the 1st time
-           and we had to sync the dirty bitmap.
-           qemu_get_clock_ns() is a bit expensive, so we only check each some
-           iterations
-        */
-        if ((i & 63) == 0) {
-            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
-            if (t1 > MAX_WAIT) {
-                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
-                        t1, i);
-                break;
-            }
-        }
-        i++;
-    }
-    flush_compressed_data(f);
-    rcu_read_unlock();
-
-    /*
-     * Must occur before EOS (or any QEMUFile operation)
-     * because of RDMA protocol.
-     */
-    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
-
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-    bytes_transferred += 8;
-
-    ret = qemu_file_get_error(f);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return pages_sent;
-}
-
-/* Called with iothread lock */
-static int ram_save_complete(QEMUFile *f, void *opaque)
-{
-    rcu_read_lock();
-
-    migration_bitmap_sync();
-
-    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
-
-    /* try transferring iterative blocks of memory */
-
-    /* flush all remaining blocks regardless of rate limiting */
-    while (true) {
-        int pages;
-
-        pages = ram_find_and_save_block(f, true, &bytes_transferred);
-        /* no more blocks to sent */
-        if (pages == 0) {
-            break;
-        }
-    }
-
-    flush_compressed_data(f);
-    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
-    migration_end();
-
-    rcu_read_unlock();
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
-    return 0;
-}
-
-static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
-{
-    uint64_t remaining_size;
-
-    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
-
-    if (remaining_size < max_size) {
-        qemu_mutex_lock_iothread();
-        rcu_read_lock();
-        migration_bitmap_sync();
-        rcu_read_unlock();
-        qemu_mutex_unlock_iothread();
-        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
-    }
-    return remaining_size;
-}
-
-static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
-{
-    unsigned int xh_len;
-    int xh_flags;
-
-    if (!xbzrle_decoded_buf) {
-        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
-    }
-
-    /* extract RLE header */
-    xh_flags = qemu_get_byte(f);
-    xh_len = qemu_get_be16(f);
-
-    if (xh_flags != ENCODING_FLAG_XBZRLE) {
-        error_report("Failed to load XBZRLE page - wrong compression!");
-        return -1;
-    }
-
-    if (xh_len > TARGET_PAGE_SIZE) {
-        error_report("Failed to load XBZRLE page - len overflow!");
-        return -1;
-    }
-    /* load data and decode */
-    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
-
-    /* decode RLE */
-    if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
-                             TARGET_PAGE_SIZE) == -1) {
-        error_report("Failed to load XBZRLE page - decode error!");
-        return -1;
-    }
-
-    return 0;
-}
-
-/* Must be called from within a rcu critical section.
- * Returns a pointer from within the RCU-protected ram_list.
- */
-static inline void *host_from_stream_offset(QEMUFile *f,
-                                            ram_addr_t offset,
-                                            int flags)
-{
-    static RAMBlock *block = NULL;
-    char id[256];
-    uint8_t len;
-
-    if (flags & RAM_SAVE_FLAG_CONTINUE) {
-        if (!block || block->max_length <= offset) {
-            error_report("Ack, bad migration stream!");
-            return NULL;
-        }
-
-        return memory_region_get_ram_ptr(block->mr) + offset;
-    }
-
-    len = qemu_get_byte(f);
-    qemu_get_buffer(f, (uint8_t *)id, len);
-    id[len] = 0;
-
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-        if (!strncmp(id, block->idstr, sizeof(id)) &&
-            block->max_length > offset) {
-            return memory_region_get_ram_ptr(block->mr) + offset;
-        }
-    }
-
-    error_report("Can't find block %s!", id);
-    return NULL;
-}
-
-/*
- * If a page (or a whole RDMA chunk) has been
- * determined to be zero, then zap it.
- */
-void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
-{
-    if (ch != 0 || !is_zero_range(host, size)) {
-        memset(host, ch, size);
-    }
-}
-
-static void *do_data_decompress(void *opaque)
-{
-    DecompressParam *param = opaque;
-    unsigned long pagesize;
-
-    while (!quit_decomp_thread) {
-        qemu_mutex_lock(&param->mutex);
-        while (!param->start && !quit_decomp_thread) {
-            qemu_cond_wait(&param->cond, &param->mutex);
-            pagesize = TARGET_PAGE_SIZE;
-            if (!quit_decomp_thread) {
-                /* uncompress() will return failed in some case, especially
-                 * when the page is dirted when doing the compression, it's
-                 * not a problem because the dirty page will be retransferred
-                 * and uncompress() won't break the data in other pages.
-                 */
-                uncompress((Bytef *)param->des, &pagesize,
-                           (const Bytef *)param->compbuf, param->len);
-            }
-            param->start = false;
-        }
-        qemu_mutex_unlock(&param->mutex);
-    }
-
-    return NULL;
-}
-
-void migrate_decompress_threads_create(void)
-{
-    int i, thread_count;
-
-    thread_count = migrate_decompress_threads();
-    decompress_threads = g_new0(QemuThread, thread_count);
-    decomp_param = g_new0(DecompressParam, thread_count);
-    compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
-    quit_decomp_thread = false;
-    for (i = 0; i < thread_count; i++) {
-        qemu_mutex_init(&decomp_param[i].mutex);
-        qemu_cond_init(&decomp_param[i].cond);
-        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
-        qemu_thread_create(decompress_threads + i, "decompress",
-                           do_data_decompress, decomp_param + i,
-                           QEMU_THREAD_JOINABLE);
-    }
-}
-
-void migrate_decompress_threads_join(void)
-{
-    int i, thread_count;
-
-    quit_decomp_thread = true;
-    thread_count = migrate_decompress_threads();
-    for (i = 0; i < thread_count; i++) {
-        qemu_mutex_lock(&decomp_param[i].mutex);
-        qemu_cond_signal(&decomp_param[i].cond);
-        qemu_mutex_unlock(&decomp_param[i].mutex);
-    }
-    for (i = 0; i < thread_count; i++) {
-        qemu_thread_join(decompress_threads + i);
-        qemu_mutex_destroy(&decomp_param[i].mutex);
-        qemu_cond_destroy(&decomp_param[i].cond);
-        g_free(decomp_param[i].compbuf);
-    }
-    g_free(decompress_threads);
-    g_free(decomp_param);
-    g_free(compressed_data_buf);
-    decompress_threads = NULL;
-    decomp_param = NULL;
-    compressed_data_buf = NULL;
-}
-
-static void decompress_data_with_multi_threads(uint8_t *compbuf,
-                                               void *host, int len)
-{
-    int idx, thread_count;
-
-    thread_count = migrate_decompress_threads();
-    while (true) {
-        for (idx = 0; idx < thread_count; idx++) {
-            if (!decomp_param[idx].start) {
-                memcpy(decomp_param[idx].compbuf, compbuf, len);
-                decomp_param[idx].des = host;
-                decomp_param[idx].len = len;
-                start_decompression(&decomp_param[idx]);
-                break;
-            }
-        }
-        if (idx < thread_count) {
-            break;
-        }
-    }
-}
-
-static int ram_load(QEMUFile *f, void *opaque, int version_id)
-{
-    int flags = 0, ret = 0;
-    static uint64_t seq_iter;
-    int len = 0;
-
-    seq_iter++;
-
-    if (version_id != 4) {
-        ret = -EINVAL;
-    }
-
-    /* This RCU critical section can be very long running.
-     * When RCU reclaims in the code start to become numerous,
-     * it will be necessary to reduce the granularity of this
-     * critical section.
-     */
-    rcu_read_lock();
-    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
-        ram_addr_t addr, total_ram_bytes;
-        void *host;
-        uint8_t ch;
-
-        addr = qemu_get_be64(f);
-        flags = addr & ~TARGET_PAGE_MASK;
-        addr &= TARGET_PAGE_MASK;
-
-        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
-        case RAM_SAVE_FLAG_MEM_SIZE:
-            /* Synchronize RAM block list */
-            total_ram_bytes = addr;
-            while (!ret && total_ram_bytes) {
-                RAMBlock *block;
-                uint8_t len;
-                char id[256];
-                ram_addr_t length;
-
-                len = qemu_get_byte(f);
-                qemu_get_buffer(f, (uint8_t *)id, len);
-                id[len] = 0;
-                length = qemu_get_be64(f);
-
-                QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-                    if (!strncmp(id, block->idstr, sizeof(id))) {
-                        if (length != block->used_length) {
-                            Error *local_err = NULL;
-
-                            ret = qemu_ram_resize(block->offset, length, &local_err);
-                            if (local_err) {
-                                error_report_err(local_err);
-                            }
-                        }
-                        break;
-                    }
-                }
-
-                if (!block) {
-                    error_report("Unknown ramblock \"%s\", cannot "
-                                 "accept migration", id);
-                    ret = -EINVAL;
-                }
-
-                total_ram_bytes -= length;
-            }
-            break;
-        case RAM_SAVE_FLAG_COMPRESS:
-            host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
-                ret = -EINVAL;
-                break;
-            }
-            ch = qemu_get_byte(f);
-            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
-            break;
-        case RAM_SAVE_FLAG_PAGE:
-            host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
-                ret = -EINVAL;
-                break;
-            }
-            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
-            break;
-        case RAM_SAVE_FLAG_COMPRESS_PAGE:
-            host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
-                ret = -EINVAL;
-                break;
-            }
-
-            len = qemu_get_be32(f);
-            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
-                error_report("Invalid compressed data length: %d", len);
-                ret = -EINVAL;
-                break;
-            }
-            qemu_get_buffer(f, compressed_data_buf, len);
-            decompress_data_with_multi_threads(compressed_data_buf, host, len);
-            break;
-        case RAM_SAVE_FLAG_XBZRLE:
-            host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
-                ret = -EINVAL;
-                break;
-            }
-            if (load_xbzrle(f, addr, host) < 0) {
-                error_report("Failed to decompress XBZRLE page at "
-                             RAM_ADDR_FMT, addr);
-                ret = -EINVAL;
-                break;
-            }
-            break;
-        case RAM_SAVE_FLAG_EOS:
-            /* normal exit */
-            break;
-        default:
-            if (flags & RAM_SAVE_FLAG_HOOK) {
-                ram_control_load_hook(f, flags);
-            } else {
-                error_report("Unknown combination of migration flags: %#x",
-                             flags);
-                ret = -EINVAL;
-            }
-        }
-        if (!ret) {
-            ret = qemu_file_get_error(f);
-        }
-    }
-
-    rcu_read_unlock();
-    DPRINTF("Completed load of VM with exit code %d seq iteration "
-            "%" PRIu64 "\n", ret, seq_iter);
-    return ret;
-}
-
-static SaveVMHandlers savevm_ram_handlers = {
-    .save_live_setup = ram_save_setup,
-    .save_live_iterate = ram_save_iterate,
-    .save_live_complete = ram_save_complete,
-    .save_live_pending = ram_save_pending,
-    .load_state = ram_load,
-    .cancel = ram_migration_cancel,
-};
-
-void ram_mig_init(void)
-{
-    qemu_mutex_init(&XBZRLE.lock);
-    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
-}
-
 struct soundhw {
     const char *name;
     const char *descr;
@@ -1869,52 +330,3 @@ TargetInfo *qmp_query_target(Error **errp)
 
     return info;
 }
-
-/* Stub function that's gets run on the vcpu when its brought out of the
-   VM to run inside qemu via async_run_on_cpu()*/
-static void mig_sleep_cpu(void *opq)
-{
-    qemu_mutex_unlock_iothread();
-    g_usleep(30*1000);
-    qemu_mutex_lock_iothread();
-}
-
-/* To reduce the dirty rate explicitly disallow the VCPUs from spending
-   much time in the VM. The migration thread will try to catchup.
-   Workload will experience a performance drop.
-*/
-static void mig_throttle_guest_down(void)
-{
-    CPUState *cpu;
-
-    qemu_mutex_lock_iothread();
-    CPU_FOREACH(cpu) {
-        async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
-    }
-    qemu_mutex_unlock_iothread();
-}
-
-static void check_guest_throttling(void)
-{
-    static int64_t t0;
-    int64_t        t1;
-
-    if (!mig_throttle_on) {
-        return;
-    }
-
-    if (!t0)  {
-        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-        return;
-    }
-
-    t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-
-    /* If it has been more than 40 ms since the last time the guest
-     * was throttled then do it again.
-     */
-    if (40 < (t1-t0)/1000000) {
-        mig_throttle_guest_down();
-        t0 = t1;
-    }
-}
diff --git a/include/migration/migration.h b/include/migration/migration.h
index a6e025a248..b78a3b98b1 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -180,4 +180,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
                              ram_addr_t offset, size_t size,
                              uint64_t *bytes_sent);
 
+
+void ram_mig_init(void);
 #endif
diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h
index 54b36c16c4..c38892fec6 100644
--- a/include/sysemu/arch_init.h
+++ b/include/sysemu/arch_init.h
@@ -30,7 +30,6 @@ extern const uint32_t arch_type;
 void select_soundhw(const char *optarg);
 void do_acpitable_option(const QemuOpts *opts);
 void do_smbios_option(QemuOpts *opts);
-void ram_mig_init(void);
 void cpudef_init(void);
 void audio_init(void);
 int kvm_available(void);
diff --git a/migration/ram.c b/migration/ram.c
new file mode 100644
index 0000000000..ff889ba4ba
--- /dev/null
+++ b/migration/ram.c
@@ -0,0 +1,1639 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <zlib.h>
+#ifndef _WIN32
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
+#include "config.h"
+#include "monitor/monitor.h"
+#include "sysemu/sysemu.h"
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci.h"
+#include "hw/audio/audio.h"
+#include "migration/migration.h"
+#include "exec/address-spaces.h"
+#include "migration/page_cache.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "qmp-commands.h"
+#include "trace.h"
+#include "exec/cpu-all.h"
+#include "exec/ram_addr.h"
+#include "qemu/host-utils.h"
+#include "qemu/rcu_queue.h"
+
+#ifdef DEBUG_MIGRATION_RAM
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+static bool mig_throttle_on;
+static int dirty_rate_high_cnt;
+static void check_guest_throttling(void);
+
+static uint64_t bitmap_sync_count;
+
+/***********************************************************/
+/* ram save/restore */
+
+#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
+#define RAM_SAVE_FLAG_COMPRESS 0x02
+#define RAM_SAVE_FLAG_MEM_SIZE 0x04
+#define RAM_SAVE_FLAG_PAGE     0x08
+#define RAM_SAVE_FLAG_EOS      0x10
+#define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE   0x40
+/* 0x80 is reserved in migration.h; start with 0x100 next */
+#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
+
+static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
+
+static inline bool is_zero_range(uint8_t *p, uint64_t size)
+{
+    return buffer_find_nonzero_offset(p, size) == size;
+}
+
+/* struct contains XBZRLE cache and a static page
+   used by the compression */
+static struct {
+    /* buffer used for XBZRLE encoding */
+    uint8_t *encoded_buf;
+    /* buffer for storing page content */
+    uint8_t *current_buf;
+    /* Cache for XBZRLE, Protected by lock. */
+    PageCache *cache;
+    QemuMutex lock;
+} XBZRLE;
+
+/* buffer used for XBZRLE decoding */
+static uint8_t *xbzrle_decoded_buf;
+
+static void XBZRLE_cache_lock(void)
+{
+    if (migrate_use_xbzrle())
+        qemu_mutex_lock(&XBZRLE.lock);
+}
+
+static void XBZRLE_cache_unlock(void)
+{
+    if (migrate_use_xbzrle())
+        qemu_mutex_unlock(&XBZRLE.lock);
+}
+
+/*
+ * called from qmp_migrate_set_cache_size in main thread, possibly while
+ * a migration is in progress.
+ * A running migration may be using the cache and might finish during this
+ * call, hence changes to the cache are protected by XBZRLE.lock.
+ */
+int64_t xbzrle_cache_resize(int64_t new_size)
+{
+    PageCache *new_cache;
+    int64_t ret;
+
+    if (new_size < TARGET_PAGE_SIZE) {
+        return -1;
+    }
+
+    XBZRLE_cache_lock();
+
+    if (XBZRLE.cache != NULL) {
+        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
+            goto out_new_size;
+        }
+        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
+                                        TARGET_PAGE_SIZE);
+        if (!new_cache) {
+            error_report("Error creating cache");
+            ret = -1;
+            goto out;
+        }
+
+        cache_fini(XBZRLE.cache);
+        XBZRLE.cache = new_cache;
+    }
+
+out_new_size:
+    ret = pow2floor(new_size);
+out:
+    XBZRLE_cache_unlock();
+    return ret;
+}
+
+/* accounting for migration statistics */
+typedef struct AccountingInfo {
+    uint64_t dup_pages;
+    uint64_t skipped_pages;
+    uint64_t norm_pages;
+    uint64_t iterations;
+    uint64_t xbzrle_bytes;
+    uint64_t xbzrle_pages;
+    uint64_t xbzrle_cache_miss;
+    double xbzrle_cache_miss_rate;
+    uint64_t xbzrle_overflows;
+} AccountingInfo;
+
+static AccountingInfo acct_info;
+
+static void acct_clear(void)
+{
+    memset(&acct_info, 0, sizeof(acct_info));
+}
+
+uint64_t dup_mig_bytes_transferred(void)
+{
+    return acct_info.dup_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t dup_mig_pages_transferred(void)
+{
+    return acct_info.dup_pages;
+}
+
+uint64_t skipped_mig_bytes_transferred(void)
+{
+    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t skipped_mig_pages_transferred(void)
+{
+    return acct_info.skipped_pages;
+}
+
+uint64_t norm_mig_bytes_transferred(void)
+{
+    return acct_info.norm_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t norm_mig_pages_transferred(void)
+{
+    return acct_info.norm_pages;
+}
+
+uint64_t xbzrle_mig_bytes_transferred(void)
+{
+    return acct_info.xbzrle_bytes;
+}
+
+uint64_t xbzrle_mig_pages_transferred(void)
+{
+    return acct_info.xbzrle_pages;
+}
+
+uint64_t xbzrle_mig_pages_cache_miss(void)
+{
+    return acct_info.xbzrle_cache_miss;
+}
+
+double xbzrle_mig_cache_miss_rate(void)
+{
+    return acct_info.xbzrle_cache_miss_rate;
+}
+
+uint64_t xbzrle_mig_pages_overflow(void)
+{
+    return acct_info.xbzrle_overflows;
+}
+
+/* This is the last block that we have visited searching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+struct CompressParam {
+    bool start;
+    bool done;
+    QEMUFile *file;
+    QemuMutex mutex;
+    QemuCond cond;
+    RAMBlock *block;
+    ram_addr_t offset;
+};
+typedef struct CompressParam CompressParam;
+
+struct DecompressParam {
+    bool start;
+    QemuMutex mutex;
+    QemuCond cond;
+    void *des;
+    uint8 *compbuf;
+    int len;
+};
+typedef struct DecompressParam DecompressParam;
+
+static CompressParam *comp_param;
+static QemuThread *compress_threads;
+/* comp_done_cond is used to wake up the migration thread when
+ * one of the compression threads has finished the compression.
+ * comp_done_lock is used to co-work with comp_done_cond.
+ */
+static QemuMutex *comp_done_lock;
+static QemuCond *comp_done_cond;
+/* The empty QEMUFileOps will be used by file in CompressParam */
+static const QEMUFileOps empty_ops = { };
+
+static bool compression_switch;
+static bool quit_comp_thread;
+static bool quit_decomp_thread;
+static DecompressParam *decomp_param;
+static QemuThread *decompress_threads;
+static uint8_t *compressed_data_buf;
+
+static int do_compress_ram_page(CompressParam *param);
+
+static void *do_data_compress(void *opaque)
+{
+    CompressParam *param = opaque;
+
+    while (!quit_comp_thread) {
+        qemu_mutex_lock(&param->mutex);
+        /* Re-check quit_comp_thread in case
+         * terminate_compression_threads is called just before
+         * qemu_mutex_lock(&param->mutex) and after
+         * while(!quit_comp_thread); re-checking it here makes
+         * sure the compression thread terminates as expected.
+         */
+        while (!param->start && !quit_comp_thread) {
+            qemu_cond_wait(&param->cond, &param->mutex);
+        }
+        if (!quit_comp_thread) {
+            do_compress_ram_page(param);
+        }
+        param->start = false;
+        qemu_mutex_unlock(&param->mutex);
+
+        qemu_mutex_lock(comp_done_lock);
+        param->done = true;
+        qemu_cond_signal(comp_done_cond);
+        qemu_mutex_unlock(comp_done_lock);
+    }
+
+    return NULL;
+}
+
+static inline void terminate_compression_threads(void)
+{
+    int idx, thread_count;
+
+    thread_count = migrate_compress_threads();
+    quit_comp_thread = true;
+    for (idx = 0; idx < thread_count; idx++) {
+        qemu_mutex_lock(&comp_param[idx].mutex);
+        qemu_cond_signal(&comp_param[idx].cond);
+        qemu_mutex_unlock(&comp_param[idx].mutex);
+    }
+}
+
+void migrate_compress_threads_join(void)
+{
+    int i, thread_count;
+
+    if (!migrate_use_compression()) {
+        return;
+    }
+    terminate_compression_threads();
+    thread_count = migrate_compress_threads();
+    for (i = 0; i < thread_count; i++) {
+        qemu_thread_join(compress_threads + i);
+        qemu_fclose(comp_param[i].file);
+        qemu_mutex_destroy(&comp_param[i].mutex);
+        qemu_cond_destroy(&comp_param[i].cond);
+    }
+    qemu_mutex_destroy(comp_done_lock);
+    qemu_cond_destroy(comp_done_cond);
+    g_free(compress_threads);
+    g_free(comp_param);
+    g_free(comp_done_cond);
+    g_free(comp_done_lock);
+    compress_threads = NULL;
+    comp_param = NULL;
+    comp_done_cond = NULL;
+    comp_done_lock = NULL;
+}
+
+void migrate_compress_threads_create(void)
+{
+    int i, thread_count;
+
+    if (!migrate_use_compression()) {
+        return;
+    }
+    quit_comp_thread = false;
+    compression_switch = true;
+    thread_count = migrate_compress_threads();
+    compress_threads = g_new0(QemuThread, thread_count);
+    comp_param = g_new0(CompressParam, thread_count);
+    comp_done_cond = g_new0(QemuCond, 1);
+    comp_done_lock = g_new0(QemuMutex, 1);
+    qemu_cond_init(comp_done_cond);
+    qemu_mutex_init(comp_done_lock);
+    for (i = 0; i < thread_count; i++) {
+        /* comp_param[i].file is just used as a dummy buffer to save data; set
+         * its ops to empty.
+         */
+        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
+        comp_param[i].done = true;
+        qemu_mutex_init(&comp_param[i].mutex);
+        qemu_cond_init(&comp_param[i].cond);
+        qemu_thread_create(compress_threads + i, "compress",
+                           do_data_compress, comp_param + i,
+                           QEMU_THREAD_JOINABLE);
+    }
+}
+
+/**
+ * save_page_header: Write page header to wire
+ *
+ * If this is the 1st block, it also writes the block identification
+ *
+ * Returns: Number of bytes written
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ *          in the lower bits, it contains flags
+ */
+static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
+{
+    size_t size;
+
+    qemu_put_be64(f, offset);
+    size = 8;
+
+    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
+        qemu_put_byte(f, strlen(block->idstr));
+        qemu_put_buffer(f, (uint8_t *)block->idstr,
+                        strlen(block->idstr));
+        size += 1 + strlen(block->idstr);
+    }
+    return size;
+}
+
+/* Record in the xbzrle cache that a page went out as all zeroes.
+ * What matters is that any stale (not-yet-0'd) cached copy of the page
+ * is overwritten with the zero page.
+ * As a side benefit a page that was not cached gets inserted, so a later
+ * small write into the 0'd page can be sent with XBZRLE.
+ * Nothing is done during the bulk stage or when xbzrle is not in use.
+ */
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+{
+    if (!ram_bulk_stage && migrate_use_xbzrle()) {
+        /* The result is deliberately ignored: failing to allocate a new
+         * cache page is fine as long as an old one was updated.
+         */
+        cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
+                     bitmap_sync_count);
+    }
+}
+
+#define ENCODING_FLAG_XBZRLE 0x1
+
+/**
+ * save_xbzrle_page: compress and send current page
+ *
+ * Returns: 1 means that we wrote the page
+ *          0 means that page is identical to the one already sent
+ *          -1 means cache miss or xbzrle overflow; nothing was written
+ *
+ * @f: QEMUFile where to send the data
+ * @current_data: in/out page pointer; may be redirected to the cached copy
+ * @current_addr: address of the page, used as the key into XBZRLE.cache
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
+                            ram_addr_t current_addr, RAMBlock *block,
+                            ram_addr_t offset, bool last_stage,
+                            uint64_t *bytes_transferred)
+{
+    int encoded_len = 0, bytes_xbzrle;
+    uint8_t *prev_cached_page;
+
+    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
+        acct_info.xbzrle_cache_miss++;
+        if (!last_stage) {
+            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
+                             bitmap_sync_count) == -1) {
+                return -1;
+            } else {
+                /* update *current_data when the page has been
+                   inserted into cache */
+                *current_data = get_cached_data(XBZRLE.cache, current_addr);
+            }
+        }
+        return -1;
+    }
+
+    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
+
+    /* snapshot the page into current_buf; encoding works on this copy */
+    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
+
+    /* XBZRLE encoding (if there is no overflow) */
+    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
+                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                       TARGET_PAGE_SIZE);
+    if (encoded_len == 0) {
+        DPRINTF("Skipping unmodified page\n");
+        return 0;
+    } else if (encoded_len == -1) {
+        DPRINTF("Overflow\n");
+        acct_info.xbzrle_overflows++;
+        /* update data in the cache */
+        if (!last_stage) {
+            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
+            *current_data = prev_cached_page;
+        }
+        return -1;
+    }
+
+    /* keep the cache equal to the snapshot that was just encoded */
+    if (!last_stage) {
+        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
+    }
+
+    /* Send XBZRLE based compressed page */
+    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
+    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
+    qemu_put_be16(f, encoded_len);
+    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
+    bytes_xbzrle += encoded_len + 1 + 2;
+    acct_info.xbzrle_pages++;
+    acct_info.xbzrle_bytes += bytes_xbzrle;
+    *bytes_transferred += bytes_xbzrle;
+
+    return 1;
+}
+
+/* Pick the next page of @mr to send, starting from byte offset @start; if one
+ * is found its bit in migration_bitmap is cleared.  Returns its byte offset.
+ */
+static inline
+ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
+                                                 ram_addr_t start)
+{
+    unsigned long first = mr->ram_addr >> TARGET_PAGE_BITS;
+    unsigned long cur = first + (start >> TARGET_PAGE_BITS);
+    uint64_t aligned_len = TARGET_PAGE_ALIGN(memory_region_size(mr));
+    unsigned long end = first + (aligned_len >> TARGET_PAGE_BITS);
+    unsigned long hit;
+
+    /* Bulk stage: take the following page without searching the bitmap */
+    hit = (ram_bulk_stage && cur > first)
+        ? cur + 1 : find_next_bit(migration_bitmap, end, cur);
+
+    if (hit < end) {
+        clear_bit(hit, migration_bitmap);
+        migration_dirty_pages--;
+    }
+    return (hit - first) << TARGET_PAGE_BITS;
+}
+
+/* Sync the given range into migration_bitmap; add result to dirty count */
+static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
+{
+    migration_dirty_pages +=
+        cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
+}
+
+/* Fix me: there are too many global variables used in migration process. */
+static int64_t start_time;
+static int64_t bytes_xfer_prev;
+static int64_t num_dirty_pages_period;
+static uint64_t xbzrle_cache_miss_prev;
+static uint64_t iterations_prev;
+
+static void migration_bitmap_sync_init(void)
+{
+    /* Restart the rate-measurement window kept by migration_bitmap_sync() */
+    start_time = bytes_xfer_prev = num_dirty_pages_period = 0;
+
+    /* ... and the snapshot used for the xbzrle cache miss rate */
+    xbzrle_cache_miss_prev = iterations_prev = 0;
+}
+
+/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
+static void migration_bitmap_sync(void)
+{
+    RAMBlock *block;
+    uint64_t num_dirty_pages_init = migration_dirty_pages;
+    MigrationState *s = migrate_get_current();
+    int64_t end_time;
+    int64_t bytes_xfer_now;
+
+    bitmap_sync_count++;
+
+    if (!bytes_xfer_prev) {
+        bytes_xfer_prev = ram_bytes_transferred();
+    }
+
+    if (!start_time) {
+        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    }
+
+    trace_migration_bitmap_sync_start();
+    address_space_sync_dirty_bitmap(&address_space_memory);
+
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
+    }
+    rcu_read_unlock();
+
+    trace_migration_bitmap_sync_end(migration_dirty_pages
+                                    - num_dirty_pages_init);
+    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
+    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+    /* more than 1 second = 1000 milliseconds */
+    if (end_time > start_time + 1000) {
+        if (migrate_auto_converge()) {
+            /* The following detection logic can be refined later. For now:
+               Check whether the bytes dirtied in this period exceed half of
+               the approx. amount of bytes transferred since the last time we
+               were in this routine. Once the counter of such periods has
+               passed 4 we turn on the throttle down logic */
+            bytes_xfer_now = ram_bytes_transferred();
+            if (s->dirty_pages_rate &&
+               (num_dirty_pages_period * TARGET_PAGE_SIZE >
+                   (bytes_xfer_now - bytes_xfer_prev)/2) &&
+               (dirty_rate_high_cnt++ > 4)) {
+                    trace_migration_throttle();
+                    mig_throttle_on = true;
+                    dirty_rate_high_cnt = 0;
+             }
+             bytes_xfer_prev = bytes_xfer_now;
+        } else {
+             mig_throttle_on = false;
+        }
+        if (migrate_use_xbzrle()) {
+            if (iterations_prev != acct_info.iterations) {
+                acct_info.xbzrle_cache_miss_rate =
+                   (double)(acct_info.xbzrle_cache_miss -
+                            xbzrle_cache_miss_prev) /
+                   (acct_info.iterations - iterations_prev);
+            }
+            iterations_prev = acct_info.iterations;
+            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
+        }
+        s->dirty_pages_rate = num_dirty_pages_period * 1000
+            / (end_time - start_time);
+        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
+        start_time = end_time;
+        num_dirty_pages_period = 0;
+    }
+    s->dirty_sync_count = bitmap_sync_count;
+}
+
+/**
+ * save_zero_page: Send the page as a zero page if it contains only zeroes
+ *
+ * Returns: 1 if the page was written, -1 if the page is not all zeroes
+ *          (nothing is written in that case).
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @p: pointer to the page
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+                          uint8_t *p, uint64_t *bytes_transferred)
+{
+    size_t hdr_len;
+
+    if (!is_zero_range(p, TARGET_PAGE_SIZE)) {
+        return -1;
+    }
+
+    acct_info.dup_pages++;
+    hdr_len = save_page_header(f, block, offset | RAM_SAVE_FLAG_COMPRESS);
+    qemu_put_byte(f, 0);
+    *bytes_transferred += hdr_len + 1;
+    return 1;
+}
+
+/**
+ * ram_save_page: Send the given page to the stream
+ *
+ * Returns: Number of pages written (0 if xbzrle found the page unchanged).
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
+                         bool last_stage, uint64_t *bytes_transferred)
+{
+    int pages = -1;               /* -1: page still has to be sent below */
+    uint64_t bytes_xmit;
+    ram_addr_t current_addr;
+    MemoryRegion *mr = block->mr;
+    uint8_t *p;
+    int ret;
+    bool send_async = true;
+
+    p = memory_region_get_ram_ptr(mr) + offset;
+
+    /* When in doubt, send the page as a normal page */
+    bytes_xmit = 0;
+    ret = ram_control_save_page(f, block->offset,
+                           offset, TARGET_PAGE_SIZE, &bytes_xmit);
+    if (bytes_xmit) {
+        *bytes_transferred += bytes_xmit;
+        pages = 1;
+    }
+
+    XBZRLE_cache_lock();
+
+    current_addr = block->offset + offset;
+
+    if (block == last_sent_block) {
+        offset |= RAM_SAVE_FLAG_CONTINUE;
+    }
+    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+        if (ret != RAM_SAVE_CONTROL_DELAYED) {
+            if (bytes_xmit > 0) {
+                acct_info.norm_pages++;
+            } else if (bytes_xmit == 0) {
+                acct_info.dup_pages++;
+            }
+        }
+    } else {
+        pages = save_zero_page(f, block, offset, p, bytes_transferred);
+        if (pages > 0) {
+            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
+             * page would be stale
+             */
+            xbzrle_cache_zero_page(current_addr);
+        } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
+            pages = save_xbzrle_page(f, &p, current_addr, block,
+                                     offset, last_stage, bytes_transferred);
+            if (!last_stage) {
+                /* Can't send this cached data async, since the cache page
+                 * might get updated before it gets to the wire
+                 */
+                send_async = false;
+            }
+        }
+    }
+
+    /* XBZRLE overflow or normal page */
+    if (pages == -1) {
+        *bytes_transferred += save_page_header(f, block,
+                                               offset | RAM_SAVE_FLAG_PAGE);
+        if (send_async) {
+            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+        } else {
+            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+        }
+        *bytes_transferred += TARGET_PAGE_SIZE;
+        pages = 1;
+        acct_info.norm_pages++;
+    }
+
+    XBZRLE_cache_unlock();
+
+    return pages;
+}
+
+/* Write the page described by @param into param->file: first the page header,
+ * then the compressed data.  Returns the sum of both reported sizes.
+ */
+static int do_compress_ram_page(CompressParam *param)
+{
+    RAMBlock *rb = param->block;
+    ram_addr_t off = param->offset;
+    uint8_t *page = memory_region_get_ram_ptr(rb->mr) +
+                    (off & TARGET_PAGE_MASK);
+    int hdr_len, data_len;
+
+    hdr_len = save_page_header(param->file, rb,
+                               off | RAM_SAVE_FLAG_COMPRESS_PAGE);
+    data_len = qemu_put_compression_data(param->file, page, TARGET_PAGE_SIZE,
+                                         migrate_compress_level());
+    return hdr_len + data_len;
+}
+
+static inline void start_compression(CompressParam *param)
+{
+    param->done = false;               /* cleared before taking the mutex */
+    qemu_mutex_lock(&param->mutex);
+    param->start = true;               /* request flag, set under the mutex */
+    qemu_cond_signal(&param->cond);
+    qemu_mutex_unlock(&param->mutex);
+}
+
+static inline void start_decompression(DecompressParam *param)
+{
+    qemu_mutex_lock(&param->mutex);
+    param->start = true;               /* request flag, set under the mutex */
+    qemu_cond_signal(&param->cond);    /* signal param->cond for the waiter */
+    qemu_mutex_unlock(&param->mutex);
+}
+
+static uint64_t bytes_transferred;
+
+static void flush_compressed_data(QEMUFile *f)
+{
+    int idx, len, thread_count;
+
+    if (!migrate_use_compression()) {
+        return;
+    }
+    thread_count = migrate_compress_threads();
+    for (idx = 0; idx < thread_count; idx++) {
+        if (!comp_param[idx].done) {
+            qemu_mutex_lock(comp_done_lock);  /* wait for this slot to finish */
+            while (!comp_param[idx].done && !quit_comp_thread) {
+                qemu_cond_wait(comp_done_cond, comp_done_lock);
+            }
+            qemu_mutex_unlock(comp_done_lock);
+        }
+        if (!quit_comp_thread) {
+            len = qemu_put_qemu_file(f, comp_param[idx].file);
+            bytes_transferred += len;  /* file-scope counter, not a parameter */
+        }
+    }
+}
+
+static inline void set_compress_params(CompressParam *param, RAMBlock *block,
+                                       ram_addr_t offset)
+{
+    param->block = block;     /* what to compress: the RAM block ... */
+    param->offset = offset;   /* ... and the offset value, stored as given */
+}
+
+static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
+                                           ram_addr_t offset,
+                                           uint64_t *bytes_transferred)
+{
+    int idx, thread_count, bytes_xmit = -1, pages = -1;
+
+    thread_count = migrate_compress_threads();
+    qemu_mutex_lock(comp_done_lock);
+    while (true) {
+        for (idx = 0; idx < thread_count; idx++) {
+            if (comp_param[idx].done) {   /* first slot that is marked done */
+                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
+                set_compress_params(&comp_param[idx], block, offset);
+                start_compression(&comp_param[idx]);
+                pages = 1;
+                acct_info.norm_pages++;
+                *bytes_transferred += bytes_xmit; /* drained from slot file */
+                break;
+            }
+        }
+        if (pages > 0) {
+            break;
+        } else {
+            qemu_cond_wait(comp_done_cond, comp_done_lock); /* no done slot */
+        }
+    }
+    qemu_mutex_unlock(comp_done_lock);
+
+    return pages;
+}
+
+/**
+ * ram_save_compressed_page: compress the given page and send it to the stream
+ *
+ * Returns: Number of pages written.
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
+                                    ram_addr_t offset, bool last_stage,
+                                    uint64_t *bytes_transferred)
+{
+    int pages = -1;
+    uint64_t bytes_xmit;
+    MemoryRegion *mr = block->mr;
+    uint8_t *p;
+    int ret;
+
+    p = memory_region_get_ram_ptr(mr) + offset;
+
+    bytes_xmit = 0;
+    ret = ram_control_save_page(f, block->offset,
+                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
+    if (bytes_xmit) {
+        *bytes_transferred += bytes_xmit;
+        pages = 1;
+    }
+    if (block == last_sent_block) {
+        offset |= RAM_SAVE_FLAG_CONTINUE;
+    }
+    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+        if (ret != RAM_SAVE_CONTROL_DELAYED) {
+            if (bytes_xmit > 0) {
+                acct_info.norm_pages++;
+            } else if (bytes_xmit == 0) {
+                acct_info.dup_pages++;
+            }
+        }
+    } else {
+        /* When starting the process of a new block, the first page of
+         * the block should be sent out before other pages in the same
+         * block, and all the pages in last block should have been sent
+         * out, keeping this order is important, because the 'cont' flag
+         * is used to avoid resending the block name.
+         */
+        if (block != last_sent_block) {
+            flush_compressed_data(f);
+            pages = save_zero_page(f, block, offset, p, bytes_transferred);
+            if (pages == -1) {
+                set_compress_params(&comp_param[0], block, offset);
+                /* Compress synchronously in the calling thread to make sure
+                 * the first page is sent out before other pages
+                 */
+                bytes_xmit = do_compress_ram_page(&comp_param[0]);
+                acct_info.norm_pages++;
+                qemu_put_qemu_file(f, comp_param[0].file);
+                *bytes_transferred += bytes_xmit;
+                pages = 1;
+            }
+        } else {
+            pages = save_zero_page(f, block, offset, p, bytes_transferred);
+            if (pages == -1) {
+                pages = compress_page_with_multi_thread(f, block, offset,
+                                                        bytes_transferred);
+            }
+        }
+    }
+
+    return pages;
+}
+
+/**
+ * ram_find_and_save_block: Finds a dirty page and sends it to f
+ *
+ * Called within an RCU critical section.
+ *
+ * Returns:  The number of pages written
+ *           0 means no dirty pages
+ *
+ * @f: QEMUFile where to send the data
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
+                                   uint64_t *bytes_transferred)
+{
+    RAMBlock *block = last_seen_block;
+    ram_addr_t offset = last_offset;
+    bool complete_round = false;
+    int pages = 0;
+    MemoryRegion *mr;
+
+    if (!block) {
+        block = QLIST_FIRST_RCU(&ram_list.blocks);
+    }
+
+    while (true) {
+        mr = block->mr;
+        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
+        if (complete_round && block == last_seen_block &&
+            offset >= last_offset) {
+            break;
+        }
+        if (offset >= block->used_length) {
+            offset = 0;
+            block = QLIST_NEXT_RCU(block, next);
+            if (!block) {
+                block = QLIST_FIRST_RCU(&ram_list.blocks);
+                complete_round = true;
+                ram_bulk_stage = false;
+                if (migrate_use_xbzrle()) {
+                    /* If xbzrle is on, stop using the data compression at this
+                     * point. In theory, xbzrle can do better than compression.
+                     */
+                    flush_compressed_data(f);
+                    compression_switch = false;
+                }
+            }
+        } else {
+            if (compression_switch && migrate_use_compression()) {
+                pages = ram_save_compressed_page(f, block, offset, last_stage,
+                                                 bytes_transferred);
+            } else {
+                pages = ram_save_page(f, block, offset, last_stage,
+                                      bytes_transferred);
+            }
+
+            /* if page is unmodified, continue to the next */
+            if (pages > 0) {
+                last_sent_block = block;
+                break;
+            }
+        }
+    }
+
+    last_seen_block = block;
+    last_offset = offset;
+
+    return pages;
+}
+
+/* Add @size bytes worth of pages to the zero or normal page accounting */
+void acct_update_position(QEMUFile *f, size_t size, bool zero)
+{
+    if (zero) {
+        acct_info.dup_pages += size / TARGET_PAGE_SIZE;
+        return;
+    }
+    acct_info.norm_pages += size / TARGET_PAGE_SIZE;
+    bytes_transferred += size;
+    qemu_update_position(f, size);
+}
+
+static ram_addr_t ram_save_remaining(void)
+{
+    return migration_dirty_pages;   /* counted in pages, not bytes */
+}
+
+uint64_t ram_bytes_remaining(void)
+{
+    return ram_save_remaining() * TARGET_PAGE_SIZE;   /* pages -> bytes */
+}
+
+uint64_t ram_bytes_transferred(void)
+{
+    return bytes_transferred;   /* file-scope running counter */
+}
+
+uint64_t ram_bytes_total(void)
+{
+    RAMBlock *block;
+    uint64_t total = 0;   /* sum of used_length over every RAM block */
+
+    rcu_read_lock();      /* the block list is walked under RCU */
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
+        total += block->used_length;
+    rcu_read_unlock();
+    return total;
+}
+
+void free_xbzrle_decoded_buf(void)
+{
+    g_free(xbzrle_decoded_buf);
+    xbzrle_decoded_buf = NULL;   /* load_xbzrle() reallocates when NULL */
+}
+
+static void migration_end(void)
+{
+    if (migration_bitmap) {
+        memory_global_dirty_log_stop();
+        g_free(migration_bitmap);
+        migration_bitmap = NULL;      /* makes a repeated call skip this */
+    }
+
+    XBZRLE_cache_lock();
+    if (XBZRLE.cache) {
+        cache_fini(XBZRLE.cache);
+        g_free(XBZRLE.encoded_buf);
+        g_free(XBZRLE.current_buf);
+        XBZRLE.cache = NULL;          /* likewise for the xbzrle state */
+        XBZRLE.encoded_buf = NULL;
+        XBZRLE.current_buf = NULL;
+    }
+    XBZRLE_cache_unlock();
+}
+
+static void ram_migration_cancel(void *opaque)
+{
+    migration_end();   /* opaque is not used */
+}
+
+static void reset_ram_globals(void)
+{
+    last_seen_block = NULL;            /* restart the scan from the 1st block */
+    last_sent_block = NULL;
+    last_offset = 0;
+    last_version = ram_list.version;   /* re-checked in ram_save_iterate() */
+    ram_bulk_stage = true;
+}
+
+#define MAX_WAIT 50 /* ms, half buffered_file limit */
+
+
+/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
+ * long-running RCU critical section.  When rcu-reclaims in the code
+ * start to become numerous it will be necessary to reduce the
+ * granularity of these critical sections.
+ */
+
+static int ram_save_setup(QEMUFile *f, void *opaque)
+{
+    RAMBlock *block;
+    int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
+
+    mig_throttle_on = false;
+    dirty_rate_high_cnt = 0;
+    bitmap_sync_count = 0;
+    migration_bitmap_sync_init();
+
+    if (migrate_use_xbzrle()) {
+        XBZRLE_cache_lock();
+        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
+                                  TARGET_PAGE_SIZE,
+                                  TARGET_PAGE_SIZE);
+        if (!XBZRLE.cache) {
+            XBZRLE_cache_unlock();
+            error_report("Error creating cache");
+            return -1;
+        }
+        XBZRLE_cache_unlock();
+
+        /* We prefer not to abort if there is no memory */
+        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
+        if (!XBZRLE.encoded_buf) {
+            error_report("Error allocating encoded_buf");
+            return -1;
+        }
+
+        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
+        if (!XBZRLE.current_buf) {
+            error_report("Error allocating current_buf");
+            g_free(XBZRLE.encoded_buf);
+            XBZRLE.encoded_buf = NULL;
+            return -1;
+        }
+
+        acct_clear();
+    }
+
+    /* iothread lock needed for ram_list.dirty_memory[] */
+    qemu_mutex_lock_iothread();
+    qemu_mutex_lock_ramlist();
+    rcu_read_lock();
+    bytes_transferred = 0;
+    reset_ram_globals();
+
+    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+    migration_bitmap = bitmap_new(ram_bitmap_pages);
+    bitmap_set(migration_bitmap, 0, ram_bitmap_pages); /* all bits start set */
+
+    /*
+     * Count the total number of pages used by ram blocks not including any
+     * gaps due to alignment or unplugs.
+     */
+    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+
+    memory_global_dirty_log_start();
+    migration_bitmap_sync();
+    qemu_mutex_unlock_ramlist();
+    qemu_mutex_unlock_iothread();
+
+    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
+
+    /* one record per block: idstr length byte, idstr, be64 used_length */
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        qemu_put_byte(f, strlen(block->idstr));
+        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
+        qemu_put_be64(f, block->used_length);
+    }
+
+    rcu_read_unlock();
+
+    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
+    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+    return 0;
+}
+
+static int ram_save_iterate(QEMUFile *f, void *opaque)
+{
+    int ret;
+    int i;
+    int64_t t0;
+    int pages_sent = 0;
+
+    rcu_read_lock();
+    if (ram_list.version != last_version) {
+        reset_ram_globals();
+    }
+
+    /* Read version before ram_list.blocks */
+    smp_rmb();
+
+    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
+
+    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    i = 0;
+    while ((ret = qemu_file_rate_limit(f)) == 0) {
+        int pages;
+
+        pages = ram_find_and_save_block(f, false, &bytes_transferred);
+        /* no more pages to send */
+        if (pages == 0) {
+            break;
+        }
+        pages_sent += pages;
+        acct_info.iterations++;
+        check_guest_throttling();
+        /* we want to check in the 1st loop, just in case it was the 1st time
+           and we had to sync the dirty bitmap.
+           qemu_clock_get_ns() is a bit expensive, so we only check once
+           every 64 iterations
+        */
+        if ((i & 63) == 0) {
+            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
+            if (t1 > MAX_WAIT) {
+                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
+                        t1, i);
+                break;
+            }
+        }
+        i++;
+    }
+    flush_compressed_data(f);
+    rcu_read_unlock();
+
+    /*
+     * Must occur before EOS (or any QEMUFile operation)
+     * because of RDMA protocol.
+     */
+    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+    bytes_transferred += 8;
+
+    ret = qemu_file_get_error(f);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return pages_sent;
+}
+
+/* Called with iothread lock */
+static int ram_save_complete(QEMUFile *f, void *opaque)
+{
+    rcu_read_lock();
+
+    migration_bitmap_sync();
+
+    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
+
+    /* try transferring iterative blocks of memory */
+
+    /* flush all remaining blocks regardless of rate limiting */
+    while (true) {
+        int pages;
+
+        pages = ram_find_and_save_block(f, true, &bytes_transferred);
+        /* no more blocks to send */
+        if (pages == 0) {
+            break;
+        }
+    }
+
+    flush_compressed_data(f);
+    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
+    migration_end();
+
+    rcu_read_unlock();
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+    return 0;
+}
+
+static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+{
+    uint64_t pending = ram_bytes_remaining();
+
+    if (pending >= max_size) {
+        return pending;
+    }
+
+    /* Below the threshold: resync the dirty bitmap before reporting */
+    qemu_mutex_lock_iothread();
+    rcu_read_lock();
+    migration_bitmap_sync();
+    rcu_read_unlock();
+    qemu_mutex_unlock_iothread();
+    return ram_bytes_remaining();
+}
+
+static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    unsigned int xh_len;
+    int xh_flags;
+
+    if (!xbzrle_decoded_buf) {
+        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE); /* on first use */
+    }
+
+    /* extract RLE header: one flag byte, then a be16 payload length */
+    xh_flags = qemu_get_byte(f);
+    xh_len = qemu_get_be16(f);
+
+    if (xh_flags != ENCODING_FLAG_XBZRLE) {
+        error_report("Failed to load XBZRLE page - wrong compression!");
+        return -1;
+    }
+
+    if (xh_len > TARGET_PAGE_SIZE) {
+        error_report("Failed to load XBZRLE page - len overflow!");
+        return -1;
+    }
+    /* read the encoded payload into the scratch buffer */
+    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
+
+    /* decode RLE into the page at @host */
+    if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
+                             TARGET_PAGE_SIZE) == -1) {
+        error_report("Failed to load XBZRLE page - decode error!");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Must be called from within a rcu critical section.
+ * Returns a host pointer into an RCU-protected RAM block, NULL on error.
+ */
+static inline void *host_from_stream_offset(QEMUFile *f,
+                                            ram_addr_t offset,
+                                            int flags)
+{
+    static RAMBlock *block = NULL;   /* kept across calls for CONTINUE */
+    char id[256];
+    uint8_t len;
+
+    if (flags & RAM_SAVE_FLAG_CONTINUE) {
+        if (!block || block->max_length <= offset) {
+            error_report("Ack, bad migration stream!");
+            return NULL;
+        }
+
+        return memory_region_get_ram_ptr(block->mr) + offset;
+    }
+
+    len = qemu_get_byte(f);          /* uint8_t, so id[len] stays in bounds */
+    qemu_get_buffer(f, (uint8_t *)id, len);
+    id[len] = 0;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        if (!strncmp(id, block->idstr, sizeof(id)) &&
+            block->max_length > offset) {
+            return memory_region_get_ram_ptr(block->mr) + offset;
+        }
+    }
+
+    error_report("Can't find block %s!", id);
+    return NULL;
+}
+
+/* Fill @size bytes at @host with @ch.  A zero fill of memory that already
+ * reads as all zero is skipped, so that memory is not written to.
+ */
+void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
+{
+    if (ch == 0 && is_zero_range(host, size)) {
+        return;
+    }
+    memset(host, ch, size);
+}
+
+static void *do_data_decompress(void *opaque)
+{
+    DecompressParam *param = opaque;
+    unsigned long pagesize;
+
+    while (!quit_decomp_thread) {
+        qemu_mutex_lock(&param->mutex);
+        /* Wait on the predicate, not the signal: the request may already
+         * be posted, and wakeups may be spurious. */
+        while (!param->start && !quit_decomp_thread) {
+            qemu_cond_wait(&param->cond, &param->mutex);
+        }
+        if (!quit_decomp_thread) {
+            pagesize = TARGET_PAGE_SIZE;
+            /* uncompress() may fail, e.g. if the page was dirtied during
+             * compression; harmless, as the dirty page is retransferred
+             * and uncompress() won't break the data in other pages. */
+            uncompress((Bytef *)param->des, &pagesize,
+                       (const Bytef *)param->compbuf, param->len);
+        }
+        param->start = false;
+        qemu_mutex_unlock(&param->mutex);
+    }
+
+    return NULL;
+}
+
+void migrate_decompress_threads_create(void)
+{
+    int i, thread_count;
+
+    thread_count = migrate_decompress_threads();
+    decompress_threads = g_new0(QemuThread, thread_count);
+    decomp_param = g_new0(DecompressParam, thread_count);
+    compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
+    quit_decomp_thread = false;   /* workers loop until this becomes true */
+    for (i = 0; i < thread_count; i++) {
+        qemu_mutex_init(&decomp_param[i].mutex);
+        qemu_cond_init(&decomp_param[i].cond);
+        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
+        qemu_thread_create(decompress_threads + i, "decompress",
+                           do_data_decompress, decomp_param + i,
+                           QEMU_THREAD_JOINABLE);
+    }
+}
+
+void migrate_decompress_threads_join(void)
+{
+    int i, thread_count;
+
+    quit_decomp_thread = true;   /* tell the workers to leave their loops */
+    thread_count = migrate_decompress_threads();
+    for (i = 0; i < thread_count; i++) {
+        qemu_mutex_lock(&decomp_param[i].mutex);
+        qemu_cond_signal(&decomp_param[i].cond);   /* wake a sleeping worker */
+        qemu_mutex_unlock(&decomp_param[i].mutex);
+    }
+    for (i = 0; i < thread_count; i++) {
+        qemu_thread_join(decompress_threads + i);
+        qemu_mutex_destroy(&decomp_param[i].mutex);
+        qemu_cond_destroy(&decomp_param[i].cond);
+        g_free(decomp_param[i].compbuf);
+    }
+    g_free(decompress_threads);
+    g_free(decomp_param);
+    g_free(compressed_data_buf);
+    decompress_threads = NULL;
+    decomp_param = NULL;
+    compressed_data_buf = NULL;
+}
+
+static void decompress_data_with_multi_threads(uint8_t *compbuf,
+                                               void *host, int len)
+{
+    int idx, thread_count;
+
+    thread_count = migrate_decompress_threads();
+    while (true) {   /* rescans, without sleeping, until a slot is free */
+        for (idx = 0; idx < thread_count; idx++) {
+            if (!decomp_param[idx].start) {   /* read without the mutex */
+                memcpy(decomp_param[idx].compbuf, compbuf, len);
+                decomp_param[idx].des = host;
+                decomp_param[idx].len = len;
+                start_decompression(&decomp_param[idx]);
+                break;
+            }
+        }
+        if (idx < thread_count) {
+            break;
+        }
+    }
+}
+
+static int ram_load(QEMUFile *f, void *opaque, int version_id)
+{
+    int flags = 0, ret = 0;
+    static uint64_t seq_iter;
+    int len = 0;
+
+    seq_iter++;
+
+    if (version_id != 4) {
+        ret = -EINVAL;
+    }
+
+    /* This RCU critical section can be very long running.
+     * When RCU reclaims in the code start to become numerous,
+     * it will be necessary to reduce the granularity of this
+     * critical section.
+     */
+    rcu_read_lock();
+    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
+        ram_addr_t addr, total_ram_bytes;
+        void *host;
+        uint8_t ch;
+
+        addr = qemu_get_be64(f);
+        flags = addr & ~TARGET_PAGE_MASK;
+        addr &= TARGET_PAGE_MASK;
+
+        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
+        case RAM_SAVE_FLAG_MEM_SIZE:
+            /* Synchronize RAM block list */
+            total_ram_bytes = addr;
+            while (!ret && total_ram_bytes) {
+                RAMBlock *block;
+                uint8_t len;
+                char id[256];
+                ram_addr_t length;
+
+                len = qemu_get_byte(f);
+                qemu_get_buffer(f, (uint8_t *)id, len);
+                id[len] = 0;
+                length = qemu_get_be64(f);
+
+                QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+                    if (!strncmp(id, block->idstr, sizeof(id))) {
+                        if (length != block->used_length) {
+                            Error *local_err = NULL;
+
+                            ret = qemu_ram_resize(block->offset, length, &local_err);
+                            if (local_err) {
+                                error_report_err(local_err);
+                            }
+                        }
+                        break;
+                    }
+                }
+
+                if (!block) {
+                    error_report("Unknown ramblock \"%s\", cannot "
+                                 "accept migration", id);
+                    ret = -EINVAL;
+                }
+
+                total_ram_bytes -= length;
+            }
+            break;
+        case RAM_SAVE_FLAG_COMPRESS:
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+            ch = qemu_get_byte(f);
+            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
+            break;
+        case RAM_SAVE_FLAG_PAGE:
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+            break;
+        case RAM_SAVE_FLAG_COMPRESS_PAGE:
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+
+            len = qemu_get_be32(f);
+            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
+                error_report("Invalid compressed data length: %d", len);
+                ret = -EINVAL;
+                break;
+            }
+            qemu_get_buffer(f, compressed_data_buf, len);
+            decompress_data_with_multi_threads(compressed_data_buf, host, len);
+            break;
+        case RAM_SAVE_FLAG_XBZRLE:
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+            if (load_xbzrle(f, addr, host) < 0) {
+                error_report("Failed to decompress XBZRLE page at "
+                             RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
+            }
+            break;
+        case RAM_SAVE_FLAG_EOS:
+            /* normal exit */
+            break;
+        default:
+            if (flags & RAM_SAVE_FLAG_HOOK) {
+                ram_control_load_hook(f, flags);
+            } else {
+                error_report("Unknown combination of migration flags: %#x",
+                             flags);
+                ret = -EINVAL;
+            }
+        }
+        if (!ret) {
+            ret = qemu_file_get_error(f);
+        }
+    }
+
+    rcu_read_unlock();
+    DPRINTF("Completed load of VM with exit code %d seq iteration "
+            "%" PRIu64 "\n", ret, seq_iter);
+    return ret;
+}
+
+static SaveVMHandlers savevm_ram_handlers = {
+    .save_live_setup = ram_save_setup,
+    .save_live_iterate = ram_save_iterate,
+    .save_live_complete = ram_save_complete,
+    .save_live_pending = ram_save_pending,
+    .load_state = ram_load,
+    .cancel = ram_migration_cancel,
+};
+
+void ram_mig_init(void)
+{
+    qemu_mutex_init(&XBZRLE.lock);
+    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
+}
+/* Stub function that gets run on the vcpu when it's brought out of the
+   VM to run inside qemu via async_run_on_cpu() */
+
+static void mig_sleep_cpu(void *opq)
+{
+    qemu_mutex_unlock_iothread();
+    g_usleep(30*1000);
+    qemu_mutex_lock_iothread();
+}
+
+/* To reduce the dirty rate, explicitly disallow the VCPUs from spending
+   much time in the VM. The migration thread will try to catch up.
+   The workload will experience a performance drop.
+*/
+static void mig_throttle_guest_down(void)
+{
+    CPUState *cpu;
+
+    qemu_mutex_lock_iothread();
+    CPU_FOREACH(cpu) {
+        async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+    }
+    qemu_mutex_unlock_iothread();
+}
+
+static void check_guest_throttling(void)
+{
+    static int64_t t0;
+    int64_t        t1;
+
+    if (!mig_throttle_on) {
+        return;
+    }
+
+    if (!t0)  {
+        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+        return;
+    }
+
+    t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+    /* If it has been more than 40 ms since the last time the guest
+     * was throttled then do it again.
+     */
+    if (40 < (t1-t0)/1000000) {
+        mig_throttle_guest_down();
+        t0 = t1;
+    }
+}
diff --git a/trace-events b/trace-events
index 2662ffa850..dc1ef1f8e6 100644
--- a/trace-events
+++ b/trace-events
@@ -1205,7 +1205,7 @@ vmstate_subsection_load_good(const char *parent) "%s"
 # qemu-file.c
 qemu_file_fclose(void) ""
 
-# arch_init.c
+# migration/ram.c
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
 migration_throttle(void) ""

From c3049a56d69f1ee7e85b5100ba5d0e3dc69a14f1 Mon Sep 17 00:00:00 2001
From: Juan Quintela <quintela@redhat.com>
Date: Fri, 8 May 2015 12:49:01 +0200
Subject: [PATCH 02/21] migration: move savevm.c inside migration/

Now, everything is in place.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 MAINTAINERS                    | 1 -
 Makefile.target                | 4 ++--
 savevm.c => migration/savevm.c | 0
 trace-events                   | 2 +-
 4 files changed, 3 insertions(+), 4 deletions(-)
 rename savevm.c => migration/savevm.c (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index b1833959d6..e728d3a1d2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1014,7 +1014,6 @@ M: Amit Shah <amit.shah@redhat.com>
 S: Maintained
 F: include/migration/
 F: migration/
-F: savevm.c
 F: scripts/vmstate-static-checker.py
 F: tests/vmstate-static-checker-data/
 
diff --git a/Makefile.target b/Makefile.target
index 27209a7f0a..3e7aafd72d 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -132,10 +132,10 @@ obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
 obj-y += qtest.o bootdevice.o
 obj-y += hw/
 obj-$(CONFIG_KVM) += kvm-all.o
-obj-y += memory.o savevm.o cputlb.o
+obj-y += memory.o cputlb.o
 obj-y += memory_mapping.o
 obj-y += dump.o
-obj-y += migration/ram.o
+obj-y += migration/ram.o migration/savevm.o
 LIBS := $(libs_softmmu) $(LIBS)
 
 # xen support
diff --git a/savevm.c b/migration/savevm.c
similarity index 100%
rename from savevm.c
rename to migration/savevm.c
diff --git a/trace-events b/trace-events
index dc1ef1f8e6..b64e1252ce 100644
--- a/trace-events
+++ b/trace-events
@@ -1179,7 +1179,7 @@ virtio_gpu_cmd_res_flush(uint32_t res, uint32_t w, uint32_t h, uint32_t x, uint3
 virtio_gpu_fence_ctrl(uint64_t fence, uint32_t type) "fence 0x%" PRIx64 ", type 0x%x"
 virtio_gpu_fence_resp(uint64_t fence) "fence 0x%" PRIx64
 
-# savevm.c
+# migration/savevm.c
 qemu_loadvm_state_section(unsigned int section_type) "%d"
 qemu_loadvm_state_section_partend(uint32_t section_id) "%u"
 qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"

From 76cc7b587f1cd1679821e034a2d9974af9bc7d2b Mon Sep 17 00:00:00 2001
From: Juan Quintela <quintela@redhat.com>
Date: Fri, 8 May 2015 13:20:21 +0200
Subject: [PATCH 03/21] migration: Add myself to the copyright list of both
 files

If anyone feels like adding himself to the list, just send me a patch.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 migration/ram.c    | 4 ++++
 migration/savevm.c | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/migration/ram.c b/migration/ram.c
index ff889ba4ba..9db72a4c52 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2,6 +2,10 @@
  * QEMU System Emulator
  *
  * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2011-2015 Red Hat Inc
+ *
+ * Authors:
+ *  Juan Quintela <quintela@redhat.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
diff --git a/migration/savevm.c b/migration/savevm.c
index 3b0e222cb3..3dfa425071 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2,6 +2,10 @@
  * QEMU System Emulator
  *
  * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009-2015 Red Hat Inc
+ *
+ * Authors:
+ *  Juan Quintela <quintela@redhat.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal

From 7205c9ec525fe375dd34c0f116c36dc4aab4c0f7 Mon Sep 17 00:00:00 2001
From: Juan Quintela <quintela@redhat.com>
Date: Fri, 8 May 2015 13:54:36 +0200
Subject: [PATCH 04/21] migration: reduce include files

To make the move easier, the copy kept almost all of the include
files.  This patch removes the unnecessary ones.  It compiles
on Linux x64 with all architectures configured, and cross-compiles for
32-bit and 64-bit Windows.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 arch_init.c     | 23 -----------------------
 migration/ram.c | 18 ++----------------
 2 files changed, 2 insertions(+), 39 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 63c44d379a..725c638ece 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -22,38 +22,15 @@
  * THE SOFTWARE.
  */
 #include <stdint.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <zlib.h>
-#ifndef _WIN32
-#include <sys/types.h>
-#include <sys/mman.h>
-#endif
-#include "config.h"
-#include "monitor/monitor.h"
 #include "sysemu/sysemu.h"
-#include "qemu/bitops.h"
-#include "qemu/bitmap.h"
 #include "sysemu/arch_init.h"
-#include "audio/audio.h"
-#include "hw/i386/pc.h"
 #include "hw/pci/pci.h"
 #include "hw/audio/audio.h"
-#include "sysemu/kvm.h"
-#include "migration/migration.h"
 #include "hw/i386/smbios.h"
-#include "exec/address-spaces.h"
-#include "hw/audio/pcspk.h"
-#include "migration/page_cache.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qmp-commands.h"
-#include "trace.h"
-#include "exec/cpu-all.h"
-#include "exec/ram_addr.h"
 #include "hw/acpi/acpi.h"
-#include "qemu/host-utils.h"
-#include "qemu/rcu_queue.h"
 
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
diff --git a/migration/ram.c b/migration/ram.c
index 9db72a4c52..3945328609 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -26,31 +26,17 @@
  * THE SOFTWARE.
  */
 #include <stdint.h>
-#include <stdarg.h>
-#include <stdlib.h>
 #include <zlib.h>
-#ifndef _WIN32
-#include <sys/types.h>
-#include <sys/mman.h>
-#endif
-#include "config.h"
-#include "monitor/monitor.h"
-#include "sysemu/sysemu.h"
 #include "qemu/bitops.h"
 #include "qemu/bitmap.h"
-#include "hw/i386/pc.h"
-#include "hw/pci/pci.h"
-#include "hw/audio/audio.h"
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
 #include "migration/migration.h"
 #include "exec/address-spaces.h"
 #include "migration/page_cache.h"
-#include "qemu/config-file.h"
 #include "qemu/error-report.h"
-#include "qmp-commands.h"
 #include "trace.h"
-#include "exec/cpu-all.h"
 #include "exec/ram_addr.h"
-#include "qemu/host-utils.h"
 #include "qemu/rcu_queue.h"
 
 #ifdef DEBUG_MIGRATION_RAM

From 5ee6926582cca64238967b2d00d870265cdb10b8 Mon Sep 17 00:00:00 2001
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
Date: Fri, 15 May 2015 17:00:03 +0800
Subject: [PATCH 05/21] arch_init: Clean up the duplicate variable 'len'
 defining in ram_load()

The 'len' variable is defined in two places.  This compiles fine,
but the shadowing makes the code harder to read.

Remove the local definition inside the inner 'while' loop.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 migration/ram.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/migration/ram.c b/migration/ram.c
index 3945328609..57368e1575 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1459,7 +1459,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
             total_ram_bytes = addr;
             while (!ret && total_ram_bytes) {
                 RAMBlock *block;
-                uint8_t len;
                 char id[256];
                 ram_addr_t length;
 

From 5b61d5752156dcbbe2bf1366c877a676ed9f8f51 Mon Sep 17 00:00:00 2001
From: Padmanabh Ratnakar <padmanabh.ratnakar@avagotech.com>
Date: Wed, 3 Jun 2015 04:44:10 +0530
Subject: [PATCH 06/21] rdma: Fix qemu crash when IPv6 address is used for
 migration

QEMU crashes when an IPv6 address is specified for migration and access
to any RDMA uverbs device available on the system is blocked using cgroups.
Fix the crash by checking the return value of ibv_open_device().

Signed-off-by: Meghana Cheripady <meghana.cheripady@avagotech.com>
Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@avagotech.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 migration/rdma.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/migration/rdma.c b/migration/rdma.c
index 77e34441dc..36719038f7 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -790,6 +790,13 @@ static int qemu_rdma_broken_ipv6_kernel(Error **errp, struct ibv_context *verbs)
 
         for (x = 0; x < num_devices; x++) {
             verbs = ibv_open_device(dev_list[x]);
+            if (!verbs) {
+                if (errno == EPERM) {
+                    continue;
+                } else {
+                    return -EINVAL;
+                }
+            }
 
             if (ibv_query_port(verbs, 1, &port_attr)) {
                 ibv_close_device(verbs);

From e45a1ebfc65fb23be8cddb684d97eaa92725484d Mon Sep 17 00:00:00 2001
From: Juan Quintela <quintela@redhat.com>
Date: Wed, 20 May 2015 17:14:28 +0200
Subject: [PATCH 07/21] migration: Remove duplicated assignment of SETUP status

We assign the MIGRATION_STATUS_SETUP status in two places, one right
after the other.  Remove the second one.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 migration/migration.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 732d229708..5d77046a69 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -838,9 +838,6 @@ static void *migration_thread(void *opaque)
 
 void migrate_fd_connect(MigrationState *s)
 {
-    s->state = MIGRATION_STATUS_SETUP;
-    trace_migrate_set_state(MIGRATION_STATUS_SETUP);
-
     /* This is a best 1st approximation. ns to ms */
     s->expected_downtime = max_downtime/1000000;
     s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);

From 0163a2e025cda6acb33e100d296965671ace17d9 Mon Sep 17 00:00:00 2001
From: Juan Quintela <quintela@redhat.com>
Date: Wed, 13 May 2015 13:37:04 +0200
Subject: [PATCH 08/21] migration: create savevm_state

This way, we will put savevm global state here, instead of lots of variables.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 migration/savevm.c | 51 +++++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 3dfa425071..1a45d39a4b 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -239,10 +239,15 @@ typedef struct SaveStateEntry {
     int is_ram;
 } SaveStateEntry;
 
+typedef struct SaveState {
+    QTAILQ_HEAD(, SaveStateEntry) handlers;
+    int global_section_id;
+} SaveState;
 
-static QTAILQ_HEAD(savevm_handlers, SaveStateEntry) savevm_handlers =
-    QTAILQ_HEAD_INITIALIZER(savevm_handlers);
-static int global_section_id;
+static SaveState savevm_state = {
+    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
+    .global_section_id = 0,
+};
 
 static void dump_vmstate_vmsd(FILE *out_file,
                               const VMStateDescription *vmsd, int indent,
@@ -387,7 +392,7 @@ static int calculate_new_instance_id(const char *idstr)
     SaveStateEntry *se;
     int instance_id = 0;
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (strcmp(idstr, se->idstr) == 0
             && instance_id <= se->instance_id) {
             instance_id = se->instance_id + 1;
@@ -401,7 +406,7 @@ static int calculate_compat_instance_id(const char *idstr)
     SaveStateEntry *se;
     int instance_id = 0;
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->compat) {
             continue;
         }
@@ -429,7 +434,7 @@ int register_savevm_live(DeviceState *dev,
 
     se = g_malloc0(sizeof(SaveStateEntry));
     se->version_id = version_id;
-    se->section_id = global_section_id++;
+    se->section_id = savevm_state.global_section_id++;
     se->ops = ops;
     se->opaque = opaque;
     se->vmsd = NULL;
@@ -461,7 +466,7 @@ int register_savevm_live(DeviceState *dev,
     }
     assert(!se->compat || se->instance_id == 0);
     /* add at the end of list */
-    QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry);
+    QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
     return 0;
 }
 
@@ -495,9 +500,9 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
     }
     pstrcat(id, sizeof(id), idstr);
 
-    QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) {
+    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
         if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
-            QTAILQ_REMOVE(&savevm_handlers, se, entry);
+            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
             if (se->compat) {
                 g_free(se->compat);
             }
@@ -519,7 +524,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
 
     se = g_malloc0(sizeof(SaveStateEntry));
     se->version_id = vmsd->version_id;
-    se->section_id = global_section_id++;
+    se->section_id = savevm_state.global_section_id++;
     se->opaque = opaque;
     se->vmsd = vmsd;
     se->alias_id = alias_id;
@@ -547,7 +552,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
     }
     assert(!se->compat || se->instance_id == 0);
     /* add at the end of list */
-    QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry);
+    QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
     return 0;
 }
 
@@ -556,9 +561,9 @@ void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
 {
     SaveStateEntry *se, *new_se;
 
-    QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) {
+    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
         if (se->vmsd == vmsd && se->opaque == opaque) {
-            QTAILQ_REMOVE(&savevm_handlers, se, entry);
+            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
             if (se->compat) {
                 g_free(se->compat);
             }
@@ -610,7 +615,7 @@ bool qemu_savevm_state_blocked(Error **errp)
 {
     SaveStateEntry *se;
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->vmsd && se->vmsd->unmigratable) {
             error_setg(errp, "State blocked by non-migratable device '%s'",
                        se->idstr);
@@ -627,7 +632,7 @@ void qemu_savevm_state_begin(QEMUFile *f,
     int ret;
 
     trace_savevm_state_begin();
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->set_params) {
             continue;
         }
@@ -637,7 +642,7 @@ void qemu_savevm_state_begin(QEMUFile *f,
     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
     qemu_put_be32(f, QEMU_VM_FILE_VERSION);
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         int len;
 
         if (!se->ops || !se->ops->save_live_setup) {
@@ -680,7 +685,7 @@ int qemu_savevm_state_iterate(QEMUFile *f)
     int ret = 1;
 
     trace_savevm_state_iterate();
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_live_iterate) {
             continue;
         }
@@ -731,7 +736,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
 
     cpu_synchronize_all_states();
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_live_complete) {
             continue;
         }
@@ -756,7 +761,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
     vmdesc = qjson_new();
     json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE);
     json_start_array(vmdesc, "devices");
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         int len;
 
         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
@@ -807,7 +812,7 @@ uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size)
     SaveStateEntry *se;
     uint64_t ret = 0;
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_live_pending) {
             continue;
         }
@@ -826,7 +831,7 @@ void qemu_savevm_state_cancel(void)
     SaveStateEntry *se;
 
     trace_savevm_state_cancel();
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->ops && se->ops->cancel) {
             se->ops->cancel(se->opaque);
         }
@@ -876,7 +881,7 @@ static int qemu_save_device_state(QEMUFile *f)
 
     cpu_synchronize_all_states();
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         int len;
 
         if (se->is_ram) {
@@ -910,7 +915,7 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id)
 {
     SaveStateEntry *se;
 
-    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!strcmp(se->idstr, idstr) &&
             (instance_id == se->instance_id ||
              instance_id == se->alias_id))

From 5cd8cadae8db905afcbf877cae568c27d1d55a8a Mon Sep 17 00:00:00 2001
From: Juan Quintela <quintela@redhat.com>
Date: Tue, 23 Sep 2014 14:09:54 +0200
Subject: [PATCH 09/21] migration: Use normal VMStateDescriptions for
 Subsections

We create optional sections with this patch.  But we already have
optional subsections.  Instead of having two mechanisms that do the
same thing, we can just generalize it.

For subsections we just change:

- Add a needed function to VMStateDescription
- Remove VMStateSubsection (after removal of the needed function
  it is just a VMStateDescription)
- Adjust the whole tree, moving the needed function to the corresponding
  VMStateDescription

Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 cpus.c                      | 11 ++---
 docs/migration.txt          | 11 ++---
 exec.c                      | 11 ++---
 hw/acpi/ich9.c              | 10 ++---
 hw/acpi/piix4.c             | 10 ++---
 hw/block/fdc.c              | 42 ++++++++-----------
 hw/char/serial.c            | 41 ++++++++-----------
 hw/display/qxl.c            | 11 ++---
 hw/display/vga.c            | 11 ++---
 hw/ide/core.c               | 32 ++++++---------
 hw/ide/pci.c                | 16 +++-----
 hw/input/pckbd.c            | 32 +++++++--------
 hw/input/ps2.c              | 11 ++---
 hw/intc/apic_common.c       | 10 ++---
 hw/isa/lpc_ich9.c           | 10 ++---
 hw/net/e1000.c              | 11 ++---
 hw/net/rtl8139.c            | 11 ++---
 hw/net/vmxnet3.c            | 12 ++----
 hw/pci-host/piix.c          | 10 ++---
 hw/scsi/scsi-bus.c          | 11 ++---
 hw/timer/hpet.c             | 11 ++---
 hw/timer/mc146818rtc.c      | 31 +++++++-------
 hw/usb/hcd-ohci.c           | 11 ++---
 hw/usb/redirect.c           | 42 +++++++++----------
 hw/virtio/virtio.c          | 16 +++-----
 include/migration/vmstate.h |  8 +---
 migration/savevm.c          | 10 ++---
 migration/vmstate.c         | 16 ++++----
 target-arm/machine.c        | 26 +++++-------
 target-i386/machine.c       | 81 +++++++++++++++----------------------
 target-ppc/machine.c        | 62 +++++++++++-----------------
 target-s390x/machine.c      | 30 ++++++--------
 32 files changed, 266 insertions(+), 402 deletions(-)

diff --git a/cpus.c b/cpus.c
index f38b858f9b..b85fb5f03f 100644
--- a/cpus.c
+++ b/cpus.c
@@ -480,6 +480,7 @@ static const VMStateDescription icount_vmstate_timers = {
     .name = "timer/icount",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = icount_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT64(qemu_icount_bias, TimersState),
         VMSTATE_INT64(qemu_icount, TimersState),
@@ -497,13 +498,9 @@ static const VMStateDescription vmstate_timers = {
         VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &icount_vmstate_timers,
-            .needed = icount_state_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &icount_vmstate_timers,
+        NULL
     }
 };
 
diff --git a/docs/migration.txt b/docs/migration.txt
index 0492a4547a..f6df4beb2a 100644
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -257,6 +257,7 @@ const VMStateDescription vmstate_ide_drive_pio_state = {
     .minimum_version_id = 1,
     .pre_save = ide_drive_pio_pre_save,
     .post_load = ide_drive_pio_post_load,
+    .needed = ide_drive_pio_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(req_nb_sectors, IDEState),
         VMSTATE_VARRAY_INT32(io_buffer, IDEState, io_buffer_total_len, 1,
@@ -279,13 +280,9 @@ const VMStateDescription vmstate_ide_drive = {
         .... several fields ....
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_ide_drive_pio_state,
-            .needed = ide_drive_pio_state_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ide_drive_pio_state,
+        NULL
     }
 };
 
diff --git a/exec.c b/exec.c
index 487583b1bd..ba3f2cf6df 100644
--- a/exec.c
+++ b/exec.c
@@ -454,6 +454,7 @@ static const VMStateDescription vmstate_cpu_common_exception_index = {
     .name = "cpu_common/exception_index",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = cpu_common_exception_index_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(exception_index, CPUState),
         VMSTATE_END_OF_LIST()
@@ -471,13 +472,9 @@ const VMStateDescription vmstate_cpu_common = {
         VMSTATE_UINT32(interrupt_request, CPUState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_cpu_common_exception_index,
-            .needed = cpu_common_exception_index_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_cpu_common_exception_index,
+        NULL
     }
 };
 
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 25bc023882..8a64ffb38f 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -152,6 +152,7 @@ static const VMStateDescription vmstate_memhp_state = {
     .version_id = 1,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
+    .needed = vmstate_test_use_memhp,
     .fields      = (VMStateField[]) {
         VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, ICH9LPCPMRegs),
         VMSTATE_END_OF_LIST()
@@ -175,12 +176,9 @@ const VMStateDescription vmstate_ich9_pm = {
         VMSTATE_UINT32(smi_sts, ICH9LPCPMRegs),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_memhp_state,
-            .needed = vmstate_test_use_memhp,
-        },
-        VMSTATE_END_OF_LIST()
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_memhp_state,
+        NULL
     }
 };
 
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index b730ca6ced..3bd1d5a865 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -260,6 +260,7 @@ static const VMStateDescription vmstate_memhp_state = {
     .version_id = 1,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
+    .needed = vmstate_test_use_memhp,
     .fields      = (VMStateField[]) {
         VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, PIIX4PMState),
         VMSTATE_END_OF_LIST()
@@ -298,12 +299,9 @@ static const VMStateDescription vmstate_acpi = {
                             vmstate_test_use_acpi_pci_hotplug),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_memhp_state,
-            .needed = vmstate_test_use_memhp,
-        },
-        VMSTATE_END_OF_LIST()
+    .subsections = (const VMStateDescription*[]) {
+         &vmstate_memhp_state,
+         NULL
     }
 };
 
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 6e794597dc..5e1b67ee43 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -699,6 +699,7 @@ static const VMStateDescription vmstate_fdrive_media_changed = {
     .name = "fdrive/media_changed",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdrive_media_changed_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(media_changed, FDrive),
         VMSTATE_END_OF_LIST()
@@ -716,6 +717,7 @@ static const VMStateDescription vmstate_fdrive_media_rate = {
     .name = "fdrive/media_rate",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdrive_media_rate_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(media_rate, FDrive),
         VMSTATE_END_OF_LIST()
@@ -733,6 +735,7 @@ static const VMStateDescription vmstate_fdrive_perpendicular = {
     .name = "fdrive/perpendicular",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdrive_perpendicular_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(perpendicular, FDrive),
         VMSTATE_END_OF_LIST()
@@ -756,19 +759,11 @@ static const VMStateDescription vmstate_fdrive = {
         VMSTATE_UINT8(sect, FDrive),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_fdrive_media_changed,
-            .needed = &fdrive_media_changed_needed,
-        } , {
-            .vmsd = &vmstate_fdrive_media_rate,
-            .needed = &fdrive_media_rate_needed,
-        } , {
-            .vmsd = &vmstate_fdrive_perpendicular,
-            .needed = &fdrive_perpendicular_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_fdrive_media_changed,
+        &vmstate_fdrive_media_rate,
+        &vmstate_fdrive_perpendicular,
+        NULL
     }
 };
 
@@ -833,6 +828,7 @@ static const VMStateDescription vmstate_fdc_reset_sensei = {
     .name = "fdc/reset_sensei",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdc_reset_sensei_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(reset_sensei, FDCtrl),
         VMSTATE_END_OF_LIST()
@@ -850,6 +846,7 @@ static const VMStateDescription vmstate_fdc_result_timer = {
     .name = "fdc/result_timer",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdc_result_timer_needed,
     .fields = (VMStateField[]) {
         VMSTATE_TIMER_PTR(result_timer, FDCtrl),
         VMSTATE_END_OF_LIST()
@@ -867,6 +864,7 @@ static const VMStateDescription vmstate_fdc_phase = {
     .name = "fdc/phase",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fdc_phase_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(phase, FDCtrl),
         VMSTATE_END_OF_LIST()
@@ -911,19 +909,11 @@ static const VMStateDescription vmstate_fdc = {
                              vmstate_fdrive, FDrive),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_fdc_reset_sensei,
-            .needed = fdc_reset_sensei_needed,
-        } , {
-            .vmsd = &vmstate_fdc_result_timer,
-            .needed = fdc_result_timer_needed,
-        } , {
-            .vmsd = &vmstate_fdc_phase,
-            .needed = fdc_phase_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_fdc_reset_sensei,
+        &vmstate_fdc_result_timer,
+        &vmstate_fdc_phase,
+        NULL
     }
 };
 
diff --git a/hw/char/serial.c b/hw/char/serial.c
index 55011cfd26..513d73c27f 100644
--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -662,6 +662,7 @@ static const VMStateDescription vmstate_serial_thr_ipending = {
     .name = "serial/thr_ipending",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_thr_ipending_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(thr_ipending, SerialState),
         VMSTATE_END_OF_LIST()
@@ -678,6 +679,7 @@ static const VMStateDescription vmstate_serial_tsr = {
     .name = "serial/tsr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_tsr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(tsr_retry, SerialState),
         VMSTATE_UINT8(thr, SerialState),
@@ -697,6 +699,7 @@ static const VMStateDescription vmstate_serial_recv_fifo = {
     .name = "serial/recv_fifo",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_recv_fifo_needed,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT(recv_fifo, SerialState, 1, vmstate_fifo8, Fifo8),
         VMSTATE_END_OF_LIST()
@@ -713,6 +716,7 @@ static const VMStateDescription vmstate_serial_xmit_fifo = {
     .name = "serial/xmit_fifo",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_xmit_fifo_needed,
     .fields = (VMStateField[]) {
         VMSTATE_STRUCT(xmit_fifo, SerialState, 1, vmstate_fifo8, Fifo8),
         VMSTATE_END_OF_LIST()
@@ -729,6 +733,7 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = {
     .name = "serial/fifo_timeout_timer",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_fifo_timeout_timer_needed,
     .fields = (VMStateField[]) {
         VMSTATE_TIMER_PTR(fifo_timeout_timer, SerialState),
         VMSTATE_END_OF_LIST()
@@ -745,6 +750,7 @@ static const VMStateDescription vmstate_serial_timeout_ipending = {
     .name = "serial/timeout_ipending",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = serial_timeout_ipending_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(timeout_ipending, SerialState),
         VMSTATE_END_OF_LIST()
@@ -760,6 +766,7 @@ static bool serial_poll_needed(void *opaque)
 static const VMStateDescription vmstate_serial_poll = {
     .name = "serial/poll",
     .version_id = 1,
+    .needed = serial_poll_needed,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(poll_msl, SerialState),
@@ -788,31 +795,15 @@ const VMStateDescription vmstate_serial = {
         VMSTATE_UINT8_V(fcr_vmstate, SerialState, 3),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_serial_thr_ipending,
-            .needed = &serial_thr_ipending_needed,
-        } , {
-            .vmsd = &vmstate_serial_tsr,
-            .needed = &serial_tsr_needed,
-        } , {
-            .vmsd = &vmstate_serial_recv_fifo,
-            .needed = &serial_recv_fifo_needed,
-        } , {
-            .vmsd = &vmstate_serial_xmit_fifo,
-            .needed = &serial_xmit_fifo_needed,
-        } , {
-            .vmsd = &vmstate_serial_fifo_timeout_timer,
-            .needed = &serial_fifo_timeout_timer_needed,
-        } , {
-            .vmsd = &vmstate_serial_timeout_ipending,
-            .needed = &serial_timeout_ipending_needed,
-        } , {
-            .vmsd = &vmstate_serial_poll,
-            .needed = &serial_poll_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_serial_thr_ipending,
+        &vmstate_serial_tsr,
+        &vmstate_serial_recv_fifo,
+        &vmstate_serial_xmit_fifo,
+        &vmstate_serial_fifo_timeout_timer,
+        &vmstate_serial_timeout_ipending,
+        &vmstate_serial_poll,
+        NULL
     }
 };
 
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index b220e2d5d2..722146ec3a 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2220,6 +2220,7 @@ static VMStateDescription qxl_vmstate_monitors_config = {
     .name               = "qxl/monitors-config",
     .version_id         = 1,
     .minimum_version_id = 1,
+    .needed = qxl_monitors_config_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(guest_monitors_config, PCIQXLDevice),
         VMSTATE_END_OF_LIST()
@@ -2253,13 +2254,9 @@ static VMStateDescription qxl_vmstate = {
         VMSTATE_UINT64(guest_cursor, PCIQXLDevice),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &qxl_vmstate_monitors_config,
-            .needed = qxl_monitors_config_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &qxl_vmstate_monitors_config,
+        NULL
     }
 };
 
diff --git a/hw/display/vga.c b/hw/display/vga.c
index d1d296c74e..b35d523e65 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -2035,6 +2035,7 @@ static const VMStateDescription vmstate_vga_endian = {
     .name = "vga.endian",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = vga_endian_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_BOOL(big_endian_fb, VGACommonState),
         VMSTATE_END_OF_LIST()
@@ -2078,13 +2079,9 @@ const VMStateDescription vmstate_vga_common = {
         VMSTATE_UINT32(vbe_bank_mask, VGACommonState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_vga_endian,
-            .needed = vga_endian_state_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_vga_endian,
+        NULL
     }
 };
 
diff --git a/hw/ide/core.c b/hw/ide/core.c
index fcb908061c..1efd98af63 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2561,6 +2561,7 @@ static const VMStateDescription vmstate_ide_atapi_gesn_state = {
     .name ="ide_drive/atapi/gesn_state",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = ide_atapi_gesn_needed,
     .fields = (VMStateField[]) {
         VMSTATE_BOOL(events.new_media, IDEState),
         VMSTATE_BOOL(events.eject_request, IDEState),
@@ -2572,6 +2573,7 @@ static const VMStateDescription vmstate_ide_tray_state = {
     .name = "ide_drive/tray_state",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = ide_tray_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_BOOL(tray_open, IDEState),
         VMSTATE_BOOL(tray_locked, IDEState),
@@ -2585,6 +2587,7 @@ static const VMStateDescription vmstate_ide_drive_pio_state = {
     .minimum_version_id = 1,
     .pre_save = ide_drive_pio_pre_save,
     .post_load = ide_drive_pio_post_load,
+    .needed = ide_drive_pio_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(req_nb_sectors, IDEState),
         VMSTATE_VARRAY_INT32(io_buffer, IDEState, io_buffer_total_len, 1,
@@ -2626,19 +2629,11 @@ const VMStateDescription vmstate_ide_drive = {
         VMSTATE_UINT8_V(cdrom_changed, IDEState, 3),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_ide_drive_pio_state,
-            .needed = ide_drive_pio_state_needed,
-        }, {
-            .vmsd = &vmstate_ide_tray_state,
-            .needed = ide_tray_state_needed,
-        }, {
-            .vmsd = &vmstate_ide_atapi_gesn_state,
-            .needed = ide_atapi_gesn_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ide_drive_pio_state,
+        &vmstate_ide_tray_state,
+        &vmstate_ide_atapi_gesn_state,
+        NULL
     }
 };
 
@@ -2646,6 +2641,7 @@ static const VMStateDescription vmstate_ide_error_status = {
     .name ="ide_bus/error",
     .version_id = 2,
     .minimum_version_id = 1,
+    .needed = ide_error_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(error_status, IDEBus),
         VMSTATE_INT64_V(retry_sector_num, IDEBus, 2),
@@ -2664,13 +2660,9 @@ const VMStateDescription vmstate_ide_bus = {
         VMSTATE_UINT8(unit, IDEBus),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_ide_error_status,
-            .needed = ide_error_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ide_error_status,
+        NULL
     }
 };
 
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index 4b5e32dcbe..4afd0cfe8c 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -350,6 +350,7 @@ static const VMStateDescription vmstate_bmdma_current = {
     .name = "ide bmdma_current",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = ide_bmdma_current_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(cur_addr, BMDMAState),
         VMSTATE_UINT32(cur_prd_last, BMDMAState),
@@ -363,6 +364,7 @@ static const VMStateDescription vmstate_bmdma_status = {
     .name ="ide bmdma/status",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = ide_bmdma_status_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(status, BMDMAState),
         VMSTATE_END_OF_LIST()
@@ -383,16 +385,10 @@ static const VMStateDescription vmstate_bmdma = {
         VMSTATE_UINT8(migration_retry_unit, BMDMAState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_bmdma_current,
-            .needed = ide_bmdma_current_needed,
-        }, {
-            .vmsd = &vmstate_bmdma_status,
-            .needed = ide_bmdma_status_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_bmdma_current,
+        &vmstate_bmdma_status,
+        NULL
     }
 };
 
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index 9b9a7d7a8a..ddac69df6f 100644
--- a/hw/input/pckbd.c
+++ b/hw/input/pckbd.c
@@ -391,23 +391,24 @@ static int kbd_outport_post_load(void *opaque, int version_id)
     return 0;
 }
 
-static const VMStateDescription vmstate_kbd_outport = {
-    .name = "pckbd_outport",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .post_load = kbd_outport_post_load,
-    .fields = (VMStateField[]) {
-        VMSTATE_UINT8(outport, KBDState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 static bool kbd_outport_needed(void *opaque)
 {
     KBDState *s = opaque;
     return s->outport != kbd_outport_default(s);
 }
 
+static const VMStateDescription vmstate_kbd_outport = {
+    .name = "pckbd_outport",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = kbd_outport_post_load,
+    .needed = kbd_outport_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(outport, KBDState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static int kbd_post_load(void *opaque, int version_id)
 {
     KBDState *s = opaque;
@@ -430,12 +431,9 @@ static const VMStateDescription vmstate_kbd = {
         VMSTATE_UINT8(pending, KBDState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_kbd_outport,
-            .needed = kbd_outport_needed,
-        },
-        VMSTATE_END_OF_LIST()
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_kbd_outport,
+        NULL
     }
 };
 
diff --git a/hw/input/ps2.c b/hw/input/ps2.c
index 4baeea2b56..fdbe565e62 100644
--- a/hw/input/ps2.c
+++ b/hw/input/ps2.c
@@ -677,6 +677,7 @@ static const VMStateDescription vmstate_ps2_keyboard_ledstate = {
     .version_id = 3,
     .minimum_version_id = 2,
     .post_load = ps2_kbd_ledstate_post_load,
+    .needed = ps2_keyboard_ledstate_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(ledstate, PS2KbdState),
         VMSTATE_END_OF_LIST()
@@ -717,13 +718,9 @@ static const VMStateDescription vmstate_ps2_keyboard = {
         VMSTATE_INT32_V(scancode_set, PS2KbdState,3),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_ps2_keyboard_ledstate,
-            .needed = ps2_keyboard_ledstate_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ps2_keyboard_ledstate,
+        NULL
     }
 };
 
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d595d63a51..0032b97c5f 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -369,6 +369,7 @@ static const VMStateDescription vmstate_apic_common_sipi = {
     .name = "apic_sipi",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = apic_common_sipi_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32(sipi_vector, APICCommonState),
         VMSTATE_INT32(wait_for_sipi, APICCommonState),
@@ -408,12 +409,9 @@ static const VMStateDescription vmstate_apic_common = {
                       APICCommonState), /* open-coded timer state */
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_apic_common_sipi,
-            .needed = apic_common_sipi_needed,
-        },
-        VMSTATE_END_OF_LIST()
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_apic_common_sipi,
+        NULL
     }
 };
 
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 71a9f7a716..b3e0b1fd52 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -650,6 +650,7 @@ static const VMStateDescription vmstate_ich9_rst_cnt = {
     .name = "ICH9LPC/rst_cnt",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = ich9_rst_cnt_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(rst_cnt, ICH9LPCState),
         VMSTATE_END_OF_LIST()
@@ -669,12 +670,9 @@ static const VMStateDescription vmstate_ich9_lpc = {
         VMSTATE_UINT32(sci_level, ICH9LPCState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_ich9_rst_cnt,
-            .needed = ich9_rst_cnt_needed
-        },
-        { 0 }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ich9_rst_cnt,
+        NULL
     }
 };
 
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 091d61acc3..bab8e2abfb 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1370,6 +1370,7 @@ static const VMStateDescription vmstate_e1000_mit_state = {
     .name = "e1000/mit_state",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = e1000_mit_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(mac_reg[RDTR], E1000State),
         VMSTATE_UINT32(mac_reg[RADV], E1000State),
@@ -1457,13 +1458,9 @@ static const VMStateDescription vmstate_e1000 = {
         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_e1000_mit_state,
-            .needed = e1000_mit_state_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_e1000_mit_state,
+        NULL
     }
 };
 
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index f868108dfe..e0db4727ae 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -3240,6 +3240,7 @@ static const VMStateDescription vmstate_rtl8139_hotplug_ready ={
     .name = "rtl8139/hotplug_ready",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = rtl8139_hotplug_ready_needed,
     .fields = (VMStateField[]) {
         VMSTATE_END_OF_LIST()
     }
@@ -3335,13 +3336,9 @@ static const VMStateDescription vmstate_rtl8139 = {
         VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_rtl8139_hotplug_ready,
-            .needed = rtl8139_hotplug_ready_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_rtl8139_hotplug_ready,
+        NULL
     }
 };
 
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index dfb328debd..8bcdf3ed77 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2226,6 +2226,7 @@ static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
     .version_id = 1,
     .minimum_version_id = 1,
     .pre_load = vmxnet3_mcast_list_pre_load,
+    .needed = vmxnet3_mc_list_needed,
     .fields = (VMStateField[]) {
         VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
             mcast_list_buff_size),
@@ -2470,14 +2471,9 @@ static const VMStateDescription vmstate_vmxnet3 = {
 
             VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmxstate_vmxnet3_mcast_list,
-            .needed = vmxnet3_mc_list_needed
-        },
-        {
-            /* empty element. */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmxstate_vmxnet3_mcast_list,
+        NULL
     }
 };
 
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index f1712b86fe..ed2424c4cd 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -582,6 +582,7 @@ static const VMStateDescription vmstate_piix3_rcr = {
     .name = "PIIX3/rcr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = piix3_rcr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(rcr, PIIX3State),
         VMSTATE_END_OF_LIST()
@@ -600,12 +601,9 @@ static const VMStateDescription vmstate_piix3 = {
                               PIIX_NUM_PIRQS, 3),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_piix3_rcr,
-            .needed = piix3_rcr_needed,
-        },
-        { 0 }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_piix3_rcr,
+        NULL
     }
 };
 
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index bd2c0e4caa..f50b2f08af 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -1968,6 +1968,7 @@ static const VMStateDescription vmstate_scsi_sense_state = {
     .name = "SCSIDevice/sense",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = scsi_sense_state_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8_SUB_ARRAY(sense, SCSIDevice,
                                 SCSI_SENSE_BUF_SIZE_OLD,
@@ -1998,13 +1999,9 @@ const VMStateDescription vmstate_scsi_device = {
         },
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_scsi_sense_state,
-            .needed = scsi_sense_state_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_scsi_sense_state,
+        NULL
     }
 };
 
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index b6b8a2063d..b50071ef93 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -283,6 +283,7 @@ static const VMStateDescription vmstate_hpet_rtc_irq_level = {
     .name = "hpet/rtc_irq_level",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = hpet_rtc_irq_level_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(rtc_irq_level, HPETState),
         VMSTATE_END_OF_LIST()
@@ -322,13 +323,9 @@ static const VMStateDescription vmstate_hpet = {
                                     vmstate_hpet_timer, HPETTimer),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_hpet_rtc_irq_level,
-            .needed = hpet_rtc_irq_level_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_hpet_rtc_irq_level,
+        NULL
     }
 };
 
diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index f2b77fa118..32048258c9 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -733,22 +733,23 @@ static int rtc_post_load(void *opaque, int version_id)
     return 0;
 }
 
-static const VMStateDescription vmstate_rtc_irq_reinject_on_ack_count = {
-    .name = "mc146818rtc/irq_reinject_on_ack_count",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .fields = (VMStateField[]) {
-        VMSTATE_UINT16(irq_reinject_on_ack_count, RTCState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 static bool rtc_irq_reinject_on_ack_count_needed(void *opaque)
 {
     RTCState *s = (RTCState *)opaque;
     return s->irq_reinject_on_ack_count != 0;
 }
 
+static const VMStateDescription vmstate_rtc_irq_reinject_on_ack_count = {
+    .name = "mc146818rtc/irq_reinject_on_ack_count",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = rtc_irq_reinject_on_ack_count_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16(irq_reinject_on_ack_count, RTCState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_rtc = {
     .name = "mc146818rtc",
     .version_id = 3,
@@ -770,13 +771,9 @@ static const VMStateDescription vmstate_rtc = {
         VMSTATE_UINT64_V(next_alarm_time, RTCState, 3),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_rtc_irq_reinject_on_ack_count,
-            .needed = rtc_irq_reinject_on_ack_count_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_rtc_irq_reinject_on_ack_count,
+        NULL
     }
 };
 
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 1a22c9c0cb..7d65818064 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -2034,6 +2034,7 @@ static const VMStateDescription vmstate_ohci_eof_timer = {
     .version_id = 1,
     .minimum_version_id = 1,
     .pre_load = ohci_eof_timer_pre_load,
+    .needed = ohci_eof_timer_needed,
     .fields = (VMStateField[]) {
         VMSTATE_TIMER_PTR(eof_timer, OHCIState),
         VMSTATE_END_OF_LIST()
@@ -2081,13 +2082,9 @@ static const VMStateDescription vmstate_ohci_state = {
         VMSTATE_BOOL(async_complete, OHCIState),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_ohci_eof_timer,
-            .needed = ohci_eof_timer_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_ohci_eof_timer,
+        NULL
     }
 };
 
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 242a654583..6b4218c037 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -2257,16 +2257,6 @@ static const VMStateInfo usbredir_ep_bufpq_vmstate_info = {
 
 
 /* For endp_data migration */
-static const VMStateDescription usbredir_bulk_receiving_vmstate = {
-    .name = "usb-redir-ep/bulk-receiving",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .fields = (VMStateField[]) {
-        VMSTATE_UINT8(bulk_receiving_started, struct endp_data),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 static bool usbredir_bulk_receiving_needed(void *priv)
 {
     struct endp_data *endp = priv;
@@ -2274,12 +2264,13 @@ static bool usbredir_bulk_receiving_needed(void *priv)
     return endp->bulk_receiving_started;
 }
 
-static const VMStateDescription usbredir_stream_vmstate = {
-    .name = "usb-redir-ep/stream-state",
+static const VMStateDescription usbredir_bulk_receiving_vmstate = {
+    .name = "usb-redir-ep/bulk-receiving",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = usbredir_bulk_receiving_needed,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT32(max_streams, struct endp_data),
+        VMSTATE_UINT8(bulk_receiving_started, struct endp_data),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -2291,6 +2282,17 @@ static bool usbredir_stream_needed(void *priv)
     return endp->max_streams;
 }
 
+static const VMStateDescription usbredir_stream_vmstate = {
+    .name = "usb-redir-ep/stream-state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = usbredir_stream_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(max_streams, struct endp_data),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription usbredir_ep_vmstate = {
     .name = "usb-redir-ep",
     .version_id = 1,
@@ -2318,16 +2320,10 @@ static const VMStateDescription usbredir_ep_vmstate = {
         VMSTATE_INT32(bufpq_target_size, struct endp_data),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &usbredir_bulk_receiving_vmstate,
-            .needed = usbredir_bulk_receiving_needed,
-        }, {
-            .vmsd = &usbredir_stream_vmstate,
-            .needed = usbredir_stream_needed,
-        }, {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &usbredir_bulk_receiving_vmstate,
+        &usbredir_stream_vmstate,
+        NULL
     }
 };
 
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index fb49ffcb2d..ee4e07c5e7 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1053,6 +1053,7 @@ static const VMStateDescription vmstate_virtio_device_endian = {
     .name = "virtio/device_endian",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = &virtio_device_endian_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(device_endian, VirtIODevice),
         VMSTATE_END_OF_LIST()
@@ -1063,6 +1064,7 @@ static const VMStateDescription vmstate_virtio_64bit_features = {
     .name = "virtio/64bit_features",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = &virtio_64bit_features_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(guest_features, VirtIODevice),
         VMSTATE_END_OF_LIST()
@@ -1077,16 +1079,10 @@ static const VMStateDescription vmstate_virtio = {
     .fields = (VMStateField[]) {
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_virtio_device_endian,
-            .needed = &virtio_device_endian_needed
-        },
-        {
-            .vmsd = &vmstate_virtio_64bit_features,
-            .needed = &virtio_64bit_features_needed
-        },
-        { 0 }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_virtio_device_endian,
+        &vmstate_virtio_64bit_features,
+        NULL
     }
 };
 
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index bc7616aaa8..fc5e6439e4 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -120,11 +120,6 @@ typedef struct {
     bool (*field_exists)(void *opaque, int version_id);
 } VMStateField;
 
-typedef struct VMStateSubsection {
-    const VMStateDescription *vmsd;
-    bool (*needed)(void *opaque);
-} VMStateSubsection;
-
 struct VMStateDescription {
     const char *name;
     int unmigratable;
@@ -135,8 +130,9 @@ struct VMStateDescription {
     int (*pre_load)(void *opaque);
     int (*post_load)(void *opaque, int version_id);
     void (*pre_save)(void *opaque);
+    bool (*needed)(void *opaque);
     VMStateField *fields;
-    const VMStateSubsection *subsections;
+    const VMStateDescription **subsections;
 };
 
 extern const VMStateDescription vmstate_dummy;
diff --git a/migration/savevm.c b/migration/savevm.c
index 1a45d39a4b..002f9b8948 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -272,11 +272,11 @@ static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
 }
 
 static void dump_vmstate_vmss(FILE *out_file,
-                              const VMStateSubsection *subsection,
+                              const VMStateDescription **subsection,
                               int indent)
 {
-    if (subsection->vmsd != NULL) {
-        dump_vmstate_vmsd(out_file, subsection->vmsd, indent, true);
+    if (*subsection != NULL) {
+        dump_vmstate_vmsd(out_file, *subsection, indent, true);
     }
 }
 
@@ -317,12 +317,12 @@ static void dump_vmstate_vmsd(FILE *out_file,
         fprintf(out_file, "\n%*s]", indent, "");
     }
     if (vmsd->subsections != NULL) {
-        const VMStateSubsection *subsection = vmsd->subsections;
+        const VMStateDescription **subsection = vmsd->subsections;
         bool first;
 
         fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
         first = true;
-        while (subsection->vmsd != NULL) {
+        while (*subsection != NULL) {
             if (!first) {
                 fprintf(out_file, ",\n");
             }
diff --git a/migration/vmstate.c b/migration/vmstate.c
index e5388f0596..108995ec78 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -341,11 +341,11 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
 }
 
 static const VMStateDescription *
-    vmstate_get_subsection(const VMStateSubsection *sub, char *idstr)
+vmstate_get_subsection(const VMStateDescription **sub, char *idstr)
 {
-    while (sub && sub->needed) {
-        if (strcmp(idstr, sub->vmsd->name) == 0) {
-            return sub->vmsd;
+    while (sub && *sub && (*sub)->needed) {
+        if (strcmp(idstr, (*sub)->name) == 0) {
+            return *sub;
         }
         sub++;
     }
@@ -405,12 +405,12 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
 static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
                                     void *opaque, QJSON *vmdesc)
 {
-    const VMStateSubsection *sub = vmsd->subsections;
+    const VMStateDescription **sub = vmsd->subsections;
     bool subsection_found = false;
 
-    while (sub && sub->needed) {
-        if (sub->needed(opaque)) {
-            const VMStateDescription *vmsd = sub->vmsd;
+    while (sub && *sub && (*sub)->needed) {
+        if ((*sub)->needed(opaque)) {
+            const VMStateDescription *vmsd = *sub;
             uint8_t len;
 
             if (vmdesc) {
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 9446e5a8ab..36365a57c7 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -40,6 +40,7 @@ static const VMStateDescription vmstate_vfp = {
     .name = "cpu/vfp",
     .version_id = 3,
     .minimum_version_id = 3,
+    .needed = vfp_needed,
     .fields = (VMStateField[]) {
         VMSTATE_FLOAT64_ARRAY(env.vfp.regs, ARMCPU, 64),
         /* The xregs array is a little awkward because element 1 (FPSCR)
@@ -72,6 +73,7 @@ static const VMStateDescription vmstate_iwmmxt = {
     .name = "cpu/iwmmxt",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = iwmmxt_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16),
         VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16),
@@ -91,6 +93,7 @@ static const VMStateDescription vmstate_m = {
     .name = "cpu/m",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = m_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(env.v7m.other_sp, ARMCPU),
         VMSTATE_UINT32(env.v7m.vecbase, ARMCPU),
@@ -114,6 +117,7 @@ static const VMStateDescription vmstate_thumb2ee = {
     .name = "cpu/thumb2ee",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = thumb2ee_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(env.teecr, ARMCPU),
         VMSTATE_UINT32(env.teehbr, ARMCPU),
@@ -282,21 +286,11 @@ const VMStateDescription vmstate_arm_cpu = {
         VMSTATE_BOOL(powered_off, ARMCPU),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_vfp,
-            .needed = vfp_needed,
-        } , {
-            .vmsd = &vmstate_iwmmxt,
-            .needed = iwmmxt_needed,
-        } , {
-            .vmsd = &vmstate_m,
-            .needed = m_needed,
-        } , {
-            .vmsd = &vmstate_thumb2ee,
-            .needed = thumb2ee_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_vfp,
+        &vmstate_iwmmxt,
+        &vmstate_m,
+        &vmstate_thumb2ee,
+        NULL
     }
 };
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 69d86cb476..a0df64b577 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -403,6 +403,7 @@ static const VMStateDescription vmstate_steal_time_msr = {
     .name = "cpu/steal_time_msr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = steal_time_msr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.steal_time_msr, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -413,6 +414,7 @@ static const VMStateDescription vmstate_async_pf_msr = {
     .name = "cpu/async_pf_msr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = async_pf_msr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.async_pf_en_msr, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -423,6 +425,7 @@ static const VMStateDescription vmstate_pv_eoi_msr = {
     .name = "cpu/async_pv_eoi_msr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = pv_eoi_msr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.pv_eoi_en_msr, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -441,6 +444,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     .name = "cpu/fpop_ip_dp",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fpop_ip_dp_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT16(env.fpop, X86CPU),
         VMSTATE_UINT64(env.fpip, X86CPU),
@@ -461,6 +465,7 @@ static const VMStateDescription vmstate_msr_tsc_adjust = {
     .name = "cpu/msr_tsc_adjust",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = tsc_adjust_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.tsc_adjust, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -479,6 +484,7 @@ static const VMStateDescription vmstate_msr_tscdeadline = {
     .name = "cpu/msr_tscdeadline",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = tscdeadline_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.tsc_deadline, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -505,6 +511,7 @@ static const VMStateDescription vmstate_msr_ia32_misc_enable = {
     .name = "cpu/msr_ia32_misc_enable",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = misc_enable_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_ia32_misc_enable, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -515,6 +522,7 @@ static const VMStateDescription vmstate_msr_ia32_feature_control = {
     .name = "cpu/msr_ia32_feature_control",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = feature_control_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_ia32_feature_control, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -549,6 +557,7 @@ static const VMStateDescription vmstate_msr_architectural_pmu = {
     .name = "cpu/msr_architectural_pmu",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = pmu_enable_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_fixed_ctr_ctrl, X86CPU),
         VMSTATE_UINT64(env.msr_global_ctrl, X86CPU),
@@ -584,6 +593,7 @@ static const VMStateDescription vmstate_mpx = {
     .name = "cpu/mpx",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = mpx_needed,
     .fields = (VMStateField[]) {
         VMSTATE_BND_REGS(env.bnd_regs, X86CPU, 4),
         VMSTATE_UINT64(env.bndcs_regs.cfgu, X86CPU),
@@ -605,6 +615,7 @@ static const VMStateDescription vmstate_msr_hypercall_hypercall = {
     .name = "cpu/msr_hyperv_hypercall",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = hyperv_hypercall_enable_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_hv_guest_os_id, X86CPU),
         VMSTATE_UINT64(env.msr_hv_hypercall, X86CPU),
@@ -624,6 +635,7 @@ static const VMStateDescription vmstate_msr_hyperv_vapic = {
     .name = "cpu/msr_hyperv_vapic",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = hyperv_vapic_enable_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_hv_vapic, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -642,6 +654,7 @@ static const VMStateDescription vmstate_msr_hyperv_time = {
     .name = "cpu/msr_hyperv_time",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = hyperv_time_enable_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_hv_tsc, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -683,6 +696,7 @@ static const VMStateDescription vmstate_avx512 = {
     .name = "cpu/avx512",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = avx512_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
         VMSTATE_ZMMH_REGS_VARS(env.xmm_regs, X86CPU, 0),
@@ -705,6 +719,7 @@ static const VMStateDescription vmstate_xss = {
     .name = "cpu/xss",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = xss_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.xss, X86CPU),
         VMSTATE_END_OF_LIST()
@@ -813,54 +828,22 @@ VMStateDescription vmstate_x86_cpu = {
         VMSTATE_END_OF_LIST()
         /* The above list is not sorted /wrt version numbers, watch out! */
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_async_pf_msr,
-            .needed = async_pf_msr_needed,
-        } , {
-            .vmsd = &vmstate_pv_eoi_msr,
-            .needed = pv_eoi_msr_needed,
-        } , {
-            .vmsd = &vmstate_steal_time_msr,
-            .needed = steal_time_msr_needed,
-        } , {
-            .vmsd = &vmstate_fpop_ip_dp,
-            .needed = fpop_ip_dp_needed,
-        }, {
-            .vmsd = &vmstate_msr_tsc_adjust,
-            .needed = tsc_adjust_needed,
-        }, {
-            .vmsd = &vmstate_msr_tscdeadline,
-            .needed = tscdeadline_needed,
-        }, {
-            .vmsd = &vmstate_msr_ia32_misc_enable,
-            .needed = misc_enable_needed,
-        }, {
-            .vmsd = &vmstate_msr_ia32_feature_control,
-            .needed = feature_control_needed,
-        }, {
-            .vmsd = &vmstate_msr_architectural_pmu,
-            .needed = pmu_enable_needed,
-        } , {
-            .vmsd = &vmstate_mpx,
-            .needed = mpx_needed,
-        }, {
-            .vmsd = &vmstate_msr_hypercall_hypercall,
-            .needed = hyperv_hypercall_enable_needed,
-        }, {
-            .vmsd = &vmstate_msr_hyperv_vapic,
-            .needed = hyperv_vapic_enable_needed,
-        }, {
-            .vmsd = &vmstate_msr_hyperv_time,
-            .needed = hyperv_time_enable_needed,
-        }, {
-            .vmsd = &vmstate_avx512,
-            .needed = avx512_needed,
-         }, {
-            .vmsd = &vmstate_xss,
-            .needed = xss_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_async_pf_msr,
+        &vmstate_pv_eoi_msr,
+        &vmstate_steal_time_msr,
+        &vmstate_fpop_ip_dp,
+        &vmstate_msr_tsc_adjust,
+        &vmstate_msr_tscdeadline,
+        &vmstate_msr_ia32_misc_enable,
+        &vmstate_msr_ia32_feature_control,
+        &vmstate_msr_architectural_pmu,
+        &vmstate_mpx,
+        &vmstate_msr_hypercall_hypercall,
+        &vmstate_msr_hyperv_vapic,
+        &vmstate_msr_hyperv_time,
+        &vmstate_avx512,
+        &vmstate_xss,
+        NULL
     }
 };
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index d875211a2d..f4ac7611dd 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -213,6 +213,7 @@ static const VMStateDescription vmstate_fpu = {
     .name = "cpu/fpu",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fpu_needed,
     .fields = (VMStateField[]) {
         VMSTATE_FLOAT64_ARRAY(env.fpr, PowerPCCPU, 32),
         VMSTATE_UINTTL(env.fpscr, PowerPCCPU),
@@ -231,6 +232,7 @@ static const VMStateDescription vmstate_altivec = {
     .name = "cpu/altivec",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = altivec_needed,
     .fields = (VMStateField[]) {
         VMSTATE_AVR_ARRAY(env.avr, PowerPCCPU, 32),
         VMSTATE_UINT32(env.vscr, PowerPCCPU),
@@ -249,6 +251,7 @@ static const VMStateDescription vmstate_vsx = {
     .name = "cpu/vsx",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = vsx_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64_ARRAY(env.vsr, PowerPCCPU, 32),
         VMSTATE_END_OF_LIST()
@@ -269,6 +272,7 @@ static const VMStateDescription vmstate_tm = {
     .version_id = 1,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
+    .needed = tm_needed,
     .fields      = (VMStateField []) {
         VMSTATE_UINTTL_ARRAY(env.tm_gpr, PowerPCCPU, 32),
         VMSTATE_AVR_ARRAY(env.tm_vsr, PowerPCCPU, 64),
@@ -302,6 +306,7 @@ static const VMStateDescription vmstate_sr = {
     .name = "cpu/sr",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = sr_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINTTL_ARRAY(env.sr, PowerPCCPU, 32),
         VMSTATE_END_OF_LIST()
@@ -351,6 +356,7 @@ static const VMStateDescription vmstate_slb = {
     .name = "cpu/slb",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = slb_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32_EQUAL(env.slb_nr, PowerPCCPU),
         VMSTATE_SLB_ARRAY(env.slb, PowerPCCPU, MAX_SLB_ENTRIES),
@@ -383,6 +389,7 @@ static const VMStateDescription vmstate_tlb6xx = {
     .name = "cpu/tlb6xx",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = tlb6xx_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU),
         VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlb6, PowerPCCPU,
@@ -429,6 +436,7 @@ static const VMStateDescription vmstate_pbr403 = {
     .name = "cpu/pbr403",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = pbr403_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINTTL_ARRAY(env.pb, PowerPCCPU, 4),
         VMSTATE_END_OF_LIST()
@@ -439,6 +447,7 @@ static const VMStateDescription vmstate_tlbemb = {
     .name = "cpu/tlb6xx",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = tlbemb_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU),
         VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlbe, PowerPCCPU,
@@ -448,13 +457,9 @@ static const VMStateDescription vmstate_tlbemb = {
         /* 403 protection registers */
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_pbr403,
-            .needed = pbr403_needed,
-        } , {
-            /* empty */
-        }
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_pbr403,
+        NULL
     }
 };
 
@@ -483,6 +488,7 @@ static const VMStateDescription vmstate_tlbmas = {
     .name = "cpu/tlbmas",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = tlbmas_needed,
     .fields = (VMStateField[]) {
         VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU),
         VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlbm, PowerPCCPU,
@@ -533,38 +539,18 @@ const VMStateDescription vmstate_ppc_cpu = {
         VMSTATE_UINT32_EQUAL(env.nb_BATs, PowerPCCPU),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (VMStateSubsection []) {
-        {
-            .vmsd = &vmstate_fpu,
-            .needed = fpu_needed,
-        } , {
-            .vmsd = &vmstate_altivec,
-            .needed = altivec_needed,
-        } , {
-            .vmsd = &vmstate_vsx,
-            .needed = vsx_needed,
-        } , {
-            .vmsd = &vmstate_sr,
-            .needed = sr_needed,
-        } , {
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_fpu,
+        &vmstate_altivec,
+        &vmstate_vsx,
+        &vmstate_sr,
 #ifdef TARGET_PPC64
-            .vmsd = &vmstate_tm,
-            .needed = tm_needed,
-        } , {
-            .vmsd = &vmstate_slb,
-            .needed = slb_needed,
-        } , {
+        &vmstate_tm,
+        &vmstate_slb,
 #endif /* TARGET_PPC64 */
-            .vmsd = &vmstate_tlb6xx,
-            .needed = tlb6xx_needed,
-        } , {
-            .vmsd = &vmstate_tlbemb,
-            .needed = tlbemb_needed,
-        } , {
-            .vmsd = &vmstate_tlbmas,
-            .needed = tlbmas_needed,
-        } , {
-            /* empty */
-        }
+        &vmstate_tlb6xx,
+        &vmstate_tlbemb,
+        &vmstate_tlbmas,
+        NULL
     }
 };
diff --git a/target-s390x/machine.c b/target-s390x/machine.c
index 004474959a..b76fb08319 100644
--- a/target-s390x/machine.c
+++ b/target-s390x/machine.c
@@ -42,10 +42,17 @@ static void cpu_pre_save(void *opaque)
     }
 }
 
+static inline bool fpu_needed(void *opaque)
+{
+    /* This looks odd, but we might want to NOT transfer fprs in the future */
+    return true;
+}
+
 const VMStateDescription vmstate_fpu = {
     .name = "cpu/fpu",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = fpu_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.vregs[0][0].ll, S390CPU),
         VMSTATE_UINT64(env.vregs[1][0].ll, S390CPU),
@@ -68,16 +75,11 @@ const VMStateDescription vmstate_fpu = {
     }
 };
 
-static inline bool fpu_needed(void *opaque)
-{
-    /* This looks odd, but we might want to NOT transfer fprs in the future */
-    return true;
-}
-
 const VMStateDescription vmstate_vregs = {
     .name = "cpu/vregs",
     .version_id = 1,
     .minimum_version_id = 1,
+    .needed = vregs_needed,
     .fields = (VMStateField[]) {
         /* vregs[0][0] -> vregs[15][0] and fregs are overlays */
         VMSTATE_UINT64(env.vregs[16][0].ll, S390CPU),
@@ -159,16 +161,10 @@ const VMStateDescription vmstate_s390_cpu = {
         VMSTATE_VBUFFER_UINT32(irqstate, S390CPU, 4, NULL, 0,
                                irqstate_saved_size),
         VMSTATE_END_OF_LIST()
-     },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &vmstate_fpu,
-            .needed = fpu_needed,
-        } , {
-            .vmsd = &vmstate_vregs,
-            .needed = vregs_needed,
-        } , {
-            /* empty */
-        }
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_fpu,
+        &vmstate_vregs,
+        NULL
     },
 };

From b3af1bc9d21e6bec7dfd283d91b465c9f815b6d6 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Thu, 21 May 2015 13:24:11 +0100
Subject: [PATCH 10/21] Add qemu_get_counted_string to read a string prefixed
 by a count byte

and use it in loadvm_state and ram_load.

Wherever it's used, check the return value and report an error if it failed.

Minor: ram_load was using a 257 byte array for its string, the
       maximum length is 255 bytes + 0 terminator, so fix to 256

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/qemu-file.h |  3 +++
 migration/qemu-file.c         | 17 +++++++++++++++++
 migration/savevm.c            | 11 ++++++-----
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index a01c5b817e..318aa1ed07 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -312,4 +312,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
 {
     qemu_get_be64s(f, (uint64_t *)pv);
 }
+
+size_t qemu_get_counted_string(QEMUFile *f, char buf[256]);
+
 #endif
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 2750365a7e..0ef543a6df 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -585,3 +585,20 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src)
     }
     return len;
 }
+
+/*
+ * Get a string whose length is determined by a single preceding byte
+ * A preallocated 256 byte buffer must be passed in.
+ * Returns: len on success and a 0 terminated string in the buffer
+ *          else 0
+ *          (Note a 0 length string will return 0 either way)
+ */
+size_t qemu_get_counted_string(QEMUFile *f, char buf[256])
+{
+    size_t len = qemu_get_byte(f);
+    size_t res = qemu_get_buffer(f, (uint8_t *)buf, len);
+
+    buf[res] = 0;
+
+    return res == len ? res : 0;
+}
diff --git a/migration/savevm.c b/migration/savevm.c
index 002f9b8948..2b0aa65cd7 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -973,8 +973,7 @@ int qemu_loadvm_state(QEMUFile *f)
     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
         uint32_t instance_id, version_id, section_id;
         SaveStateEntry *se;
-        char idstr[257];
-        int len;
+        char idstr[256];
 
         trace_qemu_loadvm_state_section(section_type);
         switch (section_type) {
@@ -982,9 +981,11 @@ int qemu_loadvm_state(QEMUFile *f)
         case QEMU_VM_SECTION_FULL:
             /* Read section start */
             section_id = qemu_get_be32(f);
-            len = qemu_get_byte(f);
-            qemu_get_buffer(f, (uint8_t *)idstr, len);
-            idstr[len] = 0;
+            if (!qemu_get_counted_string(f, idstr)) {
+                error_report("Unable to read ID string for section %u",
+                            section_id);
+                return -EINVAL;
+            }
             instance_id = qemu_get_be32(f);
             version_id = qemu_get_be32(f);
 

From f796baa1b3efcf105ba3a465f797e05ac2b3dcfc Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Thu, 21 May 2015 13:24:12 +0100
Subject: [PATCH 11/21] Split header writing out of qemu_savevm_state_begin

Split qemu_savevm_state_begin to:
  qemu_savevm_state_header   That writes the initial file header.
  qemu_savevm_state_begin    That sets up devices and does the first
                             device pass.

Used later in postcopy.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/sysemu/sysemu.h |  1 +
 migration/migration.c   |  1 +
 migration/savevm.c      | 11 ++++++++---
 trace-events            |  1 +
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 853d90a317..ef793f702e 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -84,6 +84,7 @@ void qemu_announce_self(void);
 bool qemu_savevm_state_blocked(Error **errp);
 void qemu_savevm_state_begin(QEMUFile *f,
                              const MigrationParams *params);
+void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f);
 void qemu_savevm_state_complete(QEMUFile *f);
 void qemu_savevm_state_cancel(void);
diff --git a/migration/migration.c b/migration/migration.c
index 5d77046a69..438bf91b54 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -738,6 +738,7 @@ static void *migration_thread(void *opaque)
     int64_t start_time = initial_time;
     bool old_vm_running = false;
 
+    qemu_savevm_state_header(s->file);
     qemu_savevm_state_begin(s->file, &s->params);
 
     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
diff --git a/migration/savevm.c b/migration/savevm.c
index 2b0aa65cd7..903dbebf9f 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -625,6 +625,13 @@ bool qemu_savevm_state_blocked(Error **errp)
     return false;
 }
 
+void qemu_savevm_state_header(QEMUFile *f)
+{
+    trace_savevm_state_header();
+    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
+    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+}
+
 void qemu_savevm_state_begin(QEMUFile *f,
                              const MigrationParams *params)
 {
@@ -639,9 +646,6 @@ void qemu_savevm_state_begin(QEMUFile *f,
         se->ops->set_params(params, se->opaque);
     }
 
-    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
-    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
-
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         int len;
 
@@ -851,6 +855,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     }
 
     qemu_mutex_unlock_iothread();
+    qemu_savevm_state_header(f);
     qemu_savevm_state_begin(f, &params);
     qemu_mutex_lock_iothread();
 
diff --git a/trace-events b/trace-events
index b64e1252ce..1abca7a1e5 100644
--- a/trace-events
+++ b/trace-events
@@ -1186,6 +1186,7 @@ qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint
 savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u"
 savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d"
 savevm_state_begin(void) ""
+savevm_state_header(void) ""
 savevm_state_iterate(void) ""
 savevm_state_complete(void) ""
 savevm_state_cancel(void) ""

From e3807054e20fb3b94d18cb751c437ee2f43b6fac Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Thu, 21 May 2015 13:24:13 +0100
Subject: [PATCH 12/21] qemu_ram_foreach_block: pass up error value, and down
 the ramblock name

Check the return value of the function it calls and error out if it's non-0.
Fix up qemu_rdma_init_one_block, which is the only current caller,
  and rdma_add_block, the only function it calls using it.

Pass the name of the ramblock to the function; helps in debugging.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Michael R. Hines <mrhines@us.ibm.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 exec.c                    | 10 ++++++++--
 include/exec/cpu-common.h |  4 ++--
 migration/rdma.c          |  4 ++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/exec.c b/exec.c
index ba3f2cf6df..76bfc4ac4a 100644
--- a/exec.c
+++ b/exec.c
@@ -3345,14 +3345,20 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr)
     return res;
 }
 
-void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
+int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
 {
     RAMBlock *block;
+    int ret = 0;
 
     rcu_read_lock();
     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-        func(block->host, block->offset, block->used_length, opaque);
+        ret = func(block->idstr, block->host, block->offset,
+                   block->used_length, opaque);
+        if (ret) {
+            break;
+        }
     }
     rcu_read_unlock();
+    return ret;
 }
 #endif
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 43428bd030..de8a7200a9 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -126,10 +126,10 @@ void cpu_flush_icache_range(hwaddr start, int len);
 extern struct MemoryRegion io_mem_rom;
 extern struct MemoryRegion io_mem_notdirty;
 
-typedef void (RAMBlockIterFunc)(void *host_addr,
+typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
     ram_addr_t offset, ram_addr_t length, void *opaque);
 
-void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
 
 #endif
 
diff --git a/migration/rdma.c b/migration/rdma.c
index 36719038f7..791ef44713 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -570,10 +570,10 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr,
  * in advanced before the migration starts. This tells us where the RAM blocks
  * are so that we can register them individually.
  */
-static void qemu_rdma_init_one_block(void *host_addr,
+static int qemu_rdma_init_one_block(const char *block_name, void *host_addr,
     ram_addr_t block_offset, ram_addr_t length, void *opaque)
 {
-    rdma_add_block(opaque, host_addr, block_offset, length);
+    return rdma_add_block(opaque, host_addr, block_offset, length);
 }
 
 /*

From bca7856ae8220d9f15ff0f44b97397529e26a552 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Thu, 21 May 2015 13:24:14 +0100
Subject: [PATCH 13/21] Create MigrationIncomingState

There are currently lots of pieces of incoming migration state scattered
around, and postcopy is adding more, and it seems better to try and keep
it together.

Allocate the MigrationIncomingState (MIS) in process_incoming_migration_co
and load_vmstate, and destroy it again once the load has finished.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/migration.h |  9 +++++++++
 include/qemu/typedefs.h       |  1 +
 migration/migration.c         | 28 ++++++++++++++++++++++++++++
 migration/savevm.c            |  2 ++
 4 files changed, 40 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index b78a3b98b1..1323e3dd4a 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -42,6 +42,15 @@ struct MigrationParams {
 
 typedef struct MigrationState MigrationState;
 
+/* State for the incoming migration */
+struct MigrationIncomingState {
+    QEMUFile *file;
+};
+
+MigrationIncomingState *migration_incoming_get_current(void);
+MigrationIncomingState *migration_incoming_state_new(QEMUFile *f);
+void migration_incoming_state_destroy(void);
+
 struct MigrationState
 {
     int64_t bandwidth_limit;
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index cde3314896..74dfad3821 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -38,6 +38,7 @@ typedef struct MemoryListener MemoryListener;
 typedef struct MemoryMappingList MemoryMappingList;
 typedef struct MemoryRegion MemoryRegion;
 typedef struct MemoryRegionSection MemoryRegionSection;
+typedef struct MigrationIncomingState MigrationIncomingState;
 typedef struct MigrationParams MigrationParams;
 typedef struct Monitor Monitor;
 typedef struct MouseTransformInfo MouseTransformInfo;
diff --git a/migration/migration.c b/migration/migration.c
index 438bf91b54..66c0b57b26 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -53,6 +53,7 @@ static bool deferred_incoming;
    migrations at once.  For now we don't need to add
    dynamic creation of migration */
 
+/* For outgoing */
 MigrationState *migrate_get_current(void)
 {
     static MigrationState current_migration = {
@@ -71,6 +72,28 @@ MigrationState *migrate_get_current(void)
     return &current_migration;
 }
 
+/* For incoming */
+static MigrationIncomingState *mis_current;
+
+MigrationIncomingState *migration_incoming_get_current(void)
+{
+    return mis_current;
+}
+
+MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
+{
+    mis_current = g_malloc0(sizeof(MigrationIncomingState));
+    mis_current->file = f;
+
+    return mis_current;
+}
+
+void migration_incoming_state_destroy(void)
+{
+    g_free(mis_current);
+    mis_current = NULL;
+}
+
 /*
  * Called on -incoming with a defer: uri.
  * The migration can be started later after any parameters have been
@@ -115,9 +138,14 @@ static void process_incoming_migration_co(void *opaque)
     Error *local_err = NULL;
     int ret;
 
+    migration_incoming_state_new(f);
+
     ret = qemu_loadvm_state(f);
+
     qemu_fclose(f);
     free_xbzrle_decoded_buf();
+    migration_incoming_state_destroy();
+
     if (ret < 0) {
         error_report("load of migration failed: %s", strerror(-ret));
         migrate_decompress_threads_join();
diff --git a/migration/savevm.c b/migration/savevm.c
index 903dbebf9f..d0991e810f 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1329,9 +1329,11 @@ int load_vmstate(const char *name)
     }
 
     qemu_system_reset(VMRESET_SILENT);
+    migration_incoming_state_new(f);
     ret = qemu_loadvm_state(f);
 
     qemu_fclose(f);
+    migration_incoming_state_destroy();
     if (ret < 0) {
         error_report("Error %d while loading VM state", ret);
         return ret;

From 7c1e52ba6f3994dc127118f491258ce84d0beb52 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Thu, 21 May 2015 13:24:15 +0100
Subject: [PATCH 14/21] Move copy out of qemu_peek_buffer

qemu_peek_buffer currently copies the data it reads into a buffer;
however, a future patch wants access to the buffer without the copy,
hence rework it to move the copy to the layer above.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/qemu-file.h |  2 +-
 migration/qemu-file.c         | 12 +++++++-----
 migration/vmstate.c           |  5 +++--
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 318aa1ed07..4f67d79227 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -157,7 +157,7 @@ static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
 void qemu_put_be16(QEMUFile *f, unsigned int v);
 void qemu_put_be32(QEMUFile *f, unsigned int v);
 void qemu_put_be64(QEMUFile *f, uint64_t v);
-int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset);
+int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset);
 int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
 ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
                                   int level);
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 0ef543a6df..965a757772 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -349,14 +349,14 @@ void qemu_file_skip(QEMUFile *f, int size)
 }
 
 /*
- * Read 'size' bytes from file (at 'offset') into buf without moving the
- * pointer.
+ * Read 'size' bytes from file (at 'offset') without moving the
+ * pointer and set 'buf' to point to that data.
  *
  * It will return size bytes unless there was an error, in which case it will
  * return as many as it managed to read (assuming blocking fd's which
  * all current QEMUFile are)
  */
-int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
+int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset)
 {
     int pending;
     int index;
@@ -392,7 +392,7 @@ int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
         size = pending;
     }
 
-    memcpy(buf, f->buf + index, size);
+    *buf = f->buf + index;
     return size;
 }
 
@@ -411,11 +411,13 @@ int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size)
 
     while (pending > 0) {
         int res;
+        uint8_t *src;
 
-        res = qemu_peek_buffer(f, buf, MIN(pending, IO_BUF_SIZE), 0);
+        res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
         if (res == 0) {
             return done;
         }
+        memcpy(buf, src, res);
         qemu_file_skip(f, res);
         buf += res;
         pending -= res;
diff --git a/migration/vmstate.c b/migration/vmstate.c
index 108995ec78..6138d1acb7 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -358,7 +358,7 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
     trace_vmstate_subsection_load(vmsd->name);
 
     while (qemu_peek_byte(f, 0) == QEMU_VM_SUBSECTION) {
-        char idstr[256];
+        char idstr[256], *idstr_ret;
         int ret;
         uint8_t version_id, len, size;
         const VMStateDescription *sub_vmsd;
@@ -369,11 +369,12 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
             trace_vmstate_subsection_load_bad(vmsd->name, "(short)");
             return 0;
         }
-        size = qemu_peek_buffer(f, (uint8_t *)idstr, len, 2);
+        size = qemu_peek_buffer(f, (uint8_t **)&idstr_ret, len, 2);
         if (size != len) {
             trace_vmstate_subsection_load_bad(vmsd->name, "(peek fail)");
             return 0;
         }
+        memcpy(idstr, idstr_ret, size);
         idstr[size] = 0;
 
         if (strncmp(vmsd->name, idstr, strlen(vmsd->name)) != 0) {

From 1a8f46f8d61ef885ff9d0bda251e4e9830c932ef Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Thu, 21 May 2015 13:24:16 +0100
Subject: [PATCH 15/21] Move loadvm_handlers into MigrationIncomingState

In postcopy we need the loadvm_handlers to be used in a couple
of different instances of the loadvm loop/routine, and thus
it can't be local any more.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/migration.h |  5 +++++
 include/migration/vmstate.h   |  2 ++
 include/qemu/typedefs.h       |  1 +
 migration/migration.c         |  2 ++
 migration/savevm.c            | 28 ++++++++++++++++------------
 5 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 1323e3dd4a..720a949f0b 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -42,9 +42,14 @@ struct MigrationParams {
 
 typedef struct MigrationState MigrationState;
 
+typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
     QEMUFile *file;
+
+    /* See savevm.c */
+    LoadStateEntry_Head loadvm_handlers;
 };
 
 MigrationIncomingState *migration_incoming_get_current(void);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index fc5e6439e4..7153b1e145 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -808,6 +808,8 @@ extern const VMStateInfo vmstate_info_bitmap;
 
 #define SELF_ANNOUNCE_ROUNDS 5
 
+void loadvm_free_handlers(MigrationIncomingState *mis);
+
 int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
                        void *opaque, int version_id);
 void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 74dfad3821..6fdcbcd524 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -31,6 +31,7 @@ typedef struct I2CBus I2CBus;
 typedef struct I2SCodec I2SCodec;
 typedef struct ISABus ISABus;
 typedef struct ISADevice ISADevice;
+typedef struct LoadStateEntry LoadStateEntry;
 typedef struct MACAddr MACAddr;
 typedef struct MachineClass MachineClass;
 typedef struct MachineState MachineState;
diff --git a/migration/migration.c b/migration/migration.c
index 66c0b57b26..b04b4571a8 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -84,12 +84,14 @@ MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
 {
     mis_current = g_malloc0(sizeof(MigrationIncomingState));
     mis_current->file = f;
+    QLIST_INIT(&mis_current->loadvm_handlers);
 
     return mis_current;
 }
 
 void migration_incoming_state_destroy(void)
 {
+    loadvm_free_handlers(mis_current);
     g_free(mis_current);
     mis_current = NULL;
 }
diff --git a/migration/savevm.c b/migration/savevm.c
index d0991e810f..5324c4c7fc 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -936,18 +936,26 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id)
     return NULL;
 }
 
-typedef struct LoadStateEntry {
+struct LoadStateEntry {
     QLIST_ENTRY(LoadStateEntry) entry;
     SaveStateEntry *se;
     int section_id;
     int version_id;
-} LoadStateEntry;
+};
+
+void loadvm_free_handlers(MigrationIncomingState *mis)
+{
+    LoadStateEntry *le, *new_le;
+
+    QLIST_FOREACH_SAFE(le, &mis->loadvm_handlers, entry, new_le) {
+        QLIST_REMOVE(le, entry);
+        g_free(le);
+    }
+}
 
 int qemu_loadvm_state(QEMUFile *f)
 {
-    QLIST_HEAD(, LoadStateEntry) loadvm_handlers =
-        QLIST_HEAD_INITIALIZER(loadvm_handlers);
-    LoadStateEntry *le, *new_le;
+    MigrationIncomingState *mis = migration_incoming_get_current();
     Error *local_err = NULL;
     uint8_t section_type;
     unsigned int v;
@@ -978,6 +986,7 @@ int qemu_loadvm_state(QEMUFile *f)
     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
         uint32_t instance_id, version_id, section_id;
         SaveStateEntry *se;
+        LoadStateEntry *le;
         char idstr[256];
 
         trace_qemu_loadvm_state_section(section_type);
@@ -1019,7 +1028,7 @@ int qemu_loadvm_state(QEMUFile *f)
             le->se = se;
             le->section_id = section_id;
             le->version_id = version_id;
-            QLIST_INSERT_HEAD(&loadvm_handlers, le, entry);
+            QLIST_INSERT_HEAD(&mis->loadvm_handlers, le, entry);
 
             ret = vmstate_load(f, le->se, le->version_id);
             if (ret < 0) {
@@ -1033,7 +1042,7 @@ int qemu_loadvm_state(QEMUFile *f)
             section_id = qemu_get_be32(f);
 
             trace_qemu_loadvm_state_section_partend(section_id);
-            QLIST_FOREACH(le, &loadvm_handlers, entry) {
+            QLIST_FOREACH(le, &mis->loadvm_handlers, entry) {
                 if (le->section_id == section_id) {
                     break;
                 }
@@ -1081,11 +1090,6 @@ int qemu_loadvm_state(QEMUFile *f)
     ret = 0;
 
 out:
-    QLIST_FOREACH_SAFE(le, &loadvm_handlers, entry, new_le) {
-        QLIST_REMOVE(le, entry);
-        g_free(le);
-    }
-
     if (ret == 0) {
         /* We may not have a VMDESC section, so ignore relative errors */
         ret = file_error_after_eof;

From ce39bfc9186005d222a78db4a7fbdc83e2d62481 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Tue, 19 May 2015 12:29:50 +0100
Subject: [PATCH 16/21] Merge section header writing

The header writing for device sections is open coded in
a few places, merge it into one.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 migration/savevm.c | 72 ++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 44 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 5324c4c7fc..2942ed6563 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -611,6 +611,27 @@ static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
     vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
 }
 
+/*
+ * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
+ */
+static void save_section_header(QEMUFile *f, SaveStateEntry *se,
+                                uint8_t section_type)
+{
+    qemu_put_byte(f, section_type);
+    qemu_put_be32(f, se->section_id);
+
+    if (section_type == QEMU_VM_SECTION_FULL ||
+        section_type == QEMU_VM_SECTION_START) {
+        /* ID string */
+        size_t len = strlen(se->idstr);
+        qemu_put_byte(f, len);
+        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
+
+        qemu_put_be32(f, se->instance_id);
+        qemu_put_be32(f, se->version_id);
+    }
+}
+
 bool qemu_savevm_state_blocked(Error **errp)
 {
     SaveStateEntry *se;
@@ -647,8 +668,6 @@ void qemu_savevm_state_begin(QEMUFile *f,
     }
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-        int len;
-
         if (!se->ops || !se->ops->save_live_setup) {
             continue;
         }
@@ -657,17 +676,7 @@ void qemu_savevm_state_begin(QEMUFile *f,
                 continue;
             }
         }
-        /* Section type */
-        qemu_put_byte(f, QEMU_VM_SECTION_START);
-        qemu_put_be32(f, se->section_id);
-
-        /* ID string */
-        len = strlen(se->idstr);
-        qemu_put_byte(f, len);
-        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
-
-        qemu_put_be32(f, se->instance_id);
-        qemu_put_be32(f, se->version_id);
+        save_section_header(f, se, QEMU_VM_SECTION_START);
 
         ret = se->ops->save_live_setup(f, se->opaque);
         if (ret < 0) {
@@ -702,9 +711,8 @@ int qemu_savevm_state_iterate(QEMUFile *f)
             return 0;
         }
         trace_savevm_section_start(se->idstr, se->section_id);
-        /* Section type */
-        qemu_put_byte(f, QEMU_VM_SECTION_PART);
-        qemu_put_be32(f, se->section_id);
+
+        save_section_header(f, se, QEMU_VM_SECTION_PART);
 
         ret = se->ops->save_live_iterate(f, se->opaque);
         trace_savevm_section_end(se->idstr, se->section_id, ret);
@@ -750,9 +758,8 @@ void qemu_savevm_state_complete(QEMUFile *f)
             }
         }
         trace_savevm_section_start(se->idstr, se->section_id);
-        /* Section type */
-        qemu_put_byte(f, QEMU_VM_SECTION_END);
-        qemu_put_be32(f, se->section_id);
+
+        save_section_header(f, se, QEMU_VM_SECTION_END);
 
         ret = se->ops->save_live_complete(f, se->opaque);
         trace_savevm_section_end(se->idstr, se->section_id, ret);
@@ -766,7 +773,6 @@ void qemu_savevm_state_complete(QEMUFile *f)
     json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE);
     json_start_array(vmdesc, "devices");
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-        int len;
 
         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
             continue;
@@ -777,17 +783,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
         json_prop_str(vmdesc, "name", se->idstr);
         json_prop_int(vmdesc, "instance_id", se->instance_id);
 
-        /* Section type */
-        qemu_put_byte(f, QEMU_VM_SECTION_FULL);
-        qemu_put_be32(f, se->section_id);
-
-        /* ID string */
-        len = strlen(se->idstr);
-        qemu_put_byte(f, len);
-        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
-
-        qemu_put_be32(f, se->instance_id);
-        qemu_put_be32(f, se->version_id);
+        save_section_header(f, se, QEMU_VM_SECTION_FULL);
 
         vmstate_save(f, se, vmdesc);
 
@@ -887,8 +883,6 @@ static int qemu_save_device_state(QEMUFile *f)
     cpu_synchronize_all_states();
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-        int len;
-
         if (se->is_ram) {
             continue;
         }
@@ -896,17 +890,7 @@ static int qemu_save_device_state(QEMUFile *f)
             continue;
         }
 
-        /* Section type */
-        qemu_put_byte(f, QEMU_VM_SECTION_FULL);
-        qemu_put_be32(f, se->section_id);
-
-        /* ID string */
-        len = strlen(se->idstr);
-        qemu_put_byte(f, len);
-        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
-
-        qemu_put_be32(f, se->instance_id);
-        qemu_put_be32(f, se->version_id);
+        save_section_header(f, se, QEMU_VM_SECTION_FULL);
 
         vmstate_save(f, se, NULL);
     }

From 37fb569c0198cba58e3e1bdf6b9702c8248b89dd Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Tue, 19 May 2015 12:29:51 +0100
Subject: [PATCH 17/21] Disable section footers on older machine types

The next patch adds section footers; but we don't want to
break migration compatibility so disable them on older
machine types

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 hw/i386/pc_piix.c             | 2 ++
 hw/i386/pc_q35.c              | 2 ++
 include/migration/migration.h | 2 +-
 migration/savevm.c            | 7 +++++++
 4 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 5253e6d4fa..e142f75649 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -52,6 +52,7 @@
 #ifdef CONFIG_XEN
 #  include <xen/hvm/hvm_info_table.h>
 #endif
+#include "migration/migration.h"
 
 #define MAX_IDE_BUS 2
 
@@ -305,6 +306,7 @@ static void pc_init1(MachineState *machine)
 
 static void pc_compat_2_3(MachineState *machine)
 {
+    savevm_skip_section_footers();
 }
 
 static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 110dfb78a8..b68263d231 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -45,6 +45,7 @@
 #include "hw/usb.h"
 #include "hw/cpu/icc_bus.h"
 #include "qemu/error-report.h"
+#include "migration/migration.h"
 
 /* ICH9 AHCI has 6 ports */
 #define MAX_SATA_PORTS     6
@@ -289,6 +290,7 @@ static void pc_q35_init(MachineState *machine)
 
 static void pc_compat_2_3(MachineState *machine)
 {
+    savevm_skip_section_footers();
 }
 
 static void pc_compat_2_2(MachineState *machine)
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 720a949f0b..7bdaf5593e 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -194,6 +194,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
                              ram_addr_t offset, size_t size,
                              uint64_t *bytes_sent);
 
-
 void ram_mig_init(void);
+void savevm_skip_section_footers(void);
 #endif
diff --git a/migration/savevm.c b/migration/savevm.c
index 2942ed6563..80c4389ac4 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -55,6 +55,8 @@
 #define ARP_PTYPE_IP 0x0800
 #define ARP_OP_REQUEST_REV 0x3
 
+static bool skip_section_footers;
+
 static int announce_self_create(uint8_t *buf,
                                 uint8_t *mac_addr)
 {
@@ -611,6 +613,11 @@ static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
     vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
 }
 
+void savevm_skip_section_footers(void)
+{
+    skip_section_footers = true;
+}
+
 /*
  * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
  */

From f68945d42bab700d95b87f62e0898606ce2421ed Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Tue, 19 May 2015 12:29:52 +0100
Subject: [PATCH 18/21] Add a protective section footer

Badly formatted migration streams can go undetected or produce
misleading errors due to a lack of checking at the end of sections.
In particular a section that adds an extra 0x00 at the end
causes what looks like a normal end of stream and thus doesn't produce
any errors, and something that ends in a 0x01..0x04 kind of looks
like a real section header and then fails when the section parser tries
to figure out which section it is.  This is made worse by the
choice of 0x00..0x04 being small numbers that are particularly common
in normal section data.

This patch adds a section footer consisting of a marker (0x7e - ~)
followed by the section-id that was also sent in the header.  If
they mismatch then it throws an error explaining which section was
being loaded.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/migration.h |  1 +
 migration/savevm.c            | 61 +++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 7bdaf5593e..9387c8c9d4 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -34,6 +34,7 @@
 #define QEMU_VM_SECTION_FULL         0x04
 #define QEMU_VM_SUBSECTION           0x05
 #define QEMU_VM_VMDESCRIPTION        0x06
+#define QEMU_VM_SECTION_FOOTER       0x7e
 
 struct MigrationParams {
     bool blk;
diff --git a/migration/savevm.c b/migration/savevm.c
index 80c4389ac4..2091882196 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -639,6 +639,53 @@ static void save_section_header(QEMUFile *f, SaveStateEntry *se,
     }
 }
 
+/*
+ * Write a footer onto device sections that catches misformatted device
+ * sections.
+ */
+static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
+{
+    if (!skip_section_footers) {
+        qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
+        qemu_put_be32(f, se->section_id);
+    }
+}
+
+/*
+ * Read a footer off the wire and check that it matches the expected section
+ *
+ * Returns: true if the footer was good
+ *          false if there is a problem (and calls error_report to say why)
+ */
+static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
+{
+    uint8_t read_mark;
+    uint32_t read_section_id;
+
+    if (skip_section_footers) {
+        /* No footer to check */
+        return true;
+    }
+
+    read_mark = qemu_get_byte(f);
+
+    if (read_mark != QEMU_VM_SECTION_FOOTER) {
+        error_report("Missing section footer for %s", se->idstr);
+        return false;
+    }
+
+    read_section_id = qemu_get_be32(f);
+    if (read_section_id != se->section_id) {
+        error_report("Mismatched section id in footer for %s -"
+                     " read 0x%x expected 0x%x",
+                     se->idstr, read_section_id, se->section_id);
+        return false;
+    }
+
+    /* All good */
+    return true;
+}
+
 bool qemu_savevm_state_blocked(Error **errp)
 {
     SaveStateEntry *se;
@@ -686,6 +733,7 @@ void qemu_savevm_state_begin(QEMUFile *f,
         save_section_header(f, se, QEMU_VM_SECTION_START);
 
         ret = se->ops->save_live_setup(f, se->opaque);
+        save_section_footer(f, se);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
             break;
@@ -723,6 +771,7 @@ int qemu_savevm_state_iterate(QEMUFile *f)
 
         ret = se->ops->save_live_iterate(f, se->opaque);
         trace_savevm_section_end(se->idstr, se->section_id, ret);
+        save_section_footer(f, se);
 
         if (ret < 0) {
             qemu_file_set_error(f, ret);
@@ -770,6 +819,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
 
         ret = se->ops->save_live_complete(f, se->opaque);
         trace_savevm_section_end(se->idstr, se->section_id, ret);
+        save_section_footer(f, se);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
             return;
@@ -796,6 +846,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
 
         json_end_object(vmdesc);
         trace_savevm_section_end(se->idstr, se->section_id, 0);
+        save_section_footer(f, se);
     }
 
     qemu_put_byte(f, QEMU_VM_EOF);
@@ -900,6 +951,8 @@ static int qemu_save_device_state(QEMUFile *f)
         save_section_header(f, se, QEMU_VM_SECTION_FULL);
 
         vmstate_save(f, se, NULL);
+
+        save_section_footer(f, se);
     }
 
     qemu_put_byte(f, QEMU_VM_EOF);
@@ -1027,6 +1080,10 @@ int qemu_loadvm_state(QEMUFile *f)
                              " device '%s'", instance_id, idstr);
                 goto out;
             }
+            if (!check_section_footer(f, le->se)) {
+                ret = -EINVAL;
+                goto out;
+            }
             break;
         case QEMU_VM_SECTION_PART:
         case QEMU_VM_SECTION_END:
@@ -1050,6 +1107,10 @@ int qemu_loadvm_state(QEMUFile *f)
                              section_id, le->se->idstr);
                 goto out;
             }
+            if (!check_section_footer(f, le->se)) {
+                ret = -EINVAL;
+                goto out;
+            }
             break;
         default:
             error_report("Unknown savevm section type %d", section_type);

From 73d9a7961ab1b083fb2095413a3bd091e35f4369 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Tue, 19 May 2015 12:29:53 +0100
Subject: [PATCH 19/21] Teach analyze-migration.py about section footers

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 scripts/analyze-migration.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index 0c8b22f2aa..f6894bece9 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -474,6 +474,7 @@ class MigrationDump(object):
     QEMU_VM_SECTION_FULL  = 0x04
     QEMU_VM_SUBSECTION    = 0x05
     QEMU_VM_VMDESCRIPTION = 0x06
+    QEMU_VM_SECTION_FOOTER= 0x7e
 
     def __init__(self, filename):
         self.section_classes = { ( 'ram', 0 ) : [ RamSection, None ],
@@ -526,6 +527,10 @@ class MigrationDump(object):
             elif section_type == self.QEMU_VM_SECTION_PART or section_type == self.QEMU_VM_SECTION_END:
                 section_id = file.read32()
                 self.sections[section_id].read()
+            elif section_type == self.QEMU_VM_SECTION_FOOTER:
+                read_section_id = file.read32()
+                if read_section_id != section_id:
+                    raise Exception("Mismatched section footer: %x vs %x" % (read_section_id, section_id))
             else:
                 raise Exception("Unknown section type: %d" % section_type)
         file.close()

From a97270ad5d6dd0382ecb4568674226c8463e59fb Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 20 Apr 2015 16:57:16 +0100
Subject: [PATCH 20/21] Rename RDMA structures to make destination clear

RDMA has two data types that are named confusingly;
   RDMALocalBlock (pointed to indirectly by local_ram_blocks)
   RDMARemoteBlock (pointed to by block in RDMAContext)

RDMALocalBlocks, as the name suggests, is a data structure that
represents the RDMAable RAM Blocks on the current side of the migration
whichever that is.

RDMARemoteBlocks is always the shape of the RAMBlocks on the
destination, even when used on the destination itself.

Rename:
     RDMARemoteBlock -> RDMADestBlock
     context->'block' -> context->dest_blocks

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Michael R. Hines <mrhines@us.ibm.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 migration/rdma.c | 66 ++++++++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 791ef44713..6c1e73fd89 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -236,13 +236,13 @@ typedef struct RDMALocalBlock {
  * corresponding RDMALocalBlock with
  * the information needed to perform the actual RDMA.
  */
-typedef struct QEMU_PACKED RDMARemoteBlock {
+typedef struct QEMU_PACKED RDMADestBlock {
     uint64_t remote_host_addr;
     uint64_t offset;
     uint64_t length;
     uint32_t remote_rkey;
     uint32_t padding;
-} RDMARemoteBlock;
+} RDMADestBlock;
 
 static uint64_t htonll(uint64_t v)
 {
@@ -258,20 +258,20 @@ static uint64_t ntohll(uint64_t v) {
     return ((uint64_t)ntohl(u.lv[0]) << 32) | (uint64_t) ntohl(u.lv[1]);
 }
 
-static void remote_block_to_network(RDMARemoteBlock *rb)
+static void dest_block_to_network(RDMADestBlock *db)
 {
-    rb->remote_host_addr = htonll(rb->remote_host_addr);
-    rb->offset = htonll(rb->offset);
-    rb->length = htonll(rb->length);
-    rb->remote_rkey = htonl(rb->remote_rkey);
+    db->remote_host_addr = htonll(db->remote_host_addr);
+    db->offset = htonll(db->offset);
+    db->length = htonll(db->length);
+    db->remote_rkey = htonl(db->remote_rkey);
 }
 
-static void network_to_remote_block(RDMARemoteBlock *rb)
+static void network_to_dest_block(RDMADestBlock *db)
 {
-    rb->remote_host_addr = ntohll(rb->remote_host_addr);
-    rb->offset = ntohll(rb->offset);
-    rb->length = ntohll(rb->length);
-    rb->remote_rkey = ntohl(rb->remote_rkey);
+    db->remote_host_addr = ntohll(db->remote_host_addr);
+    db->offset = ntohll(db->offset);
+    db->length = ntohll(db->length);
+    db->remote_rkey = ntohl(db->remote_rkey);
 }
 
 /*
@@ -350,7 +350,7 @@ typedef struct RDMAContext {
      * Description of ram blocks used throughout the code.
      */
     RDMALocalBlocks local_ram_blocks;
-    RDMARemoteBlock *block;
+    RDMADestBlock  *dest_blocks;
 
     /*
      * Migration on *destination* started.
@@ -590,7 +590,7 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma)
     memset(local, 0, sizeof *local);
     qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma);
     trace_qemu_rdma_init_ram_blocks(local->nb_blocks);
-    rdma->block = (RDMARemoteBlock *) g_malloc0(sizeof(RDMARemoteBlock) *
+    rdma->dest_blocks = (RDMADestBlock *) g_malloc0(sizeof(RDMADestBlock) *
                         rdma->local_ram_blocks.nb_blocks);
     local->init = true;
     return 0;
@@ -2184,8 +2184,8 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
         rdma->connected = false;
     }
 
-    g_free(rdma->block);
-    rdma->block = NULL;
+    g_free(rdma->dest_blocks);
+    rdma->dest_blocks = NULL;
 
     for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
         if (rdma->wr_data[idx].control_mr) {
@@ -2974,25 +2974,25 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
              * their "local" descriptions with what was sent.
              */
             for (i = 0; i < local->nb_blocks; i++) {
-                rdma->block[i].remote_host_addr =
+                rdma->dest_blocks[i].remote_host_addr =
                     (uintptr_t)(local->block[i].local_host_addr);
 
                 if (rdma->pin_all) {
-                    rdma->block[i].remote_rkey = local->block[i].mr->rkey;
+                    rdma->dest_blocks[i].remote_rkey = local->block[i].mr->rkey;
                 }
 
-                rdma->block[i].offset = local->block[i].offset;
-                rdma->block[i].length = local->block[i].length;
+                rdma->dest_blocks[i].offset = local->block[i].offset;
+                rdma->dest_blocks[i].length = local->block[i].length;
 
-                remote_block_to_network(&rdma->block[i]);
+                dest_block_to_network(&rdma->dest_blocks[i]);
             }
 
             blocks.len = rdma->local_ram_blocks.nb_blocks
-                                                * sizeof(RDMARemoteBlock);
+                                                * sizeof(RDMADestBlock);
 
 
             ret = qemu_rdma_post_send_control(rdma,
-                                        (uint8_t *) rdma->block, &blocks);
+                                        (uint8_t *) rdma->dest_blocks, &blocks);
 
             if (ret < 0) {
                 error_report("rdma migration: error sending remote info");
@@ -3148,7 +3148,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
     if (flags == RAM_CONTROL_SETUP) {
         RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT };
         RDMALocalBlocks *local = &rdma->local_ram_blocks;
-        int reg_result_idx, i, j, nb_remote_blocks;
+        int reg_result_idx, i, j, nb_dest_blocks;
 
         head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
         trace_qemu_rdma_registration_stop_ram();
@@ -3169,7 +3169,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
             return ret;
         }
 
-        nb_remote_blocks = resp.len / sizeof(RDMARemoteBlock);
+        nb_dest_blocks = resp.len / sizeof(RDMADestBlock);
 
         /*
          * The protocol uses two different sets of rkeys (mutually exclusive):
@@ -3183,7 +3183,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
          * and then propagates the remote ram block descriptions to his local copy.
          */
 
-        if (local->nb_blocks != nb_remote_blocks) {
+        if (local->nb_blocks != nb_dest_blocks) {
             ERROR(errp, "ram blocks mismatch #1! "
                         "Your QEMU command line parameters are probably "
                         "not identical on both the source and destination.");
@@ -3191,26 +3191,26 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
         }
 
         qemu_rdma_move_header(rdma, reg_result_idx, &resp);
-        memcpy(rdma->block,
+        memcpy(rdma->dest_blocks,
             rdma->wr_data[reg_result_idx].control_curr, resp.len);
-        for (i = 0; i < nb_remote_blocks; i++) {
-            network_to_remote_block(&rdma->block[i]);
+        for (i = 0; i < nb_dest_blocks; i++) {
+            network_to_dest_block(&rdma->dest_blocks[i]);
 
             /* search local ram blocks */
             for (j = 0; j < local->nb_blocks; j++) {
-                if (rdma->block[i].offset != local->block[j].offset) {
+                if (rdma->dest_blocks[i].offset != local->block[j].offset) {
                     continue;
                 }
 
-                if (rdma->block[i].length != local->block[j].length) {
+                if (rdma->dest_blocks[i].length != local->block[j].length) {
                     ERROR(errp, "ram blocks mismatch #2! "
                         "Your QEMU command line parameters are probably "
                         "not identical on both the source and destination.");
                     return -EINVAL;
                 }
                 local->block[j].remote_host_addr =
-                        rdma->block[i].remote_host_addr;
-                local->block[j].remote_rkey = rdma->block[i].remote_rkey;
+                        rdma->dest_blocks[i].remote_host_addr;
+                local->block[j].remote_rkey = rdma->dest_blocks[i].remote_rkey;
                 break;
             }
 

From 4fa3dd17dc29c316726f0d4a354a4d895e130c73 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 20 Apr 2015 16:57:21 +0100
Subject: [PATCH 21/21] Remove unneeded memset

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Michael R. Hines <mrhines@us.ibm.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 migration/rdma.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 6c1e73fd89..48b3e64b34 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2452,7 +2452,6 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
 
     if (host_port) {
         rdma = g_malloc0(sizeof(RDMAContext));
-        memset(rdma, 0, sizeof(RDMAContext));
         rdma->current_index = -1;
         rdma->current_chunk = -1;