From 3d661c8ab18ad2013992c622ab307422ace891a2 Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Mon, 22 Apr 2019 13:34:20 +0300 Subject: [PATCH 01/33] migration: Add error_desc for file channel errors Currently, there is no information about error if outgoing migration was failed because of file channel errors. Example (QMP session): -> { "execute": "migrate", "arguments": { "uri": "exec:head -c 1" }} <- { "return": {} } ... -> { "execute": "query-migrate" } <- { "return": { "status": "failed" }} // There is not error's description And even in the QEMU's output there is nothing. This patch 1) Adds errp for the most of QEMUFileOps 2) Adds qemu_file_get_error_obj/qemu_file_set_error_obj 3) And finally using of qemu_file_get_error_obj in migration.c And now, the status for the mentioned fail will be: -> { "execute": "query-migrate" } <- { "return": { "status": "failed", "error-desc": "Unable to write to command: Broken pipe" }} Signed-off-by: Yury Kotov Message-Id: <20190422103420.15686-1-yury-kotov@yandex-team.ru> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 10 ++++-- migration/qemu-file-channel.c | 30 +++++++++-------- migration/qemu-file.c | 63 ++++++++++++++++++++++++++++------- migration/qemu-file.h | 15 ++++++--- migration/savevm.c | 6 ++-- 5 files changed, 88 insertions(+), 36 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 8a607fe1e2..28342969ea 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2963,6 +2963,7 @@ static MigThrError migration_detect_error(MigrationState *s) { int ret; int state = s->state; + Error *local_error = NULL; if (state == MIGRATION_STATUS_CANCELLING || state == MIGRATION_STATUS_CANCELLED) { @@ -2971,13 +2972,18 @@ static MigThrError migration_detect_error(MigrationState *s) } /* Try to detect any file errors */ - ret = qemu_file_get_error(s->to_dst_file); - + ret = qemu_file_get_error_obj(s->to_dst_file, &local_error); if (!ret) { /* Everything is fine */ + assert(!local_error); return MIG_THR_ERR_NONE; } + if (local_error) { + migrate_set_error(s, local_error); + error_free(local_error); + } + if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { /* * For postcopy, we allow the network to be down for a diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c index 8e639eb496..c382ea2d78 100644 --- a/migration/qemu-file-channel.c +++ b/migration/qemu-file-channel.c @@ -33,7 +33,8 @@ static ssize_t channel_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, - int64_t pos) + int64_t pos, + Error **errp) { QIOChannel *ioc = QIO_CHANNEL(opaque); ssize_t done = 0; @@ -47,7 +48,7 @@ static ssize_t channel_writev_buffer(void *opaque, while (nlocal_iov > 0) { ssize_t len; - len = qio_channel_writev(ioc, local_iov, nlocal_iov, NULL); + len = qio_channel_writev(ioc, local_iov, nlocal_iov, errp); if (len == QIO_CHANNEL_ERR_BLOCK) { if (qemu_in_coroutine()) { qio_channel_yield(ioc, G_IO_OUT); @@ -57,7 +58,6 @@ static ssize_t channel_writev_buffer(void *opaque, continue; } if (len < 0) { - /* XXX handle Error objects */ done = -EIO; goto cleanup; } @@ -75,13 +75,14 @@ static ssize_t channel_writev_buffer(void *opaque, static ssize_t channel_get_buffer(void *opaque, uint8_t *buf, int64_t pos, - size_t size) + size_t size, + Error **errp) { QIOChannel *ioc = QIO_CHANNEL(opaque); ssize_t ret; do { - ret = qio_channel_read(ioc, (char *)buf, size, NULL); + ret = qio_channel_read(ioc, (char *)buf, size, errp); if (ret < 0) { if (ret == QIO_CHANNEL_ERR_BLOCK) { if (qemu_in_coroutine()) { @@ -90,7 +91,6 @@ static ssize_t channel_get_buffer(void *opaque, qio_channel_wait(ioc, G_IO_IN); } } else { - /* XXX handle Error * object */ return -EIO; } } @@ -100,18 +100,20 @@ static ssize_t channel_get_buffer(void *opaque, } -static int channel_close(void *opaque) +static int channel_close(void *opaque, Error **errp) { + int ret; QIOChannel *ioc = QIO_CHANNEL(opaque); - qio_channel_close(ioc, NULL); + ret = qio_channel_close(ioc, errp); object_unref(OBJECT(ioc)); - return 0; + return ret; } static int channel_shutdown(void *opaque, bool rd, - bool wr) + bool wr, + Error **errp) { QIOChannel *ioc = QIO_CHANNEL(opaque); @@ -125,8 +127,7 @@ static int channel_shutdown(void *opaque, } else { mode = QIO_CHANNEL_SHUTDOWN_WRITE; } - if (qio_channel_shutdown(ioc, mode, NULL) < 0) { - /* XXX handler Error * object */ + if (qio_channel_shutdown(ioc, mode, errp) < 0) { return -EIO; } } @@ -135,11 +136,12 @@ static int channel_shutdown(void *opaque, static int channel_set_blocking(void *opaque, - bool enabled) + bool enabled, + Error **errp) { QIOChannel *ioc = QIO_CHANNEL(opaque); - if (qio_channel_set_blocking(ioc, enabled, NULL) < 0) { + if (qio_channel_set_blocking(ioc, enabled, errp) < 0) { return -1; } return 0; diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 0431585502..c04a7a891b 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -28,6 +28,7 @@ #include "migration.h" #include "qemu-file.h" #include "trace.h" +#include "qapi/error.h" #define IO_BUF_SIZE 32768 #define MAX_IOV_SIZE MIN(IOV_MAX, 64) @@ -51,6 +52,7 @@ struct QEMUFile { unsigned int iovcnt; int last_error; + Error *last_error_obj; }; /* @@ -62,7 +64,7 @@ int qemu_file_shutdown(QEMUFile *f) if (!f->ops->shut_down) { return -ENOSYS; } - return f->ops->shut_down(f->opaque, true, true); + return f->ops->shut_down(f->opaque, true, true, NULL); } /* @@ -106,6 +108,36 @@ void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks) f->hooks = hooks; } +/* + * Get last error for stream f with optional Error* + * + * Return negative error value if there has been an error on previous + * operations, return 0 if no error happened. + * Optional, it returns Error* in errp, but it may be NULL even if return value + * is not 0. + * + */ +int qemu_file_get_error_obj(QEMUFile *f, Error **errp) +{ + if (errp) { + *errp = f->last_error_obj ? error_copy(f->last_error_obj) : NULL; + } + return f->last_error; +} + +/* + * Set the last error for stream f with optional Error* + */ +void qemu_file_set_error_obj(QEMUFile *f, int ret, Error *err) +{ + if (f->last_error == 0 && ret) { + f->last_error = ret; + error_propagate(&f->last_error_obj, err); + } else if (err) { + error_report_err(err); + } +} + /* * Get last error for stream f * @@ -115,14 +147,15 @@ void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks) */ int qemu_file_get_error(QEMUFile *f) { - return f->last_error; + return qemu_file_get_error_obj(f, NULL); } +/* + * Set the last error for stream f + */ void qemu_file_set_error(QEMUFile *f, int ret) { - if (f->last_error == 0) { - f->last_error = ret; - } + qemu_file_set_error_obj(f, ret, NULL); } bool qemu_file_is_writable(QEMUFile *f) @@ -176,6 +209,7 @@ void qemu_fflush(QEMUFile *f) { ssize_t ret = 0; ssize_t expect = 0; + Error *local_error = NULL; if (!qemu_file_is_writable(f)) { return; @@ -183,7 +217,8 @@ void qemu_fflush(QEMUFile *f) if (f->iovcnt > 0) { expect = iov_size(f->iov, f->iovcnt); - ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos); + ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos, + &local_error); qemu_iovec_release_ram(f); } @@ -195,7 +230,7 @@ void qemu_fflush(QEMUFile *f) * data set we requested, so sanity check that. */ if (ret != expect) { - qemu_file_set_error(f, ret < 0 ? ret : -EIO); + qemu_file_set_error_obj(f, ret < 0 ? ret : -EIO, local_error); } f->buf_index = 0; f->iovcnt = 0; @@ -283,6 +318,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) { int len; int pending; + Error *local_error = NULL; assert(!qemu_file_is_writable(f)); @@ -294,14 +330,16 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) f->buf_size = pending; len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, - IO_BUF_SIZE - pending); + IO_BUF_SIZE - pending, &local_error); if (len > 0) { f->buf_size += len; f->pos += len; } else if (len == 0) { - qemu_file_set_error(f, -EIO); + qemu_file_set_error_obj(f, -EIO, local_error); } else if (len != -EAGAIN) { - qemu_file_set_error(f, len); + qemu_file_set_error_obj(f, len, local_error); + } else { + error_free(local_error); } return len; @@ -327,7 +365,7 @@ int qemu_fclose(QEMUFile *f) ret = qemu_file_get_error(f); if (f->ops->close) { - int ret2 = f->ops->close(f->opaque); + int ret2 = f->ops->close(f->opaque, NULL); if (ret >= 0) { ret = ret2; } @@ -338,6 +376,7 @@ int qemu_fclose(QEMUFile *f) if (f->last_error) { ret = f->last_error; } + error_free(f->last_error_obj); g_free(f); trace_qemu_file_fclose(); return ret; @@ -783,6 +822,6 @@ void qemu_put_counted_string(QEMUFile *f, const char *str) void qemu_file_set_blocking(QEMUFile *f, bool block) { if (f->ops->set_blocking) { - f->ops->set_blocking(f->opaque, block); + f->ops->set_blocking(f->opaque, block, NULL); } } diff --git a/migration/qemu-file.h b/migration/qemu-file.h index 13baf896bd..eb886db65f 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -32,7 +32,8 @@ * bytes actually read should be returned. */ typedef ssize_t (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf, - int64_t pos, size_t size); + int64_t pos, size_t size, + Error **errp); /* Close a file * @@ -41,7 +42,7 @@ typedef ssize_t (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf, * The meaning of return value on success depends on the specific back-end being * used. */ -typedef int (QEMUFileCloseFunc)(void *opaque); +typedef int (QEMUFileCloseFunc)(void *opaque, Error **errp); /* Called to return the OS file descriptor associated to the QEMUFile. */ @@ -49,14 +50,15 @@ typedef int (QEMUFileGetFD)(void *opaque); /* Called to change the blocking mode of the file */ -typedef int (QEMUFileSetBlocking)(void *opaque, bool enabled); +typedef int (QEMUFileSetBlocking)(void *opaque, bool enabled, Error **errp); /* * This function writes an iovec to file. The handler must write all * of the data or return a negative errno value. */ typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov, - int iovcnt, int64_t pos); + int iovcnt, int64_t pos, + Error **errp); /* * This function provides hooks around different @@ -97,7 +99,8 @@ typedef QEMUFile *(QEMURetPathFunc)(void *opaque); * Existing blocking reads/writes must be woken * Returns 0 on success, -err on error */ -typedef int (QEMUFileShutdownFunc)(void *opaque, bool rd, bool wr); +typedef int (QEMUFileShutdownFunc)(void *opaque, bool rd, bool wr, + Error **errp); typedef struct QEMUFileOps { QEMUFileGetBufferFunc *get_buffer; @@ -149,6 +152,8 @@ void qemu_update_position(QEMUFile *f, size_t size); void qemu_file_reset_rate_limit(QEMUFile *f); void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); int64_t qemu_file_get_rate_limit(QEMUFile *f); +int qemu_file_get_error_obj(QEMUFile *f, Error **errp); +void qemu_file_set_error_obj(QEMUFile *f, int ret, Error *err); void qemu_file_set_error(QEMUFile *f, int ret); int qemu_file_shutdown(QEMUFile *f); QEMUFile *qemu_file_get_return_path(QEMUFile *f); diff --git a/migration/savevm.c b/migration/savevm.c index 79ed44d475..a2a5f89b75 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -124,7 +124,7 @@ static struct mig_cmd_args { /* savevm/loadvm support */ static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, - int64_t pos) + int64_t pos, Error **errp) { int ret; QEMUIOVector qiov; @@ -139,12 +139,12 @@ static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, } static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos, - size_t size) + size_t size, Error **errp) { return bdrv_load_vmstate(opaque, buf, pos, size); } -static int bdrv_fclose(void *opaque) +static int bdrv_fclose(void *opaque, Error **errp) { return bdrv_flush(opaque); } From 78dd48df397ad9d2b874f7aa28552b4f27f58a8d Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Fri, 5 Jul 2019 04:07:11 +0300 Subject: [PATCH 02/33] hw/net: fix vmxnet3 live migration At some point vmxnet3 live migration stopped working and git-bisect didn't help finding a working version. The issue is the PCI configuration space is not being migrated successfully and MSIX remains masked at destination. Remove the migration differentiation between PCI and PCIe since the logic resides now inside VMSTATE_PCI_DEVICE. Remove also the VMXNET3_COMPAT_FLAG_DISABLE_PCIE based differentiation since at 'realize' time is decided if the device is PCI or PCIe, then the above macro is enough. Use the opportunity to move to the standard VMSTATE_MSIX instead of the deprecated SaveVMHandlers. Signed-off-by: Marcel Apfelbaum Message-Id: <20190705010711.23277-1-marcel.apfelbaum@gmail.com> Tested-by: Sukrit Bhatnagar Reviewed-by: Dmitry Fleytman Signed-off-by: Dr. David Alan Gilbert --- hw/net/vmxnet3.c | 52 ++---------------------------------------------- 1 file changed, 2 insertions(+), 50 deletions(-) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 10d01d0058..8b17548b02 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -2141,21 +2141,6 @@ vmxnet3_cleanup_msi(VMXNET3State *s) msi_uninit(d); } -static void -vmxnet3_msix_save(QEMUFile *f, void *opaque) -{ - PCIDevice *d = PCI_DEVICE(opaque); - msix_save(d, f); -} - -static int -vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id) -{ - PCIDevice *d = PCI_DEVICE(opaque); - msix_load(d, f); - return 0; -} - static const MemoryRegionOps b0_ops = { .read = vmxnet3_io_bar0_read, .write = vmxnet3_io_bar0_write, @@ -2176,11 +2161,6 @@ static const MemoryRegionOps b1_ops = { }, }; -static SaveVMHandlers savevm_vmxnet3_msix = { - .save_state = vmxnet3_msix_save, - .load_state = vmxnet3_msix_load, -}; - static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) { uint64_t dsn_payload; @@ -2203,7 +2183,6 @@ static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) { - DeviceState *dev = DEVICE(pci_dev); VMXNET3State *s = VMXNET3(pci_dev); int ret; @@ -2249,8 +2228,6 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET, vmxnet3_device_serial_num(s)); } - - register_savevm_live(dev, "vmxnet3-msix", -1, 1, &savevm_vmxnet3_msix, s); } static void vmxnet3_instance_init(Object *obj) @@ -2440,29 +2417,6 @@ static const VMStateDescription vmstate_vmxnet3_int_state = { } }; -static bool vmxnet3_vmstate_need_pcie_device(void *opaque) -{ - VMXNET3State *s = VMXNET3(opaque); - - return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE); -} - -static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id) -{ - return !vmxnet3_vmstate_need_pcie_device(opaque); -} - -static const VMStateDescription vmstate_vmxnet3_pcie_device = { - .name = "vmxnet3/pcie", - .version_id = 1, - .minimum_version_id = 1, - .needed = vmxnet3_vmstate_need_pcie_device, - .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State), - VMSTATE_END_OF_LIST() - } -}; - static const VMStateDescription vmstate_vmxnet3 = { .name = "vmxnet3", .version_id = 1, @@ -2470,9 +2424,8 @@ static const VMStateDescription vmstate_vmxnet3 = { .pre_save = vmxnet3_pre_save, .post_load = vmxnet3_post_load, .fields = (VMStateField[]) { - VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State, - vmxnet3_vmstate_test_pci_device, 0, - vmstate_pci_device, PCIDevice), + VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State), + VMSTATE_MSIX(parent_obj, VMXNET3State), VMSTATE_BOOL(rx_packets_compound, VMXNET3State), VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State), VMSTATE_BOOL(lro_supported, VMXNET3State), @@ -2508,7 +2461,6 @@ static const VMStateDescription vmstate_vmxnet3 = { }, .subsections = (const VMStateDescription*[]) { &vmxstate_vmxnet3_mcast_list, - &vmstate_vmxnet3_pcie_device, NULL } }; From 640dfb14db919462dedc1b4fa0feaaf99a0bff42 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 16 Jul 2019 08:54:11 +0800 Subject: [PATCH 03/33] migration: consolidate time info into populate_time_info Consolidate time information fill up into its function for better readability. Signed-off-by: Wei Yang Message-Id: <20190716005411.4156-1-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 28342969ea..7c66da3a83 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -823,6 +823,25 @@ bool migration_is_setup_or_active(int state) } } +static void populate_time_info(MigrationInfo *info, MigrationState *s) +{ + info->has_status = true; + info->has_setup_time = true; + info->setup_time = s->setup_time; + if (s->state == MIGRATION_STATUS_COMPLETED) { + info->has_total_time = true; + info->total_time = s->total_time; + info->has_downtime = true; + info->downtime = s->downtime; + } else { + info->has_total_time = true; + info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - + s->start_time; + info->has_expected_downtime = true; + info->expected_downtime = s->expected_downtime; + } +} + static void populate_ram_info(MigrationInfo *info, MigrationState *s) { info->has_ram = true; @@ -908,16 +927,8 @@ static void fill_source_migration_info(MigrationInfo *info) case MIGRATION_STATUS_DEVICE: case MIGRATION_STATUS_POSTCOPY_PAUSED: case MIGRATION_STATUS_POSTCOPY_RECOVER: - /* TODO add some postcopy stats */ - info->has_status = true; - info->has_total_time = true; - info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - - s->start_time; - info->has_expected_downtime = true; - info->expected_downtime = s->expected_downtime; - info->has_setup_time = true; - info->setup_time = s->setup_time; - + /* TODO add some postcopy stats */ + populate_time_info(info, s); populate_ram_info(info, s); populate_disk_info(info); break; @@ -926,14 +937,7 @@ static void fill_source_migration_info(MigrationInfo *info) /* TODO: display COLO specific information (checkpoint info etc.) */ break; case MIGRATION_STATUS_COMPLETED: - info->has_status = true; - info->has_total_time = true; - info->total_time = s->total_time; - info->has_downtime = true; - info->downtime = s->downtime; - info->has_setup_time = true; - info->setup_time = s->setup_time; - + populate_time_info(info, s); populate_ram_info(info, s); break; case MIGRATION_STATUS_FAILED: From 0abfff9ea7c56c2f6ad3cee10120915ec723cb32 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 27 Jun 2019 10:08:20 +0800 Subject: [PATCH 04/33] migration/postcopy: the valid condition is one less then end If one equals end, it means we have gone through the whole bitmap. Use a more restrict check to skip a unnecessary condition. Signed-off-by: Wei Yang Message-Id: <20190627020822.15485-2-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 889148dd84..68bc11c9e7 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2865,7 +2865,7 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, for (current = 0; current < end; ) { unsigned long one = find_next_bit(unsentmap, end, current); - if (one <= end) { + if (one < end) { unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1); unsigned long discard_length; From 33a5cb6202f05bf32a870a17c2d9b6d6be6e52f0 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 27 Jun 2019 10:08:21 +0800 Subject: [PATCH 05/33] migration/postcopy: break the loop when there is no more page to discard When one is equal or bigger then end, it means there is no page to discard. Just break the loop in this case instead of processing it. No functional change, just refactor it a little. Signed-off-by: Wei Yang Message-Id: <20190627020822.15485-3-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 68bc11c9e7..8a97dadec4 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2864,23 +2864,23 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, for (current = 0; current < end; ) { unsigned long one = find_next_bit(unsentmap, end, current); + unsigned long zero, discard_length; - if (one < end) { - unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1); - unsigned long discard_length; - - if (zero >= end) { - discard_length = end - one; - } else { - discard_length = zero - one; - } - if (discard_length) { - postcopy_discard_send_range(ms, pds, one, discard_length); - } - current = one + discard_length; - } else { - current = one; + if (one >= end) { + break; } + + zero = find_next_zero_bit(unsentmap, end, one + 1); + + if (zero >= end) { + discard_length = end - one; + } else { + discard_length = zero - one; + } + if (discard_length) { + postcopy_discard_send_range(ms, pds, one, discard_length); + } + current = one + discard_length; } return 0; From a162b572e9e5a85c440656fe97d7a8b133257379 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 27 Jun 2019 10:08:22 +0800 Subject: [PATCH 06/33] migration/postcopy: discard_length must not be 0 Since we break the loop when there is no more page to discard, we are sure the following process would find some page to discard. It is not necessary to check it again. Signed-off-by: Wei Yang Message-Id: <20190627020822.15485-4-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 8a97dadec4..4bb5e24459 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2877,9 +2877,7 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, } else { discard_length = zero - one; } - if (discard_length) { - postcopy_discard_send_range(ms, pds, one, discard_length); - } + postcopy_discard_send_range(ms, pds, one, discard_length); current = one + discard_length; } From e927a0331757c358a1010c50628613d4da83038e Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 10 Jul 2019 13:08:13 +0800 Subject: [PATCH 07/33] migration/postcopy: reduce one operation to calculate fixup_start_addr Use the same way for run_end to calculate run_start, which saves one operation. Signed-off-by: Wei Yang Message-Id: <20190710050814.31344-2-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 4bb5e24459..da399f2c8a 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2973,10 +2973,12 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, host_offset = run_start % host_ratio; if (host_offset) { do_fixup = true; - run_start -= host_offset; - fixup_start_addr = run_start; - /* For the next pass */ - run_start = run_start + host_ratio; + fixup_start_addr = run_start - host_offset; + /* + * This host page has gone, the next loop iteration starts + * from after the fixup + */ + run_start = fixup_start_addr + host_ratio; } else { /* Find the end of this run */ unsigned long run_end; From 8996604fe6b9e6646883f70e737b9440e7563c87 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 10 Jul 2019 13:08:14 +0800 Subject: [PATCH 08/33] migration/postcopy: do_fixup is true when host_offset is non-zero This means it is not necessary to spare an extra variable to hold this condition. Use host_offset directly is fine. Signed-off-by: Wei Yang Message-Id: <20190710050814.31344-3-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index da399f2c8a..255f289bbb 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2962,7 +2962,6 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, } while (run_start < pages) { - bool do_fixup = false; unsigned long fixup_start_addr; unsigned long host_offset; @@ -2972,7 +2971,6 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, */ host_offset = run_start % host_ratio; if (host_offset) { - do_fixup = true; fixup_start_addr = run_start - host_offset; /* * This host page has gone, the next loop iteration starts @@ -2994,7 +2992,6 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, */ host_offset = run_end % host_ratio; if (host_offset) { - do_fixup = true; fixup_start_addr = run_end - host_offset; /* * This host page has gone, the next loop iteration starts @@ -3010,7 +3007,7 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, } } - if (do_fixup) { + if (host_offset) { unsigned long page; /* Tell the destination to discard this page */ From 4e455d51efcccc058a359ab77fa7ef5e21a4957f Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 9 Jul 2019 22:09:22 +0800 Subject: [PATCH 09/33] migration/savevm: flush file for iterable_only case It would be proper to flush file even for iterable_only case. Signed-off-by: Wei Yang Message-Id: <20190709140924.13291-2-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/savevm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/migration/savevm.c b/migration/savevm.c index a2a5f89b75..0bfdceefcc 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1292,7 +1292,7 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, } if (iterable_only) { - return 0; + goto flush; } vmdesc = qjson_new(); @@ -1353,6 +1353,7 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, } qjson_destroy(vmdesc); +flush: qemu_fflush(f); return 0; } From 622a80c9552cc2e4866529ce41088d2f90e25454 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 9 Jul 2019 22:09:23 +0800 Subject: [PATCH 10/33] migration/savevm: split qemu_savevm_state_complete_precopy() into two parts This is a preparation patch for further cleanup. No functional change, just wrap two major part of qemu_savevm_state_complete_precopy() into function. Signed-off-by: Wei Yang Message-Id: <20190709140924.13291-3-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/savevm.c | 66 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 0bfdceefcc..63545a3026 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1246,23 +1246,12 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f) qemu_fflush(f); } -int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, - bool inactivate_disks) +static +int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy, + bool iterable_only) { - QJSON *vmdesc; - int vmdesc_len; SaveStateEntry *se; int ret; - bool in_postcopy = migration_in_postcopy(); - Error *local_err = NULL; - - if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) { - error_report_err(local_err); - } - - trace_savevm_state_complete_precopy(); - - cpu_synchronize_all_states(); QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || @@ -1291,9 +1280,18 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, } } - if (iterable_only) { - goto flush; - } + return 0; +} + +static +int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + bool in_postcopy, + bool inactivate_disks) +{ + QJSON *vmdesc; + int vmdesc_len; + SaveStateEntry *se; + int ret; vmdesc = qjson_new(); json_prop_int(vmdesc, "page_size", qemu_target_page_size()); @@ -1353,6 +1351,40 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, } qjson_destroy(vmdesc); + return 0; +} + +int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, + bool inactivate_disks) +{ + int ret; + Error *local_err = NULL; + bool in_postcopy = migration_in_postcopy(); + + if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) { + error_report_err(local_err); + } + + trace_savevm_state_complete_precopy(); + + cpu_synchronize_all_states(); + + ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy, + iterable_only); + if (ret) { + return ret; + } + + if (iterable_only) { + goto flush; + } + + ret = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy, + inactivate_disks); + if (ret) { + return ret; + } + flush: qemu_fflush(f); return 0; From e326767b455e031a9bd187adfcbaca4362cb9f40 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 9 Jul 2019 22:09:24 +0800 Subject: [PATCH 11/33] migration/savevm: move non SaveStateEntry condition check out of iteration in_postcopy and iterable_only are not SaveStateEntry specific, it would be more proper to check them out of iteration. Signed-off-by: Wei Yang Message-Id: <20190709140924.13291-4-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/savevm.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 63545a3026..69a827a92f 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1247,8 +1247,7 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f) } static -int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy, - bool iterable_only) +int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) { SaveStateEntry *se; int ret; @@ -1257,7 +1256,6 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy, if (!se->ops || (in_postcopy && se->ops->has_postcopy && se->ops->has_postcopy(se->opaque)) || - (in_postcopy && !iterable_only) || !se->ops->save_live_complete_precopy) { continue; } @@ -1369,10 +1367,11 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, cpu_synchronize_all_states(); - ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy, - iterable_only); - if (ret) { - return ret; + if (!in_postcopy || iterable_only) { + ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy); + if (ret) { + return ret; + } } if (iterable_only) { From 305b6f8431f0b612649bffe1fbfde760a5cd9300 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 11 Jul 2019 16:08:16 +0800 Subject: [PATCH 12/33] migration/postcopy: PostcopyState is already set in loadvm_postcopy_handle_advise() PostcopyState is already set to ADVISE at the beginning of loadvm_postcopy_handle_advise(). Remove the redundant set. Signed-off-by: Wei Yang Message-Id: <20190711080816.6405-1-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/savevm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 69a827a92f..eed5e551da 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1648,8 +1648,6 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, return -1; } - postcopy_state_set(POSTCOPY_INCOMING_ADVISE); - return 0; } From 52aec70923a19e73f7ecf5da484a9fd1b07634f5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 18 Jul 2019 16:37:47 +0800 Subject: [PATCH 13/33] migration/postcopy: start_postcopy could be true only when migrate_postcopy() return true There is only one place to set start_postcopy to true, qmp_migrate_start_postcopy(), which make sure start_postcopy could be set to true when migrate_postcopy() return true. So start_postcopy is true implies the other one. Signed-off-by: Wei Yang Message-Id: <20190718083747.5859-1-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 7c66da3a83..8331e62831 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3103,8 +3103,7 @@ static MigIterateState migration_iteration_run(MigrationState *s) if (pending_size && pending_size >= s->threshold_size) { /* Still a significant amount to transfer */ - if (migrate_postcopy() && !in_postcopy && - pend_pre <= s->threshold_size && + if (!in_postcopy && pend_pre <= s->threshold_size && atomic_read(&s->start_postcopy)) { if (postcopy_start(s)) { error_report("%s: postcopy failed to start", __func__); From 6a88eb2b0881c1137b1647147a37de9e818142d8 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 19 Jul 2019 15:11:29 +0800 Subject: [PATCH 14/33] migration: use migration_in_postcopy() to check POSTCOPY_ACTIVE Use common helper function to check the state. Signed-off-by: Wei Yang Message-Id: <20190719071129.11880-1-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/rdma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index 3036221ee8..0e73e759ca 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -3140,7 +3140,7 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, CHECK_ERROR_STATE(); - if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { + if (migration_in_postcopy()) { rcu_read_unlock(); return RAM_SAVE_CONTROL_NOT_SUPP; } @@ -3775,7 +3775,7 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, CHECK_ERROR_STATE(); - if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { + if (migration_in_postcopy()) { rcu_read_unlock(); return 0; } @@ -3810,7 +3810,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, CHECK_ERROR_STATE(); - if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { + if (migration_in_postcopy()) { rcu_read_unlock(); return 0; } From 5d0980a459774a52e3f3729cee0a2562d11026d2 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 18 Jul 2019 09:25:47 +0800 Subject: [PATCH 15/33] migration: just pass RAMBlock is enough RAMBlock->used_length is always passed to migration_bitmap_sync_range(), which could be retrieved from RAMBlock. Suggested-by: Paolo Bonzini Signed-off-by: Wei Yang Message-Id: <20190718012547.16373-1-richardw.yang@linux.intel.com> Reviewed-by: Peter Xu Reviewed-by: Paolo Bonzini Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 255f289bbb..97f241d6d9 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1748,11 +1748,10 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, } /* Called with RCU critical section */ -static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, - ram_addr_t length) +static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb) { rs->migration_dirty_pages += - cpu_physical_memory_sync_dirty_bitmap(rb, 0, length, + cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length, &rs->num_dirty_pages_period); } @@ -1841,7 +1840,7 @@ static void migration_bitmap_sync(RAMState *rs) qemu_mutex_lock(&rs->bitmap_mutex); rcu_read_lock(); RAMBLOCK_FOREACH_NOT_IGNORED(block) { - migration_bitmap_sync_range(rs, block, block->used_length); + migration_bitmap_sync_range(rs, block); } ram_counters.remaining = ram_bytes_remaining(); rcu_read_unlock(); @@ -4293,7 +4292,7 @@ static void colo_flush_ram_cache(void) memory_global_dirty_log_sync(); rcu_read_lock(); RAMBLOCK_FOREACH_NOT_IGNORED(block) { - migration_bitmap_sync_range(ram_state, block, block->used_length); + migration_bitmap_sync_range(ram_state, block); } rcu_read_unlock(); From 4695ce3fdcb636384eac743cee9a1b6375ad0ccb Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 18 Jul 2019 14:42:57 +0800 Subject: [PATCH 16/33] migration: equation is more proper than and to check LOADVM_QUIT LOADVM_QUIT allows a command to quit all layers of nested loadvm loops, while current return value check is not that proper even it works now. Current return value check "ret & LOADVM_QUIT" would return true if bit[0] is 1. This would be true when ret is -1 which is used to indicate an error of handling a command. Since there is only one place return LOADVM_QUIT and no other combination of return value, use "ret == LOADVM_QUIT" would be more proper. Signed-off-by: Wei Yang Message-Id: <20190718064257.29218-1-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/savevm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/savevm.c b/migration/savevm.c index eed5e551da..412768216c 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2437,7 +2437,7 @@ retry: case QEMU_VM_COMMAND: ret = loadvm_process_command(f); trace_qemu_loadvm_state_section_command(ret); - if ((ret < 0) || (ret & LOADVM_QUIT)) { + if ((ret < 0) || (ret == LOADVM_QUIT)) { goto out; } break; From be4a1a1b6f4559c9b1f97aac31e0fb01a1fbe48e Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 22 Jul 2019 15:53:38 +0800 Subject: [PATCH 17/33] migration: return -EINVAL directly when version_id mismatch It is not reasonable to continue when version_id mismatch. Signed-off-by: Wei Yang Message-Id: <20190722075339.25121-2-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 97f241d6d9..6a75aedc91 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4334,7 +4334,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) seq_iter++; if (version_id != 4) { - ret = -EINVAL; + return -EINVAL; } if (!migrate_use_compression()) { From 10da4a368992a5950abd07673ba8311fbbe667f5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 25 Jul 2019 08:20:23 +0800 Subject: [PATCH 18/33] migration: extract ram_load_precopy After cleanup, it would be clear to audience there are two cases ram_load: * precopy * postcopy And it is not necessary to check postcopy_running on each iteration for precopy. Signed-off-by: Wei Yang Reviewed-by: Dr. David Alan Gilbert Message-Id: <20190725002023.2335-3-richardw.yang@linux.intel.com> Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 73 +++++++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 6a75aedc91..a44e9c0abc 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4318,40 +4318,26 @@ static void colo_flush_ram_cache(void) trace_colo_flush_ram_cache_end(); } -static int ram_load(QEMUFile *f, void *opaque, int version_id) +/** + * ram_load_precopy: load pages in precopy case + * + * Returns 0 for success or -errno in case of error + * + * Called in precopy mode by ram_load(). + * rcu_read_lock is taken prior to this being called. + * + * @f: QEMUFile where to send the data + */ +static int ram_load_precopy(QEMUFile *f) { - int flags = 0, ret = 0, invalid_flags = 0; - static uint64_t seq_iter; - int len = 0; - /* - * If system is running in postcopy mode, page inserts to host memory must - * be atomic - */ - bool postcopy_running = postcopy_is_running(); + int flags = 0, ret = 0, invalid_flags = 0, len = 0; /* ADVISE is earlier, it shows the source has the postcopy capability on */ bool postcopy_advised = postcopy_is_advised(); - - seq_iter++; - - if (version_id != 4) { - return -EINVAL; - } - if (!migrate_use_compression()) { invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; } - /* This RCU critical section can be very long running. - * When RCU reclaims in the code start to become numerous, - * it will be necessary to reduce the granularity of this - * critical section. - */ - rcu_read_lock(); - if (postcopy_running) { - ret = ram_load_postcopy(f); - } - - while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) { + while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr, total_ram_bytes; void *host = NULL; uint8_t ch; @@ -4502,6 +4488,39 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } } + return ret; +} + +static int ram_load(QEMUFile *f, void *opaque, int version_id) +{ + int ret = 0; + static uint64_t seq_iter; + /* + * If system is running in postcopy mode, page inserts to host memory must + * be atomic + */ + bool postcopy_running = postcopy_is_running(); + + seq_iter++; + + if (version_id != 4) { + return -EINVAL; + } + + /* + * This RCU critical section can be very long running. + * When RCU reclaims in the code start to become numerous, + * it will be necessary to reduce the granularity of this + * critical section. + */ + rcu_read_lock(); + + if (postcopy_running) { + ret = ram_load_postcopy(f); + } else { + ret = ram_load_precopy(f); + } + ret |= wait_for_decompress_done(); rcu_read_unlock(); trace_ram_load_complete(ret, seq_iter); From 810cf2bbd4c5c1417bda8bec49caf0ababc22860 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 24 Jul 2019 09:07:21 +0800 Subject: [PATCH 19/33] migration/postcopy: make PostcopyDiscardState a static variable In postcopy-ram.c, we provide three functions to discard certain RAMBlock range: * postcopy_discard_send_init() * postcopy_discard_send_range() * postcopy_discard_send_finish() Currently, we allocate/deallocate PostcopyDiscardState for each RAMBlock on sending discard information to destination. This is not necessary and the same data area could be reused for each RAMBlock. This patch defines PostcopyDiscardState a static variable. By doing so: 1) avoid memory allocation and deallocation to the system 2) avoid potential failure of memory allocation 3) hide some details for their users Signed-off-by: Wei Yang Message-Id: <20190724010721.2146-1-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/postcopy-ram.c | 70 +++++++++++++++++----------------------- migration/postcopy-ram.h | 13 +++----- migration/ram.c | 30 +++++++---------- 3 files changed, 46 insertions(+), 67 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 9faacacc9e..2cb1a69752 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -1377,22 +1377,16 @@ void postcopy_fault_thread_notify(MigrationIncomingState *mis) * asking to discard individual ranges. * * @ms: The current migration state. - * @offset: the bitmap offset of the named RAMBlock in the migration - * bitmap. + * @offset: the bitmap offset of the named RAMBlock in the migration bitmap. * @name: RAMBlock that discards will operate on. - * - * returns: a new PDS. */ -PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms, - const char *name) +static PostcopyDiscardState pds = {0}; +void postcopy_discard_send_init(MigrationState *ms, const char *name) { - PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState)); - - if (res) { - res->ramblock_name = name; - } - - return res; + pds.ramblock_name = name; + pds.cur_entry = 0; + pds.nsentwords = 0; + pds.nsentcmds = 0; } /** @@ -1401,30 +1395,29 @@ PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms, * be sent later. * * @ms: Current migration state. - * @pds: Structure initialised by postcopy_discard_send_init(). * @start,@length: a range of pages in the migration bitmap in the * RAM block passed to postcopy_discard_send_init() (length=1 is one page) */ -void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds, - unsigned long start, unsigned long length) +void postcopy_discard_send_range(MigrationState *ms, unsigned long start, + unsigned long length) { size_t tp_size = qemu_target_page_size(); /* Convert to byte offsets within the RAM block */ - pds->start_list[pds->cur_entry] = start * tp_size; - pds->length_list[pds->cur_entry] = length * tp_size; - trace_postcopy_discard_send_range(pds->ramblock_name, start, length); - pds->cur_entry++; - pds->nsentwords++; + pds.start_list[pds.cur_entry] = start * tp_size; + pds.length_list[pds.cur_entry] = length * tp_size; + trace_postcopy_discard_send_range(pds.ramblock_name, start, length); + pds.cur_entry++; + pds.nsentwords++; - if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) { + if (pds.cur_entry == MAX_DISCARDS_PER_COMMAND) { /* Full set, ship it! */ qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file, - pds->ramblock_name, - pds->cur_entry, - pds->start_list, - pds->length_list); - pds->nsentcmds++; - pds->cur_entry = 0; + pds.ramblock_name, + pds.cur_entry, + pds.start_list, + pds.length_list); + pds.nsentcmds++; + pds.cur_entry = 0; } } @@ -1433,24 +1426,21 @@ void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds, * bitmap code. Sends any outstanding discard messages, frees the PDS * * @ms: Current migration state. - * @pds: Structure initialised by postcopy_discard_send_init(). */ -void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds) +void postcopy_discard_send_finish(MigrationState *ms) { /* Anything unsent? */ - if (pds->cur_entry) { + if (pds.cur_entry) { qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file, - pds->ramblock_name, - pds->cur_entry, - pds->start_list, - pds->length_list); - pds->nsentcmds++; + pds.ramblock_name, + pds.cur_entry, + pds.start_list, + pds.length_list); + pds.nsentcmds++; } - trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords, - pds->nsentcmds); - - g_free(pds); + trace_postcopy_discard_send_finish(pds.ramblock_name, pds.nsentwords, + pds.nsentcmds); } /* diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 9d55536fd1..9c8bd2bae0 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -43,10 +43,8 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis); /* * Called at the start of each RAMBlock by the bitmap code. - * Returns a new PDS */ -PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms, - const char *name); +void postcopy_discard_send_init(MigrationState *ms, const char *name); /* * Called by the bitmap code for each chunk to discard. @@ -55,15 +53,14 @@ PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms, * @start,@length: a range of pages in the migration bitmap in the * RAM block passed to postcopy_discard_send_init() (length=1 is one page) */ -void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds, - unsigned long start, unsigned long length); +void postcopy_discard_send_range(MigrationState *ms, unsigned long start, + unsigned long length); /* * Called at the end of each RAMBlock by the bitmap code. - * Sends any outstanding discard messages, frees the PDS. + * Sends any outstanding discard messages. */ -void postcopy_discard_send_finish(MigrationState *ms, - PostcopyDiscardState *pds); +void postcopy_discard_send_finish(MigrationState *ms); /* * Place a page (from) at (host) efficiently diff --git a/migration/ram.c b/migration/ram.c index a44e9c0abc..f428639af5 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2850,12 +2850,9 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms) * with the dirtymap; so a '1' means it's either dirty or unsent. * * @ms: current migration state - * @pds: state for postcopy * @block: RAMBlock to discard */ -static int postcopy_send_discard_bm_ram(MigrationState *ms, - PostcopyDiscardState *pds, - RAMBlock *block) +static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block) { unsigned long end = block->used_length >> TARGET_PAGE_BITS; unsigned long current; @@ -2876,7 +2873,7 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, } else { discard_length = zero - one; } - postcopy_discard_send_range(ms, pds, one, discard_length); + postcopy_discard_send_range(ms, one, discard_length); current = one + discard_length; } @@ -2902,16 +2899,15 @@ static int postcopy_each_ram_send_discard(MigrationState *ms) int ret; RAMBLOCK_FOREACH_NOT_IGNORED(block) { - PostcopyDiscardState *pds = - postcopy_discard_send_init(ms, block->idstr); + postcopy_discard_send_init(ms, block->idstr); /* * Postcopy sends chunks of bitmap over the wire, but it * just needs indexes at this point, avoids it having * target page specific code. */ - ret = postcopy_send_discard_bm_ram(ms, pds, block); - postcopy_discard_send_finish(ms, pds); + ret = postcopy_send_discard_bm_ram(ms, block); + postcopy_discard_send_finish(ms); if (ret) { return ret; } @@ -2934,11 +2930,9 @@ static int postcopy_each_ram_send_discard(MigrationState *ms) * @unsent_pass: if true we need to canonicalize partially unsent host pages * otherwise we need to canonicalize partially dirty host pages * @block: block that contains the page we want to canonicalize - * @pds: state for postcopy */ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, - RAMBlock *block, - PostcopyDiscardState *pds) + RAMBlock *block) { RAMState *rs = ram_state; unsigned long *bitmap = block->bmap; @@ -3018,8 +3012,7 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, * (any partially sent pages were already discarded * by the previous unsent_pass) */ - postcopy_discard_send_range(ms, pds, fixup_start_addr, - host_ratio); + postcopy_discard_send_range(ms, fixup_start_addr, host_ratio); } /* Clean up the bitmap */ @@ -3062,18 +3055,17 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, */ static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block) { - PostcopyDiscardState *pds = - postcopy_discard_send_init(ms, block->idstr); + postcopy_discard_send_init(ms, block->idstr); /* First pass: Discard all partially sent host pages */ - postcopy_chunk_hostpages_pass(ms, true, block, pds); + postcopy_chunk_hostpages_pass(ms, true, block); /* * Second pass: Ensure that all partially dirty host pages are made * fully dirty. */ - postcopy_chunk_hostpages_pass(ms, false, block, pds); + postcopy_chunk_hostpages_pass(ms, false, block); - postcopy_discard_send_finish(ms, pds); + postcopy_discard_send_finish(ms); return 0; } From dad45ab2be9d5ce033ddba328204b06f2d10f0ed Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 6 Aug 2019 08:46:47 +0800 Subject: [PATCH 20/33] migration/postcopy: simplify calculation of run_start and fixup_start_addr The purpose of the calculation is to find a HostPage which is partially dirty. * fixup_start_addr points to the start of the HostPage to discard * run_start points to the next HostPage to check While in the middle stage, there would two cases for run_start: * aligned with HostPage means this is not partially dirty * not aligned means this is partially dirty When it is aligned, no work and calculation is necessary. run_start already points to the start of next HostPage and is ready to continue. When it is not aligned, the calculation could be simplified with: * fixup_start_addr = QEMU_ALIGN_DOWN(run_start, host_ratio) * run_start = QEMU_ALIGN_UP(run_start, host_ratio) By doing so, run_start always points to the next HostPage to check. fixup_start_addr always points to the HostPage to discard. Signed-off-by: Wei Yang Message-Id: <20190806004648.8659-2-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index f428639af5..d2184c3cfc 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2955,7 +2955,6 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, } while (run_start < pages) { - unsigned long fixup_start_addr; unsigned long host_offset; /* @@ -2963,45 +2962,26 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, * page, then we need to fixup this host page. */ host_offset = run_start % host_ratio; - if (host_offset) { - fixup_start_addr = run_start - host_offset; - /* - * This host page has gone, the next loop iteration starts - * from after the fixup - */ - run_start = fixup_start_addr + host_ratio; - } else { + if (!host_offset) { /* Find the end of this run */ - unsigned long run_end; if (unsent_pass) { - run_end = find_next_bit(unsentmap, pages, run_start + 1); + run_start = find_next_bit(unsentmap, pages, run_start + 1); } else { - run_end = find_next_zero_bit(bitmap, pages, run_start + 1); + run_start = find_next_zero_bit(bitmap, pages, run_start + 1); } /* * If the end isn't at the start of a host page, then the * run doesn't finish at the end of a host page * and we need to discard. */ - host_offset = run_end % host_ratio; - if (host_offset) { - fixup_start_addr = run_end - host_offset; - /* - * This host page has gone, the next loop iteration starts - * from after the fixup - */ - run_start = fixup_start_addr + host_ratio; - } else { - /* - * No discards on this iteration, next loop starts from - * next sent/dirty page - */ - run_start = run_end + 1; - } + host_offset = run_start % host_ratio; } if (host_offset) { unsigned long page; + unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start, + host_ratio); + run_start = QEMU_ALIGN_UP(run_start, host_ratio); /* Tell the destination to discard this page */ if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) { From 9dec3cc3f4545b8035ac7bf0854b81669b18c516 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 6 Aug 2019 08:46:48 +0800 Subject: [PATCH 21/33] migration/postcopy: use QEMU_IS_ALIGNED to replace host_offset Use QEMU_IS_ALIGNED for the check, it would be more consistent with other align calculations. Signed-off-by: Wei Yang Message-Id: <20190806004648.8659-3-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index d2184c3cfc..eee68a7991 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2955,14 +2955,12 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, } while (run_start < pages) { - unsigned long host_offset; /* * If the start of this run of pages is in the middle of a host * page, then we need to fixup this host page. */ - host_offset = run_start % host_ratio; - if (!host_offset) { + if (QEMU_IS_ALIGNED(run_start, host_ratio)) { /* Find the end of this run */ if (unsent_pass) { run_start = find_next_bit(unsentmap, pages, run_start + 1); @@ -2974,10 +2972,9 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, * run doesn't finish at the end of a host page * and we need to discard. */ - host_offset = run_start % host_ratio; } - if (host_offset) { + if (!QEMU_IS_ALIGNED(run_start, host_ratio)) { unsigned long page; unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start, host_ratio); From 32e70aad7e984f7a609850985895e97fc03579ab Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 6 Aug 2019 08:36:45 +0800 Subject: [PATCH 22/33] hmp: Remove migration capabilities from "info migrate" With the growth of migration capabilities, it is not proper to display them in "info migrate". Users are recommended to use "info migrate_capabiltiies" to list them. Signed-off-by: Wei Yang Suggested-by: Dr. David Alan Gilbert Message-Id: <20190806003645.8426-1-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- monitor/hmp-cmds.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 5ca3ebe942..35788c0645 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -220,24 +220,11 @@ static char *SocketAddress_to_str(SocketAddress *addr) void hmp_info_migrate(Monitor *mon, const QDict *qdict) { MigrationInfo *info; - MigrationCapabilityStatusList *caps, *cap; info = qmp_query_migrate(NULL); - caps = qmp_query_migrate_capabilities(NULL); migration_global_dump(mon); - /* do not display parameters during setup */ - if (info->has_status && caps) { - monitor_printf(mon, "capabilities: "); - for (cap = caps; cap; cap = cap->next) { - monitor_printf(mon, "%s: %s ", - MigrationCapability_str(cap->value->capability), - cap->value->state ? "on" : "off"); - } - monitor_printf(mon, "\n"); - } - if (info->has_status) { monitor_printf(mon, "Migration status: %s", MigrationStatus_str(info->status)); @@ -370,7 +357,6 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) monitor_printf(mon, "]\n"); } qapi_free_MigrationInfo(info); - qapi_free_MigrationCapabilityStatusList(caps); } void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict) From 14adf288d3a1617bf9ccf2da346b2b002a07d8f2 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 2 Apr 2019 08:31:06 +0800 Subject: [PATCH 23/33] migration: remove unused field bytes_xfer MigrationState->bytes_xfer is only set to 0 in migrate_init(). Remove this unnecessary field. Signed-off-by: Wei Yang Message-Id: <20190402003106.17614-1-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 1 - migration/migration.h | 1 - 2 files changed, 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 8331e62831..12b8e5dbe5 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1699,7 +1699,6 @@ void migrate_init(MigrationState *s) * parameters/capabilities that the user set, and * locks. */ - s->bytes_xfer = 0; s->cleanup_bh = 0; s->to_dst_file = NULL; s->state = MIGRATION_STATUS_NONE; diff --git a/migration/migration.h b/migration/migration.h index 1fdd7b21fd..5bc60709db 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -132,7 +132,6 @@ struct MigrationState DeviceState parent_obj; /*< public >*/ - size_t bytes_xfer; QemuThread thread; QEMUBH *cleanup_bh; QEMUFile *to_dst_file; From 87f3bd8717cd88932de302e215f1da51bfb8051a Mon Sep 17 00:00:00 2001 From: Ivan Ren Date: Fri, 2 Aug 2019 18:18:41 +0800 Subject: [PATCH 24/33] migration: always initialise ram_counters for a new migration This patch fix a multifd migration bug in migration speed calculation, this problem can be reproduced as follows: 1. start a vm and give a heavy memory write stress to prevent the vm be successfully migrated to destination 2. begin a migration with multifd 3. migrate for a long time [actually, this can be measured by transferred bytes] 4. migrate cancel 5. begin a new migration with multifd, the migration will directly run into migration_completion phase Reason as follows: Migration update bandwidth and s->threshold_size in function migration_update_counters after BUFFER_DELAY time: current_bytes = migration_total_bytes(s); transferred = current_bytes - s->iteration_initial_bytes; time_spent = current_time - s->iteration_start_time; bandwidth = (double)transferred / time_spent; s->threshold_size = bandwidth * s->parameters.downtime_limit; In multifd migration, migration_total_bytes function return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes. s->iteration_initial_bytes will be initialized to 0 at every new migration, but ram_counters is a global variable, and history migration data will be accumulated. So if the ram_counters.multifd_bytes is big enough, it may lead pending_size >= s->threshold_size become false in migration_iteration_run after the first migration_update_counters. Signed-off-by: Ivan Ren Reviewed-by: Juan Quintela Reviewed-by: Wei Yang Suggested-by: Wei Yang Message-Id: <1564741121-1840-1-git-send-email-ivanren@tencent.com> Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 25 +++++++++++++++++++------ migration/savevm.c | 1 + 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 12b8e5dbe5..c49e9dc035 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1911,6 +1911,11 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, } migrate_init(s); + /* + * set ram_counters memory to zero for a + * new migration + */ + memset(&ram_counters, 0, sizeof(ram_counters)); return true; } @@ -3034,6 +3039,17 @@ static void migration_calculate_complete(MigrationState *s) } } +static void update_iteration_initial_status(MigrationState *s) +{ + /* + * Update these three fields at the same time to avoid mismatch info lead + * wrong speed calculation. + */ + s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + s->iteration_initial_bytes = migration_total_bytes(s); + s->iteration_initial_pages = ram_get_total_transferred_pages(); +} + static void migration_update_counters(MigrationState *s, int64_t current_time) { @@ -3069,9 +3085,7 @@ static void migration_update_counters(MigrationState *s, qemu_file_reset_rate_limit(s->to_dst_file); - s->iteration_start_time = current_time; - s->iteration_initial_bytes = current_bytes; - s->iteration_initial_pages = ram_get_total_transferred_pages(); + update_iteration_initial_status(s); trace_migrate_transferred(transferred, time_spent, bandwidth, s->threshold_size); @@ -3194,7 +3208,7 @@ static void *migration_thread(void *opaque) rcu_register_thread(); object_ref(OBJECT(s)); - s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + update_iteration_initial_status(s); qemu_savevm_state_header(s->to_dst_file); @@ -3259,8 +3273,7 @@ static void *migration_thread(void *opaque) * the local variables. This is important to avoid * breaking transferred_bytes and bandwidth calculation */ - s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - s->iteration_initial_bytes = 0; + update_iteration_initial_status(s); } current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); diff --git a/migration/savevm.c b/migration/savevm.c index 412768216c..1ac15301ad 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1456,6 +1456,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) } migrate_init(ms); + memset(&ram_counters, 0, sizeof(ram_counters)); ms->to_dst_file = f; qemu_mutex_unlock_iothread(); From 5d7d2558631b4421826c60046c606584c58ab76c Mon Sep 17 00:00:00 2001 From: Ivan Ren Date: Tue, 30 Jul 2019 13:33:34 +0800 Subject: [PATCH 25/33] migration: add qemu_file_update_transfer interface Add qemu_file_update_transfer for just update bytes_xfer for speed limitation. This will be used for further migration feature such as multifd migration. Signed-off-by: Ivan Ren Reviewed-by: Wei Yang Reviewed-by: Juan Quintela Message-Id: <1564464816-21804-2-git-send-email-ivanren@tencent.com> Signed-off-by: Dr. David Alan Gilbert --- migration/qemu-file.c | 5 +++++ migration/qemu-file.h | 1 + 2 files changed, 6 insertions(+) diff --git a/migration/qemu-file.c b/migration/qemu-file.c index c04a7a891b..e33c46764f 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -654,6 +654,11 @@ void qemu_file_reset_rate_limit(QEMUFile *f) f->bytes_xfer = 0; } +void qemu_file_update_transfer(QEMUFile *f, int64_t len) +{ + f->bytes_xfer += len; +} + void qemu_put_be16(QEMUFile *f, unsigned int v) { qemu_put_byte(f, v >> 8); diff --git a/migration/qemu-file.h b/migration/qemu-file.h index eb886db65f..d064940b8c 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -150,6 +150,7 @@ int qemu_peek_byte(QEMUFile *f, int offset); void qemu_file_skip(QEMUFile *f, int size); void qemu_update_position(QEMUFile *f, size_t size); void qemu_file_reset_rate_limit(QEMUFile *f); +void qemu_file_update_transfer(QEMUFile *f, int64_t len); void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); int64_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_get_error_obj(QEMUFile *f, Error **errp); From 1b81c974ccfd536aceef840e220912b142a7dda0 Mon Sep 17 00:00:00 2001 From: Ivan Ren Date: Tue, 30 Jul 2019 13:33:35 +0800 Subject: [PATCH 26/33] migration: add speed limit for multifd migration Limit the speed of multifd migration through common speed limitation qemu file. Signed-off-by: Ivan Ren Message-Id: <1564464816-21804-3-git-send-email-ivanren@tencent.com> Reviewed-by: Wei Yang Reviewed-by: Juan Quintela Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index eee68a7991..1179519345 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -922,7 +922,7 @@ struct { * false. */ -static int multifd_send_pages(void) +static int multifd_send_pages(RAMState *rs) { int i; static int next_channel; @@ -954,6 +954,7 @@ static int multifd_send_pages(void) multifd_send_state->pages = p->pages; p->pages = pages; transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; + qemu_file_update_transfer(rs->f, transferred); ram_counters.multifd_bytes += transferred; ram_counters.transferred += transferred;; qemu_mutex_unlock(&p->mutex); @@ -962,7 +963,7 @@ static int multifd_send_pages(void) return 1; } -static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) +static int multifd_queue_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) { MultiFDPages_t *pages = multifd_send_state->pages; @@ -981,12 +982,12 @@ static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) } } - if (multifd_send_pages() < 0) { + if (multifd_send_pages(rs) < 0) { return -1; } if (pages->block != block) { - return multifd_queue_page(block, offset); + return multifd_queue_page(rs, block, offset); } return 1; @@ -1054,7 +1055,7 @@ void multifd_save_cleanup(void) multifd_send_state = NULL; } -static void multifd_send_sync_main(void) +static void multifd_send_sync_main(RAMState *rs) { int i; @@ -1062,7 +1063,7 @@ static void multifd_send_sync_main(void) return; } if (multifd_send_state->pages->used) { - if (multifd_send_pages() < 0) { + if (multifd_send_pages(rs) < 0) { error_report("%s: multifd_send_pages fail", __func__); return; } @@ -1083,6 +1084,7 @@ static void multifd_send_sync_main(void) p->packet_num = multifd_send_state->packet_num++; p->flags |= MULTIFD_FLAG_SYNC; p->pending_job++; + qemu_file_update_transfer(rs->f, p->packet_len); qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); } @@ -2078,7 +2080,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) { - if (multifd_queue_page(block, offset) < 0) { + if (multifd_queue_page(rs, block, offset) < 0) { return -1; } ram_counters.normal++; @@ -3447,7 +3449,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) ram_control_before_iterate(f, RAM_CONTROL_SETUP); ram_control_after_iterate(f, RAM_CONTROL_SETUP); - multifd_send_sync_main(); + multifd_send_sync_main(*rsp); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); @@ -3535,7 +3537,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) ram_control_after_iterate(f, RAM_CONTROL_ROUND); out: - multifd_send_sync_main(); + multifd_send_sync_main(rs); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); ram_counters.transferred += 8; @@ -3594,7 +3596,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) rcu_read_unlock(); - multifd_send_sync_main(); + multifd_send_sync_main(rs); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); From 81507f6b7e87468f24ed5886559feda15fe2db0c Mon Sep 17 00:00:00 2001 From: Ivan Ren Date: Tue, 30 Jul 2019 13:33:36 +0800 Subject: [PATCH 27/33] migration: update ram_counters for multifd sync packet Multifd sync will send MULTIFD_FLAG_SYNC flag info to destination, add these bytes to ram_counters record. Signed-off-by: Ivan Ren Suggested-by: Wei Yang Message-Id: <1564464816-21804-4-git-send-email-ivanren@tencent.com> Reviewed-by: Juan Quintela Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migration/ram.c b/migration/ram.c index 1179519345..30f13ffbdd 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1085,6 +1085,8 @@ static void multifd_send_sync_main(RAMState *rs) p->flags |= MULTIFD_FLAG_SYNC; p->pending_job++; qemu_file_update_transfer(rs->f, p->packet_len); + ram_counters.multifd_bytes += p->packet_len; + ram_counters.transferred += p->packet_len; qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); } From 7a3e957177c18917cf713d7e366fa6aca351da1f Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 8 Aug 2019 11:31:55 +0800 Subject: [PATCH 28/33] migration: rename migration_bitmap_sync_range to ramblock_sync_dirty_bitmap Rename for better understanding of the code. Suggested-by: Paolo Bonzini Signed-off-by: Wei Yang Message-Id: <20190808033155.30162-1-richardw.yang@linux.intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 30f13ffbdd..9e6cc1e685 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1752,7 +1752,7 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, } /* Called with RCU critical section */ -static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb) +static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb) { rs->migration_dirty_pages += cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length, @@ -1844,7 +1844,7 @@ static void migration_bitmap_sync(RAMState *rs) qemu_mutex_lock(&rs->bitmap_mutex); rcu_read_lock(); RAMBLOCK_FOREACH_NOT_IGNORED(block) { - migration_bitmap_sync_range(rs, block); + ramblock_sync_dirty_bitmap(rs, block); } ram_counters.remaining = ram_bytes_remaining(); rcu_read_unlock(); @@ -4265,7 +4265,7 @@ static void colo_flush_ram_cache(void) memory_global_dirty_log_sync(); rcu_read_lock(); RAMBLOCK_FOREACH_NOT_IGNORED(block) { - migration_bitmap_sync_range(ram_state, block); + ramblock_sync_dirty_bitmap(ram_state, block); } rcu_read_unlock(); From 1ce542620ada7e905a253c99b9acac279f245bae Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 5 Aug 2019 13:31:46 +0800 Subject: [PATCH 29/33] migration/postcopy: use mis->bh instead of allocating a QEMUBH For migration incoming side, it either quit in precopy or postcopy. It is safe to use the mis->bh for both instead of allocating a dedicated QEMUBH for postcopy. Signed-off-by: Wei Yang Reviewed-by: Dr. David Alan Gilbert Message-Id: <20190805053146.32326-1-richardw.yang@linux.intel.com> Signed-off-by: Dr. David Alan Gilbert --- migration/savevm.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 1ac15301ad..6369a4ff7a 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1866,16 +1866,10 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) return 0; } - -typedef struct { - QEMUBH *bh; -} HandleRunBhData; - static void loadvm_postcopy_handle_run_bh(void *opaque) { Error *local_err = NULL; - HandleRunBhData *data = opaque; - MigrationIncomingState *mis = migration_incoming_get_current(); + MigrationIncomingState *mis = opaque; /* TODO we should move all of this lot into postcopy_ram.c or a shared code * in migration.c @@ -1907,15 +1901,13 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) runstate_set(RUN_STATE_PAUSED); } - qemu_bh_delete(data->bh); - g_free(data); + qemu_bh_delete(mis->bh); } /* After all discards we can start running and asking for pages */ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING); - HandleRunBhData *data; trace_loadvm_postcopy_handle_run(); if (ps != POSTCOPY_INCOMING_LISTENING) { @@ -1923,9 +1915,8 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) return -1; } - data = g_new(HandleRunBhData, 1); - data->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, data); - qemu_bh_schedule(data->bh); + mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis); + qemu_bh_schedule(mis->bh); /* We need to finish reading the stream from the package * and also stop reading anything more from the stream that loaded the From 3170a6453bc8bf03adcaebbf52f9fe8328d0687b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 8 Aug 2019 19:03:24 +0400 Subject: [PATCH 30/33] qemu-file: move qemu_{get,put}_counted_string() declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move migration helpers for strings under include/, so they can be used outside of migration/ Signed-off-by: Marc-André Lureau Reviewed-by: Juan Quintela Message-Id: <20190808150325.21939-2-marcandre.lureau@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- include/migration/qemu-file-types.h | 4 ++++ migration/qemu-file.h | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-file-types.h index c0a1988155..2867e3da84 100644 --- a/include/migration/qemu-file-types.h +++ b/include/migration/qemu-file-types.h @@ -161,6 +161,10 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv) qemu_get_be64s(f, (uint64_t *)pv); } +size_t qemu_get_counted_string(QEMUFile *f, char buf[256]); + +void qemu_put_counted_string(QEMUFile *f, const char *name); + int qemu_file_rate_limit(QEMUFile *f); #endif diff --git a/migration/qemu-file.h b/migration/qemu-file.h index d064940b8c..b6303dbeef 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -161,8 +161,6 @@ QEMUFile *qemu_file_get_return_path(QEMUFile *f); void qemu_fflush(QEMUFile *f); void qemu_file_set_blocking(QEMUFile *f, bool block); -size_t qemu_get_counted_string(QEMUFile *f, char buf[256]); - void ram_control_before_iterate(QEMUFile *f, uint64_t flags); void ram_control_after_iterate(QEMUFile *f, uint64_t flags); void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data); @@ -181,6 +179,4 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size, uint64_t *bytes_sent); -void qemu_put_counted_string(QEMUFile *f, const char *name); - #endif From 5558c91ae8093d7808f9a6cf3b311fc80a885f5e Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 14 Aug 2019 04:02:13 +0200 Subject: [PATCH 31/33] migration: Add traces for multifd terminate threads Signed-off-by: Juan Quintela Message-Id: <20190814020218.1868-2-quintela@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 4 ++++ migration/trace-events | 2 ++ 2 files changed, 6 insertions(+) diff --git a/migration/ram.c b/migration/ram.c index 9e6cc1e685..b542929a7c 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -997,6 +997,8 @@ static void multifd_send_terminate_threads(Error *err) { int i; + trace_multifd_send_terminate_threads(err != NULL); + if (err) { MigrationState *s = migrate_get_current(); migrate_set_error(s, err); @@ -1258,6 +1260,8 @@ static void multifd_recv_terminate_threads(Error *err) { int i; + trace_multifd_recv_terminate_threads(err != NULL); + if (err) { MigrationState *s = migrate_get_current(); migrate_set_error(s, err); diff --git a/migration/trace-events b/migration/trace-events index d8e54c367a..886ce70ca0 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -85,12 +85,14 @@ multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uin multifd_recv_sync_main(long packet_num) "packet num %ld" multifd_recv_sync_main_signal(uint8_t id) "channel %d" multifd_recv_sync_main_wait(uint8_t id) "channel %d" +multifd_recv_terminate_threads(bool error) "error %d" multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 multifd_recv_thread_start(uint8_t id) "%d" multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" multifd_send_sync_main(long packet_num) "packet num %ld" multifd_send_sync_main_signal(uint8_t id) "channel %d" multifd_send_sync_main_wait(uint8_t id) "channel %d" +multifd_send_terminate_threads(bool error) "error %d" multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 multifd_send_thread_start(uint8_t id) "%d" ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx" From 18cdcea3713591e08a5736105930f48ba643fc7e Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 14 Aug 2019 04:02:14 +0200 Subject: [PATCH 32/33] migration: Make global sem_sync semaphore by channel This makes easy to debug things because when you want for all threads to arrive at that semaphore, you know which one your are waiting for. Signed-off-by: Juan Quintela Message-Id: <20190814020218.1868-3-quintela@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index b542929a7c..c7aa3d9a2c 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -661,6 +661,8 @@ typedef struct { uint64_t num_packets; /* pages sent through this channel */ uint64_t num_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; } MultiFDSendParams; typedef struct { @@ -896,8 +898,6 @@ struct { MultiFDSendParams *params; /* array of pages to sent */ MultiFDPages_t *pages; - /* syncs main thread and channels */ - QemuSemaphore sem_sync; /* global number of generated multifd packets */ uint64_t packet_num; /* send channels ready */ @@ -1039,6 +1039,7 @@ void multifd_save_cleanup(void) p->c = NULL; qemu_mutex_destroy(&p->mutex); qemu_sem_destroy(&p->sem); + qemu_sem_destroy(&p->sem_sync); g_free(p->name); p->name = NULL; multifd_pages_clear(p->pages); @@ -1048,7 +1049,6 @@ void multifd_save_cleanup(void) p->packet = NULL; } qemu_sem_destroy(&multifd_send_state->channels_ready); - qemu_sem_destroy(&multifd_send_state->sem_sync); g_free(multifd_send_state->params); multifd_send_state->params = NULL; multifd_pages_clear(multifd_send_state->pages); @@ -1096,7 +1096,7 @@ static void multifd_send_sync_main(RAMState *rs) MultiFDSendParams *p = &multifd_send_state->params[i]; trace_multifd_send_sync_main_wait(p->id); - qemu_sem_wait(&multifd_send_state->sem_sync); + qemu_sem_wait(&p->sem_sync); } trace_multifd_send_sync_main(multifd_send_state->packet_num); } @@ -1156,7 +1156,7 @@ static void *multifd_send_thread(void *opaque) qemu_mutex_unlock(&p->mutex); if (flags & MULTIFD_FLAG_SYNC) { - qemu_sem_post(&multifd_send_state->sem_sync); + qemu_sem_post(&p->sem_sync); } qemu_sem_post(&multifd_send_state->channels_ready); } else if (p->quit) { @@ -1179,7 +1179,7 @@ out: */ if (ret != 0) { if (flags & MULTIFD_FLAG_SYNC) { - qemu_sem_post(&multifd_send_state->sem_sync); + qemu_sem_post(&p->sem_sync); } qemu_sem_post(&multifd_send_state->channels_ready); } @@ -1225,7 +1225,6 @@ int multifd_save_setup(void) multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); multifd_send_state->pages = multifd_pages_init(page_count); - qemu_sem_init(&multifd_send_state->sem_sync, 0); qemu_sem_init(&multifd_send_state->channels_ready, 0); for (i = 0; i < thread_count; i++) { @@ -1233,6 +1232,7 @@ int multifd_save_setup(void) qemu_mutex_init(&p->mutex); qemu_sem_init(&p->sem, 0); + qemu_sem_init(&p->sem_sync, 0); p->quit = false; p->pending_job = 0; p->id = i; From 7dd59d01ddcc4a4ba0c44c2cc9e3b35c79aa7a29 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 14 Aug 2019 04:02:17 +0200 Subject: [PATCH 33/33] migration: add some multifd traces Signed-off-by: Juan Quintela Message-Id: <20190814020218.1868-6-quintela@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 3 +++ migration/trace-events | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/migration/ram.c b/migration/ram.c index c7aa3d9a2c..35552c090b 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1170,6 +1170,7 @@ static void *multifd_send_thread(void *opaque) out: if (local_err) { + trace_multifd_send_error(p->id); multifd_send_terminate_threads(local_err); } @@ -1200,6 +1201,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task)); Error *local_err = NULL; + trace_multifd_new_send_channel_async(p->id); if (qio_task_propagate_error(task, &local_err)) { migrate_set_error(migrate_get_current(), local_err); multifd_save_cleanup(); @@ -1486,6 +1488,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) atomic_read(&multifd_recv_state->count)); return false; } + trace_multifd_recv_new_channel(id); p = &multifd_recv_state->params[id]; if (p->c != NULL) { diff --git a/migration/trace-events b/migration/trace-events index 886ce70ca0..00ffcd5930 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -81,14 +81,18 @@ migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx" migration_throttle(void) "" +multifd_new_send_channel_async(uint8_t id) "channel %d" multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" +multifd_recv_new_channel(uint8_t id) "channel %d" multifd_recv_sync_main(long packet_num) "packet num %ld" multifd_recv_sync_main_signal(uint8_t id) "channel %d" multifd_recv_sync_main_wait(uint8_t id) "channel %d" multifd_recv_terminate_threads(bool error) "error %d" multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 multifd_recv_thread_start(uint8_t id) "%d" +multifd_save_setup_wait(uint8_t id) "%d" multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" +multifd_send_error(uint8_t id) "channel %d" multifd_send_sync_main(long packet_num) "packet num %ld" multifd_send_sync_main_signal(uint8_t id) "channel %d" multifd_send_sync_main_wait(uint8_t id) "channel %d"