From e8f5fe2de125a0bfbefbaa6a69af81f4817cb7a0 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Thu, 9 Mar 2017 15:27:08 +0000
Subject: [PATCH 01/18] memory_region: Fix name comments

The 'name' parameter to memory_region_init_* had been marked as debug
only, however vmstate_region_ram uses it as a parameter to
qemu_ram_set_idstr to set RAMBlock names and these form part of the
migration stream.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20170309152708.30635-1-dgilbert@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/exec/memory.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 691102317c..e39256ad03 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -371,7 +371,8 @@ void memory_region_init_io(MemoryRegion *mr,
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
- * @name: the name of the region.
+ * @name: Region name, becomes part of RAMBlock name used in migration stream
+ *        must be unique within any device
  * @size: size of the region.
  * @errp: pointer to Error*, to store an error if it happens.
  */
@@ -390,7 +391,8 @@ void memory_region_init_ram(MemoryRegion *mr,
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
- * @name: the name of the region.
+ * @name: Region name, becomes part of RAMBlock name used in migration stream
+ *        must be unique within any device
  * @size: used size of the region.
  * @max_size: max size of the region.
  * @resized: callback to notify owner about used size change.
@@ -412,7 +414,8 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
- * @name: the name of the region.
+ * @name: Region name, becomes part of RAMBlock name used in migration stream
+ *        must be unique within any device
  * @size: size of the region.
  * @share: %true if memory must be mmaped with the MAP_SHARED flag
  * @path: the path in which to allocate the RAM.
@@ -434,7 +437,8 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
- * @name: the name of the region.
+ * @name: Region name, becomes part of RAMBlock name used in migration stream
+ *        must be unique within any device
  * @size: size of the region.
  * @ptr: memory to be mapped; must contain at least @size bytes.
  */
@@ -496,7 +500,8 @@ void memory_region_init_alias(MemoryRegion *mr,
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
- * @name: the name of the region.
+ * @name: Region name, becomes part of RAMBlock name used in migration stream
+ *        must be unique within any device
  * @size: size of the region.
  * @errp: pointer to Error*, to store an error if it happens.
  */
@@ -513,7 +518,8 @@ void memory_region_init_rom(MemoryRegion *mr,
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
  * @ops: callbacks for write access handling (must not be NULL).
- * @name: the name of the region.
+ * @name: Region name, becomes part of RAMBlock name used in migration stream
+ *        must be unique within any device
  * @size: size of the region.
  * @errp: pointer to Error*, to store an error if it happens.
  */

From c0d9f7d0bcedeaa65d5c984fbe0d351e1402eab5 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Tue, 28 Feb 2017 18:40:01 +0100
Subject: [PATCH 02/18] docs: Add a note about mixing bootindex with "-boot
 order"

Occasionally the users try to mix the bootindex properties with the
"-boot order" parameter - and this likely does not give the expected
results. So let's add a proper statement that these two concepts
should not be used together.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1488303601-23741-1-git-send-email-thuth@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 docs/bootindex.txt | 9 +++++++++
 qemu-options.hx    | 5 ++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/docs/bootindex.txt b/docs/bootindex.txt
index f84fac7200..b9a8ba122f 100644
--- a/docs/bootindex.txt
+++ b/docs/bootindex.txt
@@ -41,3 +41,12 @@ has three bootable devices target1, target3, target5 connected to it,
 the option ROM will have a boot method for each of them, but it is not
 possible to map from boot method back to a specific target.  This is a
 shortcoming of the PC BIOS boot specification.
+
+== Mixing bootindex and boot order parameters ==
+
+Note that it does not make sense to use the bootindex property together
+with the "-boot order=..." (or "-boot once=...") parameter. The guest
+firmware implementations normally either support the one or the other,
+but not both parameters at the same time. Mixing them will result in
+undefined behavior, and thus the guest firmware will likely not boot
+from the expected devices.
diff --git a/qemu-options.hx b/qemu-options.hx
index 8dd8ee34a6..99af8edf5f 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -252,7 +252,10 @@ drive letters depend on the target architecture. The x86 PC uses: a, b
 (floppy 1 and 2), c (first hard disk), d (first CD-ROM), n-p (Etherboot
 from network adapter 1-4), hard disk boot is the default. To apply a
 particular boot order only on the first startup, specify it via
-@option{once}.
+@option{once}. Note that the @option{order} or @option{once} parameter
+should not be used together with the @option{bootindex} property of
+devices, since the firmware implementations normally do not support both
+at the same time.
 
 Interactive boot menus/prompts can be enabled via @option{menu=on} as far
 as firmware/BIOS supports them. The default is non-interactive boot.

From 1e356fc14beaa3ece6c0e961bd479af58be3198b Mon Sep 17 00:00:00 2001
From: Jitendra Kolhe <jitendra.kolhe@hpe.com>
Date: Fri, 24 Feb 2017 09:01:43 +0530
Subject: [PATCH 03/18] mem-prealloc: reduce large guest start-up and migration
 time.

Using "-mem-prealloc" option for a large guest leads to higher guest
start-up and migration time. This is because with "-mem-prealloc" option
qemu tries to map every guest page (create address translations), and
make sure the pages are available during runtime. virsh/libvirt by
default, seems to use "-mem-prealloc" option in case the guest is
configured to use huge pages. The patch tries to map all guest pages
simultaneously by spawning multiple threads. Currently limiting the
change to QEMU library functions on POSIX compliant host only, as we are
not sure if the problem exists on win32. Below are some stats with
"-mem-prealloc" option for guest configured to use huge pages.

------------------------------------------------------------------------
Idle Guest      | Start-up time | Migration time
------------------------------------------------------------------------
Guest stats with 2M HugePage usage - single threaded (existing code)
------------------------------------------------------------------------
64 Core - 4TB   | 54m11.796s    | 75m43.843s
64 Core - 1TB   | 8m56.576s     | 14m29.049s
64 Core - 256GB | 2m11.245s     | 3m26.598s
------------------------------------------------------------------------
Guest stats with 2M HugePage usage - map guest pages using 8 threads
------------------------------------------------------------------------
64 Core - 4TB   | 5m1.027s      | 34m10.565s
64 Core - 1TB   | 1m10.366s     | 8m28.188s
64 Core - 256GB | 0m19.040s     | 2m10.148s
-----------------------------------------------------------------------
Guest stats with 2M HugePage usage - map guest pages using 16 threads
-----------------------------------------------------------------------
64 Core - 4TB   | 1m58.970s     | 31m43.400s
64 Core - 1TB   | 0m39.885s     | 7m55.289s
64 Core - 256GB | 0m11.960s     | 2m0.135s
-----------------------------------------------------------------------

Changed in v2:
 - modify number of memset threads spawned to min(smp_cpus, 16).
 - removed 64GB memory restriction for spawning memset threads.

Changed in v3:
 - limit number of threads spawned based on
   min(sysconf(_SC_NPROCESSORS_ONLN), 16, smp_cpus)
 - implement memset thread specific siglongjmp in SIGBUS signal_handler.

Changed in v4
 - remove sigsetjmp/siglongjmp and SIGBUS unblock/block for main thread
   as main thread no longer touches any pages.
 - simplify code my returning memset_thread_failed status from
   touch_all_pages.

Signed-off-by: Jitendra Kolhe <jitendra.kolhe@hpe.com>
Message-Id: <1487907103-32350-1-git-send-email-jitendra.kolhe@hpe.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 backends/hostmem.c   |   4 +-
 exec.c               |   2 +-
 include/qemu/osdep.h |   3 +-
 util/oslib-posix.c   | 108 ++++++++++++++++++++++++++++++++++---------
 util/oslib-win32.c   |   3 +-
 5 files changed, 94 insertions(+), 26 deletions(-)

diff --git a/backends/hostmem.c b/backends/hostmem.c
index 7f5de70609..162c2187d8 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -224,7 +224,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
         void *ptr = memory_region_get_ram_ptr(&backend->mr);
         uint64_t sz = memory_region_size(&backend->mr);
 
-        os_mem_prealloc(fd, ptr, sz, &local_err);
+        os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
             return;
@@ -328,7 +328,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
          */
         if (backend->prealloc) {
             os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
-                            &local_err);
+                            smp_cpus, &local_err);
             if (local_err) {
                 goto out;
             }
diff --git a/exec.c b/exec.c
index aabb035e92..68135a9391 100644
--- a/exec.c
+++ b/exec.c
@@ -1467,7 +1467,7 @@ static void *file_ram_alloc(RAMBlock *block,
     }
 
     if (mem_prealloc) {
-        os_mem_prealloc(fd, area, memory, errp);
+        os_mem_prealloc(fd, area, memory, smp_cpus, errp);
         if (errp && *errp) {
             goto error;
         }
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index af37195fef..122ff06ff6 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -438,7 +438,8 @@ unsigned long qemu_getauxval(unsigned long type);
 
 void qemu_set_tty_echo(int fd, bool echo);
 
-void os_mem_prealloc(int fd, char *area, size_t sz, Error **errp);
+void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus,
+                     Error **errp);
 
 int qemu_read_password(char *buf, int buf_size);
 
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index cd686aae3d..956f66ab4a 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -55,6 +55,21 @@
 #include "qemu/error-report.h"
 #endif
 
+#define MAX_MEM_PREALLOC_THREAD_COUNT (MIN(sysconf(_SC_NPROCESSORS_ONLN), 16))
+
+struct MemsetThread {
+    char *addr;
+    uint64_t numpages;
+    uint64_t hpagesize;
+    QemuThread pgthread;
+    sigjmp_buf env;
+};
+typedef struct MemsetThread MemsetThread;
+
+static MemsetThread *memset_thread;
+static int memset_num_threads;
+static bool memset_thread_failed;
+
 int qemu_get_thread_id(void)
 {
 #if defined(__linux__)
@@ -316,18 +331,83 @@ char *qemu_get_exec_dir(void)
     return g_strdup(exec_dir);
 }
 
-static sigjmp_buf sigjump;
-
 static void sigbus_handler(int signal)
 {
-    siglongjmp(sigjump, 1);
+    int i;
+    if (memset_thread) {
+        for (i = 0; i < memset_num_threads; i++) {
+            if (qemu_thread_is_self(&memset_thread[i].pgthread)) {
+                siglongjmp(memset_thread[i].env, 1);
+            }
+        }
+    }
 }
 
-void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp)
+static void *do_touch_pages(void *arg)
+{
+    MemsetThread *memset_args = (MemsetThread *)arg;
+    char *addr = memset_args->addr;
+    uint64_t numpages = memset_args->numpages;
+    uint64_t hpagesize = memset_args->hpagesize;
+    sigset_t set, oldset;
+    int i = 0;
+
+    /* unblock SIGBUS */
+    sigemptyset(&set);
+    sigaddset(&set, SIGBUS);
+    pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
+
+    if (sigsetjmp(memset_args->env, 1)) {
+        memset_thread_failed = true;
+    } else {
+        for (i = 0; i < numpages; i++) {
+            memset(addr, 0, 1);
+            addr += hpagesize;
+        }
+    }
+    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+    return NULL;
+}
+
+static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
+                            int smp_cpus)
+{
+    uint64_t numpages_per_thread, size_per_thread;
+    char *addr = area;
+    int i = 0;
+
+    memset_thread_failed = false;
+    memset_num_threads = MIN(smp_cpus, MAX_MEM_PREALLOC_THREAD_COUNT);
+    memset_thread = g_new0(MemsetThread, memset_num_threads);
+    numpages_per_thread = (numpages / memset_num_threads);
+    size_per_thread = (hpagesize * numpages_per_thread);
+    for (i = 0; i < memset_num_threads; i++) {
+        memset_thread[i].addr = addr;
+        memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
+                                    numpages : numpages_per_thread;
+        memset_thread[i].hpagesize = hpagesize;
+        qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
+                           do_touch_pages, &memset_thread[i],
+                           QEMU_THREAD_JOINABLE);
+        addr += size_per_thread;
+        numpages -= numpages_per_thread;
+    }
+    for (i = 0; i < memset_num_threads; i++) {
+        qemu_thread_join(&memset_thread[i].pgthread);
+    }
+    g_free(memset_thread);
+    memset_thread = NULL;
+
+    return memset_thread_failed;
+}
+
+void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
+                     Error **errp)
 {
     int ret;
     struct sigaction act, oldact;
-    sigset_t set, oldset;
+    size_t hpagesize = qemu_fd_getpagesize(fd);
+    size_t numpages = DIV_ROUND_UP(memory, hpagesize);
 
     memset(&act, 0, sizeof(act));
     act.sa_handler = &sigbus_handler;
@@ -340,23 +420,10 @@ void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp)
         return;
     }
 
-    /* unblock SIGBUS */
-    sigemptyset(&set);
-    sigaddset(&set, SIGBUS);
-    pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
-
-    if (sigsetjmp(sigjump, 1)) {
+    /* touch pages simultaneously */
+    if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) {
         error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
             "pages available to allocate guest RAM\n");
-    } else {
-        int i;
-        size_t hpagesize = qemu_fd_getpagesize(fd);
-        size_t numpages = DIV_ROUND_UP(memory, hpagesize);
-
-        /* MAP_POPULATE silently ignores failures */
-        for (i = 0; i < numpages; i++) {
-            memset(area + (hpagesize * i), 0, 1);
-        }
     }
 
     ret = sigaction(SIGBUS, &oldact, NULL);
@@ -365,7 +432,6 @@ void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp)
         perror("os_mem_prealloc: failed to reinstall signal handler");
         exit(1);
     }
-    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
 }
 
 
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 0b1890fd33..80e4668935 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -541,7 +541,8 @@ int getpagesize(void)
     return system_info.dwPageSize;
 }
 
-void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp)
+void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
+                     Error **errp)
 {
     int i;
     size_t pagesize = getpagesize();

From 79ca7a1b898eb97c4192f3c78027a0f20485e7b4 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 7 Mar 2017 15:19:08 +0100
Subject: [PATCH 04/18] exec: add cpu_synchronize_state to cpu_memory_rw_debug

I sometimes got "Cannot access memory" when using the x command
on the monitor. Turns out that the cpu env did contain stale data
(e.g. wrong control register content for page table origin).
We must synchronize the state of the CPU before walking the page
tables. A similar issues happens for a remote gdb, so lets
do the cpu_synchronize_state in cpu_memory_rw_debug.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <1488896348-13560-1-git-send-email-borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/exec.c b/exec.c
index 68135a9391..a22f5a0385 100644
--- a/exec.c
+++ b/exec.c
@@ -43,6 +43,7 @@
 #include "exec/ioport.h"
 #include "sysemu/dma.h"
 #include "sysemu/numa.h"
+#include "sysemu/hw_accel.h"
 #include "exec/address-spaces.h"
 #include "sysemu/xen-mapcache.h"
 #include "trace-root.h"
@@ -3309,6 +3310,7 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
     hwaddr phys_addr;
     target_ulong page;
 
+    cpu_synchronize_state(cpu);
     while (len > 0) {
         int asidx;
         MemTxAttrs attrs;

From c70b11d160c6bca8e994d40639fcb41558c9fa0a Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Thu, 9 Mar 2017 16:46:34 -0300
Subject: [PATCH 05/18] kvm: Print MSR information if KVM_{GET,SET}_MSRS failed

When a KVM_{GET,SET}_MSRS ioctl() fails, it is difficult to find
out which MSR caused the problem. Print an error message for
debugging, before we trigger the (ret == cpu->kvm_msr_buf->nmsrs)
assert.

Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170309194634.28457-1-ehabkost@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/kvm.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 472399fb2c..55865dbee0 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -1824,6 +1824,12 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
         return ret;
     }
 
+    if (ret < cpu->kvm_msr_buf->nmsrs) {
+        struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
+        error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64,
+                     (uint32_t)e->index, (uint64_t)e->data);
+    }
+
     assert(ret == cpu->kvm_msr_buf->nmsrs);
     return 0;
 }
@@ -2189,6 +2195,12 @@ static int kvm_get_msrs(X86CPU *cpu)
         return ret;
     }
 
+    if (ret < cpu->kvm_msr_buf->nmsrs) {
+        struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
+        error_report("error: failed to get MSR 0x%" PRIx32,
+                     (uint32_t)e->index);
+    }
+
     assert(ret == cpu->kvm_msr_buf->nmsrs);
     /*
      * MTRR masks: Each mask consists of 5 parts

From ca2edcd35cd1a8589dfa0533c19ff232fec7b4b5 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Thu, 9 Mar 2017 15:50:46 -0300
Subject: [PATCH 06/18] kvmclock: Don't crash QEMU if KVM is disabled

Most machines don't allow sysbus devices like "kvmclock" to be
created from the command-line, but some of them do (the ones with
has_dynamic_sysbus=true). In those cases, it's possible to
manually create a kvmclock device without KVM being enabled,
making QEMU crash:

  $ qemu-system-x86_64 -machine q35,accel=tcg -device kvmclock
  Segmentation fault (core dumped)

This changes kvmclock's realize method to return an error if KVM
is disabled, to ensure it won't crash QEMU.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20170309185046.17555-1-ehabkost@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/i386/kvm/clock.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index ef9d560f9c..13eca374cd 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -22,6 +22,7 @@
 #include "kvm_i386.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
+#include "qapi/error.h"
 
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
@@ -208,6 +209,11 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
 {
     KVMClockState *s = KVM_CLOCK(dev);
 
+    if (!kvm_enabled()) {
+        error_setg(errp, "kvmclock device requires KVM");
+        return;
+    }
+
     kvm_update_clock(s);
 
     qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);

From 9b4b157ef6edc5cf060aef3402bdece7f581b5a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
Date: Fri, 10 Mar 2017 15:28:19 +0400
Subject: [PATCH 07/18] scripts/dump-guest-memory.py: fix int128_get64 on
 recent gcc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Int128 is no longer a struct, reaching a python exception:
Python Exception <class 'gdb.error'> Attempt to extract a component of a value that is not a (null).:

Replace struct access with a cast to uint64[] instead.

Fixes:
https://bugzilla.redhat.com/show_bug.cgi?id=1427466

Signed-off-by: Marc-AndrÃ© Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170310112819.16760-1-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 scripts/dump-guest-memory.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py
index 9956fc036c..f7c6635f15 100644
--- a/scripts/dump-guest-memory.py
+++ b/scripts/dump-guest-memory.py
@@ -314,8 +314,18 @@ def get_arch_phdr(endianness, elfclass):
 def int128_get64(val):
     """Returns low 64bit part of Int128 struct."""
 
-    assert val["hi"] == 0
-    return val["lo"]
+    try:
+        assert val["hi"] == 0
+        return val["lo"]
+    except gdb.error:
+        u64t = gdb.lookup_type('uint64_t').array(2)
+        u64 = val.cast(u64t)
+        if sys.byteorder == 'little':
+            assert u64[1] == 0
+            return u64[0]
+        else:
+            assert u64[0] == 0
+            return u64[1]
 
 
 def qlist_foreach(head, field_str):

From c12d66aac1aaacbc8495dd11712b06cc64259d92 Mon Sep 17 00:00:00 2001
From: Lin Ma <lma@suse.com>
Date: Fri, 10 Mar 2017 18:14:05 +0800
Subject: [PATCH 08/18] configure: add the missing help output for optional
 features

Signed-off-by: Lin Ma <lma@suse.com>
Message-Id: <20170310101405.26974-1-lma@suse.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 configure | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/configure b/configure
index 75c7c3526c..99d8bece70 100755
--- a/configure
+++ b/configure
@@ -1330,6 +1330,12 @@ Advanced options (experts only):
   --with-vss-sdk=SDK-path  enable Windows VSS support in QEMU Guest Agent
   --with-win-sdk=SDK-path  path to Windows Platform SDK (to build VSS .tlb)
   --tls-priority           default TLS protocol/cipher priority string
+  --enable-gprof           QEMU profiling with gprof
+  --enable-profiler        profiler support
+  --enable-xen-pv-domain-build
+                           xen pv domain builder
+  --enable-debug-stack-usage
+                           track the maximum stack usage of stacks created by qemu_alloc_stack
 
 Optional features, enabled with --enable-FEATURE and
 disabled with --disable-FEATURE, default is enabled if available:
@@ -1397,6 +1403,12 @@ disabled with --disable-FEATURE, default is enabled if available:
   tcmalloc        tcmalloc support
   jemalloc        jemalloc support
   replication     replication support
+  vhost-vsock     virtio sockets device support
+  opengl          opengl support
+  virglrenderer   virgl rendering support
+  xfsctl          xfsctl support
+  qom-cast-debug  cast debugging support
+  tools           build qemu-io, qemu-nbd and qemu-image tools
 
 NOTE: The object files are built at the place where configure is launched
 EOF

From bd5d983fa87e5a0230a2bc6a54972f53e39ad978 Mon Sep 17 00:00:00 2001
From: Suramya Shah <shah.suramya@gmail.com>
Date: Fri, 10 Mar 2017 22:09:48 +0530
Subject: [PATCH 09/18] util: Removed unneeded header from path.c

Signed-off-by: Suramya Shah <shah.suramya@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170310163948.7567-1-shah.suramya@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/path.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/util/path.c b/util/path.c
index 5479f76c6d..7f9fc272fb 100644
--- a/util/path.c
+++ b/util/path.c
@@ -6,7 +6,6 @@
 #include "qemu/osdep.h"
 #include <sys/param.h>
 #include <dirent.h>
-#include "qemu-common.h"
 #include "qemu/cutils.h"
 #include "qemu/path.h"
 

From b01a2d07c963e96dbd151f0db1eaa06f273acf34 Mon Sep 17 00:00:00 2001
From: Li Qiang <liq3ea@gmail.com>
Date: Tue, 14 Mar 2017 03:56:20 -0700
Subject: [PATCH 10/18] scsi: mptsas: fix the wrong reading size in fetch
 request

When fetching request, it should read sizeof(*hdr), not the
pointer hdr.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-Id: <1489488980-130668-1-git-send-email-liqiang6-s@360.cn>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/mptsas.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
index 2e091c0156..765ab53c34 100644
--- a/hw/scsi/mptsas.c
+++ b/hw/scsi/mptsas.c
@@ -756,7 +756,7 @@ static void mptsas_fetch_request(MPTSASState *s)
 
     /* Read the message header from the guest first. */
     addr = s->host_mfa_high_addr | MPTSAS_FIFO_GET(s, request_post);
-    pci_dma_read(pci, addr, req, sizeof(hdr));
+    pci_dma_read(pci, addr, req, sizeof(*hdr));
 
     if (hdr->Function < ARRAY_SIZE(mpi_request_sizes) &&
         mpi_request_sizes[hdr->Function]) {
@@ -766,8 +766,8 @@ static void mptsas_fetch_request(MPTSASState *s)
          */
         size = mpi_request_sizes[hdr->Function];
         assert(size <= MPTSAS_MAX_REQUEST_SIZE);
-        pci_dma_read(pci, addr + sizeof(hdr), &req[sizeof(hdr)],
-                     size - sizeof(hdr));
+        pci_dma_read(pci, addr + sizeof(*hdr), &req[sizeof(*hdr)],
+                     size - sizeof(*hdr));
     }
 
     if (hdr->Function == MPI_FUNCTION_SCSI_IO_REQUEST) {

From c0d24e7f70816c8af51ebe9dc74aa276a81858dd Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 1 Mar 2017 11:28:04 +0100
Subject: [PATCH 11/18] target/nios2: take BQL around interrupt check

The interrupt controller does not have its own locking.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/nios2/op_helper.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c
index 538853cda7..efb1c489c9 100644
--- a/target/nios2/op_helper.c
+++ b/target/nios2/op_helper.c
@@ -21,6 +21,7 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
+#include "qemu/main-loop.h"
 
 #if !defined(CONFIG_USER_ONLY)
 void helper_mmu_read_debug(CPUNios2State *env, uint32_t rn)
@@ -35,7 +36,9 @@ void helper_mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v)
 
 void helper_check_interrupts(CPUNios2State *env)
 {
+    qemu_mutex_lock_iothread();
     nios2_check_interrupts(env);
+    qemu_mutex_unlock_iothread();
 }
 #endif /* !CONFIG_USER_ONLY */
 

From 33bef0b9948b85000221d32c758d9d4a9276aaaf Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 3 Mar 2017 11:37:57 +0100
Subject: [PATCH 12/18] qemu-timer: fix off-by-one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the first timer is exactly at the current value of the clock, the
deadline is met and the timer should fire.  This fixes itself on the next
iteration of the loop without icount; with icount, however, execution
of instructions will stop exactly at the deadline and won't proceed.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/qemu-timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index 6cf70b96f6..2f201512df 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -199,7 +199,7 @@ bool timerlist_expired(QEMUTimerList *timer_list)
     expire_time = timer_list->active_timers->expire_time;
     qemu_mutex_unlock(&timer_list->active_timers_lock);
 
-    return expire_time < qemu_clock_get_ns(timer_list->clock->type);
+    return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
 }
 
 bool qemu_clock_expired(QEMUClockType type)

From d2528bdc19988db73056be75dd9bf52eeee620f5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 3 Mar 2017 12:01:16 +0100
Subject: [PATCH 13/18] qemu-timer: do not include sysemu/cpus.h from
 util/qemu-timer.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This dependency is the wrong way, and we will need util/qemu-timer.h from
sysemu/cpus.h in the next patch.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpu-exec.c               | 1 +
 hw/core/ptimer.c         | 1 +
 hw/ppc/pnv.c             | 1 +
 include/qemu/timer.h     | 1 -
 include/sysemu/cpus.h    | 2 ++
 kvm-all.c                | 1 +
 monitor.c                | 1 +
 replay/replay.c          | 1 +
 target/alpha/translate.c | 1 +
 translate-all.c          | 1 +
 util/main-loop.c         | 1 +
 util/qemu-timer.c        | 1 +
 12 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index d04dd91ebd..748cb66bca 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -33,6 +33,7 @@
 #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
 #include "hw/i386/apic.h"
 #endif
+#include "sysemu/cpus.h"
 #include "sysemu/replay.h"
 
 /* -icount align implementation. */
diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c
index 59ccb00550..7221c68a98 100644
--- a/hw/core/ptimer.c
+++ b/hw/core/ptimer.c
@@ -13,6 +13,7 @@
 #include "sysemu/replay.h"
 #include "sysemu/qtest.h"
 #include "block/aio.h"
+#include "sysemu/cpus.h"
 
 #define DELTA_ADJUST     1
 #define DELTA_NO_ADJUST -1
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 09f0d22def..3fa722af82 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -21,6 +21,7 @@
 #include "qapi/error.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/numa.h"
+#include "sysemu/cpus.h"
 #include "hw/hw.h"
 #include "target/ppc/cpu.h"
 #include "qemu/log.h"
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 26e628584c..91cd8c8a84 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -4,7 +4,6 @@
 #include "qemu-common.h"
 #include "qemu/notify.h"
 #include "qemu/host-utils.h"
-#include "sysemu/cpus.h"
 
 #define NANOSECONDS_PER_SECOND 1000000000LL
 
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index a73b5d4bce..e521a91661 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -1,6 +1,8 @@
 #ifndef QEMU_CPUS_H
 #define QEMU_CPUS_H
 
+#include "qemu/timer.h"
+
 /* cpus.c */
 bool qemu_in_vcpu_thread(void);
 void qemu_init_cpu_loop(void);
diff --git a/kvm-all.c b/kvm-all.c
index 9040bd50a4..90b8573656 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -29,6 +29,7 @@
 #include "hw/s390x/adapter.h"
 #include "exec/gdbstub.h"
 #include "sysemu/kvm_int.h"
+#include "sysemu/cpus.h"
 #include "qemu/bswap.h"
 #include "exec/memory.h"
 #include "exec/ram_addr.h"
diff --git a/monitor.c b/monitor.c
index f11893e1c3..be282ecb80 100644
--- a/monitor.c
+++ b/monitor.c
@@ -77,6 +77,7 @@
 #include "qapi-event.h"
 #include "qmp-introspect.h"
 #include "sysemu/qtest.h"
+#include "sysemu/cpus.h"
 #include "qemu/cutils.h"
 #include "qapi/qmp/dispatch.h"
 
diff --git a/replay/replay.c b/replay/replay.c
index 1835b9902e..78e2a7e570 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -16,6 +16,7 @@
 #include "replay-internal.h"
 #include "qemu/timer.h"
 #include "qemu/main-loop.h"
+#include "sysemu/cpus.h"
 #include "sysemu/sysemu.h"
 #include "qemu/error-report.h"
 
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index 055286a7b8..df5d695344 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "sysemu/cpus.h"
 #include "disas/disas.h"
 #include "qemu/host-utils.h"
 #include "exec/exec-all.h"
diff --git a/translate-all.c b/translate-all.c
index 34480aebba..b3ee876526 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -57,6 +57,7 @@
 #include "qemu/timer.h"
 #include "qemu/main-loop.h"
 #include "exec/log.h"
+#include "sysemu/cpus.h"
 
 /* #define DEBUG_TB_INVALIDATE */
 /* #define DEBUG_TB_FLUSH */
diff --git a/util/main-loop.c b/util/main-loop.c
index ca7bb072f9..7efc229e3d 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -28,6 +28,7 @@
 #include "qemu/timer.h"
 #include "qemu/sockets.h"	// struct in_addr needed for libslirp.h
 #include "sysemu/qtest.h"
+#include "sysemu/cpus.h"
 #include "slirp/libslirp.h"
 #include "qemu/main-loop.h"
 #include "block/aio.h"
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index 2f201512df..ac993403ab 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -27,6 +27,7 @@
 #include "qemu/timer.h"
 #include "sysemu/replay.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/cpus.h"
 
 #ifdef CONFIG_POSIX
 #include <pthread.h>

From 3f53bc61a404cd1d13fdba8441282a33a755f8c6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 3 Mar 2017 11:50:29 +0100
Subject: [PATCH 14/18] cpus: define QEMUTimerListNotifyCB for QEMU system
 emulation

There is no change for now, because the callback just invokes
qemu_notify_event.

Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpus.c                       |  5 +++++
 include/qemu/timer.h         |  4 ++--
 include/sysemu/cpus.h        |  1 +
 stubs/cpu-get-icount.c       |  6 ++++++
 tests/test-aio-multithread.c |  2 +-
 tests/test-aio.c             |  2 +-
 util/async.c                 |  2 +-
 util/main-loop.c             |  2 +-
 util/qemu-timer.c            | 10 +++++-----
 9 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/cpus.c b/cpus.c
index 69e21858b8..e9da3bcb59 100644
--- a/cpus.c
+++ b/cpus.c
@@ -800,6 +800,11 @@ static void qemu_cpu_kick_rr_cpu(void)
     } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
 }
 
+void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
+{
+    qemu_notify_event();
+}
+
 static void kick_tcg_thread(void *opaque)
 {
     timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 91cd8c8a84..1441b426cd 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -59,7 +59,7 @@ struct QEMUTimerListGroup {
 };
 
 typedef void QEMUTimerCB(void *opaque);
-typedef void QEMUTimerListNotifyCB(void *opaque);
+typedef void QEMUTimerListNotifyCB(void *opaque, QEMUClockType type);
 
 struct QEMUTimer {
     int64_t expire_time;        /* in nanoseconds */
@@ -776,7 +776,7 @@ static inline int64_t qemu_soonest_timeout(int64_t timeout1, int64_t timeout2)
  *
  * Initialise the clock & timer infrastructure
  */
-void init_clocks(void);
+void init_clocks(QEMUTimerListNotifyCB *notify_cb);
 
 int64_t cpu_get_ticks(void);
 /* Caller must hold BQL */
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index e521a91661..a8053f1715 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -22,6 +22,7 @@ void dump_drift_info(FILE *f, fprintf_function cpu_fprintf);
 
 /* Unblock cpu */
 void qemu_cpu_kick_self(void);
+void qemu_timer_notify_cb(void *opaque, QEMUClockType type);
 
 void cpu_synchronize_all_states(void);
 void cpu_synchronize_all_post_reset(void);
diff --git a/stubs/cpu-get-icount.c b/stubs/cpu-get-icount.c
index 2e8b63b225..0b7239d721 100644
--- a/stubs/cpu-get-icount.c
+++ b/stubs/cpu-get-icount.c
@@ -2,6 +2,7 @@
 #include "qemu-common.h"
 #include "qemu/timer.h"
 #include "sysemu/cpus.h"
+#include "qemu/main-loop.h"
 
 int use_icount;
 
@@ -9,3 +10,8 @@ int64_t cpu_get_icount(void)
 {
     abort();
 }
+
+void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
+{
+    qemu_notify_event();
+}
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
index 8b0b40ec78..549d784915 100644
--- a/tests/test-aio-multithread.c
+++ b/tests/test-aio-multithread.c
@@ -438,7 +438,7 @@ static void test_multi_mutex_10(void)
 
 int main(int argc, char **argv)
 {
-    init_clocks();
+    init_clocks(NULL);
 
     g_test_init(&argc, &argv, NULL);
     g_test_add_func("/aio/multi/lifecycle", test_lifecycle);
diff --git a/tests/test-aio.c b/tests/test-aio.c
index 2754f154ce..54e20d6ab1 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -835,7 +835,7 @@ int main(int argc, char **argv)
     Error *local_error = NULL;
     GSource *src;
 
-    init_clocks();
+    init_clocks(NULL);
 
     ctx = aio_context_new(&local_error);
     if (!ctx) {
diff --git a/util/async.c b/util/async.c
index 7d469eb857..663e297e1f 100644
--- a/util/async.c
+++ b/util/async.c
@@ -351,7 +351,7 @@ void aio_notify_accept(AioContext *ctx)
     }
 }
 
-static void aio_timerlist_notify(void *opaque)
+static void aio_timerlist_notify(void *opaque, QEMUClockType type)
 {
     aio_notify(opaque);
 }
diff --git a/util/main-loop.c b/util/main-loop.c
index 7efc229e3d..4534c89308 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -144,7 +144,7 @@ int qemu_init_main_loop(Error **errp)
     GSource *src;
     Error *local_error = NULL;
 
-    init_clocks();
+    init_clocks(qemu_timer_notify_cb);
 
     ret = qemu_signal_init();
     if (ret) {
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index ac993403ab..dc3181e9b8 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -122,7 +122,7 @@ void timerlist_free(QEMUTimerList *timer_list)
     g_free(timer_list);
 }
 
-static void qemu_clock_init(QEMUClockType type)
+static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
 {
     QEMUClock *clock = qemu_clock_ptr(type);
 
@@ -134,7 +134,7 @@ static void qemu_clock_init(QEMUClockType type)
     clock->last = INT64_MIN;
     QLIST_INIT(&clock->timerlists);
     notifier_list_init(&clock->reset_notifiers);
-    main_loop_tlg.tl[type] = timerlist_new(type, NULL, NULL);
+    main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
 }
 
 bool qemu_clock_use_for_deadline(QEMUClockType type)
@@ -278,7 +278,7 @@ QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
 void timerlist_notify(QEMUTimerList *timer_list)
 {
     if (timer_list->notify_cb) {
-        timer_list->notify_cb(timer_list->notify_opaque);
+        timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
     } else {
         qemu_notify_event();
     }
@@ -635,11 +635,11 @@ void qemu_clock_unregister_reset_notifier(QEMUClockType type,
     notifier_remove(notifier);
 }
 
-void init_clocks(void)
+void init_clocks(QEMUTimerListNotifyCB *notify_cb)
 {
     QEMUClockType type;
     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
-        qemu_clock_init(type);
+        qemu_clock_init(type, notify_cb);
     }
 
 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK

From e330c118f2a5a5365409b123cd0dd2c7d575bf05 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 3 Mar 2017 11:51:07 +0100
Subject: [PATCH 15/18] main-loop: remove now unnecessary optimization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This optimization is not necessary anymore, because the vCPU now drops
the I/O thread lock even with TCG.  Drop it to simplify the code and
avoid the "I/O thread spun for 1000 iterations" warning.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 vl.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/vl.c b/vl.c
index 1a95500ac7..0b4ed5241c 100644
--- a/vl.c
+++ b/vl.c
@@ -1888,17 +1888,14 @@ static bool main_loop_should_exit(void)
 
 static void main_loop(void)
 {
-    bool nonblocking;
-    int last_io = 0;
 #ifdef CONFIG_PROFILER
     int64_t ti;
 #endif
     do {
-        nonblocking = tcg_enabled() && last_io > 0;
 #ifdef CONFIG_PROFILER
         ti = profile_getclock();
 #endif
-        last_io = main_loop_wait(nonblocking);
+        main_loop_wait(false);
 #ifdef CONFIG_PROFILER
         dev_time += profile_getclock() - ti;
 #endif

From 6b8f0187a4d7c263e356302f8d308655372a4b5b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 2 Mar 2017 19:56:40 +0100
Subject: [PATCH 16/18] icount: process QEMU_CLOCK_VIRTUAL timers in vCPU
 thread

icount has become much slower after tcg_cpu_exec has stopped
using the BQL.  There is also a latent bug that is masked by
the slowness.

The slowness happens because every occurrence of a QEMU_CLOCK_VIRTUAL
timer now has to wake up the I/O thread and wait for it.  The rendez-vous
is mediated by the BQL QemuMutex:

- handle_icount_deadline wakes up the I/O thread with BQL taken
- the I/O thread wakes up and waits on the BQL
- the VCPU thread releases the BQL a little later
- the I/O thread raises an interrupt, which calls qemu_cpu_kick
- the VCPU thread notices the interrupt, takes the BQL to
  process it and waits on it

All this back and forth is extremely expensive, causing a 6 to 8-fold
slowdown when icount is turned on.

One may think that the issue is that the VCPU thread is too dependent
on the BQL, but then the latent bug comes in.  I first tried removing
the BQL completely from the x86 cpu_exec, only to see everything break.
The only way to fix it (and make everything slow again) was to add a dummy
BQL lock/unlock pair.

This is because in -icount mode you really have to process the events
before the CPU restarts executing the next instruction.  Therefore, this
series moves the processing of QEMU_CLOCK_VIRTUAL timers straight in
the vCPU thread when running in icount mode.

The required changes include:

- make the timer notification callback wake up TCG's single vCPU thread
  when run from another thread.  By using async_run_on_cpu, the callback
  can override all_cpu_threads_idle() when the CPU is halted.

- move handle_icount_deadline after qemu_tcg_wait_io_event, so that
  the timer notification callback is invoked after the dummy work item
  wakes up the vCPU thread

- make handle_icount_deadline run the timers instead of just waking the
  I/O thread.

- stop processing the timers in the main loop

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpus.c               | 28 +++++++++++++++++++++++++---
 include/qemu/timer.h | 24 ++++++++++++++++++++++++
 util/qemu-timer.c    |  4 +++-
 3 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/cpus.c b/cpus.c
index e9da3bcb59..b84a392dda 100644
--- a/cpus.c
+++ b/cpus.c
@@ -800,9 +800,25 @@ static void qemu_cpu_kick_rr_cpu(void)
     } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
 }
 
+static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
+{
+}
+
 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
 {
-    qemu_notify_event();
+    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
+        qemu_notify_event();
+        return;
+    }
+
+    if (!qemu_in_vcpu_thread() && first_cpu) {
+        /* qemu_cpu_kick is not enough to kick a halted CPU out of
+         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
+         * causes cpu_thread_is_idle to return false.  This way,
+         * handle_icount_deadline can run.
+         */
+        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
+    }
 }
 
 static void kick_tcg_thread(void *opaque)
@@ -1150,12 +1166,15 @@ static int64_t tcg_get_icount_limit(void)
 
 static void handle_icount_deadline(void)
 {
+    assert(qemu_in_vcpu_thread());
     if (use_icount) {
         int64_t deadline =
             qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
 
         if (deadline == 0) {
+            /* Wake up other AioContexts.  */
             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
         }
     }
 }
@@ -1268,6 +1287,11 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
         /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
         qemu_account_warp_timer();
 
+        /* Run the timers here.  This is much more efficient than
+         * waking up the I/O thread and waiting for completion.
+         */
+        handle_icount_deadline();
+
         if (!cpu) {
             cpu = first_cpu;
         }
@@ -1309,8 +1333,6 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
             atomic_mb_set(&cpu->exit_request, 0);
         }
 
-        handle_icount_deadline();
-
         qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
         deal_with_unplugged_cpus();
     }
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 1441b426cd..e1742f2f3d 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -533,6 +533,12 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list,
  * Create a new timer and associate it with the default
  * timer list for the clock type @type.
  *
+ * The default timer list has one special feature: in icount mode,
+ * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread.  This is
+ * not true of other timer lists, which are typically associated
+ * with an AioContext---each of them runs its timer callbacks in its own
+ * AioContext thread.
+ *
  * Returns: a pointer to the timer
  */
 static inline QEMUTimer *timer_new(QEMUClockType type, int scale,
@@ -550,6 +556,12 @@ static inline QEMUTimer *timer_new(QEMUClockType type, int scale,
  * Create a new timer with nanosecond scale on the default timer list
  * associated with the clock.
  *
+ * The default timer list has one special feature: in icount mode,
+ * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread.  This is
+ * not true of other timer lists, which are typically associated
+ * with an AioContext---each of them runs its timer callbacks in its own
+ * AioContext thread.
+ *
  * Returns: a pointer to the newly created timer
  */
 static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb,
@@ -564,6 +576,12 @@ static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb,
  * @cb: the callback to call when the timer expires
  * @opaque: the opaque pointer to pass to the callback
  *
+ * The default timer list has one special feature: in icount mode,
+ * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread.  This is
+ * not true of other timer lists, which are typically associated
+ * with an AioContext---each of them runs its timer callbacks in its own
+ * AioContext thread.
+ *
  * Create a new timer with microsecond scale on the default timer list
  * associated with the clock.
  *
@@ -581,6 +599,12 @@ static inline QEMUTimer *timer_new_us(QEMUClockType type, QEMUTimerCB *cb,
  * @cb: the callback to call when the timer expires
  * @opaque: the opaque pointer to pass to the callback
  *
+ * The default timer list has one special feature: in icount mode,
+ * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread.  This is
+ * not true of other timer lists, which are typically associated
+ * with an AioContext---each of them runs its timer callbacks in its own
+ * AioContext thread.
+ *
  * Create a new timer with millisecond scale on the default timer list
  * associated with the clock.
  *
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index dc3181e9b8..82d56507a2 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -658,7 +658,9 @@ bool qemu_clock_run_all_timers(void)
     QEMUClockType type;
 
     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
-        progress |= qemu_clock_run_timers(type);
+        if (qemu_clock_use_for_deadline(type)) {
+            progress |= qemu_clock_run_timers(type);
+        }
     }
 
     return progress;

From b31f84126215e3fd4b8acbc3083ae30d407329e8 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 14 Mar 2017 20:56:27 +0800
Subject: [PATCH 17/18] memory: info mtree check mr range overflow

The address of memory regions might overflow when something wrong
happened, like reported in:

https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg02043.html

For easier debugging, let's try to detect it.

Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1489496187-624-1-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/memory.c b/memory.c
index 284894b135..64b0a605ef 100644
--- a/memory.c
+++ b/memory.c
@@ -2494,6 +2494,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
     MemoryRegionListHead submr_print_queue;
     const MemoryRegion *submr;
     unsigned int i;
+    hwaddr cur_start, cur_end;
 
     if (!mr) {
         return;
@@ -2503,6 +2504,18 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
         mon_printf(f, MTREE_INDENT);
     }
 
+    cur_start = base + mr->addr;
+    cur_end = cur_start + MR_SIZE(mr->size);
+
+    /*
+     * Try to detect overflow of memory region. This should never
+     * happen normally. When it happens, we dump something to warn the
+     * user who is observing this.
+     */
+    if (cur_start < base || cur_end < cur_start) {
+        mon_printf(f, "[DETECTED OVERFLOW!] ");
+    }
+
     if (mr->alias) {
         MemoryRegionList *ml;
         bool found = false;
@@ -2522,8 +2535,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
         mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx
                    " (prio %d, %s): alias %s @%s " TARGET_FMT_plx
                    "-" TARGET_FMT_plx "%s\n",
-                   base + mr->addr,
-                   base + mr->addr + MR_SIZE(mr->size),
+                   cur_start, cur_end,
                    mr->priority,
                    memory_region_type((MemoryRegion *)mr),
                    memory_region_name(mr),
@@ -2534,8 +2546,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
     } else {
         mon_printf(f,
                    TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): %s%s\n",
-                   base + mr->addr,
-                   base + mr->addr + MR_SIZE(mr->size),
+                   cur_start, cur_end,
                    mr->priority,
                    memory_region_type((MemoryRegion *)mr),
                    memory_region_name(mr),
@@ -2562,7 +2573,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
     }
 
     QTAILQ_FOREACH(ml, &submr_print_queue, queue) {
-        mtree_print_mr(mon_printf, f, ml->mr, level + 1, base + mr->addr,
+        mtree_print_mr(mon_printf, f, ml->mr, level + 1, cur_start,
                        alias_print_queue);
     }
 

From 2563c9c6b8670400c48e562034b321a7cf3d9a85 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Tue, 7 Mar 2017 09:16:27 -0600
Subject: [PATCH 18/18] nbd/client: fix drop_sync [CVE-2017-2630]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Comparison symbol is misused. It may lead to memory corruption.
Introduced in commit 7d3123e.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20170203154757.36140-6-vsementsov@virtuozzo.com>
[eblake: add CVE details, update conditional]
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-AndrÃ© Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170307151627.27212-1-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 nbd/client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nbd/client.c b/nbd/client.c
index 5c9dee37fa..3dc2564cd0 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -94,7 +94,7 @@ static ssize_t drop_sync(QIOChannel *ioc, size_t size)
     char small[1024];
     char *buffer;
 
-    buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size));
+    buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size));
     while (size > 0) {
         ssize_t count = read_sync(ioc, buffer, MIN(65536, size));