From e8f5fe2de125a0bfbefbaa6a69af81f4817cb7a0 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 9 Mar 2017 15:27:08 +0000 Subject: [PATCH 01/18] memory_region: Fix name comments The 'name' parameter to memory_region_init_* had been marked as debug only, however vmstate_region_ram uses it as a parameter to qemu_ram_set_idstr to set RAMBlock names and these form part of the migration stream. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20170309152708.30635-1-dgilbert@redhat.com> Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 691102317c..e39256ad03 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -371,7 +371,8 @@ void memory_region_init_io(MemoryRegion *mr, * * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count - * @name: the name of the region. + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. */ @@ -390,7 +391,8 @@ void memory_region_init_ram(MemoryRegion *mr, * * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count - * @name: the name of the region. + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device * @size: used size of the region. * @max_size: max size of the region. * @resized: callback to notify owner about used size change. @@ -412,7 +414,8 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, * * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count - * @name: the name of the region. 
+ * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device * @size: size of the region. * @share: %true if memory must be mmaped with the MAP_SHARED flag * @path: the path in which to allocate the RAM. @@ -434,7 +437,8 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, * * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count - * @name: the name of the region. + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device * @size: size of the region. * @ptr: memory to be mapped; must contain at least @size bytes. */ @@ -496,7 +500,8 @@ void memory_region_init_alias(MemoryRegion *mr, * * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count - * @name: the name of the region. + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. */ @@ -513,7 +518,8 @@ void memory_region_init_rom(MemoryRegion *mr, * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count * @ops: callbacks for write access handling (must not be NULL). - * @name: the name of the region. + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. */ From c0d9f7d0bcedeaa65d5c984fbe0d351e1402eab5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 28 Feb 2017 18:40:01 +0100 Subject: [PATCH 02/18] docs: Add a note about mixing bootindex with "-boot order" Occasionally the users try to mix the bootindex properties with the "-boot order" parameter - and this likely does not give the expected results. 
So let's add a proper statement that these two concepts should not be used together. Signed-off-by: Thomas Huth Message-Id: <1488303601-23741-1-git-send-email-thuth@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Laszlo Ersek Signed-off-by: Paolo Bonzini --- docs/bootindex.txt | 9 +++++++++ qemu-options.hx | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/bootindex.txt b/docs/bootindex.txt index f84fac7200..b9a8ba122f 100644 --- a/docs/bootindex.txt +++ b/docs/bootindex.txt @@ -41,3 +41,12 @@ has three bootable devices target1, target3, target5 connected to it, the option ROM will have a boot method for each of them, but it is not possible to map from boot method back to a specific target. This is a shortcoming of the PC BIOS boot specification. + +== Mixing bootindex and boot order parameters == + +Note that it does not make sense to use the bootindex property together +with the "-boot order=..." (or "-boot once=...") parameter. The guest +firmware implementations normally either support the one or the other, +but not both parameters at the same time. Mixing them will result in +undefined behavior, and thus the guest firmware will likely not boot +from the expected devices. diff --git a/qemu-options.hx b/qemu-options.hx index 8dd8ee34a6..99af8edf5f 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -252,7 +252,10 @@ drive letters depend on the target architecture. The x86 PC uses: a, b (floppy 1 and 2), c (first hard disk), d (first CD-ROM), n-p (Etherboot from network adapter 1-4), hard disk boot is the default. To apply a particular boot order only on the first startup, specify it via -@option{once}. +@option{once}. Note that the @option{order} or @option{once} parameter +should not be used together with the @option{bootindex} property of +devices, since the firmware implementations normally do not support both +at the same time. 
Interactive boot menus/prompts can be enabled via @option{menu=on} as far as firmware/BIOS supports them. The default is non-interactive boot. From 1e356fc14beaa3ece6c0e961bd479af58be3198b Mon Sep 17 00:00:00 2001 From: Jitendra Kolhe Date: Fri, 24 Feb 2017 09:01:43 +0530 Subject: [PATCH 03/18] mem-prealloc: reduce large guest start-up and migration time. Using "-mem-prealloc" option for a large guest leads to higher guest start-up and migration time. This is because with "-mem-prealloc" option qemu tries to map every guest page (create address translations), and make sure the pages are available during runtime. virsh/libvirt by default, seems to use "-mem-prealloc" option in case the guest is configured to use huge pages. The patch tries to map all guest pages simultaneously by spawning multiple threads. Currently limiting the change to QEMU library functions on POSIX compliant host only, as we are not sure if the problem exists on win32. Below are some stats with "-mem-prealloc" option for guest configured to use huge pages. 
------------------------------------------------------------------------ Idle Guest | Start-up time | Migration time ------------------------------------------------------------------------ Guest stats with 2M HugePage usage - single threaded (existing code) ------------------------------------------------------------------------ 64 Core - 4TB | 54m11.796s | 75m43.843s 64 Core - 1TB | 8m56.576s | 14m29.049s 64 Core - 256GB | 2m11.245s | 3m26.598s ------------------------------------------------------------------------ Guest stats with 2M HugePage usage - map guest pages using 8 threads ------------------------------------------------------------------------ 64 Core - 4TB | 5m1.027s | 34m10.565s 64 Core - 1TB | 1m10.366s | 8m28.188s 64 Core - 256GB | 0m19.040s | 2m10.148s ----------------------------------------------------------------------- Guest stats with 2M HugePage usage - map guest pages using 16 threads ----------------------------------------------------------------------- 64 Core - 4TB | 1m58.970s | 31m43.400s 64 Core - 1TB | 0m39.885s | 7m55.289s 64 Core - 256GB | 0m11.960s | 2m0.135s ----------------------------------------------------------------------- Changed in v2: - modify number of memset threads spawned to min(smp_cpus, 16). - removed 64GB memory restriction for spawning memset threads. Changed in v3: - limit number of threads spawned based on min(sysconf(_SC_NPROCESSORS_ONLN), 16, smp_cpus) - implement memset thread specific siglongjmp in SIGBUS signal_handler. Changed in v4: - remove sigsetjmp/siglongjmp and SIGBUS unblock/block for main thread as main thread no longer touches any pages. - simplify code by returning memset_thread_failed status from touch_all_pages. 
Signed-off-by: Jitendra Kolhe Message-Id: <1487907103-32350-1-git-send-email-jitendra.kolhe@hpe.com> Signed-off-by: Paolo Bonzini --- backends/hostmem.c | 4 +- exec.c | 2 +- include/qemu/osdep.h | 3 +- util/oslib-posix.c | 108 ++++++++++++++++++++++++++++++++++--------- util/oslib-win32.c | 3 +- 5 files changed, 94 insertions(+), 26 deletions(-) diff --git a/backends/hostmem.c b/backends/hostmem.c index 7f5de70609..162c2187d8 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -224,7 +224,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, void *ptr = memory_region_get_ram_ptr(&backend->mr); uint64_t sz = memory_region_size(&backend->mr); - os_mem_prealloc(fd, ptr, sz, &local_err); + os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -328,7 +328,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) */ if (backend->prealloc) { os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz, - &local_err); + smp_cpus, &local_err); if (local_err) { goto out; } diff --git a/exec.c b/exec.c index aabb035e92..68135a9391 100644 --- a/exec.c +++ b/exec.c @@ -1467,7 +1467,7 @@ static void *file_ram_alloc(RAMBlock *block, } if (mem_prealloc) { - os_mem_prealloc(fd, area, memory, errp); + os_mem_prealloc(fd, area, memory, smp_cpus, errp); if (errp && *errp) { goto error; } diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index af37195fef..122ff06ff6 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -438,7 +438,8 @@ unsigned long qemu_getauxval(unsigned long type); void qemu_set_tty_echo(int fd, bool echo); -void os_mem_prealloc(int fd, char *area, size_t sz, Error **errp); +void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus, + Error **errp); int qemu_read_password(char *buf, int buf_size); diff --git a/util/oslib-posix.c b/util/oslib-posix.c index cd686aae3d..956f66ab4a 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ 
-55,6 +55,21 @@ #include "qemu/error-report.h" #endif +#define MAX_MEM_PREALLOC_THREAD_COUNT (MIN(sysconf(_SC_NPROCESSORS_ONLN), 16)) + +struct MemsetThread { + char *addr; + uint64_t numpages; + uint64_t hpagesize; + QemuThread pgthread; + sigjmp_buf env; +}; +typedef struct MemsetThread MemsetThread; + +static MemsetThread *memset_thread; +static int memset_num_threads; +static bool memset_thread_failed; + int qemu_get_thread_id(void) { #if defined(__linux__) @@ -316,18 +331,83 @@ char *qemu_get_exec_dir(void) return g_strdup(exec_dir); } -static sigjmp_buf sigjump; - static void sigbus_handler(int signal) { - siglongjmp(sigjump, 1); + int i; + if (memset_thread) { + for (i = 0; i < memset_num_threads; i++) { + if (qemu_thread_is_self(&memset_thread[i].pgthread)) { + siglongjmp(memset_thread[i].env, 1); + } + } + } } -void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp) +static void *do_touch_pages(void *arg) +{ + MemsetThread *memset_args = (MemsetThread *)arg; + char *addr = memset_args->addr; + uint64_t numpages = memset_args->numpages; + uint64_t hpagesize = memset_args->hpagesize; + sigset_t set, oldset; + int i = 0; + + /* unblock SIGBUS */ + sigemptyset(&set); + sigaddset(&set, SIGBUS); + pthread_sigmask(SIG_UNBLOCK, &set, &oldset); + + if (sigsetjmp(memset_args->env, 1)) { + memset_thread_failed = true; + } else { + for (i = 0; i < numpages; i++) { + memset(addr, 0, 1); + addr += hpagesize; + } + } + pthread_sigmask(SIG_SETMASK, &oldset, NULL); + return NULL; +} + +static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, + int smp_cpus) +{ + uint64_t numpages_per_thread, size_per_thread; + char *addr = area; + int i = 0; + + memset_thread_failed = false; + memset_num_threads = MIN(smp_cpus, MAX_MEM_PREALLOC_THREAD_COUNT); + memset_thread = g_new0(MemsetThread, memset_num_threads); + numpages_per_thread = (numpages / memset_num_threads); + size_per_thread = (hpagesize * numpages_per_thread); + for (i = 0; i < 
memset_num_threads; i++) { + memset_thread[i].addr = addr; + memset_thread[i].numpages = (i == (memset_num_threads - 1)) ? + numpages : numpages_per_thread; + memset_thread[i].hpagesize = hpagesize; + qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", + do_touch_pages, &memset_thread[i], + QEMU_THREAD_JOINABLE); + addr += size_per_thread; + numpages -= numpages_per_thread; + } + for (i = 0; i < memset_num_threads; i++) { + qemu_thread_join(&memset_thread[i].pgthread); + } + g_free(memset_thread); + memset_thread = NULL; + + return memset_thread_failed; +} + +void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, + Error **errp) { int ret; struct sigaction act, oldact; - sigset_t set, oldset; + size_t hpagesize = qemu_fd_getpagesize(fd); + size_t numpages = DIV_ROUND_UP(memory, hpagesize); memset(&act, 0, sizeof(act)); act.sa_handler = &sigbus_handler; @@ -340,23 +420,10 @@ void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp) return; } - /* unblock SIGBUS */ - sigemptyset(&set); - sigaddset(&set, SIGBUS); - pthread_sigmask(SIG_UNBLOCK, &set, &oldset); - - if (sigsetjmp(sigjump, 1)) { + /* touch pages simultaneously */ + if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) { error_setg(errp, "os_mem_prealloc: Insufficient free host memory " "pages available to allocate guest RAM\n"); - } else { - int i; - size_t hpagesize = qemu_fd_getpagesize(fd); - size_t numpages = DIV_ROUND_UP(memory, hpagesize); - - /* MAP_POPULATE silently ignores failures */ - for (i = 0; i < numpages; i++) { - memset(area + (hpagesize * i), 0, 1); - } } ret = sigaction(SIGBUS, &oldact, NULL); @@ -365,7 +432,6 @@ void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp) perror("os_mem_prealloc: failed to reinstall signal handler"); exit(1); } - pthread_sigmask(SIG_SETMASK, &oldset, NULL); } diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 0b1890fd33..80e4668935 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ 
-541,7 +541,8 @@ int getpagesize(void) return system_info.dwPageSize; } -void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp) +void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, + Error **errp) { int i; size_t pagesize = getpagesize(); From 79ca7a1b898eb97c4192f3c78027a0f20485e7b4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 7 Mar 2017 15:19:08 +0100 Subject: [PATCH 04/18] exec: add cpu_synchronize_state to cpu_memory_rw_debug I sometimes got "Cannot access memory" when using the x command on the monitor. Turns out that the cpu env did contain stale data (e.g. wrong control register content for page table origin). We must synchronize the state of the CPU before walking the page tables. A similar issues happens for a remote gdb, so lets do the cpu_synchronize_state in cpu_memory_rw_debug. Signed-off-by: Christian Borntraeger Message-Id: <1488896348-13560-1-git-send-email-borntraeger@de.ibm.com> Signed-off-by: Paolo Bonzini --- exec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exec.c b/exec.c index 68135a9391..a22f5a0385 100644 --- a/exec.c +++ b/exec.c @@ -43,6 +43,7 @@ #include "exec/ioport.h" #include "sysemu/dma.h" #include "sysemu/numa.h" +#include "sysemu/hw_accel.h" #include "exec/address-spaces.h" #include "sysemu/xen-mapcache.h" #include "trace-root.h" @@ -3309,6 +3310,7 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, hwaddr phys_addr; target_ulong page; + cpu_synchronize_state(cpu); while (len > 0) { int asidx; MemTxAttrs attrs; From c70b11d160c6bca8e994d40639fcb41558c9fa0a Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Thu, 9 Mar 2017 16:46:34 -0300 Subject: [PATCH 05/18] kvm: Print MSR information if KVM_{GET,SET}_MSRS failed When a KVM_{GET,SET}_MSRS ioctl() fails, it is difficult to find out which MSR caused the problem. Print an error message for debugging, before we trigger the (ret == cpu->kvm_msr_buf->nmsrs) assert. Suggested-by: Dr. 
David Alan Gilbert Signed-off-by: Eduardo Habkost Message-Id: <20170309194634.28457-1-ehabkost@redhat.com> Signed-off-by: Paolo Bonzini --- target/i386/kvm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 472399fb2c..55865dbee0 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1824,6 +1824,12 @@ static int kvm_put_msrs(X86CPU *cpu, int level) return ret; } + if (ret < cpu->kvm_msr_buf->nmsrs) { + struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; + error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, + (uint32_t)e->index, (uint64_t)e->data); + } + assert(ret == cpu->kvm_msr_buf->nmsrs); return 0; } @@ -2189,6 +2195,12 @@ static int kvm_get_msrs(X86CPU *cpu) return ret; } + if (ret < cpu->kvm_msr_buf->nmsrs) { + struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; + error_report("error: failed to get MSR 0x%" PRIx32, + (uint32_t)e->index); + } + assert(ret == cpu->kvm_msr_buf->nmsrs); /* * MTRR masks: Each mask consists of 5 parts From ca2edcd35cd1a8589dfa0533c19ff232fec7b4b5 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Thu, 9 Mar 2017 15:50:46 -0300 Subject: [PATCH 06/18] kvmclock: Don't crash QEMU if KVM is disabled Most machines don't allow sysbus devices like "kvmclock" to be created from the command-line, but some of them do (the ones with has_dynamic_sysbus=true). In those cases, it's possible to manually create a kvmclock device without KVM being enabled, making QEMU crash: $ qemu-system-x86_64 -machine q35,accel=tcg -device kvmclock Segmentation fault (core dumped) This changes kvmclock's realize method to return an error if KVM is disabled, to ensure it won't crash QEMU. 
Signed-off-by: Eduardo Habkost Message-Id: <20170309185046.17555-1-ehabkost@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/kvm/clock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index ef9d560f9c..13eca374cd 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -22,6 +22,7 @@ #include "kvm_i386.h" #include "hw/sysbus.h" #include "hw/kvm/clock.h" +#include "qapi/error.h" #include #include @@ -208,6 +209,11 @@ static void kvmclock_realize(DeviceState *dev, Error **errp) { KVMClockState *s = KVM_CLOCK(dev); + if (!kvm_enabled()) { + error_setg(errp, "kvmclock device requires KVM"); + return; + } + kvm_update_clock(s); qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); From 9b4b157ef6edc5cf060aef3402bdece7f581b5a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 10 Mar 2017 15:28:19 +0400 Subject: [PATCH 07/18] scripts/dump-guest-memory.py: fix int128_get64 on recent gcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Int128 is no longer a struct, reaching a python exception: Python Exception Attempt to extract a component of a value that is not a (null).: Replace struct access with a cast to uint64[] instead. 
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1427466 Signed-off-by: Marc-André Lureau Message-Id: <20170310112819.16760-1-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 9956fc036c..f7c6635f15 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -314,8 +314,18 @@ def get_arch_phdr(endianness, elfclass): def int128_get64(val): """Returns low 64bit part of Int128 struct.""" - assert val["hi"] == 0 - return val["lo"] + try: + assert val["hi"] == 0 + return val["lo"] + except gdb.error: + u64t = gdb.lookup_type('uint64_t').array(2) + u64 = val.cast(u64t) + if sys.byteorder == 'little': + assert u64[1] == 0 + return u64[0] + else: + assert u64[0] == 0 + return u64[1] def qlist_foreach(head, field_str): From c12d66aac1aaacbc8495dd11712b06cc64259d92 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 10 Mar 2017 18:14:05 +0800 Subject: [PATCH 08/18] configure: add the missing help output for optional features Signed-off-by: Lin Ma Message-Id: <20170310101405.26974-1-lma@suse.com> Signed-off-by: Paolo Bonzini --- configure | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/configure b/configure index 75c7c3526c..99d8bece70 100755 --- a/configure +++ b/configure @@ -1330,6 +1330,12 @@ Advanced options (experts only): --with-vss-sdk=SDK-path enable Windows VSS support in QEMU Guest Agent --with-win-sdk=SDK-path path to Windows Platform SDK (to build VSS .tlb) --tls-priority default TLS protocol/cipher priority string + --enable-gprof QEMU profiling with gprof + --enable-profiler profiler support + --enable-xen-pv-domain-build + xen pv domain builder + --enable-debug-stack-usage + track the maximum stack usage of stacks created by qemu_alloc_stack Optional features, enabled with --enable-FEATURE and disabled with --disable-FEATURE, default is 
enabled if available: @@ -1397,6 +1403,12 @@ disabled with --disable-FEATURE, default is enabled if available: tcmalloc tcmalloc support jemalloc jemalloc support replication replication support + vhost-vsock virtio sockets device support + opengl opengl support + virglrenderer virgl rendering support + xfsctl xfsctl support + qom-cast-debug cast debugging support + tools build qemu-io, qemu-nbd and qemu-image tools NOTE: The object files are built at the place where configure is launched EOF From bd5d983fa87e5a0230a2bc6a54972f53e39ad978 Mon Sep 17 00:00:00 2001 From: Suramya Shah Date: Fri, 10 Mar 2017 22:09:48 +0530 Subject: [PATCH 09/18] util: Removed unneeded header from path.c Signed-off-by: Suramya Shah Reviewed-by: Eric Blake Message-Id: <20170310163948.7567-1-shah.suramya@gmail.com> Signed-off-by: Paolo Bonzini --- util/path.c | 1 - 1 file changed, 1 deletion(-) diff --git a/util/path.c b/util/path.c index 5479f76c6d..7f9fc272fb 100644 --- a/util/path.c +++ b/util/path.c @@ -6,7 +6,6 @@ #include "qemu/osdep.h" #include #include -#include "qemu-common.h" #include "qemu/cutils.h" #include "qemu/path.h" From b01a2d07c963e96dbd151f0db1eaa06f273acf34 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Tue, 14 Mar 2017 03:56:20 -0700 Subject: [PATCH 10/18] scsi: mptsas: fix the wrong reading size in fetch request When fetching request, it should read sizeof(*hdr), not the pointer hdr. Signed-off-by: Li Qiang Message-Id: <1489488980-130668-1-git-send-email-liqiang6-s@360.cn> Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi/mptsas.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c index 2e091c0156..765ab53c34 100644 --- a/hw/scsi/mptsas.c +++ b/hw/scsi/mptsas.c @@ -756,7 +756,7 @@ static void mptsas_fetch_request(MPTSASState *s) /* Read the message header from the guest first. 
*/ addr = s->host_mfa_high_addr | MPTSAS_FIFO_GET(s, request_post); - pci_dma_read(pci, addr, req, sizeof(hdr)); + pci_dma_read(pci, addr, req, sizeof(*hdr)); if (hdr->Function < ARRAY_SIZE(mpi_request_sizes) && mpi_request_sizes[hdr->Function]) { @@ -766,8 +766,8 @@ static void mptsas_fetch_request(MPTSASState *s) */ size = mpi_request_sizes[hdr->Function]; assert(size <= MPTSAS_MAX_REQUEST_SIZE); - pci_dma_read(pci, addr + sizeof(hdr), &req[sizeof(hdr)], - size - sizeof(hdr)); + pci_dma_read(pci, addr + sizeof(*hdr), &req[sizeof(*hdr)], + size - sizeof(*hdr)); } if (hdr->Function == MPI_FUNCTION_SCSI_IO_REQUEST) { From c0d24e7f70816c8af51ebe9dc74aa276a81858dd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Mar 2017 11:28:04 +0100 Subject: [PATCH 11/18] target/nios2: take BQL around interrupt check The interrupt controller does not have its own locking. Signed-off-by: Paolo Bonzini --- target/nios2/op_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c index 538853cda7..efb1c489c9 100644 --- a/target/nios2/op_helper.c +++ b/target/nios2/op_helper.c @@ -21,6 +21,7 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" +#include "qemu/main-loop.h" #if !defined(CONFIG_USER_ONLY) void helper_mmu_read_debug(CPUNios2State *env, uint32_t rn) @@ -35,7 +36,9 @@ void helper_mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v) void helper_check_interrupts(CPUNios2State *env) { + qemu_mutex_lock_iothread(); nios2_check_interrupts(env); + qemu_mutex_unlock_iothread(); } #endif /* !CONFIG_USER_ONLY */ From 33bef0b9948b85000221d32c758d9d4a9276aaaf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Mar 2017 11:37:57 +0100 Subject: [PATCH 12/18] qemu-timer: fix off-by-one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the first timer is exactly at the current value of the clock, the deadline is met and the timer should fire. 
This fixes itself on the next iteration of the loop without icount; with icount, however, execution of instructions will stop exactly at the deadline and won't proceed. Reviewed-by: Alex Bennée Reviewed-by: Edgar E. Iglesias Signed-off-by: Paolo Bonzini --- util/qemu-timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/qemu-timer.c b/util/qemu-timer.c index 6cf70b96f6..2f201512df 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -199,7 +199,7 @@ bool timerlist_expired(QEMUTimerList *timer_list) expire_time = timer_list->active_timers->expire_time; qemu_mutex_unlock(&timer_list->active_timers_lock); - return expire_time < qemu_clock_get_ns(timer_list->clock->type); + return expire_time <= qemu_clock_get_ns(timer_list->clock->type); } bool qemu_clock_expired(QEMUClockType type) From d2528bdc19988db73056be75dd9bf52eeee620f5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Mar 2017 12:01:16 +0100 Subject: [PATCH 13/18] qemu-timer: do not include sysemu/cpus.h from util/qemu-timer.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This dependency is the wrong way, and we will need util/qemu-timer.h from sysemu/cpus.h in the next patch. Reviewed-by: Alex Bennée Reviewed-by: Edgar E. Iglesias Signed-off-by: Paolo Bonzini --- cpu-exec.c | 1 + hw/core/ptimer.c | 1 + hw/ppc/pnv.c | 1 + include/qemu/timer.h | 1 - include/sysemu/cpus.h | 2 ++ kvm-all.c | 1 + monitor.c | 1 + replay/replay.c | 1 + target/alpha/translate.c | 1 + translate-all.c | 1 + util/main-loop.c | 1 + util/qemu-timer.c | 1 + 12 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cpu-exec.c b/cpu-exec.c index d04dd91ebd..748cb66bca 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -33,6 +33,7 @@ #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) #include "hw/i386/apic.h" #endif +#include "sysemu/cpus.h" #include "sysemu/replay.h" /* -icount align implementation. 
*/ diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c index 59ccb00550..7221c68a98 100644 --- a/hw/core/ptimer.c +++ b/hw/core/ptimer.c @@ -13,6 +13,7 @@ #include "sysemu/replay.h" #include "sysemu/qtest.h" #include "block/aio.h" +#include "sysemu/cpus.h" #define DELTA_ADJUST 1 #define DELTA_NO_ADJUST -1 diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 09f0d22def..3fa722af82 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -21,6 +21,7 @@ #include "qapi/error.h" #include "sysemu/sysemu.h" #include "sysemu/numa.h" +#include "sysemu/cpus.h" #include "hw/hw.h" #include "target/ppc/cpu.h" #include "qemu/log.h" diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 26e628584c..91cd8c8a84 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -4,7 +4,6 @@ #include "qemu-common.h" #include "qemu/notify.h" #include "qemu/host-utils.h" -#include "sysemu/cpus.h" #define NANOSECONDS_PER_SECOND 1000000000LL diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index a73b5d4bce..e521a91661 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -1,6 +1,8 @@ #ifndef QEMU_CPUS_H #define QEMU_CPUS_H +#include "qemu/timer.h" + /* cpus.c */ bool qemu_in_vcpu_thread(void); void qemu_init_cpu_loop(void); diff --git a/kvm-all.c b/kvm-all.c index 9040bd50a4..90b8573656 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -29,6 +29,7 @@ #include "hw/s390x/adapter.h" #include "exec/gdbstub.h" #include "sysemu/kvm_int.h" +#include "sysemu/cpus.h" #include "qemu/bswap.h" #include "exec/memory.h" #include "exec/ram_addr.h" diff --git a/monitor.c b/monitor.c index f11893e1c3..be282ecb80 100644 --- a/monitor.c +++ b/monitor.c @@ -77,6 +77,7 @@ #include "qapi-event.h" #include "qmp-introspect.h" #include "sysemu/qtest.h" +#include "sysemu/cpus.h" #include "qemu/cutils.h" #include "qapi/qmp/dispatch.h" diff --git a/replay/replay.c b/replay/replay.c index 1835b9902e..78e2a7e570 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -16,6 +16,7 @@ #include "replay-internal.h" 
#include "qemu/timer.h" #include "qemu/main-loop.h" +#include "sysemu/cpus.h" #include "sysemu/sysemu.h" #include "qemu/error-report.h" diff --git a/target/alpha/translate.c b/target/alpha/translate.c index 055286a7b8..df5d695344 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "cpu.h" +#include "sysemu/cpus.h" #include "disas/disas.h" #include "qemu/host-utils.h" #include "exec/exec-all.h" diff --git a/translate-all.c b/translate-all.c index 34480aebba..b3ee876526 100644 --- a/translate-all.c +++ b/translate-all.c @@ -57,6 +57,7 @@ #include "qemu/timer.h" #include "qemu/main-loop.h" #include "exec/log.h" +#include "sysemu/cpus.h" /* #define DEBUG_TB_INVALIDATE */ /* #define DEBUG_TB_FLUSH */ diff --git a/util/main-loop.c b/util/main-loop.c index ca7bb072f9..7efc229e3d 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -28,6 +28,7 @@ #include "qemu/timer.h" #include "qemu/sockets.h" // struct in_addr needed for libslirp.h #include "sysemu/qtest.h" +#include "sysemu/cpus.h" #include "slirp/libslirp.h" #include "qemu/main-loop.h" #include "block/aio.h" diff --git a/util/qemu-timer.c b/util/qemu-timer.c index 2f201512df..ac993403ab 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -27,6 +27,7 @@ #include "qemu/timer.h" #include "sysemu/replay.h" #include "sysemu/sysemu.h" +#include "sysemu/cpus.h" #ifdef CONFIG_POSIX #include From 3f53bc61a404cd1d13fdba8441282a33a755f8c6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Mar 2017 11:50:29 +0100 Subject: [PATCH 14/18] cpus: define QEMUTimerListNotifyCB for QEMU system emulation There is no change for now, because the callback just invokes qemu_notify_event. Reviewed-by: Edgar E. 
Iglesias Signed-off-by: Paolo Bonzini --- cpus.c | 5 +++++ include/qemu/timer.h | 4 ++-- include/sysemu/cpus.h | 1 + stubs/cpu-get-icount.c | 6 ++++++ tests/test-aio-multithread.c | 2 +- tests/test-aio.c | 2 +- util/async.c | 2 +- util/main-loop.c | 2 +- util/qemu-timer.c | 10 +++++----- 9 files changed, 23 insertions(+), 11 deletions(-) diff --git a/cpus.c b/cpus.c index 69e21858b8..e9da3bcb59 100644 --- a/cpus.c +++ b/cpus.c @@ -800,6 +800,11 @@ static void qemu_cpu_kick_rr_cpu(void) } while (cpu != atomic_mb_read(&tcg_current_rr_cpu)); } +void qemu_timer_notify_cb(void *opaque, QEMUClockType type) +{ + qemu_notify_event(); +} + static void kick_tcg_thread(void *opaque) { timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 91cd8c8a84..1441b426cd 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -59,7 +59,7 @@ struct QEMUTimerListGroup { }; typedef void QEMUTimerCB(void *opaque); -typedef void QEMUTimerListNotifyCB(void *opaque); +typedef void QEMUTimerListNotifyCB(void *opaque, QEMUClockType type); struct QEMUTimer { int64_t expire_time; /* in nanoseconds */ @@ -776,7 +776,7 @@ static inline int64_t qemu_soonest_timeout(int64_t timeout1, int64_t timeout2) * * Initialise the clock & timer infrastructure */ -void init_clocks(void); +void init_clocks(QEMUTimerListNotifyCB *notify_cb); int64_t cpu_get_ticks(void); /* Caller must hold BQL */ diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index e521a91661..a8053f1715 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -22,6 +22,7 @@ void dump_drift_info(FILE *f, fprintf_function cpu_fprintf); /* Unblock cpu */ void qemu_cpu_kick_self(void); +void qemu_timer_notify_cb(void *opaque, QEMUClockType type); void cpu_synchronize_all_states(void); void cpu_synchronize_all_post_reset(void); diff --git a/stubs/cpu-get-icount.c b/stubs/cpu-get-icount.c index 2e8b63b225..0b7239d721 100644 --- a/stubs/cpu-get-icount.c +++ 
b/stubs/cpu-get-icount.c @@ -2,6 +2,7 @@ #include "qemu-common.h" #include "qemu/timer.h" #include "sysemu/cpus.h" +#include "qemu/main-loop.h" int use_icount; @@ -9,3 +10,8 @@ int64_t cpu_get_icount(void) { abort(); } + +void qemu_timer_notify_cb(void *opaque, QEMUClockType type) +{ + qemu_notify_event(); +} diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c index 8b0b40ec78..549d784915 100644 --- a/tests/test-aio-multithread.c +++ b/tests/test-aio-multithread.c @@ -438,7 +438,7 @@ static void test_multi_mutex_10(void) int main(int argc, char **argv) { - init_clocks(); + init_clocks(NULL); g_test_init(&argc, &argv, NULL); g_test_add_func("/aio/multi/lifecycle", test_lifecycle); diff --git a/tests/test-aio.c b/tests/test-aio.c index 2754f154ce..54e20d6ab1 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -835,7 +835,7 @@ int main(int argc, char **argv) Error *local_error = NULL; GSource *src; - init_clocks(); + init_clocks(NULL); ctx = aio_context_new(&local_error); if (!ctx) { diff --git a/util/async.c b/util/async.c index 7d469eb857..663e297e1f 100644 --- a/util/async.c +++ b/util/async.c @@ -351,7 +351,7 @@ void aio_notify_accept(AioContext *ctx) } } -static void aio_timerlist_notify(void *opaque) +static void aio_timerlist_notify(void *opaque, QEMUClockType type) { aio_notify(opaque); } diff --git a/util/main-loop.c b/util/main-loop.c index 7efc229e3d..4534c89308 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -144,7 +144,7 @@ int qemu_init_main_loop(Error **errp) GSource *src; Error *local_error = NULL; - init_clocks(); + init_clocks(qemu_timer_notify_cb); ret = qemu_signal_init(); if (ret) { diff --git a/util/qemu-timer.c b/util/qemu-timer.c index ac993403ab..dc3181e9b8 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -122,7 +122,7 @@ void timerlist_free(QEMUTimerList *timer_list) g_free(timer_list); } -static void qemu_clock_init(QEMUClockType type) +static void qemu_clock_init(QEMUClockType type, 
QEMUTimerListNotifyCB *notify_cb) { QEMUClock *clock = qemu_clock_ptr(type); @@ -134,7 +134,7 @@ static void qemu_clock_init(QEMUClockType type) clock->last = INT64_MIN; QLIST_INIT(&clock->timerlists); notifier_list_init(&clock->reset_notifiers); - main_loop_tlg.tl[type] = timerlist_new(type, NULL, NULL); + main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL); } bool qemu_clock_use_for_deadline(QEMUClockType type) @@ -278,7 +278,7 @@ QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type) void timerlist_notify(QEMUTimerList *timer_list) { if (timer_list->notify_cb) { - timer_list->notify_cb(timer_list->notify_opaque); + timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type); } else { qemu_notify_event(); } @@ -635,11 +635,11 @@ void qemu_clock_unregister_reset_notifier(QEMUClockType type, notifier_remove(notifier); } -void init_clocks(void) +void init_clocks(QEMUTimerListNotifyCB *notify_cb) { QEMUClockType type; for (type = 0; type < QEMU_CLOCK_MAX; type++) { - qemu_clock_init(type); + qemu_clock_init(type, notify_cb); } #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK From e330c118f2a5a5365409b123cd0dd2c7d575bf05 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Mar 2017 11:51:07 +0100 Subject: [PATCH 15/18] main-loop: remove now unnecessary optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This optimization is not necessary anymore, because the vCPU now drops the I/O thread lock even with TCG. Drop it to simplify the code and avoid the "I/O thread spun for 1000 iterations" warning. Reviewed-by: Alex Bennée Reviewed-by: Edgar E. 
Iglesias Signed-off-by: Paolo Bonzini --- vl.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/vl.c b/vl.c index 1a95500ac7..0b4ed5241c 100644 --- a/vl.c +++ b/vl.c @@ -1888,17 +1888,14 @@ static bool main_loop_should_exit(void) static void main_loop(void) { - bool nonblocking; - int last_io = 0; #ifdef CONFIG_PROFILER int64_t ti; #endif do { - nonblocking = tcg_enabled() && last_io > 0; #ifdef CONFIG_PROFILER ti = profile_getclock(); #endif - last_io = main_loop_wait(nonblocking); + main_loop_wait(false); #ifdef CONFIG_PROFILER dev_time += profile_getclock() - ti; #endif From 6b8f0187a4d7c263e356302f8d308655372a4b5b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 2 Mar 2017 19:56:40 +0100 Subject: [PATCH 16/18] icount: process QEMU_CLOCK_VIRTUAL timers in vCPU thread icount has become much slower after tcg_cpu_exec has stopped using the BQL. There is also a latent bug that is masked by the slowness. The slowness happens because every occurrence of a QEMU_CLOCK_VIRTUAL timer now has to wake up the I/O thread and wait for it. The rendez-vous is mediated by the BQL QemuMutex: - handle_icount_deadline wakes up the I/O thread with BQL taken - the I/O thread wakes up and waits on the BQL - the VCPU thread releases the BQL a little later - the I/O thread raises an interrupt, which calls qemu_cpu_kick - the VCPU thread notices the interrupt, takes the BQL to process it and waits on it All this back and forth is extremely expensive, causing a 6 to 8-fold slowdown when icount is turned on. One may think that the issue is that the VCPU thread is too dependent on the BQL, but then the latent bug comes in. I first tried removing the BQL completely from the x86 cpu_exec, only to see everything break. The only way to fix it (and make everything slow again) was to add a dummy BQL lock/unlock pair. This is because in -icount mode you really have to process the events before the CPU restarts executing the next instruction. 
Therefore, this series moves the processing of QEMU_CLOCK_VIRTUAL timers straight in the vCPU thread when running in icount mode. The required changes include: - make the timer notification callback wake up TCG's single vCPU thread when run from another thread. By using async_run_on_cpu, the callback can override all_cpu_threads_idle() when the CPU is halted. - move handle_icount_deadline after qemu_tcg_wait_io_event, so that the timer notification callback is invoked after the dummy work item wakes up the vCPU thread - make handle_icount_deadline run the timers instead of just waking the I/O thread. - stop processing the timers in the main loop Signed-off-by: Paolo Bonzini --- cpus.c | 28 +++++++++++++++++++++++++--- include/qemu/timer.h | 24 ++++++++++++++++++++++++ util/qemu-timer.c | 4 +++- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/cpus.c b/cpus.c index e9da3bcb59..b84a392dda 100644 --- a/cpus.c +++ b/cpus.c @@ -800,9 +800,25 @@ static void qemu_cpu_kick_rr_cpu(void) } while (cpu != atomic_mb_read(&tcg_current_rr_cpu)); } +static void do_nothing(CPUState *cpu, run_on_cpu_data unused) +{ +} + void qemu_timer_notify_cb(void *opaque, QEMUClockType type) { - qemu_notify_event(); + if (!use_icount || type != QEMU_CLOCK_VIRTUAL) { + qemu_notify_event(); + return; + } + + if (!qemu_in_vcpu_thread() && first_cpu) { + /* qemu_cpu_kick is not enough to kick a halted CPU out of + * qemu_tcg_wait_io_event. async_run_on_cpu, instead, + * causes cpu_thread_is_idle to return false. This way, + * handle_icount_deadline can run. + */ + async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL); + } } static void kick_tcg_thread(void *opaque) @@ -1150,12 +1166,15 @@ static int64_t tcg_get_icount_limit(void) static void handle_icount_deadline(void) { + assert(qemu_in_vcpu_thread()); if (use_icount) { int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); if (deadline == 0) { + /* Wake up other AioContexts. 
*/ qemu_clock_notify(QEMU_CLOCK_VIRTUAL); + qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); } } } @@ -1268,6 +1287,11 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg) /* Account partial waits to QEMU_CLOCK_VIRTUAL. */ qemu_account_warp_timer(); + /* Run the timers here. This is much more efficient than + * waking up the I/O thread and waiting for completion. + */ + handle_icount_deadline(); + if (!cpu) { cpu = first_cpu; } @@ -1309,8 +1333,6 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg) atomic_mb_set(&cpu->exit_request, 0); } - handle_icount_deadline(); - qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus)); deal_with_unplugged_cpus(); } diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 1441b426cd..e1742f2f3d 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -533,6 +533,12 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, * Create a new timer and associate it with the default * timer list for the clock type @type. * + * The default timer list has one special feature: in icount mode, + * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is + * not true of other timer lists, which are typically associated + * with an AioContext---each of them runs its timer callbacks in its own + * AioContext thread. + * * Returns: a pointer to the timer */ static inline QEMUTimer *timer_new(QEMUClockType type, int scale, @@ -550,6 +556,12 @@ static inline QEMUTimer *timer_new(QEMUClockType type, int scale, * Create a new timer with nanosecond scale on the default timer list * associated with the clock. * + * The default timer list has one special feature: in icount mode, + * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is + * not true of other timer lists, which are typically associated + * with an AioContext---each of them runs its timer callbacks in its own + * AioContext thread. 
+ * * Returns: a pointer to the newly created timer */ static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb, @@ -564,6 +576,12 @@ static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb, * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * + * The default timer list has one special feature: in icount mode, + * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is + * not true of other timer lists, which are typically associated + * with an AioContext---each of them runs its timer callbacks in its own + * AioContext thread. + * * Create a new timer with microsecond scale on the default timer list * associated with the clock. * @@ -581,6 +599,12 @@ static inline QEMUTimer *timer_new_us(QEMUClockType type, QEMUTimerCB *cb, * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * + * The default timer list has one special feature: in icount mode, + * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is + * not true of other timer lists, which are typically associated + * with an AioContext---each of them runs its timer callbacks in its own + * AioContext thread. + * * Create a new timer with millisecond scale on the default timer list * associated with the clock. 
* diff --git a/util/qemu-timer.c b/util/qemu-timer.c index dc3181e9b8..82d56507a2 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -658,7 +658,9 @@ bool qemu_clock_run_all_timers(void) QEMUClockType type; for (type = 0; type < QEMU_CLOCK_MAX; type++) { - progress |= qemu_clock_run_timers(type); + if (qemu_clock_use_for_deadline(type)) { + progress |= qemu_clock_run_timers(type); + } } return progress; From b31f84126215e3fd4b8acbc3083ae30d407329e8 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 14 Mar 2017 20:56:27 +0800 Subject: [PATCH 17/18] memory: info mtree check mr range overflow The address range of a memory region might overflow when something goes wrong, as reported in: https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg02043.html For easier debugging, let's try to detect it. Reported-by: Mark Cave-Ayland Signed-off-by: Peter Xu Message-Id: <1489496187-624-1-git-send-email-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- memory.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/memory.c b/memory.c index 284894b135..64b0a605ef 100644 --- a/memory.c +++ b/memory.c @@ -2494,6 +2494,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, MemoryRegionListHead submr_print_queue; const MemoryRegion *submr; unsigned int i; + hwaddr cur_start, cur_end; if (!mr) { return; @@ -2503,6 +2504,18 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, mon_printf(f, MTREE_INDENT); } + cur_start = base + mr->addr; + cur_end = cur_start + MR_SIZE(mr->size); + + /* + * Try to detect overflow of memory region. This should never + * happen normally. When it happens, we dump something to warn the + * user who is observing this. + */ + if (cur_start < base || cur_end < cur_start) { + mon_printf(f, "[DETECTED OVERFLOW!]
"); + } + if (mr->alias) { MemoryRegionList *ml; bool found = false; @@ -2522,8 +2535,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): alias %s @%s " TARGET_FMT_plx "-" TARGET_FMT_plx "%s\n", - base + mr->addr, - base + mr->addr + MR_SIZE(mr->size), + cur_start, cur_end, mr->priority, memory_region_type((MemoryRegion *)mr), memory_region_name(mr), @@ -2534,8 +2546,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, } else { mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): %s%s\n", - base + mr->addr, - base + mr->addr + MR_SIZE(mr->size), + cur_start, cur_end, mr->priority, memory_region_type((MemoryRegion *)mr), memory_region_name(mr), @@ -2562,7 +2573,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, } QTAILQ_FOREACH(ml, &submr_print_queue, queue) { - mtree_print_mr(mon_printf, f, ml->mr, level + 1, base + mr->addr, + mtree_print_mr(mon_printf, f, ml->mr, level + 1, cur_start, alias_print_queue); } From 2563c9c6b8670400c48e562034b321a7cf3d9a85 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 7 Mar 2017 09:16:27 -0600 Subject: [PATCH 18/18] nbd/client: fix drop_sync [CVE-2017-2630] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Comparison symbol is misused. It may lead to memory corruption. Introduced in commit 7d3123e. 
Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20170203154757.36140-6-vsementsov@virtuozzo.com> [eblake: add CVE details, update conditional] Signed-off-by: Eric Blake Reviewed-by: Marc-André Lureau Message-Id: <20170307151627.27212-1-eblake@redhat.com> Signed-off-by: Paolo Bonzini --- nbd/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbd/client.c b/nbd/client.c index 5c9dee37fa..3dc2564cd0 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -94,7 +94,7 @@ static ssize_t drop_sync(QIOChannel *ioc, size_t size) char small[1024]; char *buffer; - buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size)); + buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size)); while (size > 0) { ssize_t count = read_sync(ioc, buffer, MIN(65536, size));