From 5692399f0af2b48b164b3c0b5c4c532b186b33ae Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 11 Jul 2014 09:44:26 +0200 Subject: [PATCH 01/11] backends: Introduce chr-testdev From: Paolo Bonzini chr-testdev enables a virtio serial channel to be used for guest initiated qemu exits. hw/misc/debugexit already enables guest initiated qemu exits, but only for PC targets. chr-testdev supports any virtio-capable target. kvm-unit-tests/arm is already making use of this backend. Currently there is a single command implemented, "q". It takes a (prefix) argument for the exit code, thus an exit is implemented by writing, e.g. "1q", to the virtio-serial port. It can be used as: $QEMU ... \ -device virtio-serial-device \ -device virtserialport,chardev=ctd -chardev testdev,id=ctd or, use: $QEMU ... \ -device virtio-serial-device \ -device virtconsole,chardev=ctd -chardev testdev,id=ctd to bind it to virtio-serial port0. Signed-off-by: Paolo Bonzini Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- backends/Makefile.objs | 2 +- backends/testdev.c | 131 +++++++++++++++++++++++++++++++++++++++++ include/sysemu/char.h | 3 + qapi-schema.json | 3 +- qemu-char.c | 4 ++ stubs/Makefile.objs | 1 + stubs/chr-testdev.c | 7 +++ 7 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 backends/testdev.c create mode 100644 stubs/chr-testdev.c diff --git a/backends/Makefile.objs b/backends/Makefile.objs index 506a46c33b..31a3a894f5 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs @@ -1,7 +1,7 @@ common-obj-y += rng.o rng-egd.o common-obj-$(CONFIG_POSIX) += rng-random.o -common-obj-y += msmouse.o +common-obj-y += msmouse.o testdev.o common-obj-$(CONFIG_BRLAPI) += baum.o baum.o-cflags := $(SDL_CFLAGS) diff --git a/backends/testdev.c b/backends/testdev.c new file mode 100644 index 0000000000..70d63b3b8d --- /dev/null +++ b/backends/testdev.c @@ -0,0 +1,131 @@ +/* + * QEMU Char Device for testsuite control + * + * Copyright (c) 2014 Red Hat, Inc. 
+ * + * Author: Paolo Bonzini + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "sysemu/char.h" + +#define BUF_SIZE 32 + +typedef struct { + CharDriverState *chr; + uint8_t in_buf[32]; + int in_buf_used; +} TestdevCharState; + +/* Try to interpret a whole incoming packet */ +static int testdev_eat_packet(TestdevCharState *testdev) +{ + const uint8_t *cur = testdev->in_buf; + int len = testdev->in_buf_used; + uint8_t c; + int arg; + +#define EAT(c) do { \ + if (!len--) { \ + return 0; \ + } \ + c = *cur++; \ +} while (0) + + EAT(c); + + while (isspace(c)) { + EAT(c); + } + + arg = 0; + while (isdigit(c)) { + arg = arg * 10 + c - '0'; + EAT(c); + } + + while (isspace(c)) { + EAT(c); + } + + switch (c) { + case 'q': + exit((arg << 1) | 1); + break; + default: + break; + } + return cur - testdev->in_buf; +} + +/* The other end is writing some data. 
Store it and try to interpret */ +static int testdev_write(CharDriverState *chr, const uint8_t *buf, int len) +{ + TestdevCharState *testdev = chr->opaque; + int tocopy, eaten, orig_len = len; + + while (len) { + /* Complete our buffer as much as possible */ + tocopy = MIN(len, BUF_SIZE - testdev->in_buf_used); + + memcpy(testdev->in_buf + testdev->in_buf_used, buf, tocopy); + testdev->in_buf_used += tocopy; + buf += tocopy; + len -= tocopy; + + /* Interpret it as much as possible */ + while (testdev->in_buf_used > 0 && + (eaten = testdev_eat_packet(testdev)) > 0) { + memmove(testdev->in_buf, testdev->in_buf + eaten, + testdev->in_buf_used - eaten); + testdev->in_buf_used -= eaten; + } + } + return orig_len; +} + +static void testdev_close(struct CharDriverState *chr) +{ + TestdevCharState *testdev = chr->opaque; + + g_free(testdev); +} + +CharDriverState *chr_testdev_init(void) +{ + TestdevCharState *testdev; + CharDriverState *chr; + + testdev = g_malloc0(sizeof(TestdevCharState)); + testdev->chr = chr = g_malloc0(sizeof(CharDriverState)); + + chr->opaque = testdev; + chr->chr_write = testdev_write; + chr->chr_close = testdev_close; + + return chr; +} + +static void register_types(void) +{ + register_char_driver_qapi("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL); +} + +type_init(register_types); diff --git a/include/sysemu/char.h b/include/sysemu/char.h index 0bbd631e72..98cd4c958e 100644 --- a/include/sysemu/char.h +++ b/include/sysemu/char.h @@ -358,6 +358,9 @@ CharDriverState *qemu_char_get_next_serial(void); /* msmouse */ CharDriverState *qemu_chr_open_msmouse(void); +/* testdev.c */ +CharDriverState *chr_testdev_init(void); + /* baum.c */ CharDriverState *chr_baum_init(void); diff --git a/qapi-schema.json b/qapi-schema.json index b11aad2068..341f417a5f 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2749,7 +2749,7 @@ # # Configuration info for the new chardev backend. 
# -# Since: 1.4 +# Since: 1.4 (testdev since 2.2) ## { 'type': 'ChardevDummy', 'data': { } } @@ -2764,6 +2764,7 @@ 'mux' : 'ChardevMux', 'msmouse': 'ChardevDummy', 'braille': 'ChardevDummy', + 'testdev': 'ChardevDummy', 'stdio' : 'ChardevStdio', 'console': 'ChardevDummy', 'spicevmc' : 'ChardevSpiceChannel', diff --git a/qemu-char.c b/qemu-char.c index 956be49ecd..6964a2d9fd 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -3256,6 +3256,7 @@ QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename) strcmp(filename, "pty") == 0 || strcmp(filename, "msmouse") == 0 || strcmp(filename, "braille") == 0 || + strcmp(filename, "testdev") == 0 || strcmp(filename, "stdio") == 0) { qemu_opt_set(opts, "backend", filename); return opts; @@ -4057,6 +4058,9 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, chr = chr_baum_init(); break; #endif + case CHARDEV_BACKEND_KIND_TESTDEV: + chr = chr_testdev_init(); + break; case CHARDEV_BACKEND_KIND_STDIO: chr = qemu_chr_open_stdio(backend->stdio); break; diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 528e1617b3..5e347d04bd 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -2,6 +2,7 @@ stub-obj-y += arch-query-cpu-def.o stub-obj-y += bdrv-commit-all.o stub-obj-y += chr-baum-init.o stub-obj-y += chr-msmouse.o +stub-obj-y += chr-testdev.o stub-obj-y += clock-warp.o stub-obj-y += cpu-get-clock.o stub-obj-y += cpu-get-icount.o diff --git a/stubs/chr-testdev.c b/stubs/chr-testdev.c new file mode 100644 index 0000000000..23112a2c07 --- /dev/null +++ b/stubs/chr-testdev.c @@ -0,0 +1,7 @@ +#include "qemu-common.h" +#include "sysemu/char.h" + +CharDriverState *chr_testdev_init(void) +{ + return 0; +} From c96778bb843c029846196e62a4977e75fb5ef9b3 Mon Sep 17 00:00:00 2001 From: KONRAD Frederic Date: Fri, 1 Aug 2014 01:37:09 +0200 Subject: [PATCH 02/11] icount: put icount variables into TimerState. 
This puts qemu_icount and qemu_icount_bias into TimerState structure to allow them to be migrated. Signed-off-by: KONRAD Frederic Reviewed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini --- cpus.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/cpus.c b/cpus.c index 5e7f2cf3cf..127de1c259 100644 --- a/cpus.c +++ b/cpus.c @@ -102,17 +102,12 @@ static bool all_cpu_threads_idle(void) /* Protected by TimersState seqlock */ -/* Compensate for varying guest execution speed. */ -static int64_t qemu_icount_bias; static int64_t vm_clock_warp_start; /* Conversion factor from emulated instructions to virtual clock ticks. */ static int icount_time_shift; /* Arbitrarily pick 1MIPS as the minimum allowable speed. */ #define MAX_ICOUNT_SHIFT 10 -/* Only written by TCG thread */ -static int64_t qemu_icount; - static QEMUTimer *icount_rt_timer; static QEMUTimer *icount_vm_timer; static QEMUTimer *icount_warp_timer; @@ -129,6 +124,11 @@ typedef struct TimersState { int64_t cpu_clock_offset; int32_t cpu_ticks_enabled; int64_t dummy; + + /* Compensate for varying guest execution speed. 
*/ + int64_t qemu_icount_bias; + /* Only written by TCG thread */ + int64_t qemu_icount; } TimersState; static TimersState timers_state; @@ -139,14 +139,14 @@ static int64_t cpu_get_icount_locked(void) int64_t icount; CPUState *cpu = current_cpu; - icount = qemu_icount; + icount = timers_state.qemu_icount; if (cpu) { if (!cpu_can_do_io(cpu)) { fprintf(stderr, "Bad clock read\n"); } icount -= (cpu->icount_decr.u16.low + cpu->icount_extra); } - return qemu_icount_bias + (icount << icount_time_shift); + return timers_state.qemu_icount_bias + (icount << icount_time_shift); } int64_t cpu_get_icount(void) @@ -284,7 +284,8 @@ static void icount_adjust(void) icount_time_shift++; } last_delta = delta; - qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift); + timers_state.qemu_icount_bias = cur_icount + - (timers_state.qemu_icount << icount_time_shift); seqlock_write_unlock(&timers_state.vm_clock_seqlock); } @@ -333,7 +334,7 @@ static void icount_warp_rt(void *opaque) int64_t delta = cur_time - cur_icount; warp_delta = MIN(warp_delta, delta); } - qemu_icount_bias += warp_delta; + timers_state.qemu_icount_bias += warp_delta; } vm_clock_warp_start = -1; seqlock_write_unlock(&timers_state.vm_clock_seqlock); @@ -351,7 +352,7 @@ void qtest_clock_warp(int64_t dest) int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); int64_t warp = qemu_soonest_timeout(dest - clock, deadline); seqlock_write_lock(&timers_state.vm_clock_seqlock); - qemu_icount_bias += warp; + timers_state.qemu_icount_bias += warp; seqlock_write_unlock(&timers_state.vm_clock_seqlock); qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); @@ -1250,7 +1251,8 @@ static int tcg_cpu_exec(CPUArchState *env) int64_t count; int64_t deadline; int decr; - qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra); + timers_state.qemu_icount -= (cpu->icount_decr.u16.low + + cpu->icount_extra); cpu->icount_decr.u16.low = 0; cpu->icount_extra = 0; deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); @@ 
-1265,7 +1267,7 @@ static int tcg_cpu_exec(CPUArchState *env) } count = qemu_icount_round(deadline); - qemu_icount += count; + timers_state.qemu_icount += count; decr = (count > 0xffff) ? 0xffff : count; count -= decr; cpu->icount_decr.u16.low = decr; @@ -1278,7 +1280,8 @@ static int tcg_cpu_exec(CPUArchState *env) if (use_icount) { /* Fold pending instructions back into the instruction counter, and clear the interrupt flag. */ - qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra); + timers_state.qemu_icount -= (cpu->icount_decr.u16.low + + cpu->icount_extra); cpu->icount_decr.u32 = 0; cpu->icount_extra = 0; } From d09eae3726418d4c8df2e195fd1a3bf05074dd48 Mon Sep 17 00:00:00 2001 From: KONRAD Frederic Date: Fri, 1 Aug 2014 01:37:10 +0200 Subject: [PATCH 03/11] migration: migrate icount fields. This fixes a bug where qemu_icount and qemu_icount_bias are not migrated. It adds a subsection "timer/icount" to vmstate_timers so icount is migrated only when needed. Signed-off-by: KONRAD Frederic Reviewed-by: Amit Shah Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini --- cpus.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/cpus.c b/cpus.c index 127de1c259..a6b6557146 100644 --- a/cpus.c +++ b/cpus.c @@ -429,6 +429,25 @@ void qemu_clock_warp(QEMUClockType type) } } +static bool icount_state_needed(void *opaque) +{ + return use_icount; +} + +/* + * This is a subsection for icount migration. 
+ */ +static const VMStateDescription icount_vmstate_timers = { + .name = "timer/icount", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT64(qemu_icount_bias, TimersState), + VMSTATE_INT64(qemu_icount, TimersState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_timers = { .name = "timer", .version_id = 2, @@ -438,6 +457,14 @@ static const VMStateDescription vmstate_timers = { VMSTATE_INT64(dummy, TimersState), VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2), VMSTATE_END_OF_LIST() + }, + .subsections = (VMStateSubsection[]) { + { + .vmsd = &icount_vmstate_timers, + .needed = icount_state_needed, + }, { + /* empty */ + } } }; From 3f03131390a8c91a0cac530f7ae79b04b42ab928 Mon Sep 17 00:00:00 2001 From: KONRAD Frederic Date: Fri, 1 Aug 2014 01:37:15 +0200 Subject: [PATCH 04/11] timer: add cpu_icount_to_ns function. This adds cpu_icount_to_ns function which is needed for reverse execution. It returns the time for a specific instruction. 
Signed-off-by: KONRAD Frederic Reviewed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini --- cpus.c | 7 ++++++- include/qemu/timer.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cpus.c b/cpus.c index a6b6557146..62636a65a8 100644 --- a/cpus.c +++ b/cpus.c @@ -146,7 +146,7 @@ static int64_t cpu_get_icount_locked(void) } icount -= (cpu->icount_decr.u16.low + cpu->icount_extra); } - return timers_state.qemu_icount_bias + (icount << icount_time_shift); + return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount); } int64_t cpu_get_icount(void) @@ -162,6 +162,11 @@ int64_t cpu_get_icount(void) return icount; } +int64_t cpu_icount_to_ns(int64_t icount) +{ + return icount << icount_time_shift; +} + /* return the host CPU cycle counter and handle stop/restart */ /* Caller must hold the BQL */ int64_t cpu_get_ticks(void) diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 7f9a074c2a..e12c7149e1 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -745,6 +745,7 @@ static inline int64_t get_clock(void) /* icount */ int64_t cpu_get_icount(void); int64_t cpu_get_clock(void); +int64_t cpu_icount_to_ns(int64_t icount); /*******************************************/ /* host CPU ticks (if available) */ From 714683950547ea8173aefe25f574874c06233455 Mon Sep 17 00:00:00 2001 From: Sebastian Tanase Date: Wed, 23 Jul 2014 11:47:50 +0200 Subject: [PATCH 05/11] icount: Fix virtual clock start value on ARM When using the icount option on ARM, the virtual clock starts counting at realtime clock but it should start at 0. The reason why the virtual clock starts at realtime clock is because the first time we call qemu_clock_warp (which calls icount_warp_rt) in tcg_exec_all, qemu_icount_bias (which is part of the virtual time computation mechanism) will increment by realtime - vm_clock_warp_start, with vm_clock_warp_start being 0 (see icount_warp_rt in cpus.c). 
By changing the value of vm_clock_warp_start from 0 to -1, the first time we call qemu_clock_warp which calls icount_warp_rt, we will return immediately because icount_warp_rt first checks if vm_clock_warp_start is -1 and if it's the case it returns. Therefore, qemu_icount_bias will first be incremented by the value of a virtual timer deadline when the virtual cpu goes from active to inactive. The virtual time will start at 0 and increment based on the instruction counter when the vcpu is active or the qemu_icount_bias value when inactive. Signed-off-by: Sebastian Tanase Signed-off-by: Paolo Bonzini --- cpus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpus.c b/cpus.c index 62636a65a8..bbb8d4e8be 100644 --- a/cpus.c +++ b/cpus.c @@ -102,7 +102,7 @@ static bool all_cpu_threads_idle(void) /* Protected by TimersState seqlock */ -static int64_t vm_clock_warp_start; +static int64_t vm_clock_warp_start = -1; /* Conversion factor from emulated instructions to virtual clock ticks. */ static int icount_time_shift; /* Arbitrarily pick 1MIPS as the minimum allowable speed. */ From 1ad9580bd730f195a59136d11fdc431f90f266aa Mon Sep 17 00:00:00 2001 From: Sebastian Tanase Date: Fri, 25 Jul 2014 11:56:28 +0200 Subject: [PATCH 06/11] icount: Add QemuOpts for icount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make icount parameter use QemuOpts style options in order to easily add other suboptions. 
Signed-off-by: Sebastian Tanase Tested-by: Camille Bégué Signed-off-by: Paolo Bonzini --- cpus.c | 10 +++++++++- include/qemu-common.h | 3 ++- qemu-options.hx | 4 ++-- qtest.c | 13 +++++++++++-- vl.c | 35 ++++++++++++++++++++++++++++------- 5 files changed, 52 insertions(+), 13 deletions(-) diff --git a/cpus.c b/cpus.c index bbb8d4e8be..8291044203 100644 --- a/cpus.c +++ b/cpus.c @@ -473,13 +473,21 @@ static const VMStateDescription vmstate_timers = { } }; -void configure_icount(const char *option) +void configure_icount(QemuOpts *opts, Error **errp) { + const char *option; + seqlock_init(&timers_state.vm_clock_seqlock, NULL); vmstate_register(NULL, 0, &vmstate_timers, &timers_state); + option = qemu_opt_get(opts, "shift"); if (!option) { return; } + /* When using -icount shift, the shift option will be + misinterpreted as a boolean */ + if (strcmp(option, "on") == 0 || strcmp(option, "off") == 0) { + error_setg(errp, "The shift option must be a number or auto"); + } icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME, icount_warp_rt, NULL); diff --git a/include/qemu-common.h b/include/qemu-common.h index 6ef8282234..04b0769c47 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -41,6 +41,7 @@ #include #include #include "glib-compat.h" +#include "qemu/option.h" #ifdef _WIN32 #include "sysemu/os-win32.h" @@ -105,7 +106,7 @@ static inline char *realpath(const char *path, char *resolved_path) #endif /* icount */ -void configure_icount(const char *option); +void configure_icount(QemuOpts *opts, Error **errp); extern int use_icount; #include "qemu/osdep.h" diff --git a/qemu-options.hx b/qemu-options.hx index 154962558b..5a1b001881 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3011,11 +3011,11 @@ re-inject them. 
ETEXI DEF("icount", HAS_ARG, QEMU_OPTION_icount, \ - "-icount [N|auto]\n" \ + "-icount [shift=N|auto]\n" \ " enable virtual instruction counter with 2^N clock ticks per\n" \ " instruction\n", QEMU_ARCH_ALL) STEXI -@item -icount [@var{N}|auto] +@item -icount [shift=@var{N}|auto] @findex -icount Enable virtual instruction counter. The virtual cpu will execute one instruction every 2^@var{N} ns of virtual time. If @code{auto} is specified diff --git a/qtest.c b/qtest.c index 04a6dc1f0f..ef0d99191c 100644 --- a/qtest.c +++ b/qtest.c @@ -19,6 +19,9 @@ #include "hw/irq.h" #include "sysemu/sysemu.h" #include "sysemu/cpus.h" +#include "qemu/config-file.h" +#include "qemu/option.h" +#include "qemu/error-report.h" #define MAX_IRQ 256 @@ -509,10 +512,16 @@ static void qtest_event(void *opaque, int event) } } +static void configure_qtest_icount(const char *options) +{ + QemuOpts *opts = qemu_opts_parse(qemu_find_opts("icount"), options, 1); + configure_icount(opts, &error_abort); + qemu_opts_del(opts); +} + int qtest_init_accel(MachineClass *mc) { - configure_icount("0"); - + configure_qtest_icount("0"); return 0; } diff --git a/vl.c b/vl.c index fe451aaf15..f2621a50db 100644 --- a/vl.c +++ b/vl.c @@ -537,6 +537,20 @@ static QemuOptsList qemu_mem_opts = { }, }; +static QemuOptsList qemu_icount_opts = { + .name = "icount", + .implied_opt_name = "shift", + .merge_lists = true, + .head = QTAILQ_HEAD_INITIALIZER(qemu_icount_opts.head), + .desc = { + { + .name = "shift", + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + /** * Get machine options * @@ -2908,13 +2922,12 @@ int main(int argc, char **argv, char **envp) { int i; int snapshot, linux_boot; - const char *icount_option = NULL; const char *initrd_filename; const char *kernel_filename, *kernel_cmdline; const char *boot_order; DisplayState *ds; int cyls, heads, secs, translation; - QemuOpts *hda_opts = NULL, *opts, *machine_opts; + QemuOpts *hda_opts = NULL, *opts, *machine_opts, *icount_opts = NULL; 
QemuOptsList *olist; int optind; const char *optarg; @@ -2979,6 +2992,7 @@ int main(int argc, char **argv, char **envp) qemu_add_opts(&qemu_msg_opts); qemu_add_opts(&qemu_name_opts); qemu_add_opts(&qemu_numa_opts); + qemu_add_opts(&qemu_icount_opts); runstate_init(); @@ -3830,7 +3844,11 @@ int main(int argc, char **argv, char **envp) } break; case QEMU_OPTION_icount: - icount_option = optarg; + icount_opts = qemu_opts_parse(qemu_find_opts("icount"), + optarg, 1); + if (!icount_opts) { + exit(1); + } break; case QEMU_OPTION_incoming: incoming = optarg; @@ -4306,11 +4324,14 @@ int main(int argc, char **argv, char **envp) qemu_spice_init(); #endif - if (icount_option && (kvm_enabled() || xen_enabled())) { - fprintf(stderr, "-icount is not allowed with kvm or xen\n"); - exit(1); + if (icount_opts) { + if (kvm_enabled() || xen_enabled()) { + fprintf(stderr, "-icount is not allowed with kvm or xen\n"); + exit(1); + } + configure_icount(icount_opts, &error_abort); + qemu_opts_del(icount_opts); } - configure_icount(icount_option); /* clean up network at qemu process termination */ atexit(&net_cleanup); From a8bfac37085c3372366d722f131a7e18d664ee4d Mon Sep 17 00:00:00 2001 From: Sebastian Tanase Date: Fri, 25 Jul 2014 11:56:29 +0200 Subject: [PATCH 07/11] icount: Add align option to icount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The align option is used for activating the align algorithm in order to synchronise the host clock and the guest clock. 
Signed-off-by: Sebastian Tanase Tested-by: Camille Bégué Signed-off-by: Paolo Bonzini --- cpus.c | 19 ++++++++++++------- include/qemu-common.h | 1 + qemu-options.hx | 15 +++++++++++++-- vl.c | 4 ++++ 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/cpus.c b/cpus.c index 8291044203..7e09538799 100644 --- a/cpus.c +++ b/cpus.c @@ -476,25 +476,30 @@ static const VMStateDescription vmstate_timers = { void configure_icount(QemuOpts *opts, Error **errp) { const char *option; + char *rem_str = NULL; seqlock_init(&timers_state.vm_clock_seqlock, NULL); vmstate_register(NULL, 0, &vmstate_timers, &timers_state); option = qemu_opt_get(opts, "shift"); if (!option) { + if (qemu_opt_get(opts, "align") != NULL) { + error_setg(errp, "Please specify shift option when using align"); + } return; } - /* When using -icount shift, the shift option will be - misinterpreted as a boolean */ - if (strcmp(option, "on") == 0 || strcmp(option, "off") == 0) { - error_setg(errp, "The shift option must be a number or auto"); - } - + icount_align_option = qemu_opt_get_bool(opts, "align", false); icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME, icount_warp_rt, NULL); if (strcmp(option, "auto") != 0) { - icount_time_shift = strtol(option, NULL, 0); + errno = 0; + icount_time_shift = strtol(option, &rem_str, 0); + if (errno != 0 || *rem_str != '\0' || !strlen(option)) { + error_setg(errp, "icount: Invalid shift value"); + } use_icount = 1; return; + } else if (icount_align_option) { + error_setg(errp, "shift=auto and align=on are incompatible"); } use_icount = 2; diff --git a/include/qemu-common.h b/include/qemu-common.h index 04b0769c47..5d10ac27a1 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -108,6 +108,7 @@ static inline char *realpath(const char *path, char *resolved_path) /* icount */ void configure_icount(QemuOpts *opts, Error **errp); extern int use_icount; +extern int icount_align_option; #include "qemu/osdep.h" #include "qemu/bswap.h" diff --git 
a/qemu-options.hx b/qemu-options.hx index 5a1b001881..96516c1e23 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3011,9 +3011,9 @@ re-inject them. ETEXI DEF("icount", HAS_ARG, QEMU_OPTION_icount, \ - "-icount [shift=N|auto]\n" \ + "-icount [shift=N|auto][,align=on|off]\n" \ " enable virtual instruction counter with 2^N clock ticks per\n" \ - " instruction\n", QEMU_ARCH_ALL) + " instruction and enable aligning the host and virtual clocks\n", QEMU_ARCH_ALL) STEXI @item -icount [shift=@var{N}|auto] @findex -icount @@ -3026,6 +3026,17 @@ Note that while this option can give deterministic behavior, it does not provide cycle accurate emulation. Modern CPUs contain superscalar out of order cores with complex cache hierarchies. The number of instructions executed often has little or no correlation with actual performance. + +@option{align=on} will activate the delay algorithm which will try to +to synchronise the host clock and the virtual clock. The goal is to +have a guest running at the real frequency imposed by the shift option. +Whenever the guest clock is behind the host clock and if +@option{align=on} is specified then we print a messsage to the user +to inform about the delay. +Currently this option does not work when @option{shift} is @code{auto}. +Note: The sync algorithm will work for those shift values for which +the guest clock runs ahead of the host clock. Typically this happens +when the shift value is high (how high depends on the host machine). 
ETEXI DEF("watchdog", HAS_ARG, QEMU_OPTION_watchdog, \ diff --git a/vl.c b/vl.c index f2621a50db..a8029d59d7 100644 --- a/vl.c +++ b/vl.c @@ -183,6 +183,7 @@ uint8_t *boot_splash_filedata; size_t boot_splash_filedata_size; uint8_t qemu_extra_params_fw[2]; +int icount_align_option; typedef struct FWBootEntry FWBootEntry; struct FWBootEntry { @@ -546,6 +547,9 @@ static QemuOptsList qemu_icount_opts = { { .name = "shift", .type = QEMU_OPT_STRING, + }, { + .name = "align", + .type = QEMU_OPT_BOOL, }, { /* end of list */ } }, From c2aa5f819900660f936faadfe92fe5d60a562482 Mon Sep 17 00:00:00 2001 From: Sebastian Tanase Date: Fri, 25 Jul 2014 11:56:31 +0200 Subject: [PATCH 08/11] cpu-exec: Add sleeping algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The goal is to sleep qemu whenever the guest clock is in advance compared to the host clock (we use the monotonic clocks). The amount of time to sleep is calculated in the execution loop in cpu_exec. At first, we tried to approximate at each for loop the real time elapsed while searching for a TB (generating or retrieving from cache) and executing it. We would then approximate the virtual time corresponding to the number of virtual instructions executed. The difference between these 2 values would allow us to know if the guest is in advance or delayed. However, the function used for measuring the real time (qemu_clock_get_ns(QEMU_CLOCK_REALTIME)) proved to be very expensive. We had an added overhead of 13% of the total run time. Therefore, we modified the algorithm and only take into account the difference between the 2 clocks at the beginning of the cpu_exec function. During the for loop we try to reduce the advance of the guest only by computing the virtual time elapsed and sleeping if necessary. The overhead is thus reduced to 3%. 
Even though this method still has a noticeable overhead, it no longer is a bottleneck in trying to achieve a better guest frequency for which the guest clock is faster than the host one. As for the alignment of the 2 clocks, with the first algorithm the guest clock was oscillating between -1 and 1ms compared to the host clock. Using the second algorithm we notice that the guest is 5ms behind the host, which is still acceptable for our use case. The tests were conducted using fio and stress. The host machine is an i5 CPU at 3.10GHz running Debian Jessie (kernel 3.12). The guest machine is an arm versatile-pb built with buildroot. Currently, on our test machine, the lowest icount we can achieve that is suitable for aligning the 2 clocks is 6. However, we observe that the IO tests (using fio) are slower than the cpu tests (using stress). Signed-off-by: Sebastian Tanase Tested-by: Camille Bégué Signed-off-by: Paolo Bonzini --- cpu-exec.c | 79 ++++++++++++++++++++++++++++++++++++++++++++ cpus.c | 17 ++++++++++ include/qemu/timer.h | 1 + 3 files changed, 97 insertions(+) diff --git a/cpu-exec.c b/cpu-exec.c index 38e5f02a30..68f82b631b 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -22,6 +22,72 @@ #include "tcg.h" #include "qemu/atomic.h" #include "sysemu/qtest.h" +#include "qemu/timer.h" + +/* -icount align implementation. */ + +typedef struct SyncClocks { + int64_t diff_clk; + int64_t last_cpu_icount; +} SyncClocks; + +#if !defined(CONFIG_USER_ONLY) +/* Allow the guest to have a max 3ms advance. + * The difference between the 2 clocks could therefore + * oscillate around 0. 
+ */ +#define VM_CLOCK_ADVANCE 3000000 + +static void align_clocks(SyncClocks *sc, const CPUState *cpu) +{ + int64_t cpu_icount; + + if (!icount_align_option) { + return; + } + + cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; + sc->diff_clk += cpu_icount_to_ns(sc->last_cpu_icount - cpu_icount); + sc->last_cpu_icount = cpu_icount; + + if (sc->diff_clk > VM_CLOCK_ADVANCE) { +#ifndef _WIN32 + struct timespec sleep_delay, rem_delay; + sleep_delay.tv_sec = sc->diff_clk / 1000000000LL; + sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL; + if (nanosleep(&sleep_delay, &rem_delay) < 0) { + sc->diff_clk -= (sleep_delay.tv_sec - rem_delay.tv_sec) * 1000000000LL; + sc->diff_clk -= sleep_delay.tv_nsec - rem_delay.tv_nsec; + } else { + sc->diff_clk = 0; + } +#else + Sleep(sc->diff_clk / SCALE_MS); + sc->diff_clk = 0; +#endif + } +} + +static void init_delay_params(SyncClocks *sc, + const CPUState *cpu) +{ + if (!icount_align_option) { + return; + } + sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - + qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + + cpu_get_clock_offset(); + sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; +} +#else +static void align_clocks(SyncClocks *sc, const CPUState *cpu) +{ +} + +static void init_delay_params(SyncClocks *sc, const CPUState *cpu) +{ +} +#endif /* CONFIG USER ONLY */ void cpu_loop_exit(CPUState *cpu) { @@ -227,6 +293,8 @@ int cpu_exec(CPUArchState *env) TranslationBlock *tb; uint8_t *tc_ptr; uintptr_t next_tb; + SyncClocks sc; + /* This must be volatile so it is not trashed by longjmp() */ volatile bool have_tb_lock = false; @@ -283,6 +351,13 @@ int cpu_exec(CPUArchState *env) #endif cpu->exception_index = -1; + /* Calculate difference between guest clock and host clock. + * This delay includes the delay of the last cycle, so + * what we have to do is sleep until it is 0. As for the + * advance/delay we gain here, we try to fix it next time. 
+ */ + init_delay_params(&sc, cpu); + /* prepare setjmp context for exception handling */ for(;;) { if (sigsetjmp(cpu->jmp_env, 0) == 0) { @@ -672,6 +747,7 @@ int cpu_exec(CPUArchState *env) if (insns_left > 0) { /* Execute remaining instructions. */ cpu_exec_nocache(env, insns_left, tb); + align_clocks(&sc, cpu); } cpu->exception_index = EXCP_INTERRUPT; next_tb = 0; @@ -684,6 +760,9 @@ int cpu_exec(CPUArchState *env) } } cpu->current_tb = NULL; + /* Try to align the host and virtual clocks + if the guest is in advance */ + align_clocks(&sc, cpu); /* reset soft MMU for next block (it can currently only be set by a memory fault) */ } /* for(;;) */ diff --git a/cpus.c b/cpus.c index 7e09538799..19245e99b9 100644 --- a/cpus.c +++ b/cpus.c @@ -219,6 +219,23 @@ int64_t cpu_get_clock(void) return ti; } +/* return the offset between the host clock and virtual CPU clock */ +int64_t cpu_get_clock_offset(void) +{ + int64_t ti; + unsigned start; + + do { + start = seqlock_read_begin(&timers_state.vm_clock_seqlock); + ti = timers_state.cpu_clock_offset; + if (!timers_state.cpu_ticks_enabled) { + ti -= get_clock(); + } + } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); + + return -ti; +} + /* enable cpu_get_ticks() * Caller must hold BQL which server as mutex for vm_clock_seqlock. 
*/ diff --git a/include/qemu/timer.h b/include/qemu/timer.h index e12c7149e1..5f5210d543 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -745,6 +745,7 @@ static inline int64_t get_clock(void) /* icount */ int64_t cpu_get_icount(void); int64_t cpu_get_clock(void); +int64_t cpu_get_clock_offset(void); int64_t cpu_icount_to_ns(int64_t icount); /*******************************************/ From 7f7bc144ed653c6026ec956045224666abdec316 Mon Sep 17 00:00:00 2001 From: Sebastian Tanase Date: Fri, 25 Jul 2014 11:56:32 +0200 Subject: [PATCH 09/11] cpu-exec: Print to console if the guest is late MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the align option is enabled, we print to the user whenever the guest clock is behind the host clock in order for him/her to have a hint about the actual performance. The maximum print interval is 2s and we limit the number of messages to 100. If desired, this can be changed in cpu-exec.c Signed-off-by: Sebastian Tanase Tested-by: Camille Bégué Signed-off-by: Paolo Bonzini --- cpu-exec.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/cpu-exec.c b/cpu-exec.c index 68f82b631b..3c14502329 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -29,6 +29,7 @@ typedef struct SyncClocks { int64_t diff_clk; int64_t last_cpu_icount; + int64_t realtime_clock; } SyncClocks; #if !defined(CONFIG_USER_ONLY) @@ -37,6 +38,9 @@ typedef struct SyncClocks { * oscillate around 0. 
*/ #define VM_CLOCK_ADVANCE 3000000 +#define THRESHOLD_REDUCE 1.5 +#define MAX_DELAY_PRINT_RATE 2000000000LL +#define MAX_NB_PRINTS 100 static void align_clocks(SyncClocks *sc, const CPUState *cpu) { @@ -68,16 +72,43 @@ static void align_clocks(SyncClocks *sc, const CPUState *cpu) } } +static void print_delay(const SyncClocks *sc) +{ + static float threshold_delay; + static int64_t last_realtime_clock; + static int nb_prints; + + if (icount_align_option && + sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE && + nb_prints < MAX_NB_PRINTS) { + if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) || + (-sc->diff_clk / (float)1000000000LL < + (threshold_delay - THRESHOLD_REDUCE))) { + threshold_delay = (-sc->diff_clk / 1000000000LL) + 1; + printf("Warning: The guest is now late by %.1f to %.1f seconds\n", + threshold_delay - 1, + threshold_delay); + nb_prints++; + last_realtime_clock = sc->realtime_clock; + } + } +} + static void init_delay_params(SyncClocks *sc, const CPUState *cpu) { if (!icount_align_option) { return; } + sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - - qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + + sc->realtime_clock + cpu_get_clock_offset(); sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; + + /* Print every 2s max if the guest is late. We limit the number + of printed messages to MAX_NB_PRINTS (currently 100) */ + print_delay(sc); } #else static void align_clocks(SyncClocks *sc, const CPUState *cpu) From 27498bef357de432a9aa403c5ccf11776773ba58 Mon Sep 17 00:00:00 2001 From: Sebastian Tanase Date: Fri, 25 Jul 2014 11:56:33 +0200 Subject: [PATCH 10/11] monitor: Add drift info to 'info jit' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Show in 'info jit' the current delay between the host clock and the guest clock. In addition, print the maximum advance and delay of the guest compared to the host. 
Signed-off-by: Sebastian Tanase Tested-by: Camille Bégué Signed-off-by: Paolo Bonzini --- cpu-exec.c | 6 ++++++ cpus.c | 19 +++++++++++++++++++ include/qemu-common.h | 4 ++++ monitor.c | 1 + 4 files changed, 30 insertions(+) diff --git a/cpu-exec.c b/cpu-exec.c index 3c14502329..cbc8067b37 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -105,6 +105,12 @@ static void init_delay_params(SyncClocks *sc, sc->realtime_clock + cpu_get_clock_offset(); sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; + if (sc->diff_clk < max_delay) { + max_delay = sc->diff_clk; + } + if (sc->diff_clk > max_advance) { + max_advance = sc->diff_clk; + } /* Print every 2s max if the guest is late. We limit the number of printed messages to NB_PRINT_MAX(currently 100) */ diff --git a/cpus.c b/cpus.c index 19245e99b9..2b5c0bd7c7 100644 --- a/cpus.c +++ b/cpus.c @@ -64,6 +64,8 @@ #endif /* CONFIG_LINUX */ static CPUState *next_cpu; +int64_t max_delay; +int64_t max_advance; bool cpu_is_stopped(CPUState *cpu) { @@ -1552,3 +1554,20 @@ void qmp_inject_nmi(Error **errp) error_set(errp, QERR_UNSUPPORTED); #endif } + +void dump_drift_info(FILE *f, fprintf_function cpu_fprintf) +{ + if (!use_icount) { + return; + } + + cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n", + (cpu_get_clock() - cpu_get_icount())/SCALE_MS); + if (icount_align_option) { + cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS); + cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS); + } else { + cpu_fprintf(f, "Max guest delay NA\n"); + cpu_fprintf(f, "Max guest advance NA\n"); + } +} diff --git a/include/qemu-common.h b/include/qemu-common.h index 5d10ac27a1..bcf7a6ad43 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -109,6 +109,10 @@ static inline char *realpath(const char *path, char *resolved_path) void configure_icount(QemuOpts *opts, Error **errp); extern int use_icount; extern int icount_align_option; +/* drift information for info jit command */ +extern 
int64_t max_delay; +extern int64_t max_advance; +void dump_drift_info(FILE *f, fprintf_function cpu_fprintf); #include "qemu/osdep.h" #include "qemu/bswap.h" diff --git a/monitor.c b/monitor.c index 5bc70a642d..cdbaa60f98 100644 --- a/monitor.c +++ b/monitor.c @@ -1047,6 +1047,7 @@ static void do_info_registers(Monitor *mon, const QDict *qdict) static void do_info_jit(Monitor *mon, const QDict *qdict) { dump_exec_info((FILE *)mon, monitor_fprintf); + dump_drift_info((FILE *)mon, monitor_fprintf); } static void do_info_history(Monitor *mon, const QDict *qdict) From eddedd546a68f6ac864b71d50dd8d39b939b724b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 28 Jul 2014 12:37:50 +0100 Subject: [PATCH 11/11] target-mips: Ignore unassigned accesses with KVM MIPS registers an unassigned access handler which raises a guest bus error exception. However this causes QEMU to crash when KVM is enabled as it isn't called from the main execution loop so longjmp() gets called without a corresponding setjmp(). Until the KVM API can be updated to trigger a guest exception in response to an MMIO exit, prevent the bus error exception being raised from mips_cpu_unassigned_access() if KVM is enabled. The check is at run time since the do_unassigned_access callback is initialised before it is known whether KVM will be enabled. The problem can be triggered with Malta emulation by making the guest write to the reset region at physical address 0x1bf00000, since it is marked read-only which is treated as unassigned for writes. 
Signed-off-by: James Hogan Reviewed-by: Aurelien Jarno Cc: Peter Maydell Cc: Paolo Bonzini Cc: Gleb Natapov Cc: Christoffer Dall Cc: Sanjay Lal Signed-off-by: Paolo Bonzini --- target-mips/op_helper.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c index 27651a4a00..df97b35f87 100644 --- a/target-mips/op_helper.c +++ b/target-mips/op_helper.c @@ -21,6 +21,7 @@ #include "qemu/host-utils.h" #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" +#include "sysemu/kvm.h" #ifndef CONFIG_USER_ONLY static inline void cpu_mips_tlb_flush (CPUMIPSState *env, int flush_global); @@ -2168,6 +2169,16 @@ void mips_cpu_unassigned_access(CPUState *cs, hwaddr addr, MIPSCPU *cpu = MIPS_CPU(cs); CPUMIPSState *env = &cpu->env; + /* + * Raising an exception with KVM enabled will crash because it won't be from + * the main execution loop so the longjmp won't have a matching setjmp. + * Until we can trigger a bus error exception through KVM let's just ignore + * the access. + */ + if (kvm_enabled()) { + return; + } + if (is_exec) { helper_raise_exception(env, EXCP_IBE); } else {