From b3c09bde4842f1b157bc4431135aad4f2031e312 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 9 Mar 2012 14:13:40 +0000 Subject: [PATCH 1/7] kvm: add flightrecorder script The kvm kernel module includes a number of trace events which can be useful when debugging system behavior. Even on production systems these trace events can be used to observe guest behavior and identify the source of problems. The kvm_flightrecorder script is a command-line wrapper for the /sys/kernel/debug/tracing interface. Kernel symbols do not need to be installed. This script captures a fixed-size buffer of KVM trace events. Recent events overwrite the oldest events when the buffer size is exceeded and it is possible to leave KVM tracing enabled for any period of time with just a fixed-size buffer. If the buffer is large enough this script is a useful tool for collecting detailed information after an issue occurs with a guest. Hence the name "flight recorder". The script can also be used in 'tail' mode to simply view KVM trace events as they occur. This is handy for development and to ensure that the guest is indeed running. Signed-off-by: Stefan Hajnoczi Signed-off-by: Marcelo Tosatti --- scripts/kvm/kvm_flightrecorder | 126 +++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100755 scripts/kvm/kvm_flightrecorder diff --git a/scripts/kvm/kvm_flightrecorder b/scripts/kvm/kvm_flightrecorder new file mode 100755 index 0000000000..7fb1c2d1a7 --- /dev/null +++ b/scripts/kvm/kvm_flightrecorder @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# +# KVM Flight Recorder - ring buffer tracing script +# +# Copyright (C) 2012 IBM Corp +# +# Author: Stefan Hajnoczi +# +# This script provides a command-line interface to kvm ftrace and is designed +# to be used as a flight recorder that is always running. 
To start in-memory +# recording: +# +# sudo kvm_flightrecorder start 8192 # 8 MB per-cpu ring buffers +# +# The per-cpu ring buffer size can be given in KB as an optional argument to +# the 'start' subcommand. +# +# To stop the flight recorder: +# +# sudo kvm_flightrecorder stop +# +# To dump the contents of the flight recorder (this can be done when the +# recorder is stopped or while it is running): +# +# sudo kvm_flightrecorder dump >/path/to/dump.txt +# +# To observe the trace while it is running, use the 'tail' subcommand: +# +# sudo kvm_flightrecorder tail +# +# Note that the flight recorder may impact overall system performance by +# consuming CPU cycles. No disk I/O is performed since the ring buffer holds a +# fixed-size in-memory trace. + +import sys +import os + +tracing_dir = '/sys/kernel/debug/tracing' + +def trace_path(*args): + return os.path.join(tracing_dir, *args) + +def write_file(path, data): + open(path, 'wb').write(data) + +def enable_event(subsystem, event, enable): + write_file(trace_path('events', subsystem, event, 'enable'), '1' if enable else '0') + +def enable_subsystem(subsystem, enable): + write_file(trace_path('events', subsystem, 'enable'), '1' if enable else '0') + +def start_tracing(): + enable_subsystem('kvm', True) + write_file(trace_path('tracing_on'), '1') + +def stop_tracing(): + write_file(trace_path('tracing_on'), '0') + enable_subsystem('kvm', False) + write_file(trace_path('events', 'enable'), '0') + write_file(trace_path('current_tracer'), 'nop') + +def dump_trace(): + tracefile = open(trace_path('trace'), 'r') + try: + lines = True + while lines: + lines = tracefile.readlines(64 * 1024) + sys.stdout.writelines(lines) + except KeyboardInterrupt: + pass + +def tail_trace(): + try: + for line in open(trace_path('trace_pipe'), 'r'): + sys.stdout.write(line) + except KeyboardInterrupt: + pass + +def usage(): + print 'Usage: %s start [buffer_size_kb] | stop | dump | tail' % sys.argv[0] + print 'Control the KVM flight recorder 
tracing.' + sys.exit(0) + +def main(): + if len(sys.argv) < 2: + usage() + + cmd = sys.argv[1] + if cmd == '--version': + print 'kvm_flightrecorder version 1.0' + sys.exit(0) + + if not os.path.isdir(tracing_dir): + print 'Unable to tracing debugfs directory, try:' + print 'mount -t debugfs none /sys/kernel/debug' + sys.exit(1) + if not os.access(tracing_dir, os.W_OK): + print 'Unable to write to tracing debugfs directory, please run as root' + sys.exit(1) + + if cmd == 'start': + stop_tracing() # clean up first + + if len(sys.argv) == 3: + try: + buffer_size_kb = int(sys.argv[2]) + except ValueError: + print 'Invalid per-cpu trace buffer size in KB' + sys.exit(1) + write_file(trace_path('buffer_size_kb'), str(buffer_size_kb)) + print 'Per-CPU ring buffer size set to %d KB' % buffer_size_kb + + start_tracing() + print 'KVM flight recorder enabled' + elif cmd == 'stop': + stop_tracing() + print 'KVM flight recorder disabled' + elif cmd == 'dump': + dump_trace() + elif cmd == 'tail': + tail_trace() + else: + usage() + +if __name__ == '__main__': + sys.exit(main()) From 56b9ead234439317629054fa370c547c652ab09a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 21 Mar 2012 13:36:22 +0100 Subject: [PATCH 2/7] kvm: Drop redundant kvm_enabled from cpu_thread_is_idle This is now implied by kvm_irqchip_in_kernel. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- cpus.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpus.c b/cpus.c index eb22bd5df1..b182b3d7d9 100644 --- a/cpus.c +++ b/cpus.c @@ -441,8 +441,7 @@ static bool cpu_thread_is_idle(CPUArchState *env) if (env->stopped || !runstate_is_running()) { return true; } - if (!env->halted || qemu_cpu_has_work(env) || - (kvm_enabled() && kvm_irqchip_in_kernel())) { + if (!env->halted || qemu_cpu_has_work(env) || kvm_irqchip_in_kernel()) { return false; } return true; From 4b8f1c88e9de2ded754b12a967a93d395bed1245 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Tue, 20 Mar 2012 14:31:38 +0200 Subject: [PATCH 3/7] kvm: allow arbitrarily sized mmio ioeventfd We use a 2 byte ioeventfd for virtio memory, add support for this. Signed-off-by: Michael S. Tsirkin Reviewed-by: Amos Kong Signed-off-by: Avi Kivity --- hw/ivshmem.c | 8 ++++---- kvm-all.c | 15 ++++++++------- kvm-stub.c | 2 +- kvm.h | 3 ++- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/hw/ivshmem.c b/hw/ivshmem.c index b80aa8f1de..df4f50a092 100644 --- a/hw/ivshmem.c +++ b/hw/ivshmem.c @@ -354,8 +354,8 @@ static void close_guest_eventfds(IVShmemState *s, int posn) guest_curr_max = s->peers[posn].nb_eventfds; for (i = 0; i < guest_curr_max; i++) { - kvm_set_ioeventfd_mmio_long(s->peers[posn].eventfds[i], - s->mmio_addr + DOORBELL, (posn << 16) | i, 0); + kvm_set_ioeventfd_mmio(s->peers[posn].eventfds[i], + s->mmio_addr + DOORBELL, (posn << 16) | i, 0, 4); close(s->peers[posn].eventfds[i]); } @@ -500,8 +500,8 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags) } if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { - if (kvm_set_ioeventfd_mmio_long(incoming_fd, s->mmio_addr + DOORBELL, - (incoming_posn << 16) | guest_max_eventfd, 1) < 0) { + if (kvm_set_ioeventfd_mmio(incoming_fd, s->mmio_addr + DOORBELL, + (incoming_posn << 16) | guest_max_eventfd, 1, 4) < 0) { fprintf(stderr, "ivshmem: ioeventfd not available\n"); } } diff --git a/kvm-all.c b/kvm-all.c index ba2cee10f2..f1cb69fbce 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -747,10 +747,10 @@ static void kvm_mem_ioeventfd_add(MemoryRegionSection *section, { int r; - assert(match_data && section->size == 4); + assert(match_data && section->size <= 8); - r = kvm_set_ioeventfd_mmio_long(fd, section->offset_within_address_space, - data, true); + r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space, + data, true, section->size); if (r < 0) { abort(); } @@ -761,8 +761,8 @@ static void kvm_mem_ioeventfd_del(MemoryRegionSection *section, { int r; - r = 
kvm_set_ioeventfd_mmio_long(fd, section->offset_within_address_space, - data, false); + r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space, + data, false, section->size); if (r < 0) { abort(); } @@ -1642,14 +1642,15 @@ int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset) return r; } -int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign) +int kvm_set_ioeventfd_mmio(int fd, uint32_t addr, uint32_t val, bool assign, + uint32_t size) { int ret; struct kvm_ioeventfd iofd; iofd.datamatch = val; iofd.addr = addr; - iofd.len = 4; + iofd.len = size; iofd.flags = KVM_IOEVENTFD_FLAG_DATAMATCH; iofd.fd = fd; diff --git a/kvm-stub.c b/kvm-stub.c index 69a1228756..0d426db44e 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -120,7 +120,7 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign) return -ENOSYS; } -int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign) +int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, uint32_t len) { return -ENOSYS; } diff --git a/kvm.h b/kvm.h index 330f17b1db..9bdbdb020e 100644 --- a/kvm.h +++ b/kvm.h @@ -210,7 +210,8 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, #endif #endif -int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign); +int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, + uint32_t size); int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign); #endif From c73b00973b13a4a1e823ce935bcfe264c758b40b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 22 Mar 2012 00:00:48 +0100 Subject: [PATCH 4/7] kvm: Drop unused kvm_pit_in_kernel This is now implied by kvm_irqchip_in_kernel. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 6 ------ kvm-stub.c | 6 ------ kvm.h | 2 -- 3 files changed, 14 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index f1cb69fbce..5ff954abd7 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -75,7 +75,6 @@ struct KVMState #ifdef KVM_CAP_SET_GUEST_DEBUG struct kvm_sw_breakpoint_head kvm_sw_breakpoints; #endif - int pit_in_kernel; int pit_state2; int xsave, xcrs; int many_ioeventfds; @@ -198,11 +197,6 @@ static void kvm_reset_vcpu(void *opaque) kvm_arch_reset_vcpu(env); } -int kvm_pit_in_kernel(void) -{ - return kvm_state->pit_in_kernel; -} - int kvm_init_vcpu(CPUArchState *env) { KVMState *s = kvm_state; diff --git a/kvm-stub.c b/kvm-stub.c index 0d426db44e..47c573d6f3 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -16,12 +16,6 @@ #include "gdbstub.h" #include "kvm.h" -int kvm_pit_in_kernel(void) -{ - return 0; -} - - int kvm_init_vcpu(CPUArchState *env) { return -ENOSYS; diff --git a/kvm.h b/kvm.h index 9bdbdb020e..4ccae8c0c8 100644 --- a/kvm.h +++ b/kvm.h @@ -83,8 +83,6 @@ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap); int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset); #endif -int kvm_pit_in_kernel(void); - int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr); int kvm_on_sigbus(int code, void *addr); From bc8c6788b60cbbe56700568bcb645e139ab29b7d Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 28 Mar 2012 14:18:05 -0400 Subject: [PATCH 5/7] kvm: set gsi_bits and max_gsi correctly The current kvm_init_irq_routing() doesn't set up the used_gsi_bitmap correctly, and as a consequence pins max_gsi to 32 when it really should be 1024. I ran into this limitation while testing pci passthrough, where I consistently got an -ENOSPC return from kvm_get_irq_route_gsi() called from assigned_dev_update_msix_mmio(). 
Signed-off-by: Jason Baron Acked-by: Alex Williamson Acked-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index 5ff954abd7..b8e9dc69de 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -871,7 +871,7 @@ static void kvm_init_irq_routing(KVMState *s) unsigned int gsi_bits, i; /* Round up so we can search ints using ffs */ - gsi_bits = (gsi_count + 31) / 32; + gsi_bits = ALIGN(gsi_count, 32); s->used_gsi_bitmap = g_malloc0(gsi_bits / 8); s->max_gsi = gsi_bits; From 9ab2195dcb2e7d202287b6db7cfaa4f9e01941f6 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 12 Apr 2012 00:43:27 -0300 Subject: [PATCH 6/7] kvm: update linux headers Signed-off-by: Marcelo Tosatti --- linux-headers/asm-powerpc/kvm.h | 1 + linux-headers/asm-s390/kvm.h | 2 ++ linux-headers/linux/kvm.h | 9 +++++++++ 3 files changed, 12 insertions(+) diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index b921c3f489..1bea4d8ea6 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -277,6 +277,7 @@ struct kvm_sync_regs { #define KVM_CPU_E500V2 2 #define KVM_CPU_3S_32 3 #define KVM_CPU_3S_64 4 +#define KVM_CPU_E500MC 5 /* for KVM_CAP_SPAPR_TCE */ struct kvm_create_spapr_tce { diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h index 9acbde4af2..96076676e2 100644 --- a/linux-headers/asm-s390/kvm.h +++ b/linux-headers/asm-s390/kvm.h @@ -44,10 +44,12 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_PREFIX (1UL << 0) #define KVM_SYNC_GPRS (1UL << 1) #define KVM_SYNC_ACRS (1UL << 2) +#define KVM_SYNC_CRS (1UL << 3) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ __u64 gprs[16]; /* general purpose registers */ __u32 acrs[16]; /* access registers */ + __u64 crs[16]; /* control registers */ }; #endif diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 
f6b53432db..ee7bd9cc32 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -588,6 +588,8 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_TSC_DEADLINE_TIMER 72 #define KVM_CAP_S390_UCONTROL 73 #define KVM_CAP_SYNC_REGS 74 +#define KVM_CAP_PCI_2_3 75 +#define KVM_CAP_KVMCLOCK_CTRL 76 #ifdef KVM_CAP_IRQ_ROUTING @@ -784,6 +786,9 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_TSC_CONTROL */ #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) +/* Available with KVM_CAP_PCI_2_3 */ +#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ + struct kvm_assigned_pci_dev) /* * ioctls for vcpu fds @@ -855,8 +860,12 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_ONE_REG */ #define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg) #define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) +/* VM is being stopped by host */ +#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) +#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) struct kvm_assigned_pci_dev { __u32 assigned_dev_id; From f349c12c0434e29c79ecde89029320c4002f7253 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Sat, 7 Apr 2012 06:17:47 +0530 Subject: [PATCH 7/7] kvmclock: guest stop notification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Often when a guest is stopped from the qemu console, it will report spurious soft lockup warnings on resume. There are kernel patches being discussed that will give the host the ability to tell the guest that it is being stopped and should ignore the soft lockup warning that this generates. This patch uses the qemu Notifier system to tell the guest it is about to be stopped. 
Signed-off-by: Eric B Munson Signed-off-by: Raghavendra K T Reviewed-by: Andreas Färber Signed-off-by: Marcelo Tosatti --- hw/kvm/clock.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hw/kvm/clock.c b/hw/kvm/clock.c index 446bd62176..824b978397 100644 --- a/hw/kvm/clock.c +++ b/hw/kvm/clock.c @@ -65,9 +65,25 @@ static void kvmclock_vm_state_change(void *opaque, int running, RunState state) { KVMClockState *s = opaque; + CPUArchState *penv = first_cpu; + int cap_clock_ctrl = kvm_check_extension(kvm_state, KVM_CAP_KVMCLOCK_CTRL); + int ret; if (running) { s->clock_valid = false; + + if (!cap_clock_ctrl) { + return; + } + for (penv = first_cpu; penv != NULL; penv = penv->next_cpu) { + ret = kvm_vcpu_ioctl(penv, KVM_KVMCLOCK_CTRL, 0); + if (ret) { + if (ret != -EINVAL) { + fprintf(stderr, "%s: %s\n", __func__, strerror(-ret)); + } + return; + } + } } }