From 276ce81563aeadea5493fbc4fbc84647d5bfbdeb Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 10 Dec 2010 15:42:53 +0800 Subject: [PATCH 01/31] kvm: Enable user space NMI injection for kvm guest Make use of the new KVM_NMI IOCTL to send NMIs into the KVM guest if the user space raised them. (example: qemu monitor's "nmi" command) Signed-off-by: Lai Jiangshan Acked-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- configure | 3 +++ target-i386/kvm.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/configure b/configure index 210670c16f..9a02d1f730 100755 --- a/configure +++ b/configure @@ -1671,6 +1671,9 @@ if test "$kvm" != "no" ; then #if !defined(KVM_CAP_DESTROY_MEMORY_REGION_WORKS) #error Missing KVM capability KVM_CAP_DESTROY_MEMORY_REGION_WORKS #endif +#if !defined(KVM_CAP_USER_NMI) +#error Missing KVM capability KVM_CAP_USER_NMI +#endif int main(void) { return 0; } EOF if test "$kerneldir" != "" ; then diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7dfc357e42..755f8c9089 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1417,6 +1417,13 @@ int kvm_arch_get_registers(CPUState *env) int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) { + /* Inject NMI */ + if (env->interrupt_request & CPU_INTERRUPT_NMI) { + env->interrupt_request &= ~CPU_INTERRUPT_NMI; + DPRINTF("injected NMI\n"); + kvm_vcpu_ioctl(env, KVM_NMI); + } + /* Try to inject an interrupt if the guest can accept it */ if (run->ready_for_interrupt_injection && (env->interrupt_request & CPU_INTERRUPT_HARD) && From 14a095184c6068c78e67f17c200cf00b4170f704 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 10 Dec 2010 15:52:36 +0800 Subject: [PATCH 02/31] kvm: convert kvm_ioctl(KVM_CHECK_EXTENSION) to kvm_check_extension() simple cleanup and use existing helper: kvm_check_extension(). Signed-off-by: Lai Jiangshan Signed-off-by: Marcelo Tosatti --- kvm-all.c | 2 +- target-i386/kvm.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 255b6fad9c..935c4362e6 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -717,7 +717,7 @@ int kvm_init(int smp_cpus) s->broken_set_mem_region = 1; #ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS - ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS); + ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS); if (ret > 0) { s->broken_set_mem_region = 0; } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 755f8c9089..4004de76dd 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -187,7 +187,7 @@ static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap, { int r; - r = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_MCE); + r = kvm_check_extension(s, KVM_CAP_MCE); if (r > 0) { *max_banks = r; return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap); @@ -540,7 +540,7 @@ int kvm_arch_init(KVMState *s, int smp_cpus) * versions of KVM just assumed that it would be at the end of physical * memory but that doesn't work with more than 4GB of memory. We simply * refuse to work with those older versions of KVM. */ - ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); + ret = kvm_check_extension(s, KVM_CAP_SET_TSS_ADDR); if (ret <= 0) { fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n"); return ret; From b3cd24e04a2aea342429c09ed93468dd3206fede Mon Sep 17 00:00:00 2001 From: Jin Dongming Date: Fri, 10 Dec 2010 17:20:44 +0900 Subject: [PATCH 03/31] Clean up cpu_inject_x86_mce() Clean up cpu_inject_x86_mce() for later patch. 
Signed-off-by: Jin Dongming Signed-off-by: Marcelo Tosatti --- target-i386/helper.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/target-i386/helper.c b/target-i386/helper.c index 25a3e36138..2c94130a15 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1021,21 +1021,12 @@ static void breakpoint_handler(CPUState *env) /* This should come from sysemu.h - if we could include it here... */ void qemu_system_reset_request(void); -void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, +static void qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc) { uint64_t mcg_cap = cenv->mcg_cap; - unsigned bank_num = mcg_cap & 0xff; uint64_t *banks = cenv->mce_banks; - if (bank >= bank_num || !(status & MCI_STATUS_VAL)) - return; - - if (kvm_enabled()) { - kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, 0); - return; - } - /* * if MSR_MCG_CTL is not all 1s, the uncorrected error * reporting is disabled @@ -1076,6 +1067,22 @@ void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, } else banks[1] |= MCI_STATUS_OVER; } + +void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, + uint64_t mcg_status, uint64_t addr, uint64_t misc) +{ + unsigned bank_num = cenv->mcg_cap & 0xff; + + if (bank >= bank_num || !(status & MCI_STATUS_VAL)) { + return; + } + + if (kvm_enabled()) { + kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, 0); + } else { + qemu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc); + } +} #endif /* !CONFIG_USER_ONLY */ static void mce_init(CPUX86State *cenv) From 31ce5e0c49821d92fb30cce2f3055ef33613b287 Mon Sep 17 00:00:00 2001 From: Jin Dongming Date: Fri, 10 Dec 2010 17:21:02 +0900 Subject: [PATCH 04/31] Add "broadcast" option for mce command When the following test case is injected with mce command, maybe user could not get the expected result. DATA command cpu bank status mcg_status addr misc (qemu) mce 1 1 0xbd00000000000000 0x05 0x1234 0x8c Expected Result panic type: "Fatal Machine check" That is because each mce command can only inject the given cpu and could not inject mce interrupt to other cpus. So user will get the following result: panic type: "Fatal machine check on current CPU" "broadcast" option is used for injecting dummy data into other cpus. Injecting mce with this option the expected result could be gotten. 
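The "dummy data" injected into the remaining CPUs is simply a valid, uncorrected-error status with no address or misc information, as the TCG path in the target-i386/helper.c hunk below shows. A minimal standalone sketch decoding that raw constant (the bit definitions mirror QEMU's target-i386/cpu.h; the sketch itself is not part of this patch):

    #include <assert.h>
    #include <stdint.h>

    #define MCI_STATUS_VAL (1ULL << 63)   /* MCi_STATUS contents are valid */
    #define MCI_STATUS_UC  (1ULL << 61)   /* uncorrected error */

    int main(void)
    {
        /* raw status handed to qemu_inject_x86_mce() for the other CPUs */
        uint64_t dummy_status = 0xa000000000000000ULL;

        assert(dummy_status == (MCI_STATUS_VAL | MCI_STATUS_UC));
        return 0;
    }
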
Usage: Broadcast[on] command broadcast cpu bank status mcg_status addr misc (qemu) mce -b 1 1 0xbd00000000000000 0x05 0x1234 0x8c Broadcast[off] command cpu bank status mcg_status addr misc (qemu) mce 1 1 0xbd00000000000000 0x05 0x1234 0x8c Signed-off-by: Jin Dongming Signed-off-by: Marcelo Tosatti --- cpu-all.h | 3 ++- hmp-commands.hx | 6 +++--- monitor.c | 7 +++++-- target-i386/helper.c | 20 ++++++++++++++++++-- target-i386/kvm.c | 16 ++++++++++++---- target-i386/kvm_x86.h | 5 ++++- 6 files changed, 44 insertions(+), 13 deletions(-) diff --git a/cpu-all.h b/cpu-all.h index 30ae17d41d..4ce4e83032 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -964,6 +964,7 @@ int cpu_memory_rw_debug(CPUState *env, target_ulong addr, uint8_t *buf, int len, int is_write); void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, - uint64_t mcg_status, uint64_t addr, uint64_t misc); + uint64_t mcg_status, uint64_t addr, uint64_t misc, + int broadcast); #endif /* CPU_ALL_H */ diff --git a/hmp-commands.hx b/hmp-commands.hx index 1cea572b15..d65a41f523 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1116,9 +1116,9 @@ ETEXI { .name = "mce", - .args_type = "cpu_index:i,bank:i,status:l,mcg_status:l,addr:l,misc:l", - .params = "cpu bank status mcgstatus addr misc", - .help = "inject a MCE on the given CPU", + .args_type = "broadcast:-b,cpu_index:i,bank:i,status:l,mcg_status:l,addr:l,misc:l", + .params = "[-b] cpu bank status mcgstatus addr misc", + .help = "inject a MCE on the given CPU [and broadcast to other CPUs with -b option]", .mhandler.cmd = do_inject_mce, }, diff --git a/monitor.c b/monitor.c index d291158c2f..396d5cdda3 100644 --- a/monitor.c +++ b/monitor.c @@ -2671,12 +2671,15 @@ static void do_inject_mce(Monitor *mon, const QDict *qdict) uint64_t mcg_status = qdict_get_int(qdict, "mcg_status"); uint64_t addr = qdict_get_int(qdict, "addr"); uint64_t misc = qdict_get_int(qdict, "misc"); + int broadcast = qdict_get_try_bool(qdict, "broadcast", 0); - for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) + for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) { if (cenv->cpu_index == cpu_index && cenv->mcg_cap) { - cpu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc); + cpu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, + broadcast); break; } + } } #endif diff --git a/target-i386/helper.c b/target-i386/helper.c index 2c94130a15..2cfb4a42a7 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1069,18 +1069,34 @@ static void qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, } void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, - uint64_t mcg_status, uint64_t addr, uint64_t misc) + uint64_t mcg_status, uint64_t addr, uint64_t misc, + int broadcast) { unsigned bank_num = cenv->mcg_cap & 0xff; + CPUState *env; + int flag = 0; if (bank >= bank_num || !(status & MCI_STATUS_VAL)) { return; } if (kvm_enabled()) { - kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, 0); + if (broadcast) { + flag |= MCE_BROADCAST; + } + + kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, flag); } else { qemu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc); + if (broadcast) { + for (env = first_cpu; env != NULL; env = env->next_cpu) { + if (cenv == env) { + continue; + } + + qemu_inject_x86_mce(env, 1, 0xa000000000000000, 0, 0, 0); + } + } } } #endif /* !CONFIG_USER_ONLY */ diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 4004de76dd..8b868ad40a 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -264,11 
+264,13 @@ static void kvm_do_inject_x86_mce(void *_data) } } } + +static void kvm_mce_broadcast_rest(CPUState *env); #endif void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc, - int abort_on_error) + int flag) { #ifdef KVM_CAP_MCE struct kvm_x86_mce mce = { @@ -288,10 +290,15 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, return; } + if (flag & MCE_BROADCAST) { + kvm_mce_broadcast_rest(cenv); + } + run_on_cpu(cenv, kvm_do_inject_x86_mce, &data); #else - if (abort_on_error) + if (flag & ABORT_ON_ERROR) { abort(); + } #endif } @@ -1716,7 +1723,8 @@ static void kvm_mce_broadcast_rest(CPUState *env) continue; } kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC, - MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1); + MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, + ABORT_ON_ERROR); } } } @@ -1816,7 +1824,7 @@ int kvm_on_sigbus(int code, void *addr) | 0xc0; kvm_inject_x86_mce(first_cpu, 9, status, MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr, - (MCM_ADDR_PHYS << 6) | 0xc, 1); + (MCM_ADDR_PHYS << 6) | 0xc, ABORT_ON_ERROR); kvm_mce_broadcast_rest(first_cpu); } else #endif diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h index 04932cf4c8..9d7b584267 100644 --- a/target-i386/kvm_x86.h +++ b/target-i386/kvm_x86.h @@ -15,8 +15,11 @@ #ifndef __KVM_X86_H__ #define __KVM_X86_H__ +#define ABORT_ON_ERROR 0x01 +#define MCE_BROADCAST 0x02 + void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc, - int abort_on_error); + int flag); #endif From 2bd3e04c3b3c76d573435a299a4d85bad0021a90 Mon Sep 17 00:00:00 2001 From: Jin Dongming Date: Fri, 10 Dec 2010 17:21:14 +0900 Subject: [PATCH 05/31] Add function for checking mca broadcast of CPU Add function for checking whether current CPU support mca broadcast. 
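For reference, the family/model decoding the new helper relies on can be exercised in isolation; the minimal sketch below mirrors the formula added to target-i386/helper.c, using a made-up CPUID signature (0x6e6 is only an example value):

    #include <stdio.h>

    static void x86_version(unsigned int cpuid_1_eax, int *family, int *model)
    {
        *family = (cpuid_1_eax >> 8) & 0x0f;
        *model = ((cpuid_1_eax >> 12) & 0xf0) + ((cpuid_1_eax >> 4) & 0x0f);
    }

    int main(void)
    {
        int family, model;

        x86_version(0x6e6, &family, &model);    /* decodes to family 6, model 14 */
        printf("mca broadcast: %s\n",
               ((family == 6 && model >= 14) || family > 6) ? "yes" : "no");
        return 0;
    }
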
Signed-off-by: Jin Dongming Signed-off-by: Marcelo Tosatti --- target-i386/cpu.h | 1 + target-i386/helper.c | 33 +++++++++++++++++++++++++++++++++ target-i386/kvm.c | 6 +----- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index f0c07cde3b..dddcd749cc 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -760,6 +760,7 @@ int cpu_x86_exec(CPUX86State *s); void cpu_x86_close(CPUX86State *s); void x86_cpu_list (FILE *f, fprintf_function cpu_fprintf, const char *optarg); void x86_cpudef_setup(void); +int cpu_x86_support_mca_broadcast(CPUState *env); int cpu_get_pic_interrupt(CPUX86State *s); /* MSDOS compatibility mode FPU exception support */ diff --git a/target-i386/helper.c b/target-i386/helper.c index 2cfb4a42a7..6dfa27d51b 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -110,6 +110,32 @@ void cpu_x86_close(CPUX86State *env) qemu_free(env); } +static void cpu_x86_version(CPUState *env, int *family, int *model) +{ + int cpuver = env->cpuid_version; + + if (family == NULL || model == NULL) { + return; + } + + *family = (cpuver >> 8) & 0x0f; + *model = ((cpuver >> 12) & 0xf0) + ((cpuver >> 4) & 0x0f); +} + +/* Broadcast MCA signal for processor version 06H_EH and above */ +int cpu_x86_support_mca_broadcast(CPUState *env) +{ + int family = 0; + int model = 0; + + cpu_x86_version(env, &family, &model); + if ((family == 6 && model >= 14) || family > 6) { + return 1; + } + + return 0; +} + /***********************************************************/ /* x86 debug */ @@ -1080,6 +1106,13 @@ void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, return; } + if (broadcast) { + if (!cpu_x86_support_mca_broadcast(cenv)) { + fprintf(stderr, "Current CPU does not support broadcast\n"); + return; + } + } + if (kvm_enabled()) { if (broadcast) { flag |= MCE_BROADCAST; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 8b868ad40a..2115a585ef 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1711,13 +1711,9 @@ static void hardware_memory_error(void) static void kvm_mce_broadcast_rest(CPUState *env) { CPUState *cenv; - int family, model, cpuver = env->cpuid_version; - - family = (cpuver >> 8) & 0xf; - model = ((cpuver >> 12) & 0xf0) + ((cpuver >> 4) & 0xf); /* Broadcast MCA signal for processor version 06H_EH and above */ - if ((family == 6 && model >= 14) || family > 6) { + if (cpu_x86_support_mca_broadcast(env)) { for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) { if (cenv == env) { continue; From 6643e2f001f207bdb85646a4c48d1e13244d87c3 Mon Sep 17 00:00:00 2001 From: Jin Dongming Date: Wed, 22 Dec 2010 12:24:22 +0900 Subject: [PATCH 06/31] kvm: introduce kvm_mce_in_progress Share same error handing, and rename this function after MCIP (Machine Check In Progress) flag. 
Signed-off-by: Hidetoshi Seto Signed-off-by: Jin Dongming Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 2115a585ef..5a699fcaec 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -219,7 +219,7 @@ static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n) } /* FIXME: kill this and kvm_get_msr, use env->mcg_status instead */ -static int kvm_mce_in_exception(CPUState *env) +static int kvm_mce_in_progress(CPUState *env) { struct kvm_msr_entry msr_mcg_status = { .index = MSR_MCG_STATUS, @@ -228,7 +228,8 @@ static int kvm_mce_in_exception(CPUState *env) r = kvm_get_msr(env, &msr_mcg_status, 1); if (r == -1 || r == 0) { - return -1; + fprintf(stderr, "Failed to get MCE status\n"); + return 0; } return !!(msr_mcg_status.data & MCG_STATUS_MCIP); } @@ -248,10 +249,7 @@ static void kvm_do_inject_x86_mce(void *_data) /* If there is an MCE exception being processed, ignore this SRAO MCE */ if ((data->env->mcg_cap & MCG_SER_P) && !(data->mce->status & MCI_STATUS_AR)) { - r = kvm_mce_in_exception(data->env); - if (r == -1) { - fprintf(stderr, "Failed to get MCE status\n"); - } else if (r) { + if (kvm_mce_in_progress(data->env)) { return; } } @@ -1752,10 +1750,7 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) * If there is an MCE excpetion being processed, ignore * this SRAO MCE */ - r = kvm_mce_in_exception(env); - if (r == -1) { - fprintf(stderr, "Failed to get MCE status\n"); - } else if (r) { + if (kvm_mce_in_progress(env)) { return 0; } /* Fake an Intel architectural Memory scrubbing UCR */ From e387c33892be35ca70255739a2fe118f76c95ac3 Mon Sep 17 00:00:00 2001 From: Jin Dongming Date: Wed, 22 Dec 2010 12:24:38 +0900 Subject: [PATCH 07/31] kvm: kvm_mce_inj_* subroutines for templated error injections Refactor codes for maintainability. 
Signed-off-by: Hidetoshi Seto Signed-off-by: Jin Dongming Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 111 +++++++++++++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 40 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 5a699fcaec..ce01e182c6 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1722,44 +1722,75 @@ static void kvm_mce_broadcast_rest(CPUState *env) } } } + +static void kvm_mce_inj_srar_dataload(CPUState *env, target_phys_addr_t paddr) +{ + struct kvm_x86_mce mce = { + .bank = 9, + .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | MCI_STATUS_AR | 0x134, + .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV, + .addr = paddr, + .misc = (MCM_ADDR_PHYS << 6) | 0xc, + }; + int r; + + r = kvm_set_mce(env, &mce); + if (r < 0) { + fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); + abort(); + } + kvm_mce_broadcast_rest(env); +} + +static void kvm_mce_inj_srao_memscrub(CPUState *env, target_phys_addr_t paddr) +{ + struct kvm_x86_mce mce = { + .bank = 9, + .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | 0xc0, + .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, + .addr = paddr, + .misc = (MCM_ADDR_PHYS << 6) | 0xc, + }; + int r; + + r = kvm_set_mce(env, &mce); + if (r < 0) { + fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); + abort(); + } + kvm_mce_broadcast_rest(env); +} + +static void kvm_mce_inj_srao_memscrub2(CPUState *env, target_phys_addr_t paddr) +{ + uint64_t status; + + status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | 0xc0; + kvm_inject_x86_mce(env, 9, status, + MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr, + (MCM_ADDR_PHYS << 6) | 0xc, ABORT_ON_ERROR); + + kvm_mce_broadcast_rest(env); +} + #endif int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) { #if defined(KVM_CAP_MCE) - struct kvm_x86_mce mce = { - .bank = 9, - }; void *vaddr; ram_addr_t ram_addr; target_phys_addr_t paddr; - int r; if ((env->mcg_cap & MCG_SER_P) && addr && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) { - if (code == BUS_MCEERR_AR) { - /* Fake an Intel architectural Data Load SRAR UCR */ - mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN - | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S - | MCI_STATUS_AR | 0x134; - mce.misc = (MCM_ADDR_PHYS << 6) | 0xc; - mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV; - } else { - /* - * If there is an MCE excpetion being processed, ignore - * this SRAO MCE - */ - if (kvm_mce_in_progress(env)) { - return 0; - } - /* Fake an Intel architectural Memory scrubbing UCR */ - mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN - | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S - | 0xc0; - mce.misc = (MCM_ADDR_PHYS << 6) | 0xc; - mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV; - } vaddr = (void *)addr; if (qemu_ram_addr_from_host(vaddr, &ram_addr) || !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) { @@ -1772,13 +1803,20 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) hardware_memory_error(); } } - mce.addr = paddr; - r = kvm_set_mce(env, &mce); - if (r < 0) { - fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); - abort(); + + if (code == BUS_MCEERR_AR) { + /* Fake an Intel architectural Data Load SRAR UCR */ + kvm_mce_inj_srar_dataload(env, paddr); + } else { + /* + * If there is an MCE excpetion being processed, ignore + * this SRAO 
MCE + */ + if (!kvm_mce_in_progress(env)) { + /* Fake an Intel architectural Memory scrubbing UCR */ + kvm_mce_inj_srao_memscrub(env, paddr); + } } - kvm_mce_broadcast_rest(env); } else #endif { @@ -1797,7 +1835,6 @@ int kvm_on_sigbus(int code, void *addr) { #if defined(KVM_CAP_MCE) if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) { - uint64_t status; void *vaddr; ram_addr_t ram_addr; target_phys_addr_t paddr; @@ -1810,13 +1847,7 @@ int kvm_on_sigbus(int code, void *addr) "QEMU itself instead of guest system!: %p\n", addr); return 0; } - status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN - | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S - | 0xc0; - kvm_inject_x86_mce(first_cpu, 9, status, - MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr, - (MCM_ADDR_PHYS << 6) | 0xc, ABORT_ON_ERROR); - kvm_mce_broadcast_rest(first_cpu); + kvm_mce_inj_srao_memscrub2(first_cpu, paddr); } else #endif { From 7cc2cc3e2608b182f1e0fc7ecae6e3b1fa4f46e0 Mon Sep 17 00:00:00 2001 From: Jin Dongming Date: Wed, 22 Dec 2010 12:24:51 +0900 Subject: [PATCH 08/31] kvm: introduce kvm_inject_x86_mce_on Pass a table instead of multiple args. Note: kvm_inject_x86_mce(env, bank, status, mcg_status, addr, misc, abort_on_error); is equal to: struct kvm_x86_mce mce = { .bank = bank, .status = status, .mcg_status = mcg_status, .addr = addr, .misc = misc, }; kvm_inject_x86_mce_on(env, &mce, abort_on_error); Signed-off-by: Hidetoshi Seto Signed-off-by: Jin Dongming Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 57 ++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index ce01e182c6..9a4bf98831 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -263,6 +263,23 @@ static void kvm_do_inject_x86_mce(void *_data) } } +static void kvm_inject_x86_mce_on(CPUState *env, struct kvm_x86_mce *mce, + int flag) +{ + struct kvm_x86_mce_data data = { + .env = env, + .mce = mce, + .abort_on_error = (flag & ABORT_ON_ERROR), + }; + + if (!env->mcg_cap) { + fprintf(stderr, "MCE support is not enabled!\n"); + return; + } + + run_on_cpu(env, kvm_do_inject_x86_mce, &data); +} + static void kvm_mce_broadcast_rest(CPUState *env); #endif @@ -278,21 +295,12 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, .addr = addr, .misc = misc, }; - struct kvm_x86_mce_data data = { - .env = cenv, - .mce = &mce, - }; - - if (!cenv->mcg_cap) { - fprintf(stderr, "MCE support is not enabled!\n"); - return; - } if (flag & MCE_BROADCAST) { kvm_mce_broadcast_rest(cenv); } - run_on_cpu(cenv, kvm_do_inject_x86_mce, &data); + kvm_inject_x86_mce_on(cenv, &mce, flag); #else if (flag & ABORT_ON_ERROR) { abort(); @@ -1708,6 +1716,13 @@ static void hardware_memory_error(void) #ifdef KVM_CAP_MCE static void kvm_mce_broadcast_rest(CPUState *env) { + struct kvm_x86_mce mce = { + .bank = 1, + .status = MCI_STATUS_VAL | MCI_STATUS_UC, + .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, + .addr = 0, + .misc = 0, + }; CPUState *cenv; /* Broadcast MCA signal for processor version 06H_EH and above */ @@ -1716,9 +1731,7 @@ static void kvm_mce_broadcast_rest(CPUState *env) if (cenv == env) { continue; } - kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC, - MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, - ABORT_ON_ERROR); + kvm_inject_x86_mce_on(cenv, &mce, ABORT_ON_ERROR); } } } @@ -1767,15 +1780,17 @@ static void kvm_mce_inj_srao_memscrub(CPUState *env, target_phys_addr_t paddr) static void kvm_mce_inj_srao_memscrub2(CPUState *env, 
target_phys_addr_t paddr) { - uint64_t status; - - status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN - | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S - | 0xc0; - kvm_inject_x86_mce(env, 9, status, - MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr, - (MCM_ADDR_PHYS << 6) | 0xc, ABORT_ON_ERROR); + struct kvm_x86_mce mce = { + .bank = 9, + .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | 0xc0, + .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, + .addr = paddr, + .misc = (MCM_ADDR_PHYS << 6) | 0xc, + }; + kvm_inject_x86_mce_on(env, &mce, ABORT_ON_ERROR); kvm_mce_broadcast_rest(env); } From acaa75507b34f7b588924a09c76c6848d209e08c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Dec 2010 15:56:44 +0100 Subject: [PATCH 09/31] kvm: x86: Fix DPL write back of segment registers The DPL is stored in the flags and not in the selector. In fact, the RPL may differ from the DPL at some point in time, and so we were corrupting the guest state so far. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- target-i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 9a4bf98831..ee7bdf8012 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -602,7 +602,7 @@ static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs) lhs->limit = rhs->limit; lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; lhs->present = (flags & DESC_P_MASK) != 0; - lhs->dpl = rhs->selector & 3; + lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3; lhs->db = (flags >> DESC_B_SHIFT) & 1; lhs->s = (flags & DESC_S_MASK) != 0; lhs->l = (flags >> DESC_L_SHIFT) & 1; From 3980e3024b2d1677a9910de5e35d22d5d8392522 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Dec 2010 15:57:35 +0100 Subject: [PATCH 10/31] kvm: x86: Remove obsolete SS.RPL/DPL aligment This seems to date back to the days KVM didn't support real mode. The check is no longer needed and, even worse, is corrupting the guest state in case SS.RPL != DPL. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- target-i386/kvm.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index ee7bdf8012..7e5982b132 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -780,13 +780,6 @@ static int kvm_put_sregs(CPUState *env) set_seg(&sregs.fs, &env->segs[R_FS]); set_seg(&sregs.gs, &env->segs[R_GS]); set_seg(&sregs.ss, &env->segs[R_SS]); - - if (env->cr[0] & CR0_PE_MASK) { - /* force ss cpl to cs cpl */ - sregs.ss.selector = (sregs.ss.selector & ~3) | - (sregs.cs.selector & 3); - sregs.ss.dpl = sregs.ss.selector & 3; - } } set_seg(&sregs.tr, &env->tr); From 95c077c91900c1420cd4f0be996ffeea6fb6cec8 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Dec 2010 15:58:23 +0100 Subject: [PATCH 11/31] kvm: x86: Prevent sign extension of DR7 in guest debugging mode This unbreaks guest debugging when the 4th hardware breakpoint used for guest debugging is a watchpoint of 4 or 8 byte lenght. The 31st bit of DR7 is set in that case and used to cause a sign extension to the high word which was breaking the guest state (vm entry failure). 
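A standalone sketch (not part of the patch) of the failure mode described above: the length code is promoted to a signed int, the shift for the 4th breakpoint slot sets bit 31, and OR-ing the negative intermediate into the 64-bit debug register sign-extends it into the high word; the cast added below keeps the intermediate unsigned:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint8_t len_code = 0x2;   /* DR7 length encoding for an 8-byte watchpoint */
        int n = 3;                      /* 4th hardware breakpoint slot */
        uint64_t dr7 = 0x0600;

        dr7 |= (len_code << (18 + n * 4));           /* bit 31 set, then sign-extended */
        printf("broken DR7: 0x%016" PRIx64 "\n", dr7);

        dr7 = 0x0600;
        dr7 |= ((uint32_t)len_code << (18 + n * 4)); /* stays in the low word */
        printf("fixed DR7:  0x%016" PRIx64 "\n", dr7);
        return 0;
    }
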
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- target-i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7e5982b132..85edaccad1 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1686,7 +1686,7 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) dbg->arch.debugreg[n] = hw_breakpoint[n].addr; dbg->arch.debugreg[7] |= (2 << (n * 2)) | (type_code[hw_breakpoint[n].type] << (16 + n*4)) | - (len_code[hw_breakpoint[n].len] << (18 + n*4)); + ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4)); } } /* Legal xcr0 for loading */ From b9bec74bcb16519a876ec21cd5277c526a9b512d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Dec 2010 16:19:29 +0100 Subject: [PATCH 12/31] kvm: x86: Fix a few coding style violations No functional changes. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- target-i386/kvm.c | 335 +++++++++++++++++++++++++--------------------- 1 file changed, 182 insertions(+), 153 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 85edaccad1..fda07d2a00 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -150,34 +150,34 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, #ifdef CONFIG_KVM_PARA struct kvm_para_features { - int cap; - int feature; + int cap; + int feature; } para_features[] = { #ifdef KVM_CAP_CLOCKSOURCE - { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE }, + { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE }, #endif #ifdef KVM_CAP_NOP_IO_DELAY - { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY }, + { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY }, #endif #ifdef KVM_CAP_PV_MMU - { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, + { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, #endif #ifdef KVM_CAP_ASYNC_PF - { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF }, + { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF }, #endif - { -1, -1 } + { -1, -1 } }; static int get_para_features(CPUState *env) { - int i, features = 0; + int i, features = 0; - for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) { - if (kvm_check_extension(env->kvm_state, para_features[i].cap)) - features |= (1 << para_features[i].feature); + for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) { + if (kvm_check_extension(env->kvm_state, para_features[i].cap)) { + features |= (1 << para_features[i].feature); } - - return features; + } + return features; } #endif @@ -389,13 +389,15 @@ int kvm_arch_init_vcpu(CPUState *env) c->index = j; cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); - if (i == 4 && c->eax == 0) + if (i == 4 && c->eax == 0) { break; - if (i == 0xb && !(c->ecx & 0xff00)) + } + if (i == 0xb && !(c->ecx & 0xff00)) { break; - if (i == 0xd && c->eax == 0) + } + if (i == 0xd && c->eax == 0) { break; - + } c = &cpuid_data.entries[cpuid_i++]; } break; @@ -425,17 +427,18 @@ int kvm_arch_init_vcpu(CPUState *env) uint64_t mcg_cap; int banks; - if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks)) + if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks)) { perror("kvm_get_mce_cap_supported FAILED"); - else { + } else { if (banks > MCE_BANKS_DEF) banks = MCE_BANKS_DEF; mcg_cap &= MCE_CAP_DEF; mcg_cap |= banks; - if (kvm_setup_mce(env, &mcg_cap)) + if (kvm_setup_mce(env, &mcg_cap)) { perror("kvm_setup_mce FAILED"); - else + } else { env->mcg_cap = mcg_cap; + } } } #endif @@ -577,7 +580,7 @@ int kvm_arch_init(KVMState *s, int smp_cpus) return kvm_init_identity_map_page(s); } - + static void set_v8086_seg(struct kvm_segment *lhs, const 
SegmentCache *rhs) { lhs->selector = rhs->selector; @@ -616,23 +619,23 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) lhs->selector = rhs->selector; lhs->base = rhs->base; lhs->limit = rhs->limit; - lhs->flags = - (rhs->type << DESC_TYPE_SHIFT) - | (rhs->present * DESC_P_MASK) - | (rhs->dpl << DESC_DPL_SHIFT) - | (rhs->db << DESC_B_SHIFT) - | (rhs->s * DESC_S_MASK) - | (rhs->l << DESC_L_SHIFT) - | (rhs->g * DESC_G_MASK) - | (rhs->avl * DESC_AVL_MASK); + lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | + (rhs->present * DESC_P_MASK) | + (rhs->dpl << DESC_DPL_SHIFT) | + (rhs->db << DESC_B_SHIFT) | + (rhs->s * DESC_S_MASK) | + (rhs->l << DESC_L_SHIFT) | + (rhs->g * DESC_G_MASK) | + (rhs->avl * DESC_AVL_MASK); } static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set) { - if (set) + if (set) { *kvm_reg = *qemu_reg; - else + } else { *qemu_reg = *kvm_reg; + } } static int kvm_getput_regs(CPUState *env, int set) @@ -642,8 +645,9 @@ static int kvm_getput_regs(CPUState *env, int set) if (!set) { ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, ®s); - if (ret < 0) + if (ret < 0) { return ret; + } } kvm_getput_reg(®s.rax, &env->regs[R_EAX], set); @@ -668,8 +672,9 @@ static int kvm_getput_regs(CPUState *env, int set) kvm_getput_reg(®s.rflags, &env->eflags, set); kvm_getput_reg(®s.rip, &env->eip, set); - if (set) + if (set) { ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, ®s); + } return ret; } @@ -683,8 +688,9 @@ static int kvm_put_fpu(CPUState *env) fpu.fsw = env->fpus & ~(7 << 11); fpu.fsw |= (env->fpstt & 7) << 11; fpu.fcw = env->fpuc; - for (i = 0; i < 8; ++i) - fpu.ftwx |= (!env->fptags[i]) << i; + for (i = 0; i < 8; ++i) { + fpu.ftwx |= (!env->fptags[i]) << i; + } memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs); memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs); fpu.mxcsr = env->mxcsr; @@ -709,8 +715,9 @@ static int kvm_put_xsave(CPUState *env) struct kvm_xsave* xsave; uint16_t cwd, swd, twd, fop; - if (!kvm_has_xsave()) + if (!kvm_has_xsave()) { return kvm_put_fpu(env); + } xsave = qemu_memalign(4096, sizeof(struct kvm_xsave)); memset(xsave, 0, sizeof(struct kvm_xsave)); @@ -718,8 +725,9 @@ static int kvm_put_xsave(CPUState *env) swd = env->fpus & ~(7 << 11); swd |= (env->fpstt & 7) << 11; cwd = env->fpuc; - for (i = 0; i < 8; ++i) + for (i = 0; i < 8; ++i) { twd |= (!env->fptags[i]) << i; + } xsave->region[0] = (uint32_t)(swd << 16) + cwd; xsave->region[1] = (uint32_t)(fop << 16) + twd; memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs, @@ -743,8 +751,9 @@ static int kvm_put_xcrs(CPUState *env) #ifdef KVM_CAP_XCRS struct kvm_xcrs xcrs; - if (!kvm_has_xcrs()) + if (!kvm_has_xcrs()) { return 0; + } xcrs.nr_xcrs = 1; xcrs.flags = 0; @@ -767,19 +776,19 @@ static int kvm_put_sregs(CPUState *env) } if ((env->eflags & VM_MASK)) { - set_v8086_seg(&sregs.cs, &env->segs[R_CS]); - set_v8086_seg(&sregs.ds, &env->segs[R_DS]); - set_v8086_seg(&sregs.es, &env->segs[R_ES]); - set_v8086_seg(&sregs.fs, &env->segs[R_FS]); - set_v8086_seg(&sregs.gs, &env->segs[R_GS]); - set_v8086_seg(&sregs.ss, &env->segs[R_SS]); + set_v8086_seg(&sregs.cs, &env->segs[R_CS]); + set_v8086_seg(&sregs.ds, &env->segs[R_DS]); + set_v8086_seg(&sregs.es, &env->segs[R_ES]); + set_v8086_seg(&sregs.fs, &env->segs[R_FS]); + set_v8086_seg(&sregs.gs, &env->segs[R_GS]); + set_v8086_seg(&sregs.ss, &env->segs[R_SS]); } else { - set_seg(&sregs.cs, &env->segs[R_CS]); - set_seg(&sregs.ds, &env->segs[R_DS]); - set_seg(&sregs.es, &env->segs[R_ES]); - set_seg(&sregs.fs, &env->segs[R_FS]); - set_seg(&sregs.gs, 
&env->segs[R_GS]); - set_seg(&sregs.ss, &env->segs[R_SS]); + set_seg(&sregs.cs, &env->segs[R_CS]); + set_seg(&sregs.ds, &env->segs[R_DS]); + set_seg(&sregs.es, &env->segs[R_ES]); + set_seg(&sregs.fs, &env->segs[R_FS]); + set_seg(&sregs.gs, &env->segs[R_GS]); + set_seg(&sregs.ss, &env->segs[R_SS]); } set_seg(&sregs.tr, &env->tr); @@ -822,10 +831,12 @@ static int kvm_put_msrs(CPUState *env, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); - if (kvm_has_msr_star(env)) - kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); - if (kvm_has_msr_hsave_pa(env)) + if (kvm_has_msr_star(env)) { + kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); + } + if (kvm_has_msr_hsave_pa(env)) { kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -854,13 +865,15 @@ static int kvm_put_msrs(CPUState *env, int level) #ifdef KVM_CAP_MCE if (env->mcg_cap) { int i; - if (level == KVM_PUT_RESET_STATE) + + if (level == KVM_PUT_RESET_STATE) { kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); - else if (level == KVM_PUT_FULL_STATE) { + } else if (level == KVM_PUT_FULL_STATE) { kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl); - for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) + for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]); + } } } #endif @@ -878,14 +891,16 @@ static int kvm_get_fpu(CPUState *env) int i, ret; ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu); - if (ret < 0) + if (ret < 0) { return ret; + } env->fpstt = (fpu.fsw >> 11) & 7; env->fpus = fpu.fsw; env->fpuc = fpu.fcw; - for (i = 0; i < 8; ++i) - env->fptags[i] = !((fpu.ftwx >> i) & 1); + for (i = 0; i < 8; ++i) { + env->fptags[i] = !((fpu.ftwx >> i) & 1); + } memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs); memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs); env->mxcsr = fpu.mxcsr; @@ -900,8 +915,9 @@ static int kvm_get_xsave(CPUState *env) int ret, i; uint16_t cwd, swd, twd, fop; - if (!kvm_has_xsave()) + if (!kvm_has_xsave()) { return kvm_get_fpu(env); + } xsave = qemu_memalign(4096, sizeof(struct kvm_xsave)); ret = kvm_vcpu_ioctl(env, KVM_GET_XSAVE, xsave); @@ -917,8 +933,9 @@ static int kvm_get_xsave(CPUState *env) env->fpstt = (swd >> 11) & 7; env->fpus = swd; env->fpuc = cwd; - for (i = 0; i < 8; ++i) + for (i = 0; i < 8; ++i) { env->fptags[i] = !((twd >> i) & 1); + } env->mxcsr = xsave->region[XSAVE_MXCSR]; memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE], sizeof env->fpregs); @@ -940,19 +957,22 @@ static int kvm_get_xcrs(CPUState *env) int i, ret; struct kvm_xcrs xcrs; - if (!kvm_has_xcrs()) + if (!kvm_has_xcrs()) { return 0; + } ret = kvm_vcpu_ioctl(env, KVM_GET_XCRS, &xcrs); - if (ret < 0) + if (ret < 0) { return ret; + } - for (i = 0; i < xcrs.nr_xcrs; i++) + for (i = 0; i < xcrs.nr_xcrs; i++) { /* Only support xcr0 now */ if (xcrs.xcrs[0].xcr == 0) { env->xcr0 = xcrs.xcrs[0].value; break; } + } return 0; #else return 0; @@ -966,8 +986,9 @@ static int kvm_get_sregs(CPUState *env) int bit, i, ret; ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs); - if (ret < 0) + if (ret < 0) { return ret; + } /* There can only be one pending IRQ set in the bitmap at a time, so try to find it and save its number instead (-1 
for none). */ @@ -1005,21 +1026,19 @@ static int kvm_get_sregs(CPUState *env) env->efer = sregs.efer; //cpu_set_apic_tpr(env->apic_state, sregs.cr8); -#define HFLAG_COPY_MASK ~( \ - HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \ - HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \ - HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ - HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) - - +#define HFLAG_COPY_MASK \ + ~( HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \ + HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \ + HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ + HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT); hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & - (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); + (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK)); hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << - (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); + (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); if (env->efer & MSR_EFER_LMA) { hflags |= HF_LMA_MASK; @@ -1029,19 +1048,16 @@ static int kvm_get_sregs(CPUState *env) hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; } else { hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >> - (DESC_B_SHIFT - HF_CS32_SHIFT); + (DESC_B_SHIFT - HF_CS32_SHIFT); hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >> - (DESC_B_SHIFT - HF_SS32_SHIFT); - if (!(env->cr[0] & CR0_PE_MASK) || - (env->eflags & VM_MASK) || - !(hflags & HF_CS32_MASK)) { - hflags |= HF_ADDSEG_MASK; - } else { - hflags |= ((env->segs[R_DS].base | - env->segs[R_ES].base | - env->segs[R_SS].base) != 0) << - HF_ADDSEG_SHIFT; - } + (DESC_B_SHIFT - HF_SS32_SHIFT); + if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK) || + !(hflags & HF_CS32_MASK)) { + hflags |= HF_ADDSEG_MASK; + } else { + hflags |= ((env->segs[R_DS].base | env->segs[R_ES].base | + env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT; + } } env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags; @@ -1061,10 +1077,12 @@ static int kvm_get_msrs(CPUState *env) msrs[n++].index = MSR_IA32_SYSENTER_CS; msrs[n++].index = MSR_IA32_SYSENTER_ESP; msrs[n++].index = MSR_IA32_SYSENTER_EIP; - if (kvm_has_msr_star(env)) - msrs[n++].index = MSR_STAR; - if (kvm_has_msr_hsave_pa(env)) + if (kvm_has_msr_star(env)) { + msrs[n++].index = MSR_STAR; + } + if (kvm_has_msr_hsave_pa(env)) { msrs[n++].index = MSR_VM_HSAVE_PA; + } msrs[n++].index = MSR_IA32_TSC; #ifdef TARGET_X86_64 if (lm_capable_kernel) { @@ -1084,15 +1102,17 @@ static int kvm_get_msrs(CPUState *env) if (env->mcg_cap) { msrs[n++].index = MSR_MCG_STATUS; msrs[n++].index = MSR_MCG_CTL; - for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) + for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { msrs[n++].index = MSR_MC0_CTL + i; + } } #endif msr_data.info.nmsrs = n; ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data); - if (ret < 0) + if (ret < 0) { return ret; + } for (i = 0; i < ret; i++) { switch (msrs[i].index) { @@ -1320,7 +1340,7 @@ static int kvm_get_debugregs(CPUState *env) ret = kvm_vcpu_ioctl(env, KVM_GET_DEBUGREGS, &dbgregs); if (ret < 0) { - return ret; + return ret; } for (i = 0; i < 4; i++) { env->dr[i] = dbgregs.db[i]; @@ -1339,44 +1359,44 @@ int kvm_arch_put_registers(CPUState *env, int level) assert(cpu_is_stopped(env) || qemu_cpu_self(env)); ret = kvm_getput_regs(env, 1); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_xsave(env); - if (ret < 0) + if (ret < 0) { return 
ret; - + } ret = kvm_put_xcrs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_sregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_msrs(env, level); - if (ret < 0) + if (ret < 0) { return ret; - + } if (level >= KVM_PUT_RESET_STATE) { ret = kvm_put_mp_state(env); - if (ret < 0) + if (ret < 0) { return ret; + } } - ret = kvm_put_vcpu_events(env, level); - if (ret < 0) + if (ret < 0) { return ret; - + } /* must be last */ ret = kvm_guest_debug_workarounds(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_debugregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } return 0; } @@ -1387,37 +1407,37 @@ int kvm_arch_get_registers(CPUState *env) assert(cpu_is_stopped(env) || qemu_cpu_self(env)); ret = kvm_getput_regs(env, 0); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_xsave(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_xcrs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_sregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_msrs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_mp_state(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_vcpu_events(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_debugregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } return 0; } @@ -1451,10 +1471,11 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) * interrupt, request an interrupt window exit. This will * cause a return to userspace as soon as the guest is ready to * receive interrupts. */ - if ((env->interrupt_request & CPU_INTERRUPT_HARD)) + if ((env->interrupt_request & CPU_INTERRUPT_HARD)) { run->request_interrupt_window = 1; - else + } else { run->request_interrupt_window = 0; + } DPRINTF("setting tpr\n"); run->cr8 = cpu_get_apic_tpr(env->apic_state); @@ -1464,11 +1485,11 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) int kvm_arch_post_run(CPUState *env, struct kvm_run *run) { - if (run->if_flag) + if (run->if_flag) { env->eflags |= IF_MASK; - else + } else { env->eflags &= ~IF_MASK; - + } cpu_set_apic_tpr(env->apic_state, run->cr8); cpu_set_apic_base(env->apic_state, run->apic_base); @@ -1524,8 +1545,9 @@ int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp) static const uint8_t int3 = 0xcc; if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) || - cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) + cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) { return -EINVAL; + } return 0; } @@ -1534,8 +1556,9 @@ int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp) uint8_t int3; if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc || - cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) + cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) { return -EINVAL; + } return 0; } @@ -1551,10 +1574,12 @@ static int find_hw_breakpoint(target_ulong addr, int len, int type) { int n; - for (n = 0; n < nb_hw_breakpoint; n++) + for (n = 0; n < nb_hw_breakpoint; n++) { if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type && - (hw_breakpoint[n].len == len || len == -1)) + (hw_breakpoint[n].len == len || len == -1)) { return n; + } + } return -1; } @@ -1573,8 +1598,9 @@ int kvm_arch_insert_hw_breakpoint(target_ulong addr, case 2: case 4: case 8: - if (addr & (len - 1)) + if (addr & (len - 1)) { return -EINVAL; + } break; default: return 
-EINVAL; @@ -1584,12 +1610,12 @@ int kvm_arch_insert_hw_breakpoint(target_ulong addr, return -ENOSYS; } - if (nb_hw_breakpoint == 4) + if (nb_hw_breakpoint == 4) { return -ENOBUFS; - - if (find_hw_breakpoint(addr, len, type) >= 0) + } + if (find_hw_breakpoint(addr, len, type) >= 0) { return -EEXIST; - + } hw_breakpoint[nb_hw_breakpoint].addr = addr; hw_breakpoint[nb_hw_breakpoint].len = len; hw_breakpoint[nb_hw_breakpoint].type = type; @@ -1604,9 +1630,9 @@ int kvm_arch_remove_hw_breakpoint(target_ulong addr, int n; n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type); - if (n < 0) + if (n < 0) { return -ENOENT; - + } nb_hw_breakpoint--; hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint]; @@ -1627,11 +1653,12 @@ int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info) if (arch_info->exception == 1) { if (arch_info->dr6 & (1 << 14)) { - if (cpu_single_env->singlestep_enabled) + if (cpu_single_env->singlestep_enabled) { handle = 1; + } } else { - for (n = 0; n < 4; n++) - if (arch_info->dr6 & (1 << n)) + for (n = 0; n < 4; n++) { + if (arch_info->dr6 & (1 << n)) { switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) { case 0x0: handle = 1; @@ -1649,10 +1676,12 @@ int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info) hw_watchpoint.flags = BP_MEM_ACCESS; break; } + } + } } - } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc)) + } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc)) { handle = 1; - + } if (!handle) { cpu_synchronize_state(cpu_single_env); assert(cpu_single_env->exception_injected == -1); @@ -1676,9 +1705,9 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) }; int n; - if (kvm_sw_breakpoints_active(env)) + if (kvm_sw_breakpoints_active(env)) { dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; - + } if (nb_hw_breakpoint > 0) { dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; dbg->arch.debugreg[7] = 0x0600; @@ -1696,8 +1725,8 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) bool kvm_arch_stop_on_emulation_error(CPUState *env) { - return !(env->cr[0] & CR0_PE_MASK) || - ((env->segs[R_CS].selector & 3) != 3); + return !(env->cr[0] & CR0_PE_MASK) || + ((env->segs[R_CS].selector & 3) != 3); } static void hardware_memory_error(void) From a426e122173f36f05ea2cb72dcff77b7408546ce Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 4 Jan 2011 09:32:13 +0100 Subject: [PATCH 13/31] kvm: Fix coding style violations No functional changes. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 139 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 79 insertions(+), 60 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 935c4362e6..86ddbd6700 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -88,10 +88,12 @@ static KVMSlot *kvm_alloc_slot(KVMState *s) for (i = 0; i < ARRAY_SIZE(s->slots); i++) { /* KVM private memory slots */ - if (i >= 8 && i < 12) + if (i >= 8 && i < 12) { continue; - if (s->slots[i].memory_size == 0) + } + if (s->slots[i].memory_size == 0) { return &s->slots[i]; + } } fprintf(stderr, "%s: no free slot available\n", __func__); @@ -226,9 +228,10 @@ int kvm_init_vcpu(CPUState *env) } #ifdef KVM_CAP_COALESCED_MMIO - if (s->coalesced_mmio && !s->coalesced_mmio_ring) - s->coalesced_mmio_ring = (void *) env->kvm_run + - s->coalesced_mmio * PAGE_SIZE; + if (s->coalesced_mmio && !s->coalesced_mmio_ring) { + s->coalesced_mmio_ring = + (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE; + } #endif ret = kvm_arch_init_vcpu(env); @@ -275,16 +278,14 @@ static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size) { - return kvm_dirty_pages_log_change(phys_addr, size, - KVM_MEM_LOG_DIRTY_PAGES, - KVM_MEM_LOG_DIRTY_PAGES); + return kvm_dirty_pages_log_change(phys_addr, size, KVM_MEM_LOG_DIRTY_PAGES, + KVM_MEM_LOG_DIRTY_PAGES); } int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size) { - return kvm_dirty_pages_log_change(phys_addr, size, - 0, - KVM_MEM_LOG_DIRTY_PAGES); + return kvm_dirty_pages_log_change(phys_addr, size, 0, + KVM_MEM_LOG_DIRTY_PAGES); } static int kvm_set_migration_log(int enable) @@ -356,7 +357,7 @@ static int kvm_get_dirty_pages_log_range(unsigned long start_addr, * @end_addr: end of logged region. 
*/ static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, - target_phys_addr_t end_addr) + target_phys_addr_t end_addr) { KVMState *s = kvm_state; unsigned long size, allocated_size = 0; @@ -480,9 +481,8 @@ static int kvm_check_many_ioeventfds(void) #endif } -static void kvm_set_phys_mem(target_phys_addr_t start_addr, - ram_addr_t size, - ram_addr_t phys_offset) +static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, + ram_addr_t phys_offset) { KVMState *s = kvm_state; ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK; @@ -589,13 +589,13 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, } /* in case the KVM bug workaround already "consumed" the new slot */ - if (!size) + if (!size) { return; - + } /* KVM does not need to know about this memory */ - if (flags >= IO_MEM_UNASSIGNED) + if (flags >= IO_MEM_UNASSIGNED) { return; - + } mem = kvm_alloc_slot(s); mem->memory_size = size; mem->start_addr = start_addr; @@ -611,30 +611,29 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, } static void kvm_client_set_memory(struct CPUPhysMemoryClient *client, - target_phys_addr_t start_addr, - ram_addr_t size, - ram_addr_t phys_offset) + target_phys_addr_t start_addr, + ram_addr_t size, ram_addr_t phys_offset) { - kvm_set_phys_mem(start_addr, size, phys_offset); + kvm_set_phys_mem(start_addr, size, phys_offset); } static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client, - target_phys_addr_t start_addr, - target_phys_addr_t end_addr) + target_phys_addr_t start_addr, + target_phys_addr_t end_addr) { - return kvm_physical_sync_dirty_bitmap(start_addr, end_addr); + return kvm_physical_sync_dirty_bitmap(start_addr, end_addr); } static int kvm_client_migration_log(struct CPUPhysMemoryClient *client, - int enable) + int enable) { - return kvm_set_migration_log(enable); + return kvm_set_migration_log(enable); } static CPUPhysMemoryClient kvm_cpu_phys_memory_client = { - .set_memory = kvm_client_set_memory, - .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap, - .migration_log = kvm_client_migration_log, + .set_memory = kvm_client_set_memory, + .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap, + .migration_log = kvm_client_migration_log, }; int kvm_init(int smp_cpus) @@ -651,9 +650,9 @@ int kvm_init(int smp_cpus) #ifdef KVM_CAP_SET_GUEST_DEBUG QTAILQ_INIT(&s->kvm_sw_breakpoints); #endif - for (i = 0; i < ARRAY_SIZE(s->slots); i++) + for (i = 0; i < ARRAY_SIZE(s->slots); i++) { s->slots[i].slot = i; - + } s->vmfd = -1; s->fd = qemu_open("/dev/kvm", O_RDWR); if (s->fd == -1) { @@ -664,8 +663,9 @@ int kvm_init(int smp_cpus) ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0); if (ret < KVM_API_VERSION) { - if (ret > 0) + if (ret > 0) { ret = -EINVAL; + } fprintf(stderr, "kvm version too old\n"); goto err; } @@ -750,8 +750,9 @@ int kvm_init(int smp_cpus) #endif ret = kvm_arch_init(s, smp_cpus); - if (ret < 0) + if (ret < 0) { goto err; + } kvm_state = s; cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client); @@ -762,10 +763,12 @@ int kvm_init(int smp_cpus) err: if (s) { - if (s->vmfd != -1) + if (s->vmfd != -1) { close(s->vmfd); - if (s->fd != -1) + } + if (s->fd != -1) { close(s->fd); + } } qemu_free(s); @@ -829,8 +832,9 @@ static void kvm_handle_internal_error(CPUState *env, struct kvm_run *run) cpu_dump_state(env, stderr, fprintf, 0); if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) { fprintf(stderr, "emulation failure\n"); - if (!kvm_arch_stop_on_emulation_error(env)) - return; + if 
(!kvm_arch_stop_on_emulation_error(env)) { + return; + } } /* FIXME: Should trigger a qmp message to let management know * something went wrong. @@ -870,8 +874,9 @@ static void do_kvm_cpu_synchronize_state(void *_env) void kvm_cpu_synchronize_state(CPUState *env) { - if (!env->kvm_vcpu_dirty) + if (!env->kvm_vcpu_dirty) { run_on_cpu(env, do_kvm_cpu_synchronize_state, env); + } } void kvm_cpu_synchronize_post_reset(CPUState *env) @@ -1011,9 +1016,9 @@ int kvm_ioctl(KVMState *s, int type, ...) va_end(ap); ret = ioctl(s->fd, type, arg); - if (ret == -1) + if (ret == -1) { ret = -errno; - + } return ret; } @@ -1028,9 +1033,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) va_end(ap); ret = ioctl(s->vmfd, type, arg); - if (ret == -1) + if (ret == -1) { ret = -errno; - + } return ret; } @@ -1045,9 +1050,9 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...) va_end(ap); ret = ioctl(env->kvm_fd, type, arg); - if (ret == -1) + if (ret == -1) { ret = -errno; - + } return ret; } @@ -1116,8 +1121,9 @@ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp; QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) { - if (bp->pc == pc) + if (bp->pc == pc) { return bp; + } } return NULL; } @@ -1172,8 +1178,9 @@ int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, } bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint)); - if (!bp) + if (!bp) { return -ENOMEM; + } bp->pc = addr; bp->use_count = 1; @@ -1187,14 +1194,16 @@ int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, bp, entry); } else { err = kvm_arch_insert_hw_breakpoint(addr, len, type); - if (err) + if (err) { return err; + } } for (env = first_cpu; env != NULL; env = env->next_cpu) { err = kvm_update_guest_debug(env, 0); - if (err) + if (err) { return err; + } } return 0; } @@ -1208,8 +1217,9 @@ int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr, if (type == GDB_BREAKPOINT_SW) { bp = kvm_find_sw_breakpoint(current_env, addr); - if (!bp) + if (!bp) { return -ENOENT; + } if (bp->use_count > 1) { bp->use_count--; @@ -1217,21 +1227,24 @@ int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr, } err = kvm_arch_remove_sw_breakpoint(current_env, bp); - if (err) + if (err) { return err; + } QTAILQ_REMOVE(¤t_env->kvm_state->kvm_sw_breakpoints, bp, entry); qemu_free(bp); } else { err = kvm_arch_remove_hw_breakpoint(addr, len, type); - if (err) + if (err) { return err; + } } for (env = first_cpu; env != NULL; env = env->next_cpu) { err = kvm_update_guest_debug(env, 0); - if (err) + if (err) { return err; + } } return 0; } @@ -1246,15 +1259,17 @@ void kvm_remove_all_breakpoints(CPUState *current_env) if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) { /* Try harder to find a CPU that currently sees the breakpoint. 
*/ for (env = first_cpu; env != NULL; env = env->next_cpu) { - if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) + if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) { break; + } } } } kvm_arch_remove_all_hw_breakpoints(); - for (env = first_cpu; env != NULL; env = env->next_cpu) + for (env = first_cpu; env != NULL; env = env->next_cpu) { kvm_update_guest_debug(env, 0); + } } #else /* !KVM_CAP_SET_GUEST_DEBUG */ @@ -1286,8 +1301,9 @@ int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset) struct kvm_signal_mask *sigmask; int r; - if (!sigset) + if (!sigset) { return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL); + } sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset)); @@ -1342,13 +1358,16 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign) .fd = fd, }; int r; - if (!kvm_enabled()) + if (!kvm_enabled()) { return -ENOSYS; - if (!assign) + } + if (!assign) { kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN; + } r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick); - if (r < 0) + if (r < 0) { return r; + } return 0; #else return -ENOSYS; From 646042e1aba31f377892e48f6145fbc8487d4481 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:05 +0100 Subject: [PATCH 14/31] kvm: x86: Swallow KVM_EXIT_SET_TPR This exit only triggers activity in the common exit path, but we should accept it in order to be able to detect unknown exit types. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index fda07d2a00..0aeb079004 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1534,6 +1534,9 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) DPRINTF("handle_hlt\n"); ret = kvm_handle_halt(env); break; + case KVM_EXIT_SET_TPR: + ret = 1; + break; } return ret; From 73aaec4a39b3cf11082303a6cf6bcde8796c09c6 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:06 +0100 Subject: [PATCH 15/31] kvm: Stop on all fatal exit reasons Ensure that we stop the guest whenever we face a fatal or unknown exit reason. If we stop, we also have to enforce a cpu loop exit. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 15 +++++++++++---- target-i386/kvm.c | 4 ++++ target-ppc/kvm.c | 4 ++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 86ddbd6700..eaf927248f 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -815,7 +815,7 @@ static int kvm_handle_io(uint16_t port, void *data, int direction, int size, } #ifdef KVM_CAP_INTERNAL_ERROR_DATA -static void kvm_handle_internal_error(CPUState *env, struct kvm_run *run) +static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run) { if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) { @@ -833,13 +833,13 @@ static void kvm_handle_internal_error(CPUState *env, struct kvm_run *run) if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) { fprintf(stderr, "emulation failure\n"); if (!kvm_arch_stop_on_emulation_error(env)) { - return; + return 0; } } /* FIXME: Should trigger a qmp message to let management know * something went wrong. 
*/ - vm_stop(0); + return -1; } #endif @@ -967,16 +967,19 @@ int kvm_cpu_exec(CPUState *env) break; case KVM_EXIT_UNKNOWN: DPRINTF("kvm_exit_unknown\n"); + ret = -1; break; case KVM_EXIT_FAIL_ENTRY: DPRINTF("kvm_exit_fail_entry\n"); + ret = -1; break; case KVM_EXIT_EXCEPTION: DPRINTF("kvm_exit_exception\n"); + ret = -1; break; #ifdef KVM_CAP_INTERNAL_ERROR_DATA case KVM_EXIT_INTERNAL_ERROR: - kvm_handle_internal_error(env, run); + ret = kvm_handle_internal_error(env, run); break; #endif case KVM_EXIT_DEBUG: @@ -997,6 +1000,10 @@ int kvm_cpu_exec(CPUState *env) } } while (ret > 0); + if (ret < 0) { + vm_stop(0); + env->exit_request = 1; + } if (env->exit_request) { env->exit_request = 0; env->exception_index = EXCP_INTERRUPT; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 0aeb079004..6b4abaa6c0 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1537,6 +1537,10 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) case KVM_EXIT_SET_TPR: ret = 1; break; + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; + break; } return ret; diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 5caa07cba8..849b404b83 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -307,6 +307,10 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) dprintf("handle halt\n"); ret = kvmppc_handle_halt(env); break; + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; + break; } return ret; From bb44e0d12df70bd4a653341db4446daf6a9326be Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:07 +0100 Subject: [PATCH 16/31] kvm: Improve reporting of fatal errors Report KVM_EXIT_UNKNOWN, KVM_EXIT_FAIL_ENTRY, and KVM_EXIT_EXCEPTION with more details to stderr. The latter two are so far x86-only, so move them into the arch-specific handler. Integrate the Intel real mode warning on KVM_EXIT_FAIL_ENTRY that qemu-kvm carries, but actually restrict it to Intel CPUs. Moreover, always dump the CPU state in case we fail. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 22 ++++++++-------------- target-i386/cpu.h | 2 ++ target-i386/cpuid.c | 5 ++--- target-i386/kvm.c | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index eaf927248f..10e11944b3 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -817,22 +817,22 @@ static int kvm_handle_io(uint16_t port, void *data, int direction, int size, #ifdef KVM_CAP_INTERNAL_ERROR_DATA static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run) { - + fprintf(stderr, "KVM internal error."); if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) { int i; - fprintf(stderr, "KVM internal error. 
Suberror: %d\n", - run->internal.suberror); - + fprintf(stderr, " Suberror: %d\n", run->internal.suberror); for (i = 0; i < run->internal.ndata; ++i) { fprintf(stderr, "extra data[%d]: %"PRIx64"\n", i, (uint64_t)run->internal.data[i]); } + } else { + fprintf(stderr, "\n"); } - cpu_dump_state(env, stderr, fprintf, 0); if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) { fprintf(stderr, "emulation failure\n"); if (!kvm_arch_stop_on_emulation_error(env)) { + cpu_dump_state(env, stderr, fprintf, 0); return 0; } } @@ -966,15 +966,8 @@ int kvm_cpu_exec(CPUState *env) ret = 1; break; case KVM_EXIT_UNKNOWN: - DPRINTF("kvm_exit_unknown\n"); - ret = -1; - break; - case KVM_EXIT_FAIL_ENTRY: - DPRINTF("kvm_exit_fail_entry\n"); - ret = -1; - break; - case KVM_EXIT_EXCEPTION: - DPRINTF("kvm_exit_exception\n"); + fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n", + (uint64_t)run->hw.hardware_exit_reason); ret = -1; break; #ifdef KVM_CAP_INTERNAL_ERROR_DATA @@ -1001,6 +994,7 @@ int kvm_cpu_exec(CPUState *env) } while (ret > 0); if (ret < 0) { + cpu_dump_state(env, stderr, fprintf, 0); vm_stop(0); env->exit_request = 1; } diff --git a/target-i386/cpu.h b/target-i386/cpu.h index dddcd749cc..a457423b4a 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -874,6 +874,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t *ecx, uint32_t *edx); int cpu_x86_register (CPUX86State *env, const char *cpu_model); void cpu_clear_apic_feature(CPUX86State *env); +void host_cpuid(uint32_t function, uint32_t count, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); /* helper.c */ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index 165045ec42..5382a283f5 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -103,9 +103,8 @@ typedef struct model_features_t { int check_cpuid = 0; int enforce_cpuid = 0; -static void host_cpuid(uint32_t function, uint32_t count, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +void host_cpuid(uint32_t function, uint32_t count, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { #if defined(CONFIG_KVM) uint32_t vec[4]; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 6b4abaa6c0..0ba13fc55d 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1525,8 +1525,19 @@ static int kvm_handle_halt(CPUState *env) return 1; } +static bool host_supports_vmx(void) +{ + uint32_t ecx, unused; + + host_cpuid(1, 0, &unused, &unused, &ecx, &unused); + return ecx & CPUID_EXT_VMX; +} + +#define VMX_INVALID_GUEST_STATE 0x80000021 + int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) { + uint64_t code; int ret = 0; switch (run->exit_reason) { @@ -1537,6 +1548,28 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) case KVM_EXIT_SET_TPR: ret = 1; break; + case KVM_EXIT_FAIL_ENTRY: + code = run->fail_entry.hardware_entry_failure_reason; + fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n", + code); + if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) { + fprintf(stderr, + "\nIf you're runnning a guest on an Intel machine without " + "unrestricted mode\n" + "support, the failure can be most likely due to the guest " + "entering an invalid\n" + "state for Intel VT. For example, the guest maybe running " + "in big real mode\n" + "which is not supported on less recent Intel processors." 
+ "\n\n"); + } + ret = -1; + break; + case KVM_EXIT_EXCEPTION: + fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n", + run->ex.exception, run->ex.error_code); + ret = -1; + break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; From f5c848eed769b795ac9c06bfb315e4aa9116337c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:08 +0100 Subject: [PATCH 17/31] x86: Optionally dump code bytes on cpu_dump_state Introduce the cpu_dump_state flag CPU_DUMP_CODE and implement it for x86. This writes out the code bytes around the current instruction pointer. Make use of this feature in KVM to help debugging fatal vm exits. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpu-all.h | 2 ++ kvm-all.c | 4 ++-- target-i386/helper.c | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/cpu-all.h b/cpu-all.h index 4ce4e83032..ffbd6a4df8 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -765,6 +765,8 @@ int page_check_range(target_ulong start, target_ulong len, int flags); CPUState *cpu_copy(CPUState *env); CPUState *qemu_get_cpu(int cpu); +#define CPU_DUMP_CODE 0x00010000 + void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, int flags); void cpu_dump_statistics(CPUState *env, FILE *f, fprintf_function cpu_fprintf, diff --git a/kvm-all.c b/kvm-all.c index 10e11944b3..41decdeae4 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -832,7 +832,7 @@ static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run) if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) { fprintf(stderr, "emulation failure\n"); if (!kvm_arch_stop_on_emulation_error(env)) { - cpu_dump_state(env, stderr, fprintf, 0); + cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE); return 0; } } @@ -994,7 +994,7 @@ int kvm_cpu_exec(CPUState *env) } while (ret > 0); if (ret < 0) { - cpu_dump_state(env, stderr, fprintf, 0); + cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE); vm_stop(0); env->exit_request = 1; } diff --git a/target-i386/helper.c b/target-i386/helper.c index 6dfa27d51b..1217452e71 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -249,6 +249,9 @@ done: cpu_fprintf(f, "\n"); } +#define DUMP_CODE_BYTES_TOTAL 50 +#define DUMP_CODE_BYTES_BACKWARD 20 + void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -434,6 +437,24 @@ void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, cpu_fprintf(f, " "); } } + if (flags & CPU_DUMP_CODE) { + target_ulong base = env->segs[R_CS].base + env->eip; + target_ulong offs = MIN(env->eip, DUMP_CODE_BYTES_BACKWARD); + uint8_t code; + char codestr[3]; + + cpu_fprintf(f, "Code="); + for (i = 0; i < DUMP_CODE_BYTES_TOTAL; i++) { + if (cpu_memory_rw_debug(env, base - offs + i, &code, 1, 0) == 0) { + snprintf(codestr, sizeof(codestr), "%02x", code); + } else { + snprintf(codestr, sizeof(codestr), "??"); + } + cpu_fprintf(f, "%s%s%s%s", i > 0 ? " " : "", + i == offs ? "<" : "", codestr, i == offs ? ">" : ""); + } + cpu_fprintf(f, "\n"); + } } /***********************************************************/ From 0d75a9ecd7ff77db0f1b5bfb3a3fee2438acefa4 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:09 +0100 Subject: [PATCH 18/31] kvm: x86: Align kvm_arch_put_registers code with comment The ordering doesn't matter in this case, but better keep it consistent. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 0ba13fc55d..9bb34abc5b 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1388,12 +1388,12 @@ int kvm_arch_put_registers(CPUState *env, int level) if (ret < 0) { return ret; } - /* must be last */ - ret = kvm_guest_debug_workarounds(env); + ret = kvm_put_debugregs(env); if (ret < 0) { return ret; } - ret = kvm_put_debugregs(env); + /* must be last */ + ret = kvm_guest_debug_workarounds(env); if (ret < 0) { return ret; } From c14750e8ad4a9d8d7621e2594abda34df19a6eff Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:10 +0100 Subject: [PATCH 19/31] kvm: x86: Prepare kvm_get_mp_state for in-kernel irqchip This code path will not yet be taken as we still lack in-kernel irqchip support. But qemu-kvm can already make use of it and drop its own mp_state access services. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 9bb34abc5b..531b69e68a 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1198,6 +1198,9 @@ static int kvm_get_mp_state(CPUState *env) return ret; } env->mp_state = mp_state.mp_state; + if (kvm_irqchip_in_kernel()) { + env->halted = (mp_state.mp_state == KVM_MP_STATE_HALTED); + } return 0; } From 3390e7f79784cbc75df408740cda4edbcf57ac58 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:11 +0100 Subject: [PATCH 20/31] kvm: x86: Remove redundant mp_state initialization kvm_arch_reset_vcpu initializes mp_state, and that function is invoked right after kvm_arch_init_vcpu. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 531b69e68a..07c75c00ad 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -321,8 +321,6 @@ int kvm_arch_init_vcpu(CPUState *env) uint32_t signature[3]; #endif - env->mp_state = KVM_MP_STATE_RUNNABLE; - env->cpuid_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX); i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR; From 1a5e9d2fafa5d31587e218cea462637bfad52b53 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:12 +0100 Subject: [PATCH 21/31] kvm: x86: Fix xcr0 reset mismerge For unknown reasons, xcr0 reset ended up in kvm_arch_update_guest_debug on upstream merge. Fix this and also remove the misleading comment (1 is THE reset value). Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 07c75c00ad..c4a22dd58f 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -450,6 +450,7 @@ void kvm_arch_reset_vcpu(CPUState *env) env->interrupt_injected = -1; env->nmi_injected = 0; env->nmi_pending = 0; + env->xcr0 = 1; if (kvm_irqchip_in_kernel()) { env->mp_state = cpu_is_bsp(env) ? 
KVM_MP_STATE_RUNNABLE : KVM_MP_STATE_UNINITIALIZED; @@ -1759,8 +1760,6 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4)); } } - /* Legal xcr0 for loading */ - env->xcr0 = 1; } #endif /* KVM_CAP_SET_GUEST_DEBUG */ From c3a3a7d356c4df2fe145037172ae52cba5f545a5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:13 +0100 Subject: [PATCH 22/31] kvm: x86: Refactor msr_star/hsave_pa setup and checks Simplify kvm_has_msr_star/hsave_pa to booleans and push their one-time initialization into kvm_arch_init. Also handle potential errors of that setup procedure. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 47 +++++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index c4a22dd58f..454ddb182f 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -54,6 +54,8 @@ #define BUS_MCEERR_AO 5 #endif +static bool has_msr_star; +static bool has_msr_hsave_pa; static int lm_capable_kernel; #ifdef KVM_CAP_EXT_CPUID @@ -459,13 +461,10 @@ void kvm_arch_reset_vcpu(CPUState *env) } } -int has_msr_star; -int has_msr_hsave_pa; - -static void kvm_supported_msrs(CPUState *env) +static int kvm_get_supported_msrs(KVMState *s) { static int kvm_supported_msrs; - int ret; + int ret = 0; /* first time */ if (kvm_supported_msrs == 0) { @@ -476,9 +475,9 @@ static void kvm_supported_msrs(CPUState *env) /* Obtain MSR list from KVM. These are the MSRs that we must * save/restore */ msr_list.nmsrs = 0; - ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list); + ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list); if (ret < 0 && ret != -E2BIG) { - return; + return ret; } /* Old kernel modules had a bug and could write beyond the provided memory. Allocate at least a safe amount of 1K. 
*/ @@ -487,17 +486,17 @@ static void kvm_supported_msrs(CPUState *env) sizeof(msr_list.indices[0]))); kvm_msr_list->nmsrs = msr_list.nmsrs; - ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list); + ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list); if (ret >= 0) { int i; for (i = 0; i < kvm_msr_list->nmsrs; i++) { if (kvm_msr_list->indices[i] == MSR_STAR) { - has_msr_star = 1; + has_msr_star = true; continue; } if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) { - has_msr_hsave_pa = 1; + has_msr_hsave_pa = true; continue; } } @@ -506,19 +505,7 @@ static void kvm_supported_msrs(CPUState *env) free(kvm_msr_list); } - return; -} - -static int kvm_has_msr_hsave_pa(CPUState *env) -{ - kvm_supported_msrs(env); - return has_msr_hsave_pa; -} - -static int kvm_has_msr_star(CPUState *env) -{ - kvm_supported_msrs(env); - return has_msr_star; + return ret; } static int kvm_init_identity_map_page(KVMState *s) @@ -543,9 +530,13 @@ static int kvm_init_identity_map_page(KVMState *s) int kvm_arch_init(KVMState *s, int smp_cpus) { int ret; - struct utsname utsname; + ret = kvm_get_supported_msrs(s); + if (ret < 0) { + return ret; + } + uname(&utsname); lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; @@ -830,10 +821,10 @@ static int kvm_put_msrs(CPUState *env, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); - if (kvm_has_msr_star(env)) { + if (has_msr_star) { kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); } - if (kvm_has_msr_hsave_pa(env)) { + if (has_msr_hsave_pa) { kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); } #ifdef TARGET_X86_64 @@ -1076,10 +1067,10 @@ static int kvm_get_msrs(CPUState *env) msrs[n++].index = MSR_IA32_SYSENTER_CS; msrs[n++].index = MSR_IA32_SYSENTER_ESP; msrs[n++].index = MSR_IA32_SYSENTER_EIP; - if (kvm_has_msr_star(env)) { + if (has_msr_star) { msrs[n++].index = MSR_STAR; } - if (kvm_has_msr_hsave_pa(env)) { + if (has_msr_hsave_pa) { msrs[n++].index = MSR_VM_HSAVE_PA; } msrs[n++].index = MSR_IA32_TSC; From ff5c186b8b6169bf25a6f30670a75fb9d4c945e3 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:14 +0100 Subject: [PATCH 23/31] kvm: x86: Reset paravirtual MSRs Make sure to write the cleared MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, and MSR_KVM_ASYNC_PF_EN to the kernel state so that a freshly booted guest cannot be disturbed by old values. Signed-off-by: Jan Kiszka CC: Glauber Costa Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 454ddb182f..825af428d9 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -845,6 +845,13 @@ static int kvm_put_msrs(CPUState *env, int level) if (smp_cpus == 1 || env->tsc != 0) { kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); } + } + /* + * The following paravirtual MSRs have side effects on the guest or are + * too heavy for normal writeback. Limit them to reset or full state + * updates. 
+ */ + if (level >= KVM_PUT_RESET_STATE) { kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); From 521f0798d7e38c21983211a6585ebcf79de4c14a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:15 +0100 Subject: [PATCH 24/31] kvm: x86: Fix !CONFIG_KVM_PARA build If we lack kvm_para.h, MSR_KVM_ASYNC_PF_EN is not defined. The change in kvm_arch_init_vcpu is just for consistency reasons. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 825af428d9..feaf33dc76 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -319,7 +319,7 @@ int kvm_arch_init_vcpu(CPUState *env) uint32_t limit, i, j, cpuid_i; uint32_t unused; struct kvm_cpuid_entry2 *c; -#ifdef KVM_CPUID_SIGNATURE +#ifdef CONFIG_KVM_PARA uint32_t signature[3]; #endif @@ -855,7 +855,7 @@ static int kvm_put_msrs(CPUState *env, int level) kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); -#ifdef KVM_CAP_ASYNC_PF +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr); #endif } @@ -1091,7 +1091,7 @@ static int kvm_get_msrs(CPUState *env) #endif msrs[n++].index = MSR_KVM_SYSTEM_TIME; msrs[n++].index = MSR_KVM_WALL_CLOCK; -#ifdef KVM_CAP_ASYNC_PF +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) msrs[n++].index = MSR_KVM_ASYNC_PF_EN; #endif @@ -1167,7 +1167,7 @@ static int kvm_get_msrs(CPUState *env) } #endif break; -#ifdef KVM_CAP_ASYNC_PF +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; break; From cad1e2827b616487e3574300f2eaeea13a355197 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:16 +0100 Subject: [PATCH 25/31] kvm: Drop smp_cpus argument from init functions No longer used. 
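To illustrate the "Reset paravirtual MSRs" change above: MSRs whose writeback has guest-visible side effects are only synced when the update level is at least a reset-level update. The sketch below is plain, standalone C rather than QEMU code; the level names mirror the KVM_PUT_RESET_STATE idea from the diff, while the MSR indices and helper names are made up for illustration.

#include <stdio.h>
#include <stdint.h>

/* Writeback levels; names mirror the KVM_PUT_*_STATE idea, values are made up. */
enum put_level {
    PUT_RUNTIME_STATE = 1,
    PUT_RESET_STATE   = 2,
    PUT_FULL_STATE    = 3,
};

struct msr_entry {
    uint32_t index;
    uint64_t data;
};

/* Illustrative MSR indices only. */
#define MSR_SYSENTER_CS_IDX   0x174
#define MSR_PV_WALL_CLOCK_IDX 0x11

static int build_msr_list(struct msr_entry *msrs, enum put_level level,
                          uint64_t sysenter_cs, uint64_t wall_clock)
{
    int n = 0;

    /* Cheap, side-effect-free MSRs are written on every synchronization. */
    msrs[n].index = MSR_SYSENTER_CS_IDX;
    msrs[n++].data = sysenter_cs;

    /* Paravirtual MSRs with side effects are limited to reset/full updates. */
    if (level >= PUT_RESET_STATE) {
        msrs[n].index = MSR_PV_WALL_CLOCK_IDX;
        msrs[n++].data = wall_clock;
    }
    return n;
}

int main(void)
{
    struct msr_entry msrs[8];

    printf("runtime sync writes %d MSR(s)\n",
           build_msr_list(msrs, PUT_RUNTIME_STATE, 0x10, 0));
    printf("reset sync writes   %d MSR(s)\n",
           build_msr_list(msrs, PUT_RESET_STATE, 0x10, 0xfee0));
    return 0;
}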
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 4 ++-- kvm-stub.c | 2 +- kvm.h | 4 ++-- target-i386/kvm.c | 2 +- target-ppc/kvm.c | 2 +- target-s390x/kvm.c | 2 +- vl.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 41decdeae4..8053f92105 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -636,7 +636,7 @@ static CPUPhysMemoryClient kvm_cpu_phys_memory_client = { .migration_log = kvm_client_migration_log, }; -int kvm_init(int smp_cpus) +int kvm_init(void) { static const char upgrade_note[] = "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n" @@ -749,7 +749,7 @@ int kvm_init(int smp_cpus) s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS); #endif - ret = kvm_arch_init(s, smp_cpus); + ret = kvm_arch_init(s); if (ret < 0) { goto err; } diff --git a/kvm-stub.c b/kvm-stub.c index 33d4476fa3..88682f288b 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -58,7 +58,7 @@ int kvm_check_extension(KVMState *s, unsigned int extension) return 0; } -int kvm_init(int smp_cpus) +int kvm_init(void) { return -ENOSYS; } diff --git a/kvm.h b/kvm.h index ce08d42756..a9717528e2 100644 --- a/kvm.h +++ b/kvm.h @@ -34,7 +34,7 @@ struct kvm_run; /* external API */ -int kvm_init(int smp_cpus); +int kvm_init(void); int kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); @@ -105,7 +105,7 @@ int kvm_arch_get_registers(CPUState *env); int kvm_arch_put_registers(CPUState *env, int level); -int kvm_arch_init(KVMState *s, int smp_cpus); +int kvm_arch_init(KVMState *s); int kvm_arch_init_vcpu(CPUState *env); diff --git a/target-i386/kvm.c b/target-i386/kvm.c index feaf33dc76..016b67d5ec 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -527,7 +527,7 @@ static int kvm_init_identity_map_page(KVMState *s) return 0; } -int kvm_arch_init(KVMState *s, int smp_cpus) +int kvm_arch_init(KVMState *s) { int ret; struct utsname utsname; diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 849b404b83..3c05630fb9 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -56,7 +56,7 @@ static void kvm_kick_env(void *env) qemu_cpu_kick(env); } -int kvm_arch_init(KVMState *s, int smp_cpus) +int kvm_arch_init(KVMState *s) { #ifdef KVM_CAP_PPC_UNSET_IRQ cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index adf4a9e1e5..b177e10126 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -70,7 +70,7 @@ #define SCLP_CMDW_READ_SCP_INFO 0x00020001 #define SCLP_CMDW_READ_SCP_INFO_FORCED 0x00120001 -int kvm_arch_init(KVMState *s, int smp_cpus) +int kvm_arch_init(KVMState *s) { return 0; } diff --git a/vl.c b/vl.c index 0292184273..33f844fdbc 100644 --- a/vl.c +++ b/vl.c @@ -2836,7 +2836,7 @@ int main(int argc, char **argv, char **envp) } if (kvm_allowed) { - int ret = kvm_init(smp_cpus); + int ret = kvm_init(); if (ret < 0) { if (!kvm_available()) { printf("KVM not supported for this target\n"); From 94a8d39afd8ccfdbf578af04c3385fdb5f545af1 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:17 +0100 Subject: [PATCH 26/31] kvm: Consolidate must-have capability checks Instead of splattering the code with #ifdefs and runtime checks for capabilities we cannot work without anyway, provide central test infrastructure for verifying their availability both at build and runtime. 
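The pattern described here, one central table of must-have capabilities checked in a single loop, can be sketched in a few lines of standalone C. The struct and the sentinel-terminated list below mirror the KVMCapabilityInfo / KVM_CAP_INFO / KVM_CAP_LAST_INFO definitions in the diff that follows; check_extension() is a stand-in for the real KVM_CHECK_EXTENSION ioctl, and the capability names and numbers are placeholders.

#include <stdio.h>
#include <stddef.h>

typedef struct CapabilityInfo {
    const char *name;
    int value;
} CapabilityInfo;

#define CAP_INFO(name, value) { name, value }
#define CAP_LAST_INFO { NULL, 0 }

/* Stand-in for kvm_check_extension(): pretend only caps below 8 exist. */
static int check_extension(int cap)
{
    return cap < 8;
}

/* Return the first missing capability, or NULL if all are present. */
static const CapabilityInfo *
check_extension_list(const CapabilityInfo *list)
{
    while (list->name) {
        if (!check_extension(list->value)) {
            return list;
        }
        list++;
    }
    return NULL;
}

static const CapabilityInfo required_caps[] = {
    CAP_INFO("CAP_USER_MEMORY", 3),
    CAP_INFO("CAP_DESTROY_MEMORY_REGION_WORKS", 21),
    CAP_LAST_INFO
};

int main(void)
{
    const CapabilityInfo *missing = check_extension_list(required_caps);

    if (missing) {
        fprintf(stderr, "kernel does not support %s\n", missing->name);
        return 1;
    }
    printf("all required capabilities present\n");
    return 0;
}

The sentinel entry keeps each table self-describing, so an architecture can add a required capability with a one-line change to its own list.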
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- configure | 39 +++++++++++++++++--------- kvm-all.c | 69 +++++++++++++++++++--------------------------- kvm.h | 10 +++++++ target-i386/kvm.c | 39 ++++++-------------------- target-ppc/kvm.c | 4 +++ target-s390x/kvm.c | 4 +++ 6 files changed, 80 insertions(+), 85 deletions(-) diff --git a/configure b/configure index 9a02d1f730..4673bf04c0 100755 --- a/configure +++ b/configure @@ -1662,18 +1662,31 @@ if test "$kvm" != "no" ; then #if !defined(KVM_API_VERSION) || KVM_API_VERSION < 12 || KVM_API_VERSION > 12 #error Invalid KVM version #endif -#if !defined(KVM_CAP_USER_MEMORY) -#error Missing KVM capability KVM_CAP_USER_MEMORY -#endif -#if !defined(KVM_CAP_SET_TSS_ADDR) -#error Missing KVM capability KVM_CAP_SET_TSS_ADDR -#endif -#if !defined(KVM_CAP_DESTROY_MEMORY_REGION_WORKS) -#error Missing KVM capability KVM_CAP_DESTROY_MEMORY_REGION_WORKS -#endif -#if !defined(KVM_CAP_USER_NMI) -#error Missing KVM capability KVM_CAP_USER_NMI +EOF + must_have_caps="KVM_CAP_USER_MEMORY \ + KVM_CAP_DESTROY_MEMORY_REGION_WORKS \ + KVM_CAP_COALESCED_MMIO \ + KVM_CAP_SYNC_MMU \ + " + if test \( "$cpu" = "i386" -o "$cpu" = "x86_64" \) ; then + must_have_caps="$caps \ + KVM_CAP_SET_TSS_ADDR \ + KVM_CAP_EXT_CPUID \ + KVM_CAP_CLOCKSOURCE \ + KVM_CAP_NOP_IO_DELAY \ + KVM_CAP_PV_MMU \ + KVM_CAP_MP_STATE \ + KVM_CAP_USER_NMI \ + " + fi + for c in $must_have_caps ; do + cat >> $TMPC <> $TMPC <1) printf(", "); printf("%s",$2);}'` if test "$kvmerr" != "" ; then echo -e "${kvmerr}\n\ - NOTE: To enable KVM support, update your kernel to 2.6.29+ or install \ - recent kvm-kmod from http://sourceforge.net/projects/kvm." +NOTE: To enable KVM support, update your kernel to 2.6.29+ or install \ +recent kvm-kmod from http://sourceforge.net/projects/kvm." 
fi fi feature_not_found "kvm" diff --git a/kvm-all.c b/kvm-all.c index 8053f92105..3a1f63b5b1 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -63,9 +63,7 @@ struct KVMState int fd; int vmfd; int coalesced_mmio; -#ifdef KVM_CAP_COALESCED_MMIO struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; -#endif int broken_set_mem_region; int migration_log; int vcpu_events; @@ -82,6 +80,12 @@ struct KVMState static KVMState *kvm_state; +static const KVMCapabilityInfo kvm_required_capabilites[] = { + KVM_CAP_INFO(USER_MEMORY), + KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS), + KVM_CAP_LAST_INFO +}; + static KVMSlot *kvm_alloc_slot(KVMState *s) { int i; @@ -227,12 +231,10 @@ int kvm_init_vcpu(CPUState *env) goto err; } -#ifdef KVM_CAP_COALESCED_MMIO if (s->coalesced_mmio && !s->coalesced_mmio_ring) { s->coalesced_mmio_ring = (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE; } -#endif ret = kvm_arch_init_vcpu(env); if (ret == 0) { @@ -401,7 +403,6 @@ static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) { int ret = -ENOSYS; -#ifdef KVM_CAP_COALESCED_MMIO KVMState *s = kvm_state; if (s->coalesced_mmio) { @@ -412,7 +413,6 @@ int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone); } -#endif return ret; } @@ -420,7 +420,6 @@ int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) { int ret = -ENOSYS; -#ifdef KVM_CAP_COALESCED_MMIO KVMState *s = kvm_state; if (s->coalesced_mmio) { @@ -431,7 +430,6 @@ int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone); } -#endif return ret; } @@ -481,6 +479,18 @@ static int kvm_check_many_ioeventfds(void) #endif } +static const KVMCapabilityInfo * +kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list) +{ + while (list->name) { + if (!kvm_check_extension(s, list->value)) { + return list; + } + list++; + } + return NULL; +} + static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, ram_addr_t phys_offset) { @@ -642,6 +652,7 @@ int kvm_init(void) "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n" "(see http://sourceforge.net/projects/kvm).\n"; KVMState *s; + const KVMCapabilityInfo *missing_cap; int ret; int i; @@ -685,35 +696,19 @@ int kvm_init(void) goto err; } - /* initially, KVM allocated its own memory and we had to jump through - * hooks to make phys_ram_base point to this. Modern versions of KVM - * just use a user allocated buffer so we can use regular pages - * unmodified. Make sure we have a sufficiently modern version of KVM. - */ - if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) { + missing_cap = kvm_check_extension_list(s, kvm_required_capabilites); + if (!missing_cap) { + missing_cap = + kvm_check_extension_list(s, kvm_arch_required_capabilities); + } + if (missing_cap) { ret = -EINVAL; - fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s", - upgrade_note); + fprintf(stderr, "kvm does not support %s\n%s", + missing_cap->name, upgrade_note); goto err; } - /* There was a nasty bug in < kvm-80 that prevents memory slots from being - * destroyed properly. Since we rely on this capability, refuse to work - * with any kernel without this capability. 
*/ - if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) { - ret = -EINVAL; - - fprintf(stderr, - "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s", - upgrade_note); - goto err; - } - - s->coalesced_mmio = 0; -#ifdef KVM_CAP_COALESCED_MMIO s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO); - s->coalesced_mmio_ring = NULL; -#endif s->broken_set_mem_region = 1; #ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS @@ -845,7 +840,6 @@ static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run) void kvm_flush_coalesced_mmio_buffer(void) { -#ifdef KVM_CAP_COALESCED_MMIO KVMState *s = kvm_state; if (s->coalesced_mmio_ring) { struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring; @@ -859,7 +853,6 @@ void kvm_flush_coalesced_mmio_buffer(void) ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX; } } -#endif } static void do_kvm_cpu_synchronize_state(void *_env) @@ -1059,13 +1052,7 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...) int kvm_has_sync_mmu(void) { -#ifdef KVM_CAP_SYNC_MMU - KVMState *s = kvm_state; - - return kvm_check_extension(s, KVM_CAP_SYNC_MMU); -#else - return 0; -#endif + return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU); } int kvm_has_vcpu_events(void) diff --git a/kvm.h b/kvm.h index a9717528e2..ca57517af2 100644 --- a/kvm.h +++ b/kvm.h @@ -32,6 +32,14 @@ extern int kvm_allowed; struct kvm_run; +typedef struct KVMCapabilityInfo { + const char *name; + int value; +} KVMCapabilityInfo; + +#define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP } +#define KVM_CAP_LAST_INFO { NULL, 0 } + /* external API */ int kvm_init(void); @@ -86,6 +94,8 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...); /* Arch specific hooks */ +extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; + int kvm_arch_post_run(CPUState *env, struct kvm_run *run); int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run); diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 016b67d5ec..1db8227db9 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -54,12 +54,17 @@ #define BUS_MCEERR_AO 5 #endif +const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_INFO(SET_TSS_ADDR), + KVM_CAP_INFO(EXT_CPUID), + KVM_CAP_INFO(MP_STATE), + KVM_CAP_LAST_INFO +}; + static bool has_msr_star; static bool has_msr_hsave_pa; static int lm_capable_kernel; -#ifdef KVM_CAP_EXT_CPUID - static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) { struct kvm_cpuid2 *cpuid; @@ -93,10 +98,6 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, uint32_t ret = 0; uint32_t cpuid_1_edx; - if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) { - return -1U; - } - max = 1; while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) { max *= 2; @@ -140,30 +141,14 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, return ret; } -#else - -uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, - uint32_t index, int reg) -{ - return -1U; -} - -#endif - #ifdef CONFIG_KVM_PARA struct kvm_para_features { int cap; int feature; } para_features[] = { -#ifdef KVM_CAP_CLOCKSOURCE { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE }, -#endif -#ifdef KVM_CAP_NOP_IO_DELAY { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY }, -#endif -#ifdef KVM_CAP_PV_MMU { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, -#endif #ifdef KVM_CAP_ASYNC_PF { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF }, #endif @@ -542,15 +527,7 @@ int kvm_arch_init(KVMState *s) /* create vm86 tss. 
KVM uses vm86 mode to emulate 16-bit code * directly. In order to use vm86 mode, a TSS is needed. Since this - * must be part of guest physical memory, we need to allocate it. Older - * versions of KVM just assumed that it would be at the end of physical - * memory but that doesn't work with more than 4GB of memory. We simply - * refuse to work with those older versions of KVM. */ - ret = kvm_check_extension(s, KVM_CAP_SET_TSS_ADDR); - if (ret <= 0) { - fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n"); - return ret; - } + * must be part of guest physical memory, we need to allocate it. */ /* this address is 3 pages before the bios, and the bios should present * as unavaible memory. FIXME, need to ensure the e820 map deals with diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 3c05630fb9..710eca1dca 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -37,6 +37,10 @@ do { } while (0) #endif +const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_LAST_INFO +}; + static int cap_interrupt_unset = false; static int cap_interrupt_level = false; diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index b177e10126..38823f54f7 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -70,6 +70,10 @@ #define SCLP_CMDW_READ_SCP_INFO 0x00020001 #define SCLP_CMDW_READ_SCP_INFO_FORCED 0x00120001 +const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_LAST_INFO +}; + int kvm_arch_init(KVMState *s) { return 0; From 110761987d10c6e6983cc445618acfd158d7ce02 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:18 +0100 Subject: [PATCH 27/31] kvm: x86: Rework identity map and TSS setup for larger BIOS sizes In order to support loading BIOSes > 256K, reorder the code, adjusting the base if the kernel supports moving the identity map. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 63 ++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 1db8227db9..72f9fdf24e 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -493,27 +493,9 @@ static int kvm_get_supported_msrs(KVMState *s) return ret; } -static int kvm_init_identity_map_page(KVMState *s) -{ -#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR - int ret; - uint64_t addr = 0xfffbc000; - - if (!kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) { - return 0; - } - - ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &addr); - if (ret < 0) { - fprintf(stderr, "kvm_set_identity_map_addr: %s\n", strerror(ret)); - return ret; - } -#endif - return 0; -} - int kvm_arch_init(KVMState *s) { + uint64_t identity_base = 0xfffbc000; int ret; struct utsname utsname; @@ -525,27 +507,42 @@ int kvm_arch_init(KVMState *s) uname(&utsname); lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; - /* create vm86 tss. KVM uses vm86 mode to emulate 16-bit code - * directly. In order to use vm86 mode, a TSS is needed. Since this - * must be part of guest physical memory, we need to allocate it. */ - - /* this address is 3 pages before the bios, and the bios should present - * as unavaible memory. FIXME, need to ensure the e820 map deals with - * this? - */ /* - * Tell fw_cfg to notify the BIOS to reserve the range. + * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly. + * In order to use vm86 mode, an EPT identity map and a TSS are needed. 
+ * Since these must be part of guest physical memory, we need to allocate + * them, both by setting their start addresses in the kernel and by + * creating a corresponding e820 entry. We need 4 pages before the BIOS. + * + * Older KVM versions may not support setting the identity map base. In + * that case we need to stick with the default, i.e. a 256K maximum BIOS + * size. */ - if (e820_add_entry(0xfffbc000, 0x4000, E820_RESERVED) < 0) { - perror("e820_add_entry() table is full"); - exit(1); +#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR + if (kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) { + /* Allows up to 16M BIOSes. */ + identity_base = 0xfeffc000; + + ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); + if (ret < 0) { + return ret; + } } - ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000); +#endif + /* Set TSS base one page after EPT identity map. */ + ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000); if (ret < 0) { return ret; } - return kvm_init_identity_map_page(s); + /* Tell fw_cfg to notify the BIOS to reserve the range. */ + ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED); + if (ret < 0) { + fprintf(stderr, "e820_add_entry() table is full\n"); + return ret; + } + + return 0; } static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs) From b0c883b5e079a4f719c43f875034e5e665ccc232 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:19 +0100 Subject: [PATCH 28/31] kvm: Flush coalesced mmio buffer on IO window exits We must flush pending mmio writes if we leave kvm_cpu_exec for an IO window. Otherwise we risk to loose those requests when migrating to a different host during that window. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 3a1f63b5b1..9976762f87 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -918,6 +918,8 @@ int kvm_cpu_exec(CPUState *env) cpu_single_env = env; kvm_arch_post_run(env, run); + kvm_flush_coalesced_mmio_buffer(); + if (ret == -EINTR || ret == -EAGAIN) { cpu_exit(env); DPRINTF("io window exit\n"); @@ -930,8 +932,6 @@ int kvm_cpu_exec(CPUState *env) abort(); } - kvm_flush_coalesced_mmio_buffer(); - ret = 0; /* exit loop */ switch (run->exit_reason) { case KVM_EXIT_IO: From b66042c722e4e7b2811a08873b1ecf2c2c1e5f0a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:20 +0100 Subject: [PATCH 29/31] kvm: Do not use qemu_fair_mutex The imbalance in the hold time of qemu_global_mutex only exists in TCG mode. In contrast to TCG VCPUs, KVM drops the global lock during guest execution. We already avoid touching the fairness lock from the IO-thread in KVM mode, so also stop using it from the VCPU threads. 
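For background on the qemu_fair_mutex hand-off this patch drops from the KVM path, the rough shape is sketched below in standalone pthreads C, not QEMU code, with illustrative names. The TCG-style path takes a separate fairness lock around acquisition of the global lock, so a thread that holds the global lock for long stretches cannot starve other contenders; with KVM, VCPU threads drop the global lock while the guest runs, so the plain acquisition is enough.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t fair_mutex   = PTHREAD_MUTEX_INITIALIZER;

/* TCG-style acquisition: the fairness lock serializes contenders so a
 * long-running holder of the global lock cannot starve everyone else. */
static void lock_iothread_tcg_style(void)
{
    pthread_mutex_lock(&fair_mutex);
    pthread_mutex_lock(&global_mutex);
    pthread_mutex_unlock(&fair_mutex);
}

/* KVM-style acquisition after this patch: the global lock is dropped
 * during guest execution anyway, so no extra fairness lock is needed. */
static void lock_iothread_kvm_style(void)
{
    pthread_mutex_lock(&global_mutex);
}

static void unlock_iothread(void)
{
    pthread_mutex_unlock(&global_mutex);
}

int main(void)
{
    lock_iothread_tcg_style();
    printf("tcg-style: got global lock\n");
    unlock_iothread();

    lock_iothread_kvm_style();
    printf("kvm-style: got global lock\n");
    unlock_iothread();
    return 0;
}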
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpus.c b/cpus.c index 0309189bfd..4c9928e2ce 100644 --- a/cpus.c +++ b/cpus.c @@ -735,9 +735,7 @@ static sigset_t block_io_signals(void) void qemu_mutex_lock_iothread(void) { if (kvm_enabled()) { - qemu_mutex_lock(&qemu_fair_mutex); qemu_mutex_lock(&qemu_global_mutex); - qemu_mutex_unlock(&qemu_fair_mutex); } else { qemu_mutex_lock(&qemu_fair_mutex); if (qemu_mutex_trylock(&qemu_global_mutex)) { From d8f771d9124e9a295b564d47d7546d93e844b526 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:21 +0100 Subject: [PATCH 30/31] kvm: x86: Implicitly clear nmi_injected/pending on reset All CPUX86State variables before CPU_COMMON are automatically cleared on reset. Reorder nmi_injected and nmi_pending to avoid having to touch them explicitly. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/cpu.h | 6 ++++-- target-i386/kvm.c | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index a457423b4a..af701a4412 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -699,6 +699,10 @@ typedef struct CPUX86State { uint32_t smbase; int old_exception; /* exception in flight */ + /* KVM states, automatically cleared on reset */ + uint8_t nmi_injected; + uint8_t nmi_pending; + CPU_COMMON /* processor features (e.g. for CPUID insn) */ @@ -726,8 +730,6 @@ typedef struct CPUX86State { int32_t exception_injected; int32_t interrupt_injected; uint8_t soft_interrupt; - uint8_t nmi_injected; - uint8_t nmi_pending; uint8_t has_error_code; uint32_t sipi_vector; uint32_t cpuid_kvm_features; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 72f9fdf24e..b2c5ee0efe 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -435,8 +435,6 @@ void kvm_arch_reset_vcpu(CPUState *env) { env->exception_injected = -1; env->interrupt_injected = -1; - env->nmi_injected = 0; - env->nmi_pending = 0; env->xcr0 = 1; if (kvm_irqchip_in_kernel()) { env->mp_state = cpu_is_bsp(env) ? KVM_MP_STATE_RUNNABLE : From c5999bfcfdf66390c98115044cb6fd174fbcf36d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Jan 2011 21:48:22 +0100 Subject: [PATCH 31/31] kvm: x86: Only read/write MSR_KVM_ASYNC_PF_EN if supported If the kernel does not support KVM_CAP_ASYNC_PF, it also does not know about the related MSR. So skip it during state synchronization in that case. Fixes annoying kernel warnings. 
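The fix follows a simple pattern: probe the capability-dependent feature once, remember the result in a flag, and consult the flag wherever the MSR would be read or written. Below is a standalone sketch of that pattern with illustrative feature-bit and MSR values rather than the real KVM_FEATURE_ASYNC_PF / MSR_KVM_ASYNC_PF_EN definitions.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Illustrative stand-ins for the real feature bit and MSR index. */
#define FEATURE_ASYNC_PF_BIT  4
#define MSR_ASYNC_PF_EN_IDX   0x4b564d02

static bool has_msr_async_pf_en;

/* Probe once at init time, e.g. from the paravirt feature bits. */
static void probe_features(uint32_t features)
{
    has_msr_async_pf_en = features & (1u << FEATURE_ASYNC_PF_BIT);
}

/* Build the MSR index list for a state sync; skip MSRs the kernel does
 * not know about so get/set does not trigger errors or kernel warnings. */
static int build_msr_index_list(uint32_t *indices)
{
    int n = 0;

    if (has_msr_async_pf_en) {
        indices[n++] = MSR_ASYNC_PF_EN_IDX;
    }
    return n;
}

int main(void)
{
    uint32_t indices[4];

    probe_features(0);                          /* old kernel: feature absent */
    printf("without feature: %d MSR(s)\n", build_msr_index_list(indices));

    probe_features(1u << FEATURE_ASYNC_PF_BIT); /* feature present */
    printf("with feature:    %d MSR(s)\n", build_msr_index_list(indices));
    return 0;
}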
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index b2c5ee0efe..8e8880a92b 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -63,6 +63,9 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { static bool has_msr_star; static bool has_msr_hsave_pa; +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) +static bool has_msr_async_pf_en; +#endif static int lm_capable_kernel; static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) @@ -164,6 +167,7 @@ static int get_para_features(CPUState *env) features |= (1 << para_features[i].feature); } } + has_msr_async_pf_en = features & (1 << KVM_FEATURE_ASYNC_PF); return features; } #endif @@ -828,7 +832,10 @@ static int kvm_put_msrs(CPUState *env, int level) env->system_time_msr); kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); #if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) - kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr); + if (has_msr_async_pf_en) { + kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN, + env->async_pf_en_msr); + } #endif } #ifdef KVM_CAP_MCE @@ -1064,7 +1071,9 @@ static int kvm_get_msrs(CPUState *env) msrs[n++].index = MSR_KVM_SYSTEM_TIME; msrs[n++].index = MSR_KVM_WALL_CLOCK; #if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) - msrs[n++].index = MSR_KVM_ASYNC_PF_EN; + if (has_msr_async_pf_en) { + msrs[n++].index = MSR_KVM_ASYNC_PF_EN; + } #endif #ifdef KVM_CAP_MCE