From 79a197ab180e75838523c58973b1221ad7bf51eb Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sat, 6 Jul 2019 00:06:36 +0300 Subject: [PATCH 01/12] target/i386: kvm: Demand nested migration kernel capabilities only when vCPU may have enabled VMX Previous to this change, a vCPU exposed with VMX running on a kernel without KVM_CAP_NESTED_STATE or KVM_CAP_EXCEPTION_PAYLOAD resulted in adding a migration blocker. This was because when the code was written it was thought there is no way to reliably know if a vCPU is utilising VMX or not at runtime. However, it turns out that this can be known to some extent: In order for a vCPU to enter VMX operation it must have CR4.VMXE set. Since it was set, CR4.VMXE must remain set as long as the vCPU is in VMX operation. This is because CR4.VMXE is one of the bits set in MSR_IA32_VMX_CR4_FIXED1. There is one exception to the above statement when vCPU enters SMM mode. When a vCPU enters SMM mode, it temporarily exits VMX operation and may also reset CR4.VMXE during execution in SMM mode. When the vCPU exits SMM mode, vCPU state is restored to be in VMX operation and CR4.VMXE is restored to its original state of being set. Therefore, when the vCPU is not in SMM mode, we can infer whether VMX is being used by examining CR4.VMXE. Otherwise, we cannot know for certain but assume the worst: that the vCPU may utilise VMX. Summarizing all the above, a vCPU may have enabled VMX in case CR4.VMXE is set or vCPU is in SMM mode. Therefore, remove migration blocker and check before migration (cpu_pre_save()) if the vCPU may have enabled VMX. If true, only then require relevant kernel capabilities. While at it, demand KVM_CAP_EXCEPTION_PAYLOAD only when the vCPU is in guest-mode and there is a pending/injected exception. Otherwise, this kernel capability is not required for proper migration. 
Reviewed-by: Joao Martins Signed-off-by: Liran Alon Reviewed-by: Maran Wilson Tested-by: Maran Wilson Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 22 ++++++++++++++++++++++ target/i386/kvm.c | 26 ++++++-------------------- target/i386/kvm_i386.h | 1 + target/i386/machine.c | 24 ++++++++++++++++++++---- 4 files changed, 49 insertions(+), 24 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 05393cf9d1..8b3dc5533e 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1877,6 +1877,28 @@ static inline bool cpu_has_vmx(CPUX86State *env) return env->features[FEAT_1_ECX] & CPUID_EXT_VMX; } +/* + * In order for a vCPU to enter VMX operation it must have CR4.VMXE set. + * Since it was set, CR4.VMXE must remain set as long as vCPU is in + * VMX operation. This is because CR4.VMXE is one of the bits set + * in MSR_IA32_VMX_CR4_FIXED1. + * + * There is one exception to the above statement when vCPU enters SMM mode. + * When a vCPU enters SMM mode, it temporarily exits VMX operation and + * may also reset CR4.VMXE during execution in SMM mode. + * When vCPU exits SMM mode, vCPU state is restored to be in VMX operation + * and CR4.VMXE is restored to its original value of being set. + * + * Therefore, when vCPU is not in SMM mode, we can infer whether + * VMX is being used by examining CR4.VMXE. Otherwise, we cannot + * know for certain. 
+ */ +static inline bool cpu_vmx_maybe_enabled(CPUX86State *env) +{ + return cpu_has_vmx(env) && + ((env->cr[4] & CR4_VMXE_MASK) || (env->hflags & HF_SMM_MASK)); +} + /* fpu_helper.c */ void update_fp_status(CPUX86State *env); void update_mxcsr_status(CPUX86State *env); diff --git a/target/i386/kvm.c b/target/i386/kvm.c index ec7870c6af..4542f0fad0 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -128,6 +128,11 @@ bool kvm_has_adjust_clock_stable(void) return (ret == KVM_CLOCK_TSC_STABLE); } +bool kvm_has_exception_payload(void) +{ + return has_exception_payload; +} + bool kvm_allows_irq0_override(void) { return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); @@ -1342,7 +1347,6 @@ static int hyperv_init_vcpu(X86CPU *cpu) } static Error *invtsc_mig_blocker; -static Error *nested_virt_mig_blocker; #define KVM_MAX_CPUID_ENTRIES 100 @@ -1653,22 +1657,6 @@ int kvm_arch_init_vcpu(CPUState *cs) !!(c->ecx & CPUID_EXT_SMX); } - if (cpu_has_vmx(env) && !nested_virt_mig_blocker && - ((kvm_max_nested_state_length() <= 0) || !has_exception_payload)) { - error_setg(&nested_virt_mig_blocker, - "Kernel do not provide required capabilities for " - "nested virtualization migration. 
" - "(CAP_NESTED_STATE=%d, CAP_EXCEPTION_PAYLOAD=%d)", - kvm_max_nested_state_length() > 0, - has_exception_payload); - r = migrate_add_blocker(nested_virt_mig_blocker, &local_err); - if (local_err) { - error_report_err(local_err); - error_free(nested_virt_mig_blocker); - return r; - } - } - if (env->mcg_cap & MCG_LMCE_P) { has_msr_mcg_ext_ctl = has_msr_feature_control = true; } @@ -1683,7 +1671,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (local_err) { error_report_err(local_err); error_free(invtsc_mig_blocker); - goto fail2; + return r; } } } @@ -1752,8 +1740,6 @@ int kvm_arch_init_vcpu(CPUState *cs) fail: migrate_del_blocker(invtsc_mig_blocker); - fail2: - migrate_del_blocker(nested_virt_mig_blocker); return r; } diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h index 3057ba4f7d..06fe06bdb3 100644 --- a/target/i386/kvm_i386.h +++ b/target/i386/kvm_i386.h @@ -35,6 +35,7 @@ bool kvm_allows_irq0_override(void); bool kvm_has_smm(void); bool kvm_has_adjust_clock_stable(void); +bool kvm_has_exception_payload(void); void kvm_synchronize_all_tsc(void); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_do_init_vcpu(X86CPU *cs); diff --git a/target/i386/machine.c b/target/i386/machine.c index 704ba6de46..ac2d1d1d36 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -7,6 +7,7 @@ #include "hw/isa/isa.h" #include "migration/cpu.h" #include "hyperv.h" +#include "kvm_i386.h" #include "sysemu/kvm.h" #include "sysemu/tcg.h" @@ -232,10 +233,25 @@ static int cpu_pre_save(void *opaque) } #ifdef CONFIG_KVM - /* Verify we have nested virtualization state from kernel if required */ - if (kvm_enabled() && cpu_has_vmx(env) && !env->nested_state) { - error_report("Guest enabled nested virtualization but kernel " - "does not support saving of nested state"); + /* + * In case vCPU may have enabled VMX, we need to make sure kernel have + * required capabilities in order to perform migration correctly: + * + * 1) We must be able to extract vCPU nested-state from 
KVM. + * + * 2) In case vCPU is running in guest-mode and it has a pending exception, + * we must be able to determine if it's in a pending or injected state. + * Note that in case KVM don't have required capability to do so, + * a pending/injected exception will always appear as an + * injected exception. + */ + if (kvm_enabled() && cpu_vmx_maybe_enabled(env) && + (!env->nested_state || + (!kvm_has_exception_payload() && (env->hflags & HF_GUEST_MASK) && + env->exception_injected))) { + error_report("Guest maybe enabled nested virtualization but kernel " + "does not support required capabilities to save vCPU " + "nested state"); return -EINVAL; } #endif From 1e44f3ab71fb4291d266a264f7c207ae5c6d59b2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jul 2019 15:41:48 +0200 Subject: [PATCH 02/12] target/i386: skip KVM_GET/SET_NESTED_STATE if VMX disabled, or for SVM Do not allocate env->nested_state unless we later need to migrate the nested virtualization state. With this change, nested_state_needed() will return false if the VMX flag is not included in the virtual machine. KVM_GET/SET_NESTED_STATE is also disabled for SVM which is safer (we know that at least the NPT root and paging mode have to be saved/loaded), and thus the corresponding subsection can go away as well. Inspired by a patch from Liran Alon. 
Signed-off-by: Paolo Bonzini --- target/i386/kvm.c | 16 ++++++++-------- target/i386/machine.c | 21 +-------------------- 2 files changed, 9 insertions(+), 28 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 4542f0fad0..ada89d27cc 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1711,15 +1711,15 @@ int kvm_arch_init_vcpu(CPUState *cs) max_nested_state_len = kvm_max_nested_state_length(); if (max_nested_state_len > 0) { assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data)); - env->nested_state = g_malloc0(max_nested_state_len); - env->nested_state->size = max_nested_state_len; - - if (IS_INTEL_CPU(env)) { - struct kvm_vmx_nested_state_hdr *vmx_hdr = - &env->nested_state->hdr.vmx; + if (cpu_has_vmx(env)) { + struct kvm_vmx_nested_state_hdr *vmx_hdr; + env->nested_state = g_malloc0(max_nested_state_len); + env->nested_state->size = max_nested_state_len; env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; + + vmx_hdr = &env->nested_state->hdr.vmx; vmx_hdr->vmxon_pa = -1ull; vmx_hdr->vmcs12_pa = -1ull; } @@ -3515,7 +3515,7 @@ static int kvm_put_nested_state(X86CPU *cpu) CPUX86State *env = &cpu->env; int max_nested_state_len = kvm_max_nested_state_length(); - if (max_nested_state_len <= 0) { + if (!env->nested_state) { return 0; } @@ -3529,7 +3529,7 @@ static int kvm_get_nested_state(X86CPU *cpu) int max_nested_state_len = kvm_max_nested_state_length(); int ret; - if (max_nested_state_len <= 0) { + if (!env->nested_state) { return 0; } diff --git a/target/i386/machine.c b/target/i386/machine.c index ac2d1d1d36..b1146093b5 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1035,31 +1035,13 @@ static const VMStateDescription vmstate_vmx_nested_state = { } }; -static bool svm_nested_state_needed(void *opaque) -{ - struct kvm_nested_state *nested_state = opaque; - - return (nested_state->format == KVM_STATE_NESTED_FORMAT_SVM); -} - -static const VMStateDescription vmstate_svm_nested_state = { - .name = 
"cpu/kvm_nested_state/svm", - .version_id = 1, - .minimum_version_id = 1, - .needed = svm_nested_state_needed, - .fields = (VMStateField[]) { - VMSTATE_END_OF_LIST() - } -}; - static bool nested_state_needed(void *opaque) { X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; return (env->nested_state && - (vmx_nested_state_needed(env->nested_state) || - svm_nested_state_needed(env->nested_state))); + vmx_nested_state_needed(env->nested_state)); } static int nested_state_post_load(void *opaque, int version_id) @@ -1121,7 +1103,6 @@ static const VMStateDescription vmstate_kvm_nested_state = { }, .subsections = (const VMStateDescription*[]) { &vmstate_vmx_nested_state, - &vmstate_svm_nested_state, NULL } }; From 12e1dc49395674960efec967ca4161428c08307a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 17 Jul 2019 11:46:50 +0200 Subject: [PATCH 03/12] virtio-scsi: remove unused argument to virtio_scsi_common_realize The argument is not used and passing it clutters error propagation in the callers. So, get rid of it. 
Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini --- hw/scsi/vhost-scsi.c | 2 +- hw/scsi/vhost-user-scsi.c | 2 +- hw/scsi/virtio-scsi.c | 4 ++-- include/hw/virtio/virtio-scsi.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 4090f99ee4..76bb875ca6 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -262,7 +262,7 @@ static void vhost_scsi_unrealize(DeviceState *dev, Error **errp) vhost_dev_cleanup(&vsc->dev); g_free(vqs); - virtio_scsi_common_unrealize(dev, errp); + virtio_scsi_common_unrealize(dev); } static Property vhost_scsi_properties[] = { diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index a9fd8ea305..a0b69fbc0f 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -125,7 +125,7 @@ static void vhost_user_scsi_unrealize(DeviceState *dev, Error **errp) vhost_dev_cleanup(&vsc->dev); g_free(vqs); - virtio_scsi_common_unrealize(dev, errp); + virtio_scsi_common_unrealize(dev); vhost_user_cleanup(&s->vhost_user); } diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 61ce365fe9..d0bdbff090 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -922,7 +922,7 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) virtio_scsi_dataplane_setup(s, errp); } -void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp) +void virtio_scsi_common_unrealize(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); @@ -936,7 +936,7 @@ static void virtio_scsi_device_unrealize(DeviceState *dev, Error **errp) VirtIOSCSI *s = VIRTIO_SCSI(dev); qbus_set_hotplug_handler(BUS(&s->bus), NULL, &error_abort); - virtio_scsi_common_unrealize(dev, errp); + virtio_scsi_common_unrealize(dev); } static Property virtio_scsi_properties[] = { diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 4c0bcdb788..122f7c4b6f 100644 --- 
a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -145,7 +145,7 @@ void virtio_scsi_common_realize(DeviceState *dev, VirtIOHandleOutput cmd, Error **errp); -void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp); +void virtio_scsi_common_unrealize(DeviceState *dev); bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq); bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); From 934443c37befc9e268f5c661cb9552a2ee572e19 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 17 Jul 2019 08:46:05 +0800 Subject: [PATCH 04/12] vhost-scsi: Call virtio_scsi_common_unrealize() when device realize failed This avoids memory leak when device hotplug is failed. Signed-off-by: Xie Yongji Message-Id: <20190717004606.12444-1-xieyongji@baidu.com> Signed-off-by: Paolo Bonzini --- hw/scsi/vhost-scsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 76bb875ca6..343ca8be7a 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -210,7 +210,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) if (err) { error_propagate(errp, err); error_free(vsc->migration_blocker); - goto close_fd; + goto free_virtio; } } @@ -240,6 +240,8 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) migrate_del_blocker(vsc->migration_blocker); } g_free(vsc->dev.vqs); + free_virtio: + virtio_scsi_common_unrealize(dev); close_fd: close(vhostfd); return; From 68fa7ca015dc8afb86e3aa51b31362f63048bd5c Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 17 Jul 2019 08:46:06 +0800 Subject: [PATCH 05/12] vhost-user-scsi: Call virtio_scsi_common_unrealize() when device realize failed This avoids memory leak when device hotplug is failed. 
Signed-off-by: Xie Yongji Message-Id: <20190717004606.12444-2-xieyongji@baidu.com> Signed-off-by: Paolo Bonzini --- hw/scsi/vhost-user-scsi.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index a0b69fbc0f..fcee67d5a7 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -87,7 +87,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) } if (!vhost_user_init(&s->vhost_user, &vs->conf.chardev, errp)) { - return; + goto free_virtio; } vsc->dev.nvqs = 2 + vs->conf.num_queues; @@ -101,15 +101,21 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) if (ret < 0) { error_setg(errp, "vhost-user-scsi: vhost initialization failed: %s", strerror(-ret)); - vhost_user_cleanup(&s->vhost_user); - g_free(vqs); - return; + goto free_vhost; } /* Channel and lun both are 0 for bootable vhost-user-scsi disk */ vsc->channel = 0; vsc->lun = 0; vsc->target = vs->conf.boot_tpgt; + + return; + +free_vhost: + vhost_user_cleanup(&s->vhost_user); + g_free(vqs); +free_virtio: + virtio_scsi_common_unrealize(dev); } static void vhost_user_scsi_unrealize(DeviceState *dev, Error **errp) From 1849f297f5952ea60ddfd39fe02ce21cba6aa4d8 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Thu, 18 Jul 2019 11:42:36 +0200 Subject: [PATCH 06/12] scsi-generic: Check sense key before request snooping and patching When READ CAPACITY command completes, scsi_read_complete() function snoops the command result and updates SCSIDevice members blocksize and max_lba . However, this update is executed even when READ CAPACITY command indicates an error in sense data. This causes unexpected blocksize update with zero value for SCSI devices without READ CAPACITY(10) command support and eventually results in a divide by zero. An emulated device by TCMU-runner is an example of a device that doesn't support READ CAPACITY(10) command. 
To avoid the unexpected update, add a sense key check in the scsi_read_complete() function. The function already checks the sense key for VPD Block Limits emulation. Do the scsi_parse_sense_buf() call for all requests rather than just for VPD Block Limits emulation, so that blocksize and max_lba are only updated if READ CAPACITY returns a zero sense key. Signed-off-by: Shin'ichiro Kawasaki [Extend the check to all requests, not just READ CAPACITY] Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-generic.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index f07891b3f6..c11a0c9a84 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -254,24 +254,28 @@ static void scsi_read_complete(void * opaque, int ret) r->len = -1; - /* - * Check if this is a VPD Block Limits request that - * resulted in sense error but would need emulation. - * In this case, emulate a valid VPD response. - */ - if (s->needs_vpd_bl_emulation && ret == 0 && - (r->io_header.driver_status & SG_ERR_DRIVER_SENSE) && - r->req.cmd.buf[0] == INQUIRY && - (r->req.cmd.buf[1] & 0x01) && - r->req.cmd.buf[2] == 0xb0) { + if (r->io_header.driver_status & SG_ERR_DRIVER_SENSE) { SCSISense sense = scsi_parse_sense_buf(r->req.sense, r->io_header.sb_len_wr); - if (sense.key == ILLEGAL_REQUEST) { + + /* + * Check if this is a VPD Block Limits request that + * resulted in sense error but would need emulation. + * In this case, emulate a valid VPD response. + */ + if (sense.key == ILLEGAL_REQUEST && + s->needs_vpd_bl_emulation && + r->req.cmd.buf[0] == INQUIRY && + (r->req.cmd.buf[1] & 0x01) && + r->req.cmd.buf[2] == 0xb0) { len = scsi_generic_emulate_block_limits(r, s); /* - * No need to let scsi_read_complete go on and handle an + * It's okay to jump to req_complete: no need to + * let scsi_handle_inquiry_reply handle an * INQUIRY VPD BL request we created manually. 
*/ + } + if (sense.key) { goto req_complete; } } From 2f950b1e449818ec69ce70a19270f1a039350c2e Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 18 Jul 2019 09:04:56 +0800 Subject: [PATCH 07/12] test-bitmap: add test for bitmap_set Add a test for bitmap_set. There are three cases: * Both start and end is BITS_PER_LONG aligned * Only start is BITS_PER_LONG aligned * Only end is BITS_PER_LONG aligned Signed-off-by: Wei Yang Message-Id: <20190718010456.4234-3-richardw.yang@linux.intel.com> Reviewed-by: Peter Xu Signed-off-by: Paolo Bonzini --- tests/test-bitmap.c | 55 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/test-bitmap.c b/tests/test-bitmap.c index cb7c5e462d..18aa584591 100644 --- a/tests/test-bitmap.c +++ b/tests/test-bitmap.c @@ -59,12 +59,67 @@ static void check_bitmap_copy_with_offset(void) g_free(bmap3); } +typedef void (*bmap_set_func)(unsigned long *map, long i, long len); +static void bitmap_set_case(bmap_set_func set_func) +{ + unsigned long *bmap; + int offset; + + bmap = bitmap_new(BMAP_SIZE); + + /* Both Aligned, set bits [BITS_PER_LONG, 3*BITS_PER_LONG] */ + set_func(bmap, BITS_PER_LONG, 2 * BITS_PER_LONG); + g_assert_cmpuint(bmap[1], ==, -1ul); + g_assert_cmpuint(bmap[2], ==, -1ul); + g_assert_cmpint(find_first_bit(bmap, BITS_PER_LONG), ==, BITS_PER_LONG); + g_assert_cmpint(find_next_zero_bit(bmap, 3 * BITS_PER_LONG, BITS_PER_LONG), + ==, 3 * BITS_PER_LONG); + + for (offset = 0; offset <= BITS_PER_LONG; offset++) { + bitmap_clear(bmap, 0, BMAP_SIZE); + /* End Aligned, set bits [BITS_PER_LONG - offset, 3*BITS_PER_LONG] */ + set_func(bmap, BITS_PER_LONG - offset, 2 * BITS_PER_LONG + offset); + g_assert_cmpuint(bmap[1], ==, -1ul); + g_assert_cmpuint(bmap[2], ==, -1ul); + g_assert_cmpint(find_first_bit(bmap, BITS_PER_LONG), + ==, BITS_PER_LONG - offset); + g_assert_cmpint(find_next_zero_bit(bmap, + 3 * BITS_PER_LONG, + BITS_PER_LONG - offset), + ==, 3 * BITS_PER_LONG); + } + + for (offset = 0; 
offset <= BITS_PER_LONG; offset++) { + bitmap_clear(bmap, 0, BMAP_SIZE); + /* Start Aligned, set bits [BITS_PER_LONG, 3*BITS_PER_LONG + offset] */ + set_func(bmap, BITS_PER_LONG, 2 * BITS_PER_LONG + offset); + g_assert_cmpuint(bmap[1], ==, -1ul); + g_assert_cmpuint(bmap[2], ==, -1ul); + g_assert_cmpint(find_first_bit(bmap, BITS_PER_LONG), + ==, BITS_PER_LONG); + g_assert_cmpint(find_next_zero_bit(bmap, + 3 * BITS_PER_LONG + offset, + BITS_PER_LONG), + ==, 3 * BITS_PER_LONG + offset); + } + + g_free(bmap); +} + +static void check_bitmap_set(void) +{ + bitmap_set_case(bitmap_set); + bitmap_set_case(bitmap_set_atomic); +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); g_test_add_func("/bitmap/bitmap_copy_with_offset", check_bitmap_copy_with_offset); + g_test_add_func("/bitmap/bitmap_set", + check_bitmap_set); g_test_run(); From 8072aae3770aed5ed1274a3d6b83a94672c6181a Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 14 Jun 2019 11:52:37 +1000 Subject: [PATCH 08/12] hmp: Print if memory section is registered with an accelerator This adds an accelerator name to the "info mtree -f" output to tell the user if a particular memory section is registered with the accelerator; the primary user for this is KVM and such information is useful for debugging purposes. This adds a has_memory() callback to the accelerator class allowing any accelerator to have a label in that memory tree dump. Since memory sections are passed to memory listeners and get registered in accelerators (rather than memory regions), this only prints new labels for flatviews attached to the system address space. 
An example: Root memory region: system 0000000000000000-0000002fffffffff (prio 0, ram): /objects/mem0 kvm 0000003000000000-0000005fffffffff (prio 0, ram): /objects/mem1 kvm 0000200000000020-000020000000003f (prio 1, i/o): virtio-pci 0000200080000000-000020008000003f (prio 0, i/o): capabilities Signed-off-by: Alexey Kardashevskiy Message-Id: <20190614015237.82463-1-aik@ozlabs.ru> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 38 ++++++++++++++++++++++++++++++++++++++ include/sysemu/accel.h | 3 +++ memory.c | 22 ++++++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 35ea3cb624..f450f25295 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -111,6 +111,13 @@ struct KVMState /* memory encryption */ void *memcrypt_handle; int (*memcrypt_encrypt_data)(void *handle, uint8_t *ptr, uint64_t len); + + /* For "info mtree -f" to tell if an MR is registered in KVM */ + int nr_as; + struct KVMAs { + KVMMemoryListener *ml; + AddressSpace *as; + } *as; }; KVMState *kvm_state; @@ -1159,6 +1166,14 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, kml->listener.priority = 10; memory_listener_register(&kml->listener, as); + + for (i = 0; i < s->nr_as; ++i) { + if (!s->as[i].as) { + s->as[i].as = as; + s->as[i].ml = kml; + break; + } + } } static MemoryListener kvm_io_listener = { @@ -1809,6 +1824,12 @@ static int kvm_init(MachineState *ms) s->nr_slots = 32; } + s->nr_as = kvm_check_extension(s, KVM_CAP_MULTI_ADDRESS_SPACE); + if (s->nr_as <= 1) { + s->nr_as = 1; + } + s->as = g_new0(struct KVMAs, s->nr_as); + kvm_type = qemu_opt_get(qemu_get_machine_opts(), "kvm-type"); if (mc->kvm_type) { type = mc->kvm_type(ms, kvm_type); @@ -2828,11 +2849,28 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target) return r; } +static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, + hwaddr start_addr, hwaddr size) +{ + KVMState *kvm = KVM_STATE(ms->accelerator); + int i; + + 
for (i = 0; i < kvm->nr_as; ++i) { + if (kvm->as[i].as == as && kvm->as[i].ml) { + return NULL != kvm_lookup_matching_slot(kvm->as[i].ml, + start_addr, size); + } + } + + return false; +} + static void kvm_accel_class_init(ObjectClass *oc, void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "KVM"; ac->init_machine = kvm_init; + ac->has_memory = kvm_accel_has_memory; ac->allowed = &kvm_allowed; } diff --git a/include/sysemu/accel.h b/include/sysemu/accel.h index 81293cdb08..89ce57e404 100644 --- a/include/sysemu/accel.h +++ b/include/sysemu/accel.h @@ -25,6 +25,7 @@ #include "qom/object.h" #include "hw/qdev-properties.h" +#include "exec/hwaddr.h" typedef struct AccelState { /*< private >*/ @@ -39,6 +40,8 @@ typedef struct AccelClass { const char *name; int (*init_machine)(MachineState *ms); void (*setup_post)(MachineState *ms, AccelState *accel); + bool (*has_memory)(MachineState *ms, AddressSpace *as, + hwaddr start_addr, hwaddr size); bool *allowed; /* * Array of global properties that would be applied when specific diff --git a/memory.c b/memory.c index d4579bbaec..5d8c9a9234 100644 --- a/memory.c +++ b/memory.c @@ -30,7 +30,9 @@ #include "sysemu/kvm.h" #include "sysemu/sysemu.h" #include "sysemu/tcg.h" +#include "sysemu/accel.h" #include "hw/qdev-properties.h" +#include "hw/boards.h" #include "migration/vmstate.h" //#define DEBUG_UNASSIGNED @@ -2999,6 +3001,8 @@ struct FlatViewInfo { int counter; bool dispatch_tree; bool owner; + AccelClass *ac; + const char *ac_name; }; static void mtree_print_flatview(gpointer key, gpointer value, @@ -3061,6 +3065,17 @@ static void mtree_print_flatview(gpointer key, gpointer value, if (fvi->owner) { mtree_print_mr_owner(mr); } + + if (fvi->ac) { + for (i = 0; i < fv_address_spaces->len; ++i) { + as = g_array_index(fv_address_spaces, AddressSpace*, i); + if (fvi->ac->has_memory(current_machine, as, + int128_get64(range->addr.start), + MR_SIZE(range->addr.size) + 1)) { + qemu_printf(" %s", fvi->ac_name); + } + } + } 
qemu_printf("\n"); range++; } @@ -3101,6 +3116,13 @@ void mtree_info(bool flatview, bool dispatch_tree, bool owner) }; GArray *fv_address_spaces; GHashTable *views = g_hash_table_new(g_direct_hash, g_direct_equal); + AccelClass *ac = ACCEL_GET_CLASS(current_machine->accelerator); + + if (ac->has_memory) { + fvi.ac = ac; + fvi.ac_name = current_machine->accel ? current_machine->accel : + object_class_get_name(OBJECT_CLASS(ac)); + } /* Gather all FVs in one table */ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { From 3bf5de5240525a52a88deb3e864646763157e2ba Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 18 Jul 2019 20:39:49 +1000 Subject: [PATCH 09/12] qmp: don't emit the RESET event on wakeup Commit 1405819637f53 ("qmp: don't emit the RESET event on wakeup from S3") changed system wakeup to avoid calling qapi_event_send_reset. Commit 76ed4b18debfe ("s390/ipl: fix ipl with -no-reboot") appears to have inadvertently broken that logic. Acked-by: Cornelia Huck Signed-off-by: Nicholas Piggin Message-Id: <20190718103951.10027-2-npiggin@gmail.com> Signed-off-by: Paolo Bonzini --- vl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vl.c b/vl.c index a5808f9a02..cefe5a3968 100644 --- a/vl.c +++ b/vl.c @@ -1550,7 +1550,7 @@ void qemu_system_reset(ShutdownCause reason) } else { qemu_devices_reset(); } - if (reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) { + if (reason && reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) { qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); } cpu_synchronize_all_post_reset(); From 21e709aa071d301f154f137bffaaa19fe11e88bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 18 Jul 2019 16:04:13 +0400 Subject: [PATCH 10/12] build-sys: do no support modules on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our module system does not support Windows, because it relies on resolving symbols from the main executable. 
If there is enough interest in supporting modules on Windows, we could generate an import library for the executable and link with it: https://stackoverflow.com/questions/15454968/dll-plugin-that-uses-functions-defined-in-the-main-executable However, there is a small chicken-and-egg problem, since the executable link and exports extra symbols needed by the library... Signed-off-by: Marc-André Lureau Message-Id: <20190718120413.27678-1-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- configure | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 99c64be6b7..7be0e68222 100755 --- a/configure +++ b/configure @@ -1752,7 +1752,7 @@ disabled with --disable-FEATURE, default is enabled if available: guest-agent build the QEMU Guest Agent guest-agent-msi build guest agent Windows MSI installation package pie Position Independent Executables - modules modules support + modules modules support (non-Windows) debug-tcg TCG debugging (default is disabled) debug-info debugging information sparse sparse checker @@ -2007,6 +2007,11 @@ else QEMU_CFLAGS="$QEMU_CFLAGS -Wno-missing-braces" fi +# Our module code doesn't support Windows +if test "$modules" = "yes" && test "$mingw32" = "yes" ; then + error_exit "Modules are not available for Windows" +fi + # Static linking is not possible with modules or PIE if test "$static" = "yes" ; then if test "$modules" = "yes" ; then From 2924ab02c28ce8d32da144a6ae8bfc5a8d7e072b Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Fri, 19 Jul 2019 14:12:22 +0300 Subject: [PATCH 11/12] i386: indicate that 'pconfig' feature was removed intentionally pconfig feature was added in 5131dc433df and removed in 712f807e196. This patch marks this feature as known to QEMU and removed intentionally. This follows the convention of 9ccb9784b57 and f1a23522b03 dealing with 'osxsave' and 'ospke'. Signed-off-by: Denis V. 
Lunev CC: Paolo Bonzini CC: Richard Henderson CC: Eduardo Habkost Message-Id: <20190719111222.14943-1-den@openvz.org> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 805ce95247..e3320f5e92 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1083,7 +1083,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, NULL, "md-clear", NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, NULL /* pconfig */, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "spec-ctrl", "stibp", NULL, "arch-capabilities", "core-capability", "ssbd", From d4b976c0a81dc625ccd05e2b3075f353170669d4 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 19 Jul 2019 12:41:18 +0200 Subject: [PATCH 12/12] target/i386: sev: fix failed message typos In these multiline messages, there were typos. Fix them -- add a missing space and remove a superfluous apostrophe. Inspired by Tom's patch. 
Signed-off-by: Jiri Slaby Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Cc: qemu-trivial@nongnu.org Cc: Brijesh Singh Cc: Tom Lendacky Message-Id: <20190719104118.17735-1-jslaby@suse.cz> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 5ba1384ea1..f1423cb0c0 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -763,7 +763,7 @@ sev_guest_init(const char *id) "reduced-phys-bits", NULL); if (s->reduced_phys_bits < 1) { error_report("%s: reduced_phys_bits check failed, it should be >=1," - "' requested '%d'", __func__, s->reduced_phys_bits); + " requested '%d'", __func__, s->reduced_phys_bits); goto err; } @@ -783,7 +783,7 @@ sev_guest_init(const char *id) ret = sev_platform_ioctl(s->sev_fd, SEV_PLATFORM_STATUS, &status, &fw_error); if (ret) { - error_report("%s: failed to get platform status ret=%d" + error_report("%s: failed to get platform status ret=%d " "fw_error='%d: %s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); goto err;