From a2b107dbbd342ff2077aa5af705efaf68c375459 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Mon, 26 Nov 2018 14:59:58 +0100 Subject: [PATCH 1/5] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words It was found that QMP users of QEMU (e.g. libvirt) may need HV_CPUID_ENLIGHTMENT_INFO.EAX/HV_CPUID_NESTED_FEATURES.EAX information. In particular, 'hv_tlbflush' and 'hv_evmcs' enlightenments are only exposed in HV_CPUID_ENLIGHTMENT_INFO.EAX. HV_CPUID_NESTED_FEATURES.EAX is exposed for two reasons: convenience (we don't need to export it from hyperv_handle_properties() and as future-proof for Enlightened MSR-Bitmap, PV EPT invalidation and direct virtual flush features. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Message-Id: <20181126135958.20956-1-vkuznets@redhat.com> Reviewed-by: Roman Kagan <rkagan@virtuozzo.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> --- target/i386/cpu.c | 30 +++++++++++++++++ target/i386/cpu.h | 2 ++ target/i386/kvm.c | 85 +++++++++++++++++++++++++---------------------- 3 files changed, 77 insertions(+), 40 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index fa37203d89..bd005f5374 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -980,6 +980,36 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .cpuid = { .eax = 0x40000003, .reg = R_EDX, }, }, + [FEAT_HV_RECOMM_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL /* hv_recommend_pv_as_switch */, + NULL /* hv_recommend_pv_tlbflush_local */, + NULL /* hv_recommend_pv_tlbflush_remote */, + NULL /* hv_recommend_msr_apic_access */, + NULL /* hv_recommend_msr_reset */, + NULL /* hv_recommend_relaxed_timing */, + NULL /* hv_recommend_dma_remapping */, + NULL /* hv_recommend_int_remapping */, + NULL /* hv_recommend_x2apic_msrs */, + NULL /* hv_recommend_autoeoi_deprecation */, + NULL /* hv_recommend_pv_ipi */, + NULL /* hv_recommend_ex_hypercalls */, + NULL /* hv_hypervisor_is_nested */, + NULL /* hv_recommend_int_mbec */, + NULL /* hv_recommend_evmcs */, + NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x40000004, .reg = R_EAX, }, + }, + [FEAT_HV_NESTED_EAX] = { + .type = CPUID_FEATURE_WORD, + .cpuid = { .eax = 0x4000000A, .reg = R_EAX, }, + }, [FEAT_SVM] = { .type = CPUID_FEATURE_WORD, .feat_names = { diff --git a/target/i386/cpu.h b/target/i386/cpu.h index ef41a033c5..386094a241 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -497,6 +497,8 @@ typedef enum FeatureWord { FEAT_HYPERV_EAX, /* CPUID[4000_0003].EAX */ FEAT_HYPERV_EBX, /* CPUID[4000_0003].EBX */ FEAT_HYPERV_EDX, /* CPUID[4000_0003].EDX */ + FEAT_HV_RECOMM_EAX, /* CPUID[4000_0004].EAX */ + FEAT_HV_NESTED_EAX, /* CPUID[4000_000A].EAX */ FEAT_SVM, /* CPUID[8000_000A].EDX */ FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ FEAT_6_EAX, /* CPUID[6].EAX */ diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 739cf8c8ea..9af4542fb8 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -798,6 +798,48 @@ static int hyperv_handle_properties(CPUState *cs) } env->features[FEAT_HYPERV_EAX] |= HV_SYNTIMERS_AVAILABLE; } + if (cpu->hyperv_relaxed_timing) { + env->features[FEAT_HV_RECOMM_EAX] |= HV_RELAXED_TIMING_RECOMMENDED; + } + if (cpu->hyperv_vapic) { + env->features[FEAT_HV_RECOMM_EAX] |= HV_APIC_ACCESS_RECOMMENDED; + } + if (cpu->hyperv_tlbflush) { + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_TLBFLUSH) <= 0) { + fprintf(stderr, "Hyper-V TLB flush support " + "(requested by 'hv-tlbflush' cpu flag) " + " is not supported by kernel\n"); + return -ENOSYS; + } + env->features[FEAT_HV_RECOMM_EAX] |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; + env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; + } + if (cpu->hyperv_ipi) { + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_SEND_IPI) <= 0) { + fprintf(stderr, "Hyper-V IPI send support " + "(requested by 'hv-ipi' cpu flag) " + " is not supported by kernel\n"); + return -ENOSYS; + } + env->features[FEAT_HV_RECOMM_EAX] |= HV_CLUSTER_IPI_RECOMMENDED; + env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; + } + if (cpu->hyperv_evmcs) { + uint16_t evmcs_version; + + if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, + (uintptr_t)&evmcs_version)) { + fprintf(stderr, "Hyper-V Enlightened VMCS " + "(requested by 'hv-evmcs' cpu flag) " + "is not supported by kernel\n"); + return -ENOSYS; + } + env->features[FEAT_HV_RECOMM_EAX] |= HV_ENLIGHTENED_VMCS_RECOMMENDED; + env->features[FEAT_HV_NESTED_EAX] = evmcs_version; + } + return 0; } @@ -879,7 +921,6 @@ int kvm_arch_init_vcpu(CPUState *cs) uint32_t unused; struct kvm_cpuid_entry2 *c; uint32_t signature[3]; - uint16_t evmcs_version; int kvm_base = KVM_CPUID_SIGNATURE; int r; Error *local_err = NULL; @@ -954,44 +995,8 @@ int kvm_arch_init_vcpu(CPUState *cs) c = &cpuid_data.entries[cpuid_i++]; c->function = HV_CPUID_ENLIGHTMENT_INFO; - if (cpu->hyperv_relaxed_timing) { - c->eax |= HV_RELAXED_TIMING_RECOMMENDED; - } - if (cpu->hyperv_vapic) { - c->eax |= HV_APIC_ACCESS_RECOMMENDED; - } - if (cpu->hyperv_tlbflush) { - if (kvm_check_extension(cs->kvm_state, - KVM_CAP_HYPERV_TLBFLUSH) <= 0) { - fprintf(stderr, "Hyper-V TLB flush support " - "(requested by 'hv-tlbflush' cpu flag) " - " is not supported by kernel\n"); - return -ENOSYS; - } - c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; - c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; - } - if (cpu->hyperv_ipi) { - if (kvm_check_extension(cs->kvm_state, - KVM_CAP_HYPERV_SEND_IPI) <= 0) { - fprintf(stderr, "Hyper-V IPI send support " - "(requested by 'hv-ipi' cpu flag) " - " is not supported by kernel\n"); - return -ENOSYS; - } - c->eax |= HV_CLUSTER_IPI_RECOMMENDED; - c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; - } - if (cpu->hyperv_evmcs) { - if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, - (uintptr_t)&evmcs_version)) { - fprintf(stderr, "Hyper-V Enlightened VMCS " - "(requested by 'hv-evmcs' cpu flag) " - "is not supported by kernel\n"); - return -ENOSYS; - } - c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; - } + + c->eax = env->features[FEAT_HV_RECOMM_EAX]; c->ebx = cpu->hyperv_spinlock_attempts; c = &cpuid_data.entries[cpuid_i++]; @@ -1015,7 +1020,7 @@ int kvm_arch_init_vcpu(CPUState *cs) c = &cpuid_data.entries[cpuid_i++]; c->function = HV_CPUID_NESTED_FEATURES; - c->eax = evmcs_version; + c->eax = env->features[FEAT_HV_NESTED_EAX]; } } From 483c6ad426dbab72d912fe4793d7d558671aa727 Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Thu, 20 Dec 2018 10:07:32 -0200 Subject: [PATCH 2/5] target-i386: Reenable RDTSCP support on Opteron_G[345] CPU models CPU models The missing functionality was added ~3 years ago with the Linux commit 46896c73c1a4 ("KVM: svm: add support for RDTSCP") so reenable RDTSCP support on those CPU models. Opteron_G2 - being family 15, model 6, doesn't have RDTSCP support (the real hardware doesn't have it. K8 got RDTSCP support with the NPT models, i.e., models >= 0x40). Document the host's minimum required kernel version, while at it. Signed-off-by: Borislav Petkov <bp@suse.de> Message-ID: <20181212200803.GG6653@zn.tnic> [ehabkost: moved compat properties code to pc.c] Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> --- hw/i386/pc.c | 3 +++ qemu-doc.texi | 13 +++++++++++++ target/i386/cpu.c | 11 ++++------- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index fc65049e1d..5cac33fcb9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -112,6 +112,9 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; GlobalProperty pc_compat_3_1[] = { { "intel-iommu", "dma-drain", "off" }, + { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, + { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, + { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, }; const size_t pc_compat_3_1_len = G_N_ELEMENTS(pc_compat_3_1); diff --git a/qemu-doc.texi b/qemu-doc.texi index f7ad1dfe4b..16b955cbf9 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -37,6 +37,7 @@ * QEMU System emulator for non PC targets:: * QEMU Guest Agent:: * QEMU User space emulator:: +* System requirements:: * Implementation notes:: * Deprecated features:: * Supported build platforms:: @@ -2813,6 +2814,18 @@ Act as if the host page size was 'pagesize' bytes Run the emulation in single step mode. @end table +@node System requirements +@chapter System requirements + +@section KVM kernel module + +On x86_64 hosts, the default set of CPU features enabled by the KVM accelerator +require the host to be running Linux v4.5 or newer. + +The OpteronG[345] CPU models require KVM support for RDTSCP, which was +added with Linux 4.5 which is supported by the major distros. And even +if RHEL7 has kernel 3.10, KVM there has the required functionality there +to make it close to a 4.5 or newer kernel. @include qemu-tech.texi diff --git a/target/i386/cpu.c b/target/i386/cpu.c index bd005f5374..4cfc0daf97 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2736,7 +2736,6 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_DE | CPUID_FP87, .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = @@ -2760,9 +2759,9 @@ static X86CPUDefinition builtin_x86_defs[] = { .features[FEAT_1_ECX] = CPUID_EXT_POPCNT | CPUID_EXT_CX16 | CPUID_EXT_MONITOR | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, + CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL | + CPUID_EXT2_RDTSCP, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, @@ -2787,10 +2786,9 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, + CPUID_EXT2_SYSCALL | CPUID_EXT2_RDTSCP, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_FMA4 | CPUID_EXT3_XOP | CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | @@ -2818,10 +2816,9 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, + CPUID_EXT2_SYSCALL | CPUID_EXT2_RDTSCP, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_TBM | CPUID_EXT3_FMA4 | CPUID_EXT3_XOP | CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | From ecb85fe48cacb2f8740186e81f2f38a2e02bd963 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonzini@redhat.com> Date: Thu, 20 Dec 2018 13:11:00 +0100 Subject: [PATCH 3/5] target/i386: Disable MPX support on named CPU models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MPX support is being phased out by Intel; GCC has dropped it, Linux is also going to do that. Even though KVM will have special code to support MPX after the kernel proper stops enabling it in XCR0, we probably also want to deprecate that in a few years. As a start, do not enable it by default for any named CPU model starting with the 4.0 machine types; this include Skylake, Icelake and Cascadelake. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20181220121100.21554-1-pbonzini@redhat.com> Reviewed-by: Wainer dos Santos Moschetta <wainersm@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> --- hw/i386/pc.c | 7 +++++++ target/i386/cpu.c | 14 +++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 5cac33fcb9..73d688f842 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -115,6 +115,13 @@ GlobalProperty pc_compat_3_1[] = { { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, + { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, + { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, }; const size_t pc_compat_3_1_len = G_N_ELEMENTS(pc_compat_3_1); diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 4cfc0daf97..9d4dccf020 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2326,7 +2326,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX, + CPUID_7_0_EBX_SMAP, /* Missing: XSAVES (not supported by some Linux versions, * including v4.1 to v4.12). * KVM doesn't yet expose any XSAVES state save component, @@ -2373,7 +2373,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX, + CPUID_7_0_EBX_SMAP, /* Missing: XSAVES (not supported by some Linux versions, * including v4.1 to v4.12). * KVM doesn't yet expose any XSAVES state save component, @@ -2418,7 +2418,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, @@ -2470,7 +2470,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_AVX512VL, @@ -2520,7 +2520,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | @@ -2576,7 +2576,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_INTEL_PT, + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_INTEL_PT, .features[FEAT_7_0_ECX] = CPUID_7_0_ECX_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | CPUID_7_0_ECX_VBMI2 | CPUID_7_0_ECX_GFNI | @@ -2631,7 +2631,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | From 258fe08bd341d2e230676228307294e41f33002c Mon Sep 17 00:00:00 2001 From: Eduardo Habkost <ehabkost@redhat.com> Date: Tue, 11 Dec 2018 17:25:27 -0200 Subject: [PATCH 4/5] x86: host-phys-bits-limit option Some downstream distributions of QEMU set host-phys-bits=on by default. This worked very well for most use cases, because phys-bits really didn't have huge consequences. The only difference was on the CPUID data seen by guests, and on the handling of reserved bits. This changed in KVM commit 855feb673640 ("KVM: MMU: Add 5 level EPT & Shadow page table support"). Now choosing a large phys-bits value for a VM has bigger impact: it will make KVM use 5-level EPT even when it's not really necessary. This means using the host phys-bits value may not be the best choice. Management software could address this problem by manually configuring phys-bits depending on the size of the VM and the amount of MMIO address space required for hotplug. But this is not trivial to implement. However, there's another workaround that would work for most cases: keep using the host phys-bits value, but only if it's smaller than 48. This patch makes this possible by introducing a new "-cpu" option: "host-phys-bits-limit". Management software or users can make sure they will always use 4-level EPT using: "host-phys-bits=on,host-phys-bits-limit=48". This behavior is still not enabled by default because QEMU doesn't enable host-phys-bits=on by default. But users, management software, or downstream distributions may choose to change their defaults using the new option. Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Message-Id: <20181211192527.13254-1-ehabkost@redhat.com> [ehabkost: removed test code while some issues are addressed] Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> --- target/i386/cpu.c | 5 +++++ target/i386/cpu.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 9d4dccf020..3ece83696e 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5178,6 +5178,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) if (cpu->host_phys_bits) { /* The user asked for us to use the host physical bits */ cpu->phys_bits = host_phys_bits; + if (cpu->host_phys_bits_limit && + cpu->phys_bits > cpu->host_phys_bits_limit) { + cpu->phys_bits = cpu->host_phys_bits_limit; + } } /* Print a warning if the user set it to a value that's not the @@ -5765,6 +5769,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, UINT32_MAX), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 386094a241..59656a70e6 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1461,6 +1461,9 @@ struct X86CPU { /* if true override the phys_bits value with a value read from the host */ bool host_phys_bits; + /* if set, limit maximum value for phys_bits when host_phys_bits is true */ + uint8_t host_phys_bits_limit; + /* Stop SMI delivery for migration compatibility with old machines */ bool kvm_no_smi_migration; From abd5fc4c862d033a989552914149f01c9476bb16 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Fri, 21 Dec 2018 15:16:04 +0100 Subject: [PATCH 5/5] i386/kvm: add a comment explaining why .feat_names are commented out for Hyper-V feature bits Hyper-V .feat_names are, unlike hardware features, commented out and it is not obvious why we do that. Document the current status quo. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Message-Id: <20181221141604.16935-1-vkuznets@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> --- target/i386/cpu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 3ece83696e..2f5412592d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -929,6 +929,13 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { */ .no_autoenable_flags = ~0U, }, + /* + * .feat_names are commented out for Hyper-V enlightenments because we + * don't want to have two different ways for enabling them on QEMU command + * line. Some features (e.g. "hyperv_time", "hyperv_vapic", ...) require + * enabling several feature bits simultaneously, exposing these bits + * individually may just confuse guests. + */ [FEAT_HYPERV_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = {