From 67f85346ca9305d9fb3254ceff735ceaadeb0911 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 27 Jun 2023 16:14:06 +1000 Subject: [PATCH 1/5] icount: don't adjust virtual time backwards after warp The icount-based QEMU_CLOCK_VIRTUAL runs ahead of the RT clock at times. When warping, it is possible it is still ahead at the end of the warp, which causes icount adaptive mode to adjust it backward. This can result in the machine observing time going backwards. Prevent this by clamping adaptive adjustment to 0 at minimum. Signed-off-by: Nicholas Piggin Message-ID: <20230627061406.241847-1-npiggin@gmail.com> Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- softmmu/icount.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/softmmu/icount.c b/softmmu/icount.c index 4504433e16..a5cef9c60a 100644 --- a/softmmu/icount.c +++ b/softmmu/icount.c @@ -259,11 +259,16 @@ static void icount_warp_rt(void) warp_delta = clock - timers_state.vm_clock_warp_start; if (icount_enabled() == 2) { /* - * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too - * far ahead of real time. + * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far + * ahead of real time (it might already be ahead so careful not + * to go backwards). */ int64_t cur_icount = icount_get_locked(); int64_t delta = clock - cur_icount; + + if (delta < 0) { + delta = 0; + } warp_delta = MIN(warp_delta, delta); } qatomic_set_i64(&timers_state.qemu_icount_bias, From 5bef742cc4f0e21c80a31611af7881ba811e507f Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 23 Jun 2023 13:26:25 -0700 Subject: [PATCH 2/5] target/i386: Export MSR_ARCH_CAPABILITIES bits to guests On Intel CPUs there are certain bits in MSR_ARCH_CAPABILITIES that indicates if the CPU is not affected by a vulnerability. Without these bits guests may try to deploy the mitigation even if the CPU is not affected. Export the bits to guests that indicate immunity to hardware vulnerabilities. Signed-off-by: Pawan Gupta Message-ID: <63d85cc76d4cdc51e6c732478b81d8f13be11e5a.1687551881.git.pawan.kumar.gupta@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c0fb6b3ad9..b96a609d96 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1060,10 +1060,10 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", "ssb-no", "mds-no", "pschange-mc-no", "tsx-ctrl", "taa-no", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", NULL, "fb-clear", NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "pbrsb-no", NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, .msr = { From 9fb4f5f5a1fd7619e221fd5068003c5b491b2bf0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 17 Jun 2023 01:13:34 +0200 Subject: [PATCH 3/5] target/i386: ignore ARCH_CAPABILITIES features in user mode emulation ARCH_CAPABILITIES is only accessible through a read-only MSR, so it has no impact on any user-mode operation (user-mode cannot read the MSR). So do not bother printing warnings about it in user mode emulation. Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index b96a609d96..328779874f 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1069,6 +1069,13 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .msr = { .index = MSR_IA32_ARCH_CAPABILITIES, }, + /* + * FEAT_ARCH_CAPABILITIES only affects a read-only MSR, which + * cannot be read from user mode. Therefore, it has no impact + > on any user-mode operation, and warnings about unsupported + * features do not matter. + */ + .tcg_features = ~0U, }, [FEAT_CORE_CAPABILITY] = { .type = MSR_FEATURE_WORD, From d903259dd2dbe40e007db1724dd072c5e210b3f4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 21 Jun 2023 01:17:28 +0200 Subject: [PATCH 4/5] target/i386: ignore CPL0-specific features in user mode emulation Features such as PCID are only accessible through privileged operations, and therefore have no impact on any user-mode operation. Allow reporting them to programs running under user mode emulation, so that "-cpu" can be used with more named CPU models. XSAVES would be similar, but it doesn't make sense to provide it until XSAVEC is implemented. With this change, all CPUs up to Broadwell-v4 can be emulate. Skylake-Client requires XSAVEC, while EPYC also requires SHA-NI, MISALIGNSSE and TOPOEXT. MISALIGNSSE is not hard to implement, but I am not sure it is worth using a precious hflags bit for it. Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1534 Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 83 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 7 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 328779874f..a3ddd1b613 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -623,13 +623,25 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */ /* missing: CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */ + +/* + * Kernel-only features that can be shown to usermode programs even if + * they aren't actually supported by TCG, because qemu-user only runs + * in CPL=3; remove them if they are ever implemented for system emulation. + */ +#if defined CONFIG_USER_ONLY +#define CPUID_EXT_KERNEL_FEATURES (CPUID_EXT_PCID | CPUID_EXT_TSC_DEADLINE_TIMER | \ + CPUID_EXT_X2APIC) +#else +#define CPUID_EXT_KERNEL_FEATURES 0 +#endif #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \ CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \ CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \ CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */ \ CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR | \ CPUID_EXT_RDRAND | CPUID_EXT_AVX | CPUID_EXT_F16C | \ - CPUID_EXT_FMA) + CPUID_EXT_FMA | CPUID_EXT_KERNEL_FEATURES) /* missing: CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, @@ -642,22 +654,63 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_EXT2_X86_64_FEATURES 0 #endif +/* + * CPUID_*_KERNEL_FEATURES denotes bits and features that are not usable + * in usermode or by 32-bit programs. Those are added to supported + * TCG features unconditionally in user-mode emulation mode. This may + * indeed seem strange or incorrect, but it works because code running + * under usermode emulation cannot access them. + * + * Even for long mode, qemu-i386 is not running "a userspace program on a + * 32-bit CPU"; it's running "a userspace program with a 32-bit code segment" + * and therefore using the 32-bit ABI; the CPU itself might be 64-bit + * but again the difference is only visible in kernel mode. + */ +#if defined CONFIG_USER_ONLY +#define CPUID_EXT2_KERNEL_FEATURES CPUID_EXT2_FFXSR +#else +#define CPUID_EXT2_KERNEL_FEATURES 0 +#endif + #define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \ CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \ CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_PDPE1GB | \ - CPUID_EXT2_SYSCALL | TCG_EXT2_X86_64_FEATURES) + CPUID_EXT2_SYSCALL | TCG_EXT2_X86_64_FEATURES | \ + CPUID_EXT2_KERNEL_FEATURES) + +#if defined CONFIG_USER_ONLY +#define CPUID_EXT3_KERNEL_FEATURES CPUID_EXT3_OSVW +#else +#define CPUID_EXT3_KERNEL_FEATURES 0 +#endif + #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \ CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A | \ - CPUID_EXT3_3DNOWPREFETCH) + CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES) + #define TCG_EXT4_FEATURES 0 + +#if defined CONFIG_USER_ONLY +#define CPUID_SVM_KERNEL_FEATURES (CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI) +#else +#define CPUID_SVM_KERNEL_FEATURES 0 +#endif #define TCG_SVM_FEATURES (CPUID_SVM_NPT | CPUID_SVM_VGIF | \ - CPUID_SVM_SVME_ADDR_CHK) + CPUID_SVM_SVME_ADDR_CHK | CPUID_SVM_KERNEL_FEATURES) + #define TCG_KVM_FEATURES 0 + +#if defined CONFIG_USER_ONLY +#define CPUID_7_0_EBX_KERNEL_FEATURES CPUID_7_0_EBX_INVPCID +#else +#define CPUID_7_0_EBX_KERNEL_FEATURES 0 +#endif #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP | \ CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX | \ CPUID_7_0_EBX_PCOMMIT | CPUID_7_0_EBX_CLFLUSHOPT | \ CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_FSGSBASE | \ - CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_RDSEED) + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_RDSEED | \ + CPUID_7_0_EBX_KERNEL_FEATURES) /* missing: CPUID_7_0_EBX_HLE CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM */ @@ -672,7 +725,14 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_7_0_ECX_LA57 | CPUID_7_0_ECX_PKS | CPUID_7_0_ECX_VAES | \ TCG_7_0_ECX_RDPID) -#define TCG_7_0_EDX_FEATURES CPUID_7_0_EDX_FSRM +#if defined CONFIG_USER_ONLY +#define CPUID_7_0_EDX_KERNEL_FEATURES (CPUID_7_0_EDX_SPEC_CTRL | \ + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD) +#else +#define CPUID_7_0_EDX_KERNEL_FEATURES 0 +#endif +#define TCG_7_0_EDX_FEATURES (CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_KERNEL_FEATURES) + #define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ CPUID_7_1_EAX_FSRC) #define TCG_7_1_EDX_FEATURES 0 @@ -686,8 +746,17 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_SGX_12_0_EBX_FEATURES 0 #define TCG_SGX_12_1_EAX_FEATURES 0 +#if defined CONFIG_USER_ONLY +#define CPUID_8000_0008_EBX_KERNEL_FEATURES (CPUID_8000_0008_EBX_IBPB | \ + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | \ + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | CPUID_8000_0008_EBX_AMD_SSBD | \ + CPUID_8000_0008_EBX_AMD_PSFD) +#else +#define CPUID_8000_0008_EBX_KERNEL_FEATURES 0 +#endif + #define TCG_8000_0008_EBX (CPUID_8000_0008_EBX_XSAVEERPTR | \ - CPUID_8000_0008_EBX_WBNOINVD) + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_KERNEL_FEATURES) FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_1_EDX] = { From 40a205da415e9c10ff02505078700e14ead77092 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 20 Jun 2023 16:49:35 +0200 Subject: [PATCH 5/5] target/i386: emulate 64-bit ring 0 for linux-user if LM feature is set 32-bit binaries can run on a long mode processor even if the kernel is 64-bit, of course, and this can have slightly different behavior; for example, SYSCALL is allowed on Intel processors. Allow reporting LM to programs running under user mode emulation, so that "-cpu" can be used with named CPU models even for qemu-i386 and even without disabling LM by hand. Fortunately, most of the runtime code in QEMU has to depend on HF_LMA_MASK or on HF_CS64_MASK (which is anyway false for qemu-i386's 32-bit code segment) rather than TARGET_X86_64, therefore all that is needed is an update of linux-user's ring 0 setup. Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1534 Signed-off-by: Paolo Bonzini --- linux-user/i386/cpu_loop.c | 57 ++++++++++++++++++------------------- target/i386/cpu.c | 15 ++++++++-- target/i386/tcg/translate.c | 6 ++-- 3 files changed, 44 insertions(+), 34 deletions(-) diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c index 9eeda551ea..ef2dcb3d76 100644 --- a/linux-user/i386/cpu_loop.c +++ b/linux-user/i386/cpu_loop.c @@ -47,7 +47,7 @@ static void write_dt(void *ptr, unsigned long addr, unsigned long limit, } static uint64_t *idt_table; -#ifdef TARGET_X86_64 + static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, uint64_t addr, unsigned int sel) { @@ -60,8 +60,10 @@ static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, p[2] = tswap32(addr >> 32); p[3] = 0; } + +#ifdef TARGET_X86_64 /* only dpl matters as we do only user space emulation */ -static void set_idt(int n, unsigned int dpl) +static void set_idt(int n, unsigned int dpl, bool is64) { set_gate64(idt_table + n * 2, 0, dpl, 0, 0); } @@ -78,9 +80,13 @@ static void set_gate(void *ptr, unsigned int type, unsigned int dpl, } /* only dpl matters as we do only user space emulation */ -static void set_idt(int n, unsigned int dpl) +static void set_idt(int n, unsigned int dpl, bool is64) { - set_gate(idt_table + n, 0, dpl, 0, 0); + if (is64) { + set_gate64(idt_table + n * 2, 0, dpl, 0, 0); + } else { + set_gate(idt_table + n, 0, dpl, 0, 0); + } } #endif @@ -325,6 +331,9 @@ static void target_cpu_free(void *obj) void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) { CPUState *cpu = env_cpu(env); + bool is64 = (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) != 0; + int i; + OBJECT(cpu)->free = target_cpu_free; env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; env->hflags |= HF_PE_MASK | HF_CPL_MASK; @@ -332,15 +341,18 @@ void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) env->cr[4] |= CR4_OSFXSR_MASK; env->hflags |= HF_OSFXSR_MASK; } -#ifndef TARGET_ABI32 + /* enable 64 bit mode if possible */ - if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { + if (is64) { + env->cr[4] |= CR4_PAE_MASK; + env->efer |= MSR_EFER_LMA | MSR_EFER_LME; + env->hflags |= HF_LMA_MASK; + } +#ifndef TARGET_ABI32 + else { fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); exit(EXIT_FAILURE); } - env->cr[4] |= CR4_PAE_MASK; - env->efer |= MSR_EFER_LMA | MSR_EFER_LME; - env->hflags |= HF_LMA_MASK; #endif /* flags setup : we activate the IRQs by default as in user mode */ @@ -379,27 +391,12 @@ void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); idt_table = g2h_untagged(env->idt.base); - set_idt(0, 0); - set_idt(1, 0); - set_idt(2, 0); - set_idt(3, 3); - set_idt(4, 3); - set_idt(5, 0); - set_idt(6, 0); - set_idt(7, 0); - set_idt(8, 0); - set_idt(9, 0); - set_idt(10, 0); - set_idt(11, 0); - set_idt(12, 0); - set_idt(13, 0); - set_idt(14, 0); - set_idt(15, 0); - set_idt(16, 0); - set_idt(17, 0); - set_idt(18, 0); - set_idt(19, 0); - set_idt(0x80, 3); + for (i = 0; i < 20; i++) { + set_idt(i, 0, is64); + } + set_idt(3, 3, is64); + set_idt(4, 3, is64); + set_idt(0x80, 3, is64); /* linux segment setup */ { diff --git a/target/i386/cpu.c b/target/i386/cpu.c index a3ddd1b613..b5688cabb4 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -666,7 +666,10 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, * and therefore using the 32-bit ABI; the CPU itself might be 64-bit * but again the difference is only visible in kernel mode. */ -#if defined CONFIG_USER_ONLY +#if defined CONFIG_LINUX_USER +#define CPUID_EXT2_KERNEL_FEATURES (CPUID_EXT2_LM | CPUID_EXT2_FFXSR) +#elif defined CONFIG_USER_ONLY +/* FIXME: Long mode not yet supported for i386 bsd-user */ #define CPUID_EXT2_KERNEL_FEATURES CPUID_EXT2_FFXSR #else #define CPUID_EXT2_KERNEL_FEATURES 0 @@ -5539,7 +5542,15 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, } #ifndef TARGET_X86_64 if (w == FEAT_8000_0001_EDX) { - r &= ~CPUID_EXT2_LM; + /* + * 32-bit TCG can emulate 64-bit compatibility mode. If there is no + * way for userspace to get out of its 32-bit jail, we can leave + * the LM bit set. + */ + uint32_t unavail = tcg_enabled() + ? CPUID_EXT2_LM & ~CPUID_EXT2_KERNEL_FEATURES + : CPUID_EXT2_LM; + r &= ~unavail; } #endif if (migratable_only) { diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 28cb3fb7f4..90c7b32f36 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -173,12 +173,14 @@ typedef struct DisasContext { #endif #if !defined(TARGET_X86_64) #define CODE64(S) false -#define LMA(S) false #elif defined(CONFIG_USER_ONLY) #define CODE64(S) true -#define LMA(S) true #else #define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0) +#endif +#if defined(CONFIG_SOFTMMU) && !defined(TARGET_X86_64) +#define LMA(S) false +#else #define LMA(S) (((S)->flags & HF_LMA_MASK) != 0) #endif