diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 905fb844e4..801cff16a5 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -69,6 +69,11 @@ #define KVM_GUESTDBG_BLOCKIRQ 0 #endif +/* Default num of memslots to be allocated when VM starts */ +#define KVM_MEMSLOTS_NR_ALLOC_DEFAULT 16 +/* Default max allowed memslots if kernel reported nothing */ +#define KVM_MEMSLOTS_NR_MAX_DEFAULT 32 + struct KVMParkedVcpu { unsigned long vcpu_id; int kvm_fd; @@ -165,11 +170,62 @@ void kvm_resample_fd_notify(int gsi) } } +/** + * kvm_slots_grow(): Grow the slots[] array in the KVMMemoryListener + * + * @kml: The KVMMemoryListener* to grow the slots[] array + * @nr_slots_new: The new size of slots[] array + * + * Returns: True if the array grows larger, false otherwise. + */ +static bool kvm_slots_grow(KVMMemoryListener *kml, unsigned int nr_slots_new) +{ + unsigned int i, cur = kml->nr_slots_allocated; + KVMSlot *slots; + + if (nr_slots_new > kvm_state->nr_slots_max) { + nr_slots_new = kvm_state->nr_slots_max; + } + + if (cur >= nr_slots_new) { + /* Big enough, no need to grow, or we reached max */ + return false; + } + + if (cur == 0) { + slots = g_new0(KVMSlot, nr_slots_new); + } else { + assert(kml->slots); + slots = g_renew(KVMSlot, kml->slots, nr_slots_new); + /* + * g_renew() doesn't initialize extended buffers, however kvm + * memslots require fields to be zero-initialized. E.g. pointers, + * memory_size field, etc. + */ + memset(&slots[cur], 0x0, sizeof(slots[0]) * (nr_slots_new - cur)); + } + + for (i = cur; i < nr_slots_new; i++) { + slots[i].slot = i; + } + + kml->slots = slots; + kml->nr_slots_allocated = nr_slots_new; + trace_kvm_slots_grow(cur, nr_slots_new); + + return true; +} + +static bool kvm_slots_double(KVMMemoryListener *kml) +{ + return kvm_slots_grow(kml, kml->nr_slots_allocated * 2); +} + unsigned int kvm_get_max_memslots(void) { KVMState *s = KVM_STATE(current_accel()); - return s->nr_slots; + return s->nr_slots_max; } unsigned int kvm_get_free_memslots(void) @@ -183,25 +239,36 @@ unsigned int kvm_get_free_memslots(void) if (!s->as[i].ml) { continue; } - used_slots = MAX(used_slots, s->as[i].ml->nr_used_slots); + used_slots = MAX(used_slots, s->as[i].ml->nr_slots_used); } kvm_slots_unlock(); - return s->nr_slots - used_slots; + return s->nr_slots_max - used_slots; } /* Called with KVMMemoryListener.slots_lock held */ static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml) { - KVMState *s = kvm_state; + unsigned int n; int i; - for (i = 0; i < s->nr_slots; i++) { + for (i = 0; i < kml->nr_slots_allocated; i++) { if (kml->slots[i].memory_size == 0) { return &kml->slots[i]; } } + /* + * If no free slots, try to grow first by doubling. Cache the old size + * here to avoid another round of search: if the grow succeeded, it + * means slots[] now must have the existing "n" slots occupied, + * followed by one or more free slots starting from slots[n]. 
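+ *
+ * For illustration: with KVM_MEMSLOTS_NR_ALLOC_DEFAULT of 16, the array
+ * grows on demand as 16 -> 32 -> 64 -> ... until it is capped at
+ * kvm_state->nr_slots_max, at which point kvm_slots_double() returns
+ * false and the lookup below gives up and returns NULL.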
+ */ + n = kml->nr_slots_allocated; + if (kvm_slots_double(kml)) { + return &kml->slots[n]; + } + return NULL; } @@ -222,10 +289,9 @@ static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml, hwaddr start_addr, hwaddr size) { - KVMState *s = kvm_state; int i; - for (i = 0; i < s->nr_slots; i++) { + for (i = 0; i < kml->nr_slots_allocated; i++) { KVMSlot *mem = &kml->slots[i]; if (start_addr == mem->start_addr && size == mem->memory_size) { @@ -267,7 +333,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, int i, ret = 0; kvm_slots_lock(); - for (i = 0; i < s->nr_slots; i++) { + for (i = 0; i < kml->nr_slots_allocated; i++) { KVMSlot *mem = &kml->slots[i]; if (ram >= mem->ram && ram < mem->ram + mem->memory_size) { @@ -1071,7 +1137,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml, kvm_slots_lock(); - for (i = 0; i < s->nr_slots; i++) { + for (i = 0; i < kml->nr_slots_allocated; i++) { mem = &kml->slots[i]; /* Discard slots that are empty or do not overlap the section */ if (!mem->memory_size || @@ -1450,7 +1516,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, } start_addr += slot_size; size -= slot_size; - kml->nr_used_slots--; + kml->nr_slots_used--; } while (size); return; } @@ -1489,7 +1555,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, ram_start_offset += slot_size; ram += slot_size; size -= slot_size; - kml->nr_used_slots++; + kml->nr_slots_used++; } while (size); } @@ -1715,12 +1781,8 @@ static void kvm_log_sync_global(MemoryListener *l, bool last_stage) /* Flush all kernel dirty addresses into KVMSlot dirty bitmap */ kvm_dirty_ring_flush(); - /* - * TODO: make this faster when nr_slots is big while there are - * only a few used slots (small VMs). - */ kvm_slots_lock(); - for (i = 0; i < s->nr_slots; i++) { + for (i = 0; i < kml->nr_slots_allocated; i++) { mem = &kml->slots[i]; if (mem->memory_size && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { kvm_slot_sync_dirty_pages(mem); @@ -1835,12 +1897,9 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, { int i; - kml->slots = g_new0(KVMSlot, s->nr_slots); kml->as_id = as_id; - for (i = 0; i < s->nr_slots; i++) { - kml->slots[i].slot = i; - } + kvm_slots_grow(kml, KVM_MEMSLOTS_NR_ALLOC_DEFAULT); QSIMPLEQ_INIT(&kml->transaction_add); QSIMPLEQ_INIT(&kml->transaction_del); @@ -2545,26 +2604,14 @@ static int kvm_init(MachineState *ms) goto err; } - kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); - kvm_guest_memfd_supported = - kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && - kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && - (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); - kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); - s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); + s->nr_slots_max = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); /* If unspecified, use the default value */ - if (!s->nr_slots) { - s->nr_slots = 32; + if (!s->nr_slots_max) { + s->nr_slots_max = KVM_MEMSLOTS_NR_MAX_DEFAULT; } - s->nr_as = kvm_check_extension(s, KVM_CAP_MULTI_ADDRESS_SPACE); - if (s->nr_as <= 1) { - s->nr_as = 1; - } - s->as = g_new0(struct KVMAs, s->nr_as); - type = find_kvm_machine_type(ms); if (type < 0) { ret = -EINVAL; @@ -2578,6 +2625,12 @@ static int kvm_init(MachineState *ms) s->vmfd = ret; + s->nr_as = kvm_vm_check_extension(s, KVM_CAP_MULTI_ADDRESS_SPACE); + if (s->nr_as <= 1) { + s->nr_as = 1; + } + s->as = g_new0(struct KVMAs, s->nr_as); + /* check the vcpu limits */ soft_vcpus_limit = 
kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -2630,7 +2683,7 @@ static int kvm_init(MachineState *ms) } kvm_readonly_mem_allowed = - (kvm_check_extension(s, KVM_CAP_READONLY_MEM) > 0); + (kvm_vm_check_extension(s, KVM_CAP_READONLY_MEM) > 0); kvm_resamplefds_allowed = (kvm_check_extension(s, KVM_CAP_IRQFD_RESAMPLE) > 0); @@ -2664,6 +2717,12 @@ static int kvm_init(MachineState *ms) goto err; } + kvm_supported_memory_attributes = kvm_vm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); + kvm_guest_memfd_supported = + kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && + kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && + (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); + if (s->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index 82c65fd2ab..e43d18a869 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -36,3 +36,4 @@ kvm_io_window_exit(void) "" kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32 kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s" kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64 +kvm_slots_grow(unsigned int old, unsigned int new) "%u -> %u" diff --git a/docs/system/i386/hyperv.rst b/docs/system/i386/hyperv.rst index 2505dc4c86..1c1de77feb 100644 --- a/docs/system/i386/hyperv.rst +++ b/docs/system/i386/hyperv.rst @@ -262,14 +262,19 @@ Supplementary features ``hv-passthrough`` In some cases (e.g. during development) it may make sense to use QEMU in 'pass-through' mode and give Windows guests all enlightenments currently - supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU - flag. + supported by KVM. Note: ``hv-passthrough`` flag only enables enlightenments which are known to QEMU (have corresponding 'hv-' flag) and copies ``hv-spinlocks`` and ``hv-vendor-id`` values from KVM to QEMU. ``hv-passthrough`` overrides all other 'hv-' settings on - the command line. Also, enabling this flag effectively prevents migration as the - list of enabled enlightenments may differ between target and destination hosts. + the command line. + + Note: ``hv-passthrough`` does not enable ``hv-syndbg`` which can prevent certain + Windows guests from booting when used without proper configuration. If needed, + ``hv-syndbg`` can be enabled additionally. + + Note: ``hv-passthrough`` effectively prevents migration as the list of enabled + enlightenments may differ between target and destination hosts. ``hv-enforce-cpuid`` By default, KVM allows the guest to use all currently supported Hyper-V @@ -278,6 +283,36 @@ Supplementary features feature alters this behavior and only allows the guest to use exposed Hyper-V enlightenments. +Recommendations +--------------- + +To achieve the best performance of Windows and Hyper-V guests and unless there +are any specific requirements (e.g. migration to older QEMU/KVM versions, +emulating specific Hyper-V version, ...), it is recommended to enable all +currently implemented Hyper-V enlightenments with the following exceptions: + +- ``hv-syndbg``, ``hv-passthrough``, ``hv-enforce-cpuid`` should not be enabled + in production configurations as these are debugging/development features. +- ``hv-reset`` can be avoided as modern Hyper-V versions don't expose it. 
+- ``hv-evmcs`` can (and should) be enabled on Intel CPUs only. While the feature + is only used in nested configurations (Hyper-V, WSL2), enabling it for regular + Windows guests should not have any negative effects. +- ``hv-no-nonarch-coresharing`` must only be enabled if vCPUs are properly pinned + so no non-architectural core sharing is possible. +- ``hv-vendor-id``, ``hv-version-id-build``, ``hv-version-id-major``, + ``hv-version-id-minor``, ``hv-version-id-spack``, ``hv-version-id-sbranch``, + ``hv-version-id-snumber`` can be left unchanged, guests are not supposed to + behave differently when different Hyper-V version is presented to them. +- ``hv-crash`` must only be enabled if the crash information is consumed via + QAPI by higher levels of the virtualization stack. Enabling this feature + effectively prevents Windows from creating dumps upon crashes. +- ``hv-reenlightenment`` can only be used on hardware which supports TSC + scaling or when guest migration is not needed. +- ``hv-spinlocks`` should be set to e.g. 0xfff when host CPUs are overcommited + (meaning there are other scheduled tasks or guests) and can be left unchanged + from the default value (0xffffffff) otherwise. +- ``hv-avic``/``hv-apicv`` should not be enabled if the hardware does not + support APIC virtualization (Intel APICv, AMD AVIC). Useful links ------------ diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 17483ff53b..a1e72763da 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -45,7 +45,8 @@ typedef struct KVMMemoryUpdate { typedef struct KVMMemoryListener { MemoryListener listener; KVMSlot *slots; - unsigned int nr_used_slots; + unsigned int nr_slots_used; + unsigned int nr_slots_allocated; int as_id; QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; @@ -102,8 +103,8 @@ struct KVMDirtyRingReaper { struct KVMState { AccelState parent_obj; - - int nr_slots; + /* Max number of KVM slots supported */ + int nr_slots_max; int fd; int vmfd; int coalesced_mmio; diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 0d30191482..565aad02ea 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1054,9 +1054,9 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .type = CPUID_FEATURE_WORD, .feat_names = { "fsgsbase", "tsc-adjust", "sgx", "bmi1", - "hle", "avx2", NULL, "smep", + "hle", "avx2", "fdp-excptn-only", "smep", "bmi2", "erms", "invpcid", "rtm", - NULL, NULL, "mpx", NULL, + NULL, "zero-fcs-fds", "mpx", NULL, "avx512f", "avx512dq", "rdseed", "adx", "smap", "avx512ifma", "pcommit", "clflushopt", "clwb", "intel-pt", "avx512pf", "avx512er", @@ -1148,8 +1148,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_7_2_EDX] = { .type = CPUID_FEATURE_WORD, .feat_names = { - NULL, NULL, NULL, NULL, - NULL, "mcdt-no", NULL, NULL, + "intel-psfd", "ipred-ctrl", "rrsba-ctrl", "ddpd-u", + "bhi-ctrl", "mcdt-no", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1865,9 +1865,10 @@ static inline uint64_t x86_cpu_xsave_xss_components(X86CPU *cpu) * Returns the set of feature flags that are supported and migratable by * QEMU, for a given FeatureWord. 
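 * When the guest's TSC frequency is fixed explicitly (env->user_tsc_khz
 * is nonzero), the invtsc bit (CPUID_APM_INVTSC) is reported as
 * migratable as well.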
*/ -static uint64_t x86_cpu_get_migratable_flags(FeatureWord w) +static uint64_t x86_cpu_get_migratable_flags(X86CPU *cpu, FeatureWord w) { FeatureWordInfo *wi = &feature_word_info[w]; + CPUX86State *env = &cpu->env; uint64_t r = 0; int i; @@ -1881,6 +1882,12 @@ static uint64_t x86_cpu_get_migratable_flags(FeatureWord w) r |= f; } } + + /* when tsc-khz is set explicitly, invtsc is migratable */ + if ((w == FEAT_8000_0007_EDX) && env->user_tsc_khz) { + r |= CPUID_APM_INVTSC; + } + return r; } @@ -6124,7 +6131,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w) r &= ~unavail; if (cpu && cpu->migratable) { - r &= x86_cpu_get_migratable_flags(w); + r &= x86_cpu_get_migratable_flags(cpu, w); } return r; } @@ -8292,8 +8299,10 @@ static Property x86_cpu_properties[] = { HYPERV_FEAT_TLBFLUSH_DIRECT, 0), DEFINE_PROP_ON_OFF_AUTO("hv-no-nonarch-coresharing", X86CPU, hyperv_no_nonarch_cs, ON_OFF_AUTO_OFF), +#ifdef CONFIG_SYNDBG DEFINE_PROP_BIT64("hv-syndbg", X86CPU, hyperv_features, HYPERV_FEAT_SYNDBG, 0), +#endif DEFINE_PROP_BOOL("hv-passthrough", X86CPU, hyperv_passthrough, false), DEFINE_PROP_BOOL("hv-enforce-cpuid", X86CPU, hyperv_enforce_cpuid, false), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 4c63e7b045..74886d1580 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -533,6 +533,8 @@ typedef enum X86Seg { #define MSR_AMD64_TSC_RATIO_DEFAULT 0x100000000ULL +#define MSR_K7_HWCR 0xc0010015 + #define MSR_VM_HSAVE_PA 0xc0010117 #define MSR_IA32_XFD 0x000001c4 @@ -820,6 +822,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_EBX_HLE (1U << 4) /* Intel Advanced Vector Extensions 2 */ #define CPUID_7_0_EBX_AVX2 (1U << 5) +/* FPU data pointer updated only on x87 exceptions */ +#define CPUID_7_0_EBX_FDP_EXCPTN_ONLY (1u << 6) /* Supervisor-mode Execution Prevention */ #define CPUID_7_0_EBX_SMEP (1U << 7) /* 2nd Group of Advanced Bit Manipulation Extensions */ @@ -830,6 +834,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_EBX_INVPCID (1U << 10) /* Restricted Transactional Memory */ #define CPUID_7_0_EBX_RTM (1U << 11) +/* Zero out FPU CS and FPU DS */ +#define CPUID_7_0_EBX_ZERO_FCS_FDS (1U << 13) /* Memory Protection Extension */ #define CPUID_7_0_EBX_MPX (1U << 14) /* AVX-512 Foundation */ @@ -1854,6 +1860,9 @@ typedef struct CPUArchState { uint64_t msr_lbr_depth; LBREntry lbr_records[ARCH_LBR_NR_ENTRIES]; + /* AMD MSRC001_0015 Hardware Configuration */ + uint64_t msr_hwcr; + /* exception/interrupt handling */ int error_code; int exception_is_int; diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c index b94f12acc2..70b89cacf9 100644 --- a/target/i386/kvm/hyperv.c +++ b/target/i386/kvm/hyperv.c @@ -80,6 +80,7 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) * necessary because memory hierarchy is being changed */ async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL); + cpu_exit(CPU(cpu)); return EXCP_INTERRUPT; case KVM_EXIT_HYPERV_HCALL: { diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index e6f94900f3..fd9f198892 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -165,6 +165,7 @@ static bool has_msr_ucode_rev; static bool has_msr_vmx_procbased_ctls2; static bool has_msr_perf_capabs; static bool has_msr_pkrs; +static bool has_msr_hwcr; static uint32_t has_architectural_pmu_version; static uint32_t num_architectural_pmu_gp_counters; @@ -933,6 +934,7 @@ static struct { uint32_t bits; } flags[2]; 
uint64_t dependencies; + bool skip_passthrough; } kvm_hyperv_properties[] = { [HYPERV_FEAT_RELAXED] = { .desc = "relaxed timing (hv-relaxed)", @@ -1055,16 +1057,15 @@ static struct { .bits = HV_DEPRECATING_AEOI_RECOMMENDED} } }, -#ifdef CONFIG_SYNDBG [HYPERV_FEAT_SYNDBG] = { .desc = "Enable synthetic kernel debugger channel (hv-syndbg)", .flags = { {.func = HV_CPUID_FEATURES, .reg = R_EDX, .bits = HV_FEATURE_DEBUG_MSRS_AVAILABLE} }, - .dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_RELAXED) + .dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_RELAXED), + .skip_passthrough = true, }, -#endif [HYPERV_FEAT_MSR_BITMAP] = { .desc = "enlightened MSR-Bitmap (hv-emsr-bitmap)", .flags = { @@ -1316,6 +1317,13 @@ static bool hyperv_feature_supported(CPUState *cs, int feature) uint32_t func, bits; int i, reg; + /* + * kvm_hyperv_properties needs to define at least one CPUID flag which + * must be used to detect the feature, it's hard to say whether it is + * supported or not otherwise. + */ + assert(kvm_hyperv_properties[feature].flags[0].func); + for (i = 0; i < ARRAY_SIZE(kvm_hyperv_properties[feature].flags); i++) { func = kvm_hyperv_properties[feature].flags[i].func; @@ -1465,7 +1473,8 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp) * hv_build_cpuid_leaf() uses this info to build guest CPUIDs. */ for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) { - if (hyperv_feature_supported(cs, feat)) { + if (hyperv_feature_supported(cs, feat) && + !kvm_hyperv_properties[feat].skip_passthrough) { cpu->hyperv_features |= BIT(feat); } } @@ -1838,10 +1847,12 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, int times; c->function = i; - c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC | - KVM_CPUID_FLAG_STATE_READ_NEXT; cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); times = c->eax & 0xff; + if (times > 1) { + c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC | + KVM_CPUID_FLAG_STATE_READ_NEXT; + } for (j = 1; j < times; ++j) { if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { @@ -1864,10 +1875,6 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, case 0xb: case 0xd: for (j = 0; ; j++) { - if (i == 0xd && j == 64) { - break; - } - c->function = i; c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; c->index = j; @@ -1883,7 +1890,12 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, break; } if (i == 0xd && c->eax == 0) { - continue; + if (j < 63) { + continue; + } else { + cpuid_i--; + break; + } } if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { goto full; @@ -2574,6 +2586,8 @@ static int kvm_get_supported_msrs(KVMState *s) case MSR_IA32_PKRS: has_msr_pkrs = true; break; + case MSR_K7_HWCR: + has_msr_hwcr = true; } } } @@ -3916,6 +3930,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (has_msr_virt_ssbd) { kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, env->virt_ssbd); } + if (has_msr_hwcr) { + kvm_msr_entry_add(cpu, MSR_K7_HWCR, env->msr_hwcr); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { @@ -4016,13 +4033,11 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, env->msr_hv_tsc_emulation_status); } -#ifdef CONFIG_SYNDBG if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG) && has_msr_hv_syndbg_options) { kvm_msr_entry_add(cpu, HV_X64_MSR_SYNDBG_OPTIONS, hyperv_syndbg_query_options()); } -#endif } if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC)) { kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, @@ -4400,6 +4415,9 @@ static int kvm_get_msrs(X86CPU *cpu) kvm_msr_entry_add(cpu, MSR_IA32_TSC, 0); env->tsc_valid = 
!runstate_is_running(); } + if (has_msr_hwcr) { + kvm_msr_entry_add(cpu, MSR_K7_HWCR, 0); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { @@ -4919,6 +4937,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31: env->lbr_records[index - MSR_ARCH_LBR_INFO_0].info = msrs[i].data; break; + case MSR_K7_HWCR: + env->msr_hwcr = msrs[i].data; + break; } } diff --git a/target/i386/machine.c b/target/i386/machine.c index 39f8294f27..b4610325aa 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1543,6 +1543,25 @@ static const VMStateDescription vmstate_msr_xfd = { } }; +static bool msr_hwcr_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->msr_hwcr != 0; +} + +static const VMStateDescription vmstate_msr_hwcr = { + .name = "cpu/msr_hwcr", + .version_id = 1, + .minimum_version_id = 1, + .needed = msr_hwcr_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(env.msr_hwcr, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + #ifdef TARGET_X86_64 static bool intel_fred_msrs_needed(void *opaque) { @@ -1773,6 +1792,7 @@ const VMStateDescription vmstate_x86_cpu = { &vmstate_msr_intel_sgx, &vmstate_pdptrs, &vmstate_msr_xfd, + &vmstate_msr_hwcr, #ifdef TARGET_X86_64 &vmstate_msr_fred, &vmstate_amx_xtile, diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 30be9237c3..1f19371646 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -129,6 +129,37 @@ * * (^) these are the two cases in which Intel and AMD disagree on the * primary exception class + * + * Instructions still in translate.c + * --------------------------------- + * Generation of TCG opcodes for almost all instructions is in emit.c.inc; + * this file interprets the prefixes and opcode bytes down to individual + * instruction mnemonics. There is only a handful of opcodes still using + * a switch statement to decode modrm bits 3-5 and prefixes after decoding + * is complete; these are relics of the older x86 decoder and their code + * generation is performed in translate.c. + * + * These unconverted opcodes also perform their own effective address + * generation using the gen_lea_modrm() function. + * + * There is nothing particularly complicated about them; simply, they don't + * need any nasty hacks in the decoder, and they shouldn't get in the way + * of the implementation of new x86 instructions, so they are left alone + * for the time being. 
+ * + * x87: + * 0xD8 - 0xDF + * + * privileged/system: + * 0x0F 0x00 group 6 (SLDT, STR, LLDT, LTR, VERR, VERW) + * 0x0F 0x01 group 7 (SGDT, SIDT, LGDT, LIDT, SMSW, LMSW, INVLPG, + * MONITOR, MWAIT, CLAC, STAC, XGETBV, XSETBV, + * SWAPGS, RDTSCP) + * 0x0F 0xC7 (reg operand) group 9 (RDRAND, RDSEED, RDPID) + * + * MPX: + * 0x0F 0x1A BNDLDX, BNDMOV, BNDCL, BNDCU + * 0x0F 0x1B BNDSTX, BNDMOV, BNDMK, BNDCN */ #define X86_OP_NONE { 0 }, @@ -205,6 +236,7 @@ #define sextT0 .special = X86_SPECIAL_SExtT0, #define zextT0 .special = X86_SPECIAL_ZExtT0, #define op0_Mw .special = X86_SPECIAL_Op0_Mw, +#define btEvGv .special = X86_SPECIAL_BitTest, #define vex1 .vex_class = 1, #define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar, @@ -269,6 +301,43 @@ static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEnt } } +static void decode_group8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86GenFunc group8_gen[8] = { + NULL, NULL, NULL, NULL, + gen_BT, gen_BTS, gen_BTR, gen_BTC, + }; + int op = (get_modrm(s, env) >> 3) & 7; + entry->gen = group8_gen[op]; + if (op == 4) { + /* prevent writeback and LOCK for BT */ + entry->op1 = entry->op0; + entry->op0 = X86_TYPE_None; + entry->s0 = X86_SIZE_None; + } else { + entry->special = X86_SPECIAL_HasLock; + } +} + +static void decode_group9(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry group9_reg = + X86_OP_ENTRY0(multi0F); /* unconverted */ + static const X86OpEntry cmpxchg8b = + X86_OP_ENTRY1(CMPXCHG8B, M,q, lock p_00 cpuid(CX8)); + static const X86OpEntry cmpxchg16b = + X86_OP_ENTRY1(CMPXCHG16B, M,dq, lock p_00 cpuid(CX16)); + + int modrm = get_modrm(s, env); + int op = (modrm >> 3) & 7; + + if ((modrm >> 6) == 3) { + *entry = group9_reg; + } else if (op == 1) { + *entry = REX_W(s) ? 
cmpxchg16b : cmpxchg8b; + } +} + static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) { static const X86OpEntry group15_reg[8] = { @@ -1073,6 +1142,8 @@ static void decode_MOV_CR_DR(DisasContext *s, CPUX86State *env, X86OpEntry *entr } static const X86OpEntry opcodes_0F[256] = { + [0x00] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted */ + [0x01] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted */ [0x02] = X86_OP_ENTRYwr(LAR, G,v, E,w, chk(prot)), [0x03] = X86_OP_ENTRYwr(LSL, G,v, E,w, chk(prot)), [0x05] = X86_OP_ENTRY0(SYSCALL, chk(o64_intel)), @@ -1162,12 +1233,14 @@ static const X86OpEntry opcodes_0F[256] = { [0xa0] = X86_OP_ENTRYr(PUSH, FS, w), [0xa1] = X86_OP_ENTRYw(POP, FS, w), [0xa2] = X86_OP_ENTRY0(CPUID), + [0xa3] = X86_OP_ENTRYrr(BT, E,v, G,v, btEvGv), [0xa4] = X86_OP_ENTRY4(SHLD, E,v, 2op,v, G,v), [0xa5] = X86_OP_ENTRY3(SHLD, E,v, 2op,v, G,v), [0xb0] = X86_OP_ENTRY2(CMPXCHG,E,b, G,b, lock), [0xb1] = X86_OP_ENTRY2(CMPXCHG,E,v, G,v, lock), [0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None), + [0xb3] = X86_OP_ENTRY2(BTR, E,v, G,v, btEvGv), [0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None), [0xb5] = X86_OP_ENTRY3(LGS, G,v, EM,p, None, None), [0xb6] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, zextT0), /* MOVZX */ @@ -1180,6 +1253,7 @@ static const X86OpEntry opcodes_0F[256] = { [0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66), [0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66), [0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66), + [0xc7] = X86_OP_GROUP0(group9), [0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), [0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), @@ -1222,6 +1296,8 @@ static const X86OpEntry opcodes_0F[256] = { [0x18] = X86_OP_ENTRY1(NOP, nop,v), /* prefetch/reserved NOP */ [0x19] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */ + [0x1a] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted MPX */ + [0x1b] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted MPX */ [0x1c] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */ [0x1d] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */ [0x1e] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */ @@ -1294,6 +1370,7 @@ static const X86OpEntry opcodes_0F[256] = { [0xa8] = X86_OP_ENTRYr(PUSH, GS, w), [0xa9] = X86_OP_ENTRYw(POP, GS, w), [0xaa] = X86_OP_ENTRY0(RSM, chk(smm) svm(RSM)), + [0xab] = X86_OP_ENTRY2(BTS, E,v, G,v, btEvGv), [0xac] = X86_OP_ENTRY4(SHRD, E,v, 2op,v, G,v), [0xad] = X86_OP_ENTRY3(SHRD, E,v, 2op,v, G,v), [0xae] = X86_OP_GROUP0(group15), @@ -1306,6 +1383,8 @@ static const X86OpEntry opcodes_0F[256] = { [0xb8] = X86_OP_GROUP0(0FB8), /* decoded as modrm, which is visible as a difference between page fault and #UD */ [0xb9] = X86_OP_ENTRYr(UD, nop,v), /* UD1 */ + [0xba] = X86_OP_GROUP2(group8, E,v, I,b), + [0xbb] = X86_OP_ENTRY2(BTC, E,v, G,v, btEvGv), [0xbc] = X86_OP_GROUP0(0FBC), [0xbd] = X86_OP_GROUP0(0FBD), [0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */ @@ -1627,9 +1706,9 @@ static const X86OpEntry opcodes_root[256] = { [0xE2] = X86_OP_ENTRYr(LOOP, J,b), /* implicit: CX with aflag size */ [0xE3] = X86_OP_ENTRYr(JCXZ, J,b), /* implicit: CX with aflag size */ [0xE4] = X86_OP_ENTRYwr(IN, 0,b, I_unsigned,b), /* AL */ - [0xE5] = X86_OP_ENTRYwr(IN, 0,v, I_unsigned,b), /* AX/EAX */ + [0xE5] = X86_OP_ENTRYwr(IN, 0,z, I_unsigned,b), /* AX/EAX */ [0xE6] = X86_OP_ENTRYrr(OUT, 0,b, I_unsigned,b), /* AL */ - [0xE7] = X86_OP_ENTRYrr(OUT, 0,v, I_unsigned,b), /* AX/EAX */ + 
[0xE7] = X86_OP_ENTRYrr(OUT, 0,z, I_unsigned,b), /* AX/EAX */ [0xF1] = X86_OP_ENTRY0(INT1, svm(ICEBP)), [0xF4] = X86_OP_ENTRY0(HLT, chk(cpl0) svm(HLT)), @@ -1756,14 +1835,27 @@ static const X86OpEntry opcodes_root[256] = { [0xCE] = X86_OP_ENTRY0(INTO), [0xCF] = X86_OP_ENTRY0(IRET, chk(vm86_iopl) svm(IRET)), + /* + * x87 is nolea because it needs the address without segment base, + * in order to store it in fdp. + */ + [0xD8] = X86_OP_ENTRY1(x87, nop,v, nolea), + [0xD9] = X86_OP_ENTRY1(x87, nop,v, nolea), + [0xDA] = X86_OP_ENTRY1(x87, nop,v, nolea), + [0xDB] = X86_OP_ENTRY1(x87, nop,v, nolea), + [0xDC] = X86_OP_ENTRY1(x87, nop,v, nolea), + [0xDD] = X86_OP_ENTRY1(x87, nop,v, nolea), + [0xDE] = X86_OP_ENTRY1(x87, nop,v, nolea), + [0xDF] = X86_OP_ENTRY1(x87, nop,v, nolea), + [0xE8] = X86_OP_ENTRYr(CALL, J,z_f64), [0xE9] = X86_OP_ENTRYr(JMP, J,z_f64), [0xEA] = X86_OP_ENTRYrr(JMPF, I_unsigned,p, I_unsigned,w, chk(i64)), [0xEB] = X86_OP_ENTRYr(JMP, J,b), [0xEC] = X86_OP_ENTRYwr(IN, 0,b, 2,w), /* AL, DX */ - [0xED] = X86_OP_ENTRYwr(IN, 0,v, 2,w), /* AX/EAX, DX */ + [0xED] = X86_OP_ENTRYwr(IN, 0,z, 2,w), /* AX/EAX, DX */ [0xEE] = X86_OP_ENTRYrr(OUT, 0,b, 2,w), /* DX, AL */ - [0xEF] = X86_OP_ENTRYrr(OUT, 0,v, 2,w), /* DX, AX/EAX */ + [0xEF] = X86_OP_ENTRYrr(OUT, 0,z, 2,w), /* DX, AX/EAX */ [0xF8] = X86_OP_ENTRY0(CLC), [0xF9] = X86_OP_ENTRY0(STC), @@ -2203,8 +2295,12 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) return (s->cpuid_features & CPUID_CMOV); case X86_FEAT_CLFLUSH: return (s->cpuid_features & CPUID_CLFLUSH); + case X86_FEAT_CX8: + return (s->cpuid_features & CPUID_CX8); case X86_FEAT_FXSR: return (s->cpuid_features & CPUID_FXSR); + case X86_FEAT_CX16: + return (s->cpuid_ext_features & CPUID_EXT_CX16); case X86_FEAT_F16C: return (s->cpuid_ext_features & CPUID_EXT_F16C); case X86_FEAT_FMA: @@ -2428,6 +2524,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu) CPUX86State *env = cpu_env(cpu); X86DecodedInsn decode; X86DecodeFunc decode_func = decode_root; + bool accept_lock = false; uint8_t cc_live, b; s->pc = s->base.pc_next; @@ -2587,34 +2684,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu) } } - /* Go back to old decoder for unconverted opcodes. */ - if (!(s->prefix & PREFIX_VEX)) { - if ((b & ~7) == 0xd8) { - if (!disas_insn_x87(s, cpu, b)) { - goto unknown_op; - } - return; - } - - if (b == 0x0f) { - b = x86_ldub_code(env, s); - switch (b) { - case 0x00 ... 0x01: /* mostly privileged instructions */ - case 0x1a ... 0x1b: /* MPX */ - case 0xa3: /* bt */ - case 0xab: /* bts */ - case 0xb3: /* btr */ - case 0xba ... 
0xbb: /* grp8, btc */ - case 0xc7: /* grp9 */ - disas_insn_old(s, cpu, b + 0x100); - return; - default: - decode_func = do_decode_0F; - break; - } - } - } - memset(&decode, 0, sizeof(decode)); decode.cc_op = -1; decode.b = b; @@ -2666,9 +2735,10 @@ static void disas_insn(DisasContext *s, CPUState *cpu) if (decode.op[0].has_ea) { s->prefix |= PREFIX_LOCK; } - decode.e.special = X86_SPECIAL_HasLock; /* fallthrough */ case X86_SPECIAL_HasLock: + case X86_SPECIAL_BitTest: + accept_lock = decode.op[0].has_ea; break; case X86_SPECIAL_Op0_Rd: @@ -2710,10 +2780,8 @@ static void disas_insn(DisasContext *s, CPUState *cpu) break; } - if (s->prefix & PREFIX_LOCK) { - if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) { - goto illegal_op; - } + if ((s->prefix & PREFIX_LOCK) && !accept_lock) { + goto illegal_op; } if (!validate_vex(s, &decode)) { @@ -2759,9 +2827,10 @@ static void disas_insn(DisasContext *s, CPUState *cpu) if (decode.e.special != X86_SPECIAL_NoLoadEA && (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)) { - gen_load_ea(s, &decode.mem, decode.e.vex_class == 12); + gen_lea_modrm(s, &decode); } if (s->prefix & PREFIX_LOCK) { + assert(decode.op[0].has_ea && !decode.op[2].has_ea); gen_load(s, &decode, 2, s->T1); decode.e.gen(s, &decode); } else { diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index f9bf9a6041..7f23d373ea 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -114,6 +114,8 @@ typedef enum X86CPUIDFeature { X86_FEAT_CLWB, X86_FEAT_CMOV, X86_FEAT_CMPCCXADD, + X86_FEAT_CX8, + X86_FEAT_CX16, X86_FEAT_F16C, X86_FEAT_FMA, X86_FEAT_FSGSBASE, @@ -190,6 +192,9 @@ typedef enum X86InsnSpecial { /* Always locked if it has a memory operand (XCHG) */ X86_SPECIAL_Locked, + /* Like HasLock, but also operand 2 provides bit displacement into memory. */ + X86_SPECIAL_BitTest, + /* Do not load effective address in s->A0 */ X86_SPECIAL_NoLoadEA, @@ -261,12 +266,13 @@ typedef enum X86VEXSpecial { typedef struct X86OpEntry X86OpEntry; typedef struct X86DecodedInsn X86DecodedInsn; +struct DisasContext; /* Decode function for multibyte opcodes. */ -typedef void (*X86DecodeFunc)(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b); +typedef void (*X86DecodeFunc)(struct DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b); /* Code generation function. */ -typedef void (*X86GenFunc)(DisasContext *s, X86DecodedInsn *decode); +typedef void (*X86GenFunc)(struct DisasContext *s, X86DecodedInsn *decode); struct X86OpEntry { /* Based on the is_decode flags. 
*/ @@ -313,6 +319,14 @@ typedef struct X86DecodedOp { }; } X86DecodedOp; +typedef struct AddressParts { + int def_seg; + int base; + int index; + int scale; + target_long disp; +} AddressParts; + struct X86DecodedInsn { X86OpEntry e; X86DecodedOp op[3]; @@ -330,3 +344,4 @@ struct X86DecodedInsn { uint8_t b; }; +static void gen_lea_modrm(struct DisasContext *s, X86DecodedInsn *decode); diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 9b50419918..fd17a9b1ec 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -78,9 +78,26 @@ static void gen_NM_exception(DisasContext *s) gen_exception(s, EXCP07_PREX); } -static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib) +static void gen_lea_modrm(DisasContext *s, X86DecodedInsn *decode) { - TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib); + AddressParts *mem = &decode->mem; + TCGv ea; + + ea = gen_lea_modrm_1(s, *mem, decode->e.vex_class == 12); + if (decode->e.special == X86_SPECIAL_BitTest) { + MemOp ot = decode->op[1].ot; + int poslen = 8 << ot; + int opn = decode->op[2].n; + TCGv ofs = tcg_temp_new(); + + /* Extract memory displacement from the second operand. */ + assert(decode->op[2].unit == X86_OP_INT && decode->op[2].ot != MO_8); + tcg_gen_sextract_tl(ofs, cpu_regs[opn], 3, poslen - 3); + tcg_gen_andi_tl(ofs, ofs, -1 << ot); + tcg_gen_add_tl(s->A0, ea, ofs); + ea = s->A0; + } + gen_lea_v_seg(s, ea, mem->def_seg, s->override); } @@ -412,6 +429,32 @@ static void prepare_update3_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op, decode->cc_op = op; } +/* Set up decode->cc_* to modify CF while keeping other flags unchanged. */ +static void prepare_update_cf(X86DecodedInsn *decode, DisasContext *s, TCGv cf) +{ + switch (s->cc_op) { + case CC_OP_ADOX: + case CC_OP_ADCOX: + decode->cc_src2 = cpu_cc_src2; + decode->cc_src = cpu_cc_src; + decode->cc_op = CC_OP_ADCOX; + break; + + case CC_OP_EFLAGS: + case CC_OP_ADCX: + decode->cc_src = cpu_cc_src; + decode->cc_op = CC_OP_ADCX; + break; + + default: + decode->cc_src = tcg_temp_new(); + gen_mov_eflags(s, decode->cc_src); + decode->cc_op = CC_OP_ADCX; + break; + } + decode->cc_dst = cf; +} + static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs) { MemOp ot = decode->op[0].ot; @@ -1390,6 +1433,109 @@ static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode) tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_OZ); } +static TCGv gen_bt_mask(DisasContext *s, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + TCGv mask = tcg_temp_new(); + + tcg_gen_andi_tl(s->T1, s->T1, (8 << ot) - 1); + tcg_gen_shl_tl(mask, tcg_constant_tl(1), s->T1); + return mask; +} + +/* Expects truncated bit index in s->T1, 1 << s->T1 in MASK. */ +static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, TCGv mask) +{ + TCGv cf; + + /* + * C is the result of the test, Z is unchanged, and the others + * are all undefined. + */ + switch (s->cc_op) { + case CC_OP_DYNAMIC: + case CC_OP_CLR: + case CC_OP_EFLAGS: + case CC_OP_ADCX: + case CC_OP_ADOX: + case CC_OP_ADCOX: + /* Generate EFLAGS and replace the C bit. */ + cf = tcg_temp_new(); + tcg_gen_setcond_tl(TCG_COND_TSTNE, cf, src, mask); + prepare_update_cf(decode, s, cf); + break; + default: + /* + * Z was going to be computed from the non-zero status of CC_DST. + * We can get that same Z value (and the new C value) by leaving + * CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the + * same width. 
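+ * For example, after BT on a register or memory operand, cc_op keeps the
+ * operand-size encoding of the previous op, CC_SRC holds the source
+ * shifted right by the bit index (so CF, taken from bit 0 of CC_SRC by
+ * the SAR flag computation, is exactly the tested bit), and ZF is still
+ * derived from the untouched CC_DST.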
+ */ + decode->cc_src = tcg_temp_new(); + decode->cc_dst = cpu_cc_dst; + decode->cc_op = ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB; + tcg_gen_shr_tl(decode->cc_src, src, s->T1); + break; + } +} + +static void gen_BT(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv mask = gen_bt_mask(s, decode); + + gen_bt_flags(s, decode, s->T0, mask); +} + +static void gen_BTC(DisasContext *s, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + TCGv old = tcg_temp_new(); + TCGv mask = gen_bt_mask(s, decode); + + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_fetch_xor_tl(old, s->A0, mask, s->mem_index, ot | MO_LE); + } else { + tcg_gen_mov_tl(old, s->T0); + tcg_gen_xor_tl(s->T0, s->T0, mask); + } + + gen_bt_flags(s, decode, old, mask); +} + +static void gen_BTR(DisasContext *s, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + TCGv old = tcg_temp_new(); + TCGv mask = gen_bt_mask(s, decode); + + if (s->prefix & PREFIX_LOCK) { + TCGv maskc = tcg_temp_new(); + tcg_gen_not_tl(maskc, mask); + tcg_gen_atomic_fetch_and_tl(old, s->A0, maskc, s->mem_index, ot | MO_LE); + } else { + tcg_gen_mov_tl(old, s->T0); + tcg_gen_andc_tl(s->T0, s->T0, mask); + } + + gen_bt_flags(s, decode, old, mask); +} + +static void gen_BTS(DisasContext *s, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + TCGv old = tcg_temp_new(); + TCGv mask = gen_bt_mask(s, decode); + + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_fetch_or_tl(old, s->A0, mask, s->mem_index, ot | MO_LE); + } else { + tcg_gen_mov_tl(old, s->T0); + tcg_gen_or_tl(s->T0, s->T0, mask); + } + + gen_bt_flags(s, decode, old, mask); +} + static void gen_BZHI(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1642,6 +1788,102 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode) decode->cc_op = CC_OP_SUBB + ot; } +static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode) +{ +#ifdef TARGET_X86_64 + MemOp mop = MO_TE | MO_128 | MO_ALIGN; + TCGv_i64 t0, t1; + TCGv_i128 cmp, val; + + cmp = tcg_temp_new_i128(); + val = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]); + tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]); + + /* Only require atomic with LOCK; non-parallel handled in generator. */ + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop); + } else { + tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop); + } + + tcg_gen_extr_i128_i64(s->T0, s->T1, val); + + /* Determine success after the fact. */ + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]); + tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]); + tcg_gen_or_i64(t0, t0, t1); + + /* Update Z. */ + gen_compute_eflags(s); + tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0); + tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1); + + /* + * Extract the result values for the register pair. We may do this + * unconditionally, because on success (Z=1), the old value matches + * the previous value in RDX:RAX. + */ + tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0); + tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1); +#else + abort(); +#endif +} + +static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv_i64 cmp, val, old; + TCGv Z; + + cmp = tcg_temp_new_i64(); + val = tcg_temp_new_i64(); + old = tcg_temp_new_i64(); + + /* Construct the comparison values from the register pair. 
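+ * cmp becomes EDX:EAX and val becomes ECX:EBX, matching the architectural
+ * CMPXCHG8B comparand and replacement value.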
*/ + tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]); + tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]); + + /* Only require atomic with LOCK; non-parallel handled in generator. */ + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ); + } else { + tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val, + s->mem_index, MO_TEUQ); + } + + /* Set tmp0 to match the required value of Z. */ + tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp); + Z = tcg_temp_new(); + tcg_gen_trunc_i64_tl(Z, cmp); + + /* + * Extract the result values for the register pair. + * For 32-bit, we may do this unconditionally, because on success (Z=1), + * the old value matches the previous value in EDX:EAX. For x86_64, + * the store must be conditional, because we must leave the source + * registers unchanged on success, and zero-extend the writeback + * on failure (Z=0). + */ + if (TARGET_LONG_BITS == 32) { + tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old); + } else { + TCGv zero = tcg_constant_tl(0); + + tcg_gen_extr_i64_tl(s->T0, s->T1, old); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero, + s->T0, cpu_regs[R_EAX]); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero, + s->T1, cpu_regs[R_EDX]); + } + + /* Update Z. */ + gen_compute_eflags(s); + tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1); +} + static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode) { gen_update_cc_op(s); diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 3b8fd827e1..02ae6a0d1f 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -695,7 +695,6 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, sa.env = env; sa.ra = 0; - sa.mmu_index = cpu_mmu_index_kernel(env); if (type == 5) { /* task gate */ @@ -705,7 +704,9 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, } shift = switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip); if (has_error_code) { - /* push the error code */ + /* push the error code on the destination stack */ + cpl = env->hflags & HF_CPL_MASK; + sa.mmu_index = x86_mmu_index_pl(env, cpl); if (env->segs[R_SS].flags & DESC_B_MASK) { sa.sp_mask = 0xffffffff; } else { @@ -750,6 +751,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, if (e2 & DESC_C_MASK) { dpl = cpl; } + sa.mmu_index = x86_mmu_index_pl(env, dpl); if (dpl < cpl) { /* to inner privilege */ uint32_t esp; @@ -1001,7 +1003,7 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, sa.env = env; sa.ra = 0; - sa.mmu_index = cpu_mmu_index_kernel(env); + sa.mmu_index = x86_mmu_index_pl(env, dpl); sa.sp_mask = -1; sa.ss_base = 0; if (dpl < cpl || ist != 0) { @@ -1135,7 +1137,7 @@ static void do_interrupt_real(CPUX86State *env, int intno, int is_int, sa.sp = env->regs[R_ESP]; sa.sp_mask = 0xffff; sa.ss_base = env->segs[R_SS].base; - sa.mmu_index = cpu_mmu_index_kernel(env); + sa.mmu_index = x86_mmu_index_pl(env, 0); if (is_int) { old_eip = next_eip; @@ -1599,7 +1601,7 @@ void helper_lcall_real(CPUX86State *env, uint32_t new_cs, uint32_t new_eip, sa.sp = env->regs[R_ESP]; sa.sp_mask = get_sp_mask(env->segs[R_SS].flags); sa.ss_base = env->segs[R_SS].base; - sa.mmu_index = cpu_mmu_index_kernel(env); + sa.mmu_index = x86_mmu_index_pl(env, 0); if (shift) { pushl(&sa, env->segs[R_CS].selector); @@ -1639,9 +1641,9 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, sa.env = 
env; sa.ra = GETPC(); - sa.mmu_index = cpu_mmu_index_kernel(env); if (e2 & DESC_S_MASK) { + /* "normal" far call, no stack switch possible */ if (!(e2 & DESC_CS_MASK)) { raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC()); } @@ -1665,6 +1667,7 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, raise_exception_err_ra(env, EXCP0B_NOSEG, new_cs & 0xfffc, GETPC()); } + sa.mmu_index = x86_mmu_index_pl(env, cpl); #ifdef TARGET_X86_64 /* XXX: check 16/32 bit cases in long mode */ if (shift == 2) { @@ -1792,6 +1795,7 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, if (!(e2 & DESC_C_MASK) && dpl < cpl) { /* to inner privilege */ + sa.mmu_index = x86_mmu_index_pl(env, dpl); #ifdef TARGET_X86_64 if (shift == 2) { ss = dpl; /* SS = NULL selector with RPL = new CPL */ @@ -1870,6 +1874,7 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, new_stack = 1; } else { /* to same privilege */ + sa.mmu_index = x86_mmu_index_pl(env, cpl); sa.sp = env->regs[R_ESP]; sa.sp_mask = get_sp_mask(env->segs[R_SS].flags); sa.ss_base = env->segs[R_SS].base; diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 98f5fe61ed..ef190416b4 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -29,6 +29,7 @@ #include "exec/helper-proto.h" #include "exec/helper-gen.h" #include "helper-tcg.h" +#include "decode-new.h" #include "exec/log.h" @@ -693,11 +694,6 @@ static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign) return dst; } -static void gen_exts(MemOp ot, TCGv reg) -{ - gen_ext_tl(reg, reg, ot, true); -} - static void gen_op_j_ecx(DisasContext *s, TCGCond cond, TCGLabel *label1) { TCGv tmp = gen_ext_tl(NULL, cpu_regs[R_ECX], s->aflag, false); @@ -1525,14 +1521,6 @@ static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s) /* Decompose an address. */ -typedef struct AddressParts { - int def_seg; - int base; - int index; - int scale; - target_long disp; -} AddressParts; - static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s, int modrm, bool is_vsib) { @@ -1691,24 +1679,11 @@ static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a, bool is_vsib) return ea; } -static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm) -{ - AddressParts a = gen_lea_modrm_0(env, s, modrm, false); - TCGv ea = gen_lea_modrm_1(s, a, false); - gen_lea_v_seg(s, ea, a.def_seg, s->override); -} - -static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm) -{ - (void)gen_lea_modrm_0(env, s, modrm, false); -} - /* Used for BNDCL, BNDCU, BNDCN. */ -static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm, +static void gen_bndck(DisasContext *s, X86DecodedInsn *decode, TCGCond cond, TCGv_i64 bndv) { - AddressParts a = gen_lea_modrm_0(env, s, modrm, false); - TCGv ea = gen_lea_modrm_1(s, a, false); + TCGv ea = gen_lea_modrm_1(s, decode->mem, false); tcg_gen_extu_tl_i64(s->tmp1_i64, ea); if (!CODE64(s)) { @@ -1720,8 +1695,9 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm, } /* generate modrm load of memory or register. 
*/ -static void gen_ld_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot) +static void gen_ld_modrm(DisasContext *s, X86DecodedInsn *decode, MemOp ot) { + int modrm = s->modrm; int mod, rm; mod = (modrm >> 6) & 3; @@ -1729,14 +1705,15 @@ static void gen_ld_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot) if (mod == 3) { gen_op_mov_v_reg(s, ot, s->T0, rm); } else { - gen_lea_modrm(env, s, modrm); + gen_lea_modrm(s, decode); gen_op_ld_v(s, ot, s->T0, s->A0); } } /* generate modrm store of memory or register. */ -static void gen_st_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot) +static void gen_st_modrm(DisasContext *s, X86DecodedInsn *decode, MemOp ot) { + int modrm = s->modrm; int mod, rm; mod = (modrm >> 6) & 3; @@ -1744,7 +1721,7 @@ static void gen_st_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot) if (mod == 3) { gen_op_mov_reg_v(s, ot, rm, s->T0); } else { - gen_lea_modrm(env, s, modrm); + gen_lea_modrm(s, decode); gen_op_st_v(s, ot, s->T0, s->A0); } } @@ -2312,129 +2289,32 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align) tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop); } -static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm) +#include "emit.c.inc" + +static void gen_x87(DisasContext *s, X86DecodedInsn *decode) { - TCGv_i64 cmp, val, old; - TCGv Z; - - gen_lea_modrm(env, s, modrm); - - cmp = tcg_temp_new_i64(); - val = tcg_temp_new_i64(); - old = tcg_temp_new_i64(); - - /* Construct the comparison values from the register pair. */ - tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]); - tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]); - - /* Only require atomic with LOCK; non-parallel handled in generator. */ - if (s->prefix & PREFIX_LOCK) { - tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ); - } else { - tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val, - s->mem_index, MO_TEUQ); - } - - /* Set tmp0 to match the required value of Z. */ - tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp); - Z = tcg_temp_new(); - tcg_gen_trunc_i64_tl(Z, cmp); - - /* - * Extract the result values for the register pair. - * For 32-bit, we may do this unconditionally, because on success (Z=1), - * the old value matches the previous value in EDX:EAX. For x86_64, - * the store must be conditional, because we must leave the source - * registers unchanged on success, and zero-extend the writeback - * on failure (Z=0). - */ - if (TARGET_LONG_BITS == 32) { - tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old); - } else { - TCGv zero = tcg_constant_tl(0); - - tcg_gen_extr_i64_tl(s->T0, s->T1, old); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero, - s->T0, cpu_regs[R_EAX]); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero, - s->T1, cpu_regs[R_EDX]); - } - - /* Update Z. */ - gen_compute_eflags(s); - tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1); -} - -#ifdef TARGET_X86_64 -static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm) -{ - MemOp mop = MO_TE | MO_128 | MO_ALIGN; - TCGv_i64 t0, t1; - TCGv_i128 cmp, val; - - gen_lea_modrm(env, s, modrm); - - cmp = tcg_temp_new_i128(); - val = tcg_temp_new_i128(); - tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]); - tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]); - - /* Only require atomic with LOCK; non-parallel handled in generator. 
*/ - if (s->prefix & PREFIX_LOCK) { - tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop); - } else { - tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop); - } - - tcg_gen_extr_i128_i64(s->T0, s->T1, val); - - /* Determine success after the fact. */ - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]); - tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]); - tcg_gen_or_i64(t0, t0, t1); - - /* Update Z. */ - gen_compute_eflags(s); - tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0); - tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1); - - /* - * Extract the result values for the register pair. We may do this - * unconditionally, because on success (Z=1), the old value matches - * the previous value in RDX:RAX. - */ - tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0); - tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1); -} -#endif - -static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b) -{ - CPUX86State *env = cpu_env(cpu); bool update_fip = true; - int modrm, mod, rm, op; + int b = decode->b; + int modrm = s->modrm; + int mod, rm, op; if (s->flags & (HF_EM_MASK | HF_TS_MASK)) { /* if CR0.EM or CR0.TS are set, generate an FPU exception */ /* XXX: what to do if illegal op ? */ gen_exception(s, EXCP07_PREX); - return true; + return; } - modrm = x86_ldub_code(env, s); mod = (modrm >> 6) & 3; rm = modrm & 7; op = ((b & 7) << 3) | ((modrm >> 3) & 7); if (mod != 3) { /* memory op */ - AddressParts a = gen_lea_modrm_0(env, s, modrm, false); - TCGv ea = gen_lea_modrm_1(s, a, false); + TCGv ea = gen_lea_modrm_1(s, decode->mem, false); TCGv last_addr = tcg_temp_new(); bool update_fdp = true; tcg_gen_mov_tl(last_addr, ea); - gen_lea_v_seg(s, ea, a.def_seg, s->override); + gen_lea_v_seg(s, ea, decode->mem.def_seg, s->override); switch (op) { case 0x00 ... 0x07: /* fxxxs */ @@ -2624,11 +2504,11 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b) gen_helper_fpop(tcg_env); break; default: - return false; + goto illegal_op; } if (update_fdp) { - int last_seg = s->override >= 0 ? s->override : a.def_seg; + int last_seg = s->override >= 0 ? 
                            s->override : decode->mem.def_seg;
         tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
                        offsetof(CPUX86State,
@@ -2665,7 +2545,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
                 update_fip = false;
                 break;
             default:
-                return false;
+                goto illegal_op;
             }
             break;
         case 0x0c: /* grp d9/4 */
@@ -2684,7 +2564,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
                 gen_helper_fxam_ST0(tcg_env);
                 break;
             default:
-                return false;
+                goto illegal_op;
             }
             break;
         case 0x0d: /* grp d9/5 */
@@ -2719,7 +2599,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
                     gen_helper_fldz_ST0(tcg_env);
                     break;
                 default:
-                    return false;
+                    goto illegal_op;
                 }
             }
             break;
@@ -2821,7 +2701,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
                 gen_helper_fpop(tcg_env);
                 break;
             default:
-                return false;
+                goto illegal_op;
             }
             break;
         case 0x1c:
@@ -2841,7 +2721,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
             case 4: /* fsetpm (287 only, just do nop here) */
                 break;
             default:
-                return false;
+                goto illegal_op;
             }
             break;
         case 0x1d: /* fucomi */
@@ -2893,7 +2773,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
                 gen_helper_fpop(tcg_env);
                 break;
             default:
-                return false;
+                goto illegal_op;
             }
             break;
         case 0x38: /* ffreep sti, undocumented op */
@@ -2908,7 +2788,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                 break;
             default:
-                return false;
+                goto illegal_op;
             }
             break;
         case 0x3d: /* fucomip */
@@ -2955,7 +2835,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
             }
             break;
         default:
-            return false;
+            goto illegal_op;
         }
     }
@@ -2967,49 +2847,29 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
         tcg_gen_st_tl(eip_cur_tl(s), tcg_env, offsetof(CPUX86State, fpip));
     }
-    return true;
+    return;
 illegal_op:
     gen_illegal_opcode(s);
-    return true;
 }
-static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
+static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
 {
-    CPUX86State *env = cpu_env(cpu);
     int prefixes = s->prefix;
     MemOp dflag = s->dflag;
+    int b = decode->b + 0x100;
+    int modrm = s->modrm;
     MemOp ot;
-    int modrm, reg, rm, mod, op, val;
+    int reg, rm, mod, op;
     /* now check op code */
     switch (b) {
-    case 0x1c7: /* cmpxchg8b */
-        modrm = x86_ldub_code(env, s);
+    case 0x1c7: /* RDSEED, RDPID with f3 prefix */
         mod = (modrm >> 6) & 3;
         switch ((modrm >> 3) & 7) {
-        case 1: /* CMPXCHG8, CMPXCHG16 */
-            if (mod == 3) {
-                goto illegal_op;
-            }
-#ifdef TARGET_X86_64
-            if (dflag == MO_64) {
-                if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
-                    goto illegal_op;
-                }
-                gen_cmpxchg16b(s, env, modrm);
-                break;
-            }
-#endif
-            if (!(s->cpuid_features & CPUID_CX8)) {
-                goto illegal_op;
-            }
-            gen_cmpxchg8b(s, env, modrm);
-            break;
-
-        case 7: /* RDSEED, RDPID with f3 prefix */
+        case 7:
             if (mod != 3 ||
-                (s->prefix & (PREFIX_LOCK | PREFIX_REPNZ))) {
+                (s->prefix & PREFIX_REPNZ)) {
                 goto illegal_op;
             }
             if (s->prefix & PREFIX_REPZ) {
@@ -3029,7 +2889,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
         case 6: /* RDRAND */
             if (mod != 3 ||
-                (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
+                (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) ||
                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
                 goto illegal_op;
             }
@@ -3046,148 +2906,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
         }
         break;
-        /************************/
-        /* bit operations */
-    case 0x1ba: /* bt/bts/btr/btc Gv, im */
-        ot = dflag;
-        modrm = x86_ldub_code(env, s);
-        op = (modrm >> 3) & 7;
-        mod = (modrm >> 6) & 3;
-        rm = (modrm & 7) | REX_B(s);
-        if (mod != 3) {
-            s->rip_offset = 1;
-            gen_lea_modrm(env, s, modrm);
-            if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, s->T0, s->A0);
-            }
-        } else {
-            gen_op_mov_v_reg(s, ot, s->T0, rm);
-        }
-        /* load shift */
-        val = x86_ldub_code(env, s);
-        tcg_gen_movi_tl(s->T1, val);
-        if (op < 4)
-            goto unknown_op;
-        op -= 4;
-        goto bt_op;
-    case 0x1a3: /* bt Gv, Ev */
-        op = 0;
-        goto do_btx;
-    case 0x1ab: /* bts */
-        op = 1;
-        goto do_btx;
-    case 0x1b3: /* btr */
-        op = 2;
-        goto do_btx;
-    case 0x1bb: /* btc */
-        op = 3;
-    do_btx:
-        ot = dflag;
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        mod = (modrm >> 6) & 3;
-        rm = (modrm & 7) | REX_B(s);
-        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
-        if (mod != 3) {
-            AddressParts a = gen_lea_modrm_0(env, s, modrm, false);
-            /* specific case: we need to add a displacement */
-            gen_exts(ot, s->T1);
-            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
-            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
-            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a, false), s->tmp0);
-            gen_lea_v_seg(s, s->A0, a.def_seg, s->override);
-            if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, s->T0, s->A0);
-            }
-        } else {
-            gen_op_mov_v_reg(s, ot, s->T0, rm);
-        }
-    bt_op:
-        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
-        tcg_gen_movi_tl(s->tmp0, 1);
-        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
-        if (s->prefix & PREFIX_LOCK) {
-            switch (op) {
-            case 0: /* bt */
-                /* Needs no atomic ops; we suppressed the normal
-                   memory load for LOCK above so do it now. */
-                gen_op_ld_v(s, ot, s->T0, s->A0);
-                break;
-            case 1: /* bts */
-                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
-                                           s->mem_index, ot | MO_LE);
-                break;
-            case 2: /* btr */
-                tcg_gen_not_tl(s->tmp0, s->tmp0);
-                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
-                                            s->mem_index, ot | MO_LE);
-                break;
-            default:
-            case 3: /* btc */
-                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
-                                            s->mem_index, ot | MO_LE);
-                break;
-            }
-            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
-        } else {
-            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
-            switch (op) {
-            case 0: /* bt */
-                /* Data already loaded; nothing to do. */
-                break;
-            case 1: /* bts */
-                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
-                break;
-            case 2: /* btr */
-                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
-                break;
-            default:
-            case 3: /* btc */
-                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
-                break;
-            }
-            if (op != 0) {
-                if (mod != 3) {
-                    gen_op_st_v(s, ot, s->T0, s->A0);
-                } else {
-                    gen_op_mov_reg_v(s, ot, rm, s->T0);
-                }
-            }
-        }
-
-        /* Delay all CC updates until after the store above.  Note that
-           C is the result of the test, Z is unchanged, and the others
-           are all undefined. */
-        switch (s->cc_op) {
-        case CC_OP_MULB ... CC_OP_MULQ:
-        case CC_OP_ADDB ... CC_OP_ADDQ:
-        case CC_OP_ADCB ... CC_OP_ADCQ:
-        case CC_OP_SUBB ... CC_OP_SUBQ:
-        case CC_OP_SBBB ... CC_OP_SBBQ:
-        case CC_OP_LOGICB ... CC_OP_LOGICQ:
-        case CC_OP_INCB ... CC_OP_INCQ:
-        case CC_OP_DECB ... CC_OP_DECQ:
-        case CC_OP_SHLB ... CC_OP_SHLQ:
-        case CC_OP_SARB ... CC_OP_SARQ:
-        case CC_OP_BMILGB ... CC_OP_BMILGQ:
-        case CC_OP_POPCNT:
-            /* Z was going to be computed from the non-zero status of CC_DST.
-               We can get that same Z value (and the new C value) by leaving
-               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
-               same width. */
-            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
-            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
-            break;
-        default:
-            /* Otherwise, generate EFLAGS and replace the C bit. */
-            gen_compute_eflags(s);
-            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
-                               ctz32(CC_C), 1);
-            break;
-        }
-        break;
     case 0x100:
-        modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -3201,14 +2920,14 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
             tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_st_modrm(env, s, modrm, ot);
+            gen_st_modrm(s, decode, ot);
             break;
         case 2: /* lldt */
            if (!PE(s) || VM86(s))
                goto illegal_op;
            if (check_cpl0(s)) {
                gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
-               gen_ld_modrm(env, s, modrm, MO_16);
+               gen_ld_modrm(s, decode, MO_16);
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_lldt(tcg_env, s->tmp2_i32);
            }
@@ -3223,14 +2942,14 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
             tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, tr.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_st_modrm(env, s, modrm, ot);
+            gen_st_modrm(s, decode, ot);
             break;
         case 3: /* ltr */
            if (!PE(s) || VM86(s))
                goto illegal_op;
            if (check_cpl0(s)) {
                gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
-               gen_ld_modrm(env, s, modrm, MO_16);
+               gen_ld_modrm(s, decode, MO_16);
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_ltr(tcg_env, s->tmp2_i32);
            }
@@ -3239,7 +2958,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
         case 5: /* verw */
             if (!PE(s) || VM86(s))
                 goto illegal_op;
-            gen_ld_modrm(env, s, modrm, MO_16);
+            gen_ld_modrm(s, decode, MO_16);
             gen_update_cc_op(s);
             if (op == 4) {
                 gen_helper_verr(tcg_env, s->T0);
@@ -3249,19 +2968,18 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
             assume_cc_op(s, CC_OP_EFLAGS);
             break;
         default:
-            goto unknown_op;
+            goto illegal_op;
         }
         break;
     case 0x101:
-        modrm = x86_ldub_code(env, s);
         switch (modrm) {
         CASE_MODRM_MEM_OP(0): /* sgdt */
             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
                 break;
             }
             gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(s, decode);
             tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, gdt.limit));
             gen_op_st_v(s, MO_16, s->T0, s->A0);
@@ -3317,7 +3035,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                 break;
             }
             gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(s, decode);
             tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, idt.limit));
             gen_op_st_v(s, MO_16, s->T0, s->A0);
             gen_add_A0_im(s, 2);
@@ -3331,8 +3049,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
         case 0xd0: /* xgetbv */
             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
-                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
-                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
+                || (s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
                 goto illegal_op;
             }
             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
@@ -3342,8 +3059,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
         case 0xd1: /* xsetbv */
             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
-                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
-                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
+                || (s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
                 goto illegal_op;
             }
             gen_svm_check_intercept(s, SVM_EXIT_XSETBV);
@@ -3467,7 +3183,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                 break;
             }
             gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(s, decode);
            gen_op_ld_v(s, MO_16, s->T1, s->A0);
            gen_add_A0_im(s, 2);
            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
@@ -3483,7 +3199,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                 break;
             }
             gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(s, decode);
             gen_op_ld_v(s, MO_16, s->T1, s->A0);
             gen_add_A0_im(s, 2);
             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
@@ -3507,11 +3223,10 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
              */
             mod = (modrm >> 6) & 3;
             ot = (mod != 3 ? MO_16 : s->dflag);
-            gen_st_modrm(env, s, modrm, ot);
+            gen_st_modrm(s, decode, ot);
             break;
         case 0xee: /* rdpkru */
-            if (s->prefix & (PREFIX_LOCK | PREFIX_DATA
-                             | PREFIX_REPZ | PREFIX_REPNZ)) {
+            if (s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ)) {
                 goto illegal_op;
             }
             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
@@ -3519,8 +3234,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
             break;
         case 0xef: /* wrpkru */
-            if (s->prefix & (PREFIX_LOCK | PREFIX_DATA
-                             | PREFIX_REPZ | PREFIX_REPNZ)) {
+            if (s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ)) {
                 goto illegal_op;
             }
             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
@@ -3534,7 +3248,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                 break;
             }
             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
-            gen_ld_modrm(env, s, modrm, MO_16);
+            gen_ld_modrm(s, decode, MO_16);
             /*
              * Only the 4 lower bits of CR0 are modified.
              * PE cannot be set to zero if already set to one.
@@ -3552,7 +3266,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                 break;
             }
             gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(s, decode);
             gen_helper_flush_page(tcg_env, s->A0);
             s->base.is_jmp = DISAS_EOB_NEXT;
             break;
@@ -3585,33 +3299,30 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
             break;
         default:
-            goto unknown_op;
+            goto illegal_op;
         }
         break;
     case 0x11a:
-        modrm = x86_ldub_code(env, s);
        if (s->flags & HF_MPX_EN_MASK) {
            mod = (modrm >> 6) & 3;
            reg = ((modrm >> 3) & 7) | REX_R(s);
            if (prefixes & PREFIX_REPZ) {
                /* bndcl */
                if (reg >= 4
-                   || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16) {
                    goto illegal_op;
                }
-               gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
+               gen_bndck(s, decode, TCG_COND_LTU, cpu_bndl[reg]);
            } else if (prefixes & PREFIX_REPNZ) {
                /* bndcu */
                if (reg >= 4
-                   || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16) {
                    goto illegal_op;
                }
                TCGv_i64 notu = tcg_temp_new_i64();
                tcg_gen_not_i64(notu, cpu_bndu[reg]);
-               gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
+               gen_bndck(s, decode, TCG_COND_GTU, notu);
            } else if (prefixes & PREFIX_DATA) {
                /* bndmov -- from reg/mem */
                if (reg >= 4 || s->aflag == MO_16) {
@@ -3619,7 +3330,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                }
                if (mod == 3) {
                    int reg2 = (modrm & 7) | REX_B(s);
-                   if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
+                   if (reg2 >= 4) {
                        goto illegal_op;
                    }
                    if (s->flags & HF_MPX_IU_MASK) {
@@ -3627,7 +3338,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
                    }
                } else {
-                   gen_lea_modrm(env, s, modrm);
+                   gen_lea_modrm(s, decode);
                    if (CODE64(s)) {
                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
                                            s->mem_index, MO_LEUQ);
@@ -3646,9 +3357,8 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                }
            } else if (mod != 3) {
                /* bndldx */
-               AddressParts a = gen_lea_modrm_0(env, s, modrm, false);
+               AddressParts a = decode->mem;
                if (reg >= 4
-                   || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16
                    || a.base < -1) {
                    goto illegal_op;
@@ -3676,21 +3386,18 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                    gen_set_hflag(s, HF_MPX_IU_MASK);
                }
            }
-        gen_nop_modrm(env, s, modrm);
        break;
    case 0x11b:
-        modrm = x86_ldub_code(env, s);
        if (s->flags & HF_MPX_EN_MASK) {
            mod = (modrm >> 6) & 3;
            reg = ((modrm >> 3) & 7) | REX_R(s);
            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
                /* bndmk */
                if (reg >= 4
-                   || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16) {
                    goto illegal_op;
                }
-               AddressParts a = gen_lea_modrm_0(env, s, modrm, false);
+               AddressParts a = decode->mem;
                if (a.base >= 0) {
                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
                    if (!CODE64(s)) {
@@ -3703,7 +3410,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                        /* rip-relative generates #ud */
                        goto illegal_op;
                    }
-                   tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a, false));
+                   tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, decode->mem, false));
                    if (!CODE64(s)) {
                        tcg_gen_ext32u_tl(s->A0, s->A0);
                    }
@@ -3714,11 +3421,10 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
            } else if (prefixes & PREFIX_REPNZ) {
                /* bndcn */
                if (reg >= 4
-                   || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16) {
                    goto illegal_op;
                }
-               gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
+               gen_bndck(s, decode, TCG_COND_GTU, cpu_bndu[reg]);
            } else if (prefixes & PREFIX_DATA) {
                /* bndmov -- to reg/mem */
                if (reg >= 4 || s->aflag == MO_16) {
@@ -3726,7 +3432,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                }
                if (mod == 3) {
                    int reg2 = (modrm & 7) | REX_B(s);
-                   if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
+                   if (reg2 >= 4) {
                        goto illegal_op;
                    }
                    if (s->flags & HF_MPX_IU_MASK) {
@@ -3734,7 +3440,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
                    }
                } else {
-                   gen_lea_modrm(env, s, modrm);
+                   gen_lea_modrm(s, decode);
                    if (CODE64(s)) {
                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
                                            s->mem_index, MO_LEUQ);
@@ -3751,9 +3457,8 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                }
            } else if (mod != 3) {
                /* bndstx */
-               AddressParts a = gen_lea_modrm_0(env, s, modrm, false);
+               AddressParts a = decode->mem;
                if (reg >= 4
-                   || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16
                    || a.base < -1) {
                    goto illegal_op;
@@ -3778,7 +3483,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                }
            }
        }
-        gen_nop_modrm(env, s, modrm);
        break;
    default:
        g_assert_not_reached();
@@ -3787,12 +3491,8 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
 illegal_op:
    gen_illegal_opcode(s);
    return;
- unknown_op:
-    gen_unknown_opcode(env, s);
 }
-#include "decode-new.h"
-#include "emit.c.inc"
 #include "decode-new.c.inc"
 void tcg_x86_init(void)
@@ -4000,15 +3700,9 @@ static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
    case 2:
        /* Restore state that may affect the next instruction. */
        dc->pc = dc->base.pc_next;
-        /*
-         * TODO: These save/restore can be removed after the table-based
-         * decoder is complete; we will be decoding the insn completely
-         * before any code generation that might affect these variables.
-         */
-        dc->cc_op_dirty = orig_cc_op_dirty;
-        dc->cc_op = orig_cc_op;
-        dc->pc_save = orig_pc_save;
-        /* END TODO */
+        assert(dc->cc_op_dirty == orig_cc_op_dirty);
+        assert(dc->cc_op == orig_cc_op);
+        assert(dc->pc_save == orig_pc_save);
        dc->base.num_insns--;
        tcg_remove_ops_after(dc->prev_insn_end);
        dc->base.insn_start = dc->prev_insn_start;
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index a5d57197a4..27bccc14e5 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -565,6 +565,20 @@ static bool tcg_target_const_match(int64_t val, int ct,
     }
 
     if (ct & TCG_CT_CONST_CMP) {
+        if (is_tst_cond(cond)) {
+            if (is_const_p16(uval) >= 0) {
+                return true;  /* TMxx */
+            }
+            if (risbg_mask(uval)) {
+                return true;  /* RISBG */
+            }
+            return false;
+        }
+
+        if (type == TCG_TYPE_I32) {
+            return true;
+        }
+
         switch (cond) {
         case TCG_COND_EQ:
         case TCG_COND_NE:
@@ -584,13 +598,7 @@ static bool tcg_target_const_match(int64_t val, int ct,
             break;
         case TCG_COND_TSTNE:
         case TCG_COND_TSTEQ:
-            if (is_const_p16(uval) >= 0) {
-                return true;  /* TMxx */
-            }
-            if (risbg_mask(uval)) {
-                return true;  /* RISBG */
-            }
-            break;
+            /* checked above, fallthru */
         default:
             g_assert_not_reached();
         }
@@ -3231,9 +3239,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_rotl_i64:
     case INDEX_op_rotr_i32:
     case INDEX_op_rotr_i64:
+        return C_O1_I2(r, r, ri);
     case INDEX_op_setcond_i32:
     case INDEX_op_negsetcond_i32:
-        return C_O1_I2(r, r, ri);
     case INDEX_op_setcond_i64:
     case INDEX_op_negsetcond_i64:
         return C_O1_I2(r, r, rC);