From c2bf2ccb266dc9ae4a6da75b845f54535417e109 Mon Sep 17 00:00:00 2001 From: Artyom Kunakovsky Date: Thu, 23 May 2024 08:11:18 +0300 Subject: [PATCH 01/24] configure: move -mcx16 flag out of CPU_CFLAGS The point of CPU_CFLAGS is really just to select the appropriate multilib, for example for library linking tests, and -mcx16 is not needed for that purpose. Furthermore, if -mcx16 is part of QEMU's choice of a basic x86_64 instruction set, it should be applied to cross-compiled x86_64 code too; it is plausible that tests/tcg would want to cover cmpxchg16b as well, for example. In the end this makes just as much sense as a per sub-build tweak, so move the flag to meson.build and cross_cc_cflags_x86_64. This leaves out contrib/plugins, which would fail when attempting to use __sync_val_compare_and_swap_16 (note it does not do yet); while minor, this *is* a disadvantage of this change. But building contrib/plugins with a Makefile instead of meson.build is something self-inflicted just for the sake of showing that it can be done, and if this kind of papercut started becoming a problem we could make the directory part of the meson build. Until then, we can live with the limitation. Signed-off-by: Artyom Kunakovsky Message-ID: <20240523051118.29367-1-artyomkunakovsky@gmail.com> [rewrite commit message, remove from configure. - Paolo] Signed-off-by: Paolo Bonzini --- configure | 7 ++----- meson.build | 7 +++++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/configure b/configure index 38ee257701..4d01a42ba6 100755 --- a/configure +++ b/configure @@ -512,10 +512,7 @@ case "$cpu" in cpu="x86_64" host_arch=x86_64 linux_arch=x86 - # ??? Only extremely old AMD cpus do not have cmpxchg16b. - # If we truly care, we should simply detect this case at - # runtime and generate the fallback to serial emulation. - CPU_CFLAGS="-m64 -mcx16" + CPU_CFLAGS="-m64" ;; esac @@ -1203,7 +1200,7 @@ fi : ${cross_cc_cflags_sparc64="-m64 -mcpu=ultrasparc"} : ${cross_cc_sparc="$cross_cc_sparc64"} : ${cross_cc_cflags_sparc="-m32 -mcpu=supersparc"} -: ${cross_cc_cflags_x86_64="-m64"} +: ${cross_cc_cflags_x86_64="-m64 -mcx16"} compute_target_variable() { eval "$2=" diff --git a/meson.build b/meson.build index a9de71d450..7fd82b5f48 100644 --- a/meson.build +++ b/meson.build @@ -336,6 +336,13 @@ if host_arch == 'i386' and not cc.links(''' qemu_common_flags = ['-march=i486'] + qemu_common_flags endif +# ??? Only extremely old AMD cpus do not have cmpxchg16b. +# If we truly care, we should simply detect this case at +# runtime and generate the fallback to serial emulation. +if host_arch == 'x86_64' + qemu_common_flags = ['-mcx16'] + qemu_common_flags +endif + if get_option('prefer_static') qemu_ldflags += get_option('b_pie') ? '-static-pie' : '-static' endif From 8225bff7c5db504f50e54ef66b079854635dba70 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 May 2024 17:17:47 +0200 Subject: [PATCH 02/24] target/i386: disable jmp_opt if EFLAGS.RF is 1 If EFLAGS.RF is 1, special processing in gen_eob_worker() is needed and therefore goto_tb cannot be used. Suggested-by: Richard Henderson Reviewed-by: Richard Henderson Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 76be742580..ebcff8766c 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4660,7 +4660,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) dc->cpuid_7_1_eax_features = env->features[FEAT_7_1_EAX]; dc->cpuid_xsave_features = env->features[FEAT_XSAVE]; dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) || - (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK))); + (flags & (HF_RF_MASK | HF_TF_MASK | HF_INHIBIT_IRQ_MASK))); /* * If jmp_opt, we want to handle each string instruction individually. * For icount also disable repz optimization so that each iteration From f0f0136abba688a6516647a79cc91e03fad6d5d7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 25 May 2024 10:03:22 +0200 Subject: [PATCH 03/24] target/i386: no single-step exception after MOV or POP SS Intel SDM 18.3.1.4 "If an occurrence of the MOV or POP instruction loads the SS register executes with EFLAGS.TF = 1, no single-step debug exception occurs following the MOV or POP instruction." Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index ebcff8766c..9782250b20 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2273,7 +2273,7 @@ gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr) if (recheck_tf) { gen_helper_rechecking_single_step(tcg_env); tcg_gen_exit_tb(NULL, 0); - } else if (s->flags & HF_TF_MASK) { + } else if ((s->flags & HF_TF_MASK) && !inhibit) { gen_helper_single_step(tcg_env); } else if (jr && /* give irqs a chance to happen */ From 69d728126223941de298b34798eb433be28b2734 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 16 May 2024 18:43:44 +0200 Subject: [PATCH 04/24] target/i386: cleanup eob handling of RSM gen_helper_rsm cannot generate an exception, and reloads the flags. So there's no need to spill cc_op and update cpu_eip, but on the other hand cc_op must be reset to CC_OP_EFLAGS before returning. It all works by chance, because by spilling cc_op before the call to the helper, it becomes non-dirty and gen_eob will not overwrite the CC_OP_EFLAGS value that is placed there by the helper. But let's clean it up. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 9782250b20..849864d1aa 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4488,9 +4488,8 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) /* we should not be in SMM mode */ g_assert_not_reached(); #else - gen_update_cc_op(s); - gen_update_eip_next(s); gen_helper_rsm(tcg_env); + set_cc_op(s, CC_OP_EFLAGS); #endif /* CONFIG_USER_ONLY */ s->base.is_jmp = DISAS_EOB_ONLY; break; From f6ac77eab66e8e8e6ba04d58bc262b60698ecd1d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 16 May 2024 18:38:02 +0200 Subject: [PATCH 05/24] target/i386: remove unnecessary gen_update_cc_op before gen_eob* This is already handled in gen_eob(). Before adding another DISAS_* case, remove the double calls. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 849864d1aa..920d854c2b 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4775,14 +4775,12 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_jmp_rel_csize(dc, 0, 0); break; case DISAS_EOB_NEXT: - gen_update_cc_op(dc); gen_update_eip_cur(dc); /* fall through */ case DISAS_EOB_ONLY: gen_eob(dc); break; case DISAS_EOB_INHIBIT_IRQ: - gen_update_cc_op(dc); gen_update_eip_cur(dc); gen_eob_inhibit_irq(dc); break; From a0625efd4d61a7e1e3f0435aa6a83c6330b72b9a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 16 May 2024 23:08:40 +0200 Subject: [PATCH 06/24] target/i386: cpu_load_eflags already sets cc_op No need to set it again at the end of the translation block, cc_op_dirty can be set to false. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/emit.c.inc | 2 +- target/i386/tcg/translate.c | 37 ++++++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index c78e35b1e2..3f2ae0aa7e 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1883,7 +1883,7 @@ static void gen_IRET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) gen_helper_iret_protected(tcg_env, tcg_constant_i32(s->dflag - 1), eip_next_i32(s)); } - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); s->base.is_jmp = DISAS_EOB_ONLY; } diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 920d854c2b..25c973e20c 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -309,7 +309,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_POPCNT] = USES_CC_SRC, }; -static void set_cc_op(DisasContext *s, CCOp op) +static void set_cc_op_1(DisasContext *s, CCOp op, bool dirty) { int dead; @@ -332,20 +332,27 @@ static void set_cc_op(DisasContext *s, CCOp op) tcg_gen_discard_tl(s->cc_srcT); } - if (op == CC_OP_DYNAMIC) { - /* The DYNAMIC setting is translator only, and should never be - stored. Thus we always consider it clean. */ - s->cc_op_dirty = false; - } else { - /* Discard any computed CC_OP value (see shifts). */ - if (s->cc_op == CC_OP_DYNAMIC) { - tcg_gen_discard_i32(cpu_cc_op); - } - s->cc_op_dirty = true; + if (dirty && s->cc_op == CC_OP_DYNAMIC) { + tcg_gen_discard_i32(cpu_cc_op); } + s->cc_op_dirty = dirty; s->cc_op = op; } +static void set_cc_op(DisasContext *s, CCOp op) +{ + /* + * The DYNAMIC setting is translator only, everything else + * will be spilled later. + */ + set_cc_op_1(s, op, op != CC_OP_DYNAMIC); +} + +static void assume_cc_op(DisasContext *s, CCOp op) +{ + set_cc_op_1(s, op, false); +} + static void gen_update_cc_op(DisasContext *s) { if (s->cc_op_dirty) { @@ -3554,6 +3561,10 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) gen_update_cc_op(s); gen_update_eip_cur(s); gen_helper_syscall(tcg_env, cur_insn_len_i32(s)); + /* condition codes are modified only in long mode */ + if (LMA(s)) { + assume_cc_op(s, CC_OP_EFLAGS); + } /* TF handling for the syscall insn is different. The TF bit is checked after the syscall insn completes. This allows #DB to not be generated after one has entered CPL0 if TF is set in FMASK. */ @@ -3570,7 +3581,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) gen_helper_sysret(tcg_env, tcg_constant_i32(dflag - 1)); /* condition codes are modified only in long mode */ if (LMA(s)) { - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } /* TF handling for the sysret insn is different. The TF bit is checked after the sysret insn completes. This allows #DB to be @@ -4489,7 +4500,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) g_assert_not_reached(); #else gen_helper_rsm(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); #endif /* CONFIG_USER_ONLY */ s->base.is_jmp = DISAS_EOB_ONLY; break; From abdcc5c8eff0879c76aeb9f16d0c13044bfecbda Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 16 May 2024 23:04:28 +0200 Subject: [PATCH 07/24] target/i386: set CC_OP in helpers if they want CC_OP_EFLAGS Mark cc_op as clean and do not spill it at the end of the translation block. Technically this is a tiny bit less efficient, but: * it results in translations that are a tiny bit smaller * for most of these instructions, it is not unlikely that they are close to the end of the basic block, in which case cc_op would not be overwritten * anyway the cost is probably dwarfed by that of computing flags. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/ops_sse.h | 8 ++++++++ target/i386/tcg/emit.c.inc | 22 +++++++++++----------- target/i386/tcg/fpu_helper.c | 2 ++ target/i386/tcg/int_helper.c | 13 +++++++++---- target/i386/tcg/seg_helper.c | 16 ++++++++-------- target/i386/tcg/translate.c | 12 ++++++------ 6 files changed, 44 insertions(+), 29 deletions(-) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 6a465a35fd..f0aa1894aa 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -1111,6 +1111,7 @@ void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) s1 = s->ZMM_S(0); ret = float32_compare_quiet(s0, s1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; + CC_OP = CC_OP_EFLAGS; } void helper_comiss(CPUX86State *env, Reg *d, Reg *s) @@ -1122,6 +1123,7 @@ void helper_comiss(CPUX86State *env, Reg *d, Reg *s) s1 = s->ZMM_S(0); ret = float32_compare(s0, s1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; + CC_OP = CC_OP_EFLAGS; } void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s) @@ -1133,6 +1135,7 @@ void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s) d1 = s->ZMM_D(0); ret = float64_compare_quiet(d0, d1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; + CC_OP = CC_OP_EFLAGS; } void helper_comisd(CPUX86State *env, Reg *d, Reg *s) @@ -1144,6 +1147,7 @@ void helper_comisd(CPUX86State *env, Reg *d, Reg *s) d1 = s->ZMM_D(0); ret = float64_compare(d0, d1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; + CC_OP = CC_OP_EFLAGS; } #endif @@ -1610,6 +1614,7 @@ void glue(helper_ptest, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) cf |= (s->Q(i) & ~d->Q(i)); } CC_SRC = (zf ? 0 : CC_Z) | (cf ? 0 : CC_C); + CC_OP = CC_OP_EFLAGS; } #define FMOVSLDUP(i) s->L((i) & ~1) @@ -1966,6 +1971,7 @@ static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s, validd--; CC_SRC = (valids < upper ? CC_Z : 0) | (validd < upper ? CC_S : 0); + CC_OP = CC_OP_EFLAGS; switch ((ctrl >> 2) & 3) { case 0: @@ -2297,6 +2303,7 @@ void glue(helper_vtestps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) cf |= (s->L(i) & ~d->L(i)); } CC_SRC = ((zf >> 31) ? 0 : CC_Z) | ((cf >> 31) ? 0 : CC_C); + CC_OP = CC_OP_EFLAGS; } void glue(helper_vtestpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -2309,6 +2316,7 @@ void glue(helper_vtestpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) cf |= (s->Q(i) & ~d->Q(i)); } CC_SRC = ((zf >> 63) ? 0 : CC_Z) | ((cf >> 63) ? 0 : CC_C); + CC_OP = CC_OP_EFLAGS; } void glue(helper_vpmaskmovd_st, SUFFIX)(CPUX86State *env, diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 3f2ae0aa7e..e0ac21abe2 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -918,7 +918,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod } else { \ gen_helper_##lname##_ymm(tcg_env, OP_PTR1, OP_PTR2); \ } \ - set_cc_op(s, CC_OP_EFLAGS); \ + assume_cc_op(s, CC_OP_EFLAGS); \ } UNARY_CMP_SSE(VPTEST, ptest) UNARY_CMP_SSE(VTESTPS, vtestps) @@ -1079,7 +1079,7 @@ static void gen_AAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { gen_update_cc_op(s); gen_helper_aaa(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_AAD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -1102,7 +1102,7 @@ static void gen_AAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { gen_update_cc_op(s); gen_helper_aas(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_ADC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -1566,14 +1566,14 @@ static void gen_DAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { gen_update_cc_op(s); gen_helper_daa(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_DAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { gen_update_cc_op(s); gen_helper_das(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_DEC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -2353,14 +2353,14 @@ static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); gen_helper_pcmpestri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); gen_helper_pcmpestrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); if ((s->prefix & PREFIX_VEX) && !s->vex_l) { tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_X(1)), 16, 16, 0); @@ -2371,14 +2371,14 @@ static void gen_PCMPISTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); gen_helper_pcmpistri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); gen_helper_pcmpistrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); if ((s->prefix & PREFIX_VEX) && !s->vex_l) { tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_X(1)), 16, 16, 0); @@ -3595,7 +3595,7 @@ static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) SSEFunc_0_epp fn; fn = s->prefix & PREFIX_DATA ? gen_helper_comisd : gen_helper_comiss; fn(tcg_env, OP_PTR1, OP_PTR2); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -3982,7 +3982,7 @@ static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode SSEFunc_0_epp fn; fn = s->prefix & PREFIX_DATA ? gen_helper_ucomisd : gen_helper_ucomiss; fn(tcg_env, OP_PTR1, OP_PTR2); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index ece22a3553..8df8cae631 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -487,6 +487,7 @@ void helper_fcomi_ST0_FT0(CPUX86State *env) ret = floatx80_compare(ST0, FT0, &env->fp_status); eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); CC_SRC = eflags | fcomi_ccval[ret + 1]; + CC_OP = CC_OP_EFLAGS; merge_exception_flags(env, old_flags); } @@ -499,6 +500,7 @@ void helper_fucomi_ST0_FT0(CPUX86State *env) ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); CC_SRC = eflags | fcomi_ccval[ret + 1]; + CC_OP = CC_OP_EFLAGS; merge_exception_flags(env, old_flags); } diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c index 4cc59f1520..e1f9240528 100644 --- a/target/i386/tcg/int_helper.c +++ b/target/i386/tcg/int_helper.c @@ -187,6 +187,7 @@ void helper_aaa(CPUX86State *env) } env->regs[R_EAX] = (env->regs[R_EAX] & ~0xffff) | al | (ah << 8); CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } void helper_aas(CPUX86State *env) @@ -211,6 +212,7 @@ void helper_aas(CPUX86State *env) } env->regs[R_EAX] = (env->regs[R_EAX] & ~0xffff) | al | (ah << 8); CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } void helper_daa(CPUX86State *env) @@ -238,6 +240,7 @@ void helper_daa(CPUX86State *env) eflags |= parity_table[al]; /* pf */ eflags |= (al & 0x80); /* sf */ CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } void helper_das(CPUX86State *env) @@ -269,6 +272,7 @@ void helper_das(CPUX86State *env) eflags |= parity_table[al]; /* pf */ eflags |= (al & 0x80); /* sf */ CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } #ifdef TARGET_X86_64 @@ -449,10 +453,11 @@ target_ulong HELPER(rdrand)(CPUX86State *env) error_free(err); /* Failure clears CF and all other flags, and returns 0. */ env->cc_src = 0; - return 0; + ret = 0; + } else { + /* Success sets CF and clears all others. */ + env->cc_src = CC_C; } - - /* Success sets CF and clears all others. */ - env->cc_src = CC_C; + env->cc_op = CC_OP_EFLAGS; return ret; } diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 34ccabd8ce..0301459004 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -2326,7 +2326,7 @@ void helper_verr(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env); + eflags = cpu_cc_compute_all(env) | CC_Z; if ((selector & 0xfffc) == 0) { goto fail; } @@ -2351,11 +2351,11 @@ void helper_verr(CPUX86State *env, target_ulong selector1) } else { if (dpl < cpl || dpl < rpl) { fail: - CC_SRC = eflags & ~CC_Z; - return; + eflags &= ~CC_Z; } } - CC_SRC = eflags | CC_Z; + CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } void helper_verw(CPUX86State *env, target_ulong selector1) @@ -2364,7 +2364,7 @@ void helper_verw(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env); + eflags = cpu_cc_compute_all(env) | CC_Z; if ((selector & 0xfffc) == 0) { goto fail; } @@ -2385,9 +2385,9 @@ void helper_verw(CPUX86State *env, target_ulong selector1) } if (!(e2 & DESC_W_MASK)) { fail: - CC_SRC = eflags & ~CC_Z; - return; + eflags &= ~CC_Z; } } - CC_SRC = eflags | CC_Z; + CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 25c973e20c..1b0485e01b 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2984,7 +2984,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b) gen_update_cc_op(s); gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); gen_helper_fucomi_ST0_FT0(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); break; case 0x1e: /* fcomi */ if (!(s->cpuid_features & CPUID_CMOV)) { @@ -2993,7 +2993,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b) gen_update_cc_op(s); gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); gen_helper_fcomi_ST0_FT0(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); break; case 0x28: /* ffree sti */ gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg)); @@ -3052,7 +3052,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b) gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); gen_helper_fucomi_ST0_FT0(tcg_env); gen_helper_fpop(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); break; case 0x3e: /* fcomip */ if (!(s->cpuid_features & CPUID_CMOV)) { @@ -3062,7 +3062,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b) gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); gen_helper_fcomi_ST0_FT0(tcg_env); gen_helper_fpop(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); break; case 0x10 ... 0x13: /* fcmovxx */ case 0x18 ... 0x1b: @@ -3268,7 +3268,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) gen_helper_rdrand(s->T0, tcg_env); rm = (modrm & 7) | REX_B(s); gen_op_mov_reg_v(s, dflag, rm, s->T0); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); break; default: @@ -3655,7 +3655,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) } else { gen_helper_verw(tcg_env, s->T0); } - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); break; default: goto unknown_op; From 9594b593315a56ca459b8bc2de1cad5a91597f37 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 16 May 2024 19:04:36 +0200 Subject: [PATCH 08/24] target/i386: document and group DISAS_* constants Place DISAS_* constants that update cpu_eip first, and the "jump" ones last. Add comments explaining the differences and usage. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 1b0485e01b..1246118e42 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -144,9 +144,28 @@ typedef struct DisasContext { TCGOp *prev_insn_end; } DisasContext; -#define DISAS_EOB_ONLY DISAS_TARGET_0 -#define DISAS_EOB_NEXT DISAS_TARGET_1 -#define DISAS_EOB_INHIBIT_IRQ DISAS_TARGET_2 +/* + * Point EIP to next instruction before ending translation. + * For instructions that can change hflags. + */ +#define DISAS_EOB_NEXT DISAS_TARGET_0 + +/* + * Point EIP to next instruction and set HF_INHIBIT_IRQ if not + * already set. For instructions that activate interrupt shadow. + */ +#define DISAS_EOB_INHIBIT_IRQ DISAS_TARGET_1 + +/* + * Return to the main loop; EIP might have already been updated + * but even in that case do not use lookup_and_goto_ptr(). + */ +#define DISAS_EOB_ONLY DISAS_TARGET_2 + +/* + * EIP has already been updated. For jumps that wish to use + * lookup_and_goto_ptr() + */ #define DISAS_JUMP DISAS_TARGET_3 /* The environment in which user-only runs is constrained. */ From c8494cb8b1fd73a1e6a3a6a022a7e4d7e481bdc1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 16 May 2024 18:46:55 +0200 Subject: [PATCH 09/24] target/i386: avoid calling gen_eob_syscall before tb_stop syscall and sysret only have one exit, so they do not need to generate the end-of-translation code inline. It can be deferred to tb_stop. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 1246118e42..06aaaa00b4 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -168,6 +168,12 @@ typedef struct DisasContext { */ #define DISAS_JUMP DISAS_TARGET_3 +/* + * EIP has already been updated. Use updated value of + * EFLAGS.TF to determine singlestep trap (SYSCALL/SYSRET). + */ +#define DISAS_EOB_RECHECK_TF DISAS_TARGET_4 + /* The environment in which user-only runs is constrained. */ #ifdef CONFIG_USER_ONLY #define PE(S) true @@ -3587,7 +3593,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) /* TF handling for the syscall insn is different. The TF bit is checked after the syscall insn completes. This allows #DB to not be generated after one has entered CPL0 if TF is set in FMASK. */ - gen_eob_syscall(s); + s->base.is_jmp = DISAS_EOB_RECHECK_TF; break; case 0x107: /* sysret */ /* For Intel SYSRET is only valid in long mode */ @@ -3606,7 +3612,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) checked after the sysret insn completes. This allows #DB to be generated "as if" the syscall insn in userspace has just completed. */ - gen_eob_syscall(s); + s->base.is_jmp = DISAS_EOB_RECHECK_TF; } break; case 0x1a2: /* cpuid */ @@ -4810,6 +4816,9 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_EOB_ONLY: gen_eob(dc); break; + case DISAS_EOB_RECHECK_TF: + gen_eob_syscall(dc); + break; case DISAS_EOB_INHIBIT_IRQ: gen_update_eip_cur(dc); gen_eob_inhibit_irq(dc); From 2512f786bfe0a63fbf59cf0354d2b2ae40198a6f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 16 May 2024 18:35:55 +0200 Subject: [PATCH 10/24] target/i386: avoid calling gen_eob_inhibit_irq before tb_stop sti only has one exit, so it does not need to generate the end-of-translation code inline. It can be deferred to tb_stop. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/emit.c.inc | 4 +--- target/i386/tcg/translate.c | 13 ------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index e0ac21abe2..88bcb9699c 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -3475,9 +3475,7 @@ static void gen_STD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) static void gen_STI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { gen_set_eflags(s, IF_MASK); - /* interruptions are enabled only the first insn after sti */ - gen_update_eip_next(s); - gen_eob_inhibit_irq(s); + s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; } static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 06aaaa00b4..a7493b5ccf 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -564,19 +564,6 @@ static void gen_update_eip_cur(DisasContext *s) s->pc_save = s->base.pc_next; } -static void gen_update_eip_next(DisasContext *s) -{ - assert(s->pc_save != -1); - if (tb_cflags(s->base.tb) & CF_PCREL) { - tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save); - } else if (CODE64(s)) { - tcg_gen_movi_tl(cpu_eip, s->pc); - } else { - tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->pc - s->cs_base)); - } - s->pc_save = s->pc; -} - static int cur_insn_len(DisasContext *s) { return s->pc - s->base.pc_next; From ad8f2ad77eb5b78d4478f900518d832af005ad40 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 16 May 2024 18:38:32 +0200 Subject: [PATCH 11/24] target/i386: assert that gen_update_eip_cur and gen_update_eip_next are the same in tb_stop This is an invariant now that there are no calls to gen_eob_inhibit_irq() outside tb_stop. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a7493b5ccf..fcb7934efa 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4798,6 +4798,7 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_jmp_rel_csize(dc, 0, 0); break; case DISAS_EOB_NEXT: + assert(dc->base.pc_next == dc->pc); gen_update_eip_cur(dc); /* fall through */ case DISAS_EOB_ONLY: @@ -4807,6 +4808,7 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_eob_syscall(dc); break; case DISAS_EOB_INHIBIT_IRQ: + assert(dc->base.pc_next == dc->pc); gen_update_eip_cur(dc); gen_eob_inhibit_irq(dc); break; From f5bd6a48ee129f7bc8335ad75e18d2f88b131745 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 16 May 2024 23:29:53 +0200 Subject: [PATCH 12/24] target/i386: raze the gen_eob* jungle Make gen_eob take the DISAS_* constant as an argument, so that it is not necessary to have wrappers around it. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 62 +++++++++---------------------------- 1 file changed, 15 insertions(+), 47 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index fcb7934efa..46c452032b 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -260,8 +260,6 @@ STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val) STUB_HELPER(wrmsr, TCGv_env env) #endif -static void gen_eob(DisasContext *s); -static void gen_jr(DisasContext *s); static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num); static void gen_jmp_rel_csize(DisasContext *s, int diff, int tb_num); static void gen_exception_gpf(DisasContext *s); @@ -2266,12 +2264,13 @@ static void gen_bnd_jmp(DisasContext *s) } } -/* Generate an end of block. Trace exception is also generated if needed. - If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. - If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of - S->TF. This is used by the syscall/sysret insns. */ +/* + * Generate an end of block, including common tasks such as generating + * single step traps, resetting the RF flag, and handling the interrupt + * shadow. + */ static void -gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr) +gen_eob(DisasContext *s, int mode) { bool inhibit_reset; @@ -2282,52 +2281,29 @@ gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr) if (s->flags & HF_INHIBIT_IRQ_MASK) { gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK); inhibit_reset = true; - } else if (inhibit) { + } else if (mode == DISAS_EOB_INHIBIT_IRQ) { gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); } if (s->base.tb->flags & HF_RF_MASK) { gen_reset_eflags(s, RF_MASK); } - if (recheck_tf) { + if (mode == DISAS_EOB_RECHECK_TF) { gen_helper_rechecking_single_step(tcg_env); tcg_gen_exit_tb(NULL, 0); - } else if ((s->flags & HF_TF_MASK) && !inhibit) { + } else if ((s->flags & HF_TF_MASK) && mode != DISAS_EOB_INHIBIT_IRQ) { gen_helper_single_step(tcg_env); - } else if (jr && + } else if (mode == DISAS_JUMP && /* give irqs a chance to happen */ !inhibit_reset) { tcg_gen_lookup_and_goto_ptr(); } else { tcg_gen_exit_tb(NULL, 0); } + s->base.is_jmp = DISAS_NORETURN; } -static inline void -gen_eob_syscall(DisasContext *s) -{ - gen_eob_worker(s, false, true, false); -} - -/* End of block. Set HF_INHIBIT_IRQ_MASK if it isn't already set. */ -static void gen_eob_inhibit_irq(DisasContext *s) -{ - gen_eob_worker(s, true, false, false); -} - -/* End of block, resetting the inhibit irq flag. */ -static void gen_eob(DisasContext *s) -{ - gen_eob_worker(s, false, false, false); -} - -/* Jump to register */ -static void gen_jr(DisasContext *s) -{ - gen_eob_worker(s, false, false, true); -} - /* Jump to eip+diff, truncating the result to OT. */ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) { @@ -2379,9 +2355,9 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) tcg_gen_movi_tl(cpu_eip, new_eip); } if (s->jmp_opt) { - gen_jr(s); /* jump to another page */ + gen_eob(s, DISAS_JUMP); /* jump to another page */ } else { - gen_eob(s); /* exit to main loop */ + gen_eob(s, DISAS_EOB_ONLY); /* exit to main loop */ } } } @@ -4798,22 +4774,14 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_jmp_rel_csize(dc, 0, 0); break; case DISAS_EOB_NEXT: + case DISAS_EOB_INHIBIT_IRQ: assert(dc->base.pc_next == dc->pc); gen_update_eip_cur(dc); /* fall through */ case DISAS_EOB_ONLY: - gen_eob(dc); - break; case DISAS_EOB_RECHECK_TF: - gen_eob_syscall(dc); - break; - case DISAS_EOB_INHIBIT_IRQ: - assert(dc->base.pc_next == dc->pc); - gen_update_eip_cur(dc); - gen_eob_inhibit_irq(dc); - break; case DISAS_JUMP: - gen_jr(dc); + gen_eob(dc, dc->base.is_jmp); break; default: g_assert_not_reached(); From b3c49e654a0d382cca34dbb3bbf1022cffff8d0f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Apr 2024 15:04:13 +0200 Subject: [PATCH 13/24] target/i386: reg in gen_ldst_modrm is always OR_TMP0 Values other than OR_TMP0 were only ever used by MOV and MOVNTI opcodes. Now that these have been converted to the new decoder, remove the argument. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 46c452032b..4bb932af16 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1828,10 +1828,9 @@ static void gen_add_A0_ds_seg(DisasContext *s) gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override); } -/* generate modrm memory load or store of 'reg'. TMP0 is used if reg == - OR_TMP0 */ +/* generate modrm memory load or store of 'reg'. */ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm, - MemOp ot, int reg, int is_store) + MemOp ot, int is_store) { int mod, rm; @@ -1839,24 +1838,16 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm, rm = (modrm & 7) | REX_B(s); if (mod == 3) { if (is_store) { - if (reg != OR_TMP0) - gen_op_mov_v_reg(s, ot, s->T0, reg); gen_op_mov_reg_v(s, ot, rm, s->T0); } else { gen_op_mov_v_reg(s, ot, s->T0, rm); - if (reg != OR_TMP0) - gen_op_mov_reg_v(s, ot, reg, s->T0); } } else { gen_lea_modrm(env, s, modrm); if (is_store) { - if (reg != OR_TMP0) - gen_op_mov_v_reg(s, ot, s->T0, reg); gen_op_st_v(s, ot, s->T0, s->A0); } else { gen_op_ld_v(s, ot, s->T0, s->A0); - if (reg != OR_TMP0) - gen_op_mov_reg_v(s, ot, reg, s->T0); } } } @@ -3447,7 +3438,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) ot = dflag; modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | REX_R(s); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + gen_ldst_modrm(env, s, modrm, ot, 0); gen_extu(ot, s->T0); /* Note that lzcnt and tzcnt are in different extensions. */ @@ -3598,14 +3589,14 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, ldt.selector)); ot = mod == 3 ? dflag : MO_16; - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); + gen_ldst_modrm(env, s, modrm, ot, 1); break; case 2: /* lldt */ if (!PE(s) || VM86(s)) goto illegal_op; if (check_cpl0(s)) { gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE); - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); + gen_ldst_modrm(env, s, modrm, MO_16, 0); tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); gen_helper_lldt(tcg_env, s->tmp2_i32); } @@ -3620,14 +3611,14 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, tr.selector)); ot = mod == 3 ? dflag : MO_16; - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); + gen_ldst_modrm(env, s, modrm, ot, 1); break; case 3: /* ltr */ if (!PE(s) || VM86(s)) goto illegal_op; if (check_cpl0(s)) { gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE); - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); + gen_ldst_modrm(env, s, modrm, MO_16, 0); tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); gen_helper_ltr(tcg_env, s->tmp2_i32); } @@ -3636,7 +3627,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) case 5: /* verw */ if (!PE(s) || VM86(s)) goto illegal_op; - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); + gen_ldst_modrm(env, s, modrm, MO_16, 0); gen_update_cc_op(s); if (op == 4) { gen_helper_verr(tcg_env, s->T0); @@ -3900,7 +3891,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) */ mod = (modrm >> 6) & 3; ot = (mod != 3 ? MO_16 : s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); + gen_ldst_modrm(env, s, modrm, ot, 1); break; case 0xee: /* rdpkru */ if (s->prefix & (PREFIX_LOCK | PREFIX_DATA @@ -3927,7 +3918,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) break; } gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0); - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); + gen_ldst_modrm(env, s, modrm, MO_16, 0); /* * Only the 4 lower bits of CR0 are modified. * PE cannot be set to zero if already set to one. @@ -3999,7 +3990,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) ot = dflag != MO_16 ? MO_32 : MO_16; modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | REX_R(s); - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); + gen_ldst_modrm(env, s, modrm, MO_16, 0); t0 = tcg_temp_new(); gen_update_cc_op(s); if (b == 0x102) { @@ -4503,7 +4494,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) reg = ((modrm >> 3) & 7) | REX_R(s); ot = dflag; - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + gen_ldst_modrm(env, s, modrm, ot, 0); gen_extu(ot, s->T0); tcg_gen_mov_tl(cpu_cc_src, s->T0); tcg_gen_ctpop_tl(s->T0, s->T0); From 420d60caad92644a0ebebb8a2b41dedc6635f530 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Apr 2024 15:04:13 +0200 Subject: [PATCH 14/24] target/i386: split gen_ldst_modrm for load and store The is_store argument of gen_ldst_modrm has only ever been passed a constant. Just split the function in two. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 52 +++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 4bb932af16..afbed87056 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1828,27 +1828,33 @@ static void gen_add_A0_ds_seg(DisasContext *s) gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override); } -/* generate modrm memory load or store of 'reg'. */ -static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm, - MemOp ot, int is_store) +/* generate modrm load of memory or register. */ +static void gen_ld_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot) { int mod, rm; mod = (modrm >> 6) & 3; rm = (modrm & 7) | REX_B(s); if (mod == 3) { - if (is_store) { - gen_op_mov_reg_v(s, ot, rm, s->T0); - } else { - gen_op_mov_v_reg(s, ot, s->T0, rm); - } + gen_op_mov_v_reg(s, ot, s->T0, rm); } else { gen_lea_modrm(env, s, modrm); - if (is_store) { - gen_op_st_v(s, ot, s->T0, s->A0); - } else { - gen_op_ld_v(s, ot, s->T0, s->A0); - } + gen_op_ld_v(s, ot, s->T0, s->A0); + } +} + +/* generate modrm store of memory or register. */ +static void gen_st_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot) +{ + int mod, rm; + + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + if (mod == 3) { + gen_op_mov_reg_v(s, ot, rm, s->T0); + } else { + gen_lea_modrm(env, s, modrm); + gen_op_st_v(s, ot, s->T0, s->A0); } } @@ -3438,7 +3444,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) ot = dflag; modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | REX_R(s); - gen_ldst_modrm(env, s, modrm, ot, 0); + gen_ld_modrm(env, s, modrm, ot); gen_extu(ot, s->T0); /* Note that lzcnt and tzcnt are in different extensions. */ @@ -3589,14 +3595,14 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, ldt.selector)); ot = mod == 3 ? dflag : MO_16; - gen_ldst_modrm(env, s, modrm, ot, 1); + gen_st_modrm(env, s, modrm, ot); break; case 2: /* lldt */ if (!PE(s) || VM86(s)) goto illegal_op; if (check_cpl0(s)) { gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE); - gen_ldst_modrm(env, s, modrm, MO_16, 0); + gen_ld_modrm(env, s, modrm, MO_16); tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); gen_helper_lldt(tcg_env, s->tmp2_i32); } @@ -3611,14 +3617,14 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, tr.selector)); ot = mod == 3 ? dflag : MO_16; - gen_ldst_modrm(env, s, modrm, ot, 1); + gen_st_modrm(env, s, modrm, ot); break; case 3: /* ltr */ if (!PE(s) || VM86(s)) goto illegal_op; if (check_cpl0(s)) { gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE); - gen_ldst_modrm(env, s, modrm, MO_16, 0); + gen_ld_modrm(env, s, modrm, MO_16); tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); gen_helper_ltr(tcg_env, s->tmp2_i32); } @@ -3627,7 +3633,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) case 5: /* verw */ if (!PE(s) || VM86(s)) goto illegal_op; - gen_ldst_modrm(env, s, modrm, MO_16, 0); + gen_ld_modrm(env, s, modrm, MO_16); gen_update_cc_op(s); if (op == 4) { gen_helper_verr(tcg_env, s->T0); @@ -3891,7 +3897,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) */ mod = (modrm >> 6) & 3; ot = (mod != 3 ? MO_16 : s->dflag); - gen_ldst_modrm(env, s, modrm, ot, 1); + gen_st_modrm(env, s, modrm, ot); break; case 0xee: /* rdpkru */ if (s->prefix & (PREFIX_LOCK | PREFIX_DATA @@ -3918,7 +3924,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) break; } gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0); - gen_ldst_modrm(env, s, modrm, MO_16, 0); + gen_ld_modrm(env, s, modrm, MO_16); /* * Only the 4 lower bits of CR0 are modified. * PE cannot be set to zero if already set to one. @@ -3990,7 +3996,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) ot = dflag != MO_16 ? MO_32 : MO_16; modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | REX_R(s); - gen_ldst_modrm(env, s, modrm, MO_16, 0); + gen_ld_modrm(env, s, modrm, MO_16); t0 = tcg_temp_new(); gen_update_cc_op(s); if (b == 0x102) { @@ -4494,7 +4500,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) reg = ((modrm >> 3) & 7) | REX_R(s); ot = dflag; - gen_ldst_modrm(env, s, modrm, ot, 0); + gen_ld_modrm(env, s, modrm, ot); gen_extu(ot, s->T0); tcg_gen_mov_tl(cpu_cc_src, s->T0); tcg_gen_ctpop_tl(s->T0, s->T0); From d0e31d6d3740726db067e2e49d0593b4ce85ccfe Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Apr 2024 15:31:58 +0200 Subject: [PATCH 15/24] target/i386: inline gen_add_A0_ds_seg It is only used in MONITOR, where a direct call of gen_lea_v_seg is simpler, and in XLAT. Inline it in the latter. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/emit.c.inc | 2 +- target/i386/tcg/translate.c | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 88bcb9699c..01ad57629e 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -4043,7 +4043,7 @@ static void gen_XLAT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { /* AL is already zero-extended into s->T0. */ tcg_gen_add_tl(s->A0, cpu_regs[R_EBX], s->T0); - gen_add_A0_ds_seg(s); + gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override); gen_op_ld_v(s, MO_8, s->T0, s->A0); } diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index afbed87056..2039ccf283 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1822,12 +1822,6 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm, gen_helper_bndck(tcg_env, s->tmp2_i32); } -/* used for LEA and MOV AX, mem */ -static void gen_add_A0_ds_seg(DisasContext *s) -{ - gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override); -} - /* generate modrm load of memory or register. */ static void gen_ld_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot) { @@ -3674,8 +3668,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) } gen_update_cc_op(s); gen_update_eip_cur(s); - tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]); - gen_add_A0_ds_seg(s); + gen_lea_v_seg(s, s->aflag, cpu_regs[R_EAX], R_DS, s->override); gen_helper_monitor(tcg_env, s->A0); break; From 20237d4070e8fc5dae743f0286dda11375b7c474 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Apr 2024 16:00:54 +0200 Subject: [PATCH 16/24] target/i386: use mo_stacksize more Use mo_stacksize for all stack accesses, including when a 64-bit code segment is impossible and the code is therefore checking only for SS32(s). Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 2039ccf283..2a20f9bafb 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2075,12 +2075,12 @@ static inline void gen_pop_update(DisasContext *s, MemOp ot) static inline void gen_stack_A0(DisasContext *s) { - gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1); + gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1); } static void gen_pusha(DisasContext *s) { - MemOp s_ot = SS32(s) ? MO_32 : MO_16; + MemOp s_ot = mo_stacksize(s); MemOp d_ot = s->dflag; int size = 1 << d_ot; int i; @@ -2096,7 +2096,7 @@ static void gen_pusha(DisasContext *s) static void gen_popa(DisasContext *s) { - MemOp s_ot = SS32(s) ? MO_32 : MO_16; + MemOp s_ot = mo_stacksize(s); MemOp d_ot = s->dflag; int size = 1 << d_ot; int i; @@ -2118,7 +2118,7 @@ static void gen_popa(DisasContext *s) static void gen_enter(DisasContext *s, int esp_addend, int level) { MemOp d_ot = mo_pushpop(s, s->dflag); - MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16; + MemOp a_ot = mo_stacksize(s); int size = 1 << d_ot; /* Push BP; compute FrameTemp into T1. */ From f0e754d3ce52b25e6bea0ff04cecd8f0b111c1d6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Apr 2024 16:58:15 +0200 Subject: [PATCH 17/24] target/i386: introduce gen_lea_ss_ofs Generalize gen_stack_A0() to include an initial add and to use an arbitrary destination. This is a common pattern and it is not a huge burden to add the extra arguments to the only caller of gen_stack_A0(). Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/emit.c.inc | 2 +- target/i386/tcg/translate.c | 51 +++++++++++++++---------------------- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 01ad57629e..0a13be4989 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -3077,7 +3077,7 @@ static void gen_RETF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0; if (!PE(s) || VM86(s)) { - gen_stack_A0(s); + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_ESP], 0); /* pop offset */ gen_op_ld_v(s, s->dflag, s->T0, s->A0); /* NOTE: keeping EIP updated is not a problem in case of diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 2a20f9bafb..15993f8302 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2035,24 +2035,27 @@ static inline void gen_stack_update(DisasContext *s, int addend) gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend); } +static void gen_lea_ss_ofs(DisasContext *s, TCGv dest, TCGv src, target_ulong offset) +{ + if (offset) { + tcg_gen_addi_tl(dest, src, offset); + src = dest; + } + gen_lea_v_seg_dest(s, mo_stacksize(s), dest, src, R_SS, -1); +} + /* Generate a push. It depends on ss32, addseg and dflag. */ static void gen_push_v(DisasContext *s, TCGv val) { MemOp d_ot = mo_pushpop(s, s->dflag); MemOp a_ot = mo_stacksize(s); int size = 1 << d_ot; - TCGv new_esp = s->A0; + TCGv new_esp = tcg_temp_new(); - tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size); - - if (!CODE64(s)) { - if (ADDSEG(s)) { - new_esp = tcg_temp_new(); - tcg_gen_mov_tl(new_esp, s->A0); - } - gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); - } + tcg_gen_subi_tl(new_esp, cpu_regs[R_ESP], size); + /* Now reduce the value to the address size and apply SS base. */ + gen_lea_ss_ofs(s, s->A0, new_esp, 0); gen_op_st_v(s, d_ot, val, s->A0); gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp); } @@ -2062,7 +2065,7 @@ static MemOp gen_pop_T0(DisasContext *s) { MemOp d_ot = mo_pushpop(s, s->dflag); - gen_lea_v_seg_dest(s, mo_stacksize(s), s->T0, cpu_regs[R_ESP], R_SS, -1); + gen_lea_ss_ofs(s, s->T0, cpu_regs[R_ESP], 0); gen_op_ld_v(s, d_ot, s->T0, s->T0); return d_ot; @@ -2073,21 +2076,14 @@ static inline void gen_pop_update(DisasContext *s, MemOp ot) gen_stack_update(s, 1 << ot); } -static inline void gen_stack_A0(DisasContext *s) -{ - gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1); -} - static void gen_pusha(DisasContext *s) { - MemOp s_ot = mo_stacksize(s); MemOp d_ot = s->dflag; int size = 1 << d_ot; int i; for (i = 0; i < 8; i++) { - tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size); - gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_ESP], (i - 8) * size); gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0); } @@ -2096,7 +2092,6 @@ static void gen_pusha(DisasContext *s) static void gen_popa(DisasContext *s) { - MemOp s_ot = mo_stacksize(s); MemOp d_ot = s->dflag; int size = 1 << d_ot; int i; @@ -2106,8 +2101,7 @@ static void gen_popa(DisasContext *s) if (7 - i == R_ESP) { continue; } - tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size); - gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_ESP], i * size); gen_op_ld_v(s, d_ot, s->T0, s->A0); gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0); } @@ -2123,7 +2117,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) /* Push BP; compute FrameTemp into T1. */ tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size); - gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, s->T1, 0); gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0); level &= 31; @@ -2132,18 +2126,15 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) /* Copy level-1 pointers from the previous frame. */ for (i = 1; i < level; ++i) { - tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i); - gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_EBP], -size * i); gen_op_ld_v(s, d_ot, s->tmp0, s->A0); - tcg_gen_subi_tl(s->A0, s->T1, size * i); - gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, s->T1, -size * i); gen_op_st_v(s, d_ot, s->tmp0, s->A0); } /* Push the current FrameTemp as the last level. */ - tcg_gen_subi_tl(s->A0, s->T1, size * level); - gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, s->T1, -size * level); gen_op_st_v(s, d_ot, s->T1, s->A0); } @@ -2160,7 +2151,7 @@ static void gen_leave(DisasContext *s) MemOp d_ot = mo_pushpop(s, s->dflag); MemOp a_ot = mo_stacksize(s); - gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1); + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_EBP], 0); gen_op_ld_v(s, d_ot, s->T0, s->A0); tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot); From 6605817b1ad734d29efebcdca7c4baeeec3a65c7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Apr 2024 16:49:09 +0200 Subject: [PATCH 18/24] target/i386: clean up repeated string operations Do not bother generating inline wrappers for gen_repz and gen_repz2; use s->prefix to separate REPZ from REPNZ in the case of SCAS and CMPS. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/emit.c.inc | 22 +++++++++------------- target/i386/tcg/translate.c | 22 ++++------------------ 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 0a13be4989..377d2201c9 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1508,10 +1508,8 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec static void gen_CMPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[2].ot; - if (s->prefix & PREFIX_REPNZ) { - gen_repz_cmps(s, ot, 1); - } else if (s->prefix & PREFIX_REPZ) { - gen_repz_cmps(s, ot, 0); + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_nz(s, ot, gen_cmps); } else { gen_cmps(s, ot); } @@ -1834,7 +1832,7 @@ static void gen_INS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) translator_io_start(&s->base); if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_ins(s, ot); + gen_repz(s, ot, gen_ins); } else { gen_ins(s, ot); } @@ -1993,7 +1991,7 @@ static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[2].ot; if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_lods(s, ot); + gen_repz(s, ot, gen_lods); } else { gen_lods(s, ot); } @@ -2155,7 +2153,7 @@ static void gen_MOVS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[2].ot; if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_movs(s, ot); + gen_repz(s, ot, gen_movs); } else { gen_movs(s, ot); } @@ -2321,7 +2319,7 @@ static void gen_OUTS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) translator_io_start(&s->base); if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_outs(s, ot); + gen_repz(s, ot, gen_outs); } else { gen_outs(s, ot); } @@ -3329,10 +3327,8 @@ static void gen_SBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) static void gen_SCAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[2].ot; - if (s->prefix & PREFIX_REPNZ) { - gen_repz_scas(s, ot, 1); - } else if (s->prefix & PREFIX_REPZ) { - gen_repz_scas(s, ot, 0); + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_nz(s, ot, gen_scas); } else { gen_scas(s, ot); } @@ -3495,7 +3491,7 @@ static void gen_STOS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[1].ot; if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_stos(s, ot); + gen_repz(s, ot, gen_stos); } else { gen_stos(s, ot); } diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 15993f8302..7dd7ebf60d 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1327,14 +1327,12 @@ static void gen_repz(DisasContext *s, MemOp ot, gen_jmp_rel_csize(s, -cur_insn_len(s), 0); } -#define GEN_REPZ(op) \ - static inline void gen_repz_ ## op(DisasContext *s, MemOp ot) \ - { gen_repz(s, ot, gen_##op); } - -static void gen_repz2(DisasContext *s, MemOp ot, int nz, - void (*fn)(DisasContext *s, MemOp ot)) +static void gen_repz_nz(DisasContext *s, MemOp ot, + void (*fn)(DisasContext *s, MemOp ot)) { TCGLabel *l2; + int nz = (s->prefix & PREFIX_REPNZ) ? 1 : 0; + l2 = gen_jz_ecx_string(s); fn(s, ot); gen_op_add_reg_im(s, s->aflag, R_ECX, -1); @@ -1350,18 +1348,6 @@ static void gen_repz2(DisasContext *s, MemOp ot, int nz, gen_jmp_rel_csize(s, -cur_insn_len(s), 0); } -#define GEN_REPZ2(op) \ - static inline void gen_repz_ ## op(DisasContext *s, MemOp ot, int nz) \ - { gen_repz2(s, ot, nz, gen_##op); } - -GEN_REPZ(movs) -GEN_REPZ(stos) -GEN_REPZ(lods) -GEN_REPZ(ins) -GEN_REPZ(outs) -GEN_REPZ2(scas) -GEN_REPZ2(cmps) - static void gen_helper_fp_arith_ST0_FT0(int op) { switch (op) { From 56da568f88cc5537d49e0bfc793b0ca105605947 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 May 2024 16:59:34 +0200 Subject: [PATCH 19/24] target/i386: remove aflag argument of gen_lea_v_seg It is always s->aflag. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/emit.c.inc | 6 +++--- target/i386/tcg/translate.c | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 377d2201c9..e990141454 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -76,7 +76,7 @@ static void gen_NM_exception(DisasContext *s) static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib) { TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib); - gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override); + gen_lea_v_seg(s, ea, mem->def_seg, s->override); } static inline int mmx_offset(MemOp ot) @@ -2044,7 +2044,7 @@ static void gen_MOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_DS, s->override); + gen_lea_v_seg(s, cpu_regs[R_EDI], R_DS, s->override); if (s->prefix & PREFIX_DATA) { gen_helper_maskmov_xmm(tcg_env, OP_PTR1, OP_PTR2, s->A0); @@ -4039,7 +4039,7 @@ static void gen_XLAT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { /* AL is already zero-extended into s->T0. */ tcg_gen_add_tl(s->A0, cpu_regs[R_EBX], s->T0); - gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override); + gen_lea_v_seg(s, s->A0, R_DS, s->override); gen_op_ld_v(s, MO_8, s->T0, s->A0); } diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 7dd7ebf60d..6dedfe94c0 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -680,20 +680,20 @@ static void gen_lea_v_seg_dest(DisasContext *s, MemOp aflag, TCGv dest, TCGv a0, } } -static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, +static void gen_lea_v_seg(DisasContext *s, TCGv a0, int def_seg, int ovr_seg) { - gen_lea_v_seg_dest(s, aflag, s->A0, a0, def_seg, ovr_seg); + gen_lea_v_seg_dest(s, s->aflag, s->A0, a0, def_seg, ovr_seg); } static inline void gen_string_movl_A0_ESI(DisasContext *s) { - gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override); + gen_lea_v_seg(s, cpu_regs[R_ESI], R_DS, s->override); } static inline void gen_string_movl_A0_EDI(DisasContext *s) { - gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1); + gen_lea_v_seg(s, cpu_regs[R_EDI], R_ES, -1); } static inline TCGv gen_compute_Dshift(DisasContext *s, MemOp ot) @@ -1784,7 +1784,7 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm) { AddressParts a = gen_lea_modrm_0(env, s, modrm); TCGv ea = gen_lea_modrm_1(s, a, false); - gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override); + gen_lea_v_seg(s, ea, a.def_seg, s->override); } static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm) @@ -2523,7 +2523,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b) bool update_fdp = true; tcg_gen_mov_tl(last_addr, ea); - gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override); + gen_lea_v_seg(s, ea, a.def_seg, s->override); switch (op) { case 0x00 ... 0x07: /* fxxxs */ @@ -3320,7 +3320,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot); tcg_gen_shli_tl(s->tmp0, s->tmp0, ot); tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a, false), s->tmp0); - gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); + gen_lea_v_seg(s, s->A0, a.def_seg, s->override); if (!(s->prefix & PREFIX_LOCK)) { gen_op_ld_v(s, ot, s->T0, s->A0); } @@ -3645,7 +3645,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) } gen_update_cc_op(s); gen_update_eip_cur(s); - gen_lea_v_seg(s, s->aflag, cpu_regs[R_EAX], R_DS, s->override); + gen_lea_v_seg(s, cpu_regs[R_EAX], R_DS, s->override); gen_helper_monitor(tcg_env, s->A0); break; @@ -4051,7 +4051,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) } else { tcg_gen_movi_tl(s->A0, 0); } - gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); + gen_lea_v_seg(s, s->A0, a.def_seg, s->override); if (a.index >= 0) { tcg_gen_mov_tl(s->T0, cpu_regs[a.index]); } else { @@ -4156,7 +4156,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) } else { tcg_gen_movi_tl(s->A0, 0); } - gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); + gen_lea_v_seg(s, s->A0, a.def_seg, s->override); if (a.index >= 0) { tcg_gen_mov_tl(s->T0, cpu_regs[a.index]); } else { From 0241e0fedab48a3da15b12a65818120ba566536f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 May 2024 11:32:27 +0200 Subject: [PATCH 20/24] meson: remove unnecessary reference to libm libm is linked into all targets via libqemuutil, no need to specify it explicitly. Signed-off-by: Paolo Bonzini --- block/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/meson.build b/block/meson.build index e1f03fd773..8993055c75 100644 --- a/block/meson.build +++ b/block/meson.build @@ -110,7 +110,7 @@ foreach m : [ [blkio, 'blkio', files('blkio.c')], [curl, 'curl', files('curl.c')], [glusterfs, 'gluster', files('gluster.c')], - [libiscsi, 'iscsi', [files('iscsi.c'), libm]], + [libiscsi, 'iscsi', files('iscsi.c')], [libnfs, 'nfs', files('nfs.c')], [libssh, 'ssh', files('ssh.c')], [rbd, 'rbd', files('rbd.c')], From 2bfd3c4860a0058ff65a5da16f4d2609ac70b580 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 May 2024 12:06:10 +0200 Subject: [PATCH 21/24] meson: remove unnecessary dependency The dbus_display1_dep is not really used since all occurrences also request gio independently. Just list the generated sources and drop dbus_display1_dep. Signed-off-by: Paolo Bonzini --- audio/meson.build | 4 ++-- tests/qtest/meson.build | 2 +- ui/meson.build | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/audio/meson.build b/audio/meson.build index 608f35e6af..59f0a431d5 100644 --- a/audio/meson.build +++ b/audio/meson.build @@ -30,8 +30,8 @@ endforeach if dbus_display module_ss = ss.source_set() - module_ss.add(when: [gio, dbus_display1_dep, pixman], - if_true: files('dbusaudio.c')) + module_ss.add(when: [gio, pixman], + if_true: [dbus_display1, files('dbusaudio.c')]) audio_modules += {'dbus': module_ss} endif diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 86293051dc..b98fae6a6d 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -354,7 +354,7 @@ if vnc.found() endif if dbus_display - qtests += {'dbus-display-test': [dbus_display1_dep, gio]} + qtests += {'dbus-display-test': [dbus_display1, gio]} endif qtest_executables = {} diff --git a/ui/meson.build b/ui/meson.build index 5d89986b0e..cfbf29428d 100644 --- a/ui/meson.build +++ b/ui/meson.build @@ -91,8 +91,7 @@ if dbus_display '--interface-prefix', 'org.qemu.', '--c-namespace', 'QemuDBus', '--generate-c-code', '@BASENAME@']) - dbus_display1_dep = declare_dependency(sources: dbus_display1, dependencies: gio) - dbus_ss.add(when: [gio, dbus_display1_dep], + dbus_ss.add(when: gio, if_true: [files( 'dbus-chardev.c', 'dbus-clipboard.c', @@ -100,7 +99,7 @@ if dbus_display 'dbus-error.c', 'dbus-listener.c', 'dbus.c', - ), opengl, gbm, pixman]) + ), opengl, gbm, pixman, dbus_display1]) ui_modules += {'dbus' : dbus_ss} endif From 61306156d976b1235e26dc1658ccaae2170ce185 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 May 2024 10:54:50 +0200 Subject: [PATCH 22/24] tcg: include dependencies in static_library() This ensures that for example libffi can be reached even if it is not in /usr/include. Signed-off-by: Paolo Bonzini --- tcg/meson.build | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tcg/meson.build b/tcg/meson.build index 8251589fd4..ffbe754d8b 100644 --- a/tcg/meson.build +++ b/tcg/meson.build @@ -32,19 +32,19 @@ tcg_ss = tcg_ss.apply({}) libtcg_user = static_library('tcg_user', tcg_ss.sources() + genh, name_suffix: 'fa', + dependencies: tcg_ss.dependencies(), c_args: '-DCONFIG_USER_ONLY', build_by_default: false) -tcg_user = declare_dependency(link_with: libtcg_user, - dependencies: tcg_ss.dependencies()) +tcg_user = declare_dependency(link_with: libtcg_user) user_ss.add(tcg_user) libtcg_system = static_library('tcg_system', tcg_ss.sources() + genh, name_suffix: 'fa', + dependencies: tcg_ss.dependencies(), c_args: '-DCONFIG_SOFTMMU', build_by_default: false) -tcg_system = declare_dependency(link_with: libtcg_system, - dependencies: tcg_ss.dependencies()) +tcg_system = declare_dependency(link_with: libtcg_system) system_ss.add(tcg_system) From ae9433fb1140f40143d877623b3c0f030d96a99f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 May 2024 11:21:35 +0200 Subject: [PATCH 23/24] meson: do not query modules before they are processed Signed-off-by: Paolo Bonzini --- block/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/meson.build b/block/meson.build index 8993055c75..158dc3b89d 100644 --- a/block/meson.build +++ b/block/meson.build @@ -119,7 +119,7 @@ foreach m : [ module_ss = ss.source_set() module_ss.add(when: m[0], if_true: m[2]) if enable_modules - modsrc += module_ss.all_sources() + modsrc += m[2] endif block_modules += {m[1] : module_ss} endif From 70eb5fde05bdd051c087669ffcf2aee39e0c8170 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 May 2024 18:16:08 +0200 Subject: [PATCH 24/24] migration: remove unnecessary zlib dependency zlib code is only used by the emulators, not by the tests. Signed-off-by: Paolo Bonzini --- meson.build | 2 +- migration/dirtyrate.c | 1 - migration/meson.build | 2 +- migration/qemu-file.c | 1 - 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/meson.build b/meson.build index 7fd82b5f48..6386607144 100644 --- a/meson.build +++ b/meson.build @@ -3696,7 +3696,7 @@ libmigration = static_library('migration', sources: migration_files + genh, name_suffix: 'fa', build_by_default: false) migration = declare_dependency(link_with: libmigration, - dependencies: [zlib, qom, io]) + dependencies: [qom, io]) system_ss.add(migration) block_ss = block_ss.apply({}) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index d02d70b7b4..1d9db81299 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" -#include #include "hw/core/cpu.h" #include "qapi/error.h" #include "exec/ramblock.h" diff --git a/migration/meson.build b/migration/meson.build index 8815f80837..bdc3244bce 100644 --- a/migration/meson.build +++ b/migration/meson.build @@ -29,7 +29,7 @@ system_ss.add(files( 'socket.c', 'tls.c', 'threadinfo.c', -), gnutls) +), gnutls, zlib) if get_option('replication').allowed() system_ss.add(files('colo-failover.c', 'colo.c')) diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 9ccbbb0099..b6d2f588bd 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -22,7 +22,6 @@ * THE SOFTWARE. */ #include "qemu/osdep.h" -#include #include "qemu/madvise.h" #include "qemu/error-report.h" #include "qemu/iov.h"