target/i386: convert CMPXCHG8B/CMPXCHG16B to new decoder

The gen_cmpxchg8b and gen_cmpxchg16b functions even have the correct
prototype already; the only thing that needs to be done is removing the
gen_lea_modrm() call.

This moves the last LOCK-enabled instructions to the new decoder.  It is
now possible to assume that gen_multi0F is called only after checking
that PREFIX_LOCK was not specified.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini 2024-06-10 10:39:00 +02:00
parent a2e2c78d2a
commit fcd16539eb
4 changed files with 124 additions and 129 deletions

View File

@ -288,6 +288,25 @@ static void decode_group8(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
}
}
static void decode_group9(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
static const X86OpEntry group9_reg =
X86_OP_ENTRY0(multi0F); /* unconverted */
static const X86OpEntry cmpxchg8b =
X86_OP_ENTRY1(CMPXCHG8B, M,q, lock p_00 cpuid(CX8));
static const X86OpEntry cmpxchg16b =
X86_OP_ENTRY1(CMPXCHG16B, M,dq, lock p_00 cpuid(CX16));
int modrm = get_modrm(s, env);
int op = (modrm >> 3) & 7;
if ((modrm >> 6) == 3) {
*entry = group9_reg;
} else if (op == 1) {
*entry = REX_W(s) ? cmpxchg16b : cmpxchg8b;
}
}
static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
static const X86OpEntry group15_reg[8] = {
@ -1203,7 +1222,7 @@ static const X86OpEntry opcodes_0F[256] = {
[0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66),
[0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66),
[0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66),
[0xc7] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted */
[0xc7] = X86_OP_GROUP0(group9),
[0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2),
[0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
@ -2245,8 +2264,12 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
return (s->cpuid_features & CPUID_CMOV);
case X86_FEAT_CLFLUSH:
return (s->cpuid_features & CPUID_CLFLUSH);
case X86_FEAT_CX8:
return (s->cpuid_features & CPUID_CX8);
case X86_FEAT_FXSR:
return (s->cpuid_features & CPUID_FXSR);
case X86_FEAT_CX16:
return (s->cpuid_ext_features & CPUID_EXT_CX16);
case X86_FEAT_F16C:
return (s->cpuid_ext_features & CPUID_EXT_F16C);
case X86_FEAT_FMA:
@ -2726,15 +2749,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
break;
}
/*
* hack for old decoder: 0F C7 has both instructions that accept LOCK
* and instructions that don't, but also needs X86_SPECIAL_NoLoadEA.
* Keep this here until CMPXCHG8B/CMPXCHG16B is separated from the
* other unconverted opcodes.
*/
if (decode.e.gen == gen_multi0F) {
accept_lock = true;
}
if ((s->prefix & PREFIX_LOCK) && !accept_lock) {
goto illegal_op;
}

View File

@ -114,6 +114,8 @@ typedef enum X86CPUIDFeature {
X86_FEAT_CLWB,
X86_FEAT_CMOV,
X86_FEAT_CMPCCXADD,
X86_FEAT_CX8,
X86_FEAT_CX16,
X86_FEAT_F16C,
X86_FEAT_FMA,
X86_FEAT_FSGSBASE,

View File

@ -1788,6 +1788,102 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
decode->cc_op = CC_OP_SUBB + ot;
}
static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode)
{
#ifdef TARGET_X86_64
MemOp mop = MO_TE | MO_128 | MO_ALIGN;
TCGv_i64 t0, t1;
TCGv_i128 cmp, val;
cmp = tcg_temp_new_i128();
val = tcg_temp_new_i128();
tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
/* Only require atomic with LOCK; non-parallel handled in generator. */
if (s->prefix & PREFIX_LOCK) {
tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
} else {
tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
}
tcg_gen_extr_i128_i64(s->T0, s->T1, val);
/* Determine success after the fact. */
t0 = tcg_temp_new_i64();
t1 = tcg_temp_new_i64();
tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
tcg_gen_or_i64(t0, t0, t1);
/* Update Z. */
gen_compute_eflags(s);
tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
/*
* Extract the result values for the register pair. We may do this
* unconditionally, because on success (Z=1), the old value matches
* the previous value in RDX:RAX.
*/
tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
#else
abort();
#endif
}
static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i64 cmp, val, old;
TCGv Z;
cmp = tcg_temp_new_i64();
val = tcg_temp_new_i64();
old = tcg_temp_new_i64();
/* Construct the comparison values from the register pair. */
tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
/* Only require atomic with LOCK; non-parallel handled in generator. */
if (s->prefix & PREFIX_LOCK) {
tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
} else {
tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
s->mem_index, MO_TEUQ);
}
/* Set tmp0 to match the required value of Z. */
tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
Z = tcg_temp_new();
tcg_gen_trunc_i64_tl(Z, cmp);
/*
* Extract the result values for the register pair.
* For 32-bit, we may do this unconditionally, because on success (Z=1),
* the old value matches the previous value in EDX:EAX. For x86_64,
* the store must be conditional, because we must leave the source
* registers unchanged on success, and zero-extend the writeback
* on failure (Z=0).
*/
if (TARGET_LONG_BITS == 32) {
tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
} else {
TCGv zero = tcg_constant_tl(0);
tcg_gen_extr_i64_tl(s->T0, s->T1, old);
tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
s->T0, cpu_regs[R_EAX]);
tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero,
s->T1, cpu_regs[R_EDX]);
}
/* Update Z. */
gen_compute_eflags(s);
tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1);
}
static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);

View File

@ -2289,104 +2289,6 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
}
static void gen_cmpxchg8b(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i64 cmp, val, old;
TCGv Z;
gen_lea_modrm(s, decode);
cmp = tcg_temp_new_i64();
val = tcg_temp_new_i64();
old = tcg_temp_new_i64();
/* Construct the comparison values from the register pair. */
tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
/* Only require atomic with LOCK; non-parallel handled in generator. */
if (s->prefix & PREFIX_LOCK) {
tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
} else {
tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
s->mem_index, MO_TEUQ);
}
/* Set tmp0 to match the required value of Z. */
tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
Z = tcg_temp_new();
tcg_gen_trunc_i64_tl(Z, cmp);
/*
* Extract the result values for the register pair.
* For 32-bit, we may do this unconditionally, because on success (Z=1),
* the old value matches the previous value in EDX:EAX. For x86_64,
* the store must be conditional, because we must leave the source
* registers unchanged on success, and zero-extend the writeback
* on failure (Z=0).
*/
if (TARGET_LONG_BITS == 32) {
tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
} else {
TCGv zero = tcg_constant_tl(0);
tcg_gen_extr_i64_tl(s->T0, s->T1, old);
tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
s->T0, cpu_regs[R_EAX]);
tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero,
s->T1, cpu_regs[R_EDX]);
}
/* Update Z. */
gen_compute_eflags(s);
tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1);
}
#ifdef TARGET_X86_64
static void gen_cmpxchg16b(DisasContext *s, X86DecodedInsn *decode)
{
MemOp mop = MO_TE | MO_128 | MO_ALIGN;
TCGv_i64 t0, t1;
TCGv_i128 cmp, val;
gen_lea_modrm(s, decode);
cmp = tcg_temp_new_i128();
val = tcg_temp_new_i128();
tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
/* Only require atomic with LOCK; non-parallel handled in generator. */
if (s->prefix & PREFIX_LOCK) {
tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
} else {
tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
}
tcg_gen_extr_i128_i64(s->T0, s->T1, val);
/* Determine success after the fact. */
t0 = tcg_temp_new_i64();
t1 = tcg_temp_new_i64();
tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
tcg_gen_or_i64(t0, t0, t1);
/* Update Z. */
gen_compute_eflags(s);
tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
/*
* Extract the result values for the register pair. We may do this
* unconditionally, because on success (Z=1), the old value matches
* the previous value in RDX:RAX.
*/
tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
}
#endif
#include "emit.c.inc"
static void gen_x87(DisasContext *s, X86DecodedInsn *decode)
@ -2962,29 +2864,10 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
/* now check op code */
switch (b) {
case 0x1c7: /* cmpxchg8b */
case 0x1c7: /* RDSEED, RDPID with f3 prefix */
mod = (modrm >> 6) & 3;
switch ((modrm >> 3) & 7) {
case 1: /* CMPXCHG8, CMPXCHG16 */
if (mod == 3) {
goto illegal_op;
}
#ifdef TARGET_X86_64
if (dflag == MO_64) {
if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
goto illegal_op;
}
gen_cmpxchg16b(s, decode);
break;
}
#endif
if (!(s->cpuid_features & CPUID_CX8)) {
goto illegal_op;
}
gen_cmpxchg8b(s, decode);
break;
case 7: /* RDSEED, RDPID with f3 prefix */
case 7:
if (mod != 3 ||
(s->prefix & (PREFIX_LOCK | PREFIX_REPNZ))) {
goto illegal_op;