mirror of https://github.com/xemu-project/xemu.git
target/i386: convert CMPXCHG8B/CMPXCHG16B to new decoder
The gen_cmpxchg8b and gen_cmpxchg16b functions even have the correct prototype already; the only thing that needs to be done is removing the gen_lea_modrm() call. This moves the last LOCK-enabled instructions to the new decoder. It is now possible to assume that gen_multi0F is called only after checking that PREFIX_LOCK was not specified. Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
a2e2c78d2a
commit
fcd16539eb
|
@ -288,6 +288,25 @@ static void decode_group8(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Select the opcode table entry for group 9 (0F C7) from the ModRM byte.
 *
 *  - mod == 3 (register operand): use the multi0F entry, which is still
 *    marked as unconverted.
 *  - memory operand with reg field == 1: CMPXCHG16B when REX_W(s) is set,
 *    CMPXCHG8B otherwise.  Both entries carry the "lock" flag and their
 *    respective CPUID feature requirement.
 *  - any other memory form: *entry is left exactly as the caller passed it.
 */
static void decode_group9(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry reg_form =
        X86_OP_ENTRY0(multi0F); /* unconverted */
    static const X86OpEntry mem_cmpxchg8b =
        X86_OP_ENTRY1(CMPXCHG8B, M,q, lock p_00 cpuid(CX8));
    static const X86OpEntry mem_cmpxchg16b =
        X86_OP_ENTRY1(CMPXCHG16B, M,dq, lock p_00 cpuid(CX16));

    const int modrm = get_modrm(s, env);
    const int mod_field = modrm >> 6;
    const int reg_field = (modrm >> 3) & 7;

    if (mod_field == 3) {
        *entry = reg_form;
        return;
    }

    if (reg_field != 1) {
        /* Not a CMPXCHG8B/16B encoding: keep the incoming entry. */
        return;
    }

    if (REX_W(s)) {
        *entry = mem_cmpxchg16b;
    } else {
        *entry = mem_cmpxchg8b;
    }
}
|
||||
|
||||
static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
|
||||
{
|
||||
static const X86OpEntry group15_reg[8] = {
|
||||
|
@ -1203,7 +1222,7 @@ static const X86OpEntry opcodes_0F[256] = {
|
|||
[0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66),
|
||||
[0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66),
|
||||
[0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66),
|
||||
[0xc7] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted */
|
||||
[0xc7] = X86_OP_GROUP0(group9),
|
||||
|
||||
[0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2),
|
||||
[0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
|
||||
|
@ -2245,8 +2264,12 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
|
|||
return (s->cpuid_features & CPUID_CMOV);
|
||||
case X86_FEAT_CLFLUSH:
|
||||
return (s->cpuid_features & CPUID_CLFLUSH);
|
||||
case X86_FEAT_CX8:
|
||||
return (s->cpuid_features & CPUID_CX8);
|
||||
case X86_FEAT_FXSR:
|
||||
return (s->cpuid_features & CPUID_FXSR);
|
||||
case X86_FEAT_CX16:
|
||||
return (s->cpuid_ext_features & CPUID_EXT_CX16);
|
||||
case X86_FEAT_F16C:
|
||||
return (s->cpuid_ext_features & CPUID_EXT_F16C);
|
||||
case X86_FEAT_FMA:
|
||||
|
@ -2726,15 +2749,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
|
|||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* hack for old decoder: 0F C7 has both instructions that accept LOCK
|
||||
* and instructions that don't, but also needs X86_SPECIAL_NoLoadEA.
|
||||
* Keep this here until CMPXCHG8B/CMPXCHG16B is separated from the
|
||||
* other unconverted opcodes.
|
||||
*/
|
||||
if (decode.e.gen == gen_multi0F) {
|
||||
accept_lock = true;
|
||||
}
|
||||
if ((s->prefix & PREFIX_LOCK) && !accept_lock) {
|
||||
goto illegal_op;
|
||||
}
|
||||
|
|
|
@ -114,6 +114,8 @@ typedef enum X86CPUIDFeature {
|
|||
X86_FEAT_CLWB,
|
||||
X86_FEAT_CMOV,
|
||||
X86_FEAT_CMPCCXADD,
|
||||
X86_FEAT_CX8,
|
||||
X86_FEAT_CX16,
|
||||
X86_FEAT_F16C,
|
||||
X86_FEAT_FMA,
|
||||
X86_FEAT_FSGSBASE,
|
||||
|
|
|
@ -1788,6 +1788,102 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
|
|||
decode->cc_op = CC_OP_SUBB + ot;
|
||||
}
|
||||
|
||||
/*
 * Emit TCG ops for CMPXCHG16B on the 128-bit memory operand at s->A0.
 *
 * RDX:RAX is the value compared against memory and RCX:RBX is the value
 * to store on a match.  Afterwards RDX:RAX holds the value read from
 * memory and the Z bit of the computed flags reflects whether the
 * comparison succeeded.  Builds without TARGET_X86_64 abort() if this
 * generator is ever reached.
 */
static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode)
{
#ifdef TARGET_X86_64
    const MemOp memop = MO_TE | MO_128 | MO_ALIGN;
    TCGv_i128 expected = tcg_temp_new_i128();
    TCGv_i128 xchg = tcg_temp_new_i128();
    TCGv_i64 diff_lo, diff_hi;

    /* Pack the register pairs into 128-bit temporaries (low half first). */
    tcg_gen_concat_i64_i128(expected, cpu_regs[R_EAX], cpu_regs[R_EDX]);
    tcg_gen_concat_i64_i128(xchg, cpu_regs[R_EBX], cpu_regs[R_ECX]);

    /*
     * Atomicity is only requested when the LOCK prefix is present;
     * "xchg" is reused to receive the value that was in memory.
     */
    if (!(s->prefix & PREFIX_LOCK)) {
        tcg_gen_nonatomic_cmpxchg_i128(xchg, s->A0, expected, xchg,
                                       s->mem_index, memop);
    } else {
        tcg_gen_atomic_cmpxchg_i128(xchg, s->A0, expected, xchg,
                                    s->mem_index, memop);
    }

    /* Split the old memory value into T0 (low) and T1 (high). */
    tcg_gen_extr_i128_i64(s->T0, s->T1, xchg);

    /* diff_lo ends up zero iff the old value equals RDX:RAX. */
    diff_lo = tcg_temp_new_i64();
    diff_hi = tcg_temp_new_i64();
    tcg_gen_xor_i64(diff_lo, s->T0, cpu_regs[R_EAX]);
    tcg_gen_xor_i64(diff_hi, s->T1, cpu_regs[R_EDX]);
    tcg_gen_or_i64(diff_lo, diff_lo, diff_hi);

    /* Materialize the flags, then overwrite only the Z bit. */
    gen_compute_eflags(s);
    tcg_gen_setcondi_i64(TCG_COND_EQ, diff_lo, diff_lo, 0);
    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, diff_lo, ctz32(CC_Z), 1);

    /*
     * Writing the old value back is safe in both outcomes: when the
     * comparison succeeded (Z=1) it is identical to what RDX:RAX
     * already contained.
     */
    tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
    tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
#else
    abort();
#endif
}
|
||||
|
||||
/*
 * Emit TCG ops for CMPXCHG8B on the 64-bit memory operand at s->A0.
 *
 * EDX:EAX is the value compared against memory and ECX:EBX is the value
 * to store on a match.  The Z bit of the computed flags is set from the
 * result of the comparison, and EDX:EAX receives the value read from
 * memory (conditionally on 64-bit targets, see below).
 */
static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode)
{
    TCGv_i64 expected = tcg_temp_new_i64();
    TCGv_i64 desired = tcg_temp_new_i64();
    TCGv_i64 loaded = tcg_temp_new_i64();
    TCGv zf;

    /* Pack the register pairs into 64-bit temporaries (low half first). */
    tcg_gen_concat_tl_i64(expected, cpu_regs[R_EAX], cpu_regs[R_EDX]);
    tcg_gen_concat_tl_i64(desired, cpu_regs[R_EBX], cpu_regs[R_ECX]);

    /* Atomicity is only requested when the LOCK prefix is present. */
    if (!(s->prefix & PREFIX_LOCK)) {
        tcg_gen_nonatomic_cmpxchg_i64(loaded, s->A0, expected, desired,
                                      s->mem_index, MO_TEUQ);
    } else {
        tcg_gen_atomic_cmpxchg_i64(loaded, s->A0, expected, desired,
                                   s->mem_index, MO_TEUQ);
    }

    /*
     * Reuse "expected" for the comparison outcome (1 when the value read
     * from memory matched), then narrow it to target width as the new Z.
     */
    tcg_gen_setcond_i64(TCG_COND_EQ, expected, loaded, expected);
    zf = tcg_temp_new();
    tcg_gen_trunc_i64_tl(zf, expected);

    /*
     * Update EDX:EAX with the value read from memory.
     *
     * With 64-bit target registers the update has to be conditional:
     * on success (Z=1) the source registers must stay untouched, and on
     * failure (Z=0) each half is written back zero-extended.  With 32-bit
     * target registers the write can be unconditional, since on success
     * the old value equals what EDX:EAX already held.
     */
    if (TARGET_LONG_BITS != 32) {
        TCGv zero = tcg_constant_tl(0);

        tcg_gen_extr_i64_tl(s->T0, s->T1, loaded);
        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], zf, zero,
                           s->T0, cpu_regs[R_EAX]);
        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], zf, zero,
                           s->T1, cpu_regs[R_EDX]);
    } else {
        tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], loaded);
    }

    /* Materialize the flags, then overwrite only the Z bit. */
    gen_compute_eflags(s);
    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, zf, ctz32(CC_Z), 1);
}
|
||||
|
||||
static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
|
||||
{
|
||||
gen_update_cc_op(s);
|
||||
|
|
|
@ -2289,104 +2289,6 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
|
|||
tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
|
||||
}
|
||||
|
||||
/*
 * Emit TCG ops for CMPXCHG8B (pre-conversion helper).
 *
 * Calls gen_lea_modrm() itself before touching memory, then performs the
 * 64-bit compare-and-exchange at s->A0: EDX:EAX is compared against memory,
 * ECX:EBX is stored on a match, and the Z bit of the computed flags is set
 * from the result of the comparison.
 */
static void gen_cmpxchg8b(DisasContext *s, X86DecodedInsn *decode)
{
    TCGv_i64 expected, desired, loaded;
    TCGv zf;

    gen_lea_modrm(s, decode);

    expected = tcg_temp_new_i64();
    desired = tcg_temp_new_i64();
    loaded = tcg_temp_new_i64();

    /* Pack the register pairs into 64-bit temporaries (low half first). */
    tcg_gen_concat_tl_i64(expected, cpu_regs[R_EAX], cpu_regs[R_EDX]);
    tcg_gen_concat_tl_i64(desired, cpu_regs[R_EBX], cpu_regs[R_ECX]);

    /* Atomicity is only requested when the LOCK prefix is present. */
    if (!(s->prefix & PREFIX_LOCK)) {
        tcg_gen_nonatomic_cmpxchg_i64(loaded, s->A0, expected, desired,
                                      s->mem_index, MO_TEUQ);
    } else {
        tcg_gen_atomic_cmpxchg_i64(loaded, s->A0, expected, desired,
                                   s->mem_index, MO_TEUQ);
    }

    /*
     * Reuse "expected" for the comparison outcome (1 when the value read
     * from memory matched), then narrow it to target width as the new Z.
     */
    tcg_gen_setcond_i64(TCG_COND_EQ, expected, loaded, expected);
    zf = tcg_temp_new();
    tcg_gen_trunc_i64_tl(zf, expected);

    /*
     * Update EDX:EAX with the value read from memory.  With 64-bit target
     * registers this must be conditional: untouched on success (Z=1),
     * zero-extended writeback on failure (Z=0).  With 32-bit target
     * registers an unconditional write is equivalent.
     */
    if (TARGET_LONG_BITS != 32) {
        TCGv zero = tcg_constant_tl(0);

        tcg_gen_extr_i64_tl(s->T0, s->T1, loaded);
        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], zf, zero,
                           s->T0, cpu_regs[R_EAX]);
        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], zf, zero,
                           s->T1, cpu_regs[R_EDX]);
    } else {
        tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], loaded);
    }

    /* Materialize the flags, then overwrite only the Z bit. */
    gen_compute_eflags(s);
    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, zf, ctz32(CC_Z), 1);
}
|
||||
|
||||
#ifdef TARGET_X86_64
/*
 * Emit TCG ops for CMPXCHG16B (pre-conversion helper, 64-bit targets only).
 *
 * Calls gen_lea_modrm() itself before touching memory, then performs the
 * 128-bit compare-and-exchange at s->A0: RDX:RAX is compared against
 * memory, RCX:RBX is stored on a match, RDX:RAX receives the value read
 * from memory, and the Z bit of the computed flags reflects the outcome.
 */
static void gen_cmpxchg16b(DisasContext *s, X86DecodedInsn *decode)
{
    const MemOp memop = MO_TE | MO_128 | MO_ALIGN;
    TCGv_i128 expected, xchg;
    TCGv_i64 diff_lo, diff_hi;

    gen_lea_modrm(s, decode);

    /* Pack the register pairs into 128-bit temporaries (low half first). */
    expected = tcg_temp_new_i128();
    xchg = tcg_temp_new_i128();
    tcg_gen_concat_i64_i128(expected, cpu_regs[R_EAX], cpu_regs[R_EDX]);
    tcg_gen_concat_i64_i128(xchg, cpu_regs[R_EBX], cpu_regs[R_ECX]);

    /*
     * Atomicity is only requested when the LOCK prefix is present;
     * "xchg" is reused to receive the value that was in memory.
     */
    if (!(s->prefix & PREFIX_LOCK)) {
        tcg_gen_nonatomic_cmpxchg_i128(xchg, s->A0, expected, xchg,
                                       s->mem_index, memop);
    } else {
        tcg_gen_atomic_cmpxchg_i128(xchg, s->A0, expected, xchg,
                                    s->mem_index, memop);
    }

    /* Split the old memory value into T0 (low) and T1 (high). */
    tcg_gen_extr_i128_i64(s->T0, s->T1, xchg);

    /* diff_lo ends up zero iff the old value equals RDX:RAX. */
    diff_lo = tcg_temp_new_i64();
    diff_hi = tcg_temp_new_i64();
    tcg_gen_xor_i64(diff_lo, s->T0, cpu_regs[R_EAX]);
    tcg_gen_xor_i64(diff_hi, s->T1, cpu_regs[R_EDX]);
    tcg_gen_or_i64(diff_lo, diff_lo, diff_hi);

    /* Materialize the flags, then overwrite only the Z bit. */
    gen_compute_eflags(s);
    tcg_gen_setcondi_i64(TCG_COND_EQ, diff_lo, diff_lo, 0);
    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, diff_lo, ctz32(CC_Z), 1);

    /*
     * Writing the old value back is safe in both outcomes: when the
     * comparison succeeded (Z=1) it is identical to what RDX:RAX
     * already contained.
     */
    tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
    tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
}
#endif
|
||||
|
||||
#include "emit.c.inc"
|
||||
|
||||
static void gen_x87(DisasContext *s, X86DecodedInsn *decode)
|
||||
|
@ -2962,29 +2864,10 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
|
|||
|
||||
/* now check op code */
|
||||
switch (b) {
|
||||
case 0x1c7: /* cmpxchg8b */
|
||||
case 0x1c7: /* RDSEED, RDPID with f3 prefix */
|
||||
mod = (modrm >> 6) & 3;
|
||||
switch ((modrm >> 3) & 7) {
|
||||
case 1: /* CMPXCHG8, CMPXCHG16 */
|
||||
if (mod == 3) {
|
||||
goto illegal_op;
|
||||
}
|
||||
#ifdef TARGET_X86_64
|
||||
if (dflag == MO_64) {
|
||||
if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
|
||||
goto illegal_op;
|
||||
}
|
||||
gen_cmpxchg16b(s, decode);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
if (!(s->cpuid_features & CPUID_CX8)) {
|
||||
goto illegal_op;
|
||||
}
|
||||
gen_cmpxchg8b(s, decode);
|
||||
break;
|
||||
|
||||
case 7: /* RDSEED, RDPID with f3 prefix */
|
||||
case 7:
|
||||
if (mod != 3 ||
|
||||
(s->prefix & (PREFIX_LOCK | PREFIX_REPNZ))) {
|
||||
goto illegal_op;
|
||||
|
|
Loading…
Reference in New Issue