target/i386: convert SHLD/SHRD to new decoder

Use the same flag generation code as SHL and SHR, but use
the existing gen_shiftd_rm_T1 function to compute the result
as well as CC_SRC.
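
To recap the architectural behaviour being emulated, here is a minimal
host-side reference model of the 32-bit double shifts (illustration only,
not QEMU code):

#include <stdint.h>
#include <stdio.h>

/* Illustration only: 32-bit SHLD/SHRD for 0 < count < 32.  A masked count
 * of 0 leaves the destination and flags untouched; counts >= the operand
 * size are undefined on hardware and not modelled, nor are 16-bit operands. */
static uint32_t ref_shld32(uint32_t dst, uint32_t src, unsigned count, int *cf)
{
    count &= 31;
    if (count == 0) {
        return dst;
    }
    *cf = (dst >> (32 - count)) & 1;            /* last bit shifted out of dst */
    return (dst << count) | (src >> (32 - count));
}

static uint32_t ref_shrd32(uint32_t dst, uint32_t src, unsigned count, int *cf)
{
    count &= 31;
    if (count == 0) {
        return dst;
    }
    *cf = (dst >> (count - 1)) & 1;             /* last bit shifted out of dst */
    return (dst >> count) | (src << (32 - count));
}

int main(void)
{
    int cf = 0;
    printf("%08x\n", (unsigned)ref_shld32(0x80000001, 0xf0000000, 4, &cf)); /* 0000001f */
    printf("%08x\n", (unsigned)ref_shrd32(0x80000001, 0xf0000000, 4, &cf)); /* 08000000 */
    return 0;
}

CC_SRC receives the value shifted by count-1 (what the removed flag code
called shm1), so CF/OF can be recovered lazily exactly as for SHL/SHR.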

Decoding-wise, the immediate forms of SHLD/SHRD count as 4-operand
instructions, because s->T0 and s->T1 together occupy three operand
slots.  The infrastructure already used by opcodes in the 0F 3A table
works fine.
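
For reference, the 4-operand table form relies on the decode-new convention
that an ENTRY4 entry is an ENTRY3 entry plus an implicit immediate-byte op3;
the sketch below paraphrases that convention (an assumption about the
existing macros, not part of this patch) and spells out the operand slots:

/* Assumed shape of the existing decode-new macro (paraphrase, not part of
 * this patch): ENTRY4 = ENTRY3 plus an implicit immediate-byte op3.  This
 * is why gen_SHLD/gen_SHRD can check decode->e.op3 == X86_TYPE_I to tell
 * the immediate-count forms (0F A4/0F AC) from the CL-count forms. */
#define X86_OP_ENTRY4(op, op0, s0, op1, s1, op2, s2, ...)           \
    X86_OP_ENTRY3(op, op0, s0, op1, s1, op2, s2,                    \
                  .op3 = X86_TYPE_I, .s3 = X86_SIZE_b, ## __VA_ARGS__)

/* Operand slots used by the SHLD/SHRD entries:
 *   op0 = E   r/m destination
 *   op1 = 2op same location as op0, loaded into s->T0 as the shifted value
 *   op2 = G   register supplying the shifted-in bits, loaded into s->T1
 *   op3 = Ib  present only for the immediate-count forms
 */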

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Author: Paolo Bonzini
Date:   2024-05-09 11:46:59 +02:00
Parent: e4e5981daf
Commit: 6476902740

3 changed files with 50 additions and 84 deletions


@@ -1118,6 +1118,8 @@ static const X86OpEntry opcodes_0F[256] = {
     [0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
     [0xa1] = X86_OP_ENTRYw(POP, FS, w),
     [0xa2] = X86_OP_ENTRY0(CPUID),
+    [0xa4] = X86_OP_ENTRY4(SHLD, E,v, 2op,v, G,v),
+    [0xa5] = X86_OP_ENTRY3(SHLD, E,v, 2op,v, G,v),
     [0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None),
     [0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None),
@@ -1244,6 +1246,8 @@ static const X86OpEntry opcodes_0F[256] = {
     [0xa8] = X86_OP_ENTRYr(PUSH, GS, w),
     [0xa9] = X86_OP_ENTRYw(POP, GS, w),
     [0xaa] = X86_OP_ENTRY0(RSM, chk(smm) svm(RSM)),
+    [0xac] = X86_OP_ENTRY4(SHRD, E,v, 2op,v, G,v),
+    [0xad] = X86_OP_ENTRY3(SHRD, E,v, 2op,v, G,v),
     [0xae] = X86_OP_GROUP0(group15),
     /*
      * It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3
@@ -2540,8 +2544,8 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
     switch (b) {
     case 0x00 ... 0x01: /* mostly privileged instructions */
     case 0x1a ... 0x1b: /* MPX */
-    case 0xa3 ... 0xa5: /* BT, SHLD */
-    case 0xab ... 0xad: /* BTS, SHRD */
+    case 0xa3: /* bt */
+    case 0xab: /* bts */
     case 0xb0 ... 0xb1: /* cmpxchg */
     case 0xb3: /* btr */
     case 0xb8: /* integer ops */


@@ -3588,6 +3588,27 @@ static void gen_SHL(DisasContext *s, X86DecodedInsn *decode)
     }
 }
 
+static void gen_SHLD(DisasContext *s, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    int unit = decode->e.op3 == X86_TYPE_I ? X86_OP_IMM : X86_OP_INT;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, unit);
+
+    if (!count) {
+        return;
+    }
+
+    decode->cc_dst = s->T0;
+    decode->cc_src = s->tmp0;
+    gen_shiftd_rm_T1(s, ot, false, count);
+    if (can_be_zero) {
+        gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot);
+    } else {
+        decode->cc_op = CC_OP_SHLB + ot;
+    }
+}
+
 static void gen_SHLX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -3620,6 +3641,27 @@ static void gen_SHR(DisasContext *s, X86DecodedInsn *decode)
     }
 }
 
+static void gen_SHRD(DisasContext *s, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    int unit = decode->e.op3 == X86_TYPE_I ? X86_OP_IMM : X86_OP_INT;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, unit);
+
+    if (!count) {
+        return;
+    }
+
+    decode->cc_dst = s->T0;
+    decode->cc_src = s->tmp0;
+    gen_shiftd_rm_T1(s, ot, true, count);
+    if (can_be_zero) {
+        gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot);
+    } else {
+        decode->cc_op = CC_OP_SARB + ot;
+    }
+}
+
 static void gen_SHRX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;


@@ -1434,57 +1434,11 @@ static bool check_cpl0(DisasContext *s)
     return false;
 }
 
-static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
-                            TCGv shm1, TCGv count, bool is_right)
-{
-    TCGv_i32 z32, s32, oldop;
-    TCGv z_tl;
-
-    /* Store the results into the CC variables.  If we know that the
-       variable must be dead, store unconditionally.  Otherwise we'll
-       need to not disrupt the current contents.  */
-    z_tl = tcg_constant_tl(0);
-    if (cc_op_live[s->cc_op] & USES_CC_DST) {
-        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
-                           result, cpu_cc_dst);
-    } else {
-        tcg_gen_mov_tl(cpu_cc_dst, result);
-    }
-    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
-        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
-                           shm1, cpu_cc_src);
-    } else {
-        tcg_gen_mov_tl(cpu_cc_src, shm1);
-    }
-
-    /* Get the two potential CC_OP values into temporaries.  */
-    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
-    if (s->cc_op == CC_OP_DYNAMIC) {
-        oldop = cpu_cc_op;
-    } else {
-        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
-        oldop = s->tmp3_i32;
-    }
-
-    /* Conditionally store the CC_OP value.  */
-    z32 = tcg_constant_i32(0);
-    s32 = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(s32, count);
-    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
-
-    /* The CC_OP value is no longer predictable.  */
-    set_cc_op(s, CC_OP_DYNAMIC);
-}
-
 /* XXX: add faster immediate case */
-static TCGv gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
-                             bool is_right, TCGv count_in)
+static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
+                             bool is_right, TCGv count)
 {
     target_ulong mask = (ot == MO_64 ? 63 : 31);
-    TCGv count;
-
-    count = tcg_temp_new();
-    tcg_gen_andi_tl(count, count_in, mask);
 
     switch (ot) {
     case MO_16:
@@ -1546,8 +1500,6 @@ static TCGv gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
         tcg_gen_or_tl(s->T0, s->T0, s->T1);
         break;
     }
-
-    return count;
 }
 
 #define X86_MAX_INSN_LENGTH 15
@@ -3057,7 +3009,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
     CPUX86State *env = cpu_env(cpu);
     int prefixes = s->prefix;
     MemOp dflag = s->dflag;
-    TCGv shift;
     MemOp ot;
     int modrm, reg, rm, mod, op, val;
@@ -3221,37 +3172,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
         }
         break;
 
-        /**************************/
-        /* shifts */
-    case 0x1a4: /* shld imm */
-        op = 0;
-        shift = NULL;
-        goto do_shiftd;
-    case 0x1a5: /* shld cl */
-        op = 0;
-        shift = cpu_regs[R_ECX];
-        goto do_shiftd;
-    case 0x1ac: /* shrd imm */
-        op = 1;
-        shift = NULL;
-        goto do_shiftd;
-    case 0x1ad: /* shrd cl */
-        op = 1;
-        shift = cpu_regs[R_ECX];
-    do_shiftd:
-        ot = dflag;
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        gen_ld_modrm(env, s, modrm, ot);
-        if (!shift) {
-            shift = tcg_constant_tl(x86_ldub_code(env, s));
-        }
-        gen_op_mov_v_reg(s, ot, s->T1, reg);
-        shift = gen_shiftd_rm_T1(s, ot, op, shift);
-        gen_st_modrm(env, s, modrm, ot);
-        gen_shift_flags(s, ot, s->T0, s->tmp0, shift, op);
-        break;
-
         /************************/
         /* bit operations */
     case 0x1ba: /* bt/bts/btr/btc Gv, im */