target/i386: add core of new i386 decoder
The new decoder is based on three principles:
- use mostly table-driven decoding, using tables derived as much as possible
from the Intel manual. Centralizing the decoding of the operands makes it
more homogeneous, for example all immediates are signed. All modrm
handling is in one function, and can be shared between SSE and ALU
instructions (including XMM<->GPR instructions). The SSE/AVX decoder
will also not have duplicated code between the 0F, 0F38 and 0F3A tables.
- keep the code as "non-branchy" as possible. Generally, the code for
the new decoder is more verbose, but the control flow is simpler.
Conditionals are not nested and have small bodies. All instruction
groups are resolved even before operands are decoded, and code
generation is separated as much as possible within small functions
that only handle one instruction each.
- keep address generation and (for ALU operands) memory loads and writeback
as much in common code as possible. All ALU operations for example
are implemented as T0=f(T0,T1). For non-ALU instructions,
read-modify-write memory operations are rare, but registers do not
have TCGv equivalents: therefore, the common logic sets up pointer
temporaries with the operands, while load and writeback are handled
by gvec or by helpers.
These principles make future code review and extensibility simpler, at
the cost of having a relatively large amount of code in the form of this
patch. Even EVEX should not be _too_ hard to implement (it's just a crazy
large amount of possibilities).
This patch introduces the main decoder flow, and integrates the old
decoder with the new one. The old decoder takes care of parsing
prefixes and then optionally drops to the new one. The changes to the
old decoder are minimal and allow it to be replaced incrementally with
the new one.
There is a debugging mechanism through a "LIMIT" environment variable.
In user-mode emulation, the variable is the number of instructions
decoded by the new decoder before permanently switching to the old one.
In system emulation, the variable is the highest opcode that is decoded
by the new decoder (this is less friendly, but it's the best that can
be done without requiring deterministic execution).
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-08-23 09:20:55 +00:00
|
|
|
/*
|
|
|
|
* New-style TCG opcode generator for i386 instructions
|
|
|
|
*
|
|
|
|
* Copyright (c) 2022 Red Hat, Inc.
|
|
|
|
*
|
|
|
|
* Author: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Emit an illegal-opcode exception for the current instruction.
 * Matches the common generator signature (env and decode are unused)
 * so it can be used as a table entry for undefined encodings.
 */
static void gen_illegal(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_illegal_opcode(s);
}
|
|
|
|
|
|
|
|
/*
 * Materialize the effective address described by *mem into s->A0,
 * adding the applicable segment base (s->override takes precedence
 * over the default segment recorded in mem->def_seg).
 */
static void gen_load_ea(DisasContext *s, AddressParts *mem)
{
    gen_lea_v_seg(s, s->aflag, gen_lea_modrm_1(s, *mem),
                  mem->def_seg, s->override);
}
|
2022-08-23 12:55:56 +00:00
|
|
|
|
|
|
|
/*
 * Byte offset of element 0 of the given width within an MMXReg.
 * The MMX_* macros account for host endianness.
 */
static inline int mmx_offset(MemOp ot)
{
    if (ot == MO_8) {
        return offsetof(MMXReg, MMX_B(0));
    }
    if (ot == MO_16) {
        return offsetof(MMXReg, MMX_W(0));
    }
    if (ot == MO_32) {
        return offsetof(MMXReg, MMX_L(0));
    }
    if (ot == MO_64) {
        return offsetof(MMXReg, MMX_Q(0));
    }
    g_assert_not_reached();
}
|
|
|
|
|
|
|
|
/*
 * Byte offset of element 0 of the given width within a ZMMReg.
 * The ZMM_* macros account for host endianness.
 */
static inline int xmm_offset(MemOp ot)
{
    if (ot == MO_8) {
        return offsetof(ZMMReg, ZMM_B(0));
    }
    if (ot == MO_16) {
        return offsetof(ZMMReg, ZMM_W(0));
    }
    if (ot == MO_32) {
        return offsetof(ZMMReg, ZMM_L(0));
    }
    if (ot == MO_64) {
        return offsetof(ZMMReg, ZMM_Q(0));
    }
    if (ot == MO_128) {
        return offsetof(ZMMReg, ZMM_X(0));
    }
    if (ot == MO_256) {
        return offsetof(ZMMReg, ZMM_Y(0));
    }
    g_assert_not_reached();
}
|
|
|
|
|
|
|
|
/*
 * Fill in op->offset with the CPUX86State-relative location of an MMX
 * operand: the architectural fpregs[n].mmx register, or the mmx_t0
 * scratch slot when the operand came from memory (has_ea).
 */
static void compute_mmx_offset(X86DecodedOp *op)
{
    int base = op->has_ea
        ? offsetof(CPUX86State, mmx_t0)
        : offsetof(CPUX86State, fpregs[op->n].mmx);

    op->offset = base + mmx_offset(op->ot);
}
|
|
|
|
|
|
|
|
/*
 * Fill in op->offset with the CPUX86State-relative location of an
 * XMM/YMM operand: the architectural vector register n, or the xmm_t0
 * scratch slot when the operand came from memory (has_ea).
 */
static void compute_xmm_offset(X86DecodedOp *op)
{
    int base = op->has_ea
        ? offsetof(CPUX86State, xmm_t0)
        : ZMM_OFFSET(op->n);

    op->offset = base + xmm_offset(op->ot);
}
|
|
|
|
|
|
|
|
/*
 * Load a vector operand of width ot from the memory address in s->A0
 * into the CPUX86State slot at dest_ofs.  Widths up to 32 bits go
 * through a GPR-sized load into temp and a partial store; 64-bit and
 * wider use the dedicated env loaders.  "aligned" requests an
 * alignment-checking access for the 128/256-bit cases.
 */
static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_ofs, bool aligned)
{
    if (ot == MO_8 || ot == MO_16 || ot == MO_32) {
        /* Scalar path: load into temp, then store the low bytes.  */
        gen_op_ld_v(s, ot, temp, s->A0);
        if (ot == MO_8) {
            tcg_gen_st8_tl(temp, cpu_env, dest_ofs);
        } else if (ot == MO_16) {
            tcg_gen_st16_tl(temp, cpu_env, dest_ofs);
        } else {
            tcg_gen_st32_tl(temp, cpu_env, dest_ofs);
        }
        return;
    }

    switch (ot) {
    case MO_64:
        gen_ldq_env_A0(s, dest_ofs);
        break;
    case MO_128:
        gen_ldo_env_A0(s, dest_ofs, aligned);
        break;
    case MO_256:
        gen_ldy_env_A0(s, dest_ofs, aligned);
        break;
    default:
        g_assert_not_reached();
    }
}
|
|
|
|
|
|
|
|
/*
 * Load operand opn of the decoded instruction.  GPR/segment/CR/DR and
 * immediate operands are placed in v; MMX and SSE operands instead have
 * their env offset computed (op->offset), and are copied from memory
 * into the scratch slot when the operand has an effective address.
 */
static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
{
    X86DecodedOp *op = &decode->op[opn];

    switch (op->unit) {
    case X86_OP_SKIP:
        /* Operand slot unused by this instruction.  */
        break;

    case X86_OP_SEG:
        tcg_gen_ld32u_tl(v, cpu_env,
                         offsetof(CPUX86State, segs[op->n].selector));
        break;

    case X86_OP_CR:
        tcg_gen_ld_tl(v, cpu_env, offsetof(CPUX86State, cr[op->n]));
        break;

    case X86_OP_DR:
        tcg_gen_ld_tl(v, cpu_env, offsetof(CPUX86State, dr[op->n]));
        break;

    case X86_OP_INT:
        if (op->has_ea) {
            gen_op_ld_v(s, op->ot, v, s->A0);
        } else {
            gen_op_mov_v_reg(s, op->ot, v, op->n);
        }
        break;

    case X86_OP_IMM:
        tcg_gen_movi_tl(v, decode->immediate);
        break;

    case X86_OP_MMX:
    case X86_OP_SSE:
        /* Vector operands live in env; record where, then pull in
           memory sources.  */
        if (op->unit == X86_OP_MMX) {
            compute_mmx_offset(op);
        } else {
            compute_xmm_offset(op);
        }
        if (op->has_ea) {
            gen_load_sse(s, v, op->ot, op->offset, true);
        }
        break;

    default:
        g_assert_not_reached();
    }
}
|
|
|
|
|
|
|
|
/*
 * Write back operand opn of the decoded instruction.  Integer results
 * come from v and go to a register or to memory at s->A0; segment
 * writes go through gen_movl_seg_T0.  MMX/SSE results are written by
 * the per-instruction generators directly into env, so nothing is
 * needed here.  CR/DR destinations are never written via this path.
 */
static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
{
    X86DecodedOp *op = &decode->op[opn];

    switch (op->unit) {
    case X86_OP_SKIP:
    case X86_OP_MMX:
    case X86_OP_SSE:
        /* Nothing to do: skipped, or already written to env.  */
        break;

    case X86_OP_SEG:
        /* Note that gen_movl_seg_T0 takes care of interrupt shadow and TF.  */
        gen_movl_seg_T0(s, op->n);
        break;

    case X86_OP_INT:
        if (op->has_ea) {
            gen_op_st_v(s, op->ot, v, s->A0);
        } else {
            gen_op_mov_reg_v(s, op->ot, op->n, v);
        }
        break;

    default:
        /* X86_OP_CR, X86_OP_DR and anything else are invalid here.  */
        g_assert_not_reached();
    }
}
|