target/i386: add core of new i386 decoder
The new decoder is based on three principles:
- use mostly table-driven decoding, using tables derived as much as possible
from the Intel manual. Centralizing the decoding of the operands makes it
more homogeneous, for example all immediates are signed. All modrm
handling is in one function, and can be shared between SSE and ALU
instructions (including XMM<->GPR instructions). The SSE/AVX decoder
will also not have duplicated code between the 0F, 0F38 and 0F3A tables.
- keep the code as "non-branchy" as possible. Generally, the code for
the new decoder is more verbose, but the control flow is simpler.
Conditionals are not nested and have small bodies. All instruction
groups are resolved even before operands are decoded, and code
generation is separated as much as possible within small functions
that only handle one instruction each.
- keep address generation and (for ALU operands) memory loads and writeback
as much in common code as possible. All ALU operations for example
are implemented as T0=f(T0,T1). For non-ALU instructions,
read-modify-write memory operations are rare, but registers do not
have TCGv equivalents: therefore, the common logic sets up pointer
temporaries with the operands, while load and writeback are handled
by gvec or by helpers.
These principles make future code review and extensibility simpler, at
the cost of having a relatively large amount of code in the form of this
patch. Even EVEX should not be _too_ hard to implement (it's just a crazy
large amount of possibilities).
This patch introduces the main decoder flow, and integrates the old
decoder with the new one. The old decoder takes care of parsing
prefixes and then optionally drops to the new one. The changes to the
old decoder are minimal and allow it to be replaced incrementally with
the new one.
There is a debugging mechanism through a "LIMIT" environment variable.
In user-mode emulation, the variable is the number of instructions
decoded by the new decoder before permanently switching to the old one.
In system emulation, the variable is the highest opcode that is decoded
by the new decoder (this is less friendly, but it's the best that can
be done without requiring deterministic execution).
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-08-23 09:20:55 +00:00
|
|
|
/*
|
|
|
|
* New-style TCG opcode generator for i386 instructions
|
|
|
|
*
|
|
|
|
* Copyright (c) 2022 Red Hat, Inc.
|
|
|
|
*
|
|
|
|
* Author: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Emit an illegal-opcode exception for the current instruction.
 * Matches the common generator signature (env and decode are unused)
 * so it can be used as a table entry for undefined encodings.
 */
static void gen_illegal(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_illegal_opcode(s);
}
|
|
|
|
|
|
|
|
/*
 * Materialize the effective address described by *mem into s->A0,
 * adding the applicable segment base (s->override takes precedence
 * over the default segment recorded in mem->def_seg).
 */
static void gen_load_ea(DisasContext *s, AddressParts *mem)
{
    gen_lea_v_seg(s, s->aflag, gen_lea_modrm_1(s, *mem),
                  mem->def_seg, s->override);
}
|
2022-08-23 12:55:56 +00:00
|
|
|
|
|
|
|
/*
 * Byte offset of element 0 of the given width within an MMXReg.
 * The MMX_* macros account for host endianness.
 */
static inline int mmx_offset(MemOp ot)
{
    if (ot == MO_8) {
        return offsetof(MMXReg, MMX_B(0));
    }
    if (ot == MO_16) {
        return offsetof(MMXReg, MMX_W(0));
    }
    if (ot == MO_32) {
        return offsetof(MMXReg, MMX_L(0));
    }
    if (ot == MO_64) {
        return offsetof(MMXReg, MMX_Q(0));
    }
    g_assert_not_reached();
}
|
|
|
|
|
|
|
|
/*
 * Byte offset of element 0 of the given width within a ZMMReg.
 * The ZMM_* macros account for host endianness.
 */
static inline int xmm_offset(MemOp ot)
{
    if (ot == MO_8) {
        return offsetof(ZMMReg, ZMM_B(0));
    }
    if (ot == MO_16) {
        return offsetof(ZMMReg, ZMM_W(0));
    }
    if (ot == MO_32) {
        return offsetof(ZMMReg, ZMM_L(0));
    }
    if (ot == MO_64) {
        return offsetof(ZMMReg, ZMM_Q(0));
    }
    if (ot == MO_128) {
        return offsetof(ZMMReg, ZMM_X(0));
    }
    if (ot == MO_256) {
        return offsetof(ZMMReg, ZMM_Y(0));
    }
    g_assert_not_reached();
}
|
|
|
|
|
|
|
|
/*
 * Fill in op->offset with the CPUX86State-relative location of an MMX
 * operand: the architectural fpregs[n].mmx register, or the mmx_t0
 * scratch slot when the operand came from memory (has_ea).
 */
static void compute_mmx_offset(X86DecodedOp *op)
{
    int base = op->has_ea
        ? offsetof(CPUX86State, mmx_t0)
        : offsetof(CPUX86State, fpregs[op->n].mmx);

    op->offset = base + mmx_offset(op->ot);
}
|
|
|
|
|
|
|
|
/*
 * Fill in op->offset with the CPUX86State-relative location of an
 * XMM/YMM operand: the architectural vector register n, or the xmm_t0
 * scratch slot when the operand came from memory (has_ea).
 */
static void compute_xmm_offset(X86DecodedOp *op)
{
    int base = op->has_ea
        ? offsetof(CPUX86State, xmm_t0)
        : ZMM_OFFSET(op->n);

    op->offset = base + xmm_offset(op->ot);
}
|
|
|
|
|
|
|
|
/*
 * Load a vector operand of width ot from the memory address in s->A0
 * into the CPUX86State slot at dest_ofs.  Widths up to 32 bits go
 * through a GPR-sized load into temp and a partial store; 64-bit and
 * wider use the dedicated env loaders.  "aligned" requests an
 * alignment-checking access for the 128/256-bit cases.
 */
static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_ofs, bool aligned)
{
    if (ot == MO_8 || ot == MO_16 || ot == MO_32) {
        /* Scalar path: load into temp, then store the low bytes.  */
        gen_op_ld_v(s, ot, temp, s->A0);
        if (ot == MO_8) {
            tcg_gen_st8_tl(temp, cpu_env, dest_ofs);
        } else if (ot == MO_16) {
            tcg_gen_st16_tl(temp, cpu_env, dest_ofs);
        } else {
            tcg_gen_st32_tl(temp, cpu_env, dest_ofs);
        }
        return;
    }

    switch (ot) {
    case MO_64:
        gen_ldq_env_A0(s, dest_ofs);
        break;
    case MO_128:
        gen_ldo_env_A0(s, dest_ofs, aligned);
        break;
    case MO_256:
        gen_ldy_env_A0(s, dest_ofs, aligned);
        break;
    default:
        g_assert_not_reached();
    }
}
|
|
|
|
|
|
|
|
/*
 * Load operand opn of the decoded instruction.  GPR/segment/CR/DR and
 * immediate operands are placed in v; MMX and SSE operands instead have
 * their env offset computed (op->offset), and are copied from memory
 * into the scratch slot when the operand has an effective address.
 */
static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
{
    X86DecodedOp *op = &decode->op[opn];

    switch (op->unit) {
    case X86_OP_SKIP:
        /* Operand slot unused by this instruction.  */
        break;

    case X86_OP_SEG:
        tcg_gen_ld32u_tl(v, cpu_env,
                         offsetof(CPUX86State, segs[op->n].selector));
        break;

    case X86_OP_CR:
        tcg_gen_ld_tl(v, cpu_env, offsetof(CPUX86State, cr[op->n]));
        break;

    case X86_OP_DR:
        tcg_gen_ld_tl(v, cpu_env, offsetof(CPUX86State, dr[op->n]));
        break;

    case X86_OP_INT:
        if (op->has_ea) {
            gen_op_ld_v(s, op->ot, v, s->A0);
        } else {
            gen_op_mov_v_reg(s, op->ot, v, op->n);
        }
        break;

    case X86_OP_IMM:
        tcg_gen_movi_tl(v, decode->immediate);
        break;

    case X86_OP_MMX:
    case X86_OP_SSE:
        /* Vector operands live in env; record where, then pull in
           memory sources.  */
        if (op->unit == X86_OP_MMX) {
            compute_mmx_offset(op);
        } else {
            compute_xmm_offset(op);
        }
        if (op->has_ea) {
            gen_load_sse(s, v, op->ot, op->offset, true);
        }
        break;

    default:
        g_assert_not_reached();
    }
}
|
|
|
|
|
|
|
|
/*
 * Write back operand opn of the decoded instruction.  Integer results
 * come from v and go to a register or to memory at s->A0; segment
 * writes go through gen_movl_seg_T0.  MMX/SSE results are written by
 * the per-instruction generators directly into env, so nothing is
 * needed here.  CR/DR destinations are never written via this path.
 */
static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
{
    X86DecodedOp *op = &decode->op[opn];

    switch (op->unit) {
    case X86_OP_SKIP:
    case X86_OP_MMX:
    case X86_OP_SSE:
        /* Nothing to do: skipped, or already written to env.  */
        break;

    case X86_OP_SEG:
        /* Note that gen_movl_seg_T0 takes care of interrupt shadow and TF.  */
        gen_movl_seg_T0(s, op->n);
        break;

    case X86_OP_INT:
        if (op->has_ea) {
            gen_op_st_v(s, op->ot, v, s->A0);
        } else {
            gen_op_mov_reg_v(s, op->ot, op->n, v);
        }
        break;

    default:
        /* X86_OP_CR, X86_OP_DR and anything else are invalid here.  */
        g_assert_not_reached();
    }
}
|