diff --git a/tcg/tcg.c b/tcg/tcg.c
index 057423c121..88fe01f59f 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -181,6 +181,22 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
 static int tcg_out_ldst_finalize(TCGContext *s);
 #endif
 
+typedef struct TCGLdstHelperParam {
+    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
+    unsigned ntmp;
+    int tmp[3];
+} TCGLdstHelperParam;
+
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
+                                   const TCGLdstHelperParam *p)
+    __attribute__((unused));
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
+                                  bool load_sign, const TCGLdstHelperParam *p)
+    __attribute__((unused));
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
+                                   const TCGLdstHelperParam *p)
+    __attribute__((unused));
+
 TCGContext tcg_init_ctx;
 __thread TCGContext *tcg_ctx;
 
@@ -366,8 +382,17 @@ void tcg_raise_tb_overflow(TCGContext *s)
     siglongjmp(s->jmp_trans, -2);
 }
 
+/*
+ * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
+ * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
+ *
+ * However, tcg_out_helper_load_slots reuses this field to hold an
+ * argument slot number (which may designate an argument register or an
+ * argument stack slot), converting to TCGReg once all arguments that
+ * are destined for the stack are processed.
+ */
 typedef struct TCGMovExtend {
-    TCGReg dst;
+    unsigned dst;
     TCGReg src;
     TCGType dst_type;
     TCGType src_type;
@@ -459,9 +484,8 @@ static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
  * between the sources and destinations.
  */
 
-static void __attribute__((unused))
-tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
-                const TCGMovExtend *i2, int scratch)
+static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
+                            const TCGMovExtend *i2, int scratch)
 {
     TCGReg src1 = i1->src;
     TCGReg src2 = i2->src;
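
The interesting case in tcg_out_movext2() is when the two moves overlap.
Below is a standalone model of the problem it solves: performing two moves
whose sources and destinations may overlap, using a scratch register to
break a cycle. Everything here (Reg, regs[], do_move) is invented for
illustration, and the real function is cleverer (it can use a host xchg
instruction where available); only the hazard analysis is modeled.

    #include <assert.h>
    #include <stdio.h>

    typedef int Reg;
    static int regs[8];                      /* toy register file */

    static void do_move(Reg dst, Reg src)    /* stands in for tcg_out_movext1 */
    {
        regs[dst] = regs[src];
    }

    /* Perform two possibly-overlapping moves; move 2 is emitted first. */
    static void movext2(Reg d1, Reg s1, Reg d2, Reg s2, Reg scratch)
    {
        assert(d1 != d2);
        if (d2 == s1) {
            /* Emitting move 2 first would clobber move 1's source: save it. */
            assert(scratch >= 0);
            do_move(scratch, s1);
            s1 = scratch;
        }
        do_move(d2, s2);    /* safe: d2 != d1, and s2 is still intact */
        do_move(d1, s1);
    }

    int main(void)
    {
        regs[0] = 11;
        regs[1] = 22;
        movext2(0, 1, 1, 0, 7);   /* full cycle: swap r0 and r1 via r7 */
        printf("r0=%d r1=%d\n", regs[0], regs[1]);   /* prints r0=22 r1=11 */
        return 0;
    }
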
@@ -715,6 +739,58 @@ static TCGHelperInfo all_helpers[] = {
 };
 static GHashTable *helper_table;
 
+/*
+ * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
+ * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
+ * We only use these for layout in tcg_out_ld_helper_ret and
+ * tcg_out_st_helper_args, and share them between several of
+ * the helpers, with the end result that it's easier to build manually.
+ */
+
+#if TCG_TARGET_REG_BITS == 32
+# define dh_typecode_ttl  dh_typecode_i32
+#else
+# define dh_typecode_ttl  dh_typecode_i64
+#endif
+
+static TCGHelperInfo info_helper_ld32_mmu = {
+    .flags = TCG_CALL_NO_WG,
+    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
+              | dh_typemask(env, 1)
+              | dh_typemask(tl, 2)   /* target_ulong addr */
+              | dh_typemask(i32, 3)  /* unsigned oi */
+              | dh_typemask(ptr, 4)  /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_ld64_mmu = {
+    .flags = TCG_CALL_NO_WG,
+    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
+              | dh_typemask(env, 1)
+              | dh_typemask(tl, 2)   /* target_ulong addr */
+              | dh_typemask(i32, 3)  /* unsigned oi */
+              | dh_typemask(ptr, 4)  /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_st32_mmu = {
+    .flags = TCG_CALL_NO_WG,
+    .typemask = dh_typemask(void, 0)
+              | dh_typemask(env, 1)
+              | dh_typemask(tl, 2)   /* target_ulong addr */
+              | dh_typemask(i32, 3)  /* uint32_t data */
+              | dh_typemask(i32, 4)  /* unsigned oi */
+              | dh_typemask(ptr, 5)  /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_st64_mmu = {
+    .flags = TCG_CALL_NO_WG,
+    .typemask = dh_typemask(void, 0)
+              | dh_typemask(env, 1)
+              | dh_typemask(tl, 2)   /* target_ulong addr */
+              | dh_typemask(i64, 3)  /* uint64_t data */
+              | dh_typemask(i32, 4)  /* unsigned oi */
+              | dh_typemask(ptr, 5)  /* uintptr_t ra */
+};
+
 #ifdef CONFIG_TCG_INTERPRETER
 static ffi_type *typecode_to_ffi(int argmask)
 {
@@ -1126,6 +1202,11 @@ static void tcg_context_init(unsigned max_cpus)
                             (gpointer)&all_helpers[i]);
     }
 
+    init_call_layout(&info_helper_ld32_mmu);
+    init_call_layout(&info_helper_ld64_mmu);
+    init_call_layout(&info_helper_st32_mmu);
+    init_call_layout(&info_helper_st64_mmu);
+
 #ifdef CONFIG_TCG_INTERPRETER
     init_ffi_layouts();
 #endif
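
For reference, the four typemasks above encode call signatures of the
following shape. This is a sketch: the names helper_ld32_mmu etc. mirror
the info_helper_* structure names and are not the actual symbols in
"tcg/tcg-ldst.h" (those carry endianness and size in their names), and the
typedefs are stand-ins for the real target/host definitions.

    #include <stdint.h>

    typedef struct CPUArchState CPUArchState;   /* guest cpu state */
    typedef uintptr_t tcg_target_ulong;         /* host register width */
    typedef uint64_t target_ulong;              /* guest word; width varies */

    /* ld32: result returned widened to a host register */
    tcg_target_ulong helper_ld32_mmu(CPUArchState *env, target_ulong addr,
                                     uint32_t oi, uintptr_t ra);
    /* ld64: always a full uint64_t */
    uint64_t helper_ld64_mmu(CPUArchState *env, target_ulong addr,
                             uint32_t oi, uintptr_t ra);
    /* st32/st64: data precedes oi/ra, matching typemask slots 3..5 */
    void helper_st32_mmu(CPUArchState *env, target_ulong addr, uint32_t data,
                         uint32_t oi, uintptr_t ra);
    void helper_st64_mmu(CPUArchState *env, target_ulong addr, uint64_t data,
                         uint32_t oi, uintptr_t ra);
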
@@ -5011,6 +5092,392 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
     }
 }
 
+/*
+ * Similarly for qemu_ld/st slow path helpers.
+ * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
+ * using only the provided backend tcg_out_* functions.
+ */
+
+static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
+{
+    int ofs = arg_slot_stk_ofs(slot);
+
+    /*
+     * Each stack slot is TCG_TARGET_LONG_BITS wide.  If the host does not
+     * require extension to uint64_t, adjust the address for uint32_t.
+     */
+    if (HOST_BIG_ENDIAN &&
+        TCG_TARGET_REG_BITS == 64 &&
+        type == TCG_TYPE_I32) {
+        ofs += 4;
+    }
+    return ofs;
+}
+
+static void tcg_out_helper_load_regs(TCGContext *s,
+                                     unsigned nmov, TCGMovExtend *mov,
+                                     unsigned ntmp, const int *tmp)
+{
+    switch (nmov) {
+    default:
+        /* The backend must have provided enough temps for the worst case. */
+        tcg_debug_assert(ntmp + 1 >= nmov);
+
+        for (unsigned i = nmov - 1; i >= 2; --i) {
+            TCGReg dst = mov[i].dst;
+
+            for (unsigned j = 0; j < i; ++j) {
+                if (dst == mov[j].src) {
+                    /*
+                     * Conflict.
+                     * Copy the source to a temporary, recurse for the
+                     * remaining moves, perform the extension from our
+                     * scratch on the way out.
+                     */
+                    TCGReg scratch = tmp[--ntmp];
+                    tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
+                    mov[i].src = scratch;
+
+                    tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
+                    tcg_out_movext1(s, &mov[i]);
+                    return;
+                }
+            }
+
+            /* No conflicts: perform this move and continue. */
+            tcg_out_movext1(s, &mov[i]);
+        }
+        /* fall through for the final two moves */
+
+    case 2:
+        tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
+        return;
+    case 1:
+        tcg_out_movext1(s, mov);
+        return;
+    case 0:
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_out_helper_load_slots(TCGContext *s,
+                                      unsigned nmov, TCGMovExtend *mov,
+                                      const TCGLdstHelperParam *parm)
+{
+    unsigned i;
+
+    /*
+     * Start from the end, storing to the stack first.
+     * This frees those registers, so we need not consider overlap.
+     */
+    for (i = nmov; i-- > 0; ) {
+        unsigned slot = mov[i].dst;
+
+        if (arg_slot_reg_p(slot)) {
+            goto found_reg;
+        }
+
+        TCGReg src = mov[i].src;
+        TCGType dst_type = mov[i].dst_type;
+        MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
+
+        /* The argument is going onto the stack; extend into scratch. */
+        if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
+            tcg_debug_assert(parm->ntmp != 0);
+            mov[i].dst = src = parm->tmp[0];
+            tcg_out_movext1(s, &mov[i]);
+        }
+
+        tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
+                   tcg_out_helper_stk_ofs(dst_type, slot));
+    }
+    return;
+
+ found_reg:
+    /*
+     * The remaining arguments are in registers.
+     * Convert slot numbers to argument registers.
+     */
+    nmov = i + 1;
+    for (i = 0; i < nmov; ++i) {
+        mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
+    }
+    tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
+}
+
+static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
+                                    TCGType type, tcg_target_long imm,
+                                    const TCGLdstHelperParam *parm)
+{
+    if (arg_slot_reg_p(slot)) {
+        tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
+    } else {
+        int ofs = tcg_out_helper_stk_ofs(type, slot);
+        if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
+            tcg_debug_assert(parm->ntmp != 0);
+            tcg_out_movi(s, type, parm->tmp[0], imm);
+            tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
+        }
+    }
+}
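
The big-endian adjustment in tcg_out_helper_stk_ofs() is easiest to see
with concrete numbers. A standalone model follows; NR_IARG_REGS,
STACK_OFFSET and SLOT_SIZE are invented constants standing in for the
values behind arg_slot_stk_ofs() on a 64-bit host.

    #include <stdio.h>

    enum { NR_IARG_REGS = 6, STACK_OFFSET = 0, SLOT_SIZE = 8 };

    static int stk_ofs(unsigned slot, int is_i32, int big_endian)
    {
        /* Slots below NR_IARG_REGS live in registers, not on the stack. */
        int ofs = STACK_OFFSET + (slot - NR_IARG_REGS) * SLOT_SIZE;

        /* A 32-bit value occupies the low half of a 64-bit slot; on a
         * big-endian host the low half is at the higher address. */
        if (big_endian && is_i32) {
            ofs += 4;
        }
        return ofs;
    }

    int main(void)
    {
        printf("slot 6, i32, LE host: ofs %d\n", stk_ofs(6, 1, 0));  /* 0 */
        printf("slot 6, i32, BE host: ofs %d\n", stk_ofs(6, 1, 1));  /* 4 */
        printf("slot 7, i64, BE host: ofs %d\n", stk_ofs(7, 0, 1));  /* 8 */
        return 0;
    }
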
+
+static void tcg_out_helper_load_common_args(TCGContext *s,
+                                            const TCGLabelQemuLdst *ldst,
+                                            const TCGLdstHelperParam *parm,
+                                            const TCGHelperInfo *info,
+                                            unsigned next_arg)
+{
+    TCGMovExtend ptr_mov = {
+        .dst_type = TCG_TYPE_PTR,
+        .src_type = TCG_TYPE_PTR,
+        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
+    };
+    const TCGCallArgumentLoc *loc = &info->in[0];
+    TCGType type;
+    unsigned slot;
+    tcg_target_ulong imm;
+
+    /*
+     * Handle env, which is always first.
+     */
+    ptr_mov.dst = loc->arg_slot;
+    ptr_mov.src = TCG_AREG0;
+    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
+
+    /*
+     * Handle oi.
+     */
+    imm = ldst->oi;
+    loc = &info->in[next_arg];
+    type = TCG_TYPE_I32;
+    switch (loc->kind) {
+    case TCG_CALL_ARG_NORMAL:
+        break;
+    case TCG_CALL_ARG_EXTEND_U:
+    case TCG_CALL_ARG_EXTEND_S:
+        /* No extension required for MemOpIdx. */
+        tcg_debug_assert(imm <= INT32_MAX);
+        type = TCG_TYPE_REG;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
+    next_arg++;
+
+    /*
+     * Handle ra.
+     */
+    loc = &info->in[next_arg];
+    slot = loc->arg_slot;
+    if (parm->ra_gen) {
+        int arg_reg = -1;
+        TCGReg ra_reg;
+
+        if (arg_slot_reg_p(slot)) {
+            arg_reg = tcg_target_call_iarg_regs[slot];
+        }
+        ra_reg = parm->ra_gen(s, ldst, arg_reg);
+
+        ptr_mov.dst = slot;
+        ptr_mov.src = ra_reg;
+        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
+    } else {
+        imm = (uintptr_t)ldst->raddr;
+        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
+    }
+}
+
+static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
+                                       const TCGCallArgumentLoc *loc,
+                                       TCGType dst_type, TCGType src_type,
+                                       TCGReg lo, TCGReg hi)
+{
+    if (dst_type <= TCG_TYPE_REG) {
+        MemOp src_ext;
+
+        switch (loc->kind) {
+        case TCG_CALL_ARG_NORMAL:
+            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
+            break;
+        case TCG_CALL_ARG_EXTEND_U:
+            dst_type = TCG_TYPE_REG;
+            src_ext = MO_UL;
+            break;
+        case TCG_CALL_ARG_EXTEND_S:
+            dst_type = TCG_TYPE_REG;
+            src_ext = MO_SL;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        mov[0].dst = loc->arg_slot;
+        mov[0].dst_type = dst_type;
+        mov[0].src = lo;
+        mov[0].src_type = src_type;
+        mov[0].src_ext = src_ext;
+        return 1;
+    }
+
+    assert(TCG_TARGET_REG_BITS == 32);
+
+    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
+    mov[0].src = lo;
+    mov[0].dst_type = TCG_TYPE_I32;
+    mov[0].src_type = TCG_TYPE_I32;
+    mov[0].src_ext = MO_32;
+
+    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
+    mov[1].src = hi;
+    mov[1].dst_type = TCG_TYPE_I32;
+    mov[1].src_type = TCG_TYPE_I32;
+    mov[1].src_ext = MO_32;
+
+    return 2;
+}
+
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
+                                   const TCGLdstHelperParam *parm)
+{
+    const TCGHelperInfo *info;
+    const TCGCallArgumentLoc *loc;
+    TCGMovExtend mov[2];
+    unsigned next_arg, nmov;
+    MemOp mop = get_memop(ldst->oi);
+
+    switch (mop & MO_SIZE) {
+    case MO_8:
+    case MO_16:
+    case MO_32:
+        info = &info_helper_ld32_mmu;
+        break;
+    case MO_64:
+        info = &info_helper_ld64_mmu;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Defer env argument. */
+    next_arg = 1;
+
+    loc = &info->in[next_arg];
+    nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
+                                  ldst->addrlo_reg, ldst->addrhi_reg);
+    next_arg += nmov;
+
+    tcg_out_helper_load_slots(s, nmov, mov, parm);
+
+    /* No special attention for 32- and 64-bit return values. */
+    tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
+
+    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
+}
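
On a 32-bit host, a 64-bit address or data value consumes two consecutive
argument slots, and tcg_out_helper_add_mov() assigns the halves by host
endianness. A minimal standalone model of that slot assignment:

    #include <stdio.h>

    int main(void)
    {
        unsigned slot[2] = { 1, 2 };   /* two consecutive argument slots */

        for (int big_endian = 0; big_endian < 2; big_endian++) {
            /* Mirrors mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot and
             * mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot above. */
            unsigned lo_slot = slot[big_endian];
            unsigned hi_slot = slot[!big_endian];
            printf("%s host: lo -> slot %u, hi -> slot %u\n",
                   big_endian ? "BE" : "LE", lo_slot, hi_slot);
        }
        return 0;
    }
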
+
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
+                                  bool load_sign,
+                                  const TCGLdstHelperParam *parm)
+{
+    TCGMovExtend mov[2];
+
+    if (ldst->type <= TCG_TYPE_REG) {
+        MemOp mop = get_memop(ldst->oi);
+
+        mov[0].dst = ldst->datalo_reg;
+        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
+        mov[0].dst_type = ldst->type;
+        mov[0].src_type = TCG_TYPE_REG;
+
+        /*
+         * If load_sign, then we allowed the helper to perform the
+         * appropriate sign extension to tcg_target_ulong, and all
+         * we need now is a plain move.
+         *
+         * If not, then we expect the relevant extension instruction
+         * to be no more expensive than a move, and we thus save the
+         * icache etc by only using one of two helper functions.
+         */
+        if (load_sign || !(mop & MO_SIGN)) {
+            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
+                mov[0].src_ext = MO_32;
+            } else {
+                mov[0].src_ext = MO_64;
+            }
+        } else {
+            mov[0].src_ext = mop & MO_SSIZE;
+        }
+        tcg_out_movext1(s, mov);
+    } else {
+        assert(TCG_TARGET_REG_BITS == 32);
+
+        mov[0].dst = ldst->datalo_reg;
+        mov[0].src =
+            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
+        mov[0].dst_type = TCG_TYPE_I32;
+        mov[0].src_type = TCG_TYPE_I32;
+        mov[0].src_ext = MO_32;
+
+        mov[1].dst = ldst->datahi_reg;
+        mov[1].src =
+            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
+        mov[1].dst_type = TCG_TYPE_REG;
+        mov[1].src_type = TCG_TYPE_REG;
+        mov[1].src_ext = MO_32;
+
+        tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
+    }
+}
+
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
+                                   const TCGLdstHelperParam *parm)
+{
+    const TCGHelperInfo *info;
+    const TCGCallArgumentLoc *loc;
+    TCGMovExtend mov[4];
+    TCGType data_type;
+    unsigned next_arg, nmov, n;
+    MemOp mop = get_memop(ldst->oi);
+
+    switch (mop & MO_SIZE) {
+    case MO_8:
+    case MO_16:
+    case MO_32:
+        info = &info_helper_st32_mmu;
+        data_type = TCG_TYPE_I32;
+        break;
+    case MO_64:
+        info = &info_helper_st64_mmu;
+        data_type = TCG_TYPE_I64;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Defer env argument. */
+    next_arg = 1;
+    nmov = 0;
+
+    /* Handle addr argument. */
+    loc = &info->in[next_arg];
+    n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
+                               ldst->addrlo_reg, ldst->addrhi_reg);
+    next_arg += n;
+    nmov += n;
+
+    /* Handle data argument. */
+    loc = &info->in[next_arg];
+    n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
+                               ldst->datalo_reg, ldst->datahi_reg);
+    next_arg += n;
+    nmov += n;
+    tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
+
+    tcg_out_helper_load_slots(s, nmov, mov, parm);
+    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
+}
+
 #ifdef CONFIG_PROFILER
 
 /* avoid copy/paste errors */
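
For orientation, a sketch of the intended backend usage once a backend is
converted to these helpers later in the series. Everything here other than
the three new functions and TCGLdstHelperParam is a placeholder:
TCG_REG_TMP1, qemu_ld_helpers[], tcg_out_call_helper and tcg_out_goto
stand in for whatever the particular backend provides.

    static const TCGLdstHelperParam ldst_helper_param = {
        .ntmp = 1, .tmp = { TCG_REG_TMP1 }
    };

    static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
    {
        MemOp opc = get_memop(l->oi);

        /* ... patch the fast-path branch to point here ... */

        /* Load env, addr, oi and ra into the argument slots. */
        tcg_out_ld_helper_args(s, l, &ldst_helper_param);

        /* Call the size-specific helper from "tcg/tcg-ldst.h". */
        tcg_out_call_helper(s, qemu_ld_helpers[opc & MO_SIZE]);

        /* Move/extend the return value into datalo/datahi. */
        tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);

        /* Return to the fast path. */
        tcg_out_goto(s, l->raddr);
        return true;
    }
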