From 479eb12108b66265a8149b0fa90e16900bc217af Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 24 Apr 2014 08:25:03 -0700 Subject: [PATCH 01/24] tcg-mips: Layout executable and code_gen_buffer Choosing good addresses for them means we can use JAL for helper calls. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- configure | 7 +++++-- translate-all.c | 13 +++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 605a0ece0c..4d1e79de3d 100755 --- a/configure +++ b/configure @@ -4029,11 +4029,14 @@ fi if test "$pie" = "no" ; then textseg_addr= case "$cpu" in - arm | hppa | i386 | m68k | ppc | ppc64 | s390* | sparc | sparc64 | x86_64 | x32) + arm | i386 | ppc* | s390* | sparc* | x86_64 | x32) + # ??? Rationale for choosing this address textseg_addr=0x60000000 ;; mips) - textseg_addr=0x400000 + # A 256M aligned address, high in the address space, with enough + # room for the code_gen_buffer above it before the stack. + textseg_addr=0x60000000 ;; esac if [ -n "$textseg_addr" ]; then diff --git a/translate-all.c b/translate-all.c index 5549a85ed5..c631694a6e 100644 --- a/translate-all.c +++ b/translate-all.c @@ -475,6 +475,10 @@ static inline PageDesc *page_find(tb_page_addr_t index) #elif defined(__s390x__) /* We have a +- 4GB range on the branches; leave some slop. */ # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024) +#elif defined(__mips__) + /* We have a 256MB branch region, but leave room to make sure the + main executable is also within that region. */ +# define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024) #else # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) #endif @@ -545,6 +549,15 @@ static inline void *alloc_code_gen_buffer(void) start = 0x40000000ul; # elif defined(__s390x__) start = 0x90000000ul; +# elif defined(__mips__) + /* ??? We ought to more explicitly manage layout for softmmu too. 
*/ +# ifdef CONFIG_USER_ONLY + start = 0x68000000ul; +# elif _MIPS_SIM == _ABI64 + start = 0x128000000ul; +# else + start = 0x08000000ul; +# endif # endif buf = mmap((void *)start, tcg_ctx.code_gen_buffer_size, From 483c76e14043a60926f7df4415d0e3749694158d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 24 Apr 2014 09:16:07 -0700 Subject: [PATCH 02/24] tcg-mips: Constrain the code_gen_buffer to be within one 256mb segment This assures us use of J for exit_tb and goto_tb, and JAL for calling into the generated bswap helpers. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- translate-all.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 5 deletions(-) diff --git a/translate-all.c b/translate-all.c index c631694a6e..6b7b46e761 100644 --- a/translate-all.c +++ b/translate-all.c @@ -513,14 +513,47 @@ static inline size_t size_code_gen_buffer(size_t tb_size) return tb_size; } +#ifdef __mips__ +/* In order to use J and JAL within the code_gen_buffer, we require + that the buffer not cross a 256MB boundary. */ +static inline bool cross_256mb(void *addr, size_t size) +{ + return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & 0xf0000000; +} + +/* We weren't able to allocate a buffer without crossing that boundary, + so make do with the larger portion of the buffer that doesn't cross. + Returns the new base of the buffer, and adjusts code_gen_buffer_size. 
*/ +static inline void *split_cross_256mb(void *buf1, size_t size1) +{ + void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~(uintptr_t)0x0fffffff); + size_t size2 = buf1 + size1 - buf2; + + size1 = buf2 - buf1; + if (size1 < size2) { + size1 = size2; + buf1 = buf2; + } + + tcg_ctx.code_gen_buffer_size = size1; + return buf1; +} +#endif + #ifdef USE_STATIC_CODE_GEN_BUFFER static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] __attribute__((aligned(CODE_GEN_ALIGN))); static inline void *alloc_code_gen_buffer(void) { - map_exec(static_code_gen_buffer, tcg_ctx.code_gen_buffer_size); - return static_code_gen_buffer; + void *buf = static_code_gen_buffer; +#ifdef __mips__ + if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) { + buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size); + } +#endif + map_exec(buf, tcg_ctx.code_gen_buffer_size); + return buf; } #elif defined(USE_MMAP) static inline void *alloc_code_gen_buffer(void) @@ -562,16 +595,63 @@ static inline void *alloc_code_gen_buffer(void) buf = mmap((void *)start, tcg_ctx.code_gen_buffer_size, PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0); - return buf == MAP_FAILED ? NULL : buf; + if (buf == MAP_FAILED) { + return NULL; + } + +#ifdef __mips__ + if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) { + /* Try again, with the original still mapped, to avoid re-acquiring + that 256mb crossing. This time don't specify an address. */ + size_t size2, size1 = tcg_ctx.code_gen_buffer_size; + void *buf2 = mmap(NULL, size1, PROT_WRITE | PROT_READ | PROT_EXEC, + flags, -1, 0); + if (buf2 != MAP_FAILED) { + if (!cross_256mb(buf2, size1)) { + /* Success! Use the new buffer. */ + munmap(buf, size1); + return buf2; + } + /* Failure. Work with what we had. */ + munmap(buf2, size1); + } + + /* Split the original buffer. Free the smaller half. */ + buf2 = split_cross_256mb(buf, size1); + size2 = tcg_ctx.code_gen_buffer_size; + munmap(buf + (buf == buf2 ? 
size2 : 0), size1 - size2); + return buf2; + } +#endif + + return buf; } #else static inline void *alloc_code_gen_buffer(void) { void *buf = g_malloc(tcg_ctx.code_gen_buffer_size); - if (buf) { - map_exec(buf, tcg_ctx.code_gen_buffer_size); + if (buf == NULL) { + return NULL; } + +#ifdef __mips__ + if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) { + void *buf2 = g_malloc(tcg_ctx.code_gen_buffer_size); + if (buf2 != NULL && !cross_256mb(buf2, tcg_ctx.code_gen_buffer_size)) { + /* Success! Use the new buffer. */ + g_free(buf); + buf = buf2; + } else { + /* Failure. Work with what we had. Since this is malloc + and not mmap, we can't free the other half. */ + g_free(buf2); + buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size); + } + } +#endif + + map_exec(buf, tcg_ctx.code_gen_buffer_size); return buf; } #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */ From f8c9eddb2bd3610cdafbdbc222e460c55c590b5c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Mar 2014 16:57:18 +0000 Subject: [PATCH 03/24] tcg-mips: Use J and JAL opcodes For userland builds calls will normally be in range, and for the exit_tb opcode the branch to the epilogue. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 46 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 0ae495c586..d4236c0828 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -222,6 +222,8 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, /* instruction opcodes */ enum { + OPC_J = 0x02 << 26, + OPC_JAL = 0x03 << 26, OPC_BEQ = 0x04 << 26, OPC_BNE = 0x05 << 26, OPC_BLEZ = 0x06 << 26, @@ -345,6 +347,29 @@ static inline void tcg_out_opc_sa(TCGContext *s, int opc, } +/* + * Type jump. + * Returns true if the branch was in range and the insn was emitted. 
+ */ +static bool tcg_out_opc_jmp(TCGContext *s, int opc, void *target) +{ + uintptr_t dest = (uintptr_t)target; + uintptr_t from = (uintptr_t)s->code_ptr + 4; + int32_t inst; + + /* The pc-region branch happens within the 256MB region of + the delay slot (thus the +4). */ + if ((from ^ dest) & -(1 << 28)) { + return false; + } + assert((dest & 3) == 0); + + inst = opc; + inst |= (dest >> 2) & 0x3ffffff; + tcg_out32(s, inst); + return true; +} + static inline void tcg_out_nop(TCGContext *s) { tcg_out32(s, 0); @@ -1247,10 +1272,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, #endif } -static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (intptr_t)target); - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); + /* Note that the ABI requires the called function's address to be + loaded into T9, even if a direct branch is in range. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); + + /* But do try a direct branch, allowing the cpu better insn prefetch. 
*/ + if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) { + tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); + } + tcg_out_nop(s); } @@ -1259,9 +1291,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, { switch(opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_V0, args[0]); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, (uintptr_t)tb_ret_addr); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, args[0]); + if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, (uintptr_t)tb_ret_addr); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); + } tcg_out_nop(s); break; case INDEX_op_goto_tb: From 7dae901d2d0476945a2dc353bb685501fd365868 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Mar 2014 17:15:21 +0000 Subject: [PATCH 04/24] tcg-mips: Fill the exit_tb delay slot Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index d4236c0828..2d2073f7e9 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -392,8 +392,10 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, } else if (arg == (uint16_t)arg) { tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg); } else { - tcg_out_opc_imm(s, OPC_LUI, reg, 0, arg >> 16); - tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); + tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16); + if (arg & 0xffff) { + tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); + } } } @@ -1291,12 +1293,21 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, { switch(opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, args[0]); - if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, (uintptr_t)tb_ret_addr); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); + { + uintptr_t a0 = 
args[0]; + TCGReg b0 = TCG_REG_ZERO; + + if (a0 & ~0xffff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); + b0 = TCG_REG_V0; + } + if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, + (uintptr_t)tb_ret_addr); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); + } + tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); } - tcg_out_nop(s); break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { From f9a716325facc32064d491ddbce4aa2e81f9a1ce Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Mar 2014 17:45:23 +0000 Subject: [PATCH 05/24] tcg-mips: Split large ldst offsets Use this to reduce goto_tb by one insn. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 2d2073f7e9..5ccfb1d76c 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -480,16 +480,18 @@ static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) } } -static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg, - TCGReg arg1, TCGArg arg2) +static void tcg_out_ldst(TCGContext *s, int opc, TCGReg data, + TCGReg addr, intptr_t ofs) { - if (arg2 == (int16_t) arg2) { - tcg_out_opc_imm(s, opc, arg, arg1, arg2); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, arg2); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_AT, TCG_REG_AT, arg1); - tcg_out_opc_imm(s, opc, arg, TCG_REG_AT, 0); + int16_t lo = ofs; + if (ofs != lo) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, ofs - lo); + if (addr != TCG_REG_ZERO) { + tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_AT, TCG_REG_AT, addr); + } + addr = TCG_REG_AT; } + tcg_out_opc_imm(s, opc, data, addr, lo); } static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, @@ -1315,9 +1317,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_abort(); } else { /* indirect jump method */ - 
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, - (uintptr_t)(s->tb_next + args[0])); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_AT, TCG_REG_AT, 0); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_AT, TCG_REG_ZERO, + (uintptr_t)(s->tb_next + args[0])); tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); } tcg_out_nop(s); From 9d8bf2d125f2d602d59d8fe1bdb27e17565b1fb8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 2 May 2014 15:01:31 -0700 Subject: [PATCH 06/24] tcg-mips: Move softmmu slow path out of line At the same time, tidy up the call helpers, avoiding a memory reference. Split out several subroutines. Use TCGMemOp constants. Make endianness selectable at runtime. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 920 +++++++++++++++++++++--------------------- 1 file changed, 457 insertions(+), 463 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 5ccfb1d76c..e7dbb3b79f 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -24,14 +24,17 @@ * THE SOFTWARE. 
*/ -#include "tcg-be-null.h" +#include "tcg-be-ldst.h" -#if defined(HOST_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN) -# define TCG_NEED_BSWAP 0 +#ifdef HOST_WORDS_BIGENDIAN +# define MIPS_BE 1 #else -# define TCG_NEED_BSWAP 1 +# define MIPS_BE 0 #endif +#define LO_OFF (MIPS_BE * 4) +#define HI_OFF (4 - LO_OFF) + #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "zero", @@ -161,11 +164,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 'l': /* qemu_ld input arg constraint */ ct->ct |= TCG_CT_REG; tcg_regset_set(ct->u.regs, 0xffffffff); -#if defined(CONFIG_SOFTMMU) tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); -# if (TARGET_LONG_BITS == 64) - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); -# endif +#if defined(CONFIG_SOFTMMU) + if (TARGET_LONG_BITS == 64) { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + } #endif break; case 'S': /* qemu_st constraint */ @@ -173,13 +176,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) tcg_regset_set(ct->u.regs, 0xffffffff); tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); #if defined(CONFIG_SOFTMMU) -# if (TARGET_LONG_BITS == 32) - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); -# endif - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); -# if TARGET_LONG_BITS == 64 - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); -# endif + if (TARGET_LONG_BITS == 32) { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); + } else { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); + } #endif break; case 'I': @@ -516,67 +518,6 @@ static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) } } -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * arg_num is where we want to put this argument, and is updated to be ready - * for the next call. arg is the argument itself. Note that arg_num 0..3 is - * real registers, 4+ on stack. 
- * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ -#define DEFINE_TCG_OUT_CALL_IARG(NAME, ARGPARAM) \ - static inline void NAME(TCGContext *s, int *arg_num, ARGPARAM) \ - { \ - if (*arg_num < 4) { \ - DEFINE_TCG_OUT_CALL_IARG_GET_ARG(tcg_target_call_iarg_regs[*arg_num]); \ - } else { \ - DEFINE_TCG_OUT_CALL_IARG_GET_ARG(TCG_REG_AT); \ - tcg_out_st(s, TCG_TYPE_I32, TCG_REG_AT, TCG_REG_SP, 4 * (*arg_num)); \ - } \ - (*arg_num)++; \ -} -#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ - tcg_out_opc_imm(s, OPC_ANDI, A, arg, 0xff); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg8, TCGReg arg) -#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG -#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ - tcg_out_opc_imm(s, OPC_ANDI, A, arg, 0xffff); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg16, TCGReg arg) -#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG -#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ - tcg_out_movi(s, TCG_TYPE_I32, A, arg); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, TCGArg arg) -#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG - -/* We don't use the macro for this one to avoid an unnecessary reg-reg - move when storing to the stack. 
*/ -static inline void tcg_out_call_iarg_reg32(TCGContext *s, int *arg_num, - TCGReg arg) -{ - if (*arg_num < 4) { - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[*arg_num], arg); - } else { - tcg_out_st(s, TCG_TYPE_I32, arg, TCG_REG_SP, 4 * (*arg_num)); - } - (*arg_num)++; -} - -static inline void tcg_out_call_iarg_reg64(TCGContext *s, int *arg_num, - TCGReg arg_low, TCGReg arg_high) -{ - (*arg_num) = (*arg_num + 1) & ~1; - -#if defined(HOST_WORDS_BIGENDIAN) - tcg_out_call_iarg_reg32(s, arg_num, arg_high); - tcg_out_call_iarg_reg32(s, arg_num, arg_low); -#else - tcg_out_call_iarg_reg32(s, arg_num, arg_low); - tcg_out_call_iarg_reg32(s, arg_num, arg_high); -#endif -} - static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int label_index) { @@ -899,383 +840,6 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); } -#if defined(CONFIG_SOFTMMU) -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, -}; - -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, -}; -#endif - -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - int opc) -{ - TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label1_ptr, *label2_ptr; - int arg_num; - int mem_index, s_bits; - int addr_meml; -# if TARGET_LONG_BITS == 64 - tcg_insn_unit *label3_ptr; - TCGReg addr_regh; - int addr_memh; -# endif -#endif - data_regl = *args++; - if (opc == 3) - data_regh = *args++; - else - data_regh = 0; - addr_regl = *args++; -#if defined(CONFIG_SOFTMMU) -# if TARGET_LONG_BITS == 64 - addr_regh = *args++; -# if 
defined(HOST_WORDS_BIGENDIAN) - addr_memh = 0; - addr_meml = 4; -# else - addr_memh = 4; - addr_meml = 0; -# endif -# else - addr_meml = 0; -# endif - mem_index = *args; - s_bits = opc & 3; -#endif - - if (opc == 3) { -#if defined(HOST_WORDS_BIGENDIAN) - data_reg1 = data_regh; - data_reg2 = data_regl; -#else - data_reg1 = data_regl; - data_reg2 = data_regh; -#endif - } else { - data_reg1 = data_regl; - data_reg2 = 0; - } -#if defined(CONFIG_SOFTMMU) - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + addr_meml); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T0, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, addr_regl); - -# if TARGET_LONG_BITS == 64 - label3_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); - - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + addr_memh); - - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, addr_regh, TCG_REG_AT); - tcg_out_nop(s); - - reloc_pc16(label3_ptr, s->code_ptr); -# else - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); -# endif - - /* slow path */ - arg_num = 0; - tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0); -# if TARGET_LONG_BITS == 64 - tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh); -# else - tcg_out_call_iarg_reg32(s, &arg_num, addr_regl); -# endif - tcg_out_call_iarg_imm32(s, &arg_num, mem_index); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_ld_helpers[s_bits]); - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); - tcg_out_nop(s); - - switch(opc) { - case 0: - 
tcg_out_opc_imm(s, OPC_ANDI, data_reg1, TCG_REG_V0, 0xff); - break; - case 0 | 4: - tcg_out_ext8s(s, data_reg1, TCG_REG_V0); - break; - case 1: - tcg_out_opc_imm(s, OPC_ANDI, data_reg1, TCG_REG_V0, 0xffff); - break; - case 1 | 4: - tcg_out_ext16s(s, data_reg1, TCG_REG_V0); - break; - case 2: - tcg_out_mov(s, TCG_TYPE_I32, data_reg1, TCG_REG_V0); - break; - case 3: - tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_V1); - tcg_out_mov(s, TCG_TYPE_I32, data_reg1, TCG_REG_V0); - break; - default: - tcg_abort(); - } - - label2_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - tcg_out_nop(s); - - /* label1: fast path */ - reloc_pc16(label1_ptr, s->code_ptr); - - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_V0, TCG_REG_A0, addr_regl); -#else - if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_V0, addr_regl, GUEST_BASE); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, GUEST_BASE); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_V0, TCG_REG_V0, addr_regl); - } -#endif - - switch(opc) { - case 0: - tcg_out_opc_imm(s, OPC_LBU, data_reg1, TCG_REG_V0, 0); - break; - case 0 | 4: - tcg_out_opc_imm(s, OPC_LB, data_reg1, TCG_REG_V0, 0); - break; - case 1: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LHU, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap16(s, data_reg1, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LHU, data_reg1, TCG_REG_V0, 0); - } - break; - case 1 | 4: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LHU, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap16s(s, data_reg1, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LH, data_reg1, TCG_REG_V0, 0); - } - break; - case 2: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap32(s, data_reg1, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LW, data_reg1, TCG_REG_V0, 0); - } - break; - case 3: - if (TCG_NEED_BSWAP) { - 
tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, TCG_REG_V0, 4); - tcg_out_bswap32(s, data_reg1, TCG_REG_T0); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap32(s, data_reg2, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LW, data_reg1, TCG_REG_V0, 0); - tcg_out_opc_imm(s, OPC_LW, data_reg2, TCG_REG_V0, 4); - } - break; - default: - tcg_abort(); - } - -#if defined(CONFIG_SOFTMMU) - reloc_pc16(label2_ptr, s->code_ptr); -#endif -} - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - int opc) -{ - TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label1_ptr, *label2_ptr; - int arg_num; - int mem_index, s_bits; - int addr_meml; -#endif -#if TARGET_LONG_BITS == 64 -# if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label3_ptr; - TCGReg addr_regh; - int addr_memh; -# endif -#endif - data_regl = *args++; - if (opc == 3) { - data_regh = *args++; - } else { - data_regh = 0; - } - addr_regl = *args++; -#if defined(CONFIG_SOFTMMU) -# if TARGET_LONG_BITS == 64 - addr_regh = *args++; -# if defined(HOST_WORDS_BIGENDIAN) - addr_memh = 0; - addr_meml = 4; -# else - addr_memh = 4; - addr_meml = 0; -# endif -# else - addr_meml = 0; -# endif - mem_index = *args; - s_bits = opc; -#endif - - if (opc == 3) { -#if defined(HOST_WORDS_BIGENDIAN) - data_reg1 = data_regh; - data_reg2 = data_regl; -#else - data_reg1 = data_regl; - data_reg2 = data_regh; -#endif - } else { - data_reg1 = data_regl; - data_reg2 = 0; - } - -#if defined(CONFIG_SOFTMMU) - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_write) + addr_meml); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T0, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - 
tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, addr_regl); - -# if TARGET_LONG_BITS == 64 - label3_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); - - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_write) + addr_memh); - - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, addr_regh, TCG_REG_AT); - tcg_out_nop(s); - - reloc_pc16(label3_ptr, s->code_ptr); -# else - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); -# endif - - /* slow path */ - arg_num = 0; - tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0); -# if TARGET_LONG_BITS == 64 - tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh); -# else - tcg_out_call_iarg_reg32(s, &arg_num, addr_regl); -# endif - switch(opc) { - case 0: - tcg_out_call_iarg_reg8(s, &arg_num, data_regl); - break; - case 1: - tcg_out_call_iarg_reg16(s, &arg_num, data_regl); - break; - case 2: - tcg_out_call_iarg_reg32(s, &arg_num, data_regl); - break; - case 3: - tcg_out_call_iarg_reg64(s, &arg_num, data_regl, data_regh); - break; - default: - tcg_abort(); - } - tcg_out_call_iarg_imm32(s, &arg_num, mem_index); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_st_helpers[s_bits]); - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); - tcg_out_nop(s); - - label2_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - tcg_out_nop(s); - - /* label1: fast path */ - reloc_pc16(label1_ptr, s->code_ptr); - - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, addr_regl); -#else - if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, addr_regl, GUEST_BASE); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, GUEST_BASE); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, addr_regl); - } - 
-#endif - - switch(opc) { - case 0: - tcg_out_opc_imm(s, OPC_SB, data_reg1, TCG_REG_A0, 0); - break; - case 1: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff); - tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0); - tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0); - } else { - tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0); - } - break; - case 2: - if (TCG_NEED_BSWAP) { - tcg_out_bswap32(s, TCG_REG_T0, data_reg1); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, TCG_REG_A0, 0); - } else { - tcg_out_opc_imm(s, OPC_SW, data_reg1, TCG_REG_A0, 0); - } - break; - case 3: - if (TCG_NEED_BSWAP) { - tcg_out_bswap32(s, TCG_REG_T0, data_reg2); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, TCG_REG_A0, 0); - tcg_out_bswap32(s, TCG_REG_T0, data_reg1); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, TCG_REG_A0, 4); - } else { - tcg_out_opc_imm(s, OPC_SW, data_reg1, TCG_REG_A0, 0); - tcg_out_opc_imm(s, OPC_SW, data_reg2, TCG_REG_A0, 4); - } - break; - default: - tcg_abort(); - } - -#if defined(CONFIG_SOFTMMU) - reloc_pc16(label2_ptr, s->code_ptr); -#endif -} - static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) { /* Note that the ABI requires the called function's address to be @@ -1290,6 +854,436 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) tcg_out_nop(s); } +#if defined(CONFIG_SOFTMMU) +/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, + int mmu_idx) */ +static void * const qemu_ld_helpers[4] = { + helper_ldb_mmu, + helper_ldw_mmu, + helper_ldl_mmu, + helper_ldq_mmu, +}; + +/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, + uintxx_t val, int mmu_idx) */ +static void * const qemu_st_helpers[4] = { + helper_stb_mmu, + helper_stw_mmu, + helper_stl_mmu, + helper_stq_mmu, +}; + +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * I is where we want to put this argument, and is updated and returned + * for the next call. 
ARG is the argument itself. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use) and 64 bit value in a lo:hi register pair. + */ + +static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) +{ + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); + } else { + tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); + } + return i + 1; +} + +static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_REG_AT; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_REG_AT; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) +{ + TCGReg tmp = TCG_REG_AT; + if (arg == 0) { + tmp = TCG_REG_ZERO; + } else { + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); + } + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) +{ + i = (i + 1) & ~1; + i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); + i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); + return i; +} + +/* Perform the tlb comparison operation. The complete host address is + placed in BASE. Clobbers AT, T0, A0. */ +static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, + TCGReg addrh, int mem_index, TCGMemOp s_bits, + tcg_insn_unit *label_ptr[2], bool is_load) +{ + int cmp_off + = (is_load + ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + + tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); + + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0); + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } + + /* Load the tlb comparator. */ + tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, cmp_off + LO_OFF); + if (TARGET_LONG_BITS == 64) { + tcg_out_opc_imm(s, OPC_LW, base, TCG_REG_A0, cmp_off + HI_OFF); + } + + /* Mask the page bits, keeping the alignment bits to compare against. + In between, load the tlb addend for the fast path. 
*/ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T0, + TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); + tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, addrl); + + label_ptr[0] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, TCG_REG_T0, TCG_REG_AT); + + if (TARGET_LONG_BITS == 64) { + /* delay slot */ + tcg_out_nop(s); + + label_ptr[1] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, addrh, base); + } + + /* delay slot */ + tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl); +} + +static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addrhi, + int mem_index, void *raddr, + tcg_insn_unit *label_ptr[2]) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr[0]; + if (TARGET_LONG_BITS == 64) { + label->label_ptr[1] = label_ptr[1]; + } +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOp opc = l->opc; + int i; + + /* resolve label address */ + reloc_pc16(l->label_ptr[0], s->code_ptr); + if (TARGET_LONG_BITS == 64) { + reloc_pc16(l->label_ptr[1], s->code_ptr); + } + + i = 0; + i = tcg_out_call_iarg_reg(s, i, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); + } else { + i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); + } + i = tcg_out_call_iarg_imm(s, i, l->mem_index); + tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); + + switch (opc & MO_SSIZE) { + case MO_UB: + tcg_out_opc_imm(s, OPC_ANDI, l->datalo_reg, TCG_REG_V0, 0xff); + break; + case MO_SB: + tcg_out_ext8s(s, l->datalo_reg, TCG_REG_V0); + break; + case MO_UW: + tcg_out_opc_imm(s, OPC_ANDI, l->datalo_reg, TCG_REG_V0, 0xffff); + break; + case 
MO_SW: + tcg_out_ext16s(s, l->datalo_reg, TCG_REG_V0); + break; + case MO_UL: + tcg_out_mov(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_V0); + break; + case MO_Q: + /* We eliminated V0 from the possible output registers, so it + cannot be clobbered here. So we must move V1 first. */ + tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? l->datalo_reg : l->datahi_reg, + TCG_REG_V1); + tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? l->datahi_reg : l->datalo_reg, + TCG_REG_V0); + break; + default: + tcg_abort(); + } + + reloc_pc16(s->code_ptr, l->raddr); + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); + tcg_out_nop(s); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOp opc = l->opc; + TCGMemOp s_bits = opc & MO_SIZE; + int i; + + /* resolve label address */ + reloc_pc16(l->label_ptr[0], s->code_ptr); + if (TARGET_LONG_BITS == 64) { + reloc_pc16(l->label_ptr[1], s->code_ptr); + } + + i = 0; + i = tcg_out_call_iarg_reg(s, i, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); + } else { + i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); + } + switch (s_bits) { + case MO_8: + i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); + break; + case MO_16: + i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); + break; + case MO_32: + i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); + break; + case MO_64: + i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); + break; + default: + tcg_abort(); + } + i = tcg_out_call_iarg_imm(s, i, l->mem_index); + tcg_out_call(s, qemu_st_helpers[s_bits]); + + reloc_pc16(s->code_ptr, l->raddr); + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); + tcg_out_nop(s); +} +#endif + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, TCGMemOp opc) +{ + switch (opc) { + case MO_UB: + tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0); + break; + case MO_SB: + tcg_out_opc_imm(s, OPC_LB, datalo, base, 0); + break; + case 
MO_UW | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LHU, TCG_REG_T0, base, 0); + tcg_out_bswap16(s, datalo, TCG_REG_T0); + break; + case MO_UW: + tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0); + break; + case MO_SW | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LHU, TCG_REG_T0, base, 0); + tcg_out_bswap16s(s, datalo, TCG_REG_T0); + break; + case MO_SW: + tcg_out_opc_imm(s, OPC_LH, datalo, base, 0); + break; + case MO_UL | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, base, 0); + tcg_out_bswap32(s, datalo, TCG_REG_T0); + break; + case MO_UL: + tcg_out_opc_imm(s, OPC_LW, datalo, base, 0); + break; + case MO_Q | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, base, HI_OFF); + tcg_out_bswap32(s, datalo, TCG_REG_T0); + tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, base, LO_OFF); + tcg_out_bswap32(s, datahi, TCG_REG_T0); + break; + case MO_Q: + tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF); + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh; +#if defined(CONFIG_SOFTMMU) + tcg_insn_unit *label_ptr[2]; + int mem_index; + TCGMemOp s_bits; +#endif + /* Note that we've eliminated V0 from the output registers, + so we won't overwrite the base register during loading. */ + TCGReg base = TCG_REG_V0; + + data_regl = *args++; + data_regh = ((opc & MO_SIZE) == MO_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TARGET_LONG_BITS == 64 ? 
*args++ : 0); + +#if defined(CONFIG_SOFTMMU) + mem_index = *args; + s_bits = opc & MO_SIZE; + + tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index, + s_bits, label_ptr, 1); + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 1, opc, data_regl, data_regh, addr_regl, addr_regh, + mem_index, s->code_ptr, label_ptr); +#else + if (GUEST_BASE == 0 && data_regl != addr_regl) { + base = addr_regl; + } else if (GUEST_BASE == (int16_t)GUEST_BASE) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); + } + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, TCGMemOp opc) +{ + switch (opc) { + case MO_8: + tcg_out_opc_imm(s, OPC_SB, datalo, base, 0); + break; + + case MO_16 | MO_BSWAP: + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, datalo, 0xffff); + tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0); + datalo = TCG_REG_T0; + /* FALLTHRU */ + case MO_16: + tcg_out_opc_imm(s, OPC_SH, datalo, base, 0); + break; + + case MO_32 | MO_BSWAP: + tcg_out_bswap32(s, TCG_REG_T0, datalo); + datalo = TCG_REG_T0; + /* FALLTHRU */ + case MO_32: + tcg_out_opc_imm(s, OPC_SW, datalo, base, 0); + break; + + case MO_64 | MO_BSWAP: + tcg_out_bswap32(s, TCG_REG_T0, datalo); + tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, base, HI_OFF); + tcg_out_bswap32(s, TCG_REG_T0, datahi); + tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, base, LO_OFF); + break; + case MO_64: + tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGMemOp opc) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh, base; +#if defined(CONFIG_SOFTMMU) + tcg_insn_unit *label_ptr[2]; + int 
mem_index; + TCGMemOp s_bits; +#endif + + data_regl = *args++; + data_regh = ((opc & MO_SIZE) == MO_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); + +#if defined(CONFIG_SOFTMMU) + mem_index = *args; + s_bits = opc & 3; + + /* Note that we eliminated the helper's address argument, + so we can reuse that for the base. */ + base = (TARGET_LONG_BITS == 32 ? TCG_REG_A1 : TCG_REG_A2); + tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index, + s_bits, label_ptr, 1); + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 0, opc, data_regl, data_regh, addr_regl, addr_regh, + mem_index, s->code_ptr, label_ptr); +#else + if (GUEST_BASE == 0) { + base = addr_regl; + } else { + base = TCG_REG_A0; + if (GUEST_BASE == (int16_t)GUEST_BASE) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); + } + } + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); +#endif +} + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1545,34 +1539,34 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, 0); + tcg_out_qemu_ld(s, args, MO_UB); break; case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, 0 | 4); + tcg_out_qemu_ld(s, args, MO_SB); break; case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, 1); + tcg_out_qemu_ld(s, args, MO_TEUW); break; case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, 1 | 4); + tcg_out_qemu_ld(s, args, MO_TESW); break; case INDEX_op_qemu_ld32: - tcg_out_qemu_ld(s, args, 2); + tcg_out_qemu_ld(s, args, MO_TEUL); break; case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, 3); + tcg_out_qemu_ld(s, args, MO_TEQ); break; case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, 0); + tcg_out_qemu_st(s, args, MO_UB); break; case 
INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, 1); + tcg_out_qemu_st(s, args, MO_TEUW); break; case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, 2); + tcg_out_qemu_st(s, args, MO_TEUL); break; case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, 3); + tcg_out_qemu_st(s, args, MO_TEQ); break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ From ce0236cfbdf9d4d67e017685e305ec2ad7bfaf3b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 14 Apr 2014 16:59:41 +0000 Subject: [PATCH 07/24] tcg-mips: Convert to new qemu_l/st helpers In addition, fill delay slots calling the helpers and tail call to the store helpers. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 107 ++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index e7dbb3b79f..f41081bfef 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -840,37 +840,52 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); } -static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) { /* Note that the ABI requires the called function's address to be loaded into T9, even if a direct branch is in range. */ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); /* But do try a direct branch, allowing the cpu better insn prefetch. 
*/ - if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) { - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); + if (tail) { + if (!tcg_out_opc_jmp(s, OPC_J, arg)) { + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_T9, 0); + } + } else { + if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) { + tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); + } } +} +static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + tcg_out_call_int(s, arg, false); tcg_out_nop(s); } #if defined(CONFIG_SOFTMMU) -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, }; -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, }; /* Helper routines for marshalling helper function arguments into @@ -1019,6 +1034,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc, static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { TCGMemOp opc = l->opc; + TCGReg v0; int i; /* resolve label address */ @@ -1027,47 +1043,34 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) reloc_pc16(l->label_ptr[1], 
s->code_ptr); } - i = 0; - i = tcg_out_call_iarg_reg(s, i, TCG_AREG0); + i = 1; if (TARGET_LONG_BITS == 64) { i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); } else { i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); } i = tcg_out_call_iarg_imm(s, i, l->mem_index); - tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); + i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); + tcg_out_call_int(s, qemu_ld_helpers[opc], false); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - switch (opc & MO_SSIZE) { - case MO_UB: - tcg_out_opc_imm(s, OPC_ANDI, l->datalo_reg, TCG_REG_V0, 0xff); - break; - case MO_SB: - tcg_out_ext8s(s, l->datalo_reg, TCG_REG_V0); - break; - case MO_UW: - tcg_out_opc_imm(s, OPC_ANDI, l->datalo_reg, TCG_REG_V0, 0xffff); - break; - case MO_SW: - tcg_out_ext16s(s, l->datalo_reg, TCG_REG_V0); - break; - case MO_UL: - tcg_out_mov(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_V0); - break; - case MO_Q: + v0 = l->datalo_reg; + if ((opc & MO_SIZE) == MO_64) { /* We eliminated V0 from the possible output registers, so it cannot be clobbered here. So we must move V1 first. */ - tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? l->datalo_reg : l->datahi_reg, - TCG_REG_V1); - tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? 
l->datahi_reg : l->datalo_reg, - TCG_REG_V0); - break; - default: - tcg_abort(); + if (MIPS_BE) { + tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); + v0 = l->datahi_reg; + } else { + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); + } } reloc_pc16(s->code_ptr, l->raddr); tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - tcg_out_nop(s); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0); } static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) @@ -1082,8 +1085,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) reloc_pc16(l->label_ptr[1], s->code_ptr); } - i = 0; - i = tcg_out_call_iarg_reg(s, i, TCG_AREG0); + i = 1; if (TARGET_LONG_BITS == 64) { i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); } else { @@ -1106,11 +1108,14 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_abort(); } i = tcg_out_call_iarg_imm(s, i, l->mem_index); - tcg_out_call(s, qemu_st_helpers[s_bits]); - reloc_pc16(s->code_ptr, l->raddr); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - tcg_out_nop(s); + /* Tail call to the store helper. Thus force the return address + computation to take place in the return address register. 
*/ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); + i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); + tcg_out_call_int(s, qemu_st_helpers[opc], true); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); } #endif From fbef2cc80fbc7836447d5b3d4741c4d15a144ec1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 14 Apr 2014 18:10:11 +0000 Subject: [PATCH 08/24] tcg-mips: Convert to new_ldst Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 76 ++++++++++++++----------------------------- tcg/mips/tcg-target.h | 2 +- 2 files changed, 25 insertions(+), 53 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index f41081bfef..4c3102783b 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -1165,10 +1165,11 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) { TCGReg addr_regl, addr_regh __attribute__((unused)); TCGReg data_regl, data_regh; + TCGMemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; int mem_index; @@ -1179,9 +1180,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc) TCGReg base = TCG_REG_V0; data_regl = *args++; - data_regh = ((opc & MO_SIZE) == MO_64 ? *args++ : 0); + data_regh = (is_64 ? *args++ : 0); addr_regl = *args++; addr_regh = (TARGET_LONG_BITS == 64 ? 
*args++ : 0); + opc = *args++; #if defined(CONFIG_SOFTMMU) mem_index = *args; @@ -1246,10 +1248,11 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGMemOp opc) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) { TCGReg addr_regl, addr_regh __attribute__((unused)); TCGReg data_regl, data_regh, base; + TCGMemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; int mem_index; @@ -1257,9 +1260,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGMemOp opc) #endif data_regl = *args++; - data_regh = ((opc & MO_SIZE) == MO_64 ? *args++ : 0); + data_regh = (is_64 ? *args++ : 0); addr_regl = *args++; addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); + opc = *args++; #if defined(CONFIG_SOFTMMU) mem_index = *args; @@ -1543,35 +1547,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_setcond2(s, args[5], args[0], args[1], args[2], args[3], args[4]); break; - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, MO_UB); + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, false); break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, MO_SB); + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, true); break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, MO_TEUW); + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, false); break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, MO_TESW); - break; - case INDEX_op_qemu_ld32: - tcg_out_qemu_ld(s, args, MO_TEUL); - break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, MO_TEQ); - break; - case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, MO_UB); - break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, MO_TEUW); - break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, MO_TEUL); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, MO_TEQ); + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, 
true); break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ @@ -1638,29 +1624,15 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, #if TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld8u, { "L", "lZ" } }, - { INDEX_op_qemu_ld8s, { "L", "lZ" } }, - { INDEX_op_qemu_ld16u, { "L", "lZ" } }, - { INDEX_op_qemu_ld16s, { "L", "lZ" } }, - { INDEX_op_qemu_ld32, { "L", "lZ" } }, - { INDEX_op_qemu_ld64, { "L", "L", "lZ" } }, - - { INDEX_op_qemu_st8, { "SZ", "SZ" } }, - { INDEX_op_qemu_st16, { "SZ", "SZ" } }, - { INDEX_op_qemu_st32, { "SZ", "SZ" } }, - { INDEX_op_qemu_st64, { "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i32, { "L", "lZ" } }, + { INDEX_op_qemu_st_i32, { "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } }, + { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } }, #else - { INDEX_op_qemu_ld8u, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld8s, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld16u, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld16s, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld32, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld64, { "L", "L", "lZ", "lZ" } }, - - { INDEX_op_qemu_st8, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_st16, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_st32, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_st64, { "SZ", "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i32, { "L", "lZ", "lZ" } }, + { INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } }, + { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } }, #endif { -1 }, }; diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index c6d2267d77..7509fa1426 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -120,7 +120,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions -#define TCG_TARGET_HAS_new_ldst 0 +#define TCG_TARGET_HAS_new_ldst 1 /* optional instructions automatically 
implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */ From 418839044e7ab7d0282f8685f8db40bd6cd97b96 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 09:03:59 -0700 Subject: [PATCH 09/24] tcg-mips: Rearrange register allocation Use FP (also known as S8) as a normal call-saved register. Include T0 in the allocation order and call-clobbered list even though it's currently used as a TCG temporary. Put the argument registers at the end of the allocation order. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 22 +++++++++++++++------- tcg/mips/tcg-target.h | 8 ++++---- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 4c3102783b..16b8f7fc0d 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -67,13 +67,14 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "k1", "gp", "sp", - "fp", + "s8", "ra", }; #endif /* check if we really need so many registers :P */ static const TCGReg tcg_target_reg_alloc_order[] = { + /* Call saved registers. */ TCG_REG_S0, TCG_REG_S1, TCG_REG_S2, @@ -82,6 +83,10 @@ static const TCGReg tcg_target_reg_alloc_order[] = { TCG_REG_S5, TCG_REG_S6, TCG_REG_S7, + TCG_REG_S8, + + /* Call clobbered registers. */ + TCG_REG_T0, TCG_REG_T1, TCG_REG_T2, TCG_REG_T3, @@ -91,12 +96,14 @@ static const TCGReg tcg_target_reg_alloc_order[] = { TCG_REG_T7, TCG_REG_T8, TCG_REG_T9, - TCG_REG_A0, - TCG_REG_A1, - TCG_REG_A2, - TCG_REG_A3, + TCG_REG_V1, TCG_REG_V0, - TCG_REG_V1 + + /* Argument registers, opposite order of allocation. 
*/ + TCG_REG_A3, + TCG_REG_A2, + TCG_REG_A1, + TCG_REG_A0, }; static const TCGReg tcg_target_call_iarg_regs[4] = { @@ -1646,7 +1653,7 @@ static int tcg_target_callee_save_regs[] = { TCG_REG_S5, TCG_REG_S6, TCG_REG_S7, - TCG_REG_FP, + TCG_REG_S8, TCG_REG_RA, /* should be last for ABI compliance */ }; @@ -1778,6 +1785,7 @@ static void tcg_target_init(TCGContext *s) (1 << TCG_REG_A1) | (1 << TCG_REG_A2) | (1 << TCG_REG_A3) | + (1 << TCG_REG_T0) | (1 << TCG_REG_T1) | (1 << TCG_REG_T2) | (1 << TCG_REG_T3) | diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 7509fa1426..c959d1c959 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -60,8 +60,11 @@ typedef enum { TCG_REG_K1, TCG_REG_GP, TCG_REG_SP, - TCG_REG_FP, + TCG_REG_S8, TCG_REG_RA, + + TCG_REG_CALL_STACK = TCG_REG_SP, + TCG_AREG0 = TCG_REG_S0, } TCGReg; #define TCG_CT_CONST_ZERO 0x100 @@ -69,7 +72,6 @@ typedef enum { #define TCG_CT_CONST_S16 0x400 /* used for function call generation */ -#define TCG_REG_CALL_STACK TCG_REG_SP #define TCG_TARGET_STACK_ALIGN 8 #define TCG_TARGET_CALL_STACK_OFFSET 16 #define TCG_TARGET_CALL_ALIGN_ARGS 1 @@ -127,8 +129,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */ #define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */ -#define TCG_AREG0 TCG_REG_S0 - #ifdef __OpenBSD__ #include #else From 6c530e32f4d2edacf870d88fd733af3442f2f0f1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 09:07:52 -0700 Subject: [PATCH 10/24] tcg-mips: Introduce TCG_TMP0, TCG_TMP1 Use these instead of hard-coding the registers to use for temporaries. 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 231 +++++++++++++++++++++--------------------- 1 file changed, 117 insertions(+), 114 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 16b8f7fc0d..54c91479a7 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -72,6 +72,9 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif +#define TCG_TMP0 TCG_REG_AT +#define TCG_TMP1 TCG_REG_T0 + /* check if we really need so many registers :P */ static const TCGReg tcg_target_reg_alloc_order[] = { /* Call saved registers. */ @@ -414,14 +417,14 @@ static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); } else { /* ret and arg can't be register at */ - if (ret == TCG_REG_AT || arg == TCG_REG_AT) { + if (ret == TCG_TMP0 || arg == TCG_TMP0) { tcg_abort(); } - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } } @@ -432,14 +435,14 @@ static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); } else { /* ret and arg can't be register at */ - if (ret == TCG_REG_AT || arg == TCG_REG_AT) { + if (ret == TCG_TMP0 || arg == TCG_TMP0) { tcg_abort(); } - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } } @@ -450,22 +453,22 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); } else { /* ret and arg must be different and can't be 
register at */ - if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) { + if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) { tcg_abort(); } tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 24); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, arg, 0xff00); - tcg_out_opc_sa(s, OPC_SLL, TCG_REG_AT, TCG_REG_AT, 8); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } } @@ -494,11 +497,11 @@ static void tcg_out_ldst(TCGContext *s, int opc, TCGReg data, { int16_t lo = ofs; if (ofs != lo) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, ofs - lo); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo); if (addr != TCG_REG_ZERO) { - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_AT, TCG_REG_AT, addr); + tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr); } - addr = TCG_REG_AT; + addr = TCG_TMP0; } tcg_out_opc_imm(s, opc, data, addr, lo); } @@ -520,8 +523,8 @@ static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) if (val == (int16_t)val) { tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, val); - tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_REG_AT); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val); + tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0); } } @@ -541,49 +544,49 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1, if (arg2 == 0) { 
tcg_out_opc_br(s, OPC_BLTZ, 0, arg1); } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, arg1, arg2); + tcg_out_opc_br(s, OPC_BNE, TCG_TMP0, TCG_REG_ZERO); } break; case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, arg1, arg2); + tcg_out_opc_br(s, OPC_BNE, TCG_TMP0, TCG_REG_ZERO); break; case TCG_COND_GE: if (arg2 == 0) { tcg_out_opc_br(s, OPC_BGEZ, 0, arg1); } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, arg1, arg2); + tcg_out_opc_br(s, OPC_BEQ, TCG_TMP0, TCG_REG_ZERO); } break; case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, arg1, arg2); + tcg_out_opc_br(s, OPC_BEQ, TCG_TMP0, TCG_REG_ZERO); break; case TCG_COND_LE: if (arg2 == 0) { tcg_out_opc_br(s, OPC_BLEZ, 0, arg1); } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, arg2, arg1); + tcg_out_opc_br(s, OPC_BEQ, TCG_TMP0, TCG_REG_ZERO); } break; case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, arg2, arg1); + tcg_out_opc_br(s, OPC_BEQ, TCG_TMP0, TCG_REG_ZERO); break; case TCG_COND_GT: if (arg2 == 0) { tcg_out_opc_br(s, OPC_BGTZ, 0, arg1); } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, arg2, arg1); + tcg_out_opc_br(s, OPC_BNE, TCG_TMP0, TCG_REG_ZERO); } break; case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, 
TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, arg2, arg1); + tcg_out_opc_br(s, OPC_BNE, TCG_TMP0, TCG_REG_ZERO); break; default: tcg_abort(); @@ -673,8 +676,8 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } else if (c2 == 0) { tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c1); } else { - tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_TMP0); } break; case TCG_COND_NE: @@ -683,41 +686,41 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } else if (c2 == 0) { tcg_out_opc_reg(s, OPC_MOVN, ret, v, c1); } else { - tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_TMP0); } break; case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, c1, c2); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_TMP0); break; case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, c1, c2); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_TMP0); break; case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, c1, c2); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_TMP0); break; case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, c1, c2); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_TMP0); break; case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVZ, 
ret, v, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, c2, c1); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_TMP0); break; case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, c2, c1); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_TMP0); break; case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, c2, c1); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_TMP0); break; case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, c2, c1); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_TMP0); break; default: tcg_abort(); @@ -790,37 +793,37 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, { switch (cond) { case TCG_COND_EQ: - tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_AT, arg2, arg4); - tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_T0, arg1, arg3); - tcg_out_opc_reg(s, OPC_AND, ret, TCG_REG_AT, TCG_REG_T0); + tcg_out_setcond(s, TCG_COND_EQ, TCG_TMP0, arg2, arg4); + tcg_out_setcond(s, TCG_COND_EQ, TCG_TMP1, arg1, arg3); + tcg_out_opc_reg(s, OPC_AND, ret, TCG_TMP0, TCG_TMP1); return; case TCG_COND_NE: - tcg_out_setcond(s, TCG_COND_NE, TCG_REG_AT, arg2, arg4); - tcg_out_setcond(s, TCG_COND_NE, TCG_REG_T0, arg1, arg3); - tcg_out_opc_reg(s, OPC_OR, ret, TCG_REG_AT, TCG_REG_T0); + tcg_out_setcond(s, TCG_COND_NE, TCG_TMP0, arg2, arg4); + tcg_out_setcond(s, TCG_COND_NE, TCG_TMP1, arg1, arg3); + tcg_out_opc_reg(s, OPC_OR, ret, TCG_TMP0, TCG_TMP1); return; case TCG_COND_LT: case TCG_COND_LE: - tcg_out_setcond(s, TCG_COND_LT, TCG_REG_AT, arg2, arg4); + tcg_out_setcond(s, TCG_COND_LT, TCG_TMP0, arg2, arg4); break; case TCG_COND_GT: case TCG_COND_GE: - tcg_out_setcond(s, TCG_COND_GT, TCG_REG_AT, arg2, arg4); + tcg_out_setcond(s, TCG_COND_GT, TCG_TMP0, arg2, 
arg4); break; case TCG_COND_LTU: case TCG_COND_LEU: - tcg_out_setcond(s, TCG_COND_LTU, TCG_REG_AT, arg2, arg4); + tcg_out_setcond(s, TCG_COND_LTU, TCG_TMP0, arg2, arg4); break; case TCG_COND_GTU: case TCG_COND_GEU: - tcg_out_setcond(s, TCG_COND_GTU, TCG_REG_AT, arg2, arg4); + tcg_out_setcond(s, TCG_COND_GTU, TCG_TMP0, arg2, arg4); break; default: tcg_abort(); break; } - tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_T0, arg2, arg4); + tcg_out_setcond(s, TCG_COND_EQ, TCG_TMP1, arg2, arg4); switch(cond) { case TCG_COND_LT: @@ -843,8 +846,8 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, tcg_abort(); } - tcg_out_opc_reg(s, OPC_AND, ret, ret, TCG_REG_T0); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_AND, ret, ret, TCG_TMP1); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) @@ -917,7 +920,7 @@ static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) { - TCGReg tmp = TCG_REG_AT; + TCGReg tmp = TCG_TMP0; if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { tmp = tcg_target_call_iarg_regs[i]; } @@ -927,7 +930,7 @@ static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) { - TCGReg tmp = TCG_REG_AT; + TCGReg tmp = TCG_TMP0; if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { tmp = tcg_target_call_iarg_regs[i]; } @@ -937,7 +940,7 @@ static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) { - TCGReg tmp = TCG_REG_AT; + TCGReg tmp = TCG_TMP0; if (arg == 0) { tmp = TCG_REG_ZERO; } else { @@ -989,20 +992,20 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, } /* Load the tlb comparator. 
*/ - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, cmp_off + LO_OFF); + tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + LO_OFF); if (TARGET_LONG_BITS == 64) { tcg_out_opc_imm(s, OPC_LW, base, TCG_REG_A0, cmp_off + HI_OFF); } /* Mask the page bits, keeping the alignment bits to compare against. In between, load the tlb addend for the fast path. */ - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T0, + tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); - tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, addrl); + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); label_ptr[0] = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_REG_T0, TCG_REG_AT); + tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); if (TARGET_LONG_BITS == 64) { /* delay slot */ @@ -1137,31 +1140,31 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_opc_imm(s, OPC_LB, datalo, base, 0); break; case MO_UW | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LHU, TCG_REG_T0, base, 0); - tcg_out_bswap16(s, datalo, TCG_REG_T0); + tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); + tcg_out_bswap16(s, datalo, TCG_TMP1); break; case MO_UW: tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0); break; case MO_SW | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LHU, TCG_REG_T0, base, 0); - tcg_out_bswap16s(s, datalo, TCG_REG_T0); + tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); + tcg_out_bswap16s(s, datalo, TCG_TMP1); break; case MO_SW: tcg_out_opc_imm(s, OPC_LH, datalo, base, 0); break; case MO_UL | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, base, 0); - tcg_out_bswap32(s, datalo, TCG_REG_T0); + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0); + tcg_out_bswap32(s, datalo, TCG_TMP1); break; case MO_UL: tcg_out_opc_imm(s, OPC_LW, datalo, base, 0); break; case MO_Q | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, base, HI_OFF); - tcg_out_bswap32(s, datalo, TCG_REG_T0); - tcg_out_opc_imm(s, OPC_LW, 
TCG_REG_T0, base, LO_OFF); - tcg_out_bswap32(s, datahi, TCG_REG_T0); + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF); + tcg_out_bswap32(s, datalo, TCG_TMP1); + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF); + tcg_out_bswap32(s, datahi, TCG_TMP1); break; case MO_Q: tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF); @@ -1223,27 +1226,27 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, break; case MO_16 | MO_BSWAP: - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, datalo, 0xffff); - tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0); - datalo = TCG_REG_T0; + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff); + tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); + datalo = TCG_TMP1; /* FALLTHRU */ case MO_16: tcg_out_opc_imm(s, OPC_SH, datalo, base, 0); break; case MO_32 | MO_BSWAP: - tcg_out_bswap32(s, TCG_REG_T0, datalo); - datalo = TCG_REG_T0; + tcg_out_bswap32(s, TCG_TMP1, datalo); + datalo = TCG_TMP1; /* FALLTHRU */ case MO_32: tcg_out_opc_imm(s, OPC_SW, datalo, base, 0); break; case MO_64 | MO_BSWAP: - tcg_out_bswap32(s, TCG_REG_T0, datalo); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, base, HI_OFF); - tcg_out_bswap32(s, TCG_REG_T0, datahi); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, base, LO_OFF); + tcg_out_bswap32(s, TCG_TMP1, datalo); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF); + tcg_out_bswap32(s, TCG_TMP1, datahi); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF); break; case MO_64: tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF); @@ -1314,9 +1317,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, b0 = TCG_REG_V0; } if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)tb_ret_addr); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); } tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); } @@ -1327,9 +1330,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, 
tcg_abort(); } else { /* indirect jump method */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_AT, TCG_REG_ZERO, + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, (uintptr_t)(s->tb_next + args[0])); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); } tcg_out_nop(s); s->tb_next_offset[args[0]] = tcg_current_code_size(s); @@ -1372,18 +1375,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_add2_i32: if (const_args[4]) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_AT, args[2], args[4]); + tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP0, args[2], args[4]); } else { - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_AT, args[2], args[4]); + tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, args[2], args[4]); } - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_T0, TCG_REG_AT, args[2]); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP1, TCG_TMP0, args[2]); if (const_args[5]) { tcg_out_opc_imm(s, OPC_ADDIU, args[1], args[3], args[5]); } else { tcg_out_opc_reg(s, OPC_ADDU, args[1], args[3], args[5]); } - tcg_out_opc_reg(s, OPC_ADDU, args[1], args[1], TCG_REG_T0); - tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_REG_AT); + tcg_out_opc_reg(s, OPC_ADDU, args[1], args[1], TCG_TMP1); + tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_TMP0); break; case INDEX_op_sub_i32: if (const_args[2]) { @@ -1394,18 +1397,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_sub2_i32: if (const_args[4]) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_AT, args[2], -args[4]); + tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP0, args[2], -args[4]); } else { - tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, args[2], args[4]); + tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, args[2], args[4]); } - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_T0, args[2], TCG_REG_AT); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP1, args[2], TCG_TMP0); if (const_args[5]) { tcg_out_opc_imm(s, OPC_ADDIU, args[1], args[3], -args[5]); } else { tcg_out_opc_reg(s, OPC_SUBU, args[1], args[3], args[5]); } - tcg_out_opc_reg(s, 
OPC_SUBU, args[1], args[1], TCG_REG_T0); - tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_REG_AT); + tcg_out_opc_reg(s, OPC_SUBU, args[1], args[1], TCG_TMP1); + tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_TMP0); break; case INDEX_op_mul_i32: if (use_mips32_instructions) { @@ -1503,9 +1506,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, if (const_args[2]) { tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], 0x20 - args[2]); } else { - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, 32); - tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, TCG_REG_AT, args[2]); - tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_REG_AT, args[1]); + tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP0, 32); + tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_TMP0, args[2]); + tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_TMP0, args[1]); } break; case INDEX_op_rotr_i32: @@ -1800,8 +1803,8 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); /* zero register */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_K0); /* kernel use only */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_AT); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_T0); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ From f216a35f364df893519656648677a7629a36de9e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 09:10:16 -0700 Subject: [PATCH 11/24] tcg-mips: Use T9 for TCG_TMP1 T0 is an argument register for the n32 and n64 abis. T9 is the call address register for the abis, and is more directly under the control of the backend. 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 54c91479a7..78caf25c3a 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -73,7 +73,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #endif #define TCG_TMP0 TCG_REG_AT -#define TCG_TMP1 TCG_REG_T0 +#define TCG_TMP1 TCG_REG_T9 /* check if we really need so many registers :P */ static const TCGReg tcg_target_reg_alloc_order[] = { From 1c4182687e20a4255ea827fe05081578d9c518f9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 09:27:40 -0700 Subject: [PATCH 12/24] tcg-mips: Use EXT for AND on mips32r2 At the same time, tidy deposit by introducing tcg_out_opc_bf. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 60 +++++++++++++++++++++++++++++++++++-------- tcg/mips/tcg-target.h | 4 --- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 78caf25c3a..54571fb07a 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -155,6 +155,16 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, reloc_pc16(code_ptr, (tcg_insn_unit *)value); } +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_U16 0x200 +#define TCG_CT_CONST_S16 0x400 +#define TCG_CT_CONST_P2M1 0x800 + +static inline bool is_p2m1(tcg_target_long val) +{ + return val && ((val + 1) & val) == 0; +} + /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -200,6 +210,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 'J': ct->ct |= TCG_CT_CONST_S16; break; + case 'K': + ct->ct |= TCG_CT_CONST_P2M1; + break; case 'Z': /* We are cheating a bit here, using the fact that the register ZERO is also the register number 0. 
Hence there is no need @@ -220,16 +233,19 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, { int ct; ct = arg_ct->ct; - if (ct & TCG_CT_CONST) + if (ct & TCG_CT_CONST) { return 1; - else if ((ct & TCG_CT_CONST_ZERO) && val == 0) + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { return 1; - else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) + } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { return 1; - else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) + } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { return 1; - else - return 0; + } else if ((ct & TCG_CT_CONST_P2M1) + && use_mips32r2_instructions && is_p2m1(val)) { + return 1; + } + return 0; } /* instruction opcodes */ @@ -293,6 +309,7 @@ enum { OPC_MUL = OPC_SPECIAL2 | 0x002, OPC_SPECIAL3 = 0x1f << 26, + OPC_EXT = OPC_SPECIAL3 | 0x000, OPC_INS = OPC_SPECIAL3 | 0x004, OPC_WSBH = OPC_SPECIAL3 | 0x0a0, OPC_SEB = OPC_SPECIAL3 | 0x420, @@ -329,6 +346,22 @@ static inline void tcg_out_opc_imm(TCGContext *s, int opc, tcg_out32(s, inst); } +/* + * Type bitfield + */ +static inline void tcg_out_opc_bf(TCGContext *s, int opc, TCGReg rt, + TCGReg rs, int msb, int lsb) +{ + int32_t inst; + + inst = opc; + inst |= (rs & 0x1F) << 21; + inst |= (rt & 0x1F) << 16; + inst |= (msb & 0x1F) << 11; + inst |= (lsb & 0x1F) << 6; + tcg_out32(s, inst); +} + /* * Type branch */ @@ -1455,7 +1488,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_and_i32: if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ANDI, args[0], args[1], args[2]); + if (args[2] == (uint16_t)args[2]) { + tcg_out_opc_imm(s, OPC_ANDI, args[0], args[1], args[2]); + } else { + int msb = ctz32(~args[2]) - 1; + assert(use_mips32r2_instructions); + assert(is_p2m1(args[2])); + tcg_out_opc_bf(s, OPC_EXT, args[0], args[1], msb, 0); + } } else { tcg_out_opc_reg(s, OPC_AND, args[0], args[1], args[2]); } @@ -1535,8 +1575,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, 
break; case INDEX_op_deposit_i32: - tcg_out_opc_imm(s, OPC_INS, args[0], args[2], - ((args[3] + args[4] - 1) << 11) | (args[3] << 6)); + tcg_out_opc_bf(s, OPC_INS, args[0], args[2], + args[3] + args[4] - 1, args[3]); break; case INDEX_op_brcond_i32: @@ -1604,7 +1644,7 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_and_i32, { "r", "rZ", "rI" } }, + { INDEX_op_and_i32, { "r", "rZ", "rIK" } }, { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, { INDEX_op_not_i32, { "r", "rZ" } }, { INDEX_op_or_i32, { "r", "rZ", "rIZ" } }, diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index c959d1c959..b5face8b4d 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -67,10 +67,6 @@ typedef enum { TCG_AREG0 = TCG_REG_S0, } TCGReg; -#define TCG_CT_CONST_ZERO 0x100 -#define TCG_CT_CONST_U16 0x200 -#define TCG_CT_CONST_S16 0x400 - /* used for function call generation */ #define TCG_TARGET_STACK_ALIGN 8 #define TCG_TARGET_CALL_STACK_OFFSET 16 From ac0f3b12636f58062129064e3e8f8f65ccc18b1e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 09:30:47 -0700 Subject: [PATCH 13/24] tcg-mips: Name the opcode enumeration And use it in the opcode emission functions. 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 54571fb07a..920208a3a6 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -249,7 +249,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, } /* instruction opcodes */ -enum { +typedef enum { OPC_J = 0x02 << 26, OPC_JAL = 0x03 << 26, OPC_BEQ = 0x04 << 26, @@ -314,12 +314,12 @@ enum { OPC_WSBH = OPC_SPECIAL3 | 0x0a0, OPC_SEB = OPC_SPECIAL3 | 0x420, OPC_SEH = OPC_SPECIAL3 | 0x620, -}; +} MIPSInsn; /* * Type reg */ -static inline void tcg_out_opc_reg(TCGContext *s, int opc, +static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc, TCGReg rd, TCGReg rs, TCGReg rt) { int32_t inst; @@ -334,7 +334,7 @@ static inline void tcg_out_opc_reg(TCGContext *s, int opc, /* * Type immediate */ -static inline void tcg_out_opc_imm(TCGContext *s, int opc, +static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc, TCGReg rt, TCGReg rs, TCGArg imm) { int32_t inst; @@ -349,7 +349,7 @@ static inline void tcg_out_opc_imm(TCGContext *s, int opc, /* * Type bitfield */ -static inline void tcg_out_opc_bf(TCGContext *s, int opc, TCGReg rt, +static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt, TCGReg rs, int msb, int lsb) { int32_t inst; @@ -365,7 +365,7 @@ static inline void tcg_out_opc_bf(TCGContext *s, int opc, TCGReg rt, /* * Type branch */ -static inline void tcg_out_opc_br(TCGContext *s, int opc, +static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, TCGReg rt, TCGReg rs) { /* We pay attention here to not modify the branch target by reading @@ -379,7 +379,7 @@ static inline void tcg_out_opc_br(TCGContext *s, int opc, /* * Type sa */ -static inline void tcg_out_opc_sa(TCGContext *s, int opc, +static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc, TCGReg rd, TCGReg rt, TCGArg sa) { 
int32_t inst; @@ -396,7 +396,7 @@ static inline void tcg_out_opc_sa(TCGContext *s, int opc, * Type jump. * Returns true if the branch was in range and the insn was emitted. */ -static bool tcg_out_opc_jmp(TCGContext *s, int opc, void *target) +static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target) { uintptr_t dest = (uintptr_t)target; uintptr_t from = (uintptr_t)s->code_ptr + 4; @@ -525,7 +525,7 @@ static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) } } -static void tcg_out_ldst(TCGContext *s, int opc, TCGReg data, +static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data, TCGReg addr, intptr_t ofs) { int16_t lo = ofs; From 070603f62b6e97bfdc3f33798fbdc2772798cdfc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 18:54:07 -0700 Subject: [PATCH 14/24] tcg-mips: Fix subtract immediate range Since we must use ADDIU, we would generate incorrect code for -32768. Leaving off subtract of +32768 makes things easier for a follow-on patch. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 920208a3a6..5021dea5f8 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -156,9 +156,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, } #define TCG_CT_CONST_ZERO 0x100 -#define TCG_CT_CONST_U16 0x200 -#define TCG_CT_CONST_S16 0x400 -#define TCG_CT_CONST_P2M1 0x800 +#define TCG_CT_CONST_U16 0x200 /* Unsigned 16-bit: 0 - 0xffff. */ +#define TCG_CT_CONST_S16 0x400 /* Signed 16-bit: -32768 - 32767 */ +#define TCG_CT_CONST_P2M1 0x800 /* Power of 2 minus 1.
*/ +#define TCG_CT_CONST_N16 0x1000 /* "Negatable" 16-bit: -32767 - 32767 */ static inline bool is_p2m1(tcg_target_long val) { @@ -213,6 +214,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 'K': ct->ct |= TCG_CT_CONST_P2M1; break; + case 'N': + ct->ct |= TCG_CT_CONST_N16; + break; case 'Z': /* We are cheating a bit here, using the fact that the register ZERO is also the register number 0. Hence there is no need @@ -241,6 +245,8 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, return 1; } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { return 1; + } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) { + return 1; } else if ((ct & TCG_CT_CONST_P2M1) && use_mips32r2_instructions && is_p2m1(val)) { return 1; @@ -1642,7 +1648,7 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_sub_i32, { "r", "rZ", "rN" } }, { INDEX_op_and_i32, { "r", "rZ", "rIK" } }, { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, @@ -1670,7 +1676,7 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, #if TARGET_LONG_BITS == 32 From 22ee3a987d5522785908d0c2cbbcce0f112a0c45 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 19:05:52 -0700 Subject: [PATCH 15/24] tcg-mips: Hoist args loads Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 201 ++++++++++++++++++++++-------------------- 1 file changed, 103 insertions(+), 98 deletions(-) diff --git a/tcg/mips/tcg-target.c 
b/tcg/mips/tcg-target.c index 5021dea5f8..516c2b71ba 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -1345,10 +1345,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { - switch(opc) { + TCGArg a0, a1, a2; + int c2; + + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + c2 = const_args[2]; + + switch (opc) { case INDEX_op_exit_tb: { - uintptr_t a0 = args[0]; TCGReg b0 = TCG_REG_ZERO; if (a0 & ~0xffff) { @@ -1370,237 +1377,235 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } else { /* indirect jump method */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_next + args[0])); + (uintptr_t)(s->tb_next + a0)); tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); } tcg_out_nop(s); - s->tb_next_offset[args[0]] = tcg_current_code_size(s); + s->tb_next_offset[a0] = tcg_current_code_size(s); break; case INDEX_op_br: - tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, args[0]); + tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, a0); break; case INDEX_op_ld8u_i32: - tcg_out_ldst(s, OPC_LBU, args[0], args[1], args[2]); + tcg_out_ldst(s, OPC_LBU, a0, a1, a2); break; case INDEX_op_ld8s_i32: - tcg_out_ldst(s, OPC_LB, args[0], args[1], args[2]); + tcg_out_ldst(s, OPC_LB, a0, a1, a2); break; case INDEX_op_ld16u_i32: - tcg_out_ldst(s, OPC_LHU, args[0], args[1], args[2]); + tcg_out_ldst(s, OPC_LHU, a0, a1, a2); break; case INDEX_op_ld16s_i32: - tcg_out_ldst(s, OPC_LH, args[0], args[1], args[2]); + tcg_out_ldst(s, OPC_LH, a0, a1, a2); break; case INDEX_op_ld_i32: - tcg_out_ldst(s, OPC_LW, args[0], args[1], args[2]); + tcg_out_ldst(s, OPC_LW, a0, a1, a2); break; case INDEX_op_st8_i32: - tcg_out_ldst(s, OPC_SB, args[0], args[1], args[2]); + tcg_out_ldst(s, OPC_SB, a0, a1, a2); break; case INDEX_op_st16_i32: - tcg_out_ldst(s, OPC_SH, args[0], args[1], args[2]); + tcg_out_ldst(s, OPC_SH, a0, a1, 
a2); break; case INDEX_op_st_i32: - tcg_out_ldst(s, OPC_SW, args[0], args[1], args[2]); + tcg_out_ldst(s, OPC_SW, a0, a1, a2); break; case INDEX_op_add_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[0], args[1], args[2]); + if (c2) { + tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_ADDU, args[0], args[1], args[2]); + tcg_out_opc_reg(s, OPC_ADDU, a0, a1, a2); } break; case INDEX_op_add2_i32: if (const_args[4]) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP0, args[2], args[4]); + tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP0, a2, args[4]); } else { - tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, args[2], args[4]); + tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, a2, args[4]); } - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP1, TCG_TMP0, args[2]); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP1, TCG_TMP0, a2); if (const_args[5]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[1], args[3], args[5]); + tcg_out_opc_imm(s, OPC_ADDIU, a1, args[3], args[5]); } else { - tcg_out_opc_reg(s, OPC_ADDU, args[1], args[3], args[5]); + tcg_out_opc_reg(s, OPC_ADDU, a1, args[3], args[5]); } - tcg_out_opc_reg(s, OPC_ADDU, args[1], args[1], TCG_TMP1); - tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_TMP0); + tcg_out_opc_reg(s, OPC_ADDU, a1, a1, TCG_TMP1); + tcg_out_mov(s, TCG_TYPE_I32, a0, TCG_TMP0); break; case INDEX_op_sub_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[0], args[1], -args[2]); + if (c2) { + tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2); } else { - tcg_out_opc_reg(s, OPC_SUBU, args[0], args[1], args[2]); + tcg_out_opc_reg(s, OPC_SUBU, a0, a1, a2); } break; case INDEX_op_sub2_i32: if (const_args[4]) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP0, args[2], -args[4]); + tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP0, a2, -args[4]); } else { - tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, args[2], args[4]); + tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, a2, args[4]); } - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP1, args[2], TCG_TMP0); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP1, a2, TCG_TMP0); if 
(const_args[5]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[1], args[3], -args[5]); + tcg_out_opc_imm(s, OPC_ADDIU, a1, args[3], -args[5]); } else { - tcg_out_opc_reg(s, OPC_SUBU, args[1], args[3], args[5]); + tcg_out_opc_reg(s, OPC_SUBU, a1, args[3], args[5]); } - tcg_out_opc_reg(s, OPC_SUBU, args[1], args[1], TCG_TMP1); - tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_TMP0); + tcg_out_opc_reg(s, OPC_SUBU, a1, a1, TCG_TMP1); + tcg_out_mov(s, TCG_TYPE_I32, a0, TCG_TMP0); break; case INDEX_op_mul_i32: if (use_mips32_instructions) { - tcg_out_opc_reg(s, OPC_MUL, args[0], args[1], args[2]); + tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); + tcg_out_opc_reg(s, OPC_MULT, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); } break; case INDEX_op_muls2_i32: - tcg_out_opc_reg(s, OPC_MULT, 0, args[2], args[3]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, args[1], 0, 0); + tcg_out_opc_reg(s, OPC_MULT, 0, a2, args[3]); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); break; case INDEX_op_mulu2_i32: - tcg_out_opc_reg(s, OPC_MULTU, 0, args[2], args[3]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, args[1], 0, 0); + tcg_out_opc_reg(s, OPC_MULTU, 0, a2, args[3]); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); break; case INDEX_op_mulsh_i32: - tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); + tcg_out_opc_reg(s, OPC_MULT, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); break; case INDEX_op_muluh_i32: - tcg_out_opc_reg(s, OPC_MULTU, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); + tcg_out_opc_reg(s, OPC_MULTU, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); break; case INDEX_op_div_i32: - tcg_out_opc_reg(s, OPC_DIV, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFLO, 
args[0], 0, 0); + tcg_out_opc_reg(s, OPC_DIV, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); break; case INDEX_op_divu_i32: - tcg_out_opc_reg(s, OPC_DIVU, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); + tcg_out_opc_reg(s, OPC_DIVU, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); break; case INDEX_op_rem_i32: - tcg_out_opc_reg(s, OPC_DIV, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); + tcg_out_opc_reg(s, OPC_DIV, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); break; case INDEX_op_remu_i32: - tcg_out_opc_reg(s, OPC_DIVU, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); + tcg_out_opc_reg(s, OPC_DIVU, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); break; case INDEX_op_and_i32: - if (const_args[2]) { - if (args[2] == (uint16_t)args[2]) { - tcg_out_opc_imm(s, OPC_ANDI, args[0], args[1], args[2]); + if (c2) { + if (a2 == (uint16_t)a2) { + tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2); } else { - int msb = ctz32(~args[2]) - 1; + int msb = ctz32(~a2) - 1; assert(use_mips32r2_instructions); - assert(is_p2m1(args[2])); - tcg_out_opc_bf(s, OPC_EXT, args[0], args[1], msb, 0); + assert(is_p2m1(a2)); + tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); } } else { - tcg_out_opc_reg(s, OPC_AND, args[0], args[1], args[2]); + tcg_out_opc_reg(s, OPC_AND, a0, a1, a2); } break; case INDEX_op_or_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ORI, args[0], args[1], args[2]); + if (c2) { + tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_OR, args[0], args[1], args[2]); + tcg_out_opc_reg(s, OPC_OR, a0, a1, a2); } break; case INDEX_op_nor_i32: - tcg_out_opc_reg(s, OPC_NOR, args[0], args[1], args[2]); + tcg_out_opc_reg(s, OPC_NOR, a0, a1, a2); break; case INDEX_op_not_i32: - tcg_out_opc_reg(s, OPC_NOR, args[0], TCG_REG_ZERO, args[1]); + tcg_out_opc_reg(s, OPC_NOR, a0, TCG_REG_ZERO, a1); break; case INDEX_op_xor_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_XORI, args[0], 
args[1], args[2]); + if (c2) { + tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_XOR, args[0], args[1], args[2]); + tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2); } break; case INDEX_op_sar_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_SRA, args[0], args[1], args[2]); + if (c2) { + tcg_out_opc_sa(s, OPC_SRA, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_SRAV, args[0], args[2], args[1]); + tcg_out_opc_reg(s, OPC_SRAV, a0, a2, a1); } break; case INDEX_op_shl_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_SLL, args[0], args[1], args[2]); + if (c2) { + tcg_out_opc_sa(s, OPC_SLL, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_SLLV, args[0], args[2], args[1]); + tcg_out_opc_reg(s, OPC_SLLV, a0, a2, a1); } break; case INDEX_op_shr_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_SRL, args[0], args[1], args[2]); + if (c2) { + tcg_out_opc_sa(s, OPC_SRL, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_SRLV, args[0], args[2], args[1]); + tcg_out_opc_reg(s, OPC_SRLV, a0, a2, a1); } break; case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], 0x20 - args[2]); + if (c2) { + tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2); } else { - tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP0, 32); - tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_TMP0, args[2]); - tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_TMP0, args[1]); + tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2); + tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); } break; case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], args[2]); + if (c2) { + tcg_out_opc_sa(s, OPC_ROTR, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_ROTRV, args[0], args[2], args[1]); + tcg_out_opc_reg(s, OPC_ROTRV, a0, a2, a1); } break; case INDEX_op_bswap16_i32: - tcg_out_opc_reg(s, OPC_WSBH, args[0], 0, args[1]); + tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); break; case INDEX_op_bswap32_i32: - tcg_out_opc_reg(s, OPC_WSBH, args[0], 0, args[1]); - 
tcg_out_opc_sa(s, OPC_ROTR, args[0], args[0], 16); + tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); + tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16); break; case INDEX_op_ext8s_i32: - tcg_out_opc_reg(s, OPC_SEB, args[0], 0, args[1]); + tcg_out_opc_reg(s, OPC_SEB, a0, 0, a1); break; case INDEX_op_ext16s_i32: - tcg_out_opc_reg(s, OPC_SEH, args[0], 0, args[1]); + tcg_out_opc_reg(s, OPC_SEH, a0, 0, a1); break; case INDEX_op_deposit_i32: - tcg_out_opc_bf(s, OPC_INS, args[0], args[2], - args[3] + args[4] - 1, args[3]); + tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); break; case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], args[3]); + tcg_out_brcond(s, a2, a0, a1, args[3]); break; case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]); + tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], args[5]); break; case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], args[0], args[1], args[2], args[3]); + tcg_out_movcond(s, args[5], a0, a1, a2, args[3]); break; case INDEX_op_setcond_i32: - tcg_out_setcond(s, args[3], args[0], args[1], args[2]); + tcg_out_setcond(s, args[3], a0, a1, a2); break; case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args[5], args[0], args[1], args[2], args[3], args[4]); + tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); break; case INDEX_op_qemu_ld_i32: From 741f117d9ac1a1a8deabc5f9b575308e88ad5fb3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 24 Apr 2014 02:56:44 +0000 Subject: [PATCH 16/24] tcg-mips: Improve add2/sub2 Reduce insn count from 5 to either 3 or 4. 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 87 ++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 31 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 516c2b71ba..76a78527bc 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -1297,6 +1297,52 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } +static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, + TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, + bool cbh, bool is_sub) +{ + TCGReg th = TCG_TMP1; + + /* If we have a negative constant such that negating it would + make the high part zero, we can (usually) eliminate one insn. */ + if (cbl && cbh && bh == -1 && bl != 0) { + bl = -bl; + bh = 0; + is_sub = !is_sub; + } + + /* By operating on the high part first, we get to use the final + carry operation to move back from the temporary. */ + if (!cbh) { + tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); + } else if (bh != 0 || ah == rl) { + tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); + } else { + th = ah; + } + + /* Note that tcg optimization should eliminate the bl == 0 case. */ + if (is_sub) { + if (cbl) { + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); + } else { + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); + tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); + } + tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); + } else { + if (cbl) { + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); + } else { + tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? 
al : bl)); + } + tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); + } +} + static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) { TCGReg addr_regl, addr_regh __attribute__((unused)); @@ -1419,21 +1465,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, OPC_ADDU, a0, a1, a2); } break; - case INDEX_op_add2_i32: - if (const_args[4]) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP0, a2, args[4]); - } else { - tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, a2, args[4]); - } - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP1, TCG_TMP0, a2); - if (const_args[5]) { - tcg_out_opc_imm(s, OPC_ADDIU, a1, args[3], args[5]); - } else { - tcg_out_opc_reg(s, OPC_ADDU, a1, args[3], args[5]); - } - tcg_out_opc_reg(s, OPC_ADDU, a1, a1, TCG_TMP1); - tcg_out_mov(s, TCG_TYPE_I32, a0, TCG_TMP0); - break; case INDEX_op_sub_i32: if (c2) { tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2); @@ -1441,21 +1472,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, OPC_SUBU, a0, a1, a2); } break; - case INDEX_op_sub2_i32: - if (const_args[4]) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP0, a2, -args[4]); - } else { - tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, a2, args[4]); - } - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP1, a2, TCG_TMP0); - if (const_args[5]) { - tcg_out_opc_imm(s, OPC_ADDIU, a1, args[3], -args[5]); - } else { - tcg_out_opc_reg(s, OPC_SUBU, a1, args[3], args[5]); - } - tcg_out_opc_reg(s, OPC_SUBU, a1, a1, TCG_TMP1); - tcg_out_mov(s, TCG_TYPE_I32, a0, TCG_TMP0); - break; case INDEX_op_mul_i32: if (use_mips32_instructions) { tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); @@ -1621,6 +1637,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_qemu_st(s, args, true); break; + case INDEX_op_add2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], false); + break; + case INDEX_op_sub2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], true); + 
break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ case INDEX_op_call: /* Always emitted via tcg_out_call. */ @@ -1680,7 +1705,7 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, From 4f048535cd11d0950c12d31115c9cbe883cde969 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 20:30:46 -0700 Subject: [PATCH 17/24] tcg-mips: Commonize opcode implementations Most opcodes fall in to one of a couple of patterns. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 212 +++++++++++++++++++----------------------- 1 file changed, 98 insertions(+), 114 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 76a78527bc..810f351d19 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -1391,6 +1391,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { + MIPSInsn i1, i2; TCGArg a0, a1, a2; int c2; @@ -1434,141 +1435,141 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_ld8u_i32: - tcg_out_ldst(s, OPC_LBU, a0, a1, a2); - break; + i1 = OPC_LBU; + goto do_ldst; case INDEX_op_ld8s_i32: - tcg_out_ldst(s, OPC_LB, a0, a1, a2); - break; + i1 = OPC_LB; + goto do_ldst; case INDEX_op_ld16u_i32: - tcg_out_ldst(s, OPC_LHU, a0, a1, a2); - break; + i1 = OPC_LHU; + goto do_ldst; case INDEX_op_ld16s_i32: - tcg_out_ldst(s, OPC_LH, a0, a1, a2); - break; + i1 = OPC_LH; + goto do_ldst; case INDEX_op_ld_i32: - tcg_out_ldst(s, 
OPC_LW, a0, a1, a2); - break; + i1 = OPC_LW; + goto do_ldst; case INDEX_op_st8_i32: - tcg_out_ldst(s, OPC_SB, a0, a1, a2); - break; + i1 = OPC_SB; + goto do_ldst; case INDEX_op_st16_i32: - tcg_out_ldst(s, OPC_SH, a0, a1, a2); - break; + i1 = OPC_SH; + goto do_ldst; case INDEX_op_st_i32: - tcg_out_ldst(s, OPC_SW, a0, a1, a2); + i1 = OPC_SW; + do_ldst: + tcg_out_ldst(s, i1, a0, a1, a2); break; case INDEX_op_add_i32: + i1 = OPC_ADDU, i2 = OPC_ADDIU; + goto do_binary; + case INDEX_op_or_i32: + i1 = OPC_OR, i2 = OPC_ORI; + goto do_binary; + case INDEX_op_xor_i32: + i1 = OPC_XOR, i2 = OPC_XORI; + do_binary: if (c2) { - tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_ADDU, a0, a1, a2); + tcg_out_opc_imm(s, i2, a0, a1, a2); + break; } + do_binaryv: + tcg_out_opc_reg(s, i1, a0, a1, a2); break; + case INDEX_op_sub_i32: if (c2) { tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2); - } else { - tcg_out_opc_reg(s, OPC_SUBU, a0, a1, a2); + break; } - break; + i1 = OPC_SUBU; + goto do_binary; + case INDEX_op_and_i32: + if (c2 && a2 != (uint16_t)a2) { + int msb = ctz32(~a2) - 1; + assert(use_mips32r2_instructions); + assert(is_p2m1(a2)); + tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); + break; + } + i1 = OPC_AND, i2 = OPC_ANDI; + goto do_binary; + case INDEX_op_nor_i32: + i1 = OPC_NOR; + goto do_binaryv; + case INDEX_op_mul_i32: if (use_mips32_instructions) { tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_MULT, 0, a1, a2); - tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + break; } - break; - case INDEX_op_muls2_i32: - tcg_out_opc_reg(s, OPC_MULT, 0, a2, args[3]); - tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); - break; - case INDEX_op_mulu2_i32: - tcg_out_opc_reg(s, OPC_MULTU, 0, a2, args[3]); - tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); - break; + i1 = OPC_MULT, i2 = OPC_MFLO; + goto do_hilo1; case INDEX_op_mulsh_i32: - tcg_out_opc_reg(s, OPC_MULT, 0, a1, a2); - 
tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); - break; + i1 = OPC_MULT, i2 = OPC_MFHI; + goto do_hilo1; case INDEX_op_muluh_i32: - tcg_out_opc_reg(s, OPC_MULTU, 0, a1, a2); - tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); - break; + i1 = OPC_MULTU, i2 = OPC_MFHI; + goto do_hilo1; case INDEX_op_div_i32: - tcg_out_opc_reg(s, OPC_DIV, 0, a1, a2); - tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); - break; + i1 = OPC_DIV, i2 = OPC_MFLO; + goto do_hilo1; case INDEX_op_divu_i32: - tcg_out_opc_reg(s, OPC_DIVU, 0, a1, a2); - tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); - break; + i1 = OPC_DIVU, i2 = OPC_MFLO; + goto do_hilo1; case INDEX_op_rem_i32: - tcg_out_opc_reg(s, OPC_DIV, 0, a1, a2); - tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); - break; + i1 = OPC_DIV, i2 = OPC_MFHI; + goto do_hilo1; case INDEX_op_remu_i32: - tcg_out_opc_reg(s, OPC_DIVU, 0, a1, a2); - tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); + i1 = OPC_DIVU, i2 = OPC_MFHI; + do_hilo1: + tcg_out_opc_reg(s, i1, 0, a1, a2); + tcg_out_opc_reg(s, i2, a0, 0, 0); break; - case INDEX_op_and_i32: - if (c2) { - if (a2 == (uint16_t)a2) { - tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2); - } else { - int msb = ctz32(~a2) - 1; - assert(use_mips32r2_instructions); - assert(is_p2m1(a2)); - tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); - } - } else { - tcg_out_opc_reg(s, OPC_AND, a0, a1, a2); - } - break; - case INDEX_op_or_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_OR, a0, a1, a2); - } - break; - case INDEX_op_nor_i32: - tcg_out_opc_reg(s, OPC_NOR, a0, a1, a2); + case INDEX_op_muls2_i32: + i1 = OPC_MULT; + goto do_hilo2; + case INDEX_op_mulu2_i32: + i1 = OPC_MULTU; + do_hilo2: + tcg_out_opc_reg(s, i1, 0, a2, args[3]); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); break; + case INDEX_op_not_i32: - tcg_out_opc_reg(s, OPC_NOR, a0, TCG_REG_ZERO, a1); - break; - case INDEX_op_xor_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_XOR, a0, 
a1, a2); - } + i1 = OPC_NOR; + goto do_unary; + case INDEX_op_bswap16_i32: + i1 = OPC_WSBH; + goto do_unary; + case INDEX_op_ext8s_i32: + i1 = OPC_SEB; + goto do_unary; + case INDEX_op_ext16s_i32: + i1 = OPC_SEH; + do_unary: + tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); break; case INDEX_op_sar_i32: - if (c2) { - tcg_out_opc_sa(s, OPC_SRA, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_SRAV, a0, a2, a1); - } - break; + i1 = OPC_SRAV, i2 = OPC_SRA; + goto do_shift; case INDEX_op_shl_i32: - if (c2) { - tcg_out_opc_sa(s, OPC_SLL, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_SLLV, a0, a2, a1); - } - break; + i1 = OPC_SLLV, i2 = OPC_SLL; + goto do_shift; case INDEX_op_shr_i32: + i1 = OPC_SRLV, i2 = OPC_SRL; + goto do_shift; + case INDEX_op_rotr_i32: + i1 = OPC_ROTRV, i2 = OPC_ROTR; + do_shift: if (c2) { - tcg_out_opc_sa(s, OPC_SRL, a0, a1, a2); + tcg_out_opc_sa(s, i2, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_SRLV, a0, a2, a1); + tcg_out_opc_reg(s, i1, a0, a2, a1); } break; case INDEX_op_rotl_i32: @@ -1579,29 +1580,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); } break; - case INDEX_op_rotr_i32: - if (c2) { - tcg_out_opc_sa(s, OPC_ROTR, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_ROTRV, a0, a2, a1); - } - break; - case INDEX_op_bswap16_i32: - tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); - break; case INDEX_op_bswap32_i32: tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16); break; - case INDEX_op_ext8s_i32: - tcg_out_opc_reg(s, OPC_SEB, a0, 0, a1); - break; - case INDEX_op_ext16s_i32: - tcg_out_opc_reg(s, OPC_SEH, a0, 0, a1); - break; - case INDEX_op_deposit_i32: tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); break; From fd1cf66630a220991c837e53d9a958cd29444ba1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 22:10:09 -0700 Subject: [PATCH 18/24] tcg-mips: Simplify setcond Use a table to fold comparisons to less-than. 
Also, move the function up in the file for further simplifications. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 125 ++++++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 58 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 810f351d19..278925c6ff 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -567,6 +567,73 @@ static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) } } +/* Bit 0 set if inversion required; bit 1 set if swapping required. */ +#define MIPS_CMP_INV 1 +#define MIPS_CMP_SWAP 2 + +static const uint8_t mips_cmp_map[16] = { + [TCG_COND_LT] = 0, + [TCG_COND_LTU] = 0, + [TCG_COND_GE] = MIPS_CMP_INV, + [TCG_COND_GEU] = MIPS_CMP_INV, + [TCG_COND_LE] = MIPS_CMP_INV | MIPS_CMP_SWAP, + [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP, + [TCG_COND_GT] = MIPS_CMP_SWAP, + [TCG_COND_GTU] = MIPS_CMP_SWAP, +}; + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, TCGReg arg2) +{ + MIPSInsn s_opc = OPC_SLTU; + int cmp_map; + + switch (cond) { + case TCG_COND_EQ: + if (arg2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + arg1 = ret; + } + tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); + break; + + case TCG_COND_NE: + if (arg2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + arg1 = ret; + } + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); + break; + + case TCG_COND_LT: + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GT: + s_opc = OPC_SLT; + /* FALLTHRU */ + + case TCG_COND_LTU: + case TCG_COND_GEU: + case TCG_COND_LEU: + case TCG_COND_GTU: + cmp_map = mips_cmp_map[cond]; + if (cmp_map & MIPS_CMP_SWAP) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + tcg_out_opc_reg(s, s_opc, ret, arg1, arg2); + if (cmp_map & MIPS_CMP_INV) { + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + } + break; + + default: + tcg_abort(); + break; + } +} + static void tcg_out_brcond(TCGContext *s, TCGCond cond, 
TCGArg arg1, TCGArg arg2, int label_index) { @@ -767,64 +834,6 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } } -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGArg arg1, TCGArg arg2) -{ - switch (cond) { - case TCG_COND_EQ: - if (arg1 == 0) { - tcg_out_opc_imm(s, OPC_SLTIU, ret, arg2, 1); - } else if (arg2 == 0) { - tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); - } else { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1); - } - break; - case TCG_COND_NE: - if (arg1 == 0) { - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg2); - } else if (arg2 == 0) { - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); - } else { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret); - } - break; - case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); - break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); - break; - case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); - break; - case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); - break; - default: - tcg_abort(); - break; - } -} - /* XXX: we implement it at the target level to avoid having to handle cross basic blocks temporaries */ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, From c068896f7f10f82d96a986ceea0d69d8579e3932 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 22:20:57 -0700 
Subject: [PATCH 19/24] tcg-mips: Simplify brcond Use the same table to fold comparisons as with setcond. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 89 ++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 47 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 278925c6ff..c0a7a048ca 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -634,70 +634,65 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, } } -static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1, - TCGArg arg2, int label_index) +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, int label_index) { - TCGLabel *l = &s->labels[label_index]; + static const MIPSInsn b_zero[16] = { + [TCG_COND_LT] = OPC_BLTZ, + [TCG_COND_GT] = OPC_BGTZ, + [TCG_COND_LE] = OPC_BLEZ, + [TCG_COND_GE] = OPC_BGEZ, + }; + + TCGLabel *l; + MIPSInsn s_opc = OPC_SLTU; + MIPSInsn b_opc; + int cmp_map; switch (cond) { case TCG_COND_EQ: - tcg_out_opc_br(s, OPC_BEQ, arg1, arg2); + b_opc = OPC_BEQ; break; case TCG_COND_NE: - tcg_out_opc_br(s, OPC_BNE, arg1, arg2); + b_opc = OPC_BNE; break; + case TCG_COND_LT: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BLTZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, arg1, arg2); - tcg_out_opc_br(s, OPC_BNE, TCG_TMP0, TCG_REG_ZERO); - } - break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, arg1, arg2); - tcg_out_opc_br(s, OPC_BNE, TCG_TMP0, TCG_REG_ZERO); - break; + case TCG_COND_GT: + case TCG_COND_LE: case TCG_COND_GE: if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BGEZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, arg1, arg2); - tcg_out_opc_br(s, OPC_BEQ, TCG_TMP0, TCG_REG_ZERO); + b_opc = b_zero[cond]; + arg2 = arg1; + arg1 = 0; + break; } - break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, arg1, arg2); - tcg_out_opc_br(s, OPC_BEQ, TCG_TMP0, TCG_REG_ZERO); - break; - 
case TCG_COND_LE: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BLEZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, arg2, arg1); - tcg_out_opc_br(s, OPC_BEQ, TCG_TMP0, TCG_REG_ZERO); - } - break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, arg2, arg1); - tcg_out_opc_br(s, OPC_BEQ, TCG_TMP0, TCG_REG_ZERO); - break; - case TCG_COND_GT: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BGTZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, arg2, arg1); - tcg_out_opc_br(s, OPC_BNE, TCG_TMP0, TCG_REG_ZERO); - } - break; + s_opc = OPC_SLT; + /* FALLTHRU */ + + case TCG_COND_LTU: case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, arg2, arg1); - tcg_out_opc_br(s, OPC_BNE, TCG_TMP0, TCG_REG_ZERO); + case TCG_COND_LEU: + case TCG_COND_GEU: + cmp_map = mips_cmp_map[cond]; + if (cmp_map & MIPS_CMP_SWAP) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); + b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE); + arg1 = TCG_TMP0; + arg2 = TCG_REG_ZERO; break; + default: tcg_abort(); break; } + + tcg_out_opc_br(s, b_opc, arg1, arg2); + l = &s->labels[label_index]; if (l->has_value) { reloc_pc16(s->code_ptr - 1, l->u.value_ptr); } else { From 9a2f0bfe32bc61d079eb2d763dae1616697f85c4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 22:34:30 -0700 Subject: [PATCH 20/24] tcg-mips: Simplify setcond2 Using tcg_unsigned_cond and tcg_high_cond. Also, move the function up in the file for future cleanups. 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 95 ++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 64 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index c0a7a048ca..1429ec715e 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -701,6 +701,37 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out_nop(s); } +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) +{ + TCGReg tmp0 = TCG_TMP0; + TCGReg tmp1 = ret; + + assert(ret != TCG_TMP0); + if (ret == ah || ret == bh) { + assert(ret != TCG_TMP1); + tmp1 = TCG_TMP1; + } + + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + tcg_out_setcond(s, cond, tmp0, ah, bh); + tcg_out_setcond(s, cond, ret, al, bl); + tcg_out_opc_reg(s, (cond == TCG_COND_EQ ? OPC_AND : OPC_OR), + ret, ret, tmp0); + break; + + default: + tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh); + tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl); + tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0); + tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh); + tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0); + break; + } +} + /* XXX: we implement it at the target level to avoid having to handle cross basic blocks temporaries */ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1, @@ -829,70 +860,6 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } } -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ -static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, - TCGArg arg1, TCGArg arg2, TCGArg arg3, TCGArg arg4) -{ - switch (cond) { - case TCG_COND_EQ: - tcg_out_setcond(s, TCG_COND_EQ, TCG_TMP0, arg2, arg4); - tcg_out_setcond(s, TCG_COND_EQ, TCG_TMP1, arg1, arg3); - tcg_out_opc_reg(s, OPC_AND, ret, TCG_TMP0, TCG_TMP1); - return; - case TCG_COND_NE: - 
tcg_out_setcond(s, TCG_COND_NE, TCG_TMP0, arg2, arg4); - tcg_out_setcond(s, TCG_COND_NE, TCG_TMP1, arg1, arg3); - tcg_out_opc_reg(s, OPC_OR, ret, TCG_TMP0, TCG_TMP1); - return; - case TCG_COND_LT: - case TCG_COND_LE: - tcg_out_setcond(s, TCG_COND_LT, TCG_TMP0, arg2, arg4); - break; - case TCG_COND_GT: - case TCG_COND_GE: - tcg_out_setcond(s, TCG_COND_GT, TCG_TMP0, arg2, arg4); - break; - case TCG_COND_LTU: - case TCG_COND_LEU: - tcg_out_setcond(s, TCG_COND_LTU, TCG_TMP0, arg2, arg4); - break; - case TCG_COND_GTU: - case TCG_COND_GEU: - tcg_out_setcond(s, TCG_COND_GTU, TCG_TMP0, arg2, arg4); - break; - default: - tcg_abort(); - break; - } - - tcg_out_setcond(s, TCG_COND_EQ, TCG_TMP1, arg2, arg4); - - switch(cond) { - case TCG_COND_LT: - case TCG_COND_LTU: - tcg_out_setcond(s, TCG_COND_LTU, ret, arg1, arg3); - break; - case TCG_COND_LE: - case TCG_COND_LEU: - tcg_out_setcond(s, TCG_COND_LEU, ret, arg1, arg3); - break; - case TCG_COND_GT: - case TCG_COND_GTU: - tcg_out_setcond(s, TCG_COND_GTU, ret, arg1, arg3); - break; - case TCG_COND_GE: - case TCG_COND_GEU: - tcg_out_setcond(s, TCG_COND_GEU, ret, arg1, arg3); - break; - default: - tcg_abort(); - } - - tcg_out_opc_reg(s, OPC_AND, ret, ret, TCG_TMP1); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); -} - static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) { /* Note that the ABI requires the called function's address to be From 1db1c4d7d91843dd6a7dab93946e01bc19e8ba38 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 22:41:46 -0700 Subject: [PATCH 21/24] tcg-mips: Improve setcond eq/ne vs zeros The original code results in one too many insns per zero present in the input. And since comparing 64-bit numbers vs zero is common... 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 1429ec715e..ccf262b217 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -701,6 +701,40 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out_nop(s); } +static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1, + TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh) +{ + /* Merge highpart comparison into AH. */ + if (bh != 0) { + if (ah != 0) { + tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh); + ah = tmp0; + } else { + ah = bh; + } + } + /* Merge lowpart comparison into AL. */ + if (bl != 0) { + if (al != 0) { + tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl); + al = tmp1; + } else { + al = bl; + } + } + /* Merge high and low part comparisons into AL. */ + if (ah != 0) { + if (al != 0) { + tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al); + al = tmp0; + } else { + al = ah; + } + } + return al; +} + static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) { @@ -716,10 +750,8 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, switch (cond) { case TCG_COND_EQ: case TCG_COND_NE: - tcg_out_setcond(s, cond, tmp0, ah, bh); - tcg_out_setcond(s, cond, ret, al, bl); - tcg_out_opc_reg(s, (cond == TCG_COND_EQ ? OPC_AND : OPC_OR), - ret, ret, tmp0); + tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh); + tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO); break; default: From 3401fd259e37f08e1333169494bea30ea02a1d63 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 22:47:22 -0700 Subject: [PATCH 22/24] tcg-mips: Simplify brcond2 Emitting a single branch instead of (up to) 3, using setcond2 to generate the composite compare. 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 79 ++++++++++++------------------------------- 1 file changed, 21 insertions(+), 58 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index ccf262b217..0e5ecf4974 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -764,70 +764,33 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, } } -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ -static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1, - TCGArg arg2, TCGArg arg3, TCGArg arg4, - int label_index) +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh, int label_index) { - tcg_insn_unit *label_ptr; + TCGCond b_cond = TCG_COND_NE; + TCGReg tmp = TCG_TMP1; - switch(cond) { - case TCG_COND_NE: - tcg_out_brcond(s, TCG_COND_NE, arg2, arg4, label_index); - tcg_out_brcond(s, TCG_COND_NE, arg1, arg3, label_index); - return; - case TCG_COND_EQ: - break; - case TCG_COND_LT: - case TCG_COND_LE: - tcg_out_brcond(s, TCG_COND_LT, arg2, arg4, label_index); - break; - case TCG_COND_GT: - case TCG_COND_GE: - tcg_out_brcond(s, TCG_COND_GT, arg2, arg4, label_index); - break; - case TCG_COND_LTU: - case TCG_COND_LEU: - tcg_out_brcond(s, TCG_COND_LTU, arg2, arg4, label_index); - break; - case TCG_COND_GTU: - case TCG_COND_GEU: - tcg_out_brcond(s, TCG_COND_GTU, arg2, arg4, label_index); + /* With branches, we emit between 4 and 9 insns with 2 or 3 branches. + With setcond, we emit between 3 and 10 insns and only 1 branch, + which ought to get better branch prediction. */ + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + b_cond = cond; + tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh); break; + default: - tcg_abort(); + /* Minimize code size by prefering a compare not requiring INV. 
*/ + if (mips_cmp_map[cond] & MIPS_CMP_INV) { + cond = tcg_invert_cond(cond); + b_cond = TCG_COND_EQ; + } + tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh); + break; } - label_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, arg2, arg4); - tcg_out_nop(s); - - switch(cond) { - case TCG_COND_EQ: - tcg_out_brcond(s, TCG_COND_EQ, arg1, arg3, label_index); - break; - case TCG_COND_LT: - case TCG_COND_LTU: - tcg_out_brcond(s, TCG_COND_LTU, arg1, arg3, label_index); - break; - case TCG_COND_LE: - case TCG_COND_LEU: - tcg_out_brcond(s, TCG_COND_LEU, arg1, arg3, label_index); - break; - case TCG_COND_GT: - case TCG_COND_GTU: - tcg_out_brcond(s, TCG_COND_GTU, arg1, arg3, label_index); - break; - case TCG_COND_GE: - case TCG_COND_GEU: - tcg_out_brcond(s, TCG_COND_GEU, arg1, arg3, label_index); - break; - default: - tcg_abort(); - } - - reloc_pc16(label_ptr, s->code_ptr); + tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, label_index); } static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, From 33fac20bb2ae7fe48932bd950a74ff2f7b134b0f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Apr 2014 22:53:12 -0700 Subject: [PATCH 23/24] tcg-mips: Simplify movcond Use the same table to fold comparisons as with setcond. 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 66 +++++++++++-------------------------------- 1 file changed, 17 insertions(+), 49 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 0e5ecf4974..ad752c43cd 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -794,65 +794,33 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, } static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGArg c1, TCGArg c2, TCGArg v) + TCGReg c1, TCGReg c2, TCGReg v) { + MIPSInsn m_opc = OPC_MOVN; + switch (cond) { case TCG_COND_EQ: - if (c1 == 0) { - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c2); - } else if (c2 == 0) { - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c1); - } else { - tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_TMP0); - } - break; + m_opc = OPC_MOVZ; + /* FALLTHRU */ case TCG_COND_NE: - if (c1 == 0) { - tcg_out_opc_reg(s, OPC_MOVN, ret, v, c2); - } else if (c2 == 0) { - tcg_out_opc_reg(s, OPC_MOVN, ret, v, c1); - } else { + if (c2 != 0) { tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_TMP0); + c1 = TCG_TMP0; } break; - case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_TMP0); - break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_TMP0); - break; - case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_TMP0); - break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_TMP0); - break; - case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, c2, c1); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_TMP0); - break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, c2, c1); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_TMP0); - 
break; - case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, TCG_TMP0, c2, c1); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_TMP0); - break; - case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, c2, c1); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_TMP0); - break; + default: - tcg_abort(); + /* Minimize code size by prefering a compare not requiring INV. */ + if (mips_cmp_map[cond] & MIPS_CMP_INV) { + cond = tcg_invert_cond(cond); + m_opc = OPC_MOVZ; + } + tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); + c1 = TCG_TMP0; break; } + + tcg_out_opc_reg(s, m_opc, ret, v, c1); } static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) From b6bfeea92aea8dbad61ea21cc0c3a2df4d42b96b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 24 Apr 2014 19:39:20 +0000 Subject: [PATCH 24/24] tcg-mips: Enable direct chaining of TBs Now that the code_gen_buffer is constrained to not cross 256mb regions, we are assured that we can use J to reach another TB. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 4 ++-- tcg/mips/tcg-target.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 8bc2eb663e..c964ca4f0b 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -131,7 +131,7 @@ static inline void tlb_flush(CPUState *cpu, int flush_global) #if defined(__arm__) || defined(_ARCH_PPC) \ || defined(__x86_64__) || defined(__i386__) \ || defined(__sparc__) || defined(__aarch64__) \ - || defined(__s390x__) \ + || defined(__s390x__) || defined(__mips__) \ || defined(CONFIG_TCG_INTERPRETER) #define USE_DIRECT_JUMP #endif @@ -268,7 +268,7 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg)); #endif } -#elif defined(__sparc__) +#elif defined(__sparc__) || defined(__mips__) void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr); #else 
#error tb_set_jmp_target1 is missing diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index ad752c43cd..8855d5039d 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -1354,7 +1354,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ - tcg_abort(); + s->tb_jmp_offset[a0] = tcg_current_code_size(s); + /* Avoid clobbering the address during retranslation. */ + tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff)); } else { /* indirect jump method */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, @@ -1805,3 +1807,10 @@ static void tcg_target_init(TCGContext *s) tcg_add_target_add_op_defs(mips_op_defs); } + +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + uint32_t *ptr = (uint32_t *)jmp_addr; + *ptr = deposit32(*ptr, 0, 26, addr >> 2); + flush_icache_range(jmp_addr, jmp_addr + 4); +}