diff --git a/configure b/configure index fbb6a93c99..13e040d28c 100755 --- a/configure +++ b/configure @@ -1213,12 +1213,12 @@ case "$cpu" in LDFLAGS="-m64 $LDFLAGS" ;; sparc) - LDFLAGS="-m32 $LDFLAGS" - CPU_CFLAGS="-m32 -mcpu=ultrasparc" + CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc" + LDFLAGS="-m32 -mv8plus $LDFLAGS" ;; sparc64) - LDFLAGS="-m64 $LDFLAGS" CPU_CFLAGS="-m64 -mcpu=ultrasparc" + LDFLAGS="-m64 $LDFLAGS" ;; s390) CPU_CFLAGS="-m31" diff --git a/cpu-exec.c b/cpu-exec.c index 63a56d0407..5b181c18ed 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -309,10 +309,8 @@ static bool tb_cmp(const void *p, const void *d) return false; } -static TranslationBlock *tb_htable_lookup(CPUState *cpu, - target_ulong pc, - target_ulong cs_base, - uint32_t flags) +TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, + target_ulong cs_base, uint32_t flags) { tb_page_addr_t phys_pc; struct tb_desc desc; diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index bcde1e6a14..87ae10bcc9 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -368,6 +368,8 @@ struct TranslationBlock { void tb_free(TranslationBlock *tb); void tb_flush(CPUState *cpu); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); +TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, + target_ulong cs_base, uint32_t flags); #if defined(USE_DIRECT_JUMP) diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h index 2c27490cb8..b1fe2d0161 100644 --- a/include/exec/tb-hash.h +++ b/include/exec/tb-hash.h @@ -22,6 +22,8 @@ #include "exec/tb-hash-xx.h" +#ifdef CONFIG_SOFTMMU + /* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for addresses on the same page. The top bits are the same. This allows TLB invalidation to quickly clear a subset of the hash table. */ @@ -45,6 +47,16 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) | (tmp & TB_JMP_ADDR_MASK)); } +#else + +/* In user-mode we can get better hashing because we do not have a TLB */ +static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) +{ + return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1); +} + +#endif /* CONFIG_SOFTMMU */ + static inline uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags) { diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 878fa0700d..e07c7972ab 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -88,6 +88,24 @@ #define smp_read_barrier_depends() barrier() #endif +/* Sanity check that the size of an atomic operation isn't "overly large". + * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not + * want to use them because we ought not need them, and this lets us do a + * bit of sanity checking that other 32-bit hosts might build. + * + * That said, we have a problem on 64-bit ILP32 hosts in that in order to + * sync with TCG_OVERSIZED_GUEST, this must match TCG_TARGET_REG_BITS. + * We'd prefer not want to pull in everything else TCG related, so handle + * those few cases by hand. + * + * Note that x32 is fully detected with __x64_64__ + _ILP32, and that for + * Sparc we always force the use of sparcv9 in configure. + */ +#if defined(__x86_64__) || defined(__sparc__) +# define ATOMIC_REG_SIZE 8 +#else +# define ATOMIC_REG_SIZE sizeof(void *) +#endif /* Weak atomic operations prevent the compiler moving other * loads/stores past the atomic operation load/store. However there is @@ -104,7 +122,7 @@ #define atomic_read(ptr) \ ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ atomic_read__nocheck(ptr); \ }) @@ -112,7 +130,7 @@ __atomic_store_n(ptr, i, __ATOMIC_RELAXED) #define atomic_set(ptr, i) do { \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ atomic_set__nocheck(ptr, i); \ } while(0) @@ -130,27 +148,27 @@ #define atomic_rcu_read(ptr) \ ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ typeof_strip_qual(*ptr) _val; \ atomic_rcu_read__nocheck(ptr, &_val); \ _val; \ }) #define atomic_rcu_set(ptr, i) do { \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ } while(0) #define atomic_load_acquire(ptr) \ ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ typeof_strip_qual(*ptr) _val; \ __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE); \ _val; \ }) #define atomic_store_release(ptr, i) do { \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ } while(0) @@ -162,7 +180,7 @@ }) #define atomic_xchg(ptr, i) ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ atomic_xchg__nocheck(ptr, i); \ }) @@ -175,7 +193,7 @@ }) #define atomic_cmpxchg(ptr, old, new) ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ atomic_cmpxchg__nocheck(ptr, old, new); \ }) diff --git a/target/alpha/translate.c b/target/alpha/translate.c index df5d695344..7c45ae360c 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -89,6 +89,9 @@ typedef enum { updated the PC for the next instruction to be executed. */ EXIT_PC_STALE, + /* We are exiting the TB due to page crossing or space constraints. */ + EXIT_FALLTHRU, + /* We are ending the TB with a noreturn function call, e.g. longjmp. No following code will be executed. */ EXIT_NORETURN, @@ -1157,6 +1160,7 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode) #ifndef CONFIG_USER_ONLY /* Privileged PAL code */ if (palcode < 0x40 && (ctx->tb->flags & TB_FLAGS_USER_MODE) == 0) { + TCGv tmp; switch (palcode) { case 0x01: /* CFLUSH */ @@ -1182,10 +1186,8 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode) offsetof(CPUAlphaState, sysval)); break; - case 0x35: { + case 0x35: /* SWPIPL */ - TCGv tmp; - /* Note that we already know we're in kernel mode, so we know that PS only contains the 3 IPL bits. */ tcg_gen_ld8u_i64(ctx->ir[IR_V0], cpu_env, @@ -1197,7 +1199,6 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode) tcg_gen_st8_i64(tmp, cpu_env, offsetof(CPUAlphaState, ps)); tcg_temp_free(tmp); break; - } case 0x36: /* RDPS */ @@ -1220,6 +1221,14 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode) -offsetof(AlphaCPU, env) + offsetof(CPUState, cpu_index)); break; + case 0x3E: + /* WTINT */ + tmp = tcg_const_i64(1); + tcg_gen_st32_i64(tmp, cpu_env, -offsetof(AlphaCPU, env) + + offsetof(CPUState, halted)); + tcg_gen_movi_i64(ctx->ir[IR_V0], 0); + return gen_excp(ctx, EXCP_HALTED, 0); + default: palcode &= 0x3f; goto do_call_pal; @@ -1369,7 +1378,7 @@ static ExitStatus gen_mtpr(DisasContext *ctx, TCGv vb, int regno) tmp = tcg_const_i64(1); tcg_gen_st32_i64(tmp, cpu_env, -offsetof(AlphaCPU, env) + offsetof(CPUState, halted)); - return gen_excp(ctx, EXCP_HLT, 0); + return gen_excp(ctx, EXCP_HALTED, 0); case 252: /* HALT */ @@ -2978,7 +2987,7 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) || num_insns >= max_insns || singlestep || ctx.singlestep_enabled)) { - ret = EXIT_PC_STALE; + ret = EXIT_FALLTHRU; } } while (ret == NO_EXIT); @@ -2990,6 +2999,13 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) case EXIT_GOTO_TB: case EXIT_NORETURN: break; + case EXIT_FALLTHRU: + if (use_goto_tb(&ctx, ctx.pc)) { + tcg_gen_goto_tb(0); + tcg_gen_movi_i64(cpu_pc, ctx.pc); + tcg_gen_exit_tb((uintptr_t)ctx.tb); + } + /* FALLTHRU */ case EXIT_PC_STALE: tcg_gen_movi_i64(cpu_pc, ctx.pc); /* FALLTHRU */ @@ -3001,7 +3017,7 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) } break; default: - abort(); + g_assert_not_reached(); } gen_tb_end(tb, num_insns); diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index a82ab49c94..860e279658 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -379,7 +379,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) } else if (s->singlestep_enabled) { gen_exception_internal(EXCP_DEBUG); } else { - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_pc); s->is_jmp = DISAS_TB_JUMP; } } @@ -11367,8 +11367,7 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) gen_a64_set_pc_im(dc->pc); /* fall through */ case DISAS_JUMP: - /* indicate that the hash table must be used to find the next TB */ - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_pc); break; case DISAS_TB_JUMP: case DISAS_EXC: diff --git a/target/arm/translate.c b/target/arm/translate.c index ae6646c05b..0862f9e4aa 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -1182,7 +1182,7 @@ static void gen_exception_internal_insn(DisasContext *s, int offset, int excp) gen_set_condexec(s); gen_set_pc_im(s, s->pc - offset); gen_exception_internal(excp); - s->is_jmp = DISAS_JUMP; + s->is_jmp = DISAS_EXC; } static void gen_exception_insn(DisasContext *s, int offset, int excp, @@ -1191,14 +1191,14 @@ static void gen_exception_insn(DisasContext *s, int offset, int excp, gen_set_condexec(s); gen_set_pc_im(s, s->pc - offset); gen_exception(excp, syn, target_el); - s->is_jmp = DISAS_JUMP; + s->is_jmp = DISAS_EXC; } /* Force a TB lookup after an instruction that changes the CPU state. */ static inline void gen_lookup_tb(DisasContext *s) { tcg_gen_movi_i32(cpu_R[15], s->pc & ~1); - s->is_jmp = DISAS_JUMP; + s->is_jmp = DISAS_EXIT; } static inline void gen_hlt(DisasContext *s, int imm) @@ -4150,7 +4150,15 @@ static inline bool use_goto_tb(DisasContext *s, target_ulong dest) #endif } -static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) +static void gen_goto_ptr(void) +{ + TCGv addr = tcg_temp_new(); + tcg_gen_extu_i32_tl(addr, cpu_R[15]); + tcg_gen_lookup_and_goto_ptr(addr); + tcg_temp_free(addr); +} + +static void gen_goto_tb(DisasContext *s, int n, target_ulong dest) { if (use_goto_tb(s, dest)) { tcg_gen_goto_tb(n); @@ -4158,7 +4166,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) tcg_gen_exit_tb((uintptr_t)s->tb + n); } else { gen_set_pc_im(s, dest); - tcg_gen_exit_tb(0); + gen_goto_ptr(); } } @@ -12091,11 +12099,14 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) gen_set_pc_im(dc, dc->pc); /* fall through */ case DISAS_JUMP: + gen_goto_ptr(); + break; default: /* indicate that the hash table must be used to find the next TB */ tcg_gen_exit_tb(0); break; case DISAS_TB_JUMP: + case DISAS_EXC: /* nothing more to generate */ break; case DISAS_WFI: diff --git a/target/arm/translate.h b/target/arm/translate.h index 6b2cc34c33..15d383d9af 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -139,6 +139,10 @@ static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) * custom end-of-TB code) */ #define DISAS_BX_EXCRET 11 +/* For instructions which want an immediate exit to the main loop, + * as opposed to attempting to use lookup_and_goto_ptr. + */ +#define DISAS_EXIT 12 #ifdef TARGET_AARCH64 void a64_translate_init(void); diff --git a/target/hppa/translate.c b/target/hppa/translate.c index 9e8c233501..e10abc5e04 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -517,7 +517,7 @@ static void gen_goto_tb(DisasContext *ctx, int which, if (ctx->singlestep_enabled) { gen_excp_1(EXCP_DEBUG); } else { - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f); } } } @@ -1510,7 +1510,7 @@ static ExitStatus do_ibranch(DisasContext *ctx, TCGv dest, } else if (is_n && use_nullify_skip(ctx)) { /* The (conditional) branch, B, nullifies the next insn, N, and we're allowed to skip execution N (no single-step or - tracepoint in effect). Since the exit_tb that we must use + tracepoint in effect). Since the goto_ptr that we must use for the indirect branch consumes no special resources, we can (conditionally) skip B and continue execution. */ /* The use_nullify_skip test implies we have a known control path. */ @@ -1527,7 +1527,7 @@ static ExitStatus do_ibranch(DisasContext *ctx, TCGv dest, if (link != 0) { tcg_gen_movi_tl(cpu_gr[link], ctx->iaoq_n); } - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f); return nullify_end(ctx, NO_EXIT); } else { cond_prep(&ctx->null_cond); @@ -3885,7 +3885,7 @@ void gen_intermediate_code(CPUHPPAState *env, struct TranslationBlock *tb) if (ctx.singlestep_enabled) { gen_excp_1(EXCP_DEBUG); } else { - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f); } break; default: diff --git a/target/i386/translate.c b/target/i386/translate.c index 1d1372fb43..674ec96d5a 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -141,6 +141,7 @@ typedef struct DisasContext { } DisasContext; static void gen_eob(DisasContext *s); +static void gen_jr(DisasContext *s, TCGv dest); static void gen_jmp(DisasContext *s, target_ulong eip); static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num); static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d); @@ -2153,9 +2154,9 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) gen_jmp_im(eip); tcg_gen_exit_tb((uintptr_t)s->tb + tb_num); } else { - /* jump to another page: currently not optimized */ + /* jump to another page */ gen_jmp_im(eip); - gen_eob(s); + gen_jr(s, cpu_tmp0); } } @@ -2509,7 +2510,8 @@ static void gen_bnd_jmp(DisasContext *s) If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of S->TF. This is used by the syscall/sysret insns. */ -static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf) +static void +do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, TCGv jr) { gen_update_cc_op(s); @@ -2530,12 +2532,27 @@ static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf) tcg_gen_exit_tb(0); } else if (s->tf) { gen_helper_single_step(cpu_env); + } else if (!TCGV_IS_UNUSED(jr)) { + TCGv vaddr = tcg_temp_new(); + + tcg_gen_add_tl(vaddr, jr, cpu_seg_base[R_CS]); + tcg_gen_lookup_and_goto_ptr(vaddr); + tcg_temp_free(vaddr); } else { tcg_gen_exit_tb(0); } s->is_jmp = DISAS_TB_JUMP; } +static inline void +gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf) +{ + TCGv unused; + + TCGV_UNUSED(unused); + do_gen_eob_worker(s, inhibit, recheck_tf, unused); +} + /* End of block. If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. */ static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit) @@ -2549,6 +2566,12 @@ static void gen_eob(DisasContext *s) gen_eob_worker(s, false, false); } +/* Jump to register */ +static void gen_jr(DisasContext *s, TCGv dest) +{ + do_gen_eob_worker(s, false, false, dest); +} + /* generate a jump to eip. No segment change must happen before as a direct call to the next block may occur */ static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) @@ -4973,7 +4996,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_push_v(s, cpu_T1); gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 3: /* lcall Ev */ gen_op_ld_v(s, ot, cpu_T1, cpu_A0); @@ -4991,7 +5014,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_const_i32(dflag - 1), tcg_const_i32(s->pc - s->cs_base)); } - gen_eob(s); + tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); + gen_jr(s, cpu_tmp4); break; case 4: /* jmp Ev */ if (dflag == MO_16) { @@ -4999,7 +5023,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 5: /* ljmp Ev */ gen_op_ld_v(s, ot, cpu_T1, cpu_A0); @@ -5014,7 +5038,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_movl_seg_T0_vm(R_CS); gen_op_jmp_v(cpu_T1); } - gen_eob(s); + tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); + gen_jr(s, cpu_tmp4); break; case 6: /* push Ev */ gen_push_v(s, cpu_T0); @@ -6394,7 +6419,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, /* Note that gen_pop_T0 uses a zero-extending load. */ gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 0xc3: /* ret */ ot = gen_pop_T0(s); @@ -6402,7 +6427,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, /* Note that gen_pop_T0 uses a zero-extending load. */ gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 0xca: /* lret im */ val = cpu_ldsw_code(env, s->pc); diff --git a/target/mips/translate.c b/target/mips/translate.c index 3022f349cb..559f8fed89 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -4233,7 +4233,7 @@ static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) save_cpu_state(ctx, 0); gen_helper_raise_exception_debug(cpu_env); } - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_PC); } } @@ -10725,7 +10725,7 @@ static void gen_branch(DisasContext *ctx, int insn_bytes) save_cpu_state(ctx, 0); gen_helper_raise_exception_debug(cpu_env); } - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_PC); break; default: fprintf(stderr, "unknown branch 0x%x\n", proc_hflags); diff --git a/target/nios2/translate.c b/target/nios2/translate.c index cfec47959d..2f3c2e5dfb 100644 --- a/target/nios2/translate.c +++ b/target/nios2/translate.c @@ -164,7 +164,7 @@ static void gen_goto_tb(DisasContext *dc, int n, uint32_t dest) if (use_goto_tb(dc, dest)) { tcg_gen_goto_tb(n); tcg_gen_movi_tl(dc->cpu_R[R_PC], dest); - tcg_gen_exit_tb((tcg_target_long)tb + n); + tcg_gen_exit_tb((uintptr_t)tb + n); } else { tcg_gen_movi_tl(dc->cpu_R[R_PC], dest); tcg_gen_exit_tb(0); diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 4c48c593cd..628fb8685d 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -608,11 +608,16 @@ static void gen_op_calc_cc(DisasContext *s) set_cc_static(s); } -static int use_goto_tb(DisasContext *s, uint64_t dest) +static bool use_exit_tb(DisasContext *s) { - if (unlikely(s->singlestep_enabled) || - (s->tb->cflags & CF_LAST_IO) || - (s->tb->flags & FLAG_MASK_PER)) { + return (s->singlestep_enabled || + (s->tb->cflags & CF_LAST_IO) || + (s->tb->flags & FLAG_MASK_PER)); +} + +static bool use_goto_tb(DisasContext *s, uint64_t dest) +{ + if (unlikely(use_exit_tb(s))) { return false; } #ifndef CONFIG_USER_ONLY @@ -5461,8 +5466,10 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb) /* Exit the TB, either by raising a debug exception or by return. */ if (do_debug) { gen_exception(EXCP_DEBUG); - } else { + } else if (use_exit_tb(&dc)) { tcg_gen_exit_tb(0); + } else { + tcg_gen_lookup_and_goto_ptr(psw_addr); } break; default: diff --git a/tcg-runtime.c b/tcg-runtime.c index 4c60c96658..7fa90ce508 100644 --- a/tcg-runtime.c +++ b/tcg-runtime.c @@ -27,6 +27,9 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" +#include "exec/tb-hash.h" +#include "disas/disas.h" +#include "exec/log.h" /* 32-bit helpers */ @@ -141,6 +144,35 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) return ctpop64(arg); } +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) +{ + CPUState *cpu = ENV_GET_CPU(env); + TranslationBlock *tb; + target_ulong cs_base, pc; + uint32_t flags; + + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); + if (likely(tb)) { + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + if (likely(tb->pc == addr && tb->cs_base == cs_base && + tb->flags == flags)) { + goto found; + } + tb = tb_htable_lookup(cpu, addr, cs_base, flags); + if (likely(tb)) { + atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb); + goto found; + } + } + return tcg_ctx.code_gen_epilogue; + found: + qemu_log_mask_and_addr(CPU_LOG_EXEC, addr, + "Chain %p [%d: " TARGET_FMT_lx "] %s\n", + tb->tc_ptr, cpu->cpu_index, addr, + lookup_symbol(addr)); + return tb->tc_ptr; +} + void HELPER(exit_atomic)(CPUArchState *env) { cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); diff --git a/tcg/README b/tcg/README index a9858c2f74..bf49e8242b 100644 --- a/tcg/README +++ b/tcg/README @@ -477,6 +477,14 @@ current TB was linked to this TB. Otherwise execute the next instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued at most once with each slot index per TB. +* lookup_and_goto_ptr tb_addr + +Look up a TB address ('tb_addr') and jump to it if valid. If not valid, +jump to the TCG epilogue to go back to the exec loop. + +This operation is optional. If the TCG backend does not implement the +goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0). + * qemu_ld_i32/i64 t0, t1, flags, memidx * qemu_st_i32/i64 t0, t1, flags, memidx diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 1a5ea23844..55a46ac825 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -77,6 +77,7 @@ typedef enum { #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 290de6dae6..5f185458f1 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -1357,8 +1357,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); - tcg_out_goto(s, tb_ret_addr); + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_goto(s, s->code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); + tcg_out_goto(s, tb_ret_addr); + } break; case INDEX_op_goto_tb: @@ -1374,6 +1379,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + tcg_out_insn(s, 3207, BR, a0); + break; + case INDEX_op_br: tcg_out_goto_label(s, arg_label(a0)); break; @@ -1735,6 +1744,7 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, @@ -1942,6 +1952,14 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); + /* + * Return path for goto_ptr. Set return value to 0, a-la exit_tb, + * and fall through to the rest of the epilogue. + */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); + + /* TB epilogue */ tb_ret_addr = s->code_ptr; /* Remove TCG locals stack space. */ diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 75ea247bc4..5ef1086710 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -123,6 +123,7 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 +#define TCG_TARGET_HAS_goto_ptr 1 enum { TCG_AREG0 = TCG_REG_R6, diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index e75a6d4943..9f5cb66718 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -329,11 +329,6 @@ static const uint8_t tcg_cond_to_arm_cond[] = { [TCG_COND_GTU] = COND_HI, }; -static inline void tcg_out_bx(TCGContext *s, int cond, int rn) -{ - tcg_out32(s, (cond << 28) | 0x012fff10 | rn); -} - static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset) { tcg_out32(s, (cond << 28) | 0x0a000000 | @@ -402,6 +397,18 @@ static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) } } +static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn) +{ + /* Unless the C portion of QEMU is compiled as thumb, we don't + actually need true BX semantics; merely a branch to an address + held in a register. */ + if (use_armv5t_instructions) { + tcg_out32(s, (cond << 28) | 0x012fff10 | rn); + } else { + tcg_out_mov_reg(s, cond, TCG_REG_PC, rn); + } +} + static inline void tcg_out_dat_imm(TCGContext *s, int cond, int opc, int rd, int rn, int im) { @@ -977,7 +984,7 @@ static inline void tcg_out_st8(TCGContext *s, int cond, * with the code buffer limited to 16MB we wouldn't need the long case. * But we also use it for the tail-call to the qemu_ld/st helpers, which does. */ -static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) +static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) { intptr_t addri = (intptr_t)addr; ptrdiff_t disp = tcg_pcrel_diff(s, addr); @@ -987,15 +994,9 @@ static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) return; } + assert(use_armv5t_instructions || (addri & 1) == 0); tcg_out_movi32(s, cond, TCG_REG_TMP, addri); - if (use_armv5t_instructions) { - tcg_out_bx(s, cond, TCG_REG_TMP); - } else { - if (addri & 1) { - tcg_abort(); - } - tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP); - } + tcg_out_bx(s, cond, TCG_REG_TMP); } /* The call case is mostly used for helpers - so it's not unreasonable @@ -1654,8 +1655,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); - tcg_out_goto(s, COND_AL, tb_ret_addr); + /* Reuse the zeroing that exists for goto_ptr. */ + a0 = args[0]; + if (a0 == 0) { + tcg_out_goto(s, COND_AL, s->code_gen_epilogue); + } else { + tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); + tcg_out_goto(s, COND_AL, tb_ret_addr); + } break; case INDEX_op_goto_tb: if (s->tb_jmp_insn_offset) { @@ -1670,6 +1677,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + tcg_out_bx(s, COND_AL, args[0]); + break; case INDEX_op_br: tcg_out_goto_label(s, COND_AL, arg_label(args[0])); break; @@ -1960,6 +1970,7 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, @@ -2135,9 +2146,16 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]); - tb_ret_addr = s->code_ptr; - /* Epilogue. We branch here via tb_ret_addr. */ + /* + * Return path for goto_ptr. Set return value to 0, a-la exit_tb, + * and fall through to the rest of the epilogue. + */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0); + + /* TB epilogue */ + tb_ret_addr = s->code_ptr; tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, TCG_REG_CALL_STACK, stack_addend, 1); diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 4275787db9..73a15f7e80 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -107,6 +107,7 @@ extern bool have_popcnt; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_goto_ptr 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extrl_i64_i32 0 diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 5918008296..01e3b4e95c 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -1882,8 +1882,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); - tcg_out_jmp(s, tb_ret_addr); + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_jmp(s, s->code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); + tcg_out_jmp(s, tb_ret_addr); + } break; case INDEX_op_goto_tb: if (s->tb_jmp_insn_offset) { @@ -1906,6 +1911,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + /* jmp to the given host address (could be epilogue) */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); + break; case INDEX_op_br: tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0); break; @@ -2277,6 +2286,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) { + static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } }; static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } }; static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } }; @@ -2299,6 +2309,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) = { .args_ct_str = { "L", "L", "L", "L" } }; switch (op) { + case INDEX_op_goto_ptr: + return &r; + case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i32: @@ -2567,6 +2580,13 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); #endif + /* + * Return path for goto_ptr. Set return value to 0, a-la exit_tb, + * and fall through to the rest of the epilogue. + */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0); + /* TB epilogue */ tb_ret_addr = s->code_ptr; diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 42aea03a8b..901bb7575d 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -173,6 +173,7 @@ typedef enum { #define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index f46d64a3a7..d75cb63ed3 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -130,6 +130,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_goto_ptr 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index 2a7e1c7f5b..8cff9a6bf9 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -1747,6 +1747,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_nop(s); s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + /* jmp to the given host address (could be epilogue) */ + tcg_out_opc_reg(s, OPC_JR, 0, a0, 0); + tcg_out_nop(s); + break; case INDEX_op_br: tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, arg_label(a0)); @@ -2160,6 +2165,7 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, @@ -2451,6 +2457,13 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* delay slot */ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + /* + * Return path for goto_ptr. Set return value to 0, a-la exit_tb, + * and fall through to the rest of the epilogue. + */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO); + /* TB epilogue */ tb_ret_addr = s->code_ptr; for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index abd8b3d6cd..5f4a40a5b4 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -82,6 +82,7 @@ extern bool have_isa_3_00; #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 +#define TCG_TARGET_HAS_goto_ptr 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index 64f67d2c77..8d50f18328 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -1932,6 +1932,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* Epilogue */ tcg_debug_assert(tb_ret_addr == s->code_ptr); + s->code_gen_epilogue = tb_ret_addr; tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { @@ -1986,6 +1987,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, #endif s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + tcg_out32(s, MTSPR | RS(args[0]) | CTR); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0); + tcg_out32(s, BCCTR | BO_ALWAYS); + break; case INDEX_op_br: { TCGLabel *l = arg_label(args[0]); @@ -2555,6 +2561,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index cbdd2a6275..957f0c0afe 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -92,6 +92,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index a679280b92..5d7083e90c 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -1741,9 +1741,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - /* return value */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]); - tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); + /* Reuse the zeroing that exists for goto_ptr. */ + a0 = args[0]; + if (a0 == 0) { + tgen_gotoi(s, S390_CC_ALWAYS, s->code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0); + tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); + } break; case INDEX_op_goto_tb: @@ -1767,6 +1772,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, args[0]); + break; + OP_32_64(ld8u): /* ??? LLC (RXY format) is only present with the extended-immediate facility, whereas LLGC is always present. */ @@ -2241,6 +2250,7 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, @@ -2439,6 +2449,14 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* br %r3 (go to TB) */ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]); + /* + * Return path for goto_ptr. Set return value to 0, a-la exit_tb, + * and fall through to the rest of the epilogue. + */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0); + + /* TB epilogue */ tb_ret_addr = s->code_ptr; /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index b8b74f96ff..854a0afd70 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -123,6 +123,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_extrl_i64_i32 1 #define TCG_TARGET_HAS_extrh_i64_i32 1 diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c index 3785d77f62..18afce2f87 100644 --- a/tcg/sparc/tcg-target.inc.c +++ b/tcg/sparc/tcg-target.inc.c @@ -1003,7 +1003,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* delay slot */ tcg_out_nop(s); - /* No epilogue required. We issue ret + restore directly in the TB. */ + /* Epilogue for goto_ptr. */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + /* delay slot */ + tcg_out_movi_imm13(s, TCG_REG_O0, 0); #ifdef CONFIG_SOFTMMU build_trampolines(s); @@ -1288,6 +1292,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_nop(s); s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); + tcg_out_nop(s); + break; case INDEX_op_br: tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); tcg_out_nop(s); @@ -1513,6 +1521,7 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 6b1f41500c..87f673ef49 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2587,6 +2587,18 @@ void tcg_gen_goto_tb(unsigned idx) tcg_gen_op1i(INDEX_op_goto_tb, idx); } +void tcg_gen_lookup_and_goto_ptr(TCGv addr) +{ + if (TCG_TARGET_HAS_goto_ptr && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + TCGv_ptr ptr = tcg_temp_new_ptr(); + gen_helper_lookup_tb_ptr(ptr, tcg_ctx.tcg_env, addr); + tcg_gen_op1i(INDEX_op_goto_ptr, GET_TCGV_PTR(ptr)); + tcg_temp_free_ptr(ptr); + } else { + tcg_gen_exit_tb(0); + } +} + static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st) { /* Trigger the asserts within as early as possible. */ diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index c68e300a68..5d3278f243 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -796,6 +796,17 @@ static inline void tcg_gen_exit_tb(uintptr_t val) */ void tcg_gen_goto_tb(unsigned idx); +/** + * tcg_gen_lookup_and_goto_ptr() - look up a TB and jump to it if valid + * @addr: Guest address of the target TB + * + * If the TB is not valid, jump to the epilogue. + * + * This operation is optional. If the TCG backend does not implement goto_ptr, + * this op is equivalent to calling tcg_gen_exit_tb() with 0 as the argument. + */ +void tcg_gen_lookup_and_goto_ptr(TCGv addr); + #if TARGET_LONG_BITS == 32 #define tcg_temp_new() tcg_temp_new_i32() #define tcg_global_reg_new tcg_global_reg_new_i32 diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index f06f89405e..956fb1e9f3 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -193,6 +193,7 @@ DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT) DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END) DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END) +DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr)) DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index 114ea6fecf..c41d38a557 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl) + DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) #ifdef CONFIG_SOFTMMU diff --git a/tcg/tcg.c b/tcg/tcg.c index cb898f1636..564292f54d 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -424,6 +424,11 @@ void tcg_prologue_init(TCGContext *s) qemu_log_unlock(); } #endif + + /* Assert that goto_ptr is implemented completely. */ + if (TCG_TARGET_HAS_goto_ptr) { + tcg_debug_assert(s->code_gen_epilogue != NULL); + } } void tcg_func_start(TCGContext *s) diff --git a/tcg/tcg.h b/tcg/tcg.h index 6c216bb73f..5ec48d1787 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -699,6 +699,7 @@ struct TCGContext { extension that allows arithmetic on void*. */ int code_gen_max_blocks; void *code_gen_prologue; + void *code_gen_epilogue; void *code_gen_buffer; size_t code_gen_buffer_size; void *code_gen_ptr; diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 838bf3a858..06963288dc 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -85,6 +85,7 @@ #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extrl_i64_i32 0