From 1639a965d30b45b10134b69bf49dd3e657d2ef09 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2017 10:37:42 -0700 Subject: [PATCH 01/26] target/nios2: Fix 64-bit ilp32 compilation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid a "cast from pointer to integer of different size" warning by using the proper host type. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- target/nios2/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/nios2/translate.c b/target/nios2/translate.c index cfec47959d..2f3c2e5dfb 100644 --- a/target/nios2/translate.c +++ b/target/nios2/translate.c @@ -164,7 +164,7 @@ static void gen_goto_tb(DisasContext *dc, int n, uint32_t dest) if (use_goto_tb(dc, dest)) { tcg_gen_goto_tb(n); tcg_gen_movi_tl(dc->cpu_R[R_PC], dest); - tcg_gen_exit_tb((tcg_target_long)tb + n); + tcg_gen_exit_tb((uintptr_t)tb + n); } else { tcg_gen_movi_tl(dc->cpu_R[R_PC], dest); tcg_gen_exit_tb(0); From f1079bb8f9a28495937408acc60a681a7b2536a8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2017 10:39:08 -0700 Subject: [PATCH 02/26] tcg/sparc: Use the proper compilation flags for 32-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have required a v9 cpu since 9b9c37c36439ee0452632253dac7a31897f27f70. However, the flags we were using did not reliably enable v8plus, which meant that the compiler didn't know it could inline 64-bit atomics. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- configure | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configure b/configure index fbb6a93c99..13e040d28c 100755 --- a/configure +++ b/configure @@ -1213,12 +1213,12 @@ case "$cpu" in LDFLAGS="-m64 $LDFLAGS" ;; sparc) - LDFLAGS="-m32 $LDFLAGS" - CPU_CFLAGS="-m32 -mcpu=ultrasparc" + CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc" + LDFLAGS="-m32 -mv8plus $LDFLAGS" ;; sparc64) - LDFLAGS="-m64 $LDFLAGS" CPU_CFLAGS="-m64 -mcpu=ultrasparc" + LDFLAGS="-m64 $LDFLAGS" ;; s390) CPU_CFLAGS="-m31" From 374aae653499f4d405caf32b7fff0c8639113fe4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2017 10:41:55 -0700 Subject: [PATCH 03/26] qemu/atomic: Loosen restrictions for 64-bit ILP32 hosts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to coordinate with the TCG_OVERSIZED_GUEST test in cputlb.c, and allow 64-bit atomics even though sizeof(void *) == 4. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- include/qemu/atomic.h | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 878fa0700d..e07c7972ab 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -88,6 +88,24 @@ #define smp_read_barrier_depends() barrier() #endif +/* Sanity check that the size of an atomic operation isn't "overly large". + * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not + * want to use them because we ought not need them, and this lets us do a + * bit of sanity checking that other 32-bit hosts might build. + * + * That said, we have a problem on 64-bit ILP32 hosts in that in order to + * sync with TCG_OVERSIZED_GUEST, this must match TCG_TARGET_REG_BITS. + * We'd prefer not want to pull in everything else TCG related, so handle + * those few cases by hand. + * + * Note that x32 is fully detected with __x64_64__ + _ILP32, and that for + * Sparc we always force the use of sparcv9 in configure. + */ +#if defined(__x86_64__) || defined(__sparc__) +# define ATOMIC_REG_SIZE 8 +#else +# define ATOMIC_REG_SIZE sizeof(void *) +#endif /* Weak atomic operations prevent the compiler moving other * loads/stores past the atomic operation load/store. However there is @@ -104,7 +122,7 @@ #define atomic_read(ptr) \ ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ atomic_read__nocheck(ptr); \ }) @@ -112,7 +130,7 @@ __atomic_store_n(ptr, i, __ATOMIC_RELAXED) #define atomic_set(ptr, i) do { \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ atomic_set__nocheck(ptr, i); \ } while(0) @@ -130,27 +148,27 @@ #define atomic_rcu_read(ptr) \ ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ typeof_strip_qual(*ptr) _val; \ atomic_rcu_read__nocheck(ptr, &_val); \ _val; \ }) #define atomic_rcu_set(ptr, i) do { \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ } while(0) #define atomic_load_acquire(ptr) \ ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ typeof_strip_qual(*ptr) _val; \ __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE); \ _val; \ }) #define atomic_store_release(ptr, i) do { \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ } while(0) @@ -162,7 +180,7 @@ }) #define atomic_xchg(ptr, i) ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ atomic_xchg__nocheck(ptr, i); \ }) @@ -175,7 +193,7 @@ }) #define atomic_cmpxchg(ptr, old, new) ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \ atomic_cmpxchg__nocheck(ptr, old, new); \ }) From cedbcb01529cb6cf9a2289cdbebbc63f6149fc18 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 26 Apr 2017 23:29:14 -0400 Subject: [PATCH 04/26] tcg: Introduce goto_ptr opcode and tcg_gen_lookup_and_goto_ptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of exporting goto_ptr directly to TCG frontends, export tcg_gen_lookup_and_goto_ptr(), which calls goto_ptr with the pointer returned by the lookup_tb_ptr() helper. This is the only use case we have for goto_ptr and lookup_tb_ptr, so having this function is very convenient. Furthermore, it trivially allows us to avoid calling the lookup helper if goto_ptr is not implemented by the backend. Reviewed-by: Alex Bennée Signed-off-by: Emilio G. Cota Message-Id: <1493263764-18657-2-git-send-email-cota@braap.org> Message-Id: <1493263764-18657-3-git-send-email-cota@braap.org> Message-Id: <1493263764-18657-4-git-send-email-cota@braap.org> Message-Id: <1493263764-18657-5-git-send-email-cota@braap.org> [rth: Squashed 4 related commits.] Signed-off-by: Richard Henderson --- cpu-exec.c | 6 ++---- include/exec/exec-all.h | 2 ++ tcg-runtime.c | 32 ++++++++++++++++++++++++++++++++ tcg/README | 8 ++++++++ tcg/aarch64/tcg-target.h | 1 + tcg/arm/tcg-target.h | 1 + tcg/i386/tcg-target.h | 1 + tcg/ia64/tcg-target.h | 1 + tcg/mips/tcg-target.h | 1 + tcg/ppc/tcg-target.h | 1 + tcg/s390/tcg-target.h | 1 + tcg/sparc/tcg-target.h | 1 + tcg/tcg-op.c | 12 ++++++++++++ tcg/tcg-op.h | 11 +++++++++++ tcg/tcg-opc.h | 1 + tcg/tcg-runtime.h | 2 ++ tcg/tcg.c | 5 +++++ tcg/tcg.h | 1 + tcg/tci/tcg-target.h | 1 + 19 files changed, 85 insertions(+), 4 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index 63a56d0407..5b181c18ed 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -309,10 +309,8 @@ static bool tb_cmp(const void *p, const void *d) return false; } -static TranslationBlock *tb_htable_lookup(CPUState *cpu, - target_ulong pc, - target_ulong cs_base, - uint32_t flags) +TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, + target_ulong cs_base, uint32_t flags) { tb_page_addr_t phys_pc; struct tb_desc desc; diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index bcde1e6a14..87ae10bcc9 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -368,6 +368,8 @@ struct TranslationBlock { void tb_free(TranslationBlock *tb); void tb_flush(CPUState *cpu); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); +TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, + target_ulong cs_base, uint32_t flags); #if defined(USE_DIRECT_JUMP) diff --git a/tcg-runtime.c b/tcg-runtime.c index 4c60c96658..7fa90ce508 100644 --- a/tcg-runtime.c +++ b/tcg-runtime.c @@ -27,6 +27,9 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" +#include "exec/tb-hash.h" +#include "disas/disas.h" +#include "exec/log.h" /* 32-bit helpers */ @@ -141,6 +144,35 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) return ctpop64(arg); } +void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) +{ + CPUState *cpu = ENV_GET_CPU(env); + TranslationBlock *tb; + target_ulong cs_base, pc; + uint32_t flags; + + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]); + if (likely(tb)) { + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + if (likely(tb->pc == addr && tb->cs_base == cs_base && + tb->flags == flags)) { + goto found; + } + tb = tb_htable_lookup(cpu, addr, cs_base, flags); + if (likely(tb)) { + atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb); + goto found; + } + } + return tcg_ctx.code_gen_epilogue; + found: + qemu_log_mask_and_addr(CPU_LOG_EXEC, addr, + "Chain %p [%d: " TARGET_FMT_lx "] %s\n", + tb->tc_ptr, cpu->cpu_index, addr, + lookup_symbol(addr)); + return tb->tc_ptr; +} + void HELPER(exit_atomic)(CPUArchState *env) { cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); diff --git a/tcg/README b/tcg/README index a9858c2f74..bf49e8242b 100644 --- a/tcg/README +++ b/tcg/README @@ -477,6 +477,14 @@ current TB was linked to this TB. Otherwise execute the next instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued at most once with each slot index per TB. +* lookup_and_goto_ptr tb_addr + +Look up a TB address ('tb_addr') and jump to it if valid. If not valid, +jump to the TCG epilogue to go back to the exec loop. + +This operation is optional. If the TCG backend does not implement the +goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0). + * qemu_ld_i32/i64 t0, t1, flags, memidx * qemu_st_i32/i64 t0, t1, flags, memidx diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 1a5ea23844..b82eac42ae 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -77,6 +77,7 @@ typedef enum { #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 75ea247bc4..c114df7ed5 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -123,6 +123,7 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 enum { TCG_AREG0 = TCG_REG_R6, diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 4275787db9..59d983525c 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -107,6 +107,7 @@ extern bool have_popcnt; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extrl_i64_i32 0 diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 42aea03a8b..901bb7575d 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -173,6 +173,7 @@ typedef enum { #define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index f46d64a3a7..e3240cfba7 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -130,6 +130,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_goto_ptr 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index abd8b3d6cd..a9aa974f77 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -82,6 +82,7 @@ extern bool have_isa_3_00; #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 +#define TCG_TARGET_HAS_goto_ptr 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index cbdd2a6275..6b7bcfbdf7 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -92,6 +92,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index b8b74f96ff..9348ddd046 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -123,6 +123,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 #define TCG_TARGET_HAS_extrl_i64_i32 1 #define TCG_TARGET_HAS_extrh_i64_i32 1 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 6b1f41500c..87f673ef49 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2587,6 +2587,18 @@ void tcg_gen_goto_tb(unsigned idx) tcg_gen_op1i(INDEX_op_goto_tb, idx); } +void tcg_gen_lookup_and_goto_ptr(TCGv addr) +{ + if (TCG_TARGET_HAS_goto_ptr && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + TCGv_ptr ptr = tcg_temp_new_ptr(); + gen_helper_lookup_tb_ptr(ptr, tcg_ctx.tcg_env, addr); + tcg_gen_op1i(INDEX_op_goto_ptr, GET_TCGV_PTR(ptr)); + tcg_temp_free_ptr(ptr); + } else { + tcg_gen_exit_tb(0); + } +} + static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st) { /* Trigger the asserts within as early as possible. */ diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index c68e300a68..5d3278f243 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -796,6 +796,17 @@ static inline void tcg_gen_exit_tb(uintptr_t val) */ void tcg_gen_goto_tb(unsigned idx); +/** + * tcg_gen_lookup_and_goto_ptr() - look up a TB and jump to it if valid + * @addr: Guest address of the target TB + * + * If the TB is not valid, jump to the epilogue. + * + * This operation is optional. If the TCG backend does not implement goto_ptr, + * this op is equivalent to calling tcg_gen_exit_tb() with 0 as the argument. + */ +void tcg_gen_lookup_and_goto_ptr(TCGv addr); + #if TARGET_LONG_BITS == 32 #define tcg_temp_new() tcg_temp_new_i32() #define tcg_global_reg_new tcg_global_reg_new_i32 diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index f06f89405e..956fb1e9f3 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -193,6 +193,7 @@ DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT) DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END) DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END) +DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr)) DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index 114ea6fecf..c41d38a557 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl) + DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) #ifdef CONFIG_SOFTMMU diff --git a/tcg/tcg.c b/tcg/tcg.c index cb898f1636..564292f54d 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -424,6 +424,11 @@ void tcg_prologue_init(TCGContext *s) qemu_log_unlock(); } #endif + + /* Assert that goto_ptr is implemented completely. */ + if (TCG_TARGET_HAS_goto_ptr) { + tcg_debug_assert(s->code_gen_epilogue != NULL); + } } void tcg_func_start(TCGContext *s) diff --git a/tcg/tcg.h b/tcg/tcg.h index 6c216bb73f..5ec48d1787 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -699,6 +699,7 @@ struct TCGContext { extension that allows arithmetic on void*. */ int code_gen_max_blocks; void *code_gen_prologue; + void *code_gen_epilogue; void *code_gen_buffer; size_t code_gen_buffer_size; void *code_gen_ptr; diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 838bf3a858..06963288dc 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -85,6 +85,7 @@ #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_goto_ptr 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extrl_i64_i32 0 From 5cb4ef80f65252dd85b86fa7f3c985015423d670 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 26 Apr 2017 23:29:18 -0400 Subject: [PATCH 05/26] tcg/i386: implement goto_ptr Suggested-by: Richard Henderson Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Message-Id: <1493263764-18657-6-git-send-email-cota@braap.org> [rth: Reuse goto_ptr epilogue for exit_tb 0.] Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.h | 2 +- tcg/i386/tcg-target.inc.c | 24 ++++++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 59d983525c..73a15f7e80 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -107,7 +107,7 @@ extern bool have_popcnt; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_goto_ptr 0 +#define TCG_TARGET_HAS_goto_ptr 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extrl_i64_i32 0 diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 5918008296..01e3b4e95c 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -1882,8 +1882,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); - tcg_out_jmp(s, tb_ret_addr); + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_jmp(s, s->code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); + tcg_out_jmp(s, tb_ret_addr); + } break; case INDEX_op_goto_tb: if (s->tb_jmp_insn_offset) { @@ -1906,6 +1911,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + /* jmp to the given host address (could be epilogue) */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); + break; case INDEX_op_br: tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0); break; @@ -2277,6 +2286,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) { + static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } }; static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } }; static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } }; @@ -2299,6 +2309,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) = { .args_ct_str = { "L", "L", "L", "L" } }; switch (op) { + case INDEX_op_goto_ptr: + return &r; + case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i32: @@ -2567,6 +2580,13 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); #endif + /* + * Return path for goto_ptr. Set return value to 0, a-la exit_tb, + * and fall through to the rest of the epilogue. + */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0); + /* TB epilogue */ tb_ret_addr = s->code_ptr; From 7ad55b4ffd982c80f26f7f3658138d94cdc678e8 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 26 Apr 2017 23:29:19 -0400 Subject: [PATCH 06/26] target/arm: optimize cross-page direct jumps in softmmu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of unconditionally exiting to the exec loop, use the lookup_and_goto_ptr helper to jump to the target if it is valid. Perf impact: see next commit's log. Reviewed-by: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Message-Id: <1493263764-18657-7-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson --- target/arm/translate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index ae6646c05b..d72953f124 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -4157,8 +4157,12 @@ static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) gen_set_pc_im(s, dest); tcg_gen_exit_tb((uintptr_t)s->tb + n); } else { + TCGv addr = tcg_temp_new(); + gen_set_pc_im(s, dest); - tcg_gen_exit_tb(0); + tcg_gen_extu_i32_tl(addr, cpu_R[15]); + tcg_gen_lookup_and_goto_ptr(addr); + tcg_temp_free(addr); } } From 8a6b28c7b5104263344508df0f4bce97f22cfcaf Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 26 Apr 2017 23:29:20 -0400 Subject: [PATCH 07/26] target/arm: optimize indirect branches Speed up indirect branches by jumping to the target if it is valid. Softmmu measurements (see later commit for user-mode results): Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0. - Impact on Boot time | setup | ARM debian jessie boot+shutdown time | stddev | |--------+--------------------------------------+--------| | v2.9.0 | 8.84 | 0.07 | | +cross | 8.85 | 0.03 | | +jr | 8.83 | 0.06 | - NBench, arm-softmmu (debian jessie guest). Host: Intel i7-4790K @ 4.00GHz 1.3x +-+-------------------------------------------------------------------------------------------------------------+-+ | | | cross #### | 1.25x +cross+jr..........................................................#++#.........................................+-+ | #### # # | | +++# # # # | | +++ **** # # # | 1.2x +-+...................................####............*..*..#......#..#.........................................+-+ | **** # * * # # # #### | | * * # * * # # # # # | 1.15x +-+................................*..*..#............*..*..#......#..#.....#..#................................+-+ | * * # * * # # # # # | | * * # #### * * # # # # # | | * * # # # * * # # # # # #### | 1.1x +-+................................*..*..#......#..#..*..*..#......#..#.....#..#.........................#..#...+-+ | * * # # # * * # # # # # # # | | * * # # # * * # # # # # # # | 1.05x +-+..........................####..*..*..#......#..#..*..*..#......#..#.....#..#......+++............*****..#...+-+ | ***** # * * # # # * * # ***** # # # +++ | ****### * * # | | *+++* # * * # # # * * # *+++* # **** # *****### * * # * * # | | *****### +++#### * * # * * # ***** # * * # * * # * * # * | *++# * * # * * # | 1x +-++-+*+++*-+#++****++#++*+-+*++#+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-++-+ | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.95x +-+---*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###---+-+ ASSIGNMENT BITFIELD FOURFP EMULATION HUFFMAN LU DECOMPOSITIONEURAL NNUMERIC SOSTRING SORT hmean png: http://imgur.com/eOLmZNR NB. 'cross' represents the previous commit. Signed-off-by: Emilio G. Cota Message-Id: <1493263764-18657-8-git-send-email-cota@braap.org> [rth: Replace gen_jr global variable with DISAS_EXIT state.] Signed-off-by: Richard Henderson --- target/arm/translate.c | 25 ++++++++++++++++--------- target/arm/translate.h | 4 ++++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index d72953f124..0862f9e4aa 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -1182,7 +1182,7 @@ static void gen_exception_internal_insn(DisasContext *s, int offset, int excp) gen_set_condexec(s); gen_set_pc_im(s, s->pc - offset); gen_exception_internal(excp); - s->is_jmp = DISAS_JUMP; + s->is_jmp = DISAS_EXC; } static void gen_exception_insn(DisasContext *s, int offset, int excp, @@ -1191,14 +1191,14 @@ static void gen_exception_insn(DisasContext *s, int offset, int excp, gen_set_condexec(s); gen_set_pc_im(s, s->pc - offset); gen_exception(excp, syn, target_el); - s->is_jmp = DISAS_JUMP; + s->is_jmp = DISAS_EXC; } /* Force a TB lookup after an instruction that changes the CPU state. */ static inline void gen_lookup_tb(DisasContext *s) { tcg_gen_movi_i32(cpu_R[15], s->pc & ~1); - s->is_jmp = DISAS_JUMP; + s->is_jmp = DISAS_EXIT; } static inline void gen_hlt(DisasContext *s, int imm) @@ -4150,19 +4150,23 @@ static inline bool use_goto_tb(DisasContext *s, target_ulong dest) #endif } -static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) +static void gen_goto_ptr(void) +{ + TCGv addr = tcg_temp_new(); + tcg_gen_extu_i32_tl(addr, cpu_R[15]); + tcg_gen_lookup_and_goto_ptr(addr); + tcg_temp_free(addr); +} + +static void gen_goto_tb(DisasContext *s, int n, target_ulong dest) { if (use_goto_tb(s, dest)) { tcg_gen_goto_tb(n); gen_set_pc_im(s, dest); tcg_gen_exit_tb((uintptr_t)s->tb + n); } else { - TCGv addr = tcg_temp_new(); - gen_set_pc_im(s, dest); - tcg_gen_extu_i32_tl(addr, cpu_R[15]); - tcg_gen_lookup_and_goto_ptr(addr); - tcg_temp_free(addr); + gen_goto_ptr(); } } @@ -12095,11 +12099,14 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) gen_set_pc_im(dc, dc->pc); /* fall through */ case DISAS_JUMP: + gen_goto_ptr(); + break; default: /* indicate that the hash table must be used to find the next TB */ tcg_gen_exit_tb(0); break; case DISAS_TB_JUMP: + case DISAS_EXC: /* nothing more to generate */ break; case DISAS_WFI: diff --git a/target/arm/translate.h b/target/arm/translate.h index 6b2cc34c33..15d383d9af 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -139,6 +139,10 @@ static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) * custom end-of-TB code) */ #define DISAS_BX_EXCRET 11 +/* For instructions which want an immediate exit to the main loop, + * as opposed to attempting to use lookup_and_goto_ptr. + */ +#define DISAS_EXIT 12 #ifdef TARGET_AARCH64 void a64_translate_init(void); From 1ebb1af1b8068fca36f48f738eb7146ecdf03625 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 26 Apr 2017 23:29:21 -0400 Subject: [PATCH 08/26] target/i386: introduce gen_jr helper to generate lookup_and_goto_ptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helper will be used by subsequent changes. Reviewed-by: Alex Bennée Signed-off-by: Emilio G. Cota Message-Id: <1493263764-18657-9-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson --- target/i386/translate.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/target/i386/translate.c b/target/i386/translate.c index 1d1372fb43..f0e48dc361 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -141,6 +141,7 @@ typedef struct DisasContext { } DisasContext; static void gen_eob(DisasContext *s); +static void gen_jr(DisasContext *s, TCGv dest); static void gen_jmp(DisasContext *s, target_ulong eip); static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num); static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d); @@ -2509,7 +2510,8 @@ static void gen_bnd_jmp(DisasContext *s) If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of S->TF. This is used by the syscall/sysret insns. */ -static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf) +static void +do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, TCGv jr) { gen_update_cc_op(s); @@ -2530,12 +2532,27 @@ static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf) tcg_gen_exit_tb(0); } else if (s->tf) { gen_helper_single_step(cpu_env); + } else if (!TCGV_IS_UNUSED(jr)) { + TCGv vaddr = tcg_temp_new(); + + tcg_gen_add_tl(vaddr, jr, cpu_seg_base[R_CS]); + tcg_gen_lookup_and_goto_ptr(vaddr); + tcg_temp_free(vaddr); } else { tcg_gen_exit_tb(0); } s->is_jmp = DISAS_TB_JUMP; } +static inline void +gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf) +{ + TCGv unused; + + TCGV_UNUSED(unused); + do_gen_eob_worker(s, inhibit, recheck_tf, unused); +} + /* End of block. If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. */ static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit) @@ -2549,6 +2566,12 @@ static void gen_eob(DisasContext *s) gen_eob_worker(s, false, false); } +/* Jump to register */ +static void gen_jr(DisasContext *s, TCGv dest) +{ + do_gen_eob_worker(s, false, false, dest); +} + /* generate a jump to eip. No segment change must happen before as a direct call to the next block may occur */ static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) From fe62089563ffc6a42f16ff28a6b6be34d2697766 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 26 Apr 2017 23:29:22 -0400 Subject: [PATCH 09/26] target/i386: optimize cross-page direct jumps in softmmu Instead of unconditionally exiting to the exec loop, use the gen_jr helper to jump to the target if it is valid. Perf impact: see next commit's log. Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Message-Id: <1493263764-18657-10-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson --- target/i386/translate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/translate.c b/target/i386/translate.c index f0e48dc361..ea113fe5aa 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -2154,9 +2154,9 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) gen_jmp_im(eip); tcg_gen_exit_tb((uintptr_t)s->tb + tb_num); } else { - /* jump to another page: currently not optimized */ + /* jump to another page */ gen_jmp_im(eip); - gen_eob(s); + gen_jr(s, cpu_tmp0); } } From b4aa297781ceddef79deb0e99da7817551fa89f8 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 26 Apr 2017 23:29:23 -0400 Subject: [PATCH 10/26] target/i386: optimize indirect branches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Speed up indirect branches by jumping to the target if it is valid. Softmmu measurements (see later commit for user-mode numbers): Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0. - SPECint06 (test set), x86_64-softmmu (Ubuntu 16.04 guest). Host: Intel i7-4790K @ 4.00GHz 2.4x +-+--------------------------------------------------------------------------------------------------------------+-+ | | | cross | 2.2x +cross+jr..........................................................................+++...........................+-+ | | | | +++ | | 2x +-+..............................................................................|..|............................+-+ | | | | | | | | 1.8x +-+..............................................................................|####...........................+-+ | |# |# | | **** |# | 1.6x +-+............................................................................*.|*.|#...........................+-+ | * |* |# | | * |* |# | 1.4x +-+.......................................................................+++..*.|*.|#...........................+-+ | ++++++ #### * |*++# +++ | | +++ | | #++# *++* # +++ | | 1.2x +-+......................###.....####....+++............|..|...........****..#.*..*..#....####...|.###.....####..+-+ | +++ **** # **** # #### ***### *++* # * * # #++# ****|# +++#++# | | ****### +++ *++* # *++* # ++# # #### *|* |# +++ * * # * * # *** # *| *|# **** # | 1x +-++-*++*++#++***###++*++*+#++*+-*++#+****++#++***++#+-*+*++#-+****##++*++*-+#+*++*-+#++*+*++#++*-+*+#++*++*++#-++-+ | * * # * * # * * # * * # * * # * * # *|* |# *++* # * * # * * # * * # * * # * * # | | * * # * * # * * # * * # * * # * * # *+*++# * * # * * # * * # * * # * * # * * # | 0.8x +-+--****###--***###--****##--****###-****###--***###--***###--****##--****###-****###--***###--****##--****###--+-+ astar bzip2 gcc gobmk h264ref hmmlibquantum mcf omnetpperlbench sjengxalancbmk hmean png: http://imgur.com/DU36YFU NB. 'cross' represents the previous commit. Reviewed-by: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Message-Id: <1493263764-18657-11-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson --- target/i386/translate.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/target/i386/translate.c b/target/i386/translate.c index ea113fe5aa..674ec96d5a 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -4996,7 +4996,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_push_v(s, cpu_T1); gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 3: /* lcall Ev */ gen_op_ld_v(s, ot, cpu_T1, cpu_A0); @@ -5014,7 +5014,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_const_i32(dflag - 1), tcg_const_i32(s->pc - s->cs_base)); } - gen_eob(s); + tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); + gen_jr(s, cpu_tmp4); break; case 4: /* jmp Ev */ if (dflag == MO_16) { @@ -5022,7 +5023,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 5: /* ljmp Ev */ gen_op_ld_v(s, ot, cpu_T1, cpu_A0); @@ -5037,7 +5038,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_movl_seg_T0_vm(R_CS); gen_op_jmp_v(cpu_T1); } - gen_eob(s); + tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); + gen_jr(s, cpu_tmp4); break; case 6: /* push Ev */ gen_push_v(s, cpu_T0); @@ -6417,7 +6419,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, /* Note that gen_pop_T0 uses a zero-extending load. */ gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 0xc3: /* ret */ ot = gen_pop_T0(s); @@ -6425,7 +6427,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, /* Note that gen_pop_T0 uses a zero-extending load. */ gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 0xca: /* lret im */ val = cpu_ldsw_code(env, s->pc); From 6f1653180f5701c6a8f1b35b89a80b1e3260928e Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 26 Apr 2017 23:29:24 -0400 Subject: [PATCH 11/26] tb-hash: improve tb_jmp_cache hash function in user mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimizations to cross-page chaining and indirect branches make performance more sensitive to the hit rate of tb_jmp_cache. The constraint of reserving some bits for the page number lowers the achievable quality of the hashing function. However, user-mode does not have this requirement. Thus, with this change we use for user-mode a hashing function that is both faster and of better quality than the previous one. Measurements: Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0. - SPECint06 (test set), x86_64-linux-user. Host: Intel i7-6700K @ 4.00GHz 2.2x +-+--------------------------------------------------------------------------------------------------------------+-+ | | | jr | 2x +jr+multhash +....................................................+++++...................................+-+ | jr+hash |$$$ | | |$+$ | | ### $ | 1.8x +-+......................................................................#|#.$...................................+-+ | ++#+# $ | | |# # $ | 1.6x +-+....................................................................***.#.$....................++$$$..........+-+ | $$$ *+* # $ |$+$ | | ++$$$ ### $ * * # $ +++|$ $ | | ++###+$ # # $ * * # $ ### ****## $ | 1.4x +-+...................***+#.$.........***.#.$..........................*.*.#.$...........#+#$$.*++*|#.$..........+-+ | *+* # $ * * # $ * * # $ # # $ * *+# $ | | * * # $ +++++ * * # $ * * # $ *** # $ * * # $ ###$$ | 1.2x +-+...................*.*.#.$.***##$$.*.*.#.$..........................*.*.#.$.........*.*.#.$.*..*.#.$.***+#+$..+-+ | * * # $ *+* # $ * * # $ +++ * * # $ ++###$$ * * # $ * * # $ * * # $ | | ***##$$ * * # $ * * # $ * * # $ ***##$$ ++### * * # $ *** #+$ * * # $ * * # $ * * # $ | | *+*+#+$ ***##$$$ * * # $ * * # $ * * # $ *+* # $ ++####$$ ***+# * * # $ * * # $ * * # $ * * # $ * * # $ | 1x +-++-*+*+#+$+*+*+#-+$+*+*-#+$+*+*+#+$+*+*+#+$+*-*+#+$+***++#+$+*+*+#$$+*+*+#+$+*+*+#+$+*+*-#+$+*+-*+#+$+*+*+#+$-++-+ | * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ | | * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ | 0.8x +-+--***##$$-***##$$$-***##$$-***##$$-***##$$-***##$$-***###$$-***##$$-***##$$-***##$$-***##$$-****##$$-***##$$--+-+ astar bzip2 gcc gobmk h264ref hmmlibquantum mcf omnetpperlbench sjengxalancbmk hmean png: http://imgur.com/4UXTrEc Here I also tried the hash function suggested by Paolo ("multhash"): return ((uint64_t) (pc * 2654435761) >> 32) & (TB_JMP_CACHE_SIZE - 1); As you can see it is just as good as the other new function ("hash"), which is what I ended up going with. - SPECint06 (train set), x86_64-linux-user. Host: Intel i7-6700K @ 4.00GHz 2.6x +-+--------------------------------------------------------------------------------------------------------------+-+ | | | jr ### | 2.4x +jr+hash...........................................................................................#.#...........+-+ | # # | | # # | 2.2x +-+................................................................................................#.#...........+-+ | # # | | # # | 2x +-+................................................................................................#.#...........+-+ | **** # | | * * # | 1.8x +-+.............................................................................................*..*.#...........+-+ | +++ * * # | | #### #### * * # | 1.6x +-+......................................####.............................#..#.****..#..........*..*.#...........+-+ | +++ #++# **** # * * # #### * * # | | ### # # * * # * * # # # * * # | 1.4x +-+...................****+#..........****..#..........................*..*..#.*..*..#....#..#..*..*.#...........+-+ | *++* # * * # * * # * * # *** # * * # #### | | * * # #### * * # * * # * * # * * # * * # **** # | 1.2x +-+...................*..*.#..****++#.*..*..#..........................*..*..#.*..*..#..*.*..#..*..*.#..*..*..#..+-+ | ****### * * # * * # * * # * * # * * # * * # * * # * * # | | * * # ***### * * # * * # * * # ****## * * # * * # * * # * * # * * # | 1x +-+--****###--***###--****##--****###-****###--***###--***###--****##--****###-****###--***###--****##--****###--+-+ astar bzip2 gcc gobmk h264ref hmmlibquantum mcf omnetpperlbench sjengxalancbmk hmean png: http://imgur.com/ArCbHqo - NBench, x86_64-linux-user. Host: Intel i7-6700K @ 4.00GHz 1.12x +-+-------------------------------------------------------------------------------------------------------------+-+ | | | jr +++ | 1.1x +jr+hash...........................................................####.........................................+-+ | +++#| # | | | #++# | 1.08x +-+................................+++................+++.+++..*****..#.........................................+-+ | | +++ | | * | * # | | | | | | *+++* # | 1.06x +-+................................****###.............|...|...*...*..#.........................+++.............+-+ | *| * |# ****### * * # | | | *| *++# *| * |# * * # #### | 1.04x +-+................................*++*..#............*|.*.|#..*...*..#........................#.|#.............+-+ | * * # *++*++# * * # +++#++# | | * * # * * # * * # | # # +++#### | 1.02x +-+................................*..*..#......+++...*..*..#..*...*..#.....................****..#..*****++#...+-+ | +++ * * # +++ | * * # * * # +++ *| * # *+++* # | | +++ | +++ +++ ++++++ * * # *****### * * # * * # | +++ ++++++ *++* # * * # | 1x +-++-+++++####++****###++++-+####+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-+++####-+*****###++*++*++#++*+-+*++#+-++-+ | *****| # *++* |# *****| # * * # * *++# * * # * * # **** |# * * # * * # * * # | | * | *| # * *++# * | *++# * * # * * # * * # * * # *| *++# * * # * * # * * # | 0.98x +-+...*.|.*++#..*..*..#..*+++*..#..*..*..#..*...*..#..*..*..#..*...*..#..*++*..#..*...*..#..*..*..#..*...*..#...+-+ | *+++* # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.96x +-+---*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###---+-+ ASSIGNMENT BITFIELD FOURFP EMULATION HUFFMAN LU DECOMPOSITIONEURAL NNUMERIC SOSTRING SORT hmean png: http://imgur.com/ZXFX0hJ - NBench, arm-linux-user. Host: Intel i7-4790K @ 4.00GHz 1.3x +-+-------------------------------------------------------------------------------------------------------------+-+ | #### | | jr # # +++ | 1.25x +jr+hash.....................#..#...........................................####................................+-+ | # # # # | | # # # # | 1.2x +-+..........................#..#...........................................#..#................................+-+ | # # # # | | # # # # | 1.15x +-+..........................#..#...........................................#..#................................+-+ | # # #### # # | | # # # # # # | 1.1x +-+..........................#..#..................................#..#.....#..#................................+-+ | # # # # # # +++ | | # # #### # # # # #### | 1.05x +-+..........................#..#...............#..#.....####......#..#.....#..#.........................#..#...+-+ | # # # # # # # # # # +++ # # | | +++ ***** # #### ***** # # # +++# # **** # ****### # # | 1x +-++-+*****###++****+++++*+-+*++#+-****++#-+*+++*-+#+++++#++#++*****++#+-*++*++#-+*****-++++*++*++#++*****++#+-++-+ | * * # * * | * * # * * # * * # **** # * * # * * # * *### * *++# * * # | | * * # * *### * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.95x +-+...*...*..#..*..*.|#..*...*..#..*..*..#..*...*..#..*..*..#..*...*..#..*..*..#..*...*..#..*..*..#..*...*..#...+-+ | * * # * * |# * * # * * # * * # * * # * * # * * # * * # * * # * * # | | * * # * * |# * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.9x +-+---*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###---+-+ ASSIGNMENT BITFIELD FOURFP EMULATION HUFFMAN LU DECOMPOSITIONEURAL NNUMERIC SOSTRING SORT hmean png: http://imgur.com/FfD27ey Reviewed-by: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Message-Id: <1493263764-18657-12-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson --- include/exec/tb-hash.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h index 2c27490cb8..b1fe2d0161 100644 --- a/include/exec/tb-hash.h +++ b/include/exec/tb-hash.h @@ -22,6 +22,8 @@ #include "exec/tb-hash-xx.h" +#ifdef CONFIG_SOFTMMU + /* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for addresses on the same page. The top bits are the same. This allows TLB invalidation to quickly clear a subset of the hash table. */ @@ -45,6 +47,16 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) | (tmp & TB_JMP_ADDR_MASK)); } +#else + +/* In user-mode we can get better hashing because we do not have a TLB */ +static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) +{ + return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1); +} + +#endif /* CONFIG_SOFTMMU */ + static inline uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags) { From 0c240785a843f70df5116cea2652c6c042ade36b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2017 11:50:31 +0000 Subject: [PATCH 12/26] tcg/ppc: Implement goto_ptr Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.h | 2 +- tcg/ppc/tcg-target.inc.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index a9aa974f77..5f4a40a5b4 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -82,7 +82,7 @@ extern bool have_isa_3_00; #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 -#define TCG_TARGET_HAS_goto_ptr 0 +#define TCG_TARGET_HAS_goto_ptr 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index 64f67d2c77..8d50f18328 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -1932,6 +1932,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* Epilogue */ tcg_debug_assert(tb_ret_addr == s->code_ptr); + s->code_gen_epilogue = tb_ret_addr; tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { @@ -1986,6 +1987,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, #endif s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + tcg_out32(s, MTSPR | RS(args[0]) | CTR); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0); + tcg_out32(s, BCCTR | BO_ALWAYS); + break; case INDEX_op_br: { TCGLabel *l = arg_label(args[0]); @@ -2555,6 +2561,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, From b19f0c2e7d344d4d62daf554951acdb6c94a34b0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2017 08:42:58 -0700 Subject: [PATCH 13/26] tcg/aarch64: Implement goto_ptr Measurements: SPECint06 (test set), x86_64-linux-user. Host: APM 64-bit ARMv8 (Atlas/A57) @ 2.4 GHz 1.45x +-+-------------------------------------------------------------------------------------------------------------+-+ | ***** | | +++ * * +goto-ptr | 1.4x +-+...*****............................*...*....................................................................+-+ | *+++* * * +++ | 1.35x +-+...*...*............................*...*...........................*****....................................+-+ | * * * * *+++* | | * * * * * * | 1.3x +-+...*...*............................*...*...........................*...*....................................+-+ | * * * * * * | | * * * * * * ***** | 1.25x +-+...*...*...........*****............*...*...........................*...*............*****...*...*...........+-+ | * * * * * * * * *+++* * * | 1.2x +-+...*...*...........*...*............*...*...........................*...*............*...*...*...*...........+-+ | * * * * * * * * * * * * | | * * * * * * * * * * * * ***** | 1.15x +-+...*...*...........*...*............*...*...........................*...*............*...*...*...*...*...*...+-+ | * * * * * * * * +++ * * * * * * | | * * * * * * * * ***** * * * * * * | 1.1x +-+...*...*...........*...*....*****...*...*...*****...................*...*...*...*....*...*...*...*...*...*...+-+ | * * * * * * * * * * * * * * * * * * * * | 1.05x +-+...*...*...........*...*....*...*...*...*...*...*...................*...*...*...*....*...*...*...*...*...*...+-+ | * * ***** * * * * * * * * * * * * * * * * * * | | * * * * * * * * * * * * ***** ***** * * * * * * * * * * | 1x +-+---*****---*****---*****----*****---*****---*****---*****---*****---*****---*****----*****---*****---*****---+-+ astar bzip2 gcc gobmk h264ref hmmlibquantum mcf omnetpperlbench sjenxalancbmk hmean png: http://imgur.com/en9HE8L Tested-by: Emilio G. Cota Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.h | 2 +- tcg/aarch64/tcg-target.inc.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index b82eac42ae..55a46ac825 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -77,7 +77,7 @@ typedef enum { #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 -#define TCG_TARGET_HAS_goto_ptr 0 +#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 290de6dae6..5f185458f1 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -1357,8 +1357,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); - tcg_out_goto(s, tb_ret_addr); + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_goto(s, s->code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); + tcg_out_goto(s, tb_ret_addr); + } break; case INDEX_op_goto_tb: @@ -1374,6 +1379,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + tcg_out_insn(s, 3207, BR, a0); + break; + case INDEX_op_br: tcg_out_goto_label(s, arg_label(a0)); break; @@ -1735,6 +1744,7 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, @@ -1942,6 +1952,14 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); + /* + * Return path for goto_ptr. Set return value to 0, a-la exit_tb, + * and fall through to the rest of the epilogue. + */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); + + /* TB epilogue */ tb_ret_addr = s->code_ptr; /* Remove TCG locals stack space. */ From 38f81dc5938fb7025531c5ed602afd41fef799a7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2017 10:46:12 -0700 Subject: [PATCH 14/26] tcg/sparc: Implement goto_ptr Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.h | 2 +- tcg/sparc/tcg-target.inc.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 9348ddd046..854a0afd70 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -123,7 +123,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_goto_ptr 0 +#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_extrl_i64_i32 1 #define TCG_TARGET_HAS_extrh_i64_i32 1 diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c index 3785d77f62..18afce2f87 100644 --- a/tcg/sparc/tcg-target.inc.c +++ b/tcg/sparc/tcg-target.inc.c @@ -1003,7 +1003,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* delay slot */ tcg_out_nop(s); - /* No epilogue required. We issue ret + restore directly in the TB. */ + /* Epilogue for goto_ptr. */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + /* delay slot */ + tcg_out_movi_imm13(s, TCG_REG_O0, 0); #ifdef CONFIG_SOFTMMU build_trampolines(s); @@ -1288,6 +1292,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_nop(s); s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); + tcg_out_nop(s); + break; case INDEX_op_br: tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); tcg_out_nop(s); @@ -1513,6 +1521,7 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, From 46644483cae978c734460131bb1d9071f813b287 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2017 18:40:59 -0400 Subject: [PATCH 15/26] tcg/s390: Implement goto_ptr Tested-by: Aurelien Jarno Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/s390/tcg-target.h | 2 +- tcg/s390/tcg-target.inc.c | 24 +++++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 6b7bcfbdf7..957f0c0afe 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -92,7 +92,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 -#define TCG_TARGET_HAS_goto_ptr 0 +#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index a679280b92..5d7083e90c 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -1741,9 +1741,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - /* return value */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]); - tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); + /* Reuse the zeroing that exists for goto_ptr. */ + a0 = args[0]; + if (a0 == 0) { + tgen_gotoi(s, S390_CC_ALWAYS, s->code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0); + tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); + } break; case INDEX_op_goto_tb: @@ -1767,6 +1772,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, args[0]); + break; + OP_32_64(ld8u): /* ??? LLC (RXY format) is only present with the extended-immediate facility, whereas LLGC is always present. */ @@ -2241,6 +2250,7 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, @@ -2439,6 +2449,14 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* br %r3 (go to TB) */ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]); + /* + * Return path for goto_ptr. Set return value to 0, a-la exit_tb, + * and fall through to the rest of the epilogue. + */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0); + + /* TB epilogue */ tb_ret_addr = s->code_ptr; /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ From 702a947484eb3e615183dafc93de590ab0679f60 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Apr 2017 09:45:57 +0200 Subject: [PATCH 16/26] tcg/arm: Clarify tcg_out_bx for arm4 host In theory this would re-enable usage of QEMU on an armv4 host. Whether this is worthwhile is debatable -- we've been unconditionally issuing the armv5t BX instruction in the prologue since 2011 without complaint. Possibly we should simply require an armv6 host. Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.inc.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index e75a6d4943..590c57d6b5 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -329,11 +329,6 @@ static const uint8_t tcg_cond_to_arm_cond[] = { [TCG_COND_GTU] = COND_HI, }; -static inline void tcg_out_bx(TCGContext *s, int cond, int rn) -{ - tcg_out32(s, (cond << 28) | 0x012fff10 | rn); -} - static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset) { tcg_out32(s, (cond << 28) | 0x0a000000 | @@ -402,6 +397,18 @@ static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) } } +static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn) +{ + /* Unless the C portion of QEMU is compiled as thumb, we don't + actually need true BX semantics; merely a branch to an address + held in a register. */ + if (use_armv5t_instructions) { + tcg_out32(s, (cond << 28) | 0x012fff10 | rn); + } else { + tcg_out_mov_reg(s, cond, TCG_REG_PC, rn); + } +} + static inline void tcg_out_dat_imm(TCGContext *s, int cond, int opc, int rd, int rn, int im) { @@ -977,7 +984,7 @@ static inline void tcg_out_st8(TCGContext *s, int cond, * with the code buffer limited to 16MB we wouldn't need the long case. * But we also use it for the tail-call to the qemu_ld/st helpers, which does. */ -static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) +static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) { intptr_t addri = (intptr_t)addr; ptrdiff_t disp = tcg_pcrel_diff(s, addr); @@ -987,15 +994,9 @@ static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) return; } + assert(use_armv5t_instructions || (addri & 1) == 0); tcg_out_movi32(s, cond, TCG_REG_TMP, addri); - if (use_armv5t_instructions) { - tcg_out_bx(s, cond, TCG_REG_TMP); - } else { - if (addri & 1) { - tcg_abort(); - } - tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP); - } + tcg_out_bx(s, cond, TCG_REG_TMP); } /* The call case is mostly used for helpers - so it's not unreasonable From 085c648bef7301eabe7d4a3301c8d012ae4423b8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 28 Apr 2017 09:49:45 +0200 Subject: [PATCH 17/26] tcg/arm: Implement goto_ptr Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.h | 2 +- tcg/arm/tcg-target.inc.c | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index c114df7ed5..5ef1086710 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -123,7 +123,7 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 -#define TCG_TARGET_HAS_goto_ptr 0 +#define TCG_TARGET_HAS_goto_ptr 1 enum { TCG_AREG0 = TCG_REG_R6, diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index 590c57d6b5..9f5cb66718 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -1655,8 +1655,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); - tcg_out_goto(s, COND_AL, tb_ret_addr); + /* Reuse the zeroing that exists for goto_ptr. */ + a0 = args[0]; + if (a0 == 0) { + tcg_out_goto(s, COND_AL, s->code_gen_epilogue); + } else { + tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); + tcg_out_goto(s, COND_AL, tb_ret_addr); + } break; case INDEX_op_goto_tb: if (s->tb_jmp_insn_offset) { @@ -1671,6 +1677,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + tcg_out_bx(s, COND_AL, args[0]); + break; case INDEX_op_br: tcg_out_goto_label(s, COND_AL, arg_label(args[0])); break; @@ -1961,6 +1970,7 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, @@ -2136,9 +2146,16 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]); - tb_ret_addr = s->code_ptr; - /* Epilogue. We branch here via tb_ret_addr. */ + /* + * Return path for goto_ptr. Set return value to 0, a-la exit_tb, + * and fall through to the rest of the epilogue. + */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0); + + /* TB epilogue */ + tb_ret_addr = s->code_ptr; tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, TCG_REG_CALL_STACK, stack_addend, 1); From 5786e0683c4f8170dd05a550814b8809d8ae6d86 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 30 Apr 2017 16:52:52 +0200 Subject: [PATCH 18/26] tcg/mips: implement goto_ptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Aurelien Jarno Message-Id: <20170430145254.25616-2-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.h | 2 +- tcg/mips/tcg-target.inc.c | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index e3240cfba7..d75cb63ed3 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -130,7 +130,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_goto_ptr 0 +#define TCG_TARGET_HAS_goto_ptr 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index 2a7e1c7f5b..8cff9a6bf9 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -1747,6 +1747,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_nop(s); s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_goto_ptr: + /* jmp to the given host address (could be epilogue) */ + tcg_out_opc_reg(s, OPC_JR, 0, a0, 0); + tcg_out_nop(s); + break; case INDEX_op_br: tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, arg_label(a0)); @@ -2160,6 +2165,7 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, + { INDEX_op_goto_ptr, { "r" } }, { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, @@ -2451,6 +2457,13 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* delay slot */ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + /* + * Return path for goto_ptr. Set return value to 0, a-la exit_tb, + * and fall through to the rest of the epilogue. + */ + s->code_gen_epilogue = s->code_ptr; + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO); + /* TB epilogue */ tb_ret_addr = s->code_ptr; for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { From 6350001e831defb5ede5337baaa7dc4a730c1508 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 30 Apr 2017 13:32:10 +0200 Subject: [PATCH 19/26] target/s390: Use tcg_gen_lookup_and_goto_ptr Tested-by: Aurelien Jarno Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson --- target/s390x/translate.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 4c48c593cd..628fb8685d 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -608,11 +608,16 @@ static void gen_op_calc_cc(DisasContext *s) set_cc_static(s); } -static int use_goto_tb(DisasContext *s, uint64_t dest) +static bool use_exit_tb(DisasContext *s) { - if (unlikely(s->singlestep_enabled) || - (s->tb->cflags & CF_LAST_IO) || - (s->tb->flags & FLAG_MASK_PER)) { + return (s->singlestep_enabled || + (s->tb->cflags & CF_LAST_IO) || + (s->tb->flags & FLAG_MASK_PER)); +} + +static bool use_goto_tb(DisasContext *s, uint64_t dest) +{ + if (unlikely(use_exit_tb(s))) { return false; } #ifndef CONFIG_USER_ONLY @@ -5461,8 +5466,10 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb) /* Exit the TB, either by raising a debug exception or by return. */ if (do_debug) { gen_exception(EXCP_DEBUG); - } else { + } else if (use_exit_tb(&dc)) { tcg_gen_exit_tb(0); + } else { + tcg_gen_lookup_and_goto_ptr(psw_addr); } break; default: From 4137cb83fa24eb81c4468eddf717d5257bfdfe5a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 30 Apr 2017 13:38:18 +0200 Subject: [PATCH 20/26] target/hppa: Use tcg_gen_lookup_and_goto_ptr Signed-off-by: Richard Henderson --- target/hppa/translate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/hppa/translate.c b/target/hppa/translate.c index 9e8c233501..e10abc5e04 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -517,7 +517,7 @@ static void gen_goto_tb(DisasContext *ctx, int which, if (ctx->singlestep_enabled) { gen_excp_1(EXCP_DEBUG); } else { - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f); } } } @@ -1510,7 +1510,7 @@ static ExitStatus do_ibranch(DisasContext *ctx, TCGv dest, } else if (is_n && use_nullify_skip(ctx)) { /* The (conditional) branch, B, nullifies the next insn, N, and we're allowed to skip execution N (no single-step or - tracepoint in effect). Since the exit_tb that we must use + tracepoint in effect). Since the goto_ptr that we must use for the indirect branch consumes no special resources, we can (conditionally) skip B and continue execution. */ /* The use_nullify_skip test implies we have a known control path. */ @@ -1527,7 +1527,7 @@ static ExitStatus do_ibranch(DisasContext *ctx, TCGv dest, if (link != 0) { tcg_gen_movi_tl(cpu_gr[link], ctx->iaoq_n); } - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f); return nullify_end(ctx, NO_EXIT); } else { cond_prep(&ctx->null_cond); @@ -3885,7 +3885,7 @@ void gen_intermediate_code(CPUHPPAState *env, struct TranslationBlock *tb) if (ctx.singlestep_enabled) { gen_excp_1(EXCP_DEBUG); } else { - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f); } break; default: From e78722368c721f3c5b8109ed525adac1653ae97b Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Fri, 28 Apr 2017 14:57:41 -0400 Subject: [PATCH 21/26] target/aarch64: optimize cross-page direct jumps in softmmu Perf numbers in next commit's log. Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- target/arm/translate-a64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index a82ab49c94..ab61d96099 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -379,7 +379,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) } else if (s->singlestep_enabled) { gen_exception_internal(EXCP_DEBUG); } else { - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_pc); s->is_jmp = DISAS_TB_JUMP; } } From e75449a346bf558296966a44277bfd93412c6da6 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Fri, 28 Apr 2017 14:59:23 -0400 Subject: [PATCH 22/26] target/aarch64: optimize indirect branches Measurements: [Baseline performance is that before applying this and the previous commit] - NBench, aarch64-softmmu. Host: Intel i7-4790K @ 4.00GHz 1.7x +-+--------------------------------------------------------------------------------------------------------------+-+ | | | cross | 1.6x +cross+jr.................................................####...................................................+-+ | #++# | | # # | 1.5x +-+...................................................*****..#...................................................+-+ | *+++* # | | * * # | 1.4x +-+...................................................*...*..#...................................................+-+ | * * # | | ##### * * # | 1.3x +-+................................****+++#...........*...*..#...................................................+-+ | *++* # * * # | | * * # * * # | 1.2x +-+................................*..*...#...........*...*..#...................................................+-+ | * * # * * # | | #### * * # * * # | 1.1x +-+.......................+++#..#..*..*...#...........*...*..#...................................................+-+ | **** # * * # * * # ****#### | | * * # * * # * * # ****### +++#### ****### * * # | 1x +-++-++++++-++++****###++-*++*++#++*++*+-+#++****+++++*+++*++#++*++*-+#++*****++#++****###-++*++*-+#++*+-*+++#+-++-+ | *****### * * # * * # * * # *++*### * * # * * # * * # * *++# * * # * * # | | * *++# * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.9x +-+---*****###--****###---****###--****####--****###--*****###--****###--*****###--****###---****###--****####---+-+ ASSIGNMENT BITFIELD FOURFP EMULATION HUFFMAN LU DECOMPOSITIONNEURAL NUMERIC SORSTRING SORT hmean png: http://imgur.com/qO9ubtk NB. cross here represents the previous commit. - SPECint06 (test set), aarch64-linux-user. Host: Intel i7-4790K @ 4.00GHz 1.5x +-+--------------------------------------------------------------------------------------------------------------+-+ | ***** | | *+++* jr | | * * | 1.4x +-+.....................................................................*...*.....................+++............+-+ | * * | | | ***** * * | | | * * * * ***** | 1.3x +-+....................................*...*............................*...*....................*.|.*...........+-+ | +++ * * * * * | * | | ***** * * * * *+++* | | * * * * * * * * | 1.2x +-+....................*...*...........*...*............................*...*...........*****....*...*...........+-+ | ***** * * * * * * * * * * +++ | | * * * * * * * * * * * * ***** | | * * * * ***** * * * * * * * * * * | 1.1x +-+...*...*............*...*...*...*...*...*............................*...*....+++....*...*....*...*...*...*...+-+ | * * * * * * * * * * ***** * * * * * * | | * * * * * * * * ***** * * * * * * * * * * | | * * ***** * * * * * * * * ****** * * * * * * * * * * | 1x +-++-+*+++*-++*+++*++++*+-+*+++*-++*+++*-++*+++*+++*++-*++++*-++*****+++*++-*+++*++-*+++*+-+*++++*+++*++-*+++*+-++-+ | * * * * * * * * * * * * * * *+++* * * * * * * * * * * | | * * * * * * * * * * * * * * * * * * * * * * * * * * | | * * * * * * * * * * * * * * * * * * * * * * * * * * | 0.9x +-+---*****---*****----*****---*****---*****---*****---******---*****---*****---*****---*****----*****---*****---+-+ astar bzip2 gcc gobmk h264ref hmmlibquantum mcf omnetpperlbench sjengxalancbmk hmean png: http://imgur.com/3Dp4vvq - SPECint06 (train set), aarch64-linux-user. Host: Intel i7-4790K @ 4.00GHz 1.7x +-+--------------------------------------------------------------------------------------------------------------+-+ | | | jr | 1.6x +-+...............................................................................................+++............+-+ | ***** | | *+++* | | * * | 1.5x +-+..............................................................................................*...*...........+-+ | +++ * * | | ***** * * | 1.4x +-+.....................................................................*+++*....................*...*...........+-+ | * * * * | | ***** * * * * | | * * * * ***** * * | 1.3x +-+....................................*...*............................*...*...*...*............*...*...........+-+ | +++ * * * * * * * * | | ***** * * * * * * ***** * * | 1.2x +-+....................*...*...........*...*............................*...*...*...*...*+++*....*...*...*****...+-+ | * * * * * * * * * * * * *+++* | | ***** * * ***** * * * * * * * * * * * * | | * * * * *+++* * * * * * * * * * * * * | 1.1x +-+...*...*............*...*...*...*...*...*............................*...*...*...*...*...*....*...*...*...*...+-+ | * * ***** * * * * * * ***** * * * * * * * * * * | | * * * * * * * * * * +++ ****** *+++* * * * * * * * * * * | 1x +-+---*****---*****----*****---*****---*****---*****---******---*****---*****---*****---*****----*****---*****---+-+ astar bzip2 gcc gobmk h264ref hmmlibquantum mcf omnetpperlbench sjengxalancbmk hmean png: http://imgur.com/vRrdc9j Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- target/arm/translate-a64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index ab61d96099..860e279658 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -11367,8 +11367,7 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) gen_a64_set_pc_im(dc->pc); /* fall through */ case DISAS_JUMP: - /* indicate that the hash table must be used to find the next TB */ - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_pc); break; case DISAS_TB_JUMP: case DISAS_EXC: From d9a9acde64b862107933f9e9a01435e51bf8f91b Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 30 Apr 2017 16:52:53 +0200 Subject: [PATCH 23/26] target/mips: optimize cross-page direct jumps in softmmu Cc: Yongbok Kim Signed-off-by: Aurelien Jarno Message-Id: <20170430145254.25616-3-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- target/mips/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/mips/translate.c b/target/mips/translate.c index 3022f349cb..1a7ac07c67 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -4233,7 +4233,7 @@ static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) save_cpu_state(ctx, 0); gen_helper_raise_exception_debug(cpu_env); } - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_PC); } } From e350d8ca3ac7e31c6af71a4ab74d2442dfefc697 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 30 Apr 2017 16:52:54 +0200 Subject: [PATCH 24/26] target/mips: optimize indirect branches Cc: Yongbok Kim Signed-off-by: Aurelien Jarno Message-Id: <20170430145254.25616-4-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- target/mips/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/mips/translate.c b/target/mips/translate.c index 1a7ac07c67..559f8fed89 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -10725,7 +10725,7 @@ static void gen_branch(DisasContext *ctx, int insn_bytes) save_cpu_state(ctx, 0); gen_helper_raise_exception_debug(cpu_env); } - tcg_gen_exit_tb(0); + tcg_gen_lookup_and_goto_ptr(cpu_PC); break; default: fprintf(stderr, "unknown branch 0x%x\n", proc_hflags); From bec5e2b97572d23360fb08ad9cb9c93b449a25f6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 May 2017 10:23:29 -0700 Subject: [PATCH 25/26] target/alpha: Implement WTINT inline Signed-off-by: Richard Henderson --- target/alpha/translate.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/target/alpha/translate.c b/target/alpha/translate.c index df5d695344..4523c4cc85 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -1157,6 +1157,7 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode) #ifndef CONFIG_USER_ONLY /* Privileged PAL code */ if (palcode < 0x40 && (ctx->tb->flags & TB_FLAGS_USER_MODE) == 0) { + TCGv tmp; switch (palcode) { case 0x01: /* CFLUSH */ @@ -1182,10 +1183,8 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode) offsetof(CPUAlphaState, sysval)); break; - case 0x35: { + case 0x35: /* SWPIPL */ - TCGv tmp; - /* Note that we already know we're in kernel mode, so we know that PS only contains the 3 IPL bits. */ tcg_gen_ld8u_i64(ctx->ir[IR_V0], cpu_env, @@ -1197,7 +1196,6 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode) tcg_gen_st8_i64(tmp, cpu_env, offsetof(CPUAlphaState, ps)); tcg_temp_free(tmp); break; - } case 0x36: /* RDPS */ @@ -1220,6 +1218,14 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode) -offsetof(AlphaCPU, env) + offsetof(CPUState, cpu_index)); break; + case 0x3E: + /* WTINT */ + tmp = tcg_const_i64(1); + tcg_gen_st32_i64(tmp, cpu_env, -offsetof(AlphaCPU, env) + + offsetof(CPUState, halted)); + tcg_gen_movi_i64(ctx->ir[IR_V0], 0); + return gen_excp(ctx, EXCP_HALTED, 0); + default: palcode &= 0x3f; goto do_call_pal; @@ -1369,7 +1375,7 @@ static ExitStatus gen_mtpr(DisasContext *ctx, TCGv vb, int regno) tmp = tcg_const_i64(1); tcg_gen_st32_i64(tmp, cpu_env, -offsetof(AlphaCPU, env) + offsetof(CPUState, halted)); - return gen_excp(ctx, EXCP_HLT, 0); + return gen_excp(ctx, EXCP_HALTED, 0); case 252: /* HALT */ From 2d826cdc8a43ba1817a44f481f8dc8f08668b0a6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 24 May 2017 19:17:51 -0700 Subject: [PATCH 26/26] target/alpha: Use goto_tb for fallthru between TBs Signed-off-by: Richard Henderson --- target/alpha/translate.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/target/alpha/translate.c b/target/alpha/translate.c index 4523c4cc85..7c45ae360c 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -89,6 +89,9 @@ typedef enum { updated the PC for the next instruction to be executed. */ EXIT_PC_STALE, + /* We are exiting the TB due to page crossing or space constraints. */ + EXIT_FALLTHRU, + /* We are ending the TB with a noreturn function call, e.g. longjmp. No following code will be executed. */ EXIT_NORETURN, @@ -2984,7 +2987,7 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) || num_insns >= max_insns || singlestep || ctx.singlestep_enabled)) { - ret = EXIT_PC_STALE; + ret = EXIT_FALLTHRU; } } while (ret == NO_EXIT); @@ -2996,6 +2999,13 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) case EXIT_GOTO_TB: case EXIT_NORETURN: break; + case EXIT_FALLTHRU: + if (use_goto_tb(&ctx, ctx.pc)) { + tcg_gen_goto_tb(0); + tcg_gen_movi_i64(cpu_pc, ctx.pc); + tcg_gen_exit_tb((uintptr_t)ctx.tb); + } + /* FALLTHRU */ case EXIT_PC_STALE: tcg_gen_movi_i64(cpu_pc, ctx.pc); /* FALLTHRU */ @@ -3007,7 +3017,7 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) } break; default: - abort(); + g_assert_not_reached(); } gen_tb_end(tb, num_insns);