From 83974cf4f8a46513f799ff0d7c7eb151acafda7b Mon Sep 17 00:00:00 2001
From: "Emilio G. Cota"
Date: Thu, 6 Jul 2017 14:42:26 -0400
Subject: [PATCH 01/20] cputlb: bring back tlb_flush_count under !TLB_DEBUG
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit f0aff0f124 ("cputlb: add assert_cpu_is_self checks") buried
the increment of tlb_flush_count under TLB_DEBUG. This results in
"info jit" always (mis)reporting 0 TLB flushes when !TLB_DEBUG.

Besides, under MTTCG tlb_flush_count is updated by several threads,
so in order not to lose counts we'd either have to use atomic ops
or distribute the counter, which is more scalable. This patch does
the latter by embedding tlb_flush_count in CPUArchState. The global
count is then easily obtained by iterating over the CPU list.

Note that this change also requires updating the accessors to
tlb_flush_count to use atomic_read/set whenever there may be
conflicting accesses (as defined in C11) to it.

Reviewed-by: Richard Henderson
Reviewed-by: Alex Bennée
Signed-off-by: Emilio G. Cota
Signed-off-by: Richard Henderson
---
 accel/tcg/cputlb.c        | 17 ++++++++++++++---
 accel/tcg/translate-all.c |  2 +-
 include/exec/cpu-defs.h   |  1 +
 include/exec/cputlb.h     |  3 +--
 4 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index bcbcc4db6c..5b1ef1442c 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -92,8 +92,18 @@ static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
     }
 }
 
-/* statistics */
-int tlb_flush_count;
+size_t tlb_flush_count(void)
+{
+    CPUState *cpu;
+    size_t count = 0;
+
+    CPU_FOREACH(cpu) {
+        CPUArchState *env = cpu->env_ptr;
+
+        count += atomic_read(&env->tlb_flush_count);
+    }
+    return count;
+}
 
 /* This is OK because CPU architectures generally permit an
  * implementation to drop entries from the TLB at any time, so
@@ -112,7 +122,8 @@ static void tlb_flush_nocheck(CPUState *cpu)
     }
 
     assert_cpu_is_self(cpu);
-    tlb_debug("(count: %d)\n", tlb_flush_count++);
+    atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
+    tlb_debug("(count: %zu)\n", tlb_flush_count());
 
     tb_lock();
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 2d1ed06065..6b5d4bece2 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1936,7 +1936,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
                 atomic_read(&tcg_ctx.tb_ctx.tb_flush_count));
     cpu_fprintf(f, "TB invalidate count %d\n",
                 tcg_ctx.tb_ctx.tb_phys_invalidate_count);
-    cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
+    cpu_fprintf(f, "TLB flush count %zu\n", tlb_flush_count());
     tcg_dump_info(f, cpu_fprintf);
 
     tb_unlock();
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index bc8e7f848d..e43ff8346b 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -137,6 +137,7 @@ typedef struct CPUIOTLBEntry {
     CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \
     CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE];      \
     CPUIOTLBEntry iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE];   \
+    size_t tlb_flush_count;                               \
    target_ulong tlb_flush_addr;                           \
    target_ulong tlb_flush_mask;                           \
    target_ulong vtlb_index;                               \
diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h
index 3f941783c5..c91db211bc 100644
--- a/include/exec/cputlb.h
+++ b/include/exec/cputlb.h
@@ -23,7 +23,6 @@
 /* cputlb.c */
 void tlb_protect_code(ram_addr_t ram_addr);
 void tlb_unprotect_code(ram_addr_t ram_addr);
-extern int tlb_flush_count;
-
+size_t tlb_flush_count(void);
 #endif
 #endif

From 
0aecede6121e56ccc5d6a82243f2ccccdfabe6d5 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Fri, 7 Jul 2017 18:22:49 -0400 Subject: [PATCH 02/20] tcg: fix corruption of code_time profiling counter upon tb_flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whenever there is an overflow in code_gen_buffer (e.g. we run out of space in it and have to flush it), the code_time profiling counter ends up with an invalid value (that is, code_time -= profile_getclock(), without later on getting += profile_getclock() due to the goto). Fix it by using the ti variable, so that we only update code_time when there is no overflow. Note that in case there is an overflow we fail to account for the elapsed coding time, but this is quite rare so we can probably live with it. "info jit" before/after, roughly at the same time during debian-arm bootup: - before: Statistics: TB flush count 1 TB invalidate count 4665 TLB flush count 998 JIT cycles -615191529184601 (-256329.804 s at 2.4 GHz) translated TBs 302310 (aborted=0 0.0%) avg ops/TB 48.4 max=438 deleted ops/TB 8.54 avg temps/TB 32.31 max=38 avg host code/TB 361.5 avg search data/TB 24.5 cycles/op -42014693.0 cycles/in byte -121444900.2 cycles/out byte -5629031.1 cycles/search byte -83114481.0 gen_interm time -0.0% gen_code time 100.0% optim./code time -0.0% liveness/code time -0.0% cpu_restore count 6236 avg cycles 110.4 - after: Statistics: TB flush count 1 TB invalidate count 4665 TLB flush count 1010 JIT cycles 1996899624 (0.832 s at 2.4 GHz) translated TBs 297961 (aborted=0 0.0%) avg ops/TB 48.5 max=438 deleted ops/TB 8.56 avg temps/TB 32.31 max=38 avg host code/TB 361.8 avg search data/TB 24.5 cycles/op 138.2 cycles/in byte 398.4 cycles/out byte 18.5 cycles/search byte 273.1 gen_interm time 14.0% gen_code time 86.0% optim./code time 19.4% liveness/code time 10.3% cpu_restore count 6372 avg cycles 111.0 Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 6b5d4bece2..b3bfe65059 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1300,7 +1300,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, #ifdef CONFIG_PROFILER tcg_ctx.tb_count++; tcg_ctx.interm_time += profile_getclock() - ti; - tcg_ctx.code_time -= profile_getclock(); + ti = profile_getclock(); #endif /* ??? Overflow could be handled better here. In particular, we @@ -1318,7 +1318,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } #ifdef CONFIG_PROFILER - tcg_ctx.code_time += profile_getclock(); + tcg_ctx.code_time += profile_getclock() - ti; tcg_ctx.code_in_len += tb->size; tcg_ctx.code_out_len += gen_code_size; tcg_ctx.search_out_len += search_size; From eb5e2b9e3b141de0c435eedc31c26cbbdefbee1b Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Fri, 23 Jun 2017 19:43:01 -0400 Subject: [PATCH 03/20] exec-all: fix typos in TranslationBlock's documentation Reviewed-by: Richard Henderson Signed-off-by: Emilio G. 
Cota Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 673fc066d0..a9a8bb6f83 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -332,7 +332,7 @@ struct TranslationBlock { /* The following data are used to directly call another TB from * the code of this one. This can be done either by emitting direct or * indirect native jump instructions. These jumps are reset so that the TB - * just continue its execution. The TB can be linked to another one by + * just continues its execution. The TB can be linked to another one by * setting one of the jump targets (or patching the jump instruction). Only * two of such jumps are supported. */ @@ -340,7 +340,7 @@ struct TranslationBlock { #define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */ uintptr_t jmp_target_arg[2]; /* target address or offset */ - /* Each TB has an assosiated circular list of TBs jumping to this one. + /* Each TB has an associated circular list of TBs jumping to this one. * jmp_list_first points to the first TB jumping to this one. * jmp_list_next is used to point to the next TB in a list. * Since each TB can have two jumps, it can participate in two lists. From 13e10947358f889d175c42e3db6fef368faeb7e0 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Thu, 6 Jul 2017 21:28:52 -0400 Subject: [PATCH 04/20] translate-all: make have_tb_lock static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is only used by this object, and it's not exported to any other. Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index b3bfe65059..a7c1d4e3f2 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -139,7 +139,7 @@ TCGContext tcg_ctx; bool parallel_cpus; /* translation block context */ -__thread int have_tb_lock; +static __thread int have_tb_lock; static void page_table_config_init(void) { From 841710c78e022cbc1f798cced035789725702dac Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 12 Jul 2017 14:29:26 -0400 Subject: [PATCH 05/20] cpu-exec: rename have_tb_lock to acquired_tb_lock in tb_find Reusing the have_tb_lock name, which is also defined in translate-all.c, makes code reviewing unnecessarily harder. Avoid potential confusion by renaming the local have_tb_lock variable to something else. Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index ff6866624a..32104b8d8c 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -372,7 +372,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; - bool have_tb_lock = false; + bool acquired_tb_lock = false; /* we record a subset of the CPU state. It will always be the same before a given translated block @@ -391,7 +391,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, */ mmap_lock(); tb_lock(); - have_tb_lock = true; + acquired_tb_lock = true; /* There's a chance that our desired tb has been translated while * taking the locks so we check again inside the lock. 
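
The context comment ending the hunk above captures the pattern tb_find()
relies on: a lock-free lookup first, then a second lookup after taking the
lock before doing the expensive translation. A minimal sketch of that
double-checked lookup follows; all names (cache_lookup, generate,
cache_insert, lock/unlock) are illustrative stand-ins, not QEMU APIs:

    typedef struct Entry Entry;

    extern Entry *cache_lookup(unsigned long key); /* safe without the lock */
    extern Entry *generate(unsigned long key);     /* expensive slow path */
    extern void cache_insert(unsigned long key, Entry *e);
    extern void lock(void);
    extern void unlock(void);

    static Entry *find_or_create(unsigned long key)
    {
        Entry *e = cache_lookup(key);   /* lock-free fast path */
        if (e == NULL) {
            lock();
            /* Another thread may have inserted the entry while we were
             * acquiring the lock, so look again before generating.
             */
            e = cache_lookup(key);
            if (e == NULL) {
                e = generate(key);
                cache_insert(key, e);
            }
            unlock();
        }
        return e;
    }
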
@@ -419,15 +419,15 @@ static inline TranslationBlock *tb_find(CPUState *cpu, #endif /* See if we can patch the calling TB. */ if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { - if (!have_tb_lock) { + if (!acquired_tb_lock) { tb_lock(); - have_tb_lock = true; + acquired_tb_lock = true; } if (!tb->invalid) { tb_add_jump(last_tb, tb_exit, tb); } } - if (have_tb_lock) { + if (acquired_tb_lock) { tb_unlock(); } return tb; From e268f4c036d2b47a4f8bf293c1371b328e03ca04 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 5 Jul 2017 18:12:56 -0400 Subject: [PATCH 06/20] tcg/i386: constify tcg_target_callee_save_regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.inc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 69e49c9f58..63d27f10e7 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -2499,7 +2499,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) return NULL; } -static int tcg_target_callee_save_regs[] = { +static const int tcg_target_callee_save_regs[] = { #if TCG_TARGET_REG_BITS == 64 TCG_REG_RBP, TCG_REG_RBX, From d453ec78251d03cbd4ffc28dbf6070931c8ae469 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 5 Jul 2017 18:13:07 -0400 Subject: [PATCH 07/20] tcg/mips: constify tcg_target_callee_save_regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.inc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index ce4030602f..e993138930 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -2341,7 +2341,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) } } -static int tcg_target_callee_save_regs[] = { +static const int tcg_target_callee_save_regs[] = { TCG_REG_S0, /* used for the global env (TCG_AREG0) */ TCG_REG_S1, TCG_REG_S2, From 7f11636dbee89b0e4d03e9e2b96e14649a7db778 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Tue, 11 Jul 2017 17:06:48 -0400 Subject: [PATCH 08/20] tcg: remove addr argument from lookup_tb_ptr It is unlikely that we will ever want to call this helper passing an argument other than the current PC. So just remove the argument, and use the pc we already get from cpu_get_tb_cpu_state. This change paves the way to having a common "tb_lookup" function. Reviewed-by: Richard Henderson Signed-off-by: Emilio G. 
Cota Signed-off-by: Richard Henderson --- accel/tcg/tcg-runtime.c | 20 ++++++++++---------- accel/tcg/tcg-runtime.h | 2 +- target/alpha/translate.c | 2 +- target/arm/translate-a64.c | 4 ++-- target/arm/translate.c | 5 +---- target/hppa/translate.c | 6 +++--- target/i386/translate.c | 17 +++++------------ target/mips/translate.c | 4 ++-- target/s390x/translate.c | 2 +- target/sh4/translate.c | 4 ++-- tcg/tcg-op.c | 4 ++-- tcg/tcg-op.h | 4 ++-- 12 files changed, 32 insertions(+), 42 deletions(-) diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index aafb171294..b75394aba8 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -144,33 +144,33 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) return ctpop64(arg); } -void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) +void *HELPER(lookup_tb_ptr)(CPUArchState *env) { CPUState *cpu = ENV_GET_CPU(env); TranslationBlock *tb; target_ulong cs_base, pc; - uint32_t flags, addr_hash; + uint32_t flags, hash; - addr_hash = tb_jmp_cache_hash_func(addr); - tb = atomic_rcu_read(&cpu->tb_jmp_cache[addr_hash]); cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + hash = tb_jmp_cache_hash_func(pc); + tb = atomic_rcu_read(&cpu->tb_jmp_cache[hash]); if (unlikely(!(tb - && tb->pc == addr + && tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags && tb->trace_vcpu_dstate == *cpu->trace_dstate))) { - tb = tb_htable_lookup(cpu, addr, cs_base, flags); + tb = tb_htable_lookup(cpu, pc, cs_base, flags); if (!tb) { return tcg_ctx.code_gen_epilogue; } - atomic_set(&cpu->tb_jmp_cache[addr_hash], tb); + atomic_set(&cpu->tb_jmp_cache[hash], tb); } - qemu_log_mask_and_addr(CPU_LOG_EXEC, addr, + qemu_log_mask_and_addr(CPU_LOG_EXEC, pc, "Chain %p [%d: " TARGET_FMT_lx "] %s\n", - tb->tc_ptr, cpu->cpu_index, addr, - lookup_symbol(addr)); + tb->tc_ptr, cpu->cpu_index, pc, + lookup_symbol(pc)); return tb->tc_ptr; } diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index c41d38a557..1df17d0ba9 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -24,7 +24,7 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl) +DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env) DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) diff --git a/target/alpha/translate.c b/target/alpha/translate.c index 5a92c4accb..f32c95b9a1 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -3029,7 +3029,7 @@ static void alpha_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) /* FALLTHRU */ case DISAS_PC_UPDATED: if (!use_exit_tb(ctx)) { - tcg_gen_lookup_and_goto_ptr(cpu_pc); + tcg_gen_lookup_and_goto_ptr(); break; } /* FALLTHRU */ diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 899ffb96fc..a39b9d3633 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -379,7 +379,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) } else if (s->base.singlestep_enabled) { gen_exception_internal(EXCP_DEBUG); } else { - tcg_gen_lookup_and_goto_ptr(cpu_pc); + tcg_gen_lookup_and_goto_ptr(); s->base.is_jmp = DISAS_NORETURN; } } @@ -11363,7 +11363,7 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_a64_set_pc_im(dc->pc); /* fall through */ case DISAS_JUMP: - tcg_gen_lookup_and_goto_ptr(cpu_pc); + tcg_gen_lookup_and_goto_ptr(); 
break; case DISAS_EXIT: tcg_gen_exit_tb(0); diff --git a/target/arm/translate.c b/target/arm/translate.c index ab1a12a1b8..fdc46cc525 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -4173,10 +4173,7 @@ static inline bool use_goto_tb(DisasContext *s, target_ulong dest) static void gen_goto_ptr(void) { - TCGv addr = tcg_temp_new(); - tcg_gen_extu_i32_tl(addr, cpu_R[15]); - tcg_gen_lookup_and_goto_ptr(addr); - tcg_temp_free(addr); + tcg_gen_lookup_and_goto_ptr(); } /* This will end the TB but doesn't guarantee we'll return to diff --git a/target/hppa/translate.c b/target/hppa/translate.c index b6e2652341..26242f4b3c 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -505,7 +505,7 @@ static void gen_goto_tb(DisasContext *ctx, int which, if (ctx->base.singlestep_enabled) { gen_excp_1(EXCP_DEBUG); } else { - tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f); + tcg_gen_lookup_and_goto_ptr(); } } } @@ -1515,7 +1515,7 @@ static DisasJumpType do_ibranch(DisasContext *ctx, TCGv dest, if (link != 0) { tcg_gen_movi_tl(cpu_gr[link], ctx->iaoq_n); } - tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f); + tcg_gen_lookup_and_goto_ptr(); return nullify_end(ctx, DISAS_NEXT); } else { cond_prep(&ctx->null_cond); @@ -3873,7 +3873,7 @@ static void hppa_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) if (ctx->base.singlestep_enabled) { gen_excp_1(EXCP_DEBUG); } else { - tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f); + tcg_gen_lookup_and_goto_ptr(); } break; default: diff --git a/target/i386/translate.c b/target/i386/translate.c index 7b920115f9..5d61fa96ad 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -2511,7 +2511,7 @@ static void gen_bnd_jmp(DisasContext *s) If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of S->TF. This is used by the syscall/sysret insns. */ static void -do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, TCGv jr) +do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr) { gen_update_cc_op(s); @@ -2532,12 +2532,8 @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, TCGv jr) tcg_gen_exit_tb(0); } else if (s->tf) { gen_helper_single_step(cpu_env); - } else if (!TCGV_IS_UNUSED(jr)) { - TCGv vaddr = tcg_temp_new(); - - tcg_gen_add_tl(vaddr, jr, cpu_seg_base[R_CS]); - tcg_gen_lookup_and_goto_ptr(vaddr); - tcg_temp_free(vaddr); + } else if (jr) { + tcg_gen_lookup_and_goto_ptr(); } else { tcg_gen_exit_tb(0); } @@ -2547,10 +2543,7 @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, TCGv jr) static inline void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf) { - TCGv unused; - - TCGV_UNUSED(unused); - do_gen_eob_worker(s, inhibit, recheck_tf, unused); + do_gen_eob_worker(s, inhibit, recheck_tf, false); } /* End of block. @@ -2569,7 +2562,7 @@ static void gen_eob(DisasContext *s) /* Jump to register */ static void gen_jr(DisasContext *s, TCGv dest) { - do_gen_eob_worker(s, false, false, dest); + do_gen_eob_worker(s, false, false, true); } /* generate a jump to eip. 
No segment change must happen before as a diff --git a/target/mips/translate.c b/target/mips/translate.c index d16d879df7..ac05f3aa09 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -4303,7 +4303,7 @@ static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) save_cpu_state(ctx, 0); gen_helper_raise_exception_debug(cpu_env); } - tcg_gen_lookup_and_goto_ptr(cpu_PC); + tcg_gen_lookup_and_goto_ptr(); } } @@ -10883,7 +10883,7 @@ static void gen_branch(DisasContext *ctx, int insn_bytes) save_cpu_state(ctx, 0); gen_helper_raise_exception_debug(cpu_env); } - tcg_gen_lookup_and_goto_ptr(cpu_PC); + tcg_gen_lookup_and_goto_ptr(); break; default: fprintf(stderr, "unknown branch 0x%x\n", proc_hflags); diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 9ef95141f9..165d2cac3e 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -5949,7 +5949,7 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb) } else if (use_exit_tb(&dc) || status == EXIT_PC_STALE_NOCHAIN) { tcg_gen_exit_tb(0); } else { - tcg_gen_lookup_and_goto_ptr(psw_addr); + tcg_gen_lookup_and_goto_ptr(); } break; default: diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 10191073b2..8db9fba26e 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -261,7 +261,7 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) } else if (use_exit_tb(ctx)) { tcg_gen_exit_tb(0); } else { - tcg_gen_lookup_and_goto_ptr(cpu_pc); + tcg_gen_lookup_and_goto_ptr(); } } } @@ -278,7 +278,7 @@ static void gen_jump(DisasContext * ctx) } else if (use_exit_tb(ctx)) { tcg_gen_exit_tb(0); } else { - tcg_gen_lookup_and_goto_ptr(cpu_pc); + tcg_gen_lookup_and_goto_ptr(); } } else { gen_goto_tb(ctx, 0, ctx->delayed_pc); diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 688d91755b..d3c0e4799e 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2588,11 +2588,11 @@ void tcg_gen_goto_tb(unsigned idx) tcg_gen_op1i(INDEX_op_goto_tb, idx); } -void tcg_gen_lookup_and_goto_ptr(TCGv addr) +void tcg_gen_lookup_and_goto_ptr(void) { if (TCG_TARGET_HAS_goto_ptr && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { TCGv_ptr ptr = tcg_temp_new_ptr(); - gen_helper_lookup_tb_ptr(ptr, tcg_ctx.tcg_env, addr); + gen_helper_lookup_tb_ptr(ptr, tcg_ctx.tcg_env); tcg_gen_op1i(INDEX_op_goto_ptr, GET_TCGV_PTR(ptr)); tcg_temp_free_ptr(ptr); } else { diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 5d3278f243..18d01b2f43 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -797,7 +797,7 @@ static inline void tcg_gen_exit_tb(uintptr_t val) void tcg_gen_goto_tb(unsigned idx); /** - * tcg_gen_lookup_and_goto_ptr() - look up a TB and jump to it if valid + * tcg_gen_lookup_and_goto_ptr() - look up the current TB, jump to it if valid * @addr: Guest address of the target TB * * If the TB is not valid, jump to the epilogue. @@ -805,7 +805,7 @@ void tcg_gen_goto_tb(unsigned idx); * This operation is optional. If the TCG backend does not implement goto_ptr, * this op is equivalent to calling tcg_gen_exit_tb() with 0 as the argument. */ -void tcg_gen_lookup_and_goto_ptr(TCGv addr); +void tcg_gen_lookup_and_goto_ptr(void); #if TARGET_LONG_BITS == 32 #define tcg_temp_new() tcg_temp_new_i32() From f6bb84d53110398f4899c19dab4e0fe9908ec060 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Tue, 11 Jul 2017 17:33:33 -0400 Subject: [PATCH 09/20] tcg: consolidate TB lookups in tb_lookup__cpu_state This avoids duplicating code. 
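
Beyond deduplication, the consolidated helper re-validates every jump-cache
hit, which is what closes the race described next. A minimal C11 sketch of
that shape — the types and names here (TB, jmp_cache, slow_lookup) are
illustrative assumptions, not QEMU's, which uses the atomic_rcu_read /
atomic_set macros and tb_htable_lookup():

    #include <stdatomic.h>

    typedef struct TB {
        unsigned long pc;
        atomic_int invalid;             /* set once the TB becomes stale */
    } TB;

    extern TB *_Atomic jmp_cache[4096]; /* per-vCPU direct-mapped cache */
    extern TB *slow_lookup(unsigned long pc);  /* e.g. a global hash table */

    static TB *fast_lookup(unsigned long pc)
    {
        unsigned hash = (unsigned)pc & 4095u;
        TB *tb = atomic_load(&jmp_cache[hash]);

        if (tb && tb->pc == pc && !atomic_load(&tb->invalid)) {
            return tb;                  /* hit, and still valid */
        }
        tb = slow_lookup(pc);           /* miss or stale entry */
        if (tb) {
            atomic_store(&jmp_cache[hash], tb);  /* repair the cache */
        }
        return tb;
    }
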
cpu_exec_step will also use the new common function once we integrate parallel_cpus into tb->cflags. Note that in this commit we also fix a race, described by Richard Henderson during review. Think of this scenario with threads A and B: (A) Lookup succeeds for TB in hash without tb_lock (B) Sets the TB's tb->invalid flag (B) Removes the TB from tb_htable (B) Clears all CPU's tb_jmp_cache (A) Store TB into local tb_jmp_cache Given that order of events, (A) will keep executing that invalid TB until another flush of its tb_jmp_cache happens, which in theory might never happen. We can fix this by checking the tb->invalid flag every time we look up a TB from tb_jmp_cache, so that in the above scenario, next time we try to find that TB in tb_jmp_cache, we won't, and will therefore be forced to look it up in tb_htable. Performance-wise, I measured a small improvement when booting debian-arm. Note that inlining pays off: Performance counter stats for 'taskset -c 0 qemu-system-arm \ -machine type=virt -nographic -smp 1 -m 4096 \ -netdev user,id=unet,hostfwd=tcp::2222-:22 \ -device virtio-net-device,netdev=unet \ -drive file=jessie.qcow2,id=myblock,index=0,if=none \ -device virtio-blk-device,drive=myblock \ -kernel kernel.img -append console=ttyAMA0 root=/dev/vda1 \ -name arm,debug-threads=on -smp 1' (10 runs): Before: 18714.917392 task-clock # 0.952 CPUs utilized ( +- 0.95% ) 23,142 context-switches # 0.001 M/sec ( +- 0.50% ) 1 CPU-migrations # 0.000 M/sec 10,558 page-faults # 0.001 M/sec ( +- 0.95% ) 53,957,727,252 cycles # 2.883 GHz ( +- 0.91% ) [83.33%] 24,440,599,852 stalled-cycles-frontend # 45.30% frontend cycles idle ( +- 1.20% ) [83.33%] 16,495,714,424 stalled-cycles-backend # 30.57% backend cycles idle ( +- 0.95% ) [66.66%] 76,267,572,582 instructions # 1.41 insns per cycle # 0.32 stalled cycles per insn ( +- 0.87% ) [83.34%] 12,692,186,323 branches # 678.186 M/sec ( +- 0.92% ) [83.35%] 263,486,879 branch-misses # 2.08% of all branches ( +- 0.73% ) [83.34%] 19.648474449 seconds time elapsed ( +- 0.82% ) After, w/ inline (this patch): 18471.376627 task-clock # 0.955 CPUs utilized ( +- 0.96% ) 23,048 context-switches # 0.001 M/sec ( +- 0.48% ) 1 CPU-migrations # 0.000 M/sec 10,708 page-faults # 0.001 M/sec ( +- 0.81% ) 53,208,990,796 cycles # 2.881 GHz ( +- 0.98% ) [83.34%] 23,941,071,673 stalled-cycles-frontend # 44.99% frontend cycles idle ( +- 0.95% ) [83.34%] 16,161,773,848 stalled-cycles-backend # 30.37% backend cycles idle ( +- 0.76% ) [66.67%] 75,786,269,766 instructions # 1.42 insns per cycle # 0.32 stalled cycles per insn ( +- 1.24% ) [83.34%] 12,573,617,143 branches # 680.708 M/sec ( +- 1.34% ) [83.33%] 260,235,550 branch-misses # 2.07% of all branches ( +- 0.66% ) [83.33%] 19.340502161 seconds time elapsed ( +- 0.56% ) After, w/o inline: 18791.253967 task-clock # 0.954 CPUs utilized ( +- 0.78% ) 23,230 context-switches # 0.001 M/sec ( +- 0.42% ) 1 CPU-migrations # 0.000 M/sec 10,563 page-faults # 0.001 M/sec ( +- 1.27% ) 54,168,674,622 cycles # 2.883 GHz ( +- 0.80% ) [83.34%] 24,244,712,629 stalled-cycles-frontend # 44.76% frontend cycles idle ( +- 1.37% ) [83.33%] 16,288,648,572 stalled-cycles-backend # 30.07% backend cycles idle ( +- 0.95% ) [66.66%] 77,659,755,503 instructions # 1.43 insns per cycle # 0.31 stalled cycles per insn ( +- 0.97% ) [83.34%] 12,922,780,045 branches # 687.702 M/sec ( +- 1.06% ) [83.34%] 261,962,386 branch-misses # 2.03% of all branches ( +- 0.71% ) [83.35%] 19.700174670 seconds time elapsed ( +- 0.56% ) Reviewed-by: Richard Henderson Signed-off-by: 
Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 47 +++++++++++++++----------------------- accel/tcg/tcg-runtime.c | 22 ++++-------------- include/exec/tb-lookup.h | 49 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 46 deletions(-) create mode 100644 include/exec/tb-lookup.h diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 32104b8d8c..f8a1d68db7 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -28,6 +28,7 @@ #include "exec/address-spaces.h" #include "qemu/rcu.h" #include "exec/tb-hash.h" +#include "exec/tb-lookup.h" #include "exec/log.h" #include "qemu/main-loop.h" #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) @@ -368,43 +369,31 @@ static inline TranslationBlock *tb_find(CPUState *cpu, TranslationBlock *last_tb, int tb_exit) { - CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; bool acquired_tb_lock = false; - /* we record a subset of the CPU state. It will - always be the same before a given translated block - is executed. */ - cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]); - if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || - tb->flags != flags || - tb->trace_vcpu_dstate != *cpu->trace_dstate)) { + tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags); + if (tb == NULL) { + /* mmap_lock is needed by tb_gen_code, and mmap_lock must be + * taken outside tb_lock. As system emulation is currently + * single threaded the locks are NOPs. + */ + mmap_lock(); + tb_lock(); + acquired_tb_lock = true; + + /* There's a chance that our desired tb has been translated while + * taking the locks so we check again inside the lock. + */ tb = tb_htable_lookup(cpu, pc, cs_base, flags); - if (!tb) { - - /* mmap_lock is needed by tb_gen_code, and mmap_lock must be - * taken outside tb_lock. As system emulation is currently - * single threaded the locks are NOPs. - */ - mmap_lock(); - tb_lock(); - acquired_tb_lock = true; - - /* There's a chance that our desired tb has been translated while - * taking the locks so we check again inside the lock. 
- */ - tb = tb_htable_lookup(cpu, pc, cs_base, flags); - if (!tb) { - /* if no translated code available, then translate it now */ - tb = tb_gen_code(cpu, pc, cs_base, flags, 0); - } - - mmap_unlock(); + if (likely(tb == NULL)) { + /* if no translated code available, then translate it now */ + tb = tb_gen_code(cpu, pc, cs_base, flags, 0); } + mmap_unlock(); /* We add the TB in the virtual pc hash table for the fast lookup */ atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); } diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index b75394aba8..d0edd944b0 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -27,7 +27,7 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" -#include "exec/tb-hash.h" +#include "exec/tb-lookup.h" #include "disas/disas.h" #include "exec/log.h" @@ -149,24 +149,12 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env) CPUState *cpu = ENV_GET_CPU(env); TranslationBlock *tb; target_ulong cs_base, pc; - uint32_t flags, hash; + uint32_t flags; - cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - hash = tb_jmp_cache_hash_func(pc); - tb = atomic_rcu_read(&cpu->tb_jmp_cache[hash]); - - if (unlikely(!(tb - && tb->pc == pc - && tb->cs_base == cs_base - && tb->flags == flags - && tb->trace_vcpu_dstate == *cpu->trace_dstate))) { - tb = tb_htable_lookup(cpu, pc, cs_base, flags); - if (!tb) { - return tcg_ctx.code_gen_epilogue; - } - atomic_set(&cpu->tb_jmp_cache[hash], tb); + tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags); + if (tb == NULL) { + return tcg_ctx.code_gen_epilogue; } - qemu_log_mask_and_addr(CPU_LOG_EXEC, pc, "Chain %p [%d: " TARGET_FMT_lx "] %s\n", tb->tc_ptr, cpu->cpu_index, pc, diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h new file mode 100644 index 0000000000..9d32cb0c6e --- /dev/null +++ b/include/exec/tb-lookup.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2017, Emilio G. Cota + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef EXEC_TB_LOOKUP_H +#define EXEC_TB_LOOKUP_H + +#include "qemu/osdep.h" + +#ifdef NEED_CPU_H +#include "cpu.h" +#else +#include "exec/poison.h" +#endif + +#include "exec/exec-all.h" +#include "exec/tb-hash.h" + +/* Might cause an exception, so have a longjmp destination ready */ +static inline TranslationBlock * +tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base, + uint32_t *flags) +{ + CPUArchState *env = (CPUArchState *)cpu->env_ptr; + TranslationBlock *tb; + uint32_t hash; + + cpu_get_tb_cpu_state(env, pc, cs_base, flags); + hash = tb_jmp_cache_hash_func(*pc); + tb = atomic_rcu_read(&cpu->tb_jmp_cache[hash]); + if (likely(tb && + tb->pc == *pc && + tb->cs_base == *cs_base && + tb->flags == *flags && + tb->trace_vcpu_dstate == *cpu->trace_dstate && + !atomic_read(&tb->invalid))) { + return tb; + } + tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags); + if (tb == NULL) { + return NULL; + } + atomic_set(&cpu->tb_jmp_cache[hash], tb); + return tb; +} + +#endif /* EXEC_TB_LOOKUP_H */ From 84f1c148da2b35fbb5a436597872765257e8914e Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Mon, 10 Jul 2017 20:03:50 -0400 Subject: [PATCH 10/20] exec-all: bring tb->invalid into tb->cflags This gets rid of a hole in struct TranslationBlock. Reviewed-by: Richard Henderson Signed-off-by: Emilio G. 
Cota Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 4 ++-- accel/tcg/translate-all.c | 3 +-- include/exec/exec-all.h | 3 +-- include/exec/tb-lookup.h | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index f8a1d68db7..9cd809d607 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -294,7 +294,7 @@ static bool tb_cmp(const void *p, const void *d) tb->cs_base == desc->cs_base && tb->flags == desc->flags && tb->trace_vcpu_dstate == desc->trace_vcpu_dstate && - !atomic_read(&tb->invalid)) { + !(atomic_read(&tb->cflags) & CF_INVALID)) { /* check next page if needed */ if (tb->page_addr[1] == -1) { return true; @@ -412,7 +412,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, tb_lock(); acquired_tb_lock = true; } - if (!tb->invalid) { + if (!(tb->cflags & CF_INVALID)) { tb_add_jump(last_tb, tb_exit, tb); } } diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index a7c1d4e3f2..ed65d68709 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1073,7 +1073,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) assert_tb_locked(); - atomic_set(&tb->invalid, true); + atomic_set(&tb->cflags, tb->cflags | CF_INVALID); /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); @@ -1269,7 +1269,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->flags = flags; tb->cflags = cflags; tb->trace_vcpu_dstate = *cpu->trace_dstate; - tb->invalid = false; #ifdef CONFIG_PROFILER tcg_ctx.tb_count1++; /* includes aborted translations because of diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index a9a8bb6f83..3135aaf4c9 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -314,12 +314,11 @@ struct TranslationBlock { #define CF_NOCACHE 0x10000 /* To be freed after execution */ #define CF_USE_ICOUNT 0x20000 #define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */ +#define CF_INVALID 0x80000 /* TB is stale. Setters must acquire tb_lock */ /* Per-vCPU dynamic tracing state used to generate this TB */ uint32_t trace_vcpu_dstate; - uint16_t invalid; - void *tc_ptr; /* pointer to the translated code */ uint8_t *tc_search; /* pointer to search data */ /* original tb when cflags has CF_NOCACHE */ diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h index 9d32cb0c6e..436b6d5ecf 100644 --- a/include/exec/tb-lookup.h +++ b/include/exec/tb-lookup.h @@ -35,7 +35,7 @@ tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base, tb->cs_base == *cs_base && tb->flags == *flags && tb->trace_vcpu_dstate == *cpu->trace_dstate && - !atomic_read(&tb->invalid))) { + !(atomic_read(&tb->cflags) & CF_INVALID))) { return tb; } tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags); From 424079c13b692cfcd08866bc9ffec77b887fed4e Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 12 Jul 2017 15:01:07 -0400 Subject: [PATCH 11/20] translate-all: define and use DEBUG_TB_FLUSH_GATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gets rid of some ifdef checks while ensuring that the debug code is compiled, which prevents bit rot. Suggested-by: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: Emilio G. 
Cota Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index ed65d68709..799b027e79 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -65,6 +65,12 @@ /* make various TB consistency checks */ /* #define DEBUG_TB_CHECK */ +#ifdef DEBUG_TB_FLUSH +#define DEBUG_TB_FLUSH_GATE 1 +#else +#define DEBUG_TB_FLUSH_GATE 0 +#endif + #if !defined(CONFIG_USER_ONLY) /* TB consistency checks only implemented for usermode emulation. */ #undef DEBUG_TB_CHECK @@ -899,13 +905,13 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) goto done; } -#if defined(DEBUG_TB_FLUSH) - printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n", - (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer), - tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ? - ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) / - tcg_ctx.tb_ctx.nb_tbs : 0); -#endif + if (DEBUG_TB_FLUSH_GATE) { + printf("qemu: flush code_size=%td nb_tbs=%d avg_tb_size=%td\n", + tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer, + tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ? + (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) / + tcg_ctx.tb_ctx.nb_tbs : 0); + } if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) > tcg_ctx.code_gen_buffer_size) { cpu_abort(cpu, "Internal error: code buffer overflow\n"); From 67a5b5d2f6eb6d3b980570223ba5c478487ddb6f Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Thu, 13 Jul 2017 17:18:15 -0400 Subject: [PATCH 12/20] exec-all: introduce TB_PAGE_ADDR_FMT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And fix the following warning when DEBUG_TB_INVALIDATE is enabled in translate-all.c: CC mipsn32-linux-user/accel/tcg/translate-all.o /data/src/qemu/accel/tcg/translate-all.c: In function ‘tb_alloc_page’: /data/src/qemu/accel/tcg/translate-all.c:1201:16: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 2 has type ‘tb_page_addr_t {aka unsigned int}’ [-Werror=format=] printf("protecting code page: 0x" TARGET_FMT_lx "\n", ^ cc1: all warnings being treated as errors /data/src/qemu/rules.mak:66: recipe for target 'accel/tcg/translate-all.o' failed make[1]: *** [accel/tcg/translate-all.o] Error 1 Makefile:328: recipe for target 'subdir-mipsn32-linux-user' failed make: *** [subdir-mipsn32-linux-user] Error 2 cota@flamenco:/data/src/qemu/build ((18f3fe1...) *$)$ Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 3 +-- include/exec/exec-all.h | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 799b027e79..90b3eed9c6 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1193,8 +1193,7 @@ static inline void tb_alloc_page(TranslationBlock *tb, mprotect(g2h(page_addr), qemu_host_page_size, (prot & PAGE_BITS) & ~PAGE_WRITE); #ifdef DEBUG_TB_INVALIDATE - printf("protecting code page: 0x" TARGET_FMT_lx "\n", - page_addr); + printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr); #endif } #else diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 3135aaf4c9..79f8041811 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -31,8 +31,10 @@ type. 
*/ #if defined(CONFIG_USER_ONLY) typedef abi_ulong tb_page_addr_t; +#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx #else typedef ram_addr_t tb_page_addr_t; +#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT #endif #include "qemu/log.h" From dae9e03aed8e652f5dce2e5cab05dff83aa193b8 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 12 Jul 2017 15:04:02 -0400 Subject: [PATCH 13/20] translate-all: define and use DEBUG_TB_INVALIDATE_GATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gets rid of an ifdef check while ensuring that the debug code is compiled, which prevents bit rot. Suggested-by: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 90b3eed9c6..6b853b329c 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -65,6 +65,12 @@ /* make various TB consistency checks */ /* #define DEBUG_TB_CHECK */ +#ifdef DEBUG_TB_INVALIDATE +#define DEBUG_TB_INVALIDATE_GATE 1 +#else +#define DEBUG_TB_INVALIDATE_GATE 0 +#endif + #ifdef DEBUG_TB_FLUSH #define DEBUG_TB_FLUSH_GATE 1 #else @@ -1192,9 +1198,9 @@ static inline void tb_alloc_page(TranslationBlock *tb, } mprotect(g2h(page_addr), qemu_host_page_size, (prot & PAGE_BITS) & ~PAGE_WRITE); -#ifdef DEBUG_TB_INVALIDATE - printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr); -#endif + if (DEBUG_TB_INVALIDATE_GATE) { + printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr); + } } #else /* if some code is already present, then the pages are already From 6eb062abd66611333056633899d3f09c2e795f4c Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 12 Jul 2017 15:31:57 -0400 Subject: [PATCH 14/20] translate-all: define and use DEBUG_TB_CHECK_GATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevents bit rot by ensuring the debug code is compiled when building a user-mode target. Unfortunately the helpers are user-mode-only so we cannot fully get rid of the ifdef checks. Add a comment to explain this. Suggested-by: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 6b853b329c..26efad302d 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -82,6 +82,12 @@ #undef DEBUG_TB_CHECK #endif +#ifdef DEBUG_TB_CHECK +#define DEBUG_TB_CHECK_GATE 1 +#else +#define DEBUG_TB_CHECK_GATE 0 +#endif + /* Access to the various translations structures need to be serialised via locks * for consistency. This is automatic for SoftMMU based system * emulation due to its single threaded nature. In user-mode emulation @@ -950,7 +956,13 @@ void tb_flush(CPUState *cpu) } } -#ifdef DEBUG_TB_CHECK +/* + * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only, + * so in order to prevent bit rot we compile them unconditionally in user-mode, + * and let the optimizer get rid of them by wrapping their user-only callers + * with if (DEBUG_TB_CHECK_GATE). 
+ */ +#ifdef CONFIG_USER_ONLY static void do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp) @@ -994,7 +1006,7 @@ static void tb_page_check(void) qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_page_check, NULL); } -#endif +#endif /* CONFIG_USER_ONLY */ static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb) { @@ -1236,8 +1248,10 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate); qht_insert(&tcg_ctx.tb_ctx.htable, tb, h); -#ifdef DEBUG_TB_CHECK - tb_page_check(); +#ifdef CONFIG_USER_ONLY + if (DEBUG_TB_CHECK_GATE) { + tb_page_check(); + } #endif } @@ -2223,8 +2237,10 @@ int page_unprotect(target_ulong address, uintptr_t pc) /* and since the content will be modified, we must invalidate the corresponding translated code. */ current_tb_invalidated |= tb_invalidate_phys_page(addr, pc); -#ifdef DEBUG_TB_CHECK - tb_invalidate_check(addr); +#ifdef CONFIG_USER_ONLY + if (DEBUG_TB_CHECK_GATE) { + tb_invalidate_check(addr); + } #endif } mprotect((void *)g2h(host_start), qemu_host_page_size, From e7e168f41364c6e83d0f75fc1b3ce7f9c41ccf76 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 12 Jul 2017 00:08:21 -0400 Subject: [PATCH 15/20] exec-all: extract tb->tc_* into a separate struct tc_tb In preparation for adding tc.size to be able to keep track of TB's using the binary search tree implementation from glib. Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 14 +++++++------- accel/tcg/tcg-runtime.c | 4 ++-- accel/tcg/translate-all.c | 24 ++++++++++++------------ include/exec/exec-all.h | 12 ++++++++++-- tcg/tcg.c | 4 ++-- 5 files changed, 33 insertions(+), 25 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 9cd809d607..363dfa208a 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -143,11 +143,11 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) uintptr_t ret; TranslationBlock *last_tb; int tb_exit; - uint8_t *tb_ptr = itb->tc_ptr; + uint8_t *tb_ptr = itb->tc.ptr; qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc, "Trace %p [%d: " TARGET_FMT_lx "] %s\n", - itb->tc_ptr, cpu->cpu_index, itb->pc, + itb->tc.ptr, cpu->cpu_index, itb->pc, lookup_symbol(itb->pc)); #if defined(DEBUG_DISAS) @@ -179,7 +179,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc, "Stopped execution of TB chain before %p [" TARGET_FMT_lx "] %s\n", - last_tb->tc_ptr, last_tb->pc, + last_tb->tc.ptr, last_tb->pc, lookup_symbol(last_tb->pc)); if (cc->synchronize_from_tb) { cc->synchronize_from_tb(cpu, last_tb); @@ -334,7 +334,7 @@ void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) { if (TCG_TARGET_HAS_direct_jump) { uintptr_t offset = tb->jmp_target_arg[n]; - uintptr_t tc_ptr = (uintptr_t)tb->tc_ptr; + uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr; tb_target_set_jmp_target(tc_ptr, tc_ptr + offset, addr); } else { tb->jmp_target_arg[n] = addr; @@ -354,11 +354,11 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, "Linking TBs %p [" TARGET_FMT_lx "] index %d -> %p [" TARGET_FMT_lx "]\n", - tb->tc_ptr, tb->pc, n, - tb_next->tc_ptr, tb_next->pc); + tb->tc.ptr, tb->pc, n, + tb_next->tc.ptr, tb_next->pc); /* patch the native jump address */ - tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc_ptr); + tb_set_jmp_target(tb, 
n, (uintptr_t)tb_next->tc.ptr); /* add in TB jmp circular list */ tb->jmp_list_next[n] = tb_next->jmp_list_first; diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index d0edd944b0..54d89100d9 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -157,9 +157,9 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env) } qemu_log_mask_and_addr(CPU_LOG_EXEC, pc, "Chain %p [%d: " TARGET_FMT_lx "] %s\n", - tb->tc_ptr, cpu->cpu_index, pc, + tb->tc.ptr, cpu->cpu_index, pc, lookup_symbol(pc)); - return tb->tc_ptr; + return tb->tc.ptr; } void HELPER(exit_atomic)(CPUArchState *env) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 26efad302d..c5ce99d549 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -260,7 +260,7 @@ static target_long decode_sleb128(uint8_t **pp) which comes from the host pc of the end of the code implementing the insn. Each line of the table is encoded as sleb128 deltas from the previous - line. The seed for the first line is { tb->pc, 0..., tb->tc_ptr }. + line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }. That is, the first column is seeded with the guest pc, the last column with the host pc, and the middle columns with zeros. */ @@ -270,7 +270,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) uint8_t *p = block; int i, j, n; - tb->tc_search = block; + tb->tc.search = block; for (i = 0, n = tb->icount; i < n; ++i) { target_ulong prev; @@ -305,9 +305,9 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t searched_pc) { target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc }; - uintptr_t host_pc = (uintptr_t)tb->tc_ptr; + uintptr_t host_pc = (uintptr_t)tb->tc.ptr; CPUArchState *env = cpu->env_ptr; - uint8_t *p = tb->tc_search; + uint8_t *p = tb->tc.search; int i, j, num_insns = tb->icount; #ifdef CONFIG_PROFILER int64_t ti = profile_getclock(); @@ -858,7 +858,7 @@ void tb_free(TranslationBlock *tb) tb == tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) { size_t struct_size = ROUND_UP(sizeof(*tb), qemu_icache_linesize); - tcg_ctx.code_gen_ptr = tb->tc_ptr - struct_size; + tcg_ctx.code_gen_ptr = tb->tc.ptr - struct_size; tcg_ctx.tb_ctx.nb_tbs--; } } @@ -1059,7 +1059,7 @@ static inline void tb_remove_from_jmp_list(TranslationBlock *tb, int n) another TB */ static inline void tb_reset_jump(TranslationBlock *tb, int n) { - uintptr_t addr = (uintptr_t)(tb->tc_ptr + tb->jmp_reset_offset[n]); + uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]); tb_set_jmp_target(tb, n, addr); } @@ -1288,7 +1288,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } gen_code_buf = tcg_ctx.code_gen_ptr; - tb->tc_ptr = gen_code_buf; + tb->tc.ptr = gen_code_buf; tb->pc = pc; tb->cs_base = cs_base; tb->flags = flags; @@ -1307,7 +1307,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, gen_intermediate_code(cpu, tb); tcg_ctx.cpu = NULL; - trace_translate_block(tb, tb->pc, tb->tc_ptr); + trace_translate_block(tb, tb->pc, tb->tc.ptr); /* generate machine code */ tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID; @@ -1354,11 +1354,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu, qemu_log_lock(); qemu_log("OUT: [size=%d]\n", gen_code_size); if (tcg_ctx.data_gen_ptr) { - size_t code_size = tcg_ctx.data_gen_ptr - tb->tc_ptr; + size_t code_size = tcg_ctx.data_gen_ptr - tb->tc.ptr; size_t data_size = gen_code_size - code_size; size_t i; - log_disas(tb->tc_ptr, code_size); + log_disas(tb->tc.ptr, code_size); for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 
if (sizeof(tcg_target_ulong) == 8) { @@ -1372,7 +1372,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } } } else { - log_disas(tb->tc_ptr, gen_code_size); + log_disas(tb->tc.ptr, gen_code_size); } qemu_log("\n"); qemu_log_flush(); @@ -1699,7 +1699,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) while (m_min <= m_max) { m = (m_min + m_max) >> 1; tb = tcg_ctx.tb_ctx.tbs[m]; - v = (uintptr_t)tb->tc_ptr; + v = (uintptr_t)tb->tc.ptr; if (v == tc_ptr) { return tb; } else if (tc_ptr < v) { diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 79f8041811..53f1835c43 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -303,6 +303,14 @@ static inline void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr) #define CODE_GEN_AVG_BLOCK_SIZE 150 #endif +/* + * Translation Cache-related fields of a TB. + */ +struct tb_tc { + void *ptr; /* pointer to the translated code */ + uint8_t *search; /* pointer to search data */ +}; + struct TranslationBlock { target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */ target_ulong cs_base; /* CS base for this block */ @@ -321,8 +329,8 @@ struct TranslationBlock { /* Per-vCPU dynamic tracing state used to generate this TB */ uint32_t trace_vcpu_dstate; - void *tc_ptr; /* pointer to the translated code */ - uint8_t *tc_search; /* pointer to search data */ + struct tb_tc tc; + /* original tb when cflags has CF_NOCACHE */ struct TranslationBlock *orig_tb; /* first and second physical page containing code. The lower bit diff --git a/tcg/tcg.c b/tcg/tcg.c index dff9999bc6..a874bdd41f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2836,8 +2836,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) tcg_reg_alloc_start(s); - s->code_buf = tb->tc_ptr; - s->code_ptr = tb->tc_ptr; + s->code_buf = tb->tc.ptr; + s->code_ptr = tb->tc.ptr; #ifdef TCG_TARGET_NEED_LDST_LABELS s->ldst_labels = NULL; From 5e75150cdf573d9aa21fc9e8552aa2cc6a48bcb1 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Thu, 13 Jul 2017 17:10:31 -0400 Subject: [PATCH 16/20] tci: move tci_regs to tcg_qemu_tb_exec's stack Groundwork for supporting multiple TCG contexts. Compile-tested for all targets on an x86_64 host. Suggested-by: Richard Henderson Acked-by: Richard Henderson Signed-off-by: Emilio G. 
Cota Signed-off-by: Richard Henderson --- tcg/tci.c | 552 +++++++++++++++++++++++++++--------------------------- 1 file changed, 279 insertions(+), 273 deletions(-) diff --git a/tcg/tci.c b/tcg/tci.c index f39bfb95c0..63f2cd54ab 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -55,93 +55,95 @@ typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong, tcg_target_ulong); #endif -static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS]; - -static tcg_target_ulong tci_read_reg(TCGReg index) +static tcg_target_ulong tci_read_reg(const tcg_target_ulong *regs, TCGReg index) { - tci_assert(index < ARRAY_SIZE(tci_reg)); - return tci_reg[index]; + tci_assert(index < TCG_TARGET_NB_REGS); + return regs[index]; } #if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 -static int8_t tci_read_reg8s(TCGReg index) +static int8_t tci_read_reg8s(const tcg_target_ulong *regs, TCGReg index) { - return (int8_t)tci_read_reg(index); + return (int8_t)tci_read_reg(regs, index); } #endif #if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 -static int16_t tci_read_reg16s(TCGReg index) +static int16_t tci_read_reg16s(const tcg_target_ulong *regs, TCGReg index) { - return (int16_t)tci_read_reg(index); + return (int16_t)tci_read_reg(regs, index); } #endif #if TCG_TARGET_REG_BITS == 64 -static int32_t tci_read_reg32s(TCGReg index) +static int32_t tci_read_reg32s(const tcg_target_ulong *regs, TCGReg index) { - return (int32_t)tci_read_reg(index); + return (int32_t)tci_read_reg(regs, index); } #endif -static uint8_t tci_read_reg8(TCGReg index) +static uint8_t tci_read_reg8(const tcg_target_ulong *regs, TCGReg index) { - return (uint8_t)tci_read_reg(index); + return (uint8_t)tci_read_reg(regs, index); } -static uint16_t tci_read_reg16(TCGReg index) +static uint16_t tci_read_reg16(const tcg_target_ulong *regs, TCGReg index) { - return (uint16_t)tci_read_reg(index); + return (uint16_t)tci_read_reg(regs, index); } -static uint32_t tci_read_reg32(TCGReg index) +static uint32_t tci_read_reg32(const tcg_target_ulong *regs, TCGReg index) { - return (uint32_t)tci_read_reg(index); + return (uint32_t)tci_read_reg(regs, index); } #if TCG_TARGET_REG_BITS == 64 -static uint64_t tci_read_reg64(TCGReg index) +static uint64_t tci_read_reg64(const tcg_target_ulong *regs, TCGReg index) { - return tci_read_reg(index); + return tci_read_reg(regs, index); } #endif -static void tci_write_reg(TCGReg index, tcg_target_ulong value) +static void +tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value) { - tci_assert(index < ARRAY_SIZE(tci_reg)); + tci_assert(index < TCG_TARGET_NB_REGS); tci_assert(index != TCG_AREG0); tci_assert(index != TCG_REG_CALL_STACK); - tci_reg[index] = value; + regs[index] = value; } #if TCG_TARGET_REG_BITS == 64 -static void tci_write_reg32s(TCGReg index, int32_t value) +static void +tci_write_reg32s(tcg_target_ulong *regs, TCGReg index, int32_t value) { - tci_write_reg(index, value); + tci_write_reg(regs, index, value); } #endif -static void tci_write_reg8(TCGReg index, uint8_t value) +static void tci_write_reg8(tcg_target_ulong *regs, TCGReg index, uint8_t value) { - tci_write_reg(index, value); + tci_write_reg(regs, index, value); } -static void tci_write_reg32(TCGReg index, uint32_t value) +static void +tci_write_reg32(tcg_target_ulong *regs, TCGReg index, uint32_t value) { - tci_write_reg(index, value); + tci_write_reg(regs, index, value); } #if TCG_TARGET_REG_BITS == 32 -static void tci_write_reg64(uint32_t high_index, uint32_t low_index, - uint64_t value) +static void 
tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index, + uint32_t low_index, uint64_t value) { - tci_write_reg(low_index, value); - tci_write_reg(high_index, value >> 32); + tci_write_reg(regs, low_index, value); + tci_write_reg(regs, high_index, value >> 32); } #elif TCG_TARGET_REG_BITS == 64 -static void tci_write_reg64(TCGReg index, uint64_t value) +static void +tci_write_reg64(tcg_target_ulong *regs, TCGReg index, uint64_t value) { - tci_write_reg(index, value); + tci_write_reg(regs, index, value); } #endif @@ -188,94 +190,97 @@ static uint64_t tci_read_i64(uint8_t **tb_ptr) #endif /* Read indexed register (native size) from bytecode. */ -static tcg_target_ulong tci_read_r(uint8_t **tb_ptr) +static tcg_target_ulong +tci_read_r(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - tcg_target_ulong value = tci_read_reg(**tb_ptr); + tcg_target_ulong value = tci_read_reg(regs, **tb_ptr); *tb_ptr += 1; return value; } /* Read indexed register (8 bit) from bytecode. */ -static uint8_t tci_read_r8(uint8_t **tb_ptr) +static uint8_t tci_read_r8(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - uint8_t value = tci_read_reg8(**tb_ptr); + uint8_t value = tci_read_reg8(regs, **tb_ptr); *tb_ptr += 1; return value; } #if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 /* Read indexed register (8 bit signed) from bytecode. */ -static int8_t tci_read_r8s(uint8_t **tb_ptr) +static int8_t tci_read_r8s(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - int8_t value = tci_read_reg8s(**tb_ptr); + int8_t value = tci_read_reg8s(regs, **tb_ptr); *tb_ptr += 1; return value; } #endif /* Read indexed register (16 bit) from bytecode. */ -static uint16_t tci_read_r16(uint8_t **tb_ptr) +static uint16_t tci_read_r16(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - uint16_t value = tci_read_reg16(**tb_ptr); + uint16_t value = tci_read_reg16(regs, **tb_ptr); *tb_ptr += 1; return value; } #if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 /* Read indexed register (16 bit signed) from bytecode. */ -static int16_t tci_read_r16s(uint8_t **tb_ptr) +static int16_t tci_read_r16s(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - int16_t value = tci_read_reg16s(**tb_ptr); + int16_t value = tci_read_reg16s(regs, **tb_ptr); *tb_ptr += 1; return value; } #endif /* Read indexed register (32 bit) from bytecode. */ -static uint32_t tci_read_r32(uint8_t **tb_ptr) +static uint32_t tci_read_r32(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - uint32_t value = tci_read_reg32(**tb_ptr); + uint32_t value = tci_read_reg32(regs, **tb_ptr); *tb_ptr += 1; return value; } #if TCG_TARGET_REG_BITS == 32 /* Read two indexed registers (2 * 32 bit) from bytecode. */ -static uint64_t tci_read_r64(uint8_t **tb_ptr) +static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - uint32_t low = tci_read_r32(tb_ptr); - return tci_uint64(tci_read_r32(tb_ptr), low); + uint32_t low = tci_read_r32(regs, tb_ptr); + return tci_uint64(tci_read_r32(regs, tb_ptr), low); } #elif TCG_TARGET_REG_BITS == 64 /* Read indexed register (32 bit signed) from bytecode. */ -static int32_t tci_read_r32s(uint8_t **tb_ptr) +static int32_t tci_read_r32s(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - int32_t value = tci_read_reg32s(**tb_ptr); + int32_t value = tci_read_reg32s(regs, **tb_ptr); *tb_ptr += 1; return value; } /* Read indexed register (64 bit) from bytecode. 
*/ -static uint64_t tci_read_r64(uint8_t **tb_ptr) +static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - uint64_t value = tci_read_reg64(**tb_ptr); + uint64_t value = tci_read_reg64(regs, **tb_ptr); *tb_ptr += 1; return value; } #endif /* Read indexed register(s) with target address from bytecode. */ -static target_ulong tci_read_ulong(uint8_t **tb_ptr) +static target_ulong +tci_read_ulong(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - target_ulong taddr = tci_read_r(tb_ptr); + target_ulong taddr = tci_read_r(regs, tb_ptr); #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS - taddr += (uint64_t)tci_read_r(tb_ptr) << 32; + taddr += (uint64_t)tci_read_r(regs, tb_ptr) << 32; #endif return taddr; } /* Read indexed register or constant (native size) from bytecode. */ -static tcg_target_ulong tci_read_ri(uint8_t **tb_ptr) +static tcg_target_ulong +tci_read_ri(const tcg_target_ulong *regs, uint8_t **tb_ptr) { tcg_target_ulong value; TCGReg r = **tb_ptr; @@ -283,13 +288,13 @@ static tcg_target_ulong tci_read_ri(uint8_t **tb_ptr) if (r == TCG_CONST) { value = tci_read_i(tb_ptr); } else { - value = tci_read_reg(r); + value = tci_read_reg(regs, r); } return value; } /* Read indexed register or constant (32 bit) from bytecode. */ -static uint32_t tci_read_ri32(uint8_t **tb_ptr) +static uint32_t tci_read_ri32(const tcg_target_ulong *regs, uint8_t **tb_ptr) { uint32_t value; TCGReg r = **tb_ptr; @@ -297,21 +302,21 @@ static uint32_t tci_read_ri32(uint8_t **tb_ptr) if (r == TCG_CONST) { value = tci_read_i32(tb_ptr); } else { - value = tci_read_reg32(r); + value = tci_read_reg32(regs, r); } return value; } #if TCG_TARGET_REG_BITS == 32 /* Read two indexed registers or constants (2 * 32 bit) from bytecode. */ -static uint64_t tci_read_ri64(uint8_t **tb_ptr) +static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr) { - uint32_t low = tci_read_ri32(tb_ptr); - return tci_uint64(tci_read_ri32(tb_ptr), low); + uint32_t low = tci_read_ri32(regs, tb_ptr); + return tci_uint64(tci_read_ri32(regs, tb_ptr), low); } #elif TCG_TARGET_REG_BITS == 64 /* Read indexed register or constant (64 bit) from bytecode. */ -static uint64_t tci_read_ri64(uint8_t **tb_ptr) +static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr) { uint64_t value; TCGReg r = **tb_ptr; @@ -319,7 +324,7 @@ static uint64_t tci_read_ri64(uint8_t **tb_ptr) if (r == TCG_CONST) { value = tci_read_i64(tb_ptr); } else { - value = tci_read_reg64(r); + value = tci_read_reg64(regs, r); } return value; } @@ -465,12 +470,13 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) /* Interpret pseudo code in tb. 
*/ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) { + tcg_target_ulong regs[TCG_TARGET_NB_REGS]; long tcg_temps[CPU_TEMP_BUF_NLONGS]; uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS); uintptr_t ret = 0; - tci_reg[TCG_AREG0] = (tcg_target_ulong)env; - tci_reg[TCG_REG_CALL_STACK] = sp_value; + regs[TCG_AREG0] = (tcg_target_ulong)env; + regs[TCG_REG_CALL_STACK] = sp_value; tci_assert(tb_ptr); for (;;) { @@ -503,27 +509,27 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) switch (opc) { case INDEX_op_call: - t0 = tci_read_ri(&tb_ptr); + t0 = tci_read_ri(regs, &tb_ptr); #if TCG_TARGET_REG_BITS == 32 - tmp64 = ((helper_function)t0)(tci_read_reg(TCG_REG_R0), - tci_read_reg(TCG_REG_R1), - tci_read_reg(TCG_REG_R2), - tci_read_reg(TCG_REG_R3), - tci_read_reg(TCG_REG_R5), - tci_read_reg(TCG_REG_R6), - tci_read_reg(TCG_REG_R7), - tci_read_reg(TCG_REG_R8), - tci_read_reg(TCG_REG_R9), - tci_read_reg(TCG_REG_R10)); - tci_write_reg(TCG_REG_R0, tmp64); - tci_write_reg(TCG_REG_R1, tmp64 >> 32); + tmp64 = ((helper_function)t0)(tci_read_reg(regs, TCG_REG_R0), + tci_read_reg(regs, TCG_REG_R1), + tci_read_reg(regs, TCG_REG_R2), + tci_read_reg(regs, TCG_REG_R3), + tci_read_reg(regs, TCG_REG_R5), + tci_read_reg(regs, TCG_REG_R6), + tci_read_reg(regs, TCG_REG_R7), + tci_read_reg(regs, TCG_REG_R8), + tci_read_reg(regs, TCG_REG_R9), + tci_read_reg(regs, TCG_REG_R10)); + tci_write_reg(regs, TCG_REG_R0, tmp64); + tci_write_reg(regs, TCG_REG_R1, tmp64 >> 32); #else - tmp64 = ((helper_function)t0)(tci_read_reg(TCG_REG_R0), - tci_read_reg(TCG_REG_R1), - tci_read_reg(TCG_REG_R2), - tci_read_reg(TCG_REG_R3), - tci_read_reg(TCG_REG_R5)); - tci_write_reg(TCG_REG_R0, tmp64); + tmp64 = ((helper_function)t0)(tci_read_reg(regs, TCG_REG_R0), + tci_read_reg(regs, TCG_REG_R1), + tci_read_reg(regs, TCG_REG_R2), + tci_read_reg(regs, TCG_REG_R3), + tci_read_reg(regs, TCG_REG_R5)); + tci_write_reg(regs, TCG_REG_R0, tmp64); #endif break; case INDEX_op_br: @@ -533,46 +539,46 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) continue; case INDEX_op_setcond_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); + t1 = tci_read_r32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); condition = *tb_ptr++; - tci_write_reg32(t0, tci_compare32(t1, t2, condition)); + tci_write_reg32(regs, t0, tci_compare32(t1, t2, condition)); break; #if TCG_TARGET_REG_BITS == 32 case INDEX_op_setcond2_i32: t0 = *tb_ptr++; - tmp64 = tci_read_r64(&tb_ptr); - v64 = tci_read_ri64(&tb_ptr); + tmp64 = tci_read_r64(regs, &tb_ptr); + v64 = tci_read_ri64(regs, &tb_ptr); condition = *tb_ptr++; - tci_write_reg32(t0, tci_compare64(tmp64, v64, condition)); + tci_write_reg32(regs, t0, tci_compare64(tmp64, v64, condition)); break; #elif TCG_TARGET_REG_BITS == 64 case INDEX_op_setcond_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); + t1 = tci_read_r64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); condition = *tb_ptr++; - tci_write_reg64(t0, tci_compare64(t1, t2, condition)); + tci_write_reg64(regs, t0, tci_compare64(t1, t2, condition)); break; #endif case INDEX_op_mov_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(&tb_ptr); - tci_write_reg32(t0, t1); + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); break; case INDEX_op_movi_i32: t0 = *tb_ptr++; t1 = tci_read_i32(&tb_ptr); - tci_write_reg32(t0, t1); + tci_write_reg32(regs, t0, t1); break; /* Load/store operations (32 bit). 
*/ case INDEX_op_ld8u_i32: t0 = *tb_ptr++; - t1 = tci_read_r(&tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); - tci_write_reg8(t0, *(uint8_t *)(t1 + t2)); + tci_write_reg8(regs, t0, *(uint8_t *)(t1 + t2)); break; case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: @@ -583,25 +589,25 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) break; case INDEX_op_ld_i32: t0 = *tb_ptr++; - t1 = tci_read_r(&tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); - tci_write_reg32(t0, *(uint32_t *)(t1 + t2)); + tci_write_reg32(regs, t0, *(uint32_t *)(t1 + t2)); break; case INDEX_op_st8_i32: - t0 = tci_read_r8(&tb_ptr); - t1 = tci_read_r(&tb_ptr); + t0 = tci_read_r8(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); *(uint8_t *)(t1 + t2) = t0; break; case INDEX_op_st16_i32: - t0 = tci_read_r16(&tb_ptr); - t1 = tci_read_r(&tb_ptr); + t0 = tci_read_r16(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); *(uint16_t *)(t1 + t2) = t0; break; case INDEX_op_st_i32: - t0 = tci_read_r32(&tb_ptr); - t1 = tci_read_r(&tb_ptr); + t0 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); tci_assert(t1 != sp_value || (int32_t)t2 < 0); *(uint32_t *)(t1 + t2) = t0; @@ -611,46 +617,46 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) case INDEX_op_add_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, t1 + t2); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 + t2); break; case INDEX_op_sub_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, t1 - t2); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 - t2); break; case INDEX_op_mul_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, t1 * t2); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 * t2); break; #if TCG_TARGET_HAS_div_i32 case INDEX_op_div_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, (int32_t)t1 / (int32_t)t2); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, (int32_t)t1 / (int32_t)t2); break; case INDEX_op_divu_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, t1 / t2); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 / t2); break; case INDEX_op_rem_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, (int32_t)t1 % (int32_t)t2); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, (int32_t)t1 % (int32_t)t2); break; case INDEX_op_remu_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, t1 % t2); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 % t2); break; #elif TCG_TARGET_HAS_div2_i32 case INDEX_op_div2_i32: @@ -660,71 +666,71 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) #endif case INDEX_op_and_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, t1 & t2); + t1 = tci_read_ri32(regs, 
&tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 & t2); break; case INDEX_op_or_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, t1 | t2); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 | t2); break; case INDEX_op_xor_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, t1 ^ t2); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 ^ t2); break; /* Shift/rotate operations (32 bit). */ case INDEX_op_shl_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, t1 << (t2 & 31)); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 << (t2 & 31)); break; case INDEX_op_shr_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, t1 >> (t2 & 31)); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1 >> (t2 & 31)); break; case INDEX_op_sar_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, ((int32_t)t1 >> (t2 & 31))); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, ((int32_t)t1 >> (t2 & 31))); break; #if TCG_TARGET_HAS_rot_i32 case INDEX_op_rotl_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, rol32(t1, t2 & 31)); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, rol32(t1, t2 & 31)); break; case INDEX_op_rotr_i32: t0 = *tb_ptr++; - t1 = tci_read_ri32(&tb_ptr); - t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, ror32(t1, t2 & 31)); + t1 = tci_read_ri32(regs, &tb_ptr); + t2 = tci_read_ri32(regs, &tb_ptr); + tci_write_reg32(regs, t0, ror32(t1, t2 & 31)); break; #endif #if TCG_TARGET_HAS_deposit_i32 case INDEX_op_deposit_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(&tb_ptr); - t2 = tci_read_r32(&tb_ptr); + t1 = tci_read_r32(regs, &tb_ptr); + t2 = tci_read_r32(regs, &tb_ptr); tmp16 = *tb_ptr++; tmp8 = *tb_ptr++; tmp32 = (((1 << tmp8) - 1) << tmp16); - tci_write_reg32(t0, (t1 & ~tmp32) | ((t2 << tmp16) & tmp32)); + tci_write_reg32(regs, t0, (t1 & ~tmp32) | ((t2 << tmp16) & tmp32)); break; #endif case INDEX_op_brcond_i32: - t0 = tci_read_r32(&tb_ptr); - t1 = tci_read_ri32(&tb_ptr); + t0 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_ri32(regs, &tb_ptr); condition = *tb_ptr++; label = tci_read_label(&tb_ptr); if (tci_compare32(t0, t1, condition)) { @@ -737,20 +743,20 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) case INDEX_op_add2_i32: t0 = *tb_ptr++; t1 = *tb_ptr++; - tmp64 = tci_read_r64(&tb_ptr); - tmp64 += tci_read_r64(&tb_ptr); - tci_write_reg64(t1, t0, tmp64); + tmp64 = tci_read_r64(regs, &tb_ptr); + tmp64 += tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t1, t0, tmp64); break; case INDEX_op_sub2_i32: t0 = *tb_ptr++; t1 = *tb_ptr++; - tmp64 = tci_read_r64(&tb_ptr); - tmp64 -= tci_read_r64(&tb_ptr); - tci_write_reg64(t1, t0, tmp64); + tmp64 = tci_read_r64(regs, &tb_ptr); + tmp64 -= tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t1, t0, tmp64); break; case INDEX_op_brcond2_i32: - tmp64 = tci_read_r64(&tb_ptr); - v64 = tci_read_ri64(&tb_ptr); + tmp64 = tci_read_r64(regs, &tb_ptr); + v64 = tci_read_ri64(regs, &tb_ptr); 
condition = *tb_ptr++; label = tci_read_label(&tb_ptr); if (tci_compare64(tmp64, v64, condition)) { @@ -762,86 +768,86 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) case INDEX_op_mulu2_i32: t0 = *tb_ptr++; t1 = *tb_ptr++; - t2 = tci_read_r32(&tb_ptr); - tmp64 = tci_read_r32(&tb_ptr); - tci_write_reg64(t1, t0, t2 * tmp64); + t2 = tci_read_r32(regs, &tb_ptr); + tmp64 = tci_read_r32(regs, &tb_ptr); + tci_write_reg64(regs, t1, t0, t2 * tmp64); break; #endif /* TCG_TARGET_REG_BITS == 32 */ #if TCG_TARGET_HAS_ext8s_i32 case INDEX_op_ext8s_i32: t0 = *tb_ptr++; - t1 = tci_read_r8s(&tb_ptr); - tci_write_reg32(t0, t1); + t1 = tci_read_r8s(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); break; #endif #if TCG_TARGET_HAS_ext16s_i32 case INDEX_op_ext16s_i32: t0 = *tb_ptr++; - t1 = tci_read_r16s(&tb_ptr); - tci_write_reg32(t0, t1); + t1 = tci_read_r16s(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); break; #endif #if TCG_TARGET_HAS_ext8u_i32 case INDEX_op_ext8u_i32: t0 = *tb_ptr++; - t1 = tci_read_r8(&tb_ptr); - tci_write_reg32(t0, t1); + t1 = tci_read_r8(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); break; #endif #if TCG_TARGET_HAS_ext16u_i32 case INDEX_op_ext16u_i32: t0 = *tb_ptr++; - t1 = tci_read_r16(&tb_ptr); - tci_write_reg32(t0, t1); + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg32(regs, t0, t1); break; #endif #if TCG_TARGET_HAS_bswap16_i32 case INDEX_op_bswap16_i32: t0 = *tb_ptr++; - t1 = tci_read_r16(&tb_ptr); - tci_write_reg32(t0, bswap16(t1)); + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg32(regs, t0, bswap16(t1)); break; #endif #if TCG_TARGET_HAS_bswap32_i32 case INDEX_op_bswap32_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(&tb_ptr); - tci_write_reg32(t0, bswap32(t1)); + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, bswap32(t1)); break; #endif #if TCG_TARGET_HAS_not_i32 case INDEX_op_not_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(&tb_ptr); - tci_write_reg32(t0, ~t1); + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, ~t1); break; #endif #if TCG_TARGET_HAS_neg_i32 case INDEX_op_neg_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(&tb_ptr); - tci_write_reg32(t0, -t1); + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg32(regs, t0, -t1); break; #endif #if TCG_TARGET_REG_BITS == 64 case INDEX_op_mov_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(&tb_ptr); - tci_write_reg64(t0, t1); + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); break; case INDEX_op_movi_i64: t0 = *tb_ptr++; t1 = tci_read_i64(&tb_ptr); - tci_write_reg64(t0, t1); + tci_write_reg64(regs, t0, t1); break; /* Load/store operations (64 bit). 
*/ case INDEX_op_ld8u_i64: t0 = *tb_ptr++; - t1 = tci_read_r(&tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); - tci_write_reg8(t0, *(uint8_t *)(t1 + t2)); + tci_write_reg8(regs, t0, *(uint8_t *)(t1 + t2)); break; case INDEX_op_ld8s_i64: case INDEX_op_ld16u_i64: @@ -850,43 +856,43 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) break; case INDEX_op_ld32u_i64: t0 = *tb_ptr++; - t1 = tci_read_r(&tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); - tci_write_reg32(t0, *(uint32_t *)(t1 + t2)); + tci_write_reg32(regs, t0, *(uint32_t *)(t1 + t2)); break; case INDEX_op_ld32s_i64: t0 = *tb_ptr++; - t1 = tci_read_r(&tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); - tci_write_reg32s(t0, *(int32_t *)(t1 + t2)); + tci_write_reg32s(regs, t0, *(int32_t *)(t1 + t2)); break; case INDEX_op_ld_i64: t0 = *tb_ptr++; - t1 = tci_read_r(&tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); - tci_write_reg64(t0, *(uint64_t *)(t1 + t2)); + tci_write_reg64(regs, t0, *(uint64_t *)(t1 + t2)); break; case INDEX_op_st8_i64: - t0 = tci_read_r8(&tb_ptr); - t1 = tci_read_r(&tb_ptr); + t0 = tci_read_r8(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); *(uint8_t *)(t1 + t2) = t0; break; case INDEX_op_st16_i64: - t0 = tci_read_r16(&tb_ptr); - t1 = tci_read_r(&tb_ptr); + t0 = tci_read_r16(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); *(uint16_t *)(t1 + t2) = t0; break; case INDEX_op_st32_i64: - t0 = tci_read_r32(&tb_ptr); - t1 = tci_read_r(&tb_ptr); + t0 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); *(uint32_t *)(t1 + t2) = t0; break; case INDEX_op_st_i64: - t0 = tci_read_r64(&tb_ptr); - t1 = tci_read_r(&tb_ptr); + t0 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); tci_assert(t1 != sp_value || (int32_t)t2 < 0); *(uint64_t *)(t1 + t2) = t0; @@ -896,21 +902,21 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) case INDEX_op_add_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, t1 + t2); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 + t2); break; case INDEX_op_sub_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, t1 - t2); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 - t2); break; case INDEX_op_mul_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, t1 * t2); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 * t2); break; #if TCG_TARGET_HAS_div_i64 case INDEX_op_div_i64: @@ -927,71 +933,71 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) #endif case INDEX_op_and_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, t1 & t2); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 & t2); break; case INDEX_op_or_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, t1 | t2); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 | t2); break; case INDEX_op_xor_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = 
tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, t1 ^ t2); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 ^ t2); break; /* Shift/rotate operations (64 bit). */ case INDEX_op_shl_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, t1 << (t2 & 63)); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 << (t2 & 63)); break; case INDEX_op_shr_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, t1 >> (t2 & 63)); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1 >> (t2 & 63)); break; case INDEX_op_sar_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, ((int64_t)t1 >> (t2 & 63))); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, ((int64_t)t1 >> (t2 & 63))); break; #if TCG_TARGET_HAS_rot_i64 case INDEX_op_rotl_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, rol64(t1, t2 & 63)); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, rol64(t1, t2 & 63)); break; case INDEX_op_rotr_i64: t0 = *tb_ptr++; - t1 = tci_read_ri64(&tb_ptr); - t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, ror64(t1, t2 & 63)); + t1 = tci_read_ri64(regs, &tb_ptr); + t2 = tci_read_ri64(regs, &tb_ptr); + tci_write_reg64(regs, t0, ror64(t1, t2 & 63)); break; #endif #if TCG_TARGET_HAS_deposit_i64 case INDEX_op_deposit_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(&tb_ptr); - t2 = tci_read_r64(&tb_ptr); + t1 = tci_read_r64(regs, &tb_ptr); + t2 = tci_read_r64(regs, &tb_ptr); tmp16 = *tb_ptr++; tmp8 = *tb_ptr++; tmp64 = (((1ULL << tmp8) - 1) << tmp16); - tci_write_reg64(t0, (t1 & ~tmp64) | ((t2 << tmp16) & tmp64)); + tci_write_reg64(regs, t0, (t1 & ~tmp64) | ((t2 << tmp16) & tmp64)); break; #endif case INDEX_op_brcond_i64: - t0 = tci_read_r64(&tb_ptr); - t1 = tci_read_ri64(&tb_ptr); + t0 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_ri64(regs, &tb_ptr); condition = *tb_ptr++; label = tci_read_label(&tb_ptr); if (tci_compare64(t0, t1, condition)) { @@ -1003,29 +1009,29 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) #if TCG_TARGET_HAS_ext8u_i64 case INDEX_op_ext8u_i64: t0 = *tb_ptr++; - t1 = tci_read_r8(&tb_ptr); - tci_write_reg64(t0, t1); + t1 = tci_read_r8(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); break; #endif #if TCG_TARGET_HAS_ext8s_i64 case INDEX_op_ext8s_i64: t0 = *tb_ptr++; - t1 = tci_read_r8s(&tb_ptr); - tci_write_reg64(t0, t1); + t1 = tci_read_r8s(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); break; #endif #if TCG_TARGET_HAS_ext16s_i64 case INDEX_op_ext16s_i64: t0 = *tb_ptr++; - t1 = tci_read_r16s(&tb_ptr); - tci_write_reg64(t0, t1); + t1 = tci_read_r16s(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); break; #endif #if TCG_TARGET_HAS_ext16u_i64 case INDEX_op_ext16u_i64: t0 = *tb_ptr++; - t1 = tci_read_r16(&tb_ptr); - tci_write_reg64(t0, t1); + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); break; #endif #if TCG_TARGET_HAS_ext32s_i64 @@ -1033,50 +1039,50 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) #endif case INDEX_op_ext_i32_i64: t0 = *tb_ptr++; - t1 = tci_read_r32s(&tb_ptr); - tci_write_reg64(t0, t1); + t1 = tci_read_r32s(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); break; #if 
TCG_TARGET_HAS_ext32u_i64 case INDEX_op_ext32u_i64: #endif case INDEX_op_extu_i32_i64: t0 = *tb_ptr++; - t1 = tci_read_r32(&tb_ptr); - tci_write_reg64(t0, t1); + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg64(regs, t0, t1); break; #if TCG_TARGET_HAS_bswap16_i64 case INDEX_op_bswap16_i64: t0 = *tb_ptr++; - t1 = tci_read_r16(&tb_ptr); - tci_write_reg64(t0, bswap16(t1)); + t1 = tci_read_r16(regs, &tb_ptr); + tci_write_reg64(regs, t0, bswap16(t1)); break; #endif #if TCG_TARGET_HAS_bswap32_i64 case INDEX_op_bswap32_i64: t0 = *tb_ptr++; - t1 = tci_read_r32(&tb_ptr); - tci_write_reg64(t0, bswap32(t1)); + t1 = tci_read_r32(regs, &tb_ptr); + tci_write_reg64(regs, t0, bswap32(t1)); break; #endif #if TCG_TARGET_HAS_bswap64_i64 case INDEX_op_bswap64_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(&tb_ptr); - tci_write_reg64(t0, bswap64(t1)); + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, bswap64(t1)); break; #endif #if TCG_TARGET_HAS_not_i64 case INDEX_op_not_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(&tb_ptr); - tci_write_reg64(t0, ~t1); + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, ~t1); break; #endif #if TCG_TARGET_HAS_neg_i64 case INDEX_op_neg_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(&tb_ptr); - tci_write_reg64(t0, -t1); + t1 = tci_read_r64(regs, &tb_ptr); + tci_write_reg64(regs, t0, -t1); break; #endif #endif /* TCG_TARGET_REG_BITS == 64 */ @@ -1097,7 +1103,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) continue; case INDEX_op_qemu_ld_i32: t0 = *tb_ptr++; - taddr = tci_read_ulong(&tb_ptr); + taddr = tci_read_ulong(regs, &tb_ptr); oi = tci_read_i(&tb_ptr); switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { case MO_UB: @@ -1127,14 +1133,14 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) default: tcg_abort(); } - tci_write_reg(t0, tmp32); + tci_write_reg(regs, t0, tmp32); break; case INDEX_op_qemu_ld_i64: t0 = *tb_ptr++; if (TCG_TARGET_REG_BITS == 32) { t1 = *tb_ptr++; } - taddr = tci_read_ulong(&tb_ptr); + taddr = tci_read_ulong(regs, &tb_ptr); oi = tci_read_i(&tb_ptr); switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { case MO_UB: @@ -1176,14 +1182,14 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) default: tcg_abort(); } - tci_write_reg(t0, tmp64); + tci_write_reg(regs, t0, tmp64); if (TCG_TARGET_REG_BITS == 32) { - tci_write_reg(t1, tmp64 >> 32); + tci_write_reg(regs, t1, tmp64 >> 32); } break; case INDEX_op_qemu_st_i32: - t0 = tci_read_r(&tb_ptr); - taddr = tci_read_ulong(&tb_ptr); + t0 = tci_read_r(regs, &tb_ptr); + taddr = tci_read_ulong(regs, &tb_ptr); oi = tci_read_i(&tb_ptr); switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { case MO_UB: @@ -1206,8 +1212,8 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) } break; case INDEX_op_qemu_st_i64: - tmp64 = tci_read_r64(&tb_ptr); - taddr = tci_read_ulong(&tb_ptr); + tmp64 = tci_read_r64(regs, &tb_ptr); + taddr = tci_read_ulong(regs, &tb_ptr); oi = tci_read_i(&tb_ptr); switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { case MO_UB: From 619205fd1facd5078d127cd94e4713b590770f96 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 5 Jul 2017 18:41:23 -0400 Subject: [PATCH 17/20] tcg: take .helpers out of TCGContext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Groundwork for supporting multiple TCG contexts. The hash table becomes read-only after it is filled in, so we can save space by keeping just a global pointer to it. Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Signed-off-by: Emilio G. 
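Cota
Signed-off-by: Richard Henderson
---

A rough standalone sketch of the write-once table pattern described above (plain GLib, hypothetical names, not code from this patch): the table is populated exactly once at startup and never mutated afterwards, so no per-context copy is needed and a single global pointer can safely be read from anywhere. Passing NULL hash/equal functions to g_hash_table_new() selects g_direct_hash()/g_direct_equal(), i.e. raw pointer comparison, which suits function-pointer keys.

    /* Sketch only: a table filled once at startup, read-only afterwards. */
    #include <glib.h>
    #include <stdio.h>

    typedef struct { void (*func)(void); const char *name; } Info;

    static void foo(void) { }
    static Info all_infos[] = { { foo, "foo" } };
    static GHashTable *info_table;   /* written once, then read-only */

    static void info_table_init(void)
    {
        /* NULL hash/equal selects g_direct_hash/g_direct_equal, i.e. the
         * function pointers themselves are compared as raw pointers. */
        info_table = g_hash_table_new(NULL, NULL);
        for (gsize i = 0; i < G_N_ELEMENTS(all_infos); i++) {
            g_hash_table_insert(info_table, (gpointer)all_infos[i].func,
                                &all_infos[i]);
        }
    }

    int main(void)
    {
        info_table_init();
        Info *info = g_hash_table_lookup(info_table, (gpointer)foo);
        printf("%s\n", info ? info->name : "(unknown)");
        return 0;
    }
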
tcg/tcg.c | 10 +++++----- tcg/tcg.h | 2 -- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index a874bdd41f..ee60798438 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -318,6 +318,7 @@ typedef struct TCGHelperInfo { static const TCGHelperInfo all_helpers[] = { #include "exec/helper-tcg.h" }; +static GHashTable *helper_table; static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; static void process_op_defs(TCGContext *s); @@ -328,7 +329,6 @@ void tcg_context_init(TCGContext *s) TCGOpDef *def; TCGArgConstraint *args_ct; int *sorted_args; - GHashTable *helper_table; memset(s, 0, sizeof(*s)); s->nb_globals = 0; @@ -356,7 +356,7 @@ void tcg_context_init(TCGContext *s) /* Register helpers. */ /* Use g_direct_hash/equal for direct pointer comparisons on func. */ - s->helpers = helper_table = g_hash_table_new(NULL, NULL); + helper_table = g_hash_table_new(NULL, NULL); for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, @@ -982,7 +982,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, unsigned sizemask, flags; TCGHelperInfo *info; - info = g_hash_table_lookup(s->helpers, (gpointer)func); + info = g_hash_table_lookup(helper_table, (gpointer)func); flags = info->flags; sizemask = info->sizemask; @@ -1211,8 +1211,8 @@ static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) { const char *ret = NULL; - if (s->helpers) { - TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val); + if (helper_table) { + TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val); if (info) { ret = info->name; } diff --git a/tcg/tcg.h b/tcg/tcg.h index 25662c36d4..b2d42e3136 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -656,8 +656,6 @@ struct TCGContext { tcg_insn_unit *code_ptr; - GHashTable *helpers; - #ifdef CONFIG_PROFILER /* profiling info */ int64_t tb_count1; From 3637cf58f9441ad277fd70299a29d0e39b32c96c Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Sat, 15 Jul 2017 03:24:27 -0400 Subject: [PATCH 18/20] util: move qemu_real_host_page_size/mask to osdep.h These only depend on the host and therefore belong in the common osdep, not in a target-dependent object. While at it, query the host during an init constructor, which guarantees the page size will be well-defined throughout the execution of the program. Suggested-by: Richard Henderson Reviewed-by: Richard Henderson Signed-off-by: Emilio G.
Cota Signed-off-by: Richard Henderson --- exec.c | 4 ---- include/exec/cpu-all.h | 2 -- include/qemu/osdep.h | 6 ++++++ util/Makefile.objs | 1 + util/pagesize.c | 18 ++++++++++++++++++ 5 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 util/pagesize.c diff --git a/exec.c b/exec.c index 7a80460725..6378714a2b 100644 --- a/exec.c +++ b/exec.c @@ -120,8 +120,6 @@ int use_icount; uintptr_t qemu_host_page_size; intptr_t qemu_host_page_mask; -uintptr_t qemu_real_host_page_size; -intptr_t qemu_real_host_page_mask; bool set_preferred_target_page_bits(int bits) { @@ -3606,8 +3604,6 @@ void page_size_init(void) { /* NOTE: we can always suppose that qemu_host_page_size >= TARGET_PAGE_SIZE */ - qemu_real_host_page_size = getpagesize(); - qemu_real_host_page_mask = -(intptr_t)qemu_real_host_page_size; if (qemu_host_page_size == 0) { qemu_host_page_size = qemu_real_host_page_size; } diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index ffe43d5654..778031c3d7 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -229,8 +229,6 @@ extern int target_page_bits; /* Using intptr_t ensures that qemu_*_page_mask is sign-extended even * when intptr_t is 32-bit and we are aligning a long long. */ -extern uintptr_t qemu_real_host_page_size; -extern intptr_t qemu_real_host_page_mask; extern uintptr_t qemu_host_page_size; extern intptr_t qemu_host_page_mask; diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 9dd318a7dd..826650c58a 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -505,6 +505,12 @@ char *qemu_get_pid_name(pid_t pid); */ pid_t qemu_fork(Error **errp); +/* Using intptr_t ensures that qemu_*_page_mask is sign-extended even + * when intptr_t is 32-bit and we are aligning a long long. + */ +extern uintptr_t qemu_real_host_page_size; +extern intptr_t qemu_real_host_page_mask; + extern int qemu_icache_linesize; extern int qemu_dcache_linesize; diff --git a/util/Makefile.objs b/util/Makefile.objs index 50a55ecc75..2973b0a323 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -40,6 +40,7 @@ util-obj-y += buffer.o util-obj-y += timed-average.o util-obj-y += base64.o util-obj-y += log.o +util-obj-y += pagesize.o util-obj-y += qdist.o util-obj-y += qht.o util-obj-y += range.o diff --git a/util/pagesize.c b/util/pagesize.c new file mode 100644 index 0000000000..998632cf6e --- /dev/null +++ b/util/pagesize.c @@ -0,0 +1,18 @@ +/* + * pagesize.c - query the host about its page size + * + * Copyright (C) 2017, Emilio G. Cota + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +uintptr_t qemu_real_host_page_size; +intptr_t qemu_real_host_page_mask; + +static void __attribute__((constructor)) init_real_host_page_size(void) +{ + qemu_real_host_page_size = getpagesize(); + qemu_real_host_page_mask = -(intptr_t)qemu_real_host_page_size; +} From a505785cd221994dd3713bde860861869a059940 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Fri, 7 Jul 2017 19:00:30 -0400 Subject: [PATCH 19/20] tcg: define TCG_HIGHWATER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will come in handy very soon. Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Signed-off-by: Emilio G. 
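Cota
Signed-off-by: Richard Henderson
---

The comment in the hunk below explains the high-water mark; as a standalone sketch of the idea (hypothetical names, not code from this patch): reserve a slack region at the end of a buffer, and on each variable-sized write compare one pointer against the mark to decide when to flush and start over, instead of bounds-checking every byte.

    /* Sketch only: high-water mark on an append-only buffer. */
    #include <string.h>

    #define HIGHWATER 1024           /* must exceed any single write */

    static char buf[64 * 1024];
    static char *ptr = buf;
    static const char *highwater = buf + sizeof(buf) - HIGHWATER;

    /* Returns -1 when the buffer should be flushed and refilled. */
    static int emit(const void *data, size_t len)
    {
        if (ptr > highwater) {
            return -1;
        }
        memcpy(ptr, data, len);      /* safe: len fits in the slack */
        ptr += len;
        return 0;
    }

    int main(void)
    {
        while (emit("op", 2) == 0) {
            /* keep appending until the mark is crossed */
        }
        return 0;
    }
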
tcg/tcg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index ee60798438..4492e1eb3f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -116,6 +116,8 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, static bool tcg_out_ldst_finalize(TCGContext *s); #endif +#define TCG_HIGHWATER 1024 + static TCGRegSet tcg_target_available_regs[2]; static TCGRegSet tcg_target_call_clobber_regs; @@ -430,7 +432,7 @@ void tcg_prologue_init(TCGContext *s) /* Compute a high-water mark, at which we voluntarily flush the buffer and start over. The size here is arbitrary, significantly larger than we expect the code generation for any one opcode to require. */ - s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024); + s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER); tcg_register_jit(s->code_gen_buffer, total_size); From 8df8d529ed958de4e23dcbf38bd34eff1a4716f2 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Sat, 30 Sep 2017 17:00:42 +0800 Subject: [PATCH 20/20] tcg/mips: delete commented out extern keyword. Delete commented out extern keyword on link_error(). Signed-off-by: Jiang Biao Message-Id: <1506762042-32145-1-git-send-email-jiang.biao2@zte.com.cn> Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.inc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index e993138930..4b55ab8856 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -36,7 +36,7 @@ #else /* To assert at compile-time that these values are never used for TCG_TARGET_REG_BITS == 64. */ -/* extern */ int link_error(void); +int link_error(void); # define LO_OFF link_error() # define HI_OFF link_error() #endif
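
For reference, the link_error() construct above is a link-time assertion. A standalone sketch of the idiom (hypothetical names, not code from this patch): declare a function that is defined nowhere; every use must sit on a path the compiler can prove dead and fold away. If a supposedly unreachable use survives to link time, the build fails with an undefined reference, which is the assertion. Note the idiom presumes an optimizing build, since without constant folding the dead call can survive and break the link.

    /* Sketch only: assert at link time that a path is unreachable. */
    #include <stdio.h>

    int never_defined(void);         /* deliberately never defined */

    #define REG_BITS 64              /* stand-in for a config macro */

    static int lo_off(void)
    {
        if (REG_BITS == 64) {
            return 0;                /* the only reachable path */
        }
        /* Dead when REG_BITS == 64; must be folded away, otherwise
         * linking fails with "undefined reference to never_defined". */
        return never_defined();
    }

    int main(void)
    {
        printf("%d\n", lo_off());
        return 0;
    }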