From 5b38ee31616d1532c3c3a6dc644a9160d608ed2f Mon Sep 17 00:00:00 2001 From: Richard Henderson <richard.henderson@linaro.org> Date: Wed, 25 Oct 2017 07:14:20 -0700 Subject: [PATCH 1/3] tcg: Allow constant pool entries in the prologue Both ARMv6 and AArch64 currently may drop complex guest_base values into the constant pool. But generic code wasn't expecting that, and the pool is not emitted. Correct that. Tested-by: Emilio G. Cota <cota@braap.org> Tested-by: Laurent Desnogues <laurent.desnogues@gmail.com> Reported-by: Laurent Desnogues <laurent.desnogues@gmail.com> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- tcg/tcg.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 683ff4abb7..c22f1c4441 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -771,12 +771,32 @@ void tcg_prologue_init(TCGContext *s) /* Put the prologue at the beginning of code_gen_buffer. */ buf0 = s->code_gen_buffer; + total_size = s->code_gen_buffer_size; s->code_ptr = buf0; s->code_buf = buf0; + s->data_gen_ptr = NULL; s->code_gen_prologue = buf0; + /* Compute a high-water mark, at which we voluntarily flush the buffer + and start over. The size here is arbitrary, significantly larger + than we expect the code generation for any one opcode to require. */ + s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER); + +#ifdef TCG_TARGET_NEED_POOL_LABELS + s->pool_labels = NULL; +#endif + /* Generate the prologue. */ tcg_target_qemu_prologue(s); + +#ifdef TCG_TARGET_NEED_POOL_LABELS + /* Allow the prologue to put e.g. guest_base into a pool entry. */ + { + bool ok = tcg_out_pool_finalize(s); + tcg_debug_assert(ok); + } +#endif + buf1 = s->code_ptr; flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); @@ -785,21 +805,36 @@ void tcg_prologue_init(TCGContext *s) s->code_gen_ptr = buf1; s->code_gen_buffer = buf1; s->code_buf = buf1; - total_size = s->code_gen_buffer_size - prologue_size; + total_size -= prologue_size; s->code_gen_buffer_size = total_size; - /* Compute a high-water mark, at which we voluntarily flush the buffer - and start over. The size here is arbitrary, significantly larger - than we expect the code generation for any one opcode to require. */ - s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER); - tcg_register_jit(s->code_gen_buffer, total_size); #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { qemu_log_lock(); qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); - log_disas(buf0, prologue_size); + if (s->data_gen_ptr) { + size_t code_size = s->data_gen_ptr - buf0; + size_t data_size = prologue_size - code_size; + size_t i; + + log_disas(buf0, code_size); + + for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { + if (sizeof(tcg_target_ulong) == 8) { + qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", + (uintptr_t)s->data_gen_ptr + i, + *(uint64_t *)(s->data_gen_ptr + i)); + } else { + qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n", + (uintptr_t)s->data_gen_ptr + i, + *(uint32_t *)(s->data_gen_ptr + i)); + } + } + } else { + log_disas(buf0, prologue_size); + } qemu_log("\n"); qemu_log_flush(); qemu_log_unlock(); From ba2c747992f8c315c2fbddba196ce9137430d61d Mon Sep 17 00:00:00 2001 From: Richard Henderson <richard.henderson@linaro.org> Date: Wed, 25 Oct 2017 18:03:27 +0200 Subject: [PATCH 2/3] tcg/s390x: Use constant pool for prologue Rather than have separate code only used for guest_base, rely on a recent change to handle constant pool entries. Cc: qemu-s390x@nongnu.org Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- tcg/s390/tcg-target.inc.c | 44 +++++++++++---------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index 38a7cdab75..9af6dcef05 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -555,9 +555,6 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) static const S390Opcode lli_insns[4] = { RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH }; -static const S390Opcode ii_insns[4] = { - RI_IILL, RI_IILH, RI_IIHL, RI_IIHH -}; static bool maybe_out_small_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long sval) @@ -647,36 +644,19 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, return; } - /* When allowed, stuff it in the constant pool. */ - if (!in_prologue) { - if (USE_REG_TB) { - tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0); - new_pool_label(s, sval, R_390_20, s->code_ptr - 2, - -(intptr_t)s->code_gen_ptr); - } else { - tcg_out_insn(s, RIL, LGRL, ret, 0); - new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2); - } - return; - } - - /* What's left is for the prologue, loading GUEST_BASE, and because - it failed to match above, is known to be a full 64-bit quantity. - We could try more than this, but it probably wouldn't pay off. */ - if (s390_facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RIL, LLILF, ret, uval); - tcg_out_insn(s, RIL, IIHF, ret, uval >> 32); + /* Otherwise, stuff it in the constant pool. */ + if (s390_facilities & FACILITY_GEN_INST_EXT) { + tcg_out_insn(s, RIL, LGRL, ret, 0); + new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2); + } else if (USE_REG_TB && !in_prologue) { + tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0); + new_pool_label(s, sval, R_390_20, s->code_ptr - 2, + -(intptr_t)s->code_gen_ptr); } else { - const S390Opcode *insns = lli_insns; - int i; - - for (i = 0; i < 4; i++) { - uint16_t part = uval >> (16 * i); - if (part) { - tcg_out_insn_RI(s, insns[i], ret, part); - insns = ii_insns; - } - } + TCGReg base = ret ? ret : TCG_TMP0; + tcg_out_insn(s, RIL, LARL, base, 0); + new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2); + tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0); } } From 426eeecdf5d9cf1695a53c08f46394f8e5351750 Mon Sep 17 00:00:00 2001 From: Peter Maydell <peter.maydell@linaro.org> Date: Thu, 2 Nov 2017 16:35:36 +0000 Subject: [PATCH 3/3] cpu-exec: Exit exclusive region on longjmp from step_atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit ac03ee5331612e44be narrowed the scope of the exclusive region so it only covers when we're executing the TB, not when we're generating it. However it missed that there is more than one execution path out of cpu_tb_exec -- if the atomic insn causes an exception then the code will longjmp out, skipping the code to end the exclusive region. This causes QEMU to hang the next time the CPU calls start_exclusive(), waiting for itself to exit the region. Move the "end the region" code out to the end of the function so that it is run for both normal exit and also for exit-via-longjmp. We have to use a volatile bool flag to decide whether we need to end the region, because we can longjump out of the codegen as well as the execution. (For some reason this only reproduces for me with a clang optimized build, not a gcc debug build.) Reviewed-by: Emilio G. Cota <cota@braap.org> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Fixes: ac03ee5331612e44beb393df2b578c951d27dc0d Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Message-Id: <1509640536-32160-1-git-send-email-peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- accel/tcg/cpu-exec.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 4318441e4c..61297f8f4a 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -233,6 +233,8 @@ void cpu_exec_step_atomic(CPUState *cpu) uint32_t flags; uint32_t cflags = 1; uint32_t cf_mask = cflags & CF_HASH_MASK; + /* volatile because we modify it between setjmp and longjmp */ + volatile bool in_exclusive_region = false; if (sigsetjmp(cpu->jmp_env, 0) == 0) { tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask); @@ -251,14 +253,12 @@ void cpu_exec_step_atomic(CPUState *cpu) /* Since we got here, we know that parallel_cpus must be true. */ parallel_cpus = false; + in_exclusive_region = true; cc->cpu_exec_enter(cpu); /* execute the generated code */ trace_exec_tb(tb, pc); cpu_tb_exec(cpu, tb); cc->cpu_exec_exit(cpu); - parallel_cpus = true; - - end_exclusive(); } else { /* We may have exited due to another problem here, so we need * to reset any tb_locks we may have taken but didn't release. @@ -270,6 +270,15 @@ void cpu_exec_step_atomic(CPUState *cpu) #endif tb_lock_reset(); } + + if (in_exclusive_region) { + /* We might longjump out of either the codegen or the + * execution, so must make sure we only end the exclusive + * region if we started it. + */ + parallel_cpus = true; + end_exclusive(); + } } struct tb_desc {