From 5b38ee31616d1532c3c3a6dc644a9160d608ed2f Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Wed, 25 Oct 2017 07:14:20 -0700
Subject: [PATCH 1/3] tcg: Allow constant pool entries in the prologue

Both ARMv6 and AArch64 may currently drop complex guest_base values
into the constant pool.  But generic code was not expecting that, so
the pool was never emitted for the prologue.  Correct that.

Tested-by: Emilio G. Cota <cota@braap.org>
Tested-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Reported-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 49 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 7 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 683ff4abb7..c22f1c4441 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -771,12 +771,32 @@ void tcg_prologue_init(TCGContext *s)
 
     /* Put the prologue at the beginning of code_gen_buffer.  */
     buf0 = s->code_gen_buffer;
+    total_size = s->code_gen_buffer_size;
     s->code_ptr = buf0;
     s->code_buf = buf0;
+    s->data_gen_ptr = NULL;
     s->code_gen_prologue = buf0;
 
+    /* Compute a high-water mark, at which we voluntarily flush the buffer
+       and start over.  The size here is arbitrary, significantly larger
+       than we expect the code generation for any one opcode to require.  */
+    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
+
+#ifdef TCG_TARGET_NEED_POOL_LABELS
+    s->pool_labels = NULL;
+#endif
+
     /* Generate the prologue.  */
     tcg_target_qemu_prologue(s);
+
+#ifdef TCG_TARGET_NEED_POOL_LABELS
+    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
+    {
+        bool ok = tcg_out_pool_finalize(s);
+        tcg_debug_assert(ok);
+    }
+#endif
+
     buf1 = s->code_ptr;
     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
 
@@ -785,21 +805,36 @@ void tcg_prologue_init(TCGContext *s)
     s->code_gen_ptr = buf1;
     s->code_gen_buffer = buf1;
     s->code_buf = buf1;
-    total_size = s->code_gen_buffer_size - prologue_size;
+    total_size -= prologue_size;
     s->code_gen_buffer_size = total_size;
 
-    /* Compute a high-water mark, at which we voluntarily flush the buffer
-       and start over.  The size here is arbitrary, significantly larger
-       than we expect the code generation for any one opcode to require.  */
-    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
-
     tcg_register_jit(s->code_gen_buffer, total_size);
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
         qemu_log_lock();
         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
-        log_disas(buf0, prologue_size);
+        if (s->data_gen_ptr) {
+            size_t code_size = s->data_gen_ptr - buf0;
+            size_t data_size = prologue_size - code_size;
+            size_t i;
+
+            log_disas(buf0, code_size);
+
+            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
+                if (sizeof(tcg_target_ulong) == 8) {
+                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
+                             (uintptr_t)s->data_gen_ptr + i,
+                             *(uint64_t *)(s->data_gen_ptr + i));
+                } else {
+                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
+                             (uintptr_t)s->data_gen_ptr + i,
+                             *(uint32_t *)(s->data_gen_ptr + i));
+                }
+            }
+        } else {
+            log_disas(buf0, prologue_size);
+        }
         qemu_log("\n");
         qemu_log_flush();
         qemu_log_unlock();

From ba2c747992f8c315c2fbddba196ce9137430d61d Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Wed, 25 Oct 2017 18:03:27 +0200
Subject: [PATCH 2/3] tcg/s390x: Use constant pool for prologue

Rather than keep separate code that is only used for guest_base,
rely on the recent change that allows constant pool entries in
the prologue.

Cc: qemu-s390x@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/s390/tcg-target.inc.c | 44 +++++++++++----------------------------
 1 file changed, 12 insertions(+), 32 deletions(-)

diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
index 38a7cdab75..9af6dcef05 100644
--- a/tcg/s390/tcg-target.inc.c
+++ b/tcg/s390/tcg-target.inc.c
@@ -555,9 +555,6 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
 static const S390Opcode lli_insns[4] = {
     RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
 };
-static const S390Opcode ii_insns[4] = {
-    RI_IILL, RI_IILH, RI_IIHL, RI_IIHH
-};
 
 static bool maybe_out_small_movi(TCGContext *s, TCGType type,
                                  TCGReg ret, tcg_target_long sval)
@@ -647,36 +644,19 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
         return;
     }
 
-    /* When allowed, stuff it in the constant pool.  */
-    if (!in_prologue) {
-        if (USE_REG_TB) {
-            tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
-            new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
-                           -(intptr_t)s->code_gen_ptr);
-        } else {
-            tcg_out_insn(s, RIL, LGRL, ret, 0);
-            new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
-        }
-        return;
-    }
-
-    /* What's left is for the prologue, loading GUEST_BASE, and because
-       it failed to match above, is known to be a full 64-bit quantity.
-       We could try more than this, but it probably wouldn't pay off.  */
-    if (s390_facilities & FACILITY_EXT_IMM) {
-        tcg_out_insn(s, RIL, LLILF, ret, uval);
-        tcg_out_insn(s, RIL, IIHF, ret, uval >> 32);
+    /* Otherwise, stuff it in the constant pool.  */
+    if (s390_facilities & FACILITY_GEN_INST_EXT) {
+        tcg_out_insn(s, RIL, LGRL, ret, 0);
+        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
+    } else if (USE_REG_TB && !in_prologue) {
+        tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
+        new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
+                       -(intptr_t)s->code_gen_ptr);
     } else {
-        const S390Opcode *insns = lli_insns;
-        int i;
-
-        for (i = 0; i < 4; i++) {
-            uint16_t part = uval >> (16 * i);
-            if (part) {
-                tcg_out_insn_RI(s, insns[i], ret, part);
-                insns = ii_insns;
-            }
-        }
+        TCGReg base = ret ? ret : TCG_TMP0;
+        tcg_out_insn(s, RIL, LARL, base, 0);
+        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
+        tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0);
     }
 }
 

From 426eeecdf5d9cf1695a53c08f46394f8e5351750 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 2 Nov 2017 16:35:36 +0000
Subject: [PATCH 3/3] cpu-exec: Exit exclusive region on longjmp from
 step_atomic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit ac03ee5331612e44be narrowed the scope of the exclusive
region so it only covers when we're executing the TB, not when
we're generating it. However it missed that there is more than
one execution path out of cpu_tb_exec -- if the atomic insn
causes an exception then the code will longjmp out, skipping
the code to end the exclusive region. This causes QEMU to hang
the next time the CPU calls start_exclusive(), waiting for
itself to exit the region.

Move the "end the region" code out to the end of the
function so that it is run for both normal exit and also
for exit-via-longjmp. We have to use a bool flag to decide
whether we need to end the region, because we can longjmp
out of the codegen as well as the execution; the flag is
volatile because it is modified between sigsetjmp and longjmp.

(For some reason this only reproduces for me with a clang
optimized build, not a gcc debug build.)

Reviewed-by: Emilio G. Cota <cota@braap.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Fixes: ac03ee5331612e44beb393df2b578c951d27dc0d
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <1509640536-32160-1-git-send-email-peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cpu-exec.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 4318441e4c..61297f8f4a 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -233,6 +233,8 @@ void cpu_exec_step_atomic(CPUState *cpu)
     uint32_t flags;
     uint32_t cflags = 1;
     uint32_t cf_mask = cflags & CF_HASH_MASK;
+    /* volatile because we modify it between setjmp and longjmp */
+    volatile bool in_exclusive_region = false;
 
     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
         tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
@@ -251,14 +253,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
 
         /* Since we got here, we know that parallel_cpus must be true.  */
         parallel_cpus = false;
+        in_exclusive_region = true;
         cc->cpu_exec_enter(cpu);
         /* execute the generated code */
         trace_exec_tb(tb, pc);
         cpu_tb_exec(cpu, tb);
         cc->cpu_exec_exit(cpu);
-        parallel_cpus = true;
-
-        end_exclusive();
     } else {
         /* We may have exited due to another problem here, so we need
          * to reset any tb_locks we may have taken but didn't release.
@@ -270,6 +270,15 @@ void cpu_exec_step_atomic(CPUState *cpu)
 #endif
         tb_lock_reset();
     }
+
+    if (in_exclusive_region) {
+        /* We might longjump out of either the codegen or the
+         * execution, so must make sure we only end the exclusive
+         * region if we started it.
+         */
+        parallel_cpus = true;
+        end_exclusive();
+    }
 }
 
 struct tb_desc {