From 23b7aa1d2af04ba57cc94f74d9f0ab25dce72fa0 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Fri, 30 Jun 2017 10:36:12 -0400 Subject: [PATCH 1/5] tcg/aarch64: Introduce and use long branch to register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use a branch to register instruction for exit_tb for offsets greater than 128MB. CC: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: Pranith Kumar Message-Id: <20170630143614.31059-1-bobby.prani@gmail.com> Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.inc.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 1fa3bccc89..8fce11ace7 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -819,6 +819,17 @@ static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) tcg_out_insn(s, 3206, B, offset); } +static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target) +{ + ptrdiff_t offset = target - s->code_ptr; + if (offset == sextract64(offset, 0, 26)) { + tcg_out_insn(s, 3206, BL, offset); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); + tcg_out_insn(s, 3207, BR, TCG_REG_TMP); + } +} + static inline void tcg_out_goto_noaddr(TCGContext *s) { /* We pay attention here to not modify the branch target by reading from @@ -1364,10 +1375,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_exit_tb: /* Reuse the zeroing that exists for goto_ptr. 
*/ if (a0 == 0) { - tcg_out_goto(s, s->code_gen_epilogue); + tcg_out_goto_long(s, s->code_gen_epilogue); } else { tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); - tcg_out_goto(s, tb_ret_addr); + tcg_out_goto_long(s, tb_ret_addr); } break; From b68686bd4bfeb70040b4099df993dfa0b4f37b03 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Fri, 30 Jun 2017 10:36:13 -0400 Subject: [PATCH 2/5] tcg/aarch64: Use ADRP+ADD to compute target address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We use ADRP+ADD to compute the target address for goto_tb. This patch introduces the NOP instruction which is used to align the above instruction pair so that we can use one atomic instruction to patch the destination offsets. CC: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: Pranith Kumar Message-Id: <20170630143614.31059-2-bobby.prani@gmail.com> Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 2 +- tcg/aarch64/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index dfb9f0de46..0caf80db75 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -504,7 +504,7 @@ static inline PageDesc *page_find(tb_page_addr_t index) #elif defined(__powerpc__) # define MAX_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024) #elif defined(__aarch64__) -# define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024) +# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) #elif defined(__s390x__) /* We have a +- 4GB range on the branches; leave some slop. 
*/ # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024) diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 8fce11ace7..a84422d633 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -372,6 +372,7 @@ typedef enum { I3510_EON = 0x4a200000, I3510_ANDS = 0x6a000000, + NOP = 0xd503201f, /* System instructions. */ DMB_ISH = 0xd50338bf, DMB_LD = 0x00000100, @@ -865,11 +866,27 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) { - tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; - tcg_insn_unit *target = (tcg_insn_unit *)addr; + tcg_insn_unit i1, i2; + TCGType rt = TCG_TYPE_I64; + TCGReg rd = TCG_REG_TMP; + uint64_t pair; - reloc_pc26_atomic(code_ptr, target); - flush_icache_range(jmp_addr, jmp_addr + 4); + ptrdiff_t offset = addr - jmp_addr; + + if (offset == sextract64(offset, 0, 26)) { + i1 = I3206_B | ((offset >> 2) & 0x3ffffff); + i2 = NOP; + } else { + offset = (addr >> 12) - (jmp_addr >> 12); + + /* patch ADRP */ + i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; + /* patch ADDI */ + i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; + } + pair = (uint64_t)i2 << 32 | i1; + atomic_set((uint64_t *)jmp_addr, pair); + flush_icache_range(jmp_addr, jmp_addr + 8); } static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) @@ -1388,10 +1405,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, #endif /* consistency for USE_DIRECT_JUMP */ tcg_debug_assert(s->tb_jmp_insn_offset != NULL); + /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic + write can be used to patch the target address. */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out32(s, NOP); + } s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); /* actual branch destination will be patched by - aarch64_tb_set_jmp_target later, beware retranslation. 
*/ - tcg_out_goto_noaddr(s); + aarch64_tb_set_jmp_target later. */ + tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); + tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); + tcg_out_insn(s, 3207, BR, TCG_REG_TMP); s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; From 2acee8b2b5e6bba2935bb6ce5be92d0f0f9799cb Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Fri, 30 Jun 2017 10:36:14 -0400 Subject: [PATCH 3/5] tcg/aarch64: Enable indirect jump path using LDR (literal) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables the indirect jump path using an LDR (literal) instruction. It will be interesting to test and see which performs better among the two paths. CC: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: Pranith Kumar Message-Id: <20170630143614.31059-3-bobby.prani@gmail.com> Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.inc.c | 42 ++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index a84422d633..04bc369a92 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -269,6 +269,8 @@ typedef enum { I3207_BLR = 0xd63f0000, I3207_RET = 0xd65f0000, + /* Load literal for loading the address at pc-relative offset */ + I3305_LDR = 0x58000000, /* Load/store register. Described here as 3.3.12, but the helper that emits them can transform to 3.3.10 or 3.3.13. */ I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, @@ -389,6 +391,11 @@ static inline uint32_t tcg_in32(TCGContext *s) #define tcg_out_insn(S, FMT, OP, ...) 
\ glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) +static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt) +{ + tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); +} + static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, TCGReg rt, int imm19) { @@ -864,6 +871,8 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) } } +#ifdef USE_DIRECT_JUMP + void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) { tcg_insn_unit i1, i2; @@ -889,6 +898,8 @@ void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) flush_icache_range(jmp_addr, jmp_addr + 8); } +#endif + static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) { if (!l->has_value) { @@ -1400,21 +1411,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_goto_tb: -#ifndef USE_DIRECT_JUMP -#error "USE_DIRECT_JUMP required for aarch64" -#endif - /* consistency for USE_DIRECT_JUMP */ - tcg_debug_assert(s->tb_jmp_insn_offset != NULL); - /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic - write can be used to patch the target address. */ - if ((uintptr_t)s->code_ptr & 7) { - tcg_out32(s, NOP); + if (s->tb_jmp_insn_offset != NULL) { + /* USE_DIRECT_JUMP */ + /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic + write can be used to patch the target address. */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out32(s, NOP); + } + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + /* actual branch destination will be patched by + aarch64_tb_set_jmp_target later. 
*/ + tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); + tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); + } else { + /* !USE_DIRECT_JUMP */ + tcg_debug_assert(s->tb_jmp_target_addr != NULL); + intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2; + tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP); } - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - /* actual branch destination will be patched by - aarch64_tb_set_jmp_target later. */ - tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); - tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); tcg_out_insn(s, 3207, BR, TCG_REG_TMP); s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); break; From 2ae96c157ab3155baf6595c08cf5d3fe3c023a60 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Fri, 30 Jun 2017 11:39:46 -0400 Subject: [PATCH 4/5] util/cacheinfo: Fix warning generated by clang Clang generates the following warning on aarch64 host: CC util/cacheinfo.o /home/pranith/qemu/util/cacheinfo.c:121:48: warning: value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths] asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr)); ^ /home/pranith/qemu/util/cacheinfo.c:121:28: note: use constraint modifier "w" asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr)); ^~ %w0 Constraint modifier 'w' is not (yet?) accepted by gcc. Fix this by increasing the ctr size. Tested-by: Emilio G. Cota Reviewed-by: Emilio G. 
Cota Signed-off-by: Pranith Kumar Message-Id: <20170630153946.11997-1-bobby.prani@gmail.com> Signed-off-by: Richard Henderson --- util/cacheinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/cacheinfo.c b/util/cacheinfo.c index f987522df4..6253049533 100644 --- a/util/cacheinfo.c +++ b/util/cacheinfo.c @@ -112,7 +112,7 @@ static void sys_cache_info(int *isize, int *dsize) static void arch_cache_info(int *isize, int *dsize) { if (*isize == 0 || *dsize == 0) { - unsigned ctr; + unsigned long ctr; /* The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1, but (at least under Linux) these are marked protected by the From 8b8d768f19037a825a0bc81654492caa7c8fab8b Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Mon, 10 Jul 2017 14:59:39 +0800 Subject: [PATCH 5/5] tcg/mips: Bugfix for crash when running program with qemu-i386. When running a helloworld program with qemu-i386 in linux-user mode on Loongson 3A3000, it will crash. This patch fixes the bug. Signed-off-by: Jiang Biao Message-Id: <1499669979-25904-1-git-send-email-jiang.biao2@zte.com.cn> Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.inc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index 8cff9a6bf9..85756b81d5 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -1547,8 +1547,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) } else if (guest_base == (int16_t)guest_base) { tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base); } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); - tcg_out_opc_reg(s, ALIAS_PADD, base, base, addr_regl); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, guest_base); + tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP0, addr_regl); } tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); #endif @@ -1652,8 +1652,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) } else
if (guest_base == (int16_t)guest_base) { tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base); } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); - tcg_out_opc_reg(s, ALIAS_PADD, base, base, addr_regl); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, guest_base); + tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP0, addr_regl); } tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); #endif