From f6823cbe3787aa47db62deede6683077e3da9a2c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 17 Dec 2018 00:30:21 +0300 Subject: [PATCH 01/33] target/sparc: Remove the constant pool Partially reverts ab20bdc1162. The 14-bit displacement that we allowed to reach the constant pool is not always sufficient. Retain the tb-relative addressing, as that is how most return values from the tb are computed. Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.inc.c | 47 ++++++++------------------------------ 1 file changed, 9 insertions(+), 38 deletions(-) diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c index 04bdc3df5e..3237ae6a2f 100644 --- a/tcg/sparc/tcg-target.inc.c +++ b/tcg/sparc/tcg-target.inc.c @@ -311,24 +311,6 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, insn &= ~INSN_OFF19(-1); insn |= INSN_OFF19(pcrel); break; - case R_SPARC_13: - /* Note that we're abusing this reloc type for our own needs. */ - if (!check_fit_ptr(value, 13)) { - int adj = (value > 0 ? 0xff8 : -0x1000); - value -= adj; - assert(check_fit_ptr(value, 13)); - *code_ptr++ = (ARITH_ADD | INSN_RD(TCG_REG_T2) - | INSN_RS1(TCG_REG_TB) | INSN_IMM13(adj)); - insn ^= INSN_RS1(TCG_REG_TB) ^ INSN_RS1(TCG_REG_T2); - } - insn &= ~INSN_IMM13(-1); - insn |= INSN_IMM13(value); - break; - case R_SPARC_32: - /* Note that we're abusing this reloc type for our own needs. */ - code_ptr[0] = deposit32(code_ptr[0], 0, 22, value >> 10); - code_ptr[1] = deposit32(code_ptr[1], 0, 10, value); - return; default: g_assert_not_reached(); } @@ -459,6 +441,15 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, return; } + /* A 13-bit constant relative to the TB. */ + if (!in_prologue && USE_REG_TB) { + test = arg - (uintptr_t)s->code_gen_ptr; + if (check_fit_ptr(test, 13)) { + tcg_out_arithi(s, ret, TCG_REG_TB, test, ARITH_ADD); + return; + } + } + /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { tcg_out_sethi(s, ret, arg); @@ -488,26 +479,6 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, return; } - if (!in_prologue) { - if (USE_REG_TB) { - intptr_t diff = arg - (uintptr_t)s->code_gen_ptr; - if (check_fit_ptr(diff, 13)) { - tcg_out_arithi(s, ret, TCG_REG_TB, diff, ARITH_ADD); - } else { - new_pool_label(s, arg, R_SPARC_13, s->code_ptr, - -(intptr_t)s->code_gen_ptr); - tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB)); - /* May be used to extend the 13-bit range in patch_reloc. */ - tcg_out32(s, NOP); - } - } else { - new_pool_label(s, arg, R_SPARC_32, s->code_ptr, 0); - tcg_out_sethi(s, ret, 0); - tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) | INSN_IMM13(0)); - } - return; - } - /* A 64-bit constant decomposed into 2 32-bit pieces. */ if (check_fit_i32(lo, 13)) { hi = (arg - lo) >> 32; From 5740d9f714835964873325d1210b26811252843f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 30 Oct 2018 21:52:44 +0000 Subject: [PATCH 02/33] tcg/i386: Always use %ebp for TCG_AREG0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For x86_64, this can remove a REX prefix resulting in smaller code when manipulating globals of type i32, as we move them between backing store via cpu_env, aka TCG_AREG0. Reviewed-by: Alex Bennée Reviewed-by: Emilio G. Cota Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 9fdf37f23c..7488c3d869 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -84,6 +84,8 @@ typedef enum { TCG_REG_RBP = TCG_REG_EBP, TCG_REG_RSI = TCG_REG_ESI, TCG_REG_RDI = TCG_REG_EDI, + + TCG_AREG0 = TCG_REG_EBP, } TCGReg; /* used for function call generation */ @@ -194,12 +196,6 @@ extern bool have_avx2; #define TCG_TARGET_extract_i64_valid(ofs, len) \ (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32) -#if TCG_TARGET_REG_BITS == 64 -# define TCG_AREG0 TCG_REG_R14 -#else -# define TCG_AREG0 TCG_REG_EBP -#endif - static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { } From 66c0285df4270d184afce5ac8b97ac175c89562f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 30 Oct 2018 21:55:43 +0000 Subject: [PATCH 03/33] tcg/i386: Move TCG_REG_CALL_STACK from define to enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Bennée Reviewed-by: Emilio G. Cota Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 7488c3d869..2441658865 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -86,10 +86,10 @@ typedef enum { TCG_REG_RDI = TCG_REG_EDI, TCG_AREG0 = TCG_REG_EBP, + TCG_REG_CALL_STACK = TCG_REG_ESP } TCGReg; /* used for function call generation */ -#define TCG_REG_CALL_STACK TCG_REG_ESP #define TCG_TARGET_STACK_ALIGN 16 #if defined(_WIN64) #define TCG_TARGET_CALL_STACK_OFFSET 32 From 90d6cb781130891f96eb54f8315e29fbd4e99a71 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 29 Nov 2018 20:41:14 +0000 Subject: [PATCH 04/33] tcg/aarch64: Remove reloc_pc26_atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is unused since b68686bd4bfeb70040b4099df993dfa0b4f37b03. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.inc.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 083592a4d7..a41b633960 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -87,18 +87,6 @@ static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) *code_ptr = deposit32(*code_ptr, 0, 26, offset); } -static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr, - tcg_insn_unit *target) -{ - ptrdiff_t offset = target - code_ptr; - tcg_insn_unit insn; - tcg_debug_assert(offset == sextract64(offset, 0, 26)); - /* read instruction, mask away previous PC_REL26 parameter contents, - set the proper offset, then write back the instruction. */ - insn = atomic_read(code_ptr); - atomic_set(code_ptr, deposit32(insn, 0, 26, offset)); -} - static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) { ptrdiff_t offset = target - code_ptr; From 733589b3382afcb0ae9f43e72e083a5ddd38abd5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 29 Nov 2018 20:52:47 +0000 Subject: [PATCH 05/33] tcg/aarch64: Fold away "noaddr" branch routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are one use apiece for these. There is no longer a need for preserving branch offset operands, as we no longer re-translate. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.inc.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index a41b633960..28de0226fb 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -1129,23 +1129,6 @@ static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target) } } -static inline void tcg_out_goto_noaddr(TCGContext *s) -{ - /* We pay attention here to not modify the branch target by reading from - the buffer. This ensure that caches and memory are kept coherent during - retranslation. Mask away possible garbage in the high bits for the - first translation, while keeping the offset bits for retranslation. */ - uint32_t old = tcg_in32(s); - tcg_out_insn(s, 3206, B, old); -} - -static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c) -{ - /* See comments in tcg_out_goto_noaddr. */ - uint32_t old = tcg_in32(s) >> 5; - tcg_out_insn(s, 3202, B_C, c, old); -} - static inline void tcg_out_callr(TCGContext *s, TCGReg reg) { tcg_out_insn(s, 3207, BLR, reg); @@ -1192,7 +1175,7 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) { if (!l->has_value) { tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); - tcg_out_goto_noaddr(s); + tcg_out_insn(s, 3206, B, 0); } else { tcg_out_goto(s, l->u.value_ptr); } @@ -1523,7 +1506,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc, /* If not equal, we jump to the slow path. */ *label_ptr = s->code_ptr; - tcg_out_goto_cond_noaddr(s, TCG_COND_NE); + tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); } #endif /* CONFIG_SOFTMMU */ From 2672ccc7eee742e23928f4bf60a13a77d64f540d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 29 Nov 2018 13:17:29 -0800 Subject: [PATCH 06/33] tcg/arm: Remove reloc_pc24_atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is unused since 3fb53fb4d12f2e7833bd1659e6013237b130ef20. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.inc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index e1fbf465cb..1142eb13ad 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -193,14 +193,6 @@ static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); } -static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target) -{ - ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; - tcg_insn_unit insn = atomic_read(code_ptr); - tcg_debug_assert(offset == sextract32(offset, 0, 24)); - atomic_set(code_ptr, deposit32(insn, 0, 24, offset)); -} - static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { From 37ee93a974c49ab9edfcd1db0aad3838b0395b14 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 29 Nov 2018 13:13:59 -0800 Subject: [PATCH 07/33] tcg/arm: Fold away "noaddr" branch routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are one use apiece for these. There is no longer a need for preserving branch offset operands, as we no longer re-translate. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.inc.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index 1142eb13ad..1651f00281 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -366,22 +366,6 @@ static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset) (((offset - 8) >> 2) & 0x00ffffff)); } -static inline void tcg_out_b_noaddr(TCGContext *s, int cond) -{ - /* We pay attention here to not modify the branch target by masking - the corresponding bytes. This ensure that caches and memory are - kept coherent during retranslation. */ - tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a)); -} - -static inline void tcg_out_bl_noaddr(TCGContext *s, int cond) -{ - /* We pay attention here to not modify the branch target by masking - the corresponding bytes. This ensure that caches and memory are - kept coherent during retranslation. */ - tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b)); -} - static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset) { tcg_out32(s, (cond << 28) | 0x0b000000 | @@ -1082,7 +1066,7 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l) tcg_out_goto(s, cond, l->u.value_ptr); } else { tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0); - tcg_out_b_noaddr(s, cond); + tcg_out_b(s, cond, 0); } } @@ -1628,7 +1612,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) /* This a conditional BL only to load a pointer within this opcode into LR for the slow path. We will not be using the value for a tail call. */ label_ptr = s->code_ptr; - tcg_out_bl_noaddr(s, COND_NE); + tcg_out_bl(s, COND_NE, 0); tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend); @@ -1760,7 +1744,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) /* The conditional call must come last, as we're going to return here. */ label_ptr = s->code_ptr; - tcg_out_bl_noaddr(s, COND_NE); + tcg_out_bl(s, COND_NE, 0); add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); From f9c7246faa279237200a2a53beacaa8100ea1900 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 29 Nov 2018 22:48:26 +0000 Subject: [PATCH 08/33] tcg/ppc: Fold away "noaddr" branch routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no longer a need for preserving branch offset operands, as we no longer re-translate. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.inc.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index c2f729ee8f..2e2a22f579 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -210,18 +210,6 @@ static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target); } -static inline void tcg_out_b_noaddr(TCGContext *s, int insn) -{ - unsigned retrans = *s->code_ptr & 0x3fffffc; - tcg_out32(s, insn | retrans); -} - -static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) -{ - unsigned retrans = *s->code_ptr & 0xfffc; - tcg_out32(s, insn | retrans); -} - /* parse target specific constraints */ static const char *target_parse_constraint(TCGArgConstraint *ct, const char *ct_str, TCGType type) @@ -1179,11 +1167,11 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) { if (l->has_value) { - tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr)); + bc |= reloc_pc14_val(s->code_ptr, l->u.value_ptr); } else { tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); - tcg_out_bc_noaddr(s, bc); } + tcg_out32(s, bc); } static void tcg_out_brcond(TCGContext *s, TCGCond cond, @@ -1771,7 +1759,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) /* Load a pointer into the current opcode w/conditional branch-link. */ label_ptr = s->code_ptr; - tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ @@ -1846,7 +1834,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) /* Load a pointer into the current opcode w/conditional branch-link. */ label_ptr = s->code_ptr; - tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ @@ -2044,13 +2032,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_br: { TCGLabel *l = arg_label(args[0]); + uint32_t insn = B; if (l->has_value) { - tcg_out_b(s, 0, l->u.value_ptr); + insn |= reloc_pc24_val(s->code_ptr, l->u.value_ptr); } else { tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); - tcg_out_b_noaddr(s, B); } + tcg_out32(s, insn); } break; case INDEX_op_ld8u_i32: From 3661612fc3e4b65be03482bf6bafd116101881e1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Nov 2018 19:28:51 +0000 Subject: [PATCH 09/33] tcg/s390: Remove retranslation code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no longer a need for preserving branch offset operands, as we no longer re-translate. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/s390/tcg-target.inc.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index 17c435ade5..96c344142e 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -1329,13 +1329,11 @@ static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, TCGReg r1, TCGReg r2, TCGLabel *l) { - intptr_t off; + intptr_t off = 0; if (l->has_value) { off = l->u.value_ptr - s->code_ptr; } else { - /* We need to keep the offset unchanged for retranslation. */ - off = s->code_ptr[1]; tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); } @@ -1347,13 +1345,11 @@ static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, TCGReg r1, int i2, TCGLabel *l) { - tcg_target_long off; + tcg_target_long off = 0; if (l->has_value) { off = l->u.value_ptr - s->code_ptr; } else { - /* We need to keep the offset unchanged for retranslation. */ - off = s->code_ptr[1]; tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); } @@ -1696,7 +1692,6 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); - /* We need to keep the offset unchanged for retranslation. */ tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); label_ptr = s->code_ptr; s->code_ptr += 1; @@ -1724,7 +1719,6 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); - /* We need to keep the offset unchanged for retranslation. */ tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); label_ptr = s->code_ptr; s->code_ptr += 1; From 791645f0227c9d52ce5fe1ad6e1cda55a9bfe633 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Nov 2018 11:36:03 -0800 Subject: [PATCH 10/33] tcg/sparc: Remove retranslation code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no longer a need for preserving branch offset operands, as we no longer re-translate. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.inc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c index 3237ae6a2f..8a151b9cac 100644 --- a/tcg/sparc/tcg-target.inc.c +++ b/tcg/sparc/tcg-target.inc.c @@ -610,13 +610,11 @@ static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19) static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) { - int off19; + int off19 = 0; if (l->has_value) { off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr)); } else { - /* Make sure to preserve destinations during retranslation. */ - off19 = *s->code_ptr & INSN_OFF19(-1); tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0); } tcg_out_bpcc0(s, scond, flags, off19); @@ -656,13 +654,11 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, { /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ if (arg2 == 0 && !is_unsigned_cond(cond)) { - int off16; + int off16 = 0; if (l->has_value) { off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr)); } else { - /* Make sure to preserve destinations during retranslation. */ - off16 = *s->code_ptr & INSN_OFF16(-1); tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0); } tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) From 8c1b079279fadaee10dc39ca9a58c4c91c7a1854 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Nov 2018 11:42:17 -0800 Subject: [PATCH 11/33] tcg/mips: Remove retranslation code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no longer a need for preserving branch offset operands, as we no longer re-translate. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.inc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index cff525373b..e21cb1ae28 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -483,12 +483,7 @@ static inline void tcg_out_opc_bf64(TCGContext *s, MIPSInsn opc, MIPSInsn opm, static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, TCGReg rt, TCGReg rs) { - /* We pay attention here to not modify the branch target by reading - the existing value and using it again. This ensure that caches and - memory are kept coherent during retranslation. */ - uint16_t offset = (uint16_t)*s->code_ptr; - - tcg_out_opc_imm(s, opc, rt, rs, offset); + tcg_out_opc_imm(s, opc, rt, rs, 0); } /* From 6ac1778676f4259c10b0629ccd9df319a5d1baeb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Nov 2018 11:52:48 -0800 Subject: [PATCH 12/33] tcg: Return success from patch_reloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will move the assert for success from within (subroutines of) patch_reloc into the callers. It will also let new code do something different when a relocation is out of range. For the moment, all backends are trivially converted to return true. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.inc.c | 3 ++- tcg/arm/tcg-target.inc.c | 3 ++- tcg/i386/tcg-target.inc.c | 3 ++- tcg/mips/tcg-target.inc.c | 3 ++- tcg/ppc/tcg-target.inc.c | 3 ++- tcg/s390/tcg-target.inc.c | 3 ++- tcg/sparc/tcg-target.inc.c | 3 ++- tcg/tcg.c | 8 +++++--- tcg/tci/tcg-target.inc.c | 3 ++- 9 files changed, 21 insertions(+), 11 deletions(-) diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 28de0226fb..16f08c59c4 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -94,7 +94,7 @@ static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) *code_ptr = deposit32(*code_ptr, 5, 19, offset); } -static inline void patch_reloc(tcg_insn_unit *code_ptr, int type, +static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { tcg_debug_assert(addend == 0); @@ -109,6 +109,7 @@ static inline void patch_reloc(tcg_insn_unit *code_ptr, int type, default: tcg_abort(); } + return true; } #define TCG_CT_CONST_AIMM 0x100 diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index 1651f00281..deefa20fbf 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -193,7 +193,7 @@ static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); } -static void patch_reloc(tcg_insn_unit *code_ptr, int type, +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { tcg_debug_assert(addend == 0); @@ -229,6 +229,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, } else { g_assert_not_reached(); } + return true; } #define TCG_CT_CONST_ARM 0x100 diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 436195894b..5c88f1f36b 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -167,7 +167,7 @@ static bool have_lzcnt; static tcg_insn_unit *tb_ret_addr; -static void patch_reloc(tcg_insn_unit *code_ptr, int type, +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { value += addend; @@ -191,6 +191,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, default: tcg_abort(); } + return true; } #if TCG_TARGET_REG_BITS == 64 diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index e21cb1ae28..a06ff257fa 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -168,12 +168,13 @@ static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target) *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target)); } -static void patch_reloc(tcg_insn_unit *code_ptr, int type, +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { tcg_debug_assert(type == R_MIPS_PC16); tcg_debug_assert(addend == 0); reloc_pc16(code_ptr, (tcg_insn_unit *)value); + return true; } #define TCG_CT_CONST_ZERO 0x100 diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index 2e2a22f579..860b0d36e1 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -513,7 +513,7 @@ static const uint32_t tcg_to_isel[] = { [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), }; -static void patch_reloc(tcg_insn_unit *code_ptr, int type, +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { tcg_insn_unit *target; @@ -548,6 +548,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, default: g_assert_not_reached(); } + return true; } static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index 96c344142e..68a4c60394 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -366,7 +366,7 @@ static void * const qemu_st_helpers[16] = { static tcg_insn_unit *tb_ret_addr; uint64_t s390_facilities; -static void patch_reloc(tcg_insn_unit *code_ptr, int type, +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { intptr_t pcrel2; @@ -393,6 +393,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, default: g_assert_not_reached(); } + return true; } /* parse target specific constraints */ diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c index 8a151b9cac..55144c437c 100644 --- a/tcg/sparc/tcg-target.inc.c +++ b/tcg/sparc/tcg-target.inc.c @@ -291,7 +291,7 @@ static inline int check_fit_i32(int32_t val, unsigned int bits) # define check_fit_ptr check_fit_i32 #endif -static void patch_reloc(tcg_insn_unit *code_ptr, int type, +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { uint32_t insn = *code_ptr; @@ -316,6 +316,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, } *code_ptr = insn; + return true; } /* parse target specific constraints */ diff --git a/tcg/tcg.c b/tcg/tcg.c index e85133ef05..54f1272187 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -66,7 +66,7 @@ static void tcg_target_init(TCGContext *s); static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode); static void tcg_target_qemu_prologue(TCGContext *s); -static void patch_reloc(tcg_insn_unit *code_ptr, int type, +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend); /* The CIE and FDE header definitions will be common to all hosts. */ @@ -268,7 +268,8 @@ static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, /* FIXME: This may break relocations on RISC targets that modify instruction fields in place. The caller may not have written the initial value. */ - patch_reloc(code_ptr, type, l->u.value, addend); + bool ok = patch_reloc(code_ptr, type, l->u.value, addend); + tcg_debug_assert(ok); } else { /* add a new relocation entry */ r = tcg_malloc(sizeof(TCGRelocation)); @@ -288,7 +289,8 @@ static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) tcg_debug_assert(!l->has_value); for (r = l->u.first_reloc; r != NULL; r = r->next) { - patch_reloc(r->ptr, r->type, value, r->addend); + bool ok = patch_reloc(r->ptr, r->type, value, r->addend); + tcg_debug_assert(ok); } l->has_value = 1; diff --git a/tcg/tci/tcg-target.inc.c b/tcg/tci/tcg-target.inc.c index 62ed097254..0015a98485 100644 --- a/tcg/tci/tcg-target.inc.c +++ b/tcg/tci/tcg-target.inc.c @@ -369,7 +369,7 @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif -static void patch_reloc(tcg_insn_unit *code_ptr, int type, +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { /* tcg_out_reloc always uses the same type, addend. */ @@ -381,6 +381,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, } else { tcg_patch64(code_ptr, value); } + return true; } /* Parse target specific constraints. */ From bec3afd5fc6ab0b6e9d8a01575d58db8d1ad82ce Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Nov 2018 12:31:59 -0800 Subject: [PATCH 13/33] tcg/i386: Return false on failure from patch_reloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.inc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 5c88f1f36b..28192f4608 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -175,7 +175,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, case R_386_PC32: value -= (uintptr_t)code_ptr; if (value != (int32_t)value) { - tcg_abort(); + return false; } /* FALLTHRU */ case R_386_32: @@ -184,7 +184,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, case R_386_PC8: value -= (uintptr_t)code_ptr; if (value != (int8_t)value) { - tcg_abort(); + return false; } tcg_patch8(code_ptr, value); break; From 214bfe83d5a5af70bac2b8d0bd649b018c33c03b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Nov 2018 12:44:53 -0800 Subject: [PATCH 14/33] tcg/aarch64: Return false on failure from patch_reloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This does require an extra two checks within the slow paths to replace the assert that we're moving. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.inc.c | 37 ++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 16f08c59c4..0562e0aa40 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -78,20 +78,26 @@ static const int tcg_target_call_oarg_regs[1] = { #define TCG_REG_GUEST_BASE TCG_REG_X28 #endif -static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) { ptrdiff_t offset = target - code_ptr; - tcg_debug_assert(offset == sextract64(offset, 0, 26)); - /* read instruction, mask away previous PC_REL26 parameter contents, - set the proper offset, then write back the instruction. */ - *code_ptr = deposit32(*code_ptr, 0, 26, offset); + if (offset == sextract64(offset, 0, 26)) { + /* read instruction, mask away previous PC_REL26 parameter contents, + set the proper offset, then write back the instruction. */ + *code_ptr = deposit32(*code_ptr, 0, 26, offset); + return true; + } + return false; } -static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) { ptrdiff_t offset = target - code_ptr; - tcg_debug_assert(offset == sextract64(offset, 0, 19)); - *code_ptr = deposit32(*code_ptr, 5, 19, offset); + if (offset == sextract64(offset, 0, 19)) { + *code_ptr = deposit32(*code_ptr, 5, 19, offset); + return true; + } + return false; } static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type, @@ -101,15 +107,12 @@ static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type, switch (type) { case R_AARCH64_JUMP26: case R_AARCH64_CALL26: - reloc_pc26(code_ptr, (tcg_insn_unit *)value); - break; + return reloc_pc26(code_ptr, (tcg_insn_unit *)value); case R_AARCH64_CONDBR19: - reloc_pc19(code_ptr, (tcg_insn_unit *)value); - break; + return reloc_pc19(code_ptr, (tcg_insn_unit *)value); default: - tcg_abort(); + g_assert_not_reached(); } - return true; } #define TCG_CT_CONST_AIMM 0x100 @@ -1387,7 +1390,8 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGMemOp opc = get_memop(oi); TCGMemOp size = opc & MO_SIZE; - reloc_pc19(lb->label_ptr[0], s->code_ptr); + bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr); + tcg_debug_assert(ok); tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); @@ -1409,7 +1413,8 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGMemOp opc = get_memop(oi); TCGMemOp size = opc & MO_SIZE; - reloc_pc19(lb->label_ptr[0], s->code_ptr); + bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr); + tcg_debug_assert(ok); tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); From 43fabd30e2f411e8d70ff347902a7c8ed308233e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Nov 2018 13:01:57 -0800 Subject: [PATCH 15/33] tcg/arm: Return false on failure from patch_reloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This does require an extra two checks within the slow paths to replace the assert that we're moving. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.inc.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index deefa20fbf..49f57d655e 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -187,10 +187,14 @@ static const uint8_t tcg_cond_to_arm_cond[] = { [TCG_COND_GTU] = COND_HI, }; -static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +static inline bool reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) { ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; - *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); + if (offset == sextract32(offset, 0, 24)) { + *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); + return true; + } + return false; } static bool patch_reloc(tcg_insn_unit *code_ptr, int type, @@ -199,7 +203,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, tcg_debug_assert(addend == 0); if (type == R_ARM_PC24) { - reloc_pc24(code_ptr, (tcg_insn_unit *)value); + return reloc_pc24(code_ptr, (tcg_insn_unit *)value); } else if (type == R_ARM_PC13) { intptr_t diff = value - (uintptr_t)(code_ptr + 2); tcg_insn_unit insn = *code_ptr; @@ -213,7 +217,11 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, } else { int rd = extract32(insn, 12, 4); int rt = rd == TCG_REG_PC ? TCG_REG_TMP : rd; - assert(diff >= 0x1000 && diff < 0x100000); + + if (diff < 0x1000 || diff >= 0x100000) { + return false; + } + /* add rt, pc, #high */ *code_ptr++ = ((insn & 0xf0000000) | (1 << 25) | ARITH_ADD | (TCG_REG_PC << 16) | (rt << 12) @@ -1372,7 +1380,8 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGMemOp opc = get_memop(oi); void *func; - reloc_pc24(lb->label_ptr[0], s->code_ptr); + bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr); + tcg_debug_assert(ok); argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); if (TARGET_LONG_BITS == 64) { @@ -1432,7 +1441,8 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); - reloc_pc24(lb->label_ptr[0], s->code_ptr); + bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr); + tcg_debug_assert(ok); argreg = TCG_REG_R0; argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); From d5132903518fadad579ef2de9e45fce98eefaa63 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Nov 2018 21:25:13 +0000 Subject: [PATCH 16/33] tcg/ppc: Return false on failure from patch_reloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reloc_pc{14,24}_val routines retain their asserts. Use these directly within the slow paths. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.inc.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index 860b0d36e1..8c1cfdd7ac 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -193,9 +193,14 @@ static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target) return disp & 0x3fffffc; } -static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) +static bool reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) { - *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target); + ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); + if (in_range_b(disp)) { + *pc = (*pc & ~0x3fffffc) | (disp & 0x3fffffc); + return true; + } + return false; } static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) @@ -205,9 +210,14 @@ static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) return disp & 0xfffc; } -static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) +static bool reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) { - *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target); + ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); + if (disp == (int16_t) disp) { + *pc = (*pc & ~0xfffc) | (disp & 0xfffc); + return true; + } + return false; } /* parse target specific constraints */ @@ -524,11 +534,9 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, switch (type) { case R_PPC_REL14: - reloc_pc14(code_ptr, target); - break; + return reloc_pc14(code_ptr, target); case R_PPC_REL24: - reloc_pc24(code_ptr, target); - break; + return reloc_pc24(code_ptr, target); case R_PPC_ADDR16: /* We are abusing this relocation type. This points to a pair of insns, addis + load. If the displacement is small, we @@ -540,7 +548,9 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, } else { int16_t lo = value; int hi = value - lo; - assert(hi + lo == value); + if (hi + lo != value) { + return false; + } code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); } @@ -1638,7 +1648,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGMemOp opc = get_memop(oi); TCGReg hi, lo, arg = TCG_REG_R3; - reloc_pc14(lb->label_ptr[0], s->code_ptr); + **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr); tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); @@ -1683,7 +1693,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGMemOp s_bits = opc & MO_SIZE; TCGReg hi, lo, arg = TCG_REG_R3; - reloc_pc14(lb->label_ptr[0], s->code_ptr); + **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr); tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); From 55dfd8fedceb1311d9cdded1a0f94b2da91a387d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Nov 2018 21:41:51 +0000 Subject: [PATCH 17/33] tcg/s390x: Return false on failure from patch_reloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This does require an extra two checks within the slow paths to replace the assert that we're moving. Also add two checks within existing functions that lacked any kind of assert for out of range branch. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/s390/tcg-target.inc.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index 68a4c60394..39ecf609a1 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -377,23 +377,29 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, switch (type) { case R_390_PC16DBL: - assert(pcrel2 == (int16_t)pcrel2); - tcg_patch16(code_ptr, pcrel2); + if (pcrel2 == (int16_t)pcrel2) { + tcg_patch16(code_ptr, pcrel2); + return true; + } break; case R_390_PC32DBL: - assert(pcrel2 == (int32_t)pcrel2); - tcg_patch32(code_ptr, pcrel2); + if (pcrel2 == (int32_t)pcrel2) { + tcg_patch32(code_ptr, pcrel2); + return true; + } break; case R_390_20: - assert(value == sextract64(value, 0, 20)); - old = *(uint32_t *)code_ptr & 0xf00000ff; - old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4); - tcg_patch32(code_ptr, old); + if (value == sextract64(value, 0, 20)) { + old = *(uint32_t *)code_ptr & 0xf00000ff; + old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4); + tcg_patch32(code_ptr, old); + return true; + } break; default: g_assert_not_reached(); } - return true; + return false; } /* parse target specific constraints */ @@ -1334,6 +1340,7 @@ static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, if (l->has_value) { off = l->u.value_ptr - s->code_ptr; + tcg_debug_assert(off == (int16_t)off); } else { tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); } @@ -1350,6 +1357,7 @@ static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, if (l->has_value) { off = l->u.value_ptr - s->code_ptr; + tcg_debug_assert(off == (int16_t)off); } else { tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); } @@ -1615,7 +1623,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); - patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, 2); + bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL, + (intptr_t)s->code_ptr, 2); + tcg_debug_assert(ok); tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); if (TARGET_LONG_BITS == 64) { @@ -1636,7 +1646,9 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGMemOpIdx oi = lb->oi; TCGMemOp opc = get_memop(oi); - patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, 2); + bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL, + (intptr_t)s->code_ptr, 2); + tcg_debug_assert(ok); tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); if (TARGET_LONG_BITS == 64) { From 1d21d95b6101786d44d3b4a12400eb80a1ecc647 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 20 Nov 2018 23:43:17 +0100 Subject: [PATCH 18/33] tcg/i386: Propagate is64 to tcg_out_qemu_ld_direct This helps preserve the invariant that all TCG_TYPE_I32 values are stored zero-extended in the 64-bit host registers. Reviewed-by: Emilio G. Cota Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.inc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 28192f4608..6bf4f84b20 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -1883,10 +1883,11 @@ static inline void setup_guest_base_seg(void) { } static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg base, int index, intptr_t ofs, - int seg, TCGMemOp memop) + int seg, bool is64, TCGMemOp memop) { const TCGMemOp real_bswap = memop & MO_BSWAP; TCGMemOp bswap = real_bswap; + int rexw = is64 * P_REXW; int movop = OPC_MOVL_GvEv; if (have_movbe && real_bswap) { @@ -1900,7 +1901,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, base, index, 0, ofs); break; case MO_SB: - tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, + tcg_out_modrm_sib_offset(s, OPC_MOVSBL + rexw + seg, datalo, base, index, 0, ofs); break; case MO_UW: @@ -1920,9 +1921,9 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, base, index, 0, ofs); tcg_out_rolw_8(s, datalo); } - tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); + tcg_out_modrm(s, OPC_MOVSWL + rexw, datalo, datalo); } else { - tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg, + tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg, datalo, base, index, 0, ofs); } break; @@ -2010,7 +2011,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) label_ptr, offsetof(CPUTLBEntry, addr_read)); /* TLB Hit. */ - tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc); + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, is64, opc); /* Record the current context of a load into ldst label */ add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, @@ -2045,7 +2046,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) } tcg_out_qemu_ld_direct(s, datalo, datahi, - base, index, offset, seg, opc); + base, index, offset, seg, is64, opc); } #endif } From 3dbc8c61de4e0d0a2afe0897cda7ab28cd37a164 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 13 Dec 2018 18:42:52 -0600 Subject: [PATCH 19/33] tcg/i386: Propagate is64 to tcg_out_qemu_ld_slow_path This helps preserve the invariant that all TCG_TYPE_I32 values are stored zero-extended in the 64-bit host registers. Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.inc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 6bf4f84b20..695b406b4e 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -1692,7 +1692,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, * Record the context of a call to the out of line helper code for the slow path * for a load or store, so that we can later generate the correct helper code */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64, + TCGMemOpIdx oi, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, tcg_insn_unit *raddr, @@ -1702,6 +1703,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->is_ld = is_ld; label->oi = oi; + label->type = is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32; label->datalo_reg = datalo; label->datahi_reg = datahi; label->addrlo_reg = addrlo; @@ -1722,6 +1724,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) TCGMemOp opc = get_memop(oi); TCGReg data_reg; tcg_insn_unit **label_ptr = &l->label_ptr[0]; + int rexw = (l->type == TCG_TYPE_I64 ? P_REXW : 0); /* resolve label address */ tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); @@ -1760,10 +1763,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) data_reg = l->datalo_reg; switch (opc & MO_SSIZE) { case MO_SB: - tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); + tcg_out_ext8s(s, data_reg, TCG_REG_EAX, rexw); break; case MO_SW: - tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); + tcg_out_ext16s(s, data_reg, TCG_REG_EAX, rexw); break; #if TCG_TARGET_REG_BITS == 64 case MO_SL: @@ -2014,7 +2017,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, is64, opc); /* Record the current context of a load into ldst label */ - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, + add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else { @@ -2154,7 +2157,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); /* Record the current context of a store into ldst label */ - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, + add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else { From 75478279a0c1eafc7b69d5382356da138f58f1bd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Nov 2018 16:31:15 -0800 Subject: [PATCH 20/33] tcg/i386: Implement INDEX_op_extr{lh}_i64_i32 for 32-bit guests This preserves the invariant that all TCG_TYPE_I32 values are zero-extended in the 64-bit host register. Reviewed-by: Emilio G. Cota Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.h | 5 +++-- tcg/i386/tcg-target.inc.c | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 2441658865..c523d5f5e1 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -135,8 +135,9 @@ extern bool have_avx2; #define TCG_TARGET_HAS_direct_jump 1 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_extrl_i64_i32 0 -#define TCG_TARGET_HAS_extrh_i64_i32 0 +/* Keep target addresses zero-extended in a register. */ +#define TCG_TARGET_HAS_extrl_i64_i32 (TARGET_LONG_BITS == 32) +#define TCG_TARGET_HAS_extrh_i64_i32 (TARGET_LONG_BITS == 32) #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 695b406b4e..fe864e9ef9 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -2549,12 +2549,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: + case INDEX_op_extrl_i64_i32: tcg_out_ext32u(s, a0, a1); break; case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_ext32s(s, a0, a1); break; + case INDEX_op_extrh_i64_i32: + tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32); + break; #endif OP_32_64(deposit): @@ -2918,6 +2922,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_neg_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: + case INDEX_op_extrh_i64_i32: return &r_0; case INDEX_op_ext8s_i32: @@ -2933,6 +2938,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: case INDEX_op_extract_i32: case INDEX_op_extract_i64: case INDEX_op_sextract_i32: From 4810d96f03be4d3820563e3c6bf13dfc0627f205 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 3 Dec 2018 08:43:17 -0600 Subject: [PATCH 21/33] tcg/i386: Assume 32-bit values are zero-extended We now have an invariant that all TCG_TYPE_I32 values are zero-extended, which means that we do not need to extend them again during qemu_ld/st, either explicitly via a separate tcg_out_ext32u or implicitly via P_ADDR32. Reviewed-by: Emilio G. Cota Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.inc.c | 103 +++++++++++++++----------------------- 1 file changed, 40 insertions(+), 63 deletions(-) diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index fe864e9ef9..f7b548545a 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -309,13 +309,11 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ #define P_DATA16 0x400 /* 0x66 opcode prefix */ #if TCG_TARGET_REG_BITS == 64 -# define P_ADDR32 0x800 /* 0x67 opcode prefix */ # define P_REXW 0x1000 /* Set REX.W = 1 */ # define P_REXB_R 0x2000 /* REG field as byte register */ # define P_REXB_RM 0x4000 /* R/M field as byte register */ # define P_GS 0x8000 /* gs segment override */ #else -# define P_ADDR32 0 # define P_REXW 0 # define P_REXB_R 0 # define P_REXB_RM 0 @@ -528,9 +526,6 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) tcg_debug_assert((opc & P_REXW) == 0); tcg_out8(s, 0x66); } - if (opc & P_ADDR32) { - tcg_out8(s, 0x67); - } if (opc & P_SIMDF3) { tcg_out8(s, 0xf3); } else if (opc & P_SIMDF2) { @@ -1659,11 +1654,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); /* Prepare for both the fast path add of the tlb addend, and the slow - path function argument setup. There are two cases worth note: - For 32-bit guest and x86_64 host, MOVL zero-extends the guest address - before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ - copies the entire guest address for the slow path, while truncation - for the 32-bit host happens with the fastpath ADDL below. */ + path function argument setup. */ tcg_out_mov(s, ttype, r1, addrlo); /* jne slow_path */ @@ -2022,41 +2013,31 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) #else { int32_t offset = guest_base; - TCGReg base = addrlo; int index = -1; int seg = 0; - /* For a 32-bit guest, the high 32 bits may contain garbage. - We can do this with the ADDR32 prefix if we're not using - a guest base, or when using segmentation. Otherwise we - need to zero-extend manually. */ + /* + * Recall we store 32-bit values zero-extended. No need for + * further manual extension or an addr32 (0x67) prefix. + */ if (guest_base == 0 || guest_base_flags) { seg = guest_base_flags; offset = 0; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - seg |= P_ADDR32; - } - } else if (TCG_TARGET_REG_BITS == 64) { - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_L0, base); - base = TCG_REG_L0; - } - if (offset != guest_base) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); - index = TCG_REG_L1; - offset = 0; - } + } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); + index = TCG_REG_L1; + offset = 0; } tcg_out_qemu_ld_direct(s, datalo, datahi, - base, index, offset, seg, is64, opc); + addrlo, index, offset, seg, is64, opc); } #endif } static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg base, intptr_t ofs, int seg, - TCGMemOp memop) + TCGReg base, int index, intptr_t ofs, + int seg, TCGMemOp memop) { /* ??? Ideally we wouldn't need a scratch register. For user-only, we could perform the bswap twice to restore the original value @@ -2080,8 +2061,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); datalo = scratch; } - tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, - datalo, base, ofs); + tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, + datalo, base, index, 0, ofs); break; case MO_16: if (bswap) { @@ -2089,7 +2070,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_rolw_8(s, scratch); datalo = scratch; } - tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs); + tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo, + base, index, 0, ofs); break; case MO_32: if (bswap) { @@ -2097,7 +2079,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_bswap32(s, scratch); datalo = scratch; } - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); + tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); break; case MO_64: if (TCG_TARGET_REG_BITS == 64) { @@ -2106,22 +2088,27 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_bswap64(s, scratch); datalo = scratch; } - tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs); + tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, + base, index, 0, ofs); } else if (bswap) { tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); tcg_out_bswap32(s, scratch); - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs); + tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch, + base, index, 0, ofs); tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); tcg_out_bswap32(s, scratch); - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4); + tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch, + base, index, 0, ofs + 4); } else { if (real_bswap) { int t = datalo; datalo = datahi; datahi = t; } - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); - tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4); + tcg_out_modrm_sib_offset(s, movop + seg, datalo, + base, index, 0, ofs); + tcg_out_modrm_sib_offset(s, movop + seg, datahi, + base, index, 0, ofs + 4); } break; default: @@ -2154,7 +2141,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) label_ptr, offsetof(CPUTLBEntry, addr_write)); /* TLB Hit. */ - tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc); /* Record the current context of a store into ldst label */ add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi, @@ -2162,35 +2149,25 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #else { int32_t offset = guest_base; - TCGReg base = addrlo; + int index = -1; int seg = 0; - /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */ + /* + * Recall we store 32-bit values zero-extended. No need for + * further manual extension or an addr32 (0x67) prefix. + */ if (guest_base == 0 || guest_base_flags) { seg = guest_base_flags; offset = 0; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - seg |= P_ADDR32; - } - } else if (TCG_TARGET_REG_BITS == 64) { - /* ??? Note that we can't use the same SIB addressing scheme - as for loads, since we require L0 free for bswap. */ - if (offset != guest_base) { - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_L0, base); - base = TCG_REG_L0; - } - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); - base = TCG_REG_L1; - offset = 0; - } else if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_L1, base); - base = TCG_REG_L1; - } + } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) { + /* ??? Note that we require L0 free for bswap. */ + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); + index = TCG_REG_L1; + offset = 0; } - tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc); + tcg_out_qemu_st_direct(s, datalo, datahi, + addrlo, index, offset, seg, opc); } #endif } From 913c2bddc2f29fc57ddf70f762a0a218cf17f3fc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 3 Dec 2018 09:22:57 -0600 Subject: [PATCH 22/33] tcg/i386: Precompute all guest_base parameters These values are constant between all qemu_ld/st invocations; there is no need to figure this out each time. If we cannot use a segment or an offset directly for guest_base, load the value into a register in the prologue. Reviewed-by: Emilio G. Cota Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.inc.c | 101 +++++++++++++++----------------------- 1 file changed, 40 insertions(+), 61 deletions(-) diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index f7b548545a..3fb2f4b971 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -1857,22 +1857,31 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_push(s, retaddr); tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); } -#elif defined(__x86_64__) && defined(__linux__) -# include -# include - +#elif TCG_TARGET_REG_BITS == 32 +# define x86_guest_base_seg 0 +# define x86_guest_base_index -1 +# define x86_guest_base_offset guest_base +#else +static int x86_guest_base_seg; +static int x86_guest_base_index = -1; +static int32_t x86_guest_base_offset; +# if defined(__x86_64__) && defined(__linux__) +# include +# include int arch_prctl(int code, unsigned long addr); - -static int guest_base_flags; -static inline void setup_guest_base_seg(void) +static inline int setup_guest_base_seg(void) { if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { - guest_base_flags = P_GS; + return P_GS; } + return 0; } -#else -# define guest_base_flags 0 -static inline void setup_guest_base_seg(void) { } +# else +static inline int setup_guest_base_seg(void) +{ + return 0; +} +# endif #endif /* SOFTMMU */ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, @@ -2011,27 +2020,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else - { - int32_t offset = guest_base; - int index = -1; - int seg = 0; - - /* - * Recall we store 32-bit values zero-extended. No need for - * further manual extension or an addr32 (0x67) prefix. - */ - if (guest_base == 0 || guest_base_flags) { - seg = guest_base_flags; - offset = 0; - } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); - index = TCG_REG_L1; - offset = 0; - } - - tcg_out_qemu_ld_direct(s, datalo, datahi, - addrlo, index, offset, seg, is64, opc); - } + tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index, + x86_guest_base_offset, x86_guest_base_seg, + is64, opc); #endif } @@ -2147,28 +2138,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else - { - int32_t offset = guest_base; - int index = -1; - int seg = 0; - - /* - * Recall we store 32-bit values zero-extended. No need for - * further manual extension or an addr32 (0x67) prefix. - */ - if (guest_base == 0 || guest_base_flags) { - seg = guest_base_flags; - offset = 0; - } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) { - /* ??? Note that we require L0 free for bswap. */ - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); - index = TCG_REG_L1; - offset = 0; - } - - tcg_out_qemu_st_direct(s, datalo, datahi, - addrlo, index, offset, seg, opc); - } + tcg_out_qemu_st_direct(s, datalo, datahi, addrlo, x86_guest_base_index, + x86_guest_base_offset, x86_guest_base_seg, opc); #endif } @@ -3415,6 +3386,21 @@ static void tcg_target_qemu_prologue(TCGContext *s) (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 + stack_addend); #else +# if !defined(CONFIG_SOFTMMU) && TCG_TARGET_REG_BITS == 64 + if (guest_base) { + int seg = setup_guest_base_seg(); + if (seg != 0) { + x86_guest_base_seg = seg; + } else if (guest_base == (int32_t)guest_base) { + x86_guest_base_offset = guest_base; + } else { + /* Choose R12 because, as a base, it requires a SIB byte. */ + x86_guest_base_index = TCG_REG_R12; + tcg_out_mov(s, TCG_TYPE_PTR, x86_guest_base_index, guest_base); + tcg_regset_set_reg(s->reserved_regs, x86_guest_base_index); + } + } +# endif tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_addi(s, TCG_REG_ESP, -stack_addend); /* jmp *tb. */ @@ -3440,13 +3426,6 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_pop(s, tcg_target_callee_save_regs[i]); } tcg_out_opc(s, OPC_RET, 0, 0, 0); - -#if !defined(CONFIG_SOFTMMU) - /* Try to set up a segment register to point to guest_base. */ - if (guest_base) { - setup_guest_base_seg(); - } -#endif } static void tcg_out_nop_fill(tcg_insn_unit *p, int count) From 5785c17f319106d6709f4ffc05de888cb0a412f3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 3 Dec 2018 09:25:10 -0600 Subject: [PATCH 23/33] tcg/i386: Add setup_guest_base_seg for FreeBSD Reviewed-by: Emilio G. Cota Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.inc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 3fb2f4b971..c21c3272f2 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -1876,6 +1876,15 @@ static inline int setup_guest_base_seg(void) } return 0; } +# elif defined (__FreeBSD__) || defined (__FreeBSD_kernel__) +# include +static inline int setup_guest_base_seg(void) +{ + if (sysarch(AMD64_SET_GSBASE, &guest_base) == 0) { + return P_GS; + } + return 0; +} # else static inline int setup_guest_base_seg(void) { From a686dc71d89b1d7934becd95c843aa1375cdb7e7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 20 Nov 2018 20:38:16 +0100 Subject: [PATCH 24/33] tcg: Clean up generic bswap32 Based on the only current user, Sparc: New code uses 1 constant that takes 2 insns to create, plus 8. Old code used 2 constants that took 2 insns to create, plus 9. The result is a new total of 10 vs an old total of 13. Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 54 ++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 7a8015c5a9..dd33e1b713 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1012,24 +1012,24 @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) if (TCG_TARGET_HAS_bswap32_i32) { tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg); } else { - TCGv_i32 t0, t1; - t0 = tcg_temp_new_i32(); - t1 = tcg_temp_new_i32(); + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_const_i32(0x00ff00ff); - tcg_gen_shli_i32(t0, arg, 24); + /* arg = abcd */ + tcg_gen_shri_i32(t0, arg, 8); /* t0 = .abc */ + tcg_gen_and_i32(t1, arg, t2); /* t1 = .b.d */ + tcg_gen_and_i32(t0, t0, t2); /* t0 = .a.c */ + tcg_gen_shli_i32(t1, t1, 8); /* t1 = b.d. */ + tcg_gen_or_i32(ret, t0, t1); /* ret = badc */ - tcg_gen_andi_i32(t1, arg, 0x0000ff00); - tcg_gen_shli_i32(t1, t1, 8); - tcg_gen_or_i32(t0, t0, t1); + tcg_gen_shri_i32(t0, ret, 16); /* t0 = ..ba */ + tcg_gen_shli_i32(t1, ret, 16); /* t1 = dc.. */ + tcg_gen_or_i32(ret, t0, t1); /* ret = dcba */ - tcg_gen_shri_i32(t1, arg, 8); - tcg_gen_andi_i32(t1, t1, 0x0000ff00); - tcg_gen_or_i32(t0, t0, t1); - - tcg_gen_shri_i32(t1, arg, 24); - tcg_gen_or_i32(ret, t0, t1); tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); } } @@ -1638,25 +1638,25 @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg) } else if (TCG_TARGET_HAS_bswap32_i64) { tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg); } else { - TCGv_i64 t0, t1; - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_const_i64(0x00ff00ff); - tcg_gen_shli_i64(t0, arg, 24); - tcg_gen_ext32u_i64(t0, t0); + /* arg = ....abcd */ + tcg_gen_shri_i64(t0, arg, 8); /* t0 = .....abc */ + tcg_gen_and_i64(t1, arg, t2); /* t1 = .....b.d */ + tcg_gen_and_i64(t0, t0, t2); /* t0 = .....a.c */ + tcg_gen_shli_i64(t1, t1, 8); /* t1 = ....b.d. */ + tcg_gen_or_i64(ret, t0, t1); /* ret = ....badc */ - tcg_gen_andi_i64(t1, arg, 0x0000ff00); - tcg_gen_shli_i64(t1, t1, 8); - tcg_gen_or_i64(t0, t0, t1); + tcg_gen_shli_i64(t1, ret, 48); /* t1 = dc...... */ + tcg_gen_shri_i64(t0, ret, 16); /* t0 = ......ba */ + tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */ + tcg_gen_or_i64(ret, t0, t1); /* ret = ....dcba */ - tcg_gen_shri_i64(t1, arg, 8); - tcg_gen_andi_i64(t1, t1, 0x0000ff00); - tcg_gen_or_i64(t0, t0, t1); - - tcg_gen_shri_i64(t1, arg, 24); - tcg_gen_or_i64(ret, t0, t1); tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); } } From 9e821eab0ab708add35fa0446d880086e845ee3e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 20 Nov 2018 21:05:03 +0100 Subject: [PATCH 25/33] tcg: Clean up generic bswap64 Based on the only current user, Sparc: New code uses 2 constants that take 2 insns to load from constant pool, plus 13. Old code used 6 constants that took 1 or 2 insns to create, plus 21. The result is a new total of 17 vs an old total of 29. Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index dd33e1b713..d7a30665a6 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1678,37 +1678,30 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); - tcg_gen_shli_i64(t0, arg, 56); + /* arg = abcdefgh */ + tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull); + tcg_gen_shri_i64(t0, arg, 8); /* t0 = .abcdefg */ + tcg_gen_and_i64(t1, arg, t2); /* t1 = .b.d.f.h */ + tcg_gen_and_i64(t0, t0, t2); /* t0 = .a.c.e.g */ + tcg_gen_shli_i64(t1, t1, 8); /* t1 = b.d.f.h. */ + tcg_gen_or_i64(ret, t0, t1); /* ret = badcfehg */ - tcg_gen_andi_i64(t1, arg, 0x0000ff00); - tcg_gen_shli_i64(t1, t1, 40); - tcg_gen_or_i64(t0, t0, t1); + tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull); + tcg_gen_shri_i64(t0, ret, 16); /* t0 = ..badcfe */ + tcg_gen_and_i64(t1, ret, t2); /* t1 = ..dc..hg */ + tcg_gen_and_i64(t0, t0, t2); /* t0 = ..ba..fe */ + tcg_gen_shli_i64(t1, t1, 16); /* t1 = dc..hg.. */ + tcg_gen_or_i64(ret, t0, t1); /* ret = dcbahgfe */ - tcg_gen_andi_i64(t1, arg, 0x00ff0000); - tcg_gen_shli_i64(t1, t1, 24); - tcg_gen_or_i64(t0, t0, t1); + tcg_gen_shri_i64(t0, ret, 32); /* t0 = ....dcba */ + tcg_gen_shli_i64(t1, ret, 32); /* t1 = hgfe.... */ + tcg_gen_or_i64(ret, t0, t1); /* ret = hgfedcba */ - tcg_gen_andi_i64(t1, arg, 0xff000000); - tcg_gen_shli_i64(t1, t1, 8); - tcg_gen_or_i64(t0, t0, t1); - - tcg_gen_shri_i64(t1, arg, 8); - tcg_gen_andi_i64(t1, t1, 0xff000000); - tcg_gen_or_i64(t0, t0, t1); - - tcg_gen_shri_i64(t1, arg, 24); - tcg_gen_andi_i64(t1, t1, 0x00ff0000); - tcg_gen_or_i64(t0, t0, t1); - - tcg_gen_shri_i64(t1, arg, 40); - tcg_gen_andi_i64(t1, t1, 0x0000ff00); - tcg_gen_or_i64(t0, t0, t1); - - tcg_gen_shri_i64(t1, arg, 56); - tcg_gen_or_i64(ret, t0, t1); tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); } } From 6498594c8eda83c5f5915afc34bd03396f8de6df Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 20 Nov 2018 08:53:34 +0100 Subject: [PATCH 26/33] tcg/optimize: Optimize bswap Somehow we forgot these operations, once upon a time. This will allow immediate stores to have their bswap optimized away. Signed-off-by: Richard Henderson --- tcg/optimize.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tcg/optimize.c b/tcg/optimize.c index 5dbe11c3c8..6b98ec13e6 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -353,6 +353,15 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) CASE_OP_32_64(ext16u): return (uint16_t)x; + CASE_OP_32_64(bswap16): + return bswap16(x); + + CASE_OP_32_64(bswap32): + return bswap32(x); + + case INDEX_op_bswap64_i64: + return bswap64(x); + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: return (int32_t)x; @@ -1105,6 +1114,9 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(ext16s): CASE_OP_32_64(ext16u): CASE_OP_32_64(ctpop): + CASE_OP_32_64(bswap16): + CASE_OP_32_64(bswap32): + case INDEX_op_bswap64_i64: case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: From e1dcf3529d0797b25bb49a20e94b62eb93e7276a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 20 Nov 2018 08:37:42 +0100 Subject: [PATCH 27/33] tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP For now, defined universally as true, since we previously required backends to implement swapped memory operations. Future patches may now remove that support where it is onerous. Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.h | 1 + tcg/arm/tcg-target.h | 1 + tcg/i386/tcg-target.h | 2 + tcg/mips/tcg-target.h | 1 + tcg/ppc/tcg-target.h | 1 + tcg/s390/tcg-target.h | 1 + tcg/sparc/tcg-target.h | 1 + tcg/tcg-op.c | 118 ++++++++++++++++++++++++++++++++++++++- tcg/tci/tcg-target.h | 2 + 9 files changed, 126 insertions(+), 2 deletions(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 9aea1d1771..f966a4fcb3 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -137,6 +137,7 @@ typedef enum { #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_DEFAULT_MO (0) +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 94b3578c55..16172f73a3 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -131,6 +131,7 @@ enum { }; #define TCG_TARGET_DEFAULT_MO (0) +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index c523d5f5e1..f378d29568 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -220,6 +220,8 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 + #ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS #endif diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index a8222476f0..5cb8672470 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -203,6 +203,7 @@ extern bool use_mips32r2_instructions; #endif #define TCG_TARGET_DEFAULT_MO (0) +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index be52ad1d2e..52c1bb04b1 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -128,6 +128,7 @@ void flush_icache_range(uintptr_t start, uintptr_t stop); void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 #ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 6f2b06a7d1..853ed6e7aa 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -135,6 +135,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_CALL_STACK_OFFSET 160 #define TCG_TARGET_EXTEND_ARGS 1 +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index d8339bf010..a0ed2a3342 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -164,6 +164,7 @@ extern bool use_vis3_instructions; #define TCG_AREG0 TCG_REG_I0 #define TCG_TARGET_DEFAULT_MO (0) +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index d7a30665a6..38652db32c 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2694,25 +2694,78 @@ static void tcg_gen_req_mo(TCGBar type) void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) { + TCGMemOp orig_memop; + tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); memop = tcg_canonicalize_memop(memop, 0, 0); trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, trace_mem_get_info(memop, 0)); + + orig_memop = memop; + if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) { + memop &= ~MO_BSWAP; + /* The bswap primitive requires zero-extended input. */ + if ((memop & MO_SSIZE) == MO_SW) { + memop &= ~MO_SIGN; + } + } + gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx); + + if ((orig_memop ^ memop) & MO_BSWAP) { + switch (orig_memop & MO_SIZE) { + case MO_16: + tcg_gen_bswap16_i32(val, val); + if (orig_memop & MO_SIGN) { + tcg_gen_ext16s_i32(val, val); + } + break; + case MO_32: + tcg_gen_bswap32_i32(val, val); + break; + default: + g_assert_not_reached(); + } + } } void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) { + TCGv_i32 swap = NULL; + tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); memop = tcg_canonicalize_memop(memop, 0, 1); trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, trace_mem_get_info(memop, 1)); + + if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) { + swap = tcg_temp_new_i32(); + switch (memop & MO_SIZE) { + case MO_16: + tcg_gen_ext16u_i32(swap, val); + tcg_gen_bswap16_i32(swap, swap); + break; + case MO_32: + tcg_gen_bswap32_i32(swap, val); + break; + default: + g_assert_not_reached(); + } + val = swap; + memop &= ~MO_BSWAP; + } + gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx); + + if (swap) { + tcg_temp_free_i32(swap); + } } void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) { - tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + TCGMemOp orig_memop; + if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop); if (memop & MO_SIGN) { @@ -2723,24 +2776,85 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) return; } + tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); memop = tcg_canonicalize_memop(memop, 1, 0); trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, trace_mem_get_info(memop, 0)); + + orig_memop = memop; + if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) { + memop &= ~MO_BSWAP; + /* The bswap primitive requires zero-extended input. */ + if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) { + memop &= ~MO_SIGN; + } + } + gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx); + + if ((orig_memop ^ memop) & MO_BSWAP) { + switch (orig_memop & MO_SIZE) { + case MO_16: + tcg_gen_bswap16_i64(val, val); + if (orig_memop & MO_SIGN) { + tcg_gen_ext16s_i64(val, val); + } + break; + case MO_32: + tcg_gen_bswap32_i64(val, val); + if (orig_memop & MO_SIGN) { + tcg_gen_ext32s_i64(val, val); + } + break; + case MO_64: + tcg_gen_bswap64_i64(val, val); + break; + default: + g_assert_not_reached(); + } + } } void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) { - tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + TCGv_i64 swap = NULL; + if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop); return; } + tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); memop = tcg_canonicalize_memop(memop, 1, 1); trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, trace_mem_get_info(memop, 1)); + + if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) { + swap = tcg_temp_new_i64(); + switch (memop & MO_SIZE) { + case MO_16: + tcg_gen_ext16u_i64(swap, val); + tcg_gen_bswap16_i64(swap, swap); + break; + case MO_32: + tcg_gen_ext32u_i64(swap, val); + tcg_gen_bswap32_i64(swap, swap); + break; + case MO_64: + tcg_gen_bswap64_i64(swap, val); + break; + default: + g_assert_not_reached(); + } + val = swap; + memop &= ~MO_BSWAP; + } + gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx); + + if (swap) { + tcg_temp_free_i64(swap); + } } static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, TCGMemOp opc) diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 26140d78cb..086f34e69a 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -198,6 +198,8 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop) We prefer consistency across hosts on this. */ #define TCG_TARGET_DEFAULT_MO (0) +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 + static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, uintptr_t addr) { From 161dec9d1b03552e78e5728186eae9cf1dfbe035 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 12 Dec 2018 20:58:11 +0000 Subject: [PATCH 28/33] tcg/mips: Improve the add2/sub2 command to use TCG_TARGET_REG_BITS Instead of hard coding 31 for the shift right use TCG_TARGET_REG_BITS - 1. Signed-off-by: Alistair Francis Message-Id: <7dfbddf7014a595150aa79011ddb342c3cc17ec3.1544648105.git.alistair.francis@wdc.com> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.inc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index a06ff257fa..be0bc92e8e 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -792,7 +792,7 @@ static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); } else if (rl == al && rl == bl) { - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, TCG_TARGET_REG_BITS - 1); tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); } else { tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); From ac1043f6d607aaac206c8aac42bc32f634f59395 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Sun, 9 Dec 2018 14:37:19 -0500 Subject: [PATCH 29/33] tcg: Drop nargs from tcg_op_insert_{before,after} It's unused since 75e8b9b7aa0b95a761b9add7e2f09248b101a392. Signed-off-by: Emilio G. Cota Message-Id: <20181209193749.12277-9-cota@braap.org> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- tcg/optimize.c | 4 ++-- tcg/tcg.c | 10 ++++------ tcg/tcg.h | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 6b98ec13e6..01e80c3e46 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1261,7 +1261,7 @@ void tcg_optimize(TCGContext *s) uint64_t a = ((uint64_t)ah << 32) | al; uint64_t b = ((uint64_t)bh << 32) | bl; TCGArg rl, rh; - TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2); + TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32); if (opc == INDEX_op_add2_i32) { a += b; @@ -1283,7 +1283,7 @@ void tcg_optimize(TCGContext *s) uint32_t b = arg_info(op->args[3])->val; uint64_t r = (uint64_t)a * b; TCGArg rl, rh; - TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2); + TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32); rl = op->args[0]; rh = op->args[1]; diff --git a/tcg/tcg.c b/tcg/tcg.c index 54f1272187..963cb37892 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2205,16 +2205,14 @@ TCGOp *tcg_emit_op(TCGOpcode opc) return op; } -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, int nargs) +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) { TCGOp *new_op = tcg_op_alloc(opc); QTAILQ_INSERT_BEFORE(old_op, new_op, link); return new_op; } -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, int nargs) +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) { TCGOp *new_op = tcg_op_alloc(opc); QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); @@ -2552,7 +2550,7 @@ static bool liveness_pass_2(TCGContext *s) TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_ld_i32 : INDEX_op_ld_i64); - TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); + TCGOp *lop = tcg_op_insert_before(s, op, lopc); lop->args[0] = temp_arg(dir_ts); lop->args[1] = temp_arg(arg_ts->mem_base); @@ -2621,7 +2619,7 @@ static bool liveness_pass_2(TCGContext *s) TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_st_i32 : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGOp *sop = tcg_op_insert_after(s, op, sopc); sop->args[0] = temp_arg(dir_ts); sop->args[1] = temp_arg(arg_ts->mem_base); diff --git a/tcg/tcg.h b/tcg/tcg.h index f9a56a9520..ade692fdf5 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -1071,8 +1071,8 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args); TCGOp *tcg_emit_op(TCGOpcode opc); void tcg_op_remove(TCGContext *s, TCGOp *op); -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg); -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg); +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc); +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc); void tcg_optimize(TCGContext *s); From e132fde25f309bc560d8a29a764142654d44d996 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Fri, 23 Nov 2018 17:39:04 -0500 Subject: [PATCH 30/33] qht-bench: document -p flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Which we forgot to do in bd224fce60 ("qht-bench: add -p flag to precompute hash values", 2018-09-26). Reviewed-by: Alex Bennée Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- tests/qht-bench.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/qht-bench.c b/tests/qht-bench.c index 2089e2bed1..636750d39f 100644 --- a/tests/qht-bench.c +++ b/tests/qht-bench.c @@ -72,6 +72,7 @@ static const char commands_string[] = " -n = number of threads\n" "\n" " -o = offset at which keys start\n" + " -p = precompute hashes\n" "\n" " -g = set -s,-k,-K,-l,-r to the same value\n" " -s = initial size hint\n" From c971d8fa73ff92996d751fa87d90f220cf3c8194 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Sat, 20 Oct 2018 18:46:28 -0400 Subject: [PATCH 31/33] exec: introduce qemu_xxhash{2,4,5,6,7} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before moving them all to include/qemu/xxhash.h. Reviewed-by: Alex Bennée Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- include/exec/tb-hash-xx.h | 41 +++++++++++++++++++++++++++++---------- include/exec/tb-hash.h | 2 +- tests/qht-bench.c | 2 +- util/qsp.c | 12 ++++++------ 4 files changed, 39 insertions(+), 18 deletions(-) diff --git a/include/exec/tb-hash-xx.h b/include/exec/tb-hash-xx.h index 747a9a612c..98ce4b628a 100644 --- a/include/exec/tb-hash-xx.h +++ b/include/exec/tb-hash-xx.h @@ -42,23 +42,23 @@ #define PRIME32_4 668265263U #define PRIME32_5 374761393U -#define TB_HASH_XX_SEED 1 +#define QEMU_XXHASH_SEED 1 /* * xxhash32, customized for input variables that are not guaranteed to be * contiguous in memory. */ static inline uint32_t -tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g) +qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) { - uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2; - uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2; - uint32_t v3 = TB_HASH_XX_SEED + 0; - uint32_t v4 = TB_HASH_XX_SEED - PRIME32_1; - uint32_t a = a0 >> 32; - uint32_t b = a0; - uint32_t c = b0 >> 32; - uint32_t d = b0; + uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2; + uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2; + uint32_t v3 = QEMU_XXHASH_SEED + 0; + uint32_t v4 = QEMU_XXHASH_SEED - PRIME32_1; + uint32_t a = ab >> 32; + uint32_t b = ab; + uint32_t c = cd >> 32; + uint32_t d = cd; uint32_t h32; v1 += a * PRIME32_2; @@ -98,4 +98,25 @@ tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g) return h32; } +static inline uint32_t qemu_xxhash2(uint64_t ab) +{ + return qemu_xxhash7(ab, 0, 0, 0, 0); +} + +static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd) +{ + return qemu_xxhash7(ab, cd, 0, 0, 0); +} + +static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e) +{ + return qemu_xxhash7(ab, cd, e, 0, 0); +} + +static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e, + uint32_t f) +{ + return qemu_xxhash7(ab, cd, e, f, 0); +} + #endif /* EXEC_TB_HASH_XX_H */ diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h index 0526c4f678..731ba4c272 100644 --- a/include/exec/tb-hash.h +++ b/include/exec/tb-hash.h @@ -61,7 +61,7 @@ static inline uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags, uint32_t cf_mask, uint32_t trace_vcpu_dstate) { - return tb_hash_func7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate); + return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate); } #endif diff --git a/tests/qht-bench.c b/tests/qht-bench.c index 636750d39f..0278f4da04 100644 --- a/tests/qht-bench.c +++ b/tests/qht-bench.c @@ -105,7 +105,7 @@ static bool is_equal(const void *ap, const void *bp) static uint32_t h(unsigned long v) { - return tb_hash_func7(v, 0, 0, 0, 0); + return qemu_xxhash2(v); } static uint32_t hval(unsigned long v) diff --git a/util/qsp.c b/util/qsp.c index a848b09c6d..dc29c41fde 100644 --- a/util/qsp.c +++ b/util/qsp.c @@ -135,13 +135,13 @@ QemuCondWaitFunc qemu_cond_wait_func = qemu_cond_wait_impl; * without it we still get a pretty unique hash. */ static inline -uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t a) +uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t ab) { - uint64_t b = (uint64_t)(uintptr_t)callsite->obj; + uint64_t cd = (uint64_t)(uintptr_t)callsite->obj; uint32_t e = callsite->line; uint32_t f = callsite->type; - return tb_hash_func7(a, b, e, f, 0); + return qemu_xxhash6(ab, cd, e, f); } static inline @@ -169,11 +169,11 @@ static uint32_t qsp_entry_no_thread_hash(const QSPEntry *entry) static uint32_t qsp_entry_no_thread_obj_hash(const QSPEntry *entry) { const QSPCallSite *callsite = entry->callsite; - uint64_t a = g_str_hash(callsite->file); - uint64_t b = callsite->line; + uint64_t ab = g_str_hash(callsite->file); + uint64_t cd = callsite->line; uint32_t e = callsite->type; - return tb_hash_func7(a, b, e, 0, 0); + return qemu_xxhash5(ab, cd, e); } static bool qsp_callsite_cmp(const void *ap, const void *bp) From fe656e3185fa10973d43492c867643e80fa433cd Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Sat, 20 Oct 2018 18:49:53 -0400 Subject: [PATCH 32/33] include: move exec/tb-hash-xx.h to qemu/xxhash.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Bennée Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- include/exec/tb-hash.h | 2 +- include/{exec/tb-hash-xx.h => qemu/xxhash.h} | 6 +++--- tests/qht-bench.c | 2 +- util/qsp.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) rename include/{exec/tb-hash-xx.h => qemu/xxhash.h} (97%) diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h index 731ba4c272..4f3a37d927 100644 --- a/include/exec/tb-hash.h +++ b/include/exec/tb-hash.h @@ -20,7 +20,7 @@ #ifndef EXEC_TB_HASH_H #define EXEC_TB_HASH_H -#include "exec/tb-hash-xx.h" +#include "qemu/xxhash.h" #ifdef CONFIG_SOFTMMU diff --git a/include/exec/tb-hash-xx.h b/include/qemu/xxhash.h similarity index 97% rename from include/exec/tb-hash-xx.h rename to include/qemu/xxhash.h index 98ce4b628a..fe35dde328 100644 --- a/include/exec/tb-hash-xx.h +++ b/include/qemu/xxhash.h @@ -31,8 +31,8 @@ * - xxHash source repository : https://github.com/Cyan4973/xxHash */ -#ifndef EXEC_TB_HASH_XX_H -#define EXEC_TB_HASH_XX_H +#ifndef QEMU_XXHASH_H +#define QEMU_XXHASH_H #include "qemu/bitops.h" @@ -119,4 +119,4 @@ static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e, return qemu_xxhash7(ab, cd, e, f, 0); } -#endif /* EXEC_TB_HASH_XX_H */ +#endif /* QEMU_XXHASH_H */ diff --git a/tests/qht-bench.c b/tests/qht-bench.c index 0278f4da04..ab4e708180 100644 --- a/tests/qht-bench.c +++ b/tests/qht-bench.c @@ -9,7 +9,7 @@ #include "qemu/atomic.h" #include "qemu/qht.h" #include "qemu/rcu.h" -#include "exec/tb-hash-xx.h" +#include "qemu/xxhash.h" struct thread_stats { size_t rd; diff --git a/util/qsp.c b/util/qsp.c index dc29c41fde..410f1ba004 100644 --- a/util/qsp.c +++ b/util/qsp.c @@ -61,7 +61,7 @@ #include "qemu/timer.h" #include "qemu/qht.h" #include "qemu/rcu.h" -#include "exec/tb-hash-xx.h" +#include "qemu/xxhash.h" enum QSPType { QSP_MUTEX, From b7c2cd08a6f68010ad27c9c0bf2fde02fb743a0e Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Fri, 23 Nov 2018 17:41:43 -0500 Subject: [PATCH 33/33] xxhash: match output against the original xxhash32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the order in which we extract a/b and c/d to match the output of the upstream xxhash32. Tested with: https://github.com/cota/xxhash/tree/qemu Reviewed-by: Alex Bennée Tested-by: Alex Bennée Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- include/qemu/xxhash.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h index fe35dde328..076f1f6054 100644 --- a/include/qemu/xxhash.h +++ b/include/qemu/xxhash.h @@ -55,10 +55,10 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2; uint32_t v3 = QEMU_XXHASH_SEED + 0; uint32_t v4 = QEMU_XXHASH_SEED - PRIME32_1; - uint32_t a = ab >> 32; - uint32_t b = ab; - uint32_t c = cd >> 32; - uint32_t d = cd; + uint32_t a = ab; + uint32_t b = ab >> 32; + uint32_t c = cd; + uint32_t d = cd >> 32; uint32_t h32; v1 += a * PRIME32_2;