From e2e7168a214b0ed98dc357bba96816486a289762 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 30 Aug 2020 08:57:20 -0700 Subject: [PATCH 01/11] tcg: Adjust simd_desc size encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With larger vector sizes, it turns out oprsz == maxsz, and we only need to represent mismatch for oprsz <= 32. We do, however, need to represent larger oprsz and do so without reducing SIMD_DATA_BITS. Reduce the size of the oprsz field and increase the maxsz field. Steal the oprsz value of 24 to indicate equality with maxsz. Tested-by: Frank Chang Reviewed-by: Frank Chang Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/tcg/tcg-gvec-desc.h | 38 ++++++++++++++++++++++++------------- tcg/tcg-op-gvec.c | 35 ++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/include/tcg/tcg-gvec-desc.h b/include/tcg/tcg-gvec-desc.h index 0224ac3e78..704bd86454 100644 --- a/include/tcg/tcg-gvec-desc.h +++ b/include/tcg/tcg-gvec-desc.h @@ -20,29 +20,41 @@ #ifndef TCG_TCG_GVEC_DESC_H #define TCG_TCG_GVEC_DESC_H -/* ??? These bit widths are set for ARM SVE, maxing out at 256 byte vectors. */ -#define SIMD_OPRSZ_SHIFT 0 -#define SIMD_OPRSZ_BITS 5 +/* + * This configuration allows MAXSZ to represent 2048 bytes, and + * OPRSZ to match MAXSZ, or represent the smaller values 8, 16, or 32. + * + * Encode this with: + * 0, 1, 3 -> 8, 16, 32 + * 2 -> maxsz + * + * This steals the input that would otherwise map to 24 to match maxsz. 
+ */ +#define SIMD_MAXSZ_SHIFT 0 +#define SIMD_MAXSZ_BITS 8 -#define SIMD_MAXSZ_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS) -#define SIMD_MAXSZ_BITS 5 +#define SIMD_OPRSZ_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS) +#define SIMD_OPRSZ_BITS 2 -#define SIMD_DATA_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS) +#define SIMD_DATA_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS) #define SIMD_DATA_BITS (32 - SIMD_DATA_SHIFT) /* Create a descriptor from components. */ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data); -/* Extract the operation size from a descriptor. */ -static inline intptr_t simd_oprsz(uint32_t desc) -{ - return (extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS) + 1) * 8; -} - /* Extract the max vector size from a descriptor. */ static inline intptr_t simd_maxsz(uint32_t desc) { - return (extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) + 1) * 8; + return extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) * 8 + 8; +} + +/* Extract the operation size from a descriptor. */ +static inline intptr_t simd_oprsz(uint32_t desc) +{ + uint32_t f = extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS); + intptr_t o = f * 8 + 8; + intptr_t m = simd_maxsz(desc); + return f == 2 ? m : o; } /* Extract the operation-specific data from a descriptor. */ diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index 7ebd9e8298..ddbe06b71a 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -37,11 +37,21 @@ static const TCGOpcode vecop_list_empty[1] = { 0 }; of the operand offsets so that we can check them all at once. */ static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs) { - uint32_t opr_align = oprsz >= 16 ? 15 : 7; - uint32_t max_align = maxsz >= 16 || oprsz >= 16 ? 
15 : 7; - tcg_debug_assert(oprsz > 0); - tcg_debug_assert(oprsz <= maxsz); - tcg_debug_assert((oprsz & opr_align) == 0); + uint32_t max_align; + + switch (oprsz) { + case 8: + case 16: + case 32: + tcg_debug_assert(oprsz <= maxsz); + break; + default: + tcg_debug_assert(oprsz == maxsz); + break; + } + tcg_debug_assert(maxsz <= (8 << SIMD_MAXSZ_BITS)); + + max_align = maxsz >= 16 ? 15 : 7; tcg_debug_assert((maxsz & max_align) == 0); tcg_debug_assert((ofs & max_align) == 0); } @@ -77,12 +87,21 @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data) { uint32_t desc = 0; - assert(oprsz % 8 == 0 && oprsz <= (8 << SIMD_OPRSZ_BITS)); - assert(maxsz % 8 == 0 && maxsz <= (8 << SIMD_MAXSZ_BITS)); - assert(data == sextract32(data, 0, SIMD_DATA_BITS)); + check_size_align(oprsz, maxsz, 0); + tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS)); oprsz = (oprsz / 8) - 1; maxsz = (maxsz / 8) - 1; + + /* + * We have just asserted in check_size_align that either + * oprsz is {8,16,32} or matches maxsz. Encode the final + * case with '2', as that would otherwise map to 24. + */ + if (oprsz == maxsz) { + oprsz = 2; + } + desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz); desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz); desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data); From 9be0d08019465b38e2f1a605960961a491430c21 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 3 Sep 2020 15:19:03 -0700 Subject: [PATCH 02/11] tcg: Drop union from TCGArgConstraint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The union is unused; let "regs" appear in the main structure without the "u.regs" wrapping. 
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/tcg/tcg.h | 4 +--- tcg/aarch64/tcg-target.c.inc | 14 +++++++------- tcg/arm/tcg-target.c.inc | 26 +++++++++++++------------- tcg/i386/tcg-target.c.inc | 26 +++++++++++++------------- tcg/mips/tcg-target.c.inc | 18 +++++++++--------- tcg/ppc/tcg-target.c.inc | 24 ++++++++++++------------ tcg/riscv/tcg-target.c.inc | 14 +++++++------- tcg/s390/tcg-target.c.inc | 18 +++++++++--------- tcg/sparc/tcg-target.c.inc | 16 ++++++++-------- tcg/tcg.c | 22 +++++++++++----------- tcg/tci/tcg-target.c.inc | 2 +- 11 files changed, 91 insertions(+), 93 deletions(-) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 53ce94c2c5..a5a0ea4ada 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -985,9 +985,7 @@ void tcg_dump_op_count(void); typedef struct TCGArgConstraint { uint16_t ct; uint8_t alias_index; - union { - TCGRegSet regs; - } u; + TCGRegSet regs; } TCGArgConstraint; #define TCG_MAX_OP_ARGS 16 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 2607fe4ab9..dbe5c6a14c 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -129,22 +129,22 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, switch (*ct_str++) { case 'r': /* general registers */ ct->ct |= TCG_CT_REG; - ct->u.regs |= 0xffffffffu; + ct->regs |= 0xffffffffu; break; case 'w': /* advsimd registers */ ct->ct |= TCG_CT_REG; - ct->u.regs |= 0xffffffff00000000ull; + ct->regs |= 0xffffffff00000000ull; break; case 'l': /* qemu_ld / qemu_st address, data_reg */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffffu; + ct->regs = 0xffffffffu; #ifdef CONFIG_SOFTMMU /* x0 and x1 will be overwritten when reading the tlb entry, and x2, and x3 for helper args, better to avoid using them. 
*/ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3); + tcg_regset_reset_reg(ct->regs, TCG_REG_X0); + tcg_regset_reset_reg(ct->regs, TCG_REG_X1); + tcg_regset_reset_reg(ct->regs, TCG_REG_X2); + tcg_regset_reset_reg(ct->regs, TCG_REG_X3); #endif break; case 'A': /* Valid for arithmetic immediate (positive or negative). */ diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index bc1e1b5a71..978eb1dd70 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -254,40 +254,40 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, case 'r': ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffff; + ct->regs = 0xffff; break; /* qemu_ld address */ case 'l': ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffff; + ct->regs = 0xffff; #ifdef CONFIG_SOFTMMU /* r0-r2,lr will be overwritten when reading the tlb entry, so don't use these. */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); + tcg_regset_reset_reg(ct->regs, TCG_REG_R0); + tcg_regset_reset_reg(ct->regs, TCG_REG_R1); + tcg_regset_reset_reg(ct->regs, TCG_REG_R2); + tcg_regset_reset_reg(ct->regs, TCG_REG_R3); + tcg_regset_reset_reg(ct->regs, TCG_REG_R14); #endif break; /* qemu_st address & data */ case 's': ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffff; + ct->regs = 0xffff; /* r0-r2 will be overwritten when reading the tlb entry (softmmu only) and r0-r1 doing the byte swapping, so don't use these. 
*/ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); + tcg_regset_reset_reg(ct->regs, TCG_REG_R0); + tcg_regset_reset_reg(ct->regs, TCG_REG_R1); #if defined(CONFIG_SOFTMMU) /* Avoid clashes with registers being used for helper args */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); + tcg_regset_reset_reg(ct->regs, TCG_REG_R2); #if TARGET_LONG_BITS == 64 /* Avoid clashes with registers being used for helper args */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); + tcg_regset_reset_reg(ct->regs, TCG_REG_R3); #endif - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); + tcg_regset_reset_reg(ct->regs, TCG_REG_R14); #endif break; diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 0155c0691c..8661ec3393 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -209,42 +209,42 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, switch(*ct_str++) { case 'a': ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX); + tcg_regset_set_reg(ct->regs, TCG_REG_EAX); break; case 'b': ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX); + tcg_regset_set_reg(ct->regs, TCG_REG_EBX); break; case 'c': ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX); + tcg_regset_set_reg(ct->regs, TCG_REG_ECX); break; case 'd': ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX); + tcg_regset_set_reg(ct->regs, TCG_REG_EDX); break; case 'S': ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI); + tcg_regset_set_reg(ct->regs, TCG_REG_ESI); break; case 'D': ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI); + tcg_regset_set_reg(ct->regs, TCG_REG_EDI); break; case 'q': /* A register that can be used as a byte operand. */ ct->ct |= TCG_CT_REG; - ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf; + ct->regs = TCG_TARGET_REG_BITS == 64 ? 
0xffff : 0xf; break; case 'Q': /* A register with an addressable second byte (e.g. %ah). */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0xf; + ct->regs = 0xf; break; case 'r': /* A general register. */ ct->ct |= TCG_CT_REG; - ct->u.regs |= ALL_GENERAL_REGS; + ct->regs |= ALL_GENERAL_REGS; break; case 'W': /* With TZCNT/LZCNT, we can have operand-size as an input. */ @@ -253,15 +253,15 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, case 'x': /* A vector register. */ ct->ct |= TCG_CT_REG; - ct->u.regs |= ALL_VECTOR_REGS; + ct->regs |= ALL_VECTOR_REGS; break; /* qemu_ld/st address constraint */ case 'L': ct->ct |= TCG_CT_REG; - ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff; - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); + ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff; + tcg_regset_reset_reg(ct->regs, TCG_REG_L0); + tcg_regset_reset_reg(ct->regs, TCG_REG_L1); break; case 'e': diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 7aa2073520..aae4fd187b 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -196,28 +196,28 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, switch(*ct_str++) { case 'r': ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff; + ct->regs = 0xffffffff; break; case 'L': /* qemu_ld input arg constraint */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff; - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); + ct->regs = 0xffffffff; + tcg_regset_reset_reg(ct->regs, TCG_REG_A0); #if defined(CONFIG_SOFTMMU) if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + tcg_regset_reset_reg(ct->regs, TCG_REG_A2); } #endif break; case 'S': /* qemu_st constraint */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff; - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); + ct->regs = 0xffffffff; + tcg_regset_reset_reg(ct->regs, TCG_REG_A0); #if defined(CONFIG_SOFTMMU) if (TCG_TARGET_REG_BITS < 
TARGET_LONG_BITS) { - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); + tcg_regset_reset_reg(ct->regs, TCG_REG_A2); + tcg_regset_reset_reg(ct->regs, TCG_REG_A3); } else { - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); + tcg_regset_reset_reg(ct->regs, TCG_REG_A1); } #endif break; diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 7cb40b0466..0bd947b788 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -225,33 +225,33 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, switch (*ct_str++) { case 'A': case 'B': case 'C': case 'D': ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A'); + tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A'); break; case 'r': ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff; + ct->regs = 0xffffffff; break; case 'v': ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff00000000ull; + ct->regs = 0xffffffff00000000ull; break; case 'L': /* qemu_ld constraint */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff; - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); + ct->regs = 0xffffffff; + tcg_regset_reset_reg(ct->regs, TCG_REG_R3); #ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); + tcg_regset_reset_reg(ct->regs, TCG_REG_R4); + tcg_regset_reset_reg(ct->regs, TCG_REG_R5); #endif break; case 'S': /* qemu_st constraint */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff; - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); + ct->regs = 0xffffffff; + tcg_regset_reset_reg(ct->regs, TCG_REG_R3); #ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); + tcg_regset_reset_reg(ct->regs, TCG_REG_R4); + tcg_regset_reset_reg(ct->regs, TCG_REG_R5); + tcg_regset_reset_reg(ct->regs, TCG_REG_R6); #endif break; case 'I': diff --git a/tcg/riscv/tcg-target.c.inc 
b/tcg/riscv/tcg-target.c.inc index 2dfb07e247..0a69839adb 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -138,19 +138,19 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, switch (*ct_str++) { case 'r': ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff; + ct->regs = 0xffffffff; break; case 'L': /* qemu_ld/qemu_st constraint */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff; + ct->regs = 0xffffffff; /* qemu_ld/qemu_st uses TCG_REG_TMP0 */ #if defined(CONFIG_SOFTMMU) - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]); - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]); - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]); - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[3]); - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[4]); + tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[0]); + tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[1]); + tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[2]); + tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[3]); + tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[4]); #endif break; case 'I': diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc index 985115acfb..9cd266a2d0 100644 --- a/tcg/s390/tcg-target.c.inc +++ b/tcg/s390/tcg-target.c.inc @@ -409,24 +409,24 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, switch (*ct_str++) { case 'r': /* all registers */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffff; + ct->regs = 0xffff; break; case 'L': /* qemu_ld/st constraint */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffff; - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); + ct->regs = 0xffff; + tcg_regset_reset_reg(ct->regs, TCG_REG_R2); + tcg_regset_reset_reg(ct->regs, TCG_REG_R3); + tcg_regset_reset_reg(ct->regs, TCG_REG_R4); break; case 'a': /* force R2 for division */ 
ct->ct |= TCG_CT_REG; - ct->u.regs = 0; - tcg_regset_set_reg(ct->u.regs, TCG_REG_R2); + ct->regs = 0; + tcg_regset_set_reg(ct->regs, TCG_REG_R2); break; case 'b': /* force R3 for division */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0; - tcg_regset_set_reg(ct->u.regs, TCG_REG_R3); + ct->regs = 0; + tcg_regset_set_reg(ct->regs, TCG_REG_R3); break; case 'A': ct->ct |= TCG_CT_CONST_S33; diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 40bc12290c..e2de749af7 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -326,27 +326,27 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, switch (*ct_str++) { case 'r': ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff; + ct->regs = 0xffffffff; break; case 'R': ct->ct |= TCG_CT_REG; - ct->u.regs = ALL_64; + ct->regs = ALL_64; break; case 'A': /* qemu_ld/st address constraint */ ct->ct |= TCG_CT_REG; - ct->u.regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff; + ct->regs = TARGET_LONG_BITS == 64 ? 
ALL_64 : 0xffffffff; reserve_helpers: - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); + tcg_regset_reset_reg(ct->regs, TCG_REG_O0); + tcg_regset_reset_reg(ct->regs, TCG_REG_O1); + tcg_regset_reset_reg(ct->regs, TCG_REG_O2); break; case 's': /* qemu_st data 32-bit constraint */ ct->ct |= TCG_CT_REG; - ct->u.regs = 0xffffffff; + ct->regs = 0xffffffff; goto reserve_helpers; case 'S': /* qemu_st data 64-bit constraint */ ct->ct |= TCG_CT_REG; - ct->u.regs = ALL_64; + ct->regs = ALL_64; goto reserve_helpers; case 'I': ct->ct |= TCG_CT_CONST_S11; diff --git a/tcg/tcg.c b/tcg/tcg.c index 164a141d74..8e0df353ec 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2210,7 +2210,7 @@ static int get_constraint_priority(const TCGOpDef *def, int k) return 0; n = 0; for(i = 0; i < TCG_TARGET_NB_REGS; i++) { - if (tcg_regset_test_reg(arg_ct->u.regs, i)) + if (tcg_regset_test_reg(arg_ct->regs, i)) n++; } } @@ -2268,7 +2268,7 @@ static void process_op_defs(TCGContext *s) /* Incomplete TCGTargetOpDef entry. */ tcg_debug_assert(ct_str != NULL); - def->args_ct[i].u.regs = 0; + def->args_ct[i].regs = 0; def->args_ct[i].ct = 0; while (*ct_str != '\0') { switch(*ct_str) { @@ -2855,13 +2855,13 @@ static void liveness_pass_1(TCGContext *s) pset = la_temp_pref(ts); set = *pset; - set &= ct->u.regs; + set &= ct->regs; if (ct->ct & TCG_CT_IALIAS) { set &= op->output_pref[ct->alias_index]; } /* If the combination is not possible, restart. */ if (set == 0) { - set = ct->u.regs; + set = ct->regs; } *pset = set; } @@ -3551,8 +3551,8 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) return; } - dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs; - dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs; + dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; + dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; /* Allocate the output register now. 
*/ if (ots->val_type != TEMP_VAL_REG) { @@ -3706,10 +3706,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } } - temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs); + temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); reg = ts->reg; - if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { + if (tcg_regset_test_reg(arg_ct->regs, reg)) { /* nothing to do : the constraint is satisfied */ } else { allocate_in_reg: @@ -3717,7 +3717,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) and move the temporary register into it */ temp_load(s, ts, tcg_target_available_regs[ts->type], i_allocated_regs, 0); - reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, + reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, o_preferred_regs, ts->indirect_base); if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { /* @@ -3772,11 +3772,11 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) && !const_args[arg_ct->alias_index]) { reg = new_args[arg_ct->alias_index]; } else if (arg_ct->ct & TCG_CT_NEWREG) { - reg = tcg_reg_alloc(s, arg_ct->u.regs, + reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs | o_allocated_regs, op->output_pref[k], ts->indirect_base); } else { - reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, + reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, op->output_pref[k], ts->indirect_base); } tcg_regset_set_reg(o_allocated_regs, reg); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 992d50cb1e..a7215f346f 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -393,7 +393,7 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, case 'L': /* qemu_ld constraint */ case 'S': /* qemu_st constraint */ ct->ct |= TCG_CT_REG; - ct->u.regs = BIT(TCG_TARGET_NB_REGS) - 1; + ct->regs = BIT(TCG_TARGET_NB_REGS) - 1; break; default: return NULL; From 66792f90f14fef18b25a168922877a367ecdca05 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 4 
Apr 2019 09:37:38 +0700 Subject: [PATCH 03/11] tcg: Move sorted_args into TCGArgConstraint.sort_index This uses an existing hole in the TCGArgConstraint structure and will be convenient for keeping the data in one place. Signed-off-by: Richard Henderson --- include/tcg/tcg.h | 2 +- tcg/tcg.c | 35 +++++++++++++++++------------------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index a5a0ea4ada..63955ac85b 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -985,6 +985,7 @@ void tcg_dump_op_count(void); typedef struct TCGArgConstraint { uint16_t ct; uint8_t alias_index; + uint8_t sort_index; TCGRegSet regs; } TCGArgConstraint; @@ -1015,7 +1016,6 @@ typedef struct TCGOpDef { uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args; uint8_t flags; TCGArgConstraint *args_ct; - int *sorted_args; #if defined(CONFIG_DEBUG_TCG) int used; #endif diff --git a/tcg/tcg.c b/tcg/tcg.c index 8e0df353ec..da01e39fe4 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -944,7 +944,6 @@ void tcg_context_init(TCGContext *s) int op, total_args, n, i; TCGOpDef *def; TCGArgConstraint *args_ct; - int *sorted_args; TCGTemp *ts; memset(s, 0, sizeof(*s)); @@ -960,14 +959,11 @@ void tcg_context_init(TCGContext *s) } args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args); - sorted_args = g_malloc(sizeof(int) * total_args); for(op = 0; op < NB_OPS; op++) { def = &tcg_op_defs[op]; def->args_ct = args_ct; - def->sorted_args = sorted_args; n = def->nb_iargs + def->nb_oargs; - sorted_args += n; args_ct += n; } @@ -2220,20 +2216,23 @@ static int get_constraint_priority(const TCGOpDef *def, int k) /* sort from highest priority to lowest */ static void sort_constraints(TCGOpDef *def, int start, int n) { - int i, j, p1, p2, tmp; + int i, j; + TCGArgConstraint *a = def->args_ct; - for(i = 0; i < n; i++) - def->sorted_args[start + i] = start + i; - if (n <= 1) + for (i = 0; i < n; i++) { + a[start + i].sort_index = start + i; + } + if (n <= 1) { return; - for(i 
= 0; i < n - 1; i++) { - for(j = i + 1; j < n; j++) { - p1 = get_constraint_priority(def, def->sorted_args[start + i]); - p2 = get_constraint_priority(def, def->sorted_args[start + j]); + } + for (i = 0; i < n - 1; i++) { + for (j = i + 1; j < n; j++) { + int p1 = get_constraint_priority(def, a[start + i].sort_index); + int p2 = get_constraint_priority(def, a[start + j].sort_index); if (p1 < p2) { - tmp = def->sorted_args[start + i]; - def->sorted_args[start + i] = def->sorted_args[start + j]; - def->sorted_args[start + j] = tmp; + int tmp = a[start + i].sort_index; + a[start + i].sort_index = a[start + j].sort_index; + a[start + j].sort_index = tmp; } } } @@ -3659,7 +3658,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) for (k = 0; k < nb_iargs; k++) { TCGRegSet i_preferred_regs, o_preferred_regs; - i = def->sorted_args[nb_oargs + k]; + i = def->args_ct[nb_oargs + k].sort_index; arg = op->args[i]; arg_ct = &def->args_ct[i]; ts = arg_temp(arg); @@ -3695,7 +3694,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) int k2, i2; reg = ts->reg; for (k2 = 0 ; k2 < k ; k2++) { - i2 = def->sorted_args[nb_oargs + k2]; + i2 = def->args_ct[nb_oargs + k2].sort_index; if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && reg == new_args[i2]) { goto allocate_in_reg; @@ -3760,7 +3759,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* satisfy the output constraints */ for(k = 0; k < nb_oargs; k++) { - i = def->sorted_args[k]; + i = def->args_ct[k].sort_index; arg = op->args[i]; arg_ct = &def->args_ct[i]; ts = arg_temp(arg); From 74a117906b87ff9220e4baae5a7431d6f4eadd45 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 3 Sep 2020 15:56:24 -0700 Subject: [PATCH 04/11] tcg: Remove TCG_CT_REG This wasn't actually used for anything, really. All variable operands must accept registers, which are indicated by the set in TCGArgConstraint.regs. 
Signed-off-by: Richard Henderson --- include/tcg/tcg.h | 1 - tcg/aarch64/tcg-target.c.inc | 3 --- tcg/arm/tcg-target.c.inc | 3 --- tcg/i386/tcg-target.c.inc | 11 ----------- tcg/mips/tcg-target.c.inc | 3 --- tcg/ppc/tcg-target.c.inc | 5 ----- tcg/riscv/tcg-target.c.inc | 2 -- tcg/s390/tcg-target.c.inc | 4 ---- tcg/sparc/tcg-target.c.inc | 5 ----- tcg/tcg.c | 15 ++++----------- tcg/tci/tcg-target.c.inc | 1 - 11 files changed, 4 insertions(+), 49 deletions(-) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 63955ac85b..3168315bb8 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -979,7 +979,6 @@ void tcg_dump_op_count(void); #define TCG_CT_ALIAS 0x80 #define TCG_CT_IALIAS 0x40 #define TCG_CT_NEWREG 0x20 /* output requires a new register */ -#define TCG_CT_REG 0x01 #define TCG_CT_CONST 0x02 /* any constant of register size */ typedef struct TCGArgConstraint { diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index dbe5c6a14c..26f71cb599 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -128,15 +128,12 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, { switch (*ct_str++) { case 'r': /* general registers */ - ct->ct |= TCG_CT_REG; ct->regs |= 0xffffffffu; break; case 'w': /* advsimd registers */ - ct->ct |= TCG_CT_REG; ct->regs |= 0xffffffff00000000ull; break; case 'l': /* qemu_ld / qemu_st address, data_reg */ - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffffu; #ifdef CONFIG_SOFTMMU /* x0 and x1 will be overwritten when reading the tlb entry, diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 978eb1dd70..62c37a954b 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -253,13 +253,11 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, break; case 'r': - ct->ct |= TCG_CT_REG; ct->regs = 0xffff; break; /* qemu_ld address */ case 'l': - ct->ct |= TCG_CT_REG; ct->regs = 0xffff; #ifdef CONFIG_SOFTMMU /* r0-r2,lr will be overwritten when 
reading the tlb entry, @@ -274,7 +272,6 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, /* qemu_st address & data */ case 's': - ct->ct |= TCG_CT_REG; ct->regs = 0xffff; /* r0-r2 will be overwritten when reading the tlb entry (softmmu only) and r0-r1 doing the byte swapping, so don't use these. */ diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 8661ec3393..2f696074ab 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -208,42 +208,33 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, { switch(*ct_str++) { case 'a': - ct->ct |= TCG_CT_REG; tcg_regset_set_reg(ct->regs, TCG_REG_EAX); break; case 'b': - ct->ct |= TCG_CT_REG; tcg_regset_set_reg(ct->regs, TCG_REG_EBX); break; case 'c': - ct->ct |= TCG_CT_REG; tcg_regset_set_reg(ct->regs, TCG_REG_ECX); break; case 'd': - ct->ct |= TCG_CT_REG; tcg_regset_set_reg(ct->regs, TCG_REG_EDX); break; case 'S': - ct->ct |= TCG_CT_REG; tcg_regset_set_reg(ct->regs, TCG_REG_ESI); break; case 'D': - ct->ct |= TCG_CT_REG; tcg_regset_set_reg(ct->regs, TCG_REG_EDI); break; case 'q': /* A register that can be used as a byte operand. */ - ct->ct |= TCG_CT_REG; ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf; break; case 'Q': /* A register with an addressable second byte (e.g. %ah). */ - ct->ct |= TCG_CT_REG; ct->regs = 0xf; break; case 'r': /* A general register. */ - ct->ct |= TCG_CT_REG; ct->regs |= ALL_GENERAL_REGS; break; case 'W': @@ -252,13 +243,11 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, break; case 'x': /* A vector register. */ - ct->ct |= TCG_CT_REG; ct->regs |= ALL_VECTOR_REGS; break; /* qemu_ld/st address constraint */ case 'L': - ct->ct |= TCG_CT_REG; ct->regs = TCG_TARGET_REG_BITS == 64 ? 
0xffff : 0xff; tcg_regset_reset_reg(ct->regs, TCG_REG_L0); tcg_regset_reset_reg(ct->regs, TCG_REG_L1); diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index aae4fd187b..41be574e89 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -195,11 +195,9 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, { switch(*ct_str++) { case 'r': - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff; break; case 'L': /* qemu_ld input arg constraint */ - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff; tcg_regset_reset_reg(ct->regs, TCG_REG_A0); #if defined(CONFIG_SOFTMMU) @@ -209,7 +207,6 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, #endif break; case 'S': /* qemu_st constraint */ - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff; tcg_regset_reset_reg(ct->regs, TCG_REG_A0); #if defined(CONFIG_SOFTMMU) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 0bd947b788..18ee989f95 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -224,19 +224,15 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, { switch (*ct_str++) { case 'A': case 'B': case 'C': case 'D': - ct->ct |= TCG_CT_REG; tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A'); break; case 'r': - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff; break; case 'v': - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff00000000ull; break; case 'L': /* qemu_ld constraint */ - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff; tcg_regset_reset_reg(ct->regs, TCG_REG_R3); #ifdef CONFIG_SOFTMMU @@ -245,7 +241,6 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, #endif break; case 'S': /* qemu_st constraint */ - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff; tcg_regset_reset_reg(ct->regs, TCG_REG_R3); #ifdef CONFIG_SOFTMMU diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 0a69839adb..d536f3ccc1 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -137,12 +137,10 @@ static const char 
*target_parse_constraint(TCGArgConstraint *ct, { switch (*ct_str++) { case 'r': - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff; break; case 'L': /* qemu_ld/qemu_st constraint */ - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff; /* qemu_ld/qemu_st uses TCG_REG_TMP0 */ #if defined(CONFIG_SOFTMMU) diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc index 9cd266a2d0..c5e096449b 100644 --- a/tcg/s390/tcg-target.c.inc +++ b/tcg/s390/tcg-target.c.inc @@ -408,23 +408,19 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, { switch (*ct_str++) { case 'r': /* all registers */ - ct->ct |= TCG_CT_REG; ct->regs = 0xffff; break; case 'L': /* qemu_ld/st constraint */ - ct->ct |= TCG_CT_REG; ct->regs = 0xffff; tcg_regset_reset_reg(ct->regs, TCG_REG_R2); tcg_regset_reset_reg(ct->regs, TCG_REG_R3); tcg_regset_reset_reg(ct->regs, TCG_REG_R4); break; case 'a': /* force R2 for division */ - ct->ct |= TCG_CT_REG; ct->regs = 0; tcg_regset_set_reg(ct->regs, TCG_REG_R2); break; case 'b': /* force R3 for division */ - ct->ct |= TCG_CT_REG; ct->regs = 0; tcg_regset_set_reg(ct->regs, TCG_REG_R3); break; diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index e2de749af7..6775bd30fc 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -325,15 +325,12 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, { switch (*ct_str++) { case 'r': - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff; break; case 'R': - ct->ct |= TCG_CT_REG; ct->regs = ALL_64; break; case 'A': /* qemu_ld/st address constraint */ - ct->ct |= TCG_CT_REG; ct->regs = TARGET_LONG_BITS == 64 ? 
ALL_64 : 0xffffffff; reserve_helpers: tcg_regset_reset_reg(ct->regs, TCG_REG_O0); @@ -341,11 +338,9 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, tcg_regset_reset_reg(ct->regs, TCG_REG_O2); break; case 's': /* qemu_st data 32-bit constraint */ - ct->ct |= TCG_CT_REG; ct->regs = 0xffffffff; goto reserve_helpers; case 'S': /* qemu_st data 64-bit constraint */ - ct->ct |= TCG_CT_REG; ct->regs = ALL_64; goto reserve_helpers; case 'I': diff --git a/tcg/tcg.c b/tcg/tcg.c index da01e39fe4..55b2fc3ae3 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2194,21 +2194,14 @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs) /* we give more priority to constraints with less registers */ static int get_constraint_priority(const TCGOpDef *def, int k) { - const TCGArgConstraint *arg_ct; + const TCGArgConstraint *arg_ct = &def->args_ct[k]; + int n; - int i, n; - arg_ct = &def->args_ct[k]; if (arg_ct->ct & TCG_CT_ALIAS) { /* an alias is equivalent to a single register */ n = 1; } else { - if (!(arg_ct->ct & TCG_CT_REG)) - return 0; - n = 0; - for(i = 0; i < TCG_TARGET_NB_REGS; i++) { - if (tcg_regset_test_reg(arg_ct->regs, i)) - n++; - } + n = ctpop64(arg_ct->regs); } return TCG_TARGET_NB_REGS - n + 1; } @@ -2276,7 +2269,7 @@ static void process_op_defs(TCGContext *s) int oarg = *ct_str - '0'; tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); tcg_debug_assert(oarg < def->nb_oargs); - tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG); + tcg_debug_assert(def->args_ct[oarg].regs != 0); /* TCG_CT_ALIAS is for the output arguments. The input is tagged with TCG_CT_IALIAS. 
*/ def->args_ct[i] = def->args_ct[oarg]; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index a7215f346f..231b9b1775 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -392,7 +392,6 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, case 'r': case 'L': /* qemu_ld constraint */ case 'S': /* qemu_st constraint */ - ct->ct |= TCG_CT_REG; ct->regs = BIT(TCG_TARGET_NB_REGS) - 1; break; default: From bc2b17e6ea582ef3ade2bdca750de269c674c915 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 4 Apr 2019 19:34:19 -0700 Subject: [PATCH 05/11] tcg: Move some TCG_CT_* bits to TCGArgConstraint bitfields These are easier to set and test when they have their own fields. Reduce the size of alias_index and sort_index to 4 bits, which is sufficient for TCG_MAX_OP_ARGS. This leaves only the bits indicating constants within the ct field. Move all initialization to allocation time, rather than init individual fields in process_op_defs. Signed-off-by: Richard Henderson --- include/tcg/tcg.h | 14 +++++++------- tcg/tcg.c | 28 ++++++++++++---------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 3168315bb8..e8629b58c8 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -976,15 +976,15 @@ int64_t tcg_cpu_exec_time(void); void tcg_dump_info(void); void tcg_dump_op_count(void); -#define TCG_CT_ALIAS 0x80 -#define TCG_CT_IALIAS 0x40 -#define TCG_CT_NEWREG 0x20 /* output requires a new register */ -#define TCG_CT_CONST 0x02 /* any constant of register size */ +#define TCG_CT_CONST 1 /* any constant of register size */ typedef struct TCGArgConstraint { - uint16_t ct; - uint8_t alias_index; - uint8_t sort_index; + unsigned ct : 16; + unsigned alias_index : 4; + unsigned sort_index : 4; + bool oalias : 1; + bool ialias : 1; + bool newreg : 1; TCGRegSet regs; } TCGArgConstraint; diff --git a/tcg/tcg.c b/tcg/tcg.c index 55b2fc3ae3..a8c28440e2 100644 --- 
a/tcg/tcg.c +++ b/tcg/tcg.c @@ -958,7 +958,7 @@ void tcg_context_init(TCGContext *s) total_args += n; } - args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args); + args_ct = g_new0(TCGArgConstraint, total_args); for(op = 0; op < NB_OPS; op++) { def = &tcg_op_defs[op]; @@ -2197,7 +2197,7 @@ static int get_constraint_priority(const TCGOpDef *def, int k) const TCGArgConstraint *arg_ct = &def->args_ct[k]; int n; - if (arg_ct->ct & TCG_CT_ALIAS) { + if (arg_ct->oalias) { /* an alias is equivalent to a single register */ n = 1; } else { @@ -2260,8 +2260,6 @@ static void process_op_defs(TCGContext *s) /* Incomplete TCGTargetOpDef entry. */ tcg_debug_assert(ct_str != NULL); - def->args_ct[i].regs = 0; - def->args_ct[i].ct = 0; while (*ct_str != '\0') { switch(*ct_str) { case '0' ... '9': @@ -2270,18 +2268,18 @@ static void process_op_defs(TCGContext *s) tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); tcg_debug_assert(oarg < def->nb_oargs); tcg_debug_assert(def->args_ct[oarg].regs != 0); - /* TCG_CT_ALIAS is for the output arguments. - The input is tagged with TCG_CT_IALIAS. */ def->args_ct[i] = def->args_ct[oarg]; - def->args_ct[oarg].ct |= TCG_CT_ALIAS; + /* The output sets oalias. */ + def->args_ct[oarg].oalias = true; def->args_ct[oarg].alias_index = i; - def->args_ct[i].ct |= TCG_CT_IALIAS; + /* The input sets ialias. */ + def->args_ct[i].ialias = true; def->args_ct[i].alias_index = oarg; } ct_str++; break; case '&': - def->args_ct[i].ct |= TCG_CT_NEWREG; + def->args_ct[i].newreg = true; ct_str++; break; case 'i': @@ -2848,7 +2846,7 @@ static void liveness_pass_1(TCGContext *s) set = *pset; set &= ct->regs; - if (ct->ct & TCG_CT_IALIAS) { + if (ct->ialias) { set &= op->output_pref[ct->alias_index]; } /* If the combination is not possible, restart. 
*/ @@ -3665,7 +3663,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } i_preferred_regs = o_preferred_regs = 0; - if (arg_ct->ct & TCG_CT_IALIAS) { + if (arg_ct->ialias) { o_preferred_regs = op->output_pref[arg_ct->alias_index]; if (ts->fixed_reg) { /* if fixed register, we must allocate a new register @@ -3688,8 +3686,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) reg = ts->reg; for (k2 = 0 ; k2 < k ; k2++) { i2 = def->args_ct[nb_oargs + k2].sort_index; - if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && - reg == new_args[i2]) { + if (def->args_ct[i2].ialias && reg == new_args[i2]) { goto allocate_in_reg; } } @@ -3760,10 +3757,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* ENV should not be modified. */ tcg_debug_assert(!ts->fixed_reg); - if ((arg_ct->ct & TCG_CT_ALIAS) - && !const_args[arg_ct->alias_index]) { + if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { reg = new_args[arg_ct->alias_index]; - } else if (arg_ct->ct & TCG_CT_NEWREG) { + } else if (arg_ct->newreg) { reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs | o_allocated_regs, op->output_pref[k], ts->indirect_base); From 70cad3c400bce4e1d364b81c09ac656e6166a573 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 5 Apr 2019 12:02:05 +0700 Subject: [PATCH 06/11] tcg: Remove TCGOpDef.used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last user of this field disappeared in f69d277ece4. 
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/tcg/tcg.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index e8629b58c8..8804a8c4a2 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -1015,9 +1015,6 @@ typedef struct TCGOpDef { uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args; uint8_t flags; TCGArgConstraint *args_ct; -#if defined(CONFIG_DEBUG_TCG) - int used; -#endif } TCGOpDef; extern TCGOpDef tcg_op_defs[]; From f80d09b599a5e0fd7f44653f23b04104cb703f7a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 6 Sep 2020 17:38:32 -0700 Subject: [PATCH 07/11] tcg/i386: Fix dupi for avx2 32-bit hosts The previous change wrongly stated that 32-bit avx2 should have used VPBROADCASTW. But that's a 16-bit broadcast and we want a 32-bit broadcast. Fixes: 7b60ef3264e Cc: qemu-stable@nongnu.org Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 2f696074ab..d8797ed398 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -958,7 +958,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); } else { if (have_avx2) { - tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret); + tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret); } else { tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); } From a5b30d950c42b14bc9da24d1e68add6538d23336 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 6 Sep 2020 18:27:41 -0700 Subject: [PATCH 08/11] tcg: Fix generation of dupi_vec for 32-bit host The definition of INDEX_op_dupi_vec is that it operates on units of tcg_target_ulong -- in this case 32 bits. It does not work to use this for a uint64_t value that happens to be small enough to fit in tcg_target_ulong. 
Fixes: d2fd745fe8b Fixes: db432672dc5 Cc: qemu-stable@nongnu.org Signed-off-by: Richard Henderson --- tcg/tcg-op-vec.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c index ed6fb55fe1..cdbf11c573 100644 --- a/tcg/tcg-op-vec.c +++ b/tcg/tcg-op-vec.c @@ -252,10 +252,10 @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m) void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a) { - if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) { - do_dupi_vec(r, MO_32, a); - } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) { + if (TCG_TARGET_REG_BITS == 64) { do_dupi_vec(r, MO_64, a); + } else if (a == dup_const(MO_32, a)) { + do_dupi_vec(r, MO_32, a); } else { TCGv_i64 c = tcg_const_i64(a); tcg_gen_dup_i64_vec(MO_64, r, c); @@ -280,7 +280,11 @@ void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a) void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a) { - do_dupi_vec(r, MO_REG, dup_const(vece, a)); + if (vece == MO_64) { + tcg_gen_dup64i_vec(r, a); + } else { + do_dupi_vec(r, MO_REG, dup_const(vece, a)); + } } void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a) From 1dc4fe70128db05237a00eda6eb15e2b44deb31f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 5 Sep 2020 17:03:35 -0700 Subject: [PATCH 09/11] tcg/optimize: Fold dup2_vec When the two arguments are identical, this can be reduced to dup_vec or to mov_vec from a tcg_constant_vec. 
Signed-off-by: Richard Henderson --- tcg/optimize.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tcg/optimize.c b/tcg/optimize.c index 53aa8e5329..220f4601d5 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1109,6 +1109,21 @@ void tcg_optimize(TCGContext *s) } goto do_default; + case INDEX_op_dup2_vec: + assert(TCG_TARGET_REG_BITS == 32); + if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { + tmp = arg_info(op->args[1])->val; + if (tmp == arg_info(op->args[2])->val) { + tcg_opt_gen_movi(s, op, op->args[0], tmp); + break; + } + } else if (args_are_copies(op->args[1], op->args[2])) { + op->opc = INDEX_op_dup_vec; + TCGOP_VECE(op) = MO_32; + nb_iargs = 1; + } + goto do_default; + CASE_OP_32_64(not): CASE_OP_32_64(neg): CASE_OP_32_64(ext8s): From cae5d53b9e72d7a1e43cebeb36471d77a16c6e43 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 5 Sep 2020 12:01:03 -0700 Subject: [PATCH 10/11] tcg: Remove TCG_TARGET_HAS_cmp_vec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cmp_vec opcode is mandatory; this symbol is unused. 
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.h | 1 - tcg/i386/tcg-target.h | 1 - tcg/ppc/tcg-target.h | 1 - 3 files changed, 3 deletions(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 9bc2a5ecbe..663dd0b95e 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -139,7 +139,6 @@ typedef enum { #define TCG_TARGET_HAS_shi_vec 1 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 1 -#define TCG_TARGET_HAS_cmp_vec 1 #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 1 #define TCG_TARGET_HAS_minmax_vec 1 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index d2baf796b0..abd4ac7fc0 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -189,7 +189,6 @@ extern bool have_avx2; #define TCG_TARGET_HAS_shi_vec 1 #define TCG_TARGET_HAS_shs_vec 1 #define TCG_TARGET_HAS_shv_vec have_avx2 -#define TCG_TARGET_HAS_cmp_vec 1 #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 1 #define TCG_TARGET_HAS_minmax_vec 1 diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index aee38157a2..be10363956 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -169,7 +169,6 @@ extern bool have_vsx; #define TCG_TARGET_HAS_shi_vec 0 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 1 -#define TCG_TARGET_HAS_cmp_vec 1 #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 1 #define TCG_TARGET_HAS_minmax_vec 1 From 62475e9d007d83db4d0a6ccebcda8914f392e9c9 Mon Sep 17 00:00:00 2001 From: Kele Huang Date: Fri, 2 Oct 2020 16:14:20 +0800 Subject: [PATCH 11/11] accel/tcg: Fix computing of is_write for MIPS Detect all MIPS store instructions in cpu_signal_handler for all available MIPS versions, and set is_write if encountering such store instructions. This fixed the error while dealing with self-modified code for MIPS. 
Reviewed-by: Richard Henderson Signed-off-by: Kele Huang Signed-off-by: Xu Zou Message-Id: <20201002081420.10814-1-kele.hwang@gmail.com> [rth: Use uintptr_t for pc to fix n32 build error.] Signed-off-by: Richard Henderson --- accel/tcg/user-exec.c | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 5c96819ded..4ebe25461a 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -702,16 +702,51 @@ int cpu_signal_handler(int host_signum, void *pinfo, #elif defined(__mips__) +#if defined(__mips16) || defined(__mips_micromips) +#error "Unsupported encoding" +#endif + int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; ucontext_t *uc = puc; - greg_t pc = uc->uc_mcontext.pc; - int is_write; + uintptr_t pc = uc->uc_mcontext.pc; + uint32_t insn = *(uint32_t *)pc; + int is_write = 0; + + /* Detect all store instructions at program counter. */ + switch((insn >> 26) & 077) { + case 050: /* SB */ + case 051: /* SH */ + case 052: /* SWL */ + case 053: /* SW */ + case 054: /* SDL */ + case 055: /* SDR */ + case 056: /* SWR */ + case 070: /* SC */ + case 071: /* SWC1 */ + case 074: /* SCD */ + case 075: /* SDC1 */ + case 077: /* SD */ +#if !defined(__mips_isa_rev) || __mips_isa_rev < 6 + case 072: /* SWC2 */ + case 076: /* SDC2 */ +#endif + is_write = 1; + break; + case 023: /* COP1X */ + /* Required in all versions of MIPS64 since + MIPS64r1 and subsequent versions of MIPS32r2. */ + switch (insn & 077) { + case 010: /* SWXC1 */ + case 011: /* SDXC1 */ + case 015: /* SUXC1 */ + is_write = 1; + } + break; + } - /* XXX: compute is_write */ - is_write = 0; return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); }