From 6d3ef04893bdea3e7aa08be3cce5141902836a31 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 15 Dec 2020 11:47:59 -0600 Subject: [PATCH 1/3] tcg: Use memset for large vector byte replication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In f47db80cc07, we handled odd-sized tail clearing for the case of hosts that have vector operations, but did not handle the case of hosts that do not have vector ops. This was ok until e2e7168a214b, which changed the encoding of simd_desc such that the odd sizes are impossible. Add memset as a tcg helper, and use that for all out-of-line byte stores to vectors. This includes, but is not limited to, the tail clearing operation in question. Cc: qemu-stable@nongnu.org Buglink: https://bugs.launchpad.net/bugs/1907817 Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/tcg-runtime.h | 11 +++++++++++ include/exec/helper-proto.h | 4 ++++ tcg/tcg-op-gvec.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index 4eda24e63a..2e36d6eb0c 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -28,6 +28,17 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env) DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) +#ifndef IN_HELPER_PROTO +/* + * Pass calls to memset directly to libc, without a thunk in qemu. + * Do not re-declare memset, especially since we fudge the type here; + * we assume sizeof(void *) == sizeof(size_t), which is true for + * all supported hosts. + */ +#define helper_memset memset +DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr) +#endif /* IN_HELPER_PROTO */ + #ifdef CONFIG_SOFTMMU DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG, diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h index a0a8d9aa46..659f9298e8 100644 --- a/include/exec/helper-proto.h +++ b/include/exec/helper-proto.h @@ -35,11 +35,15 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \ dh_ctype(t7)); +#define IN_HELPER_PROTO + #include "helper.h" #include "trace/generated-helpers.h" #include "tcg-runtime.h" #include "plugin-helpers.h" +#undef IN_HELPER_PROTO + #undef DEF_HELPER_FLAGS_0 #undef DEF_HELPER_FLAGS_1 #undef DEF_HELPER_FLAGS_2 diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index ddbe06b71a..1a41dfa908 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -547,6 +547,9 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, in_c = dup_const(vece, in_c); if (in_c == 0) { oprsz = maxsz; + vece = MO_8; + } else if (in_c == dup_const(MO_8, in_c)) { + vece = MO_8; } } @@ -628,6 +631,35 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, /* Otherwise implement out of line. */ t_ptr = tcg_temp_new_ptr(); tcg_gen_addi_ptr(t_ptr, cpu_env, dofs); + + /* + * This may be expand_clr for the tail of an operation, e.g. + * oprsz == 8 && maxsz == 64. The size of the clear is misaligned + * wrt simd_desc and will assert. Simply pass all replicated byte + * stores through to memset. + */ + if (oprsz == maxsz && vece == MO_8) { + TCGv_ptr t_size = tcg_const_ptr(oprsz); + TCGv_i32 t_val; + + if (in_32) { + t_val = in_32; + } else if (in_64) { + t_val = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t_val, in_64); + } else { + t_val = tcg_const_i32(in_c); + } + gen_helper_memset(t_ptr, t_ptr, t_val, t_size); + + if (!in_32) { + tcg_temp_free_i32(t_val); + } + tcg_temp_free_ptr(t_size); + tcg_temp_free_ptr(t_ptr); + return; + } + t_desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0)); if (vece == MO_64) { From d2f3066eb2af5d6867974493833834e2aaa427f7 Mon Sep 17 00:00:00 2001 From: Zihao Yu Date: Wed, 16 Dec 2020 16:12:06 +0800 Subject: [PATCH 2/3] tcg/riscv: Fix illegal shift instructions Out-of-range shifts have undefined results, but must not trap. Mask off immediate shift counts to solve this problem. This bug can be reproduced by running the following guest instructions: xor %ecx,%ecx sar %cl,%eax cmovne %edi,%eax After optimization, the tcg opcodes of the sar are movi_i32 tmp3,$0xffffffffffffffff pref=all sar_i32 tmp3,eax,tmp3 dead: 2 pref=all mov_i32 cc_dst,eax sync: 0 dead: 1 pref=0xffc0300 mov_i32 cc_src,tmp3 sync: 0 dead: 0 1 pref=all movi_i32 cc_op,$0x31 sync: 0 dead: 0 pref=all The sar_i32 opcode is a shift by -1, which unmasked generates 0x200808d618: fffa5b9b illegal Signed-off-by: Zihao Yu Message-Id: <20201216081206.9628-1-yuzihao@ict.ac.cn> [rth: Reworded the patch description.] Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index d536f3ccc1..4089e29cd9 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1462,14 +1462,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_shl_i32: if (c2) { - tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f); } else { tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2); } break; case INDEX_op_shl_i64: if (c2) { - tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f); } else { tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2); } @@ -1477,14 +1477,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_shr_i32: if (c2) { - tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f); } else { tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2); } break; case INDEX_op_shr_i64: if (c2) { - tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f); } else { tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2); } @@ -1492,14 +1492,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sar_i32: if (c2) { - tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f); } else { tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2); } break; case INDEX_op_sar_i64: if (c2) { - tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f); } else { tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2); } From a66424ba17d661007dc13d78c9e3014ccbaf0efb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 16 Dec 2020 11:59:06 -0600 Subject: [PATCH 3/3] tcg: Add tcg_gen_bswap_tl alias The alias is intended to indicate that the bswap is for the entire target_long. This should avoid ifdefs on some targets. Reviewed-by: Frank Chang Signed-off-by: Richard Henderson --- include/tcg/tcg-op.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index 5abf17fecc..5b3bdacc39 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -1085,6 +1085,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_bswap16_tl tcg_gen_bswap16_i64 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i64 #define tcg_gen_bswap64_tl tcg_gen_bswap64_i64 +#define tcg_gen_bswap_tl tcg_gen_bswap64_i64 #define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64 #define tcg_gen_extr_i64_tl tcg_gen_extr32_i64 #define tcg_gen_andc_tl tcg_gen_andc_i64 @@ -1197,6 +1198,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_ext32s_tl tcg_gen_mov_i32 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 +#define tcg_gen_bswap_tl tcg_gen_bswap32_i32 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 #define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32 #define tcg_gen_andc_tl tcg_gen_andc_i32