From e6e903db6a5e960e595f9f1fd034adb942dd9508 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Tue, 23 Apr 2024 17:24:39 +0200 Subject: [PATCH 01/38] linux-user: Add ioctl for BLKBSZSET Tiny patch to add the ioctl wrapper definition for BLKBSZSET. Signed-off-by: Michael Vogt Message-Id: <20240423152438.19841-2-mvogt@redhat.com> --- linux-user/ioctls.h | 1 + 1 file changed, 1 insertion(+) diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h index d508d0c04a..3b41128fd7 100644 --- a/linux-user/ioctls.h +++ b/linux-user/ioctls.h @@ -102,6 +102,7 @@ IOCTL(BLKRAGET, IOC_R, MK_PTR(TYPE_LONG)) IOCTL(BLKSSZGET, IOC_R, MK_PTR(TYPE_INT)) IOCTL(BLKBSZGET, IOC_R, MK_PTR(TYPE_INT)) + IOCTL(BLKBSZSET, IOC_W, MK_PTR(TYPE_INT)) IOCTL_SPECIAL(BLKPG, IOC_W, do_ioctl_blkpg, MK_PTR(MK_STRUCT(STRUCT_blkpg_ioctl_arg))) From fa9079a86d94c202c316c97ca2eb61ca3e763907 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 24 May 2024 19:23:11 -0700 Subject: [PATCH 02/38] target/sparc: Fix ARRAY8 Follow the Oracle Sparc 2015 implementation note and bound the input value of N to 5 from the lower 3 bits of rs2. Spell out all of the intermediate values, matching the diagram in the manual. Fix extraction of upper_x and upper_y for N=0. Signed-off-by: Richard Henderson --- target/sparc/vis_helper.c | 53 ++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index e15c6bb34e..f46fcf1f6a 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -21,25 +21,42 @@ #include "cpu.h" #include "exec/helper-proto.h" -/* This function uses non-native bit order */ -#define GET_FIELD(X, FROM, TO) \ - ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1)) - -/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */ -#define GET_FIELD_SP(X, FROM, TO) \ - GET_FIELD(X, 63 - (TO), 63 - (FROM)) - -target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) +target_ulong helper_array8(target_ulong rs1, target_ulong rs2) { - return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) | - (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) | - (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) | - (GET_FIELD_SP(pixel_addr, 56, 59) << 13) | - (GET_FIELD_SP(pixel_addr, 35, 38) << 9) | - (GET_FIELD_SP(pixel_addr, 13, 16) << 5) | - (((pixel_addr >> 55) & 1) << 4) | - (GET_FIELD_SP(pixel_addr, 33, 34) << 2) | - GET_FIELD_SP(pixel_addr, 11, 12); + /* + * From Oracle SPARC Architecture 2015: + * Architecturally, an illegal R[rs2] value (>5) causes the array + * instructions to produce undefined results. For historic reference, + * past implementations of these instructions have ignored R[rs2]{63:3} + * and have treated R[rs2] values of 6 and 7 as if they were 5. + */ + target_ulong n = MIN(rs2 & 7, 5); + + target_ulong x_int = (rs1 >> 11) & 0x7ff; + target_ulong y_int = (rs1 >> 33) & 0x7ff; + target_ulong z_int = rs1 >> 55; + + target_ulong lower_x = x_int & 3; + target_ulong lower_y = y_int & 3; + target_ulong lower_z = z_int & 1; + + target_ulong middle_x = (x_int >> 2) & 15; + target_ulong middle_y = (y_int >> 2) & 15; + target_ulong middle_z = (z_int >> 1) & 15; + + target_ulong upper_x = (x_int >> 6) & ((1 << n) - 1); + target_ulong upper_y = (y_int >> 6) & ((1 << n) - 1); + target_ulong upper_z = z_int >> 5; + + return (upper_z << (17 + 2 * n)) + | (upper_y << (17 + n)) + | (upper_x << 17) + | (middle_z << 13) + | (middle_y << 9) + | (middle_x << 5) + | (lower_z << 4) + | (lower_y << 2) + | lower_x; } #if HOST_BIG_ENDIAN From 43db5838022a32752a27e64de6599b8dc427ba9f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 24 May 2024 21:26:24 -0700 Subject: [PATCH 03/38] target/sparc: Rewrite gen_edge Drop the tables and compute the left and right edges directly. Signed-off-by: Richard Henderson --- target/sparc/translate.c | 98 +++++++++++++++------------------------- 1 file changed, 37 insertions(+), 61 deletions(-) diff --git a/target/sparc/translate.c b/target/sparc/translate.c index dca072888a..00c2a11353 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -3519,11 +3519,10 @@ static bool trans_SDIVX(DisasContext *dc, arg_r_r_ri *a) } static bool gen_edge(DisasContext *dc, arg_r_r_r *a, - int width, bool cc, bool left) + int width, bool cc, bool little_endian) { - TCGv dst, s1, s2, lo1, lo2; - uint64_t amask, tabl, tabr; - int shift, imask, omask; + TCGv dst, s1, s2, l, r, t, m; + uint64_t amask = address_mask_i(dc, -8); dst = gen_dest_gpr(dc, a->rd); s1 = gen_load_gpr(dc, a->rs1); @@ -3533,75 +3532,52 @@ static bool gen_edge(DisasContext *dc, arg_r_r_r *a, gen_op_subcc(cpu_cc_N, s1, s2); } - /* - * Theory of operation: there are two tables, left and right (not to - * be confused with the left and right versions of the opcode). These - * are indexed by the low 3 bits of the inputs. To make things "easy", - * these tables are loaded into two constants, TABL and TABR below. - * The operation index = (input & imask) << shift calculates the index - * into the constant, while val = (table >> index) & omask calculates - * the value we're looking for. - */ + l = tcg_temp_new(); + r = tcg_temp_new(); + t = tcg_temp_new(); + switch (width) { case 8: - imask = 0x7; - shift = 3; - omask = 0xff; - if (left) { - tabl = 0x80c0e0f0f8fcfeffULL; - tabr = 0xff7f3f1f0f070301ULL; - } else { - tabl = 0x0103070f1f3f7fffULL; - tabr = 0xfffefcf8f0e0c080ULL; - } + tcg_gen_andi_tl(l, s1, 7); + tcg_gen_andi_tl(r, s2, 7); + tcg_gen_xori_tl(r, r, 7); + m = tcg_constant_tl(0xff); break; case 16: - imask = 0x6; - shift = 1; - omask = 0xf; - if (left) { - tabl = 0x8cef; - tabr = 0xf731; - } else { - tabl = 0x137f; - tabr = 0xfec8; - } + tcg_gen_extract_tl(l, s1, 1, 2); + tcg_gen_extract_tl(r, s2, 1, 2); + tcg_gen_xori_tl(r, r, 3); + m = tcg_constant_tl(0xf); break; case 32: - imask = 0x4; - shift = 0; - omask = 0x3; - if (left) { - tabl = (2 << 2) | 3; - tabr = (3 << 2) | 1; - } else { - tabl = (1 << 2) | 3; - tabr = (3 << 2) | 2; - } + tcg_gen_extract_tl(l, s1, 2, 1); + tcg_gen_extract_tl(r, s2, 2, 1); + tcg_gen_xori_tl(r, r, 1); + m = tcg_constant_tl(0x3); break; default: abort(); } - lo1 = tcg_temp_new(); - lo2 = tcg_temp_new(); - tcg_gen_andi_tl(lo1, s1, imask); - tcg_gen_andi_tl(lo2, s2, imask); - tcg_gen_shli_tl(lo1, lo1, shift); - tcg_gen_shli_tl(lo2, lo2, shift); + /* Compute Left Edge */ + if (little_endian) { + tcg_gen_shl_tl(l, m, l); + tcg_gen_and_tl(l, l, m); + } else { + tcg_gen_shr_tl(l, m, l); + } + /* Compute Right Edge */ + if (little_endian) { + tcg_gen_shr_tl(r, m, r); + } else { + tcg_gen_shl_tl(r, m, r); + tcg_gen_and_tl(r, r, m); + } - tcg_gen_shr_tl(lo1, tcg_constant_tl(tabl), lo1); - tcg_gen_shr_tl(lo2, tcg_constant_tl(tabr), lo2); - tcg_gen_andi_tl(lo1, lo1, omask); - tcg_gen_andi_tl(lo2, lo2, omask); - - amask = address_mask_i(dc, -8); - tcg_gen_andi_tl(s1, s1, amask); - tcg_gen_andi_tl(s2, s2, amask); - - /* Compute dst = (s1 == s2 ? lo1 : lo1 & lo2). */ - tcg_gen_and_tl(lo2, lo2, lo1); - tcg_gen_movcond_tl(TCG_COND_EQ, dst, s1, s2, lo1, lo2); + /* Compute dst = (s1 == s2 under amask ? l : l & r) */ + tcg_gen_xor_tl(t, s1, s2); + tcg_gen_and_tl(r, r, l); + tcg_gen_movcond_tl(TCG_COND_TSTEQ, dst, t, tcg_constant_tl(amask), r, l); gen_store_gpr(dc, a->rd, dst); return advance_pc(dc); From 04d5bf30419412cfbf984e90938b411e22fd8916 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 24 May 2024 21:33:01 -0700 Subject: [PATCH 04/38] target/sparc: Fix do_dc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply DFPREG to compute the register number. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/translate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 00c2a11353..1eb1a6decf 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -4253,6 +4253,7 @@ static bool do_dc(DisasContext *dc, int rd, int64_t c) return true; } + rd = DFPREG(rd); tcg_gen_movi_i64(cpu_fpr[rd / 2], c); gen_update_fprs_dirty(dc, rd); return advance_pc(dc); From b5c960470d0d7a976aa83e6e3a9b0fdc1d83c7cd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 24 May 2024 22:04:03 -0700 Subject: [PATCH 05/38] target/sparc: Fix helper_fmul8ulx16 This operation returns the high 16 bits of a 24-bit multiply that has been sign-extended to 32 bits. Signed-off-by: Richard Henderson --- target/sparc/vis_helper.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index f46fcf1f6a..41312deda4 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -174,10 +174,10 @@ uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) s.ll = src1; d.ll = src2; - d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0)); - d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1)); - d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2)); - d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3)); + d.VIS_W64(0) = (s.VIS_B64(0) * d.VIS_SW64(0) + 0x8000) >> 16; + d.VIS_W64(1) = (s.VIS_B64(2) * d.VIS_SW64(1) + 0x8000) >> 16; + d.VIS_W64(2) = (s.VIS_B64(4) * d.VIS_SW64(2) + 0x8000) >> 16; + d.VIS_W64(3) = (s.VIS_B64(6) * d.VIS_SW64(3) + 0x8000) >> 16; return d.ll; } From 0bba7572d40d5d3f79dd2392051fe3970a41e9db Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 3 Nov 2023 13:13:05 -0700 Subject: [PATCH 06/38] target/sparc: Perform DFPREG/QFPREG in decodetree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Form the proper register decoding from the start. Because we're removing the translation from the inner-most gen_load_fpr_* and gen_store_fpr_* routines, this must be done for all insns at once. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 220 +++++++++++++++++++++++--------------- target/sparc/translate.c | 39 +++---- 2 files changed, 148 insertions(+), 111 deletions(-) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index e2d8a07dc4..02fa505b49 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -26,6 +26,14 @@ CALL 01 i:s30 ## Major Opcode 10 -- integer, floating-point, vis, and system insns. ## +%dfp_rd 25:5 !function=extract_dfpreg +%dfp_rs1 14:5 !function=extract_dfpreg +%dfp_rs2 0:5 !function=extract_dfpreg + +%qfp_rd 25:5 !function=extract_qfpreg +%qfp_rs1 14:5 !function=extract_qfpreg +%qfp_rs2 0:5 !function=extract_qfpreg + &r_r_ri rd rs1 rs2_or_imm imm:bool @n_r_ri .. ..... ...... rs1:5 imm:1 rs2_or_imm:s13 &r_r_ri rd=0 @r_r_ri .. rd:5 ...... rs1:5 imm:1 rs2_or_imm:s13 &r_r_ri @@ -37,11 +45,40 @@ CALL 01 i:s30 &r_r_r rd rs1 rs2 @r_r_r .. rd:5 ...... rs1:5 . ........ rs2:5 &r_r_r +@d_r_r .. ..... ...... rs1:5 . ........ rs2:5 \ + &r_r_r rd=%dfp_rd +@r_d_d .. rd:5 ...... ..... . ........ ..... \ + &r_r_r rs1=%dfp_rs1 rs2=%dfp_rs2 +@d_r_d .. ..... ...... rs1:5 . ........ ..... \ + &r_r_r rd=%dfp_rd rs2=%dfp_rs2 +@d_d_d .. ..... ...... ..... . ........ ..... \ + &r_r_r rd=%dfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2 +@q_q_q .. ..... ...... ..... . ........ ..... \ + &r_r_r rd=%qfp_rd rs1=%qfp_rs1 rs2=%qfp_rs2 +@q_d_d .. ..... ...... ..... . ........ ..... \ + &r_r_r rd=%qfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2 + @r_r_r_swap .. rd:5 ...... rs2:5 . ........ rs1:5 &r_r_r +@d_d_d_swap .. ..... ...... ..... . ........ ..... \ + &r_r_r rd=%dfp_rd rs1=%dfp_rs2 rs2=%dfp_rs1 &r_r rd rs @r_r1 .. rd:5 ...... rs:5 . ........ ..... &r_r @r_r2 .. rd:5 ...... ..... . ........ rs:5 &r_r +@r_d2 .. rd:5 ...... ..... . ........ ..... &r_r rs=%dfp_rs2 +@r_q2 .. rd:5 ...... ..... . ........ ..... &r_r rs=%qfp_rs2 +@d_r2 .. ..... ...... ..... . ........ rs:5 &r_r rd=%dfp_rd +@q_r2 .. ..... ...... ..... . ........ rs:5 &r_r rd=%qfp_rd +@d_d1 .. ..... ...... ..... . ........ ..... \ + &r_r rd=%dfp_rd rs=%dfp_rs1 +@d_d2 .. ..... ...... ..... . ........ ..... \ + &r_r rd=%dfp_rd rs=%dfp_rs2 +@d_q2 .. ..... ...... ..... . ........ ..... \ + &r_r rd=%dfp_rd rs=%qfp_rs2 +@q_q2 .. ..... ...... ..... . ........ ..... \ + &r_r rd=%qfp_rd rs=%qfp_rs2 +@q_d2 .. ..... ...... ..... . ........ ..... \ + &r_r rd=%qfp_rd rs=%dfp_rs2 { [ @@ -241,68 +278,78 @@ DONE 10 00000 111110 00000 0 0000000000000 RETRY 10 00001 111110 00000 0 0000000000000 FMOVs 10 ..... 110100 00000 0 0000 0001 ..... @r_r2 -FMOVd 10 ..... 110100 00000 0 0000 0010 ..... @r_r2 -FMOVq 10 ..... 110100 00000 0 0000 0011 ..... @r_r2 +FMOVd 10 ..... 110100 00000 0 0000 0010 ..... @d_d2 +FMOVq 10 ..... 110100 00000 0 0000 0011 ..... @q_q2 FNEGs 10 ..... 110100 00000 0 0000 0101 ..... @r_r2 -FNEGd 10 ..... 110100 00000 0 0000 0110 ..... @r_r2 -FNEGq 10 ..... 110100 00000 0 0000 0111 ..... @r_r2 +FNEGd 10 ..... 110100 00000 0 0000 0110 ..... @d_d2 +FNEGq 10 ..... 110100 00000 0 0000 0111 ..... @q_q2 FABSs 10 ..... 110100 00000 0 0000 1001 ..... @r_r2 -FABSd 10 ..... 110100 00000 0 0000 1010 ..... @r_r2 -FABSq 10 ..... 110100 00000 0 0000 1011 ..... @r_r2 +FABSd 10 ..... 110100 00000 0 0000 1010 ..... @d_d2 +FABSq 10 ..... 110100 00000 0 0000 1011 ..... @q_q2 FSQRTs 10 ..... 110100 00000 0 0010 1001 ..... @r_r2 -FSQRTd 10 ..... 110100 00000 0 0010 1010 ..... @r_r2 -FSQRTq 10 ..... 110100 00000 0 0010 1011 ..... @r_r2 +FSQRTd 10 ..... 110100 00000 0 0010 1010 ..... @d_d2 +FSQRTq 10 ..... 110100 00000 0 0010 1011 ..... @q_q2 FADDs 10 ..... 110100 ..... 0 0100 0001 ..... @r_r_r -FADDd 10 ..... 110100 ..... 0 0100 0010 ..... @r_r_r -FADDq 10 ..... 110100 ..... 0 0100 0011 ..... @r_r_r +FADDd 10 ..... 110100 ..... 0 0100 0010 ..... @d_d_d +FADDq 10 ..... 110100 ..... 0 0100 0011 ..... @q_q_q FSUBs 10 ..... 110100 ..... 0 0100 0101 ..... @r_r_r -FSUBd 10 ..... 110100 ..... 0 0100 0110 ..... @r_r_r -FSUBq 10 ..... 110100 ..... 0 0100 0111 ..... @r_r_r +FSUBd 10 ..... 110100 ..... 0 0100 0110 ..... @d_d_d +FSUBq 10 ..... 110100 ..... 0 0100 0111 ..... @q_q_q FMULs 10 ..... 110100 ..... 0 0100 1001 ..... @r_r_r -FMULd 10 ..... 110100 ..... 0 0100 1010 ..... @r_r_r -FMULq 10 ..... 110100 ..... 0 0100 1011 ..... @r_r_r +FMULd 10 ..... 110100 ..... 0 0100 1010 ..... @d_d_d +FMULq 10 ..... 110100 ..... 0 0100 1011 ..... @q_q_q FDIVs 10 ..... 110100 ..... 0 0100 1101 ..... @r_r_r -FDIVd 10 ..... 110100 ..... 0 0100 1110 ..... @r_r_r -FDIVq 10 ..... 110100 ..... 0 0100 1111 ..... @r_r_r -FsMULd 10 ..... 110100 ..... 0 0110 1001 ..... @r_r_r -FdMULq 10 ..... 110100 ..... 0 0110 1110 ..... @r_r_r +FDIVd 10 ..... 110100 ..... 0 0100 1110 ..... @d_d_d +FDIVq 10 ..... 110100 ..... 0 0100 1111 ..... @q_q_q +FsMULd 10 ..... 110100 ..... 0 0110 1001 ..... @d_r_r +FdMULq 10 ..... 110100 ..... 0 0110 1110 ..... @q_d_d FsTOx 10 ..... 110100 00000 0 1000 0001 ..... @r_r2 -FdTOx 10 ..... 110100 00000 0 1000 0010 ..... @r_r2 -FqTOx 10 ..... 110100 00000 0 1000 0011 ..... @r_r2 +FdTOx 10 ..... 110100 00000 0 1000 0010 ..... @r_d2 +FqTOx 10 ..... 110100 00000 0 1000 0011 ..... @r_q2 FxTOs 10 ..... 110100 00000 0 1000 0100 ..... @r_r2 -FxTOd 10 ..... 110100 00000 0 1000 1000 ..... @r_r2 -FxTOq 10 ..... 110100 00000 0 1000 1100 ..... @r_r2 +FxTOd 10 ..... 110100 00000 0 1000 1000 ..... @d_r2 +FxTOq 10 ..... 110100 00000 0 1000 1100 ..... @q_r2 FiTOs 10 ..... 110100 00000 0 1100 0100 ..... @r_r2 -FdTOs 10 ..... 110100 00000 0 1100 0110 ..... @r_r2 -FqTOs 10 ..... 110100 00000 0 1100 0111 ..... @r_r2 -FiTOd 10 ..... 110100 00000 0 1100 1000 ..... @r_r2 -FsTOd 10 ..... 110100 00000 0 1100 1001 ..... @r_r2 -FqTOd 10 ..... 110100 00000 0 1100 1011 ..... @r_r2 -FiTOq 10 ..... 110100 00000 0 1100 1100 ..... @r_r2 -FsTOq 10 ..... 110100 00000 0 1100 1101 ..... @r_r2 -FdTOq 10 ..... 110100 00000 0 1100 1110 ..... @r_r2 +FdTOs 10 ..... 110100 00000 0 1100 0110 ..... @r_d2 +FqTOs 10 ..... 110100 00000 0 1100 0111 ..... @r_q2 +FiTOd 10 ..... 110100 00000 0 1100 1000 ..... @d_r2 +FsTOd 10 ..... 110100 00000 0 1100 1001 ..... @d_r2 +FqTOd 10 ..... 110100 00000 0 1100 1011 ..... @d_q2 +FiTOq 10 ..... 110100 00000 0 1100 1100 ..... @q_r2 +FsTOq 10 ..... 110100 00000 0 1100 1101 ..... @q_r2 +FdTOq 10 ..... 110100 00000 0 1100 1110 ..... @q_d2 FsTOi 10 ..... 110100 00000 0 1101 0001 ..... @r_r2 -FdTOi 10 ..... 110100 00000 0 1101 0010 ..... @r_r2 -FqTOi 10 ..... 110100 00000 0 1101 0011 ..... @r_r2 +FdTOi 10 ..... 110100 00000 0 1101 0010 ..... @r_d2 +FqTOi 10 ..... 110100 00000 0 1101 0011 ..... @r_q2 FMOVscc 10 rd:5 110101 0 cond:4 1 cc:1 0 000001 rs2:5 -FMOVdcc 10 rd:5 110101 0 cond:4 1 cc:1 0 000010 rs2:5 -FMOVqcc 10 rd:5 110101 0 cond:4 1 cc:1 0 000011 rs2:5 +FMOVdcc 10 ..... 110101 0 cond:4 1 cc:1 0 000010 ..... \ + rd=%dfp_rd rs2=%dfp_rs2 +FMOVqcc 10 ..... 110101 0 cond:4 1 cc:1 0 000011 ..... \ + rd=%qfp_rd rs2=%qfp_rs2 FMOVsfcc 10 rd:5 110101 0 cond:4 0 cc:2 000001 rs2:5 -FMOVdfcc 10 rd:5 110101 0 cond:4 0 cc:2 000010 rs2:5 -FMOVqfcc 10 rd:5 110101 0 cond:4 0 cc:2 000011 rs2:5 +FMOVdfcc 10 ..... 110101 0 cond:4 0 cc:2 000010 ..... \ + rd=%dfp_rd rs2=%dfp_rs2 +FMOVqfcc 10 ..... 110101 0 cond:4 0 cc:2 000011 ..... \ + rd=%qfp_rd rs2=%qfp_rs2 FMOVRs 10 rd:5 110101 rs1:5 0 cond:3 00101 rs2:5 -FMOVRd 10 rd:5 110101 rs1:5 0 cond:3 00110 rs2:5 -FMOVRq 10 rd:5 110101 rs1:5 0 cond:3 00111 rs2:5 +FMOVRd 10 ..... 110101 rs1:5 0 cond:3 00110 ..... \ + rd=%dfp_rd rs2=%dfp_rs2 +FMOVRq 10 ..... 110101 rs1:5 0 cond:3 00111 ..... \ + rd=%qfp_rd rs2=%qfp_rs2 FCMPs 10 000 cc:2 110101 rs1:5 0 0101 0001 rs2:5 -FCMPd 10 000 cc:2 110101 rs1:5 0 0101 0010 rs2:5 -FCMPq 10 000 cc:2 110101 rs1:5 0 0101 0011 rs2:5 +FCMPd 10 000 cc:2 110101 ..... 0 0101 0010 ..... \ + rs1=%dfp_rs1 rs2=%dfp_rs2 +FCMPq 10 000 cc:2 110101 ..... 0 0101 0011 ..... \ + rs1=%qfp_rs1 rs2=%qfp_rs2 FCMPEs 10 000 cc:2 110101 rs1:5 0 0101 0101 rs2:5 -FCMPEd 10 000 cc:2 110101 rs1:5 0 0101 0110 rs2:5 -FCMPEq 10 000 cc:2 110101 rs1:5 0 0101 0111 rs2:5 +FCMPEd 10 000 cc:2 110101 ..... 0 0101 0110 ..... \ + rs1=%dfp_rs1 rs2=%dfp_rs2 +FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ + rs1=%qfp_rs1 rs2=%qfp_rs2 { [ @@ -328,74 +375,74 @@ FCMPEq 10 000 cc:2 110101 rs1:5 0 0101 0111 rs2:5 BMASK 10 ..... 110110 ..... 0 0001 1001 ..... @r_r_r - FPCMPLE16 10 ..... 110110 ..... 0 0010 0000 ..... @r_r_r - FPCMPNE16 10 ..... 110110 ..... 0 0010 0010 ..... @r_r_r - FPCMPGT16 10 ..... 110110 ..... 0 0010 1000 ..... @r_r_r - FPCMPEQ16 10 ..... 110110 ..... 0 0010 1010 ..... @r_r_r - FPCMPLE32 10 ..... 110110 ..... 0 0010 0100 ..... @r_r_r - FPCMPNE32 10 ..... 110110 ..... 0 0010 0110 ..... @r_r_r - FPCMPGT32 10 ..... 110110 ..... 0 0010 1100 ..... @r_r_r - FPCMPEQ32 10 ..... 110110 ..... 0 0010 1110 ..... @r_r_r + FPCMPLE16 10 ..... 110110 ..... 0 0010 0000 ..... @r_d_d + FPCMPNE16 10 ..... 110110 ..... 0 0010 0010 ..... @r_d_d + FPCMPGT16 10 ..... 110110 ..... 0 0010 1000 ..... @r_d_d + FPCMPEQ16 10 ..... 110110 ..... 0 0010 1010 ..... @r_d_d + FPCMPLE32 10 ..... 110110 ..... 0 0010 0100 ..... @r_d_d + FPCMPNE32 10 ..... 110110 ..... 0 0010 0110 ..... @r_d_d + FPCMPGT32 10 ..... 110110 ..... 0 0010 1100 ..... @r_d_d + FPCMPEQ32 10 ..... 110110 ..... 0 0010 1110 ..... @r_d_d - FMUL8x16 10 ..... 110110 ..... 0 0011 0001 ..... @r_r_r - FMUL8x16AU 10 ..... 110110 ..... 0 0011 0011 ..... @r_r_r - FMUL8x16AL 10 ..... 110110 ..... 0 0011 0101 ..... @r_r_r - FMUL8SUx16 10 ..... 110110 ..... 0 0011 0110 ..... @r_r_r - FMUL8ULx16 10 ..... 110110 ..... 0 0011 0111 ..... @r_r_r - FMULD8SUx16 10 ..... 110110 ..... 0 0011 1000 ..... @r_r_r - FMULD8ULx16 10 ..... 110110 ..... 0 0011 1001 ..... @r_r_r - FPACK32 10 ..... 110110 ..... 0 0011 1010 ..... @r_r_r - FPACK16 10 ..... 110110 00000 0 0011 1011 ..... @r_r2 - FPACKFIX 10 ..... 110110 00000 0 0011 1101 ..... @r_r2 - PDIST 10 ..... 110110 ..... 0 0011 1110 ..... @r_r_r + FMUL8x16 10 ..... 110110 ..... 0 0011 0001 ..... @d_r_d + FMUL8x16AU 10 ..... 110110 ..... 0 0011 0011 ..... @d_r_r + FMUL8x16AL 10 ..... 110110 ..... 0 0011 0101 ..... @d_r_r + FMUL8SUx16 10 ..... 110110 ..... 0 0011 0110 ..... @d_d_d + FMUL8ULx16 10 ..... 110110 ..... 0 0011 0111 ..... @d_d_d + FMULD8SUx16 10 ..... 110110 ..... 0 0011 1000 ..... @d_r_r + FMULD8ULx16 10 ..... 110110 ..... 0 0011 1001 ..... @d_r_r + FPACK32 10 ..... 110110 ..... 0 0011 1010 ..... @d_d_d + FPACK16 10 ..... 110110 00000 0 0011 1011 ..... @r_d2 + FPACKFIX 10 ..... 110110 00000 0 0011 1101 ..... @r_d2 + PDIST 10 ..... 110110 ..... 0 0011 1110 ..... @d_d_d - FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @r_r_r - FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @r_r_r - BSHUFFLE 10 ..... 110110 ..... 0 0100 1100 ..... @r_r_r - FEXPAND 10 ..... 110110 00000 0 0100 1101 ..... @r_r2 + FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @d_d_d + FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @d_r_r + BSHUFFLE 10 ..... 110110 ..... 0 0100 1100 ..... @d_d_d + FEXPAND 10 ..... 110110 00000 0 0100 1101 ..... @d_r2 - FSRCd 10 ..... 110110 ..... 0 0111 0100 00000 @r_r1 # FSRC1d + FSRCd 10 ..... 110110 ..... 0 0111 0100 00000 @d_d1 # FSRC1d FSRCs 10 ..... 110110 ..... 0 0111 0101 00000 @r_r1 # FSRC1s - FSRCd 10 ..... 110110 00000 0 0111 1000 ..... @r_r2 # FSRC2d + FSRCd 10 ..... 110110 00000 0 0111 1000 ..... @d_d2 # FSRC2d FSRCs 10 ..... 110110 00000 0 0111 1001 ..... @r_r2 # FSRC2s - FNOTd 10 ..... 110110 ..... 0 0110 1010 00000 @r_r1 # FNOT1d + FNOTd 10 ..... 110110 ..... 0 0110 1010 00000 @d_d1 # FNOT1d FNOTs 10 ..... 110110 ..... 0 0110 1011 00000 @r_r1 # FNOT1s - FNOTd 10 ..... 110110 00000 0 0110 0110 ..... @r_r2 # FNOT2d + FNOTd 10 ..... 110110 00000 0 0110 0110 ..... @d_d2 # FNOT2d FNOTs 10 ..... 110110 00000 0 0110 0111 ..... @r_r2 # FNOT2s - FPADD16 10 ..... 110110 ..... 0 0101 0000 ..... @r_r_r + FPADD16 10 ..... 110110 ..... 0 0101 0000 ..... @d_d_d FPADD16s 10 ..... 110110 ..... 0 0101 0001 ..... @r_r_r - FPADD32 10 ..... 110110 ..... 0 0101 0010 ..... @r_r_r + FPADD32 10 ..... 110110 ..... 0 0101 0010 ..... @d_d_d FPADD32s 10 ..... 110110 ..... 0 0101 0011 ..... @r_r_r - FPSUB16 10 ..... 110110 ..... 0 0101 0100 ..... @r_r_r + FPSUB16 10 ..... 110110 ..... 0 0101 0100 ..... @d_d_d FPSUB16s 10 ..... 110110 ..... 0 0101 0101 ..... @r_r_r - FPSUB32 10 ..... 110110 ..... 0 0101 0110 ..... @r_r_r + FPSUB32 10 ..... 110110 ..... 0 0101 0110 ..... @d_d_d FPSUB32s 10 ..... 110110 ..... 0 0101 0111 ..... @r_r_r - FNORd 10 ..... 110110 ..... 0 0110 0010 ..... @r_r_r + FNORd 10 ..... 110110 ..... 0 0110 0010 ..... @d_d_d FNORs 10 ..... 110110 ..... 0 0110 0011 ..... @r_r_r - FANDNOTd 10 ..... 110110 ..... 0 0110 0100 ..... @r_r_r # FANDNOT2d + FANDNOTd 10 ..... 110110 ..... 0 0110 0100 ..... @d_d_d # FANDNOT2d FANDNOTs 10 ..... 110110 ..... 0 0110 0101 ..... @r_r_r # FANDNOT2s - FANDNOTd 10 ..... 110110 ..... 0 0110 1000 ..... @r_r_r_swap # ... 1d + FANDNOTd 10 ..... 110110 ..... 0 0110 1000 ..... @d_d_d_swap # ... 1d FANDNOTs 10 ..... 110110 ..... 0 0110 1001 ..... @r_r_r_swap # ... 1s - FXORd 10 ..... 110110 ..... 0 0110 1100 ..... @r_r_r + FXORd 10 ..... 110110 ..... 0 0110 1100 ..... @d_d_d FXORs 10 ..... 110110 ..... 0 0110 1101 ..... @r_r_r - FNANDd 10 ..... 110110 ..... 0 0110 1110 ..... @r_r_r + FNANDd 10 ..... 110110 ..... 0 0110 1110 ..... @d_d_d FNANDs 10 ..... 110110 ..... 0 0110 1111 ..... @r_r_r - FANDd 10 ..... 110110 ..... 0 0111 0000 ..... @r_r_r + FANDd 10 ..... 110110 ..... 0 0111 0000 ..... @d_d_d FANDs 10 ..... 110110 ..... 0 0111 0001 ..... @r_r_r - FXNORd 10 ..... 110110 ..... 0 0111 0010 ..... @r_r_r + FXNORd 10 ..... 110110 ..... 0 0111 0010 ..... @d_d_d FXNORs 10 ..... 110110 ..... 0 0111 0011 ..... @r_r_r - FORNOTd 10 ..... 110110 ..... 0 0111 0110 ..... @r_r_r # FORNOT2d + FORNOTd 10 ..... 110110 ..... 0 0111 0110 ..... @d_d_d # FORNOT2d FORNOTs 10 ..... 110110 ..... 0 0111 0111 ..... @r_r_r # FORNOT2s - FORNOTd 10 ..... 110110 ..... 0 0111 1010 ..... @r_r_r_swap # ... 1d + FORNOTd 10 ..... 110110 ..... 0 0111 1010 ..... @d_d_d_swap # ... 1d FORNOTs 10 ..... 110110 ..... 0 0111 1011 ..... @r_r_r_swap # ... 1s - FORd 10 ..... 110110 ..... 0 0111 1100 ..... @r_r_r + FORd 10 ..... 110110 ..... 0 0111 1100 ..... @d_d_d FORs 10 ..... 110110 ..... 0 0111 1101 ..... @r_r_r - FZEROd 10 rd:5 110110 00000 0 0110 0000 00000 + FZEROd 10 ..... 110110 00000 0 0110 0000 00000 rd=%dfp_rd FZEROs 10 rd:5 110110 00000 0 0110 0001 00000 - FONEd 10 rd:5 110110 00000 0 0111 1110 00000 + FONEd 10 ..... 110110 00000 0 0111 1110 00000 rd=%dfp_rd FONEs 10 rd:5 110110 00000 0 0111 1111 00000 ] NCP 10 ----- 110110 ----- --------- ----- # v8 CPop1 @@ -407,9 +454,6 @@ NCP 10 ----- 110111 ----- --------- ----- # v8 CPop2 ## Major Opcode 11 -- load and store instructions ## -%dfp_rd 25:5 !function=extract_dfpreg -%qfp_rd 25:5 !function=extract_qfpreg - &r_r_ri_asi rd rs1 rs2_or_imm asi imm:bool @r_r_ri_na .. rd:5 ...... rs1:5 imm:1 rs2_or_imm:s13 &r_r_ri_asi asi=-1 @d_r_ri_na .. ..... ...... rs1:5 imm:1 rs2_or_imm:s13 \ diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 1eb1a6decf..f3c52c7c48 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -190,14 +190,6 @@ typedef struct DisasContext { #define GET_FIELDs(x,a,b) sign_extend (GET_FIELD(x,a,b), (b) - (a) + 1) #define GET_FIELD_SPs(x,a,b) sign_extend (GET_FIELD_SP(x,a,b), ((b) - (a) + 1)) -#ifdef TARGET_SPARC64 -#define DFPREG(r) (((r & 1) << 5) | (r & 0x1e)) -#define QFPREG(r) (((r & 1) << 5) | (r & 0x1c)) -#else -#define DFPREG(r) (r & 0x1e) -#define QFPREG(r) (r & 0x1c) -#endif - #define UA2005_HTRAP_MASK 0xff #define V8_TRAP_MASK 0x7f @@ -240,34 +232,30 @@ static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v) static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src) { - src = DFPREG(src); return cpu_fpr[src / 2]; } static void gen_store_fpr_D(DisasContext *dc, unsigned int dst, TCGv_i64 v) { - dst = DFPREG(dst); tcg_gen_mov_i64(cpu_fpr[dst / 2], v); gen_update_fprs_dirty(dc, dst); } static TCGv_i64 gen_dest_fpr_D(DisasContext *dc, unsigned int dst) { - return cpu_fpr[DFPREG(dst) / 2]; + return cpu_fpr[dst / 2]; } static TCGv_i128 gen_load_fpr_Q(DisasContext *dc, unsigned int src) { TCGv_i128 ret = tcg_temp_new_i128(); - src = QFPREG(src); tcg_gen_concat_i64_i128(ret, cpu_fpr[src / 2 + 1], cpu_fpr[src / 2]); return ret; } static void gen_store_fpr_Q(DisasContext *dc, unsigned int dst, TCGv_i128 v) { - dst = DFPREG(dst); tcg_gen_extr_i128_i64(cpu_fpr[dst / 2 + 1], cpu_fpr[dst / 2], v); gen_update_fprs_dirty(dc, dst); } @@ -2045,16 +2033,14 @@ static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs) static void gen_fmovq(DisasContext *dc, DisasCompare *cmp, int rd, int rs) { #ifdef TARGET_SPARC64 - int qd = QFPREG(rd); - int qs = QFPREG(rs); TCGv c2 = tcg_constant_tl(cmp->c2); - tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2], cmp->c1, c2, - cpu_fpr[qs / 2], cpu_fpr[qd / 2]); - tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2 + 1], cmp->c1, c2, - cpu_fpr[qs / 2 + 1], cpu_fpr[qd / 2 + 1]); + tcg_gen_movcond_i64(cmp->cond, cpu_fpr[rd / 2], cmp->c1, c2, + cpu_fpr[rs / 2], cpu_fpr[rd / 2]); + tcg_gen_movcond_i64(cmp->cond, cpu_fpr[rd / 2 + 1], cmp->c1, c2, + cpu_fpr[rs / 2 + 1], cpu_fpr[rd / 2 + 1]); - gen_update_fprs_dirty(dc, qd); + gen_update_fprs_dirty(dc, rd); #else qemu_build_not_reached(); #endif @@ -2086,12 +2072,20 @@ static void gen_load_trap_state_at_tl(TCGv_ptr r_tsptr) static int extract_dfpreg(DisasContext *dc, int x) { - return DFPREG(x); + int r = x & 0x1e; +#ifdef TARGET_SPARC64 + r |= (x & 1) << 5; +#endif + return r; } static int extract_qfpreg(DisasContext *dc, int x) { - return QFPREG(x); + int r = x & 0x1c; +#ifdef TARGET_SPARC64 + r |= (x & 1) << 5; +#endif + return r; } /* Include the auto-generated decoder. */ @@ -4253,7 +4247,6 @@ static bool do_dc(DisasContext *dc, int rd, int64_t c) return true; } - rd = DFPREG(rd); tcg_gen_movi_i64(cpu_fpr[rd / 2], c); gen_update_fprs_dirty(dc, rd); return advance_pc(dc); From 52f46d4627a3f5ce52636ff6b01895e4fcd5e924 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 3 Nov 2023 13:21:36 -0700 Subject: [PATCH 07/38] target/sparc: Remove gen_dest_fpr_D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace with tcg_temp_new_i64. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/translate.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/target/sparc/translate.c b/target/sparc/translate.c index f3c52c7c48..750a3e6554 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -241,11 +241,6 @@ static void gen_store_fpr_D(DisasContext *dc, unsigned int dst, TCGv_i64 v) gen_update_fprs_dirty(dc, dst); } -static TCGv_i64 gen_dest_fpr_D(DisasContext *dc, unsigned int dst) -{ - return cpu_fpr[dst / 2]; -} - static TCGv_i128 gen_load_fpr_Q(DisasContext *dc, unsigned int src) { TCGv_i128 ret = tcg_temp_new_i128(); @@ -2020,7 +2015,7 @@ static void gen_fmovs(DisasContext *dc, DisasCompare *cmp, int rd, int rs) static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs) { #ifdef TARGET_SPARC64 - TCGv_i64 dst = gen_dest_fpr_D(dc, rd); + TCGv_i64 dst = tcg_temp_new_i64(); tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, tcg_constant_tl(cmp->c2), gen_load_fpr_D(dc, rs), gen_load_fpr_D(dc, rd)); @@ -4345,7 +4340,7 @@ static bool do_dd(DisasContext *dc, arg_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src = gen_load_fpr_D(dc, a->rs); func(dst, src); gen_store_fpr_D(dc, a->rd, dst); @@ -4367,7 +4362,7 @@ static bool do_env_dd(DisasContext *dc, arg_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src = gen_load_fpr_D(dc, a->rs); func(dst, tcg_env, src); gen_store_fpr_D(dc, a->rd, dst); @@ -4407,7 +4402,7 @@ static bool do_env_df(DisasContext *dc, arg_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src = gen_load_fpr_F(dc, a->rs); func(dst, tcg_env, src); gen_store_fpr_D(dc, a->rd, dst); @@ -4498,7 +4493,7 @@ static bool do_env_dq(DisasContext *dc, arg_r_r *a, } src = gen_load_fpr_Q(dc, a->rs); - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); func(dst, tcg_env, src); gen_store_fpr_D(dc, a->rd, dst); return advance_pc(dc); @@ -4613,7 +4608,7 @@ static bool do_dff(DisasContext *dc, arg_r_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_F(dc, a->rs1); src2 = gen_load_fpr_F(dc, a->rs2); func(dst, src1, src2); @@ -4637,7 +4632,7 @@ static bool do_dfd(DisasContext *dc, arg_r_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_F(dc, a->rs1); src2 = gen_load_fpr_D(dc, a->rs2); func(dst, src1, src2); @@ -4656,7 +4651,7 @@ static bool do_ddd(DisasContext *dc, arg_r_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_D(dc, a->rs1); src2 = gen_load_fpr_D(dc, a->rs2); func(dst, src1, src2); @@ -4721,7 +4716,7 @@ static bool do_env_ddd(DisasContext *dc, arg_r_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_D(dc, a->rs1); src2 = gen_load_fpr_D(dc, a->rs2); func(dst, tcg_env, src1, src2); @@ -4746,7 +4741,7 @@ static bool trans_FsMULd(DisasContext *dc, arg_r_r_r *a) return raise_unimpfpop(dc); } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_F(dc, a->rs1); src2 = gen_load_fpr_F(dc, a->rs2); gen_helper_fsmuld(dst, tcg_env, src1, src2); @@ -4763,7 +4758,7 @@ static bool do_dddd(DisasContext *dc, arg_r_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src0 = gen_load_fpr_D(dc, a->rd); src1 = gen_load_fpr_D(dc, a->rs1); src2 = gen_load_fpr_D(dc, a->rs2); From 1210a0367d5e9eca7679d401e422c48b61b421b0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 3 Nov 2023 14:38:55 -0700 Subject: [PATCH 08/38] target/sparc: Remove cpu_fpr[] Use explicit loads and stores to env instead. Signed-off-by: Richard Henderson --- target/sparc/translate.c | 158 +++++++++++++++++++++------------------ 1 file changed, 84 insertions(+), 74 deletions(-) diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 750a3e6554..362e88de18 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -123,8 +123,7 @@ static TCGv cpu_gsr; #define cpu_xcc_C ({ qemu_build_not_reached(); NULL; }) #endif -/* Floating point registers */ -static TCGv_i64 cpu_fpr[TARGET_DPREGS]; +/* Floating point comparison registers */ static TCGv_i32 cpu_fcc[TARGET_FCCREGS]; #define env_field_offsetof(X) offsetof(CPUSPARCState, X) @@ -209,50 +208,72 @@ static void gen_update_fprs_dirty(DisasContext *dc, int rd) } /* floating point registers moves */ + +static int gen_offset_fpr_F(unsigned int reg) +{ + int ret; + + tcg_debug_assert(reg < 32); + ret= offsetof(CPUSPARCState, fpr[reg / 2]); + if (reg & 1) { + ret += offsetof(CPU_DoubleU, l.lower); + } else { + ret += offsetof(CPU_DoubleU, l.upper); + } + return ret; +} + static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src) { TCGv_i32 ret = tcg_temp_new_i32(); - if (src & 1) { - tcg_gen_extrl_i64_i32(ret, cpu_fpr[src / 2]); - } else { - tcg_gen_extrh_i64_i32(ret, cpu_fpr[src / 2]); - } + tcg_gen_ld_i32(ret, tcg_env, gen_offset_fpr_F(src)); return ret; } static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v) { - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_extu_i32_i64(t, v); - tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t, - (dst & 1 ? 0 : 32), 32); + tcg_gen_st_i32(v, tcg_env, gen_offset_fpr_F(dst)); gen_update_fprs_dirty(dc, dst); } +static int gen_offset_fpr_D(unsigned int reg) +{ + tcg_debug_assert(reg < 64); + tcg_debug_assert(reg % 2 == 0); + return offsetof(CPUSPARCState, fpr[reg / 2]); +} + static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src) { - return cpu_fpr[src / 2]; + TCGv_i64 ret = tcg_temp_new_i64(); + tcg_gen_ld_i64(ret, tcg_env, gen_offset_fpr_D(src)); + return ret; } static void gen_store_fpr_D(DisasContext *dc, unsigned int dst, TCGv_i64 v) { - tcg_gen_mov_i64(cpu_fpr[dst / 2], v); + tcg_gen_st_i64(v, tcg_env, gen_offset_fpr_D(dst)); gen_update_fprs_dirty(dc, dst); } static TCGv_i128 gen_load_fpr_Q(DisasContext *dc, unsigned int src) { TCGv_i128 ret = tcg_temp_new_i128(); + TCGv_i64 h = gen_load_fpr_D(dc, src); + TCGv_i64 l = gen_load_fpr_D(dc, src + 2); - tcg_gen_concat_i64_i128(ret, cpu_fpr[src / 2 + 1], cpu_fpr[src / 2]); + tcg_gen_concat_i64_i128(ret, l, h); return ret; } static void gen_store_fpr_Q(DisasContext *dc, unsigned int dst, TCGv_i128 v) { - tcg_gen_extr_i128_i64(cpu_fpr[dst / 2 + 1], cpu_fpr[dst / 2], v); - gen_update_fprs_dirty(dc, dst); + TCGv_i64 h = tcg_temp_new_i64(); + TCGv_i64 l = tcg_temp_new_i64(); + + tcg_gen_extr_i128_i64(l, h, v); + gen_store_fpr_D(dc, dst, h); + gen_store_fpr_D(dc, dst + 2, l); } /* moves */ @@ -1610,7 +1631,7 @@ static void gen_ldf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, MemOp memop = da->memop; MemOp size = memop & MO_SIZE; TCGv_i32 d32; - TCGv_i64 d64; + TCGv_i64 d64, l64; TCGv addr_tmp; /* TODO: Use 128-bit load/store below. */ @@ -1632,16 +1653,20 @@ static void gen_ldf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, break; case MO_64: - tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da->mem_idx, memop); + d64 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop); + gen_store_fpr_D(dc, rd, d64); break; case MO_128: d64 = tcg_temp_new_i64(); + l64 = tcg_temp_new_i64(); tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop); addr_tmp = tcg_temp_new(); tcg_gen_addi_tl(addr_tmp, addr, 8); - tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + 1], addr_tmp, da->mem_idx, memop); - tcg_gen_mov_i64(cpu_fpr[rd / 2], d64); + tcg_gen_qemu_ld_i64(l64, addr_tmp, da->mem_idx, memop); + gen_store_fpr_D(dc, rd, d64); + gen_store_fpr_D(dc, rd + 2, l64); break; default: g_assert_not_reached(); @@ -1653,9 +1678,11 @@ static void gen_ldf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, if (orig_size == MO_64 && (rd & 7) == 0) { /* The first operation checks required alignment. */ addr_tmp = tcg_temp_new(); + d64 = tcg_temp_new_i64(); for (int i = 0; ; ++i) { - tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + i], addr, da->mem_idx, + tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop | (i == 0 ? MO_ALIGN_64 : 0)); + gen_store_fpr_D(dc, rd + 2 * i, d64); if (i == 7) { break; } @@ -1670,8 +1697,9 @@ static void gen_ldf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, case GET_ASI_SHORT: /* Valid for lddfa only. */ if (orig_size == MO_64) { - tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da->mem_idx, - memop | MO_ALIGN); + d64 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop | MO_ALIGN); + gen_store_fpr_D(dc, rd, d64); } else { gen_exception(dc, TT_ILL_INSN); } @@ -1696,17 +1724,19 @@ static void gen_ldf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, gen_store_fpr_F(dc, rd, d32); break; case MO_64: - gen_helper_ld_asi(cpu_fpr[rd / 2], tcg_env, addr, - r_asi, r_mop); + d64 = tcg_temp_new_i64(); + gen_helper_ld_asi(d64, tcg_env, addr, r_asi, r_mop); + gen_store_fpr_D(dc, rd, d64); break; case MO_128: d64 = tcg_temp_new_i64(); + l64 = tcg_temp_new_i64(); gen_helper_ld_asi(d64, tcg_env, addr, r_asi, r_mop); addr_tmp = tcg_temp_new(); tcg_gen_addi_tl(addr_tmp, addr, 8); - gen_helper_ld_asi(cpu_fpr[rd / 2 + 1], tcg_env, addr_tmp, - r_asi, r_mop); - tcg_gen_mov_i64(cpu_fpr[rd / 2], d64); + gen_helper_ld_asi(l64, tcg_env, addr_tmp, r_asi, r_mop); + gen_store_fpr_D(dc, rd, d64); + gen_store_fpr_D(dc, rd + 2, l64); break; default: g_assert_not_reached(); @@ -1722,6 +1752,7 @@ static void gen_stf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, MemOp memop = da->memop; MemOp size = memop & MO_SIZE; TCGv_i32 d32; + TCGv_i64 d64; TCGv addr_tmp; /* TODO: Use 128-bit load/store below. */ @@ -1741,8 +1772,8 @@ static void gen_stf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, tcg_gen_qemu_st_i32(d32, addr, da->mem_idx, memop | MO_ALIGN); break; case MO_64: - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx, - memop | MO_ALIGN_4); + d64 = gen_load_fpr_D(dc, rd); + tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN_4); break; case MO_128: /* Only 4-byte alignment required. However, it is legal for the @@ -1750,11 +1781,12 @@ static void gen_stf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, required to fix it up. Requiring 16-byte alignment here avoids having to probe the second page before performing the first write. */ - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx, - memop | MO_ALIGN_16); + d64 = gen_load_fpr_D(dc, rd); + tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN_16); addr_tmp = tcg_temp_new(); tcg_gen_addi_tl(addr_tmp, addr, 8); - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + 1], addr_tmp, da->mem_idx, memop); + d64 = gen_load_fpr_D(dc, rd + 2); + tcg_gen_qemu_st_i64(d64, addr_tmp, da->mem_idx, memop); break; default: g_assert_not_reached(); @@ -1767,7 +1799,8 @@ static void gen_stf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, /* The first operation checks required alignment. */ addr_tmp = tcg_temp_new(); for (int i = 0; ; ++i) { - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + i], addr, da->mem_idx, + d64 = gen_load_fpr_D(dc, rd + 2 * i); + tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | (i == 0 ? MO_ALIGN_64 : 0)); if (i == 7) { break; @@ -1783,8 +1816,8 @@ static void gen_stf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, case GET_ASI_SHORT: /* Valid for stdfa only. */ if (orig_size == MO_64) { - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx, - memop | MO_ALIGN); + d64 = gen_load_fpr_D(dc, rd); + tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN); } else { gen_exception(dc, TT_ILL_INSN); } @@ -2029,13 +2062,17 @@ static void gen_fmovq(DisasContext *dc, DisasCompare *cmp, int rd, int rs) { #ifdef TARGET_SPARC64 TCGv c2 = tcg_constant_tl(cmp->c2); + TCGv_i64 h = tcg_temp_new_i64(); + TCGv_i64 l = tcg_temp_new_i64(); - tcg_gen_movcond_i64(cmp->cond, cpu_fpr[rd / 2], cmp->c1, c2, - cpu_fpr[rs / 2], cpu_fpr[rd / 2]); - tcg_gen_movcond_i64(cmp->cond, cpu_fpr[rd / 2 + 1], cmp->c1, c2, - cpu_fpr[rs / 2 + 1], cpu_fpr[rd / 2 + 1]); - - gen_update_fprs_dirty(dc, rd); + tcg_gen_movcond_i64(cmp->cond, h, cmp->c1, c2, + gen_load_fpr_D(dc, rs), + gen_load_fpr_D(dc, rd)); + tcg_gen_movcond_i64(cmp->cond, l, cmp->c1, c2, + gen_load_fpr_D(dc, rs + 2), + gen_load_fpr_D(dc, rd + 2)); + gen_store_fpr_D(dc, rd, h); + gen_store_fpr_D(dc, rd + 2, l); #else qemu_build_not_reached(); #endif @@ -4211,39 +4248,24 @@ static bool do_stfsr(DisasContext *dc, arg_r_r_ri *a, MemOp mop) TRANS(STFSR, ALL, do_stfsr, a, MO_TEUL) TRANS(STXFSR, 64, do_stfsr, a, MO_TEUQ) -static bool do_fc(DisasContext *dc, int rd, bool c) +static bool do_fc(DisasContext *dc, int rd, int32_t c) { - uint64_t mask; - if (gen_trap_ifnofpu(dc)) { return true; } - - if (rd & 1) { - mask = MAKE_64BIT_MASK(0, 32); - } else { - mask = MAKE_64BIT_MASK(32, 32); - } - if (c) { - tcg_gen_ori_i64(cpu_fpr[rd / 2], cpu_fpr[rd / 2], mask); - } else { - tcg_gen_andi_i64(cpu_fpr[rd / 2], cpu_fpr[rd / 2], ~mask); - } - gen_update_fprs_dirty(dc, rd); + gen_store_fpr_F(dc, rd, tcg_constant_i32(c)); return advance_pc(dc); } TRANS(FZEROs, VIS1, do_fc, a->rd, 0) -TRANS(FONEs, VIS1, do_fc, a->rd, 1) +TRANS(FONEs, VIS1, do_fc, a->rd, -1) static bool do_dc(DisasContext *dc, int rd, int64_t c) { if (gen_trap_ifnofpu(dc)) { return true; } - - tcg_gen_movi_i64(cpu_fpr[rd / 2], c); - gen_update_fprs_dirty(dc, rd); + gen_store_fpr_D(dc, rd, tcg_constant_i64(c)); return advance_pc(dc); } @@ -5137,12 +5159,6 @@ void sparc_tcg_init(void) "l0", "l1", "l2", "l3", "l4", "l5", "l6", "l7", "i0", "i1", "i2", "i3", "i4", "i5", "i6", "i7", }; - static const char fregnames[32][4] = { - "f0", "f2", "f4", "f6", "f8", "f10", "f12", "f14", - "f16", "f18", "f20", "f22", "f24", "f26", "f28", "f30", - "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", - "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", - }; static const struct { TCGv_i32 *ptr; int off; const char *name; } r32[] = { #ifdef TARGET_SPARC64 @@ -5199,12 +5215,6 @@ void sparc_tcg_init(void) (i - 8) * sizeof(target_ulong), gregnames[i]); } - - for (i = 0; i < TARGET_DPREGS; i++) { - cpu_fpr[i] = tcg_global_mem_new_i64(tcg_env, - offsetof(CPUSPARCState, fpr[i]), - fregnames[i]); - } } void sparc_restore_state_to_opc(CPUState *cs, From 28c131a34d402811bdb51a0f289bd975074884bc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 3 Nov 2023 14:52:45 -0700 Subject: [PATCH 09/38] target/sparc: Use gvec for VIS1 parallel add/sub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/translate.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 362e88de18..7c290293ea 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -4664,6 +4664,24 @@ static bool do_dfd(DisasContext *dc, arg_r_r_r *a, TRANS(FMUL8x16, VIS1, do_dfd, a, gen_helper_fmul8x16) +static bool do_gvec_ddd(DisasContext *dc, arg_r_r_r *a, MemOp vece, + void (*func)(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t)) +{ + if (gen_trap_ifnofpu(dc)) { + return true; + } + + func(vece, gen_offset_fpr_D(a->rd), gen_offset_fpr_D(a->rs1), + gen_offset_fpr_D(a->rs2), 8, 8); + return advance_pc(dc); +} + +TRANS(FPADD16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_add) +TRANS(FPADD32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_add) +TRANS(FPSUB16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sub) +TRANS(FPSUB32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sub) + static bool do_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i64, TCGv_i64)) { @@ -4684,10 +4702,6 @@ static bool do_ddd(DisasContext *dc, arg_r_r_r *a, TRANS(FMUL8SUx16, VIS1, do_ddd, a, gen_helper_fmul8sux16) TRANS(FMUL8ULx16, VIS1, do_ddd, a, gen_helper_fmul8ulx16) -TRANS(FPADD16, VIS1, do_ddd, a, tcg_gen_vec_add16_i64) -TRANS(FPADD32, VIS1, do_ddd, a, tcg_gen_vec_add32_i64) -TRANS(FPSUB16, VIS1, do_ddd, a, tcg_gen_vec_sub16_i64) -TRANS(FPSUB32, VIS1, do_ddd, a, tcg_gen_vec_sub32_i64) TRANS(FNORd, VIS1, do_ddd, a, tcg_gen_nor_i64) TRANS(FANDNOTd, VIS1, do_ddd, a, tcg_gen_andc_i64) TRANS(FXORd, VIS1, do_ddd, a, tcg_gen_xor_i64) From 4fd71d19acd6e05b74927a0b5c4a5b0650e3d6f5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 12:13:00 -0700 Subject: [PATCH 10/38] target/sparc: Implement FMAf extension Rearrange PDIST so that do_dddd is general purpose and may be re-used for FMADDd etc. Add pickNaN and pickNaNMulAdd. Signed-off-by: Richard Henderson --- fpu/softfloat-specialize.c.inc | 31 +++++++++++++ linux-user/elfload.c | 1 + target/sparc/cpu-feature.h.inc | 1 + target/sparc/cpu.c | 3 ++ target/sparc/fop_helper.c | 16 +++++++ target/sparc/helper.h | 2 + target/sparc/insns.decode | 23 +++++++++- target/sparc/translate.c | 84 ++++++++++++++++++++++++++++++++-- 8 files changed, 155 insertions(+), 6 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index f573014532..8f3b97d9bf 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -447,6 +447,17 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, } else { return 1; } +#elif defined(TARGET_SPARC) + /* Prefer SNaN over QNaN, order B then A. */ + if (is_snan(b_cls)) { + return 1; + } else if (is_snan(a_cls)) { + return 0; + } else if (is_qnan(b_cls)) { + return 1; + } else { + return 0; + } #elif defined(TARGET_XTENSA) /* * Xtensa has two NaN propagation modes. @@ -624,6 +635,26 @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, float_raise(float_flag_invalid | float_flag_invalid_imz, status); } return 3; /* default NaN */ +#elif defined(TARGET_SPARC) + /* For (inf,0,nan) return c. */ + if (infzero) { + float_raise(float_flag_invalid | float_flag_invalid_imz, status); + return 2; + } + /* Prefer SNaN over QNaN, order C, B, A. */ + if (is_snan(c_cls)) { + return 2; + } else if (is_snan(b_cls)) { + return 1; + } else if (is_snan(a_cls)) { + return 0; + } else if (is_qnan(c_cls)) { + return 2; + } else if (is_qnan(b_cls)) { + return 1; + } else { + return 0; + } #elif defined(TARGET_XTENSA) /* * For Xtensa, the (inf,zero,nan) case sets InvalidOp and returns diff --git a/linux-user/elfload.c b/linux-user/elfload.c index c1e1511ff2..6a1457346a 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1003,6 +1003,7 @@ static uint32_t get_elf_hwcap(void) r |= features & CPU_FEATURE_FSMULD ? HWCAP_SPARC_FSMULD : 0; r |= features & CPU_FEATURE_VIS1 ? HWCAP_SPARC_VIS : 0; r |= features & CPU_FEATURE_VIS2 ? HWCAP_SPARC_VIS2 : 0; + r |= features & CPU_FEATURE_FMAF ? HWCAP_SPARC_FMAF : 0; #endif return r; diff --git a/target/sparc/cpu-feature.h.inc b/target/sparc/cpu-feature.h.inc index d800f18c4e..a30b9255b2 100644 --- a/target/sparc/cpu-feature.h.inc +++ b/target/sparc/cpu-feature.h.inc @@ -12,3 +12,4 @@ FEATURE(ASR17) FEATURE(CACHE_CTRL) FEATURE(POWERDOWN) FEATURE(CASA) +FEATURE(FMAF) diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 5be1592e66..ed9238a69d 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -549,6 +549,7 @@ static const char * const feature_name[] = { [CPU_FEATURE_BIT_HYPV] = "hypv", [CPU_FEATURE_BIT_VIS1] = "vis1", [CPU_FEATURE_BIT_VIS2] = "vis2", + [CPU_FEATURE_BIT_FMAF] = "fmaf", #else [CPU_FEATURE_BIT_MUL] = "mul", [CPU_FEATURE_BIT_DIV] = "div", @@ -877,6 +878,8 @@ static Property sparc_cpu_properties[] = { CPU_FEATURE_BIT_VIS1, false), DEFINE_PROP_BIT("vis2", SPARCCPU, env.def.features, CPU_FEATURE_BIT_VIS2, false), + DEFINE_PROP_BIT("fmaf", SPARCCPU, env.def.features, + CPU_FEATURE_BIT_FMAF, false), #else DEFINE_PROP_BIT("mul", SPARCCPU, env.def.features, CPU_FEATURE_BIT_MUL, false), diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c index 1205a599ef..1de44d79c1 100644 --- a/target/sparc/fop_helper.c +++ b/target/sparc/fop_helper.c @@ -343,6 +343,22 @@ Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src) return f128_ret(ret); } +float32 helper_fmadds(CPUSPARCState *env, float32 s1, + float32 s2, float32 s3, uint32_t op) +{ + float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status); + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float64 helper_fmaddd(CPUSPARCState *env, float64 s1, + float64 s2, float64 s3, uint32_t op) +{ + float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status); + check_ieee_exceptions(env, GETPC()); + return ret; +} + static uint32_t finish_fcmp(CPUSPARCState *env, FloatRelation r, uintptr_t ra) { check_ieee_exceptions(env, ra); diff --git a/target/sparc/helper.h b/target/sparc/helper.h index 97fbf6f66c..f4d3311ac4 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -56,6 +56,7 @@ DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64) +DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32) DEF_HELPER_FLAGS_3(faddq, TCG_CALL_NO_WG, i128, env, i128, i128) DEF_HELPER_FLAGS_3(fsubq, TCG_CALL_NO_WG, i128, env, i128, i128) @@ -66,6 +67,7 @@ DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32) +DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32) DEF_HELPER_FLAGS_3(fsmuld, TCG_CALL_NO_WG, f64, env, f32, f32) DEF_HELPER_FLAGS_3(fdmulq, TCG_CALL_NO_WG, i128, env, f64, f64) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 02fa505b49..056fba98f9 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -29,6 +29,7 @@ CALL 01 i:s30 %dfp_rd 25:5 !function=extract_dfpreg %dfp_rs1 14:5 !function=extract_dfpreg %dfp_rs2 0:5 !function=extract_dfpreg +%dfp_rs3 9:5 !function=extract_dfpreg %qfp_rd 25:5 !function=extract_qfpreg %qfp_rs1 14:5 !function=extract_qfpreg @@ -80,6 +81,11 @@ CALL 01 i:s30 @q_d2 .. ..... ...... ..... . ........ ..... \ &r_r rd=%qfp_rd rs=%dfp_rs2 +&r_r_r_r rd rs1 rs2 rs3 +@r_r_r_r .. rd:5 ...... rs1:5 rs3:5 .... rs2:5 &r_r_r_r +@d_d_d_d .. ..... ...... ..... ..... .... ..... \ + &r_r_r_r rd=%dfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2 rs3=%dfp_rs3 + { [ STBAR 10 00000 101000 01111 0 0000000000000 @@ -394,7 +400,8 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FPACK32 10 ..... 110110 ..... 0 0011 1010 ..... @d_d_d FPACK16 10 ..... 110110 00000 0 0011 1011 ..... @r_d2 FPACKFIX 10 ..... 110110 00000 0 0011 1101 ..... @r_d2 - PDIST 10 ..... 110110 ..... 0 0011 1110 ..... @d_d_d + PDIST 10 ..... 110110 ..... 0 0011 1110 ..... \ + &r_r_r_r rd=%dfp_rd rs1=%dfp_rd rs2=%dfp_rs1 rs3=%dfp_rs2 FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @d_d_d FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @d_r_r @@ -448,7 +455,19 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ NCP 10 ----- 110110 ----- --------- ----- # v8 CPop1 } -NCP 10 ----- 110111 ----- --------- ----- # v8 CPop2 +{ + [ + FMADDs 10 ..... 110111 ..... ..... 0001 ..... @r_r_r_r + FMADDd 10 ..... 110111 ..... ..... 0010 ..... @d_d_d_d + FMSUBs 10 ..... 110111 ..... ..... 0101 ..... @r_r_r_r + FMSUBd 10 ..... 110111 ..... ..... 0110 ..... @d_d_d_d + FNMSUBs 10 ..... 110111 ..... ..... 1001 ..... @r_r_r_r + FNMSUBd 10 ..... 110111 ..... ..... 1010 ..... @d_d_d_d + FNMADDs 10 ..... 110111 ..... ..... 1101 ..... @r_r_r_r + FNMADDd 10 ..... 110111 ..... ..... 1110 ..... @d_d_d_d + ] + NCP 10 ----- 110111 ----- --------- ----- # v8 CPop2 +} ## ## Major Opcode 11 -- load and store instructions diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 7c290293ea..5efd09f4f4 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -28,6 +28,7 @@ #include "exec/helper-gen.h" #include "exec/translator.h" #include "exec/log.h" +#include "fpu/softfloat.h" #include "asi.h" #define HELPER_H "helper.h" @@ -1142,6 +1143,52 @@ static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src) tcg_gen_concat_i64_i128(dst, l, h); } +static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3) +{ + gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0)); +} + +static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3) +{ + gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0)); +} + +static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3) +{ + int op = float_muladd_negate_c; + gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3) +{ + int op = float_muladd_negate_c; + gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3) +{ + int op = float_muladd_negate_c | float_muladd_negate_result; + gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3) +{ + int op = float_muladd_negate_c | float_muladd_negate_result; + gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3) +{ + int op = float_muladd_negate_result; + gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3) +{ + int op = float_muladd_negate_result; + gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + static void gen_op_fpexception_im(DisasContext *dc, int ftt) { /* @@ -2136,6 +2183,7 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_MUL(C) true # define avail_POWERDOWN(C) false # define avail_64(C) true +# define avail_FMAF(C) ((C)->def->features & CPU_FEATURE_FMAF) # define avail_GL(C) ((C)->def->features & CPU_FEATURE_GL) # define avail_HYPV(C) ((C)->def->features & CPU_FEATURE_HYPV) # define avail_VIS1(C) ((C)->def->features & CPU_FEATURE_VIS1) @@ -2148,6 +2196,7 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_MUL(C) ((C)->def->features & CPU_FEATURE_MUL) # define avail_POWERDOWN(C) ((C)->def->features & CPU_FEATURE_POWERDOWN) # define avail_64(C) false +# define avail_FMAF(C) false # define avail_GL(C) false # define avail_HYPV(C) false # define avail_VIS1(C) false @@ -4785,25 +4834,52 @@ static bool trans_FsMULd(DisasContext *dc, arg_r_r_r *a) return advance_pc(dc); } -static bool do_dddd(DisasContext *dc, arg_r_r_r *a, +static bool do_ffff(DisasContext *dc, arg_r_r_r_r *a, + void (*func)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32)) +{ + TCGv_i32 dst, src1, src2, src3; + + if (gen_trap_ifnofpu(dc)) { + return true; + } + + src1 = gen_load_fpr_F(dc, a->rs1); + src2 = gen_load_fpr_F(dc, a->rs2); + src3 = gen_load_fpr_F(dc, a->rs3); + dst = tcg_temp_new_i32(); + func(dst, src1, src2, src3); + gen_store_fpr_F(dc, a->rd, dst); + return advance_pc(dc); +} + +TRANS(FMADDs, FMAF, do_ffff, a, gen_op_fmadds) +TRANS(FMSUBs, FMAF, do_ffff, a, gen_op_fmsubs) +TRANS(FNMSUBs, FMAF, do_ffff, a, gen_op_fnmsubs) +TRANS(FNMADDs, FMAF, do_ffff, a, gen_op_fnmadds) + +static bool do_dddd(DisasContext *dc, arg_r_r_r_r *a, void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) { - TCGv_i64 dst, src0, src1, src2; + TCGv_i64 dst, src1, src2, src3; if (gen_trap_ifnofpu(dc)) { return true; } dst = tcg_temp_new_i64(); - src0 = gen_load_fpr_D(dc, a->rd); src1 = gen_load_fpr_D(dc, a->rs1); src2 = gen_load_fpr_D(dc, a->rs2); - func(dst, src0, src1, src2); + src3 = gen_load_fpr_D(dc, a->rs3); + func(dst, src1, src2, src3); gen_store_fpr_D(dc, a->rd, dst); return advance_pc(dc); } TRANS(PDIST, VIS1, do_dddd, a, gen_helper_pdist) +TRANS(FMADDd, FMAF, do_dddd, a, gen_op_fmaddd) +TRANS(FMSUBd, FMAF, do_dddd, a, gen_op_fmsubd) +TRANS(FNMSUBd, FMAF, do_dddd, a, gen_op_fnmsubd) +TRANS(FNMADDd, FMAF, do_dddd, a, gen_op_fnmaddd) static bool do_env_qqq(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i128, TCGv_env, TCGv_i128, TCGv_i128)) From 3335a04806d337c69f44a707cdc27515d6c91d84 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 12:21:37 -0700 Subject: [PATCH 11/38] target/sparc: Add feature bits for VIS 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The manual separates VIS 3 and VIS 3B, even though they are both present in all extant cpus. For clarity, let the translator match the manual but otherwise leave them on the same feature bit. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/cpu-feature.h.inc | 1 + target/sparc/translate.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/target/sparc/cpu-feature.h.inc b/target/sparc/cpu-feature.h.inc index a30b9255b2..3913fb4a54 100644 --- a/target/sparc/cpu-feature.h.inc +++ b/target/sparc/cpu-feature.h.inc @@ -13,3 +13,4 @@ FEATURE(CACHE_CTRL) FEATURE(POWERDOWN) FEATURE(CASA) FEATURE(FMAF) +FEATURE(VIS3) diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 5efd09f4f4..59b922c903 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -2188,6 +2188,8 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_HYPV(C) ((C)->def->features & CPU_FEATURE_HYPV) # define avail_VIS1(C) ((C)->def->features & CPU_FEATURE_VIS1) # define avail_VIS2(C) ((C)->def->features & CPU_FEATURE_VIS2) +# define avail_VIS3(C) ((C)->def->features & CPU_FEATURE_VIS3) +# define avail_VIS3B(C) avail_VIS3(C) #else # define avail_32(C) true # define avail_ASR17(C) ((C)->def->features & CPU_FEATURE_ASR17) @@ -2201,6 +2203,8 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_HYPV(C) false # define avail_VIS1(C) false # define avail_VIS2(C) false +# define avail_VIS3(C) false +# define avail_VIS3B(C) false #endif /* Default case for non jump instructions. */ From 015fc6fcdb90034a7551091f8cd9a47ca1bd26b1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 12:33:08 -0700 Subject: [PATCH 12/38] target/sparc: Implement ADDXC, ADDXCcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 3 +++ target/sparc/translate.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 056fba98f9..5d1c55aa78 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -376,6 +376,9 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ ARRAY16 10 ..... 110110 ..... 0 0001 0010 ..... @r_r_r ARRAY32 10 ..... 110110 ..... 0 0001 0100 ..... @r_r_r + ADDXC 10 ..... 110110 ..... 0 0001 0001 ..... @r_r_r + ADDXCcc 10 ..... 110110 ..... 0 0001 0011 ..... @r_r_r + ALIGNADDR 10 ..... 110110 ..... 0 0001 1000 ..... @r_r_r ALIGNADDRL 10 ..... 110110 ..... 0 0001 1010 ..... @r_r_r diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 59b922c903..ad12486758 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -433,6 +433,17 @@ static void gen_op_addccc(TCGv dst, TCGv src1, TCGv src2) gen_op_addcc_int(dst, src1, src2, gen_carry32()); } +static void gen_op_addxc(TCGv dst, TCGv src1, TCGv src2) +{ + tcg_gen_add_tl(dst, src1, src2); + tcg_gen_add_tl(dst, dst, cpu_cc_C); +} + +static void gen_op_addxccc(TCGv dst, TCGv src1, TCGv src2) +{ + gen_op_addcc_int(dst, src1, src2, cpu_cc_C); +} + static void gen_op_subcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin) { TCGv z = tcg_constant_tl(0); @@ -3692,6 +3703,9 @@ TRANS(ARRAY8, VIS1, do_rrr, a, gen_helper_array8) TRANS(ARRAY16, VIS1, do_rrr, a, gen_op_array16) TRANS(ARRAY32, VIS1, do_rrr, a, gen_op_array32) +TRANS(ADDXC, VIS3, do_rrr, a, gen_op_addxc) +TRANS(ADDXCcc, VIS3, do_rrr, a, gen_op_addxccc) + static void gen_op_alignaddr(TCGv dst, TCGv s1, TCGv s2) { #ifdef TARGET_SPARC64 From c973b4e8df6e4a7b0080e3a93742a4d56baeb84b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 12:55:49 -0700 Subject: [PATCH 13/38] target/sparc: Implement CMASK instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/helper.h | 3 +++ target/sparc/insns.decode | 4 ++++ target/sparc/translate.c | 13 +++++++++++++ target/sparc/vis_helper.c | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/target/sparc/helper.h b/target/sparc/helper.h index f4d3311ac4..84435b0932 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -107,6 +107,9 @@ DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_NO_RWG_SE, i32, i64, i64) DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) DEF_HELPER_FLAGS_2(fpackfix, TCG_CALL_NO_RWG_SE, i32, i64, i64) DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) +DEF_HELPER_FLAGS_2(cmask8, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(cmask16, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(cmask32, TCG_CALL_NO_RWG_SE, i64, i64, i64) #define VIS_CMPHELPER(name) \ DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_NO_RWG_SE, \ i64, i64, i64) \ diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 5d1c55aa78..8be808d065 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -384,6 +384,10 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ BMASK 10 ..... 110110 ..... 0 0001 1001 ..... @r_r_r + CMASK8 10 00000 110110 00000 0 0001 1011 rs2:5 + CMASK16 10 00000 110110 00000 0 0001 1101 rs2:5 + CMASK32 10 00000 110110 00000 0 0001 1111 rs2:5 + FPCMPLE16 10 ..... 110110 ..... 0 0010 0000 ..... @r_d_d FPCMPNE16 10 ..... 110110 ..... 0 0010 0010 ..... @r_d_d FPCMPGT16 10 ..... 110110 ..... 0 0010 1000 ..... @r_d_d diff --git a/target/sparc/translate.c b/target/sparc/translate.c index ad12486758..2b0a1f5a9a 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -61,6 +61,9 @@ # define gen_helper_write_softint(E, S) qemu_build_not_reached() # define gen_helper_wrpil(E, S) qemu_build_not_reached() # define gen_helper_wrpstate(E, S) qemu_build_not_reached() +# define gen_helper_cmask8 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_cmask16 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_cmask32 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpeq16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpeq32 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpgt16 ({ qemu_build_not_reached(); NULL; }) @@ -3748,6 +3751,16 @@ static void gen_op_bmask(TCGv dst, TCGv s1, TCGv s2) TRANS(BMASK, VIS2, do_rrr, a, gen_op_bmask) +static bool do_cmask(DisasContext *dc, int rs2, void (*func)(TCGv, TCGv, TCGv)) +{ + func(cpu_gsr, cpu_gsr, gen_load_gpr(dc, rs2)); + return true; +} + +TRANS(CMASK8, VIS3, do_cmask, a->rs2, gen_helper_cmask8) +TRANS(CMASK16, VIS3, do_cmask, a->rs2, gen_helper_cmask16) +TRANS(CMASK32, VIS3, do_cmask, a->rs2, gen_helper_cmask32) + static bool do_shift_r(DisasContext *dc, arg_shiftr *a, bool l, bool u) { TCGv dst, src1, src2; diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index 41312deda4..20baa4ff71 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -351,3 +351,41 @@ uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2) return r.ll; } + +uint64_t helper_cmask8(uint64_t gsr, uint64_t src) +{ + uint32_t mask = 0; + + mask |= (src & 0x01 ? 0x00000007 : 0x0000000f); + mask |= (src & 0x02 ? 0x00000060 : 0x000000e0); + mask |= (src & 0x04 ? 0x00000500 : 0x00000d00); + mask |= (src & 0x08 ? 0x00004000 : 0x0000c000); + mask |= (src & 0x10 ? 0x00030000 : 0x000b0000); + mask |= (src & 0x20 ? 0x00200000 : 0x00a00000); + mask |= (src & 0x40 ? 0x01000000 : 0x09000000); + mask |= (src & 0x80 ? 0x00000000 : 0x80000000); + + return deposit64(gsr, 32, 32, mask); +} + +uint64_t helper_cmask16(uint64_t gsr, uint64_t src) +{ + uint32_t mask = 0; + + mask |= (src & 0x1 ? 0x00000067 : 0x000000ef); + mask |= (src & 0x2 ? 0x00004500 : 0x0000cd00); + mask |= (src & 0x4 ? 0x00230000 : 0x00ab0000); + mask |= (src & 0x8 ? 0x01000000 : 0x89000000); + + return deposit64(gsr, 32, 32, mask); +} + +uint64_t helper_cmask32(uint64_t gsr, uint64_t src) +{ + uint32_t mask = 0; + + mask |= (src & 0x1 ? 0x00004567 : 0x0000cdef); + mask |= (src & 0x2 ? 0x01230000 : 0x89ab0000); + + return deposit64(gsr, 32, 32, mask); +} From 7837185e40dc8b5e97e33e3fbdf94be0b55ef585 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 14:57:27 -0700 Subject: [PATCH 14/38] target/sparc: Implement FCHKSM16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/helper.h | 1 + target/sparc/insns.decode | 1 + target/sparc/translate.c | 32 ++++++++++++++++++++++++++++++++ target/sparc/vis_helper.c | 23 +++++++++++++++++++++++ 4 files changed, 57 insertions(+) diff --git a/target/sparc/helper.h b/target/sparc/helper.h index 84435b0932..e59307efc2 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -110,6 +110,7 @@ DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) DEF_HELPER_FLAGS_2(cmask8, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(cmask16, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(cmask32, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fchksm16, TCG_CALL_NO_RWG_SE, i64, i64, i64) #define VIS_CMPHELPER(name) \ DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_NO_RWG_SE, \ i64, i64, i64) \ diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 8be808d065..18d068d2af 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -410,6 +410,7 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ PDIST 10 ..... 110110 ..... 0 0011 1110 ..... \ &r_r_r_r rd=%dfp_rd rs1=%dfp_rd rs2=%dfp_rs1 rs3=%dfp_rs2 + FCHKSM16 10 ..... 110110 ..... 0 0100 0100 ..... @d_d_d FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @d_d_d FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @d_r_r BSHUFFLE 10 ..... 110110 ..... 0 0100 1100 ..... @d_d_d diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 2b0a1f5a9a..b8ba8eea74 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -788,6 +788,37 @@ static void gen_op_fmuld8sux16(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) tcg_gen_concat_i32_i64(dst, t0, t1); } +#ifdef TARGET_SPARC64 +static void gen_vec_fchksm16(unsigned vece, TCGv_vec dst, + TCGv_vec src1, TCGv_vec src2) +{ + TCGv_vec a = tcg_temp_new_vec_matching(dst); + TCGv_vec c = tcg_temp_new_vec_matching(dst); + + tcg_gen_add_vec(vece, a, src1, src2); + tcg_gen_cmp_vec(TCG_COND_LTU, vece, c, a, src1); + /* Vector cmp produces -1 for true, so subtract to add carry. */ + tcg_gen_sub_vec(vece, dst, a, c); +} + +static void gen_op_fchksm16(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_cmp_vec, INDEX_op_add_vec, INDEX_op_sub_vec, + }; + static const GVecGen3 op = { + .fni8 = gen_helper_fchksm16, + .fniv = gen_vec_fchksm16, + .opt_opc = vecop_list, + .vece = MO_16, + }; + tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &op); +} +#else +#define gen_op_fchksm16 ({ qemu_build_not_reached(); NULL; }) +#endif + static void finishing_insn(DisasContext *dc) { /* @@ -4761,6 +4792,7 @@ TRANS(FPADD16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_add) TRANS(FPADD32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_add) TRANS(FPSUB16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sub) TRANS(FPSUB32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sub) +TRANS(FCHKSM16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fchksm16) static bool do_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i64, TCGv_i64)) diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index 20baa4ff71..fa607375d2 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -389,3 +389,26 @@ uint64_t helper_cmask32(uint64_t gsr, uint64_t src) return deposit64(gsr, 32, 32, mask); } + +static inline uint16_t do_fchksm16(uint16_t src1, uint16_t src2) +{ + uint16_t a = src1 + src2; + uint16_t c = a < src1; + return a + c; +} + +uint64_t helper_fchksm16(uint64_t src1, uint64_t src2) +{ + VIS64 r, s1, s2; + + s1.ll = src1; + s2.ll = src2; + r.ll = 0; + + r.VIS_W64(0) = do_fchksm16(s1.VIS_W64(0), s2.VIS_W64(0)); + r.VIS_W64(1) = do_fchksm16(s1.VIS_W64(1), s2.VIS_W64(1)); + r.VIS_W64(2) = do_fchksm16(s1.VIS_W64(2), s2.VIS_W64(2)); + r.VIS_W64(3) = do_fchksm16(s1.VIS_W64(3), s2.VIS_W64(3)); + + return r.ll; +} From 3d50b7287e3c11b769945fd7352788d69b1a5a5e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 14:59:24 -0700 Subject: [PATCH 15/38] target/sparc: Implement FHADD, FHSUB, FNHADD, FNADD, FNMUL Signed-off-by: Richard Henderson --- target/sparc/fop_helper.c | 68 +++++++++++++++++++++++++++++++++++ target/sparc/helper.h | 5 +++ target/sparc/insns.decode | 11 ++++++ target/sparc/translate.c | 76 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+) diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c index 1de44d79c1..08b5f96f95 100644 --- a/target/sparc/fop_helper.c +++ b/target/sparc/fop_helper.c @@ -359,6 +359,74 @@ float64 helper_fmaddd(CPUSPARCState *env, float64 s1, return ret; } +float32 helper_fnadds(CPUSPARCState *env, float32 src1, float32 src2) +{ + float32 ret = float32_add(src1, src2, &env->fp_status); + + /* + * NaN inputs or result do not get a sign change. + * Nor, apparently, does zero: on hardware, -(x + -x) yields +0. + */ + if (!float32_is_any_nan(ret) && !float32_is_zero(ret)) { + ret = float32_chs(ret); + } + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float32 helper_fnmuls(CPUSPARCState *env, float32 src1, float32 src2) +{ + float32 ret = float32_mul(src1, src2, &env->fp_status); + + /* NaN inputs or result do not get a sign change. */ + if (!float32_is_any_nan(ret)) { + ret = float32_chs(ret); + } + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float64 helper_fnaddd(CPUSPARCState *env, float64 src1, float64 src2) +{ + float64 ret = float64_add(src1, src2, &env->fp_status); + + /* + * NaN inputs or result do not get a sign change. + * Nor, apparently, does zero: on hardware, -(x + -x) yields +0. + */ + if (!float64_is_any_nan(ret) && !float64_is_zero(ret)) { + ret = float64_chs(ret); + } + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float64 helper_fnmuld(CPUSPARCState *env, float64 src1, float64 src2) +{ + float64 ret = float64_mul(src1, src2, &env->fp_status); + + /* NaN inputs or result do not get a sign change. */ + if (!float64_is_any_nan(ret)) { + ret = float64_chs(ret); + } + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float64 helper_fnsmuld(CPUSPARCState *env, float32 src1, float32 src2) +{ + float64 ret = float64_mul(float32_to_float64(src1, &env->fp_status), + float32_to_float64(src2, &env->fp_status), + &env->fp_status); + + /* NaN inputs or result do not get a sign change. */ + if (!float64_is_any_nan(ret)) { + ret = float64_chs(ret); + } + check_ieee_exceptions(env, GETPC()); + return ret; +} + static uint32_t finish_fcmp(CPUSPARCState *env, FloatRelation r, uintptr_t ra) { check_ieee_exceptions(env, ra); diff --git a/target/sparc/helper.h b/target/sparc/helper.h index e59307efc2..15f0907a1b 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -57,6 +57,8 @@ DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32) +DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64) +DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_3(faddq, TCG_CALL_NO_WG, i128, env, i128, i128) DEF_HELPER_FLAGS_3(fsubq, TCG_CALL_NO_WG, i128, env, i128, i128) @@ -68,8 +70,11 @@ DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32) +DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32) +DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fsmuld, TCG_CALL_NO_WG, f64, env, f32, f32) +DEF_HELPER_FLAGS_3(fnsmuld, TCG_CALL_NO_WG, f64, env, f32, f32) DEF_HELPER_FLAGS_3(fdmulq, TCG_CALL_NO_WG, i128, env, f64, f64) DEF_HELPER_FLAGS_2(fitod, TCG_CALL_NO_WG, f64, env, s32) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 18d068d2af..6ec3838865 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -307,8 +307,19 @@ FMULq 10 ..... 110100 ..... 0 0100 1011 ..... @q_q_q FDIVs 10 ..... 110100 ..... 0 0100 1101 ..... @r_r_r FDIVd 10 ..... 110100 ..... 0 0100 1110 ..... @d_d_d FDIVq 10 ..... 110100 ..... 0 0100 1111 ..... @q_q_q +FNADDs 10 ..... 110100 ..... 0 0101 0001 ..... @r_r_r +FNADDd 10 ..... 110100 ..... 0 0101 0010 ..... @d_d_d +FNMULs 10 ..... 110100 ..... 0 0101 1001 ..... @r_r_r +FNMULd 10 ..... 110100 ..... 0 0101 1010 ..... @d_d_d +FHADDs 10 ..... 110100 ..... 0 0110 0001 ..... @r_r_r +FHADDd 10 ..... 110100 ..... 0 0110 0010 ..... @d_d_d +FHSUBs 10 ..... 110100 ..... 0 0110 0101 ..... @r_r_r +FHSUBd 10 ..... 110100 ..... 0 0110 0110 ..... @d_d_d FsMULd 10 ..... 110100 ..... 0 0110 1001 ..... @d_r_r FdMULq 10 ..... 110100 ..... 0 0110 1110 ..... @q_d_d +FNHADDs 10 ..... 110100 ..... 0 0111 0001 ..... @r_r_r +FNHADDd 10 ..... 110100 ..... 0 0111 0010 ..... @d_d_d +FNsMULd 10 ..... 110100 ..... 0 0111 1001 ..... @d_r_r FsTOx 10 ..... 110100 00000 0 1000 0001 ..... @r_r2 FdTOx 10 ..... 110100 00000 0 1000 0010 ..... @r_d2 FqTOx 10 ..... 110100 00000 0 1000 0011 ..... @r_q2 diff --git a/target/sparc/translate.c b/target/sparc/translate.c index b8ba8eea74..d81b9ce5a8 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -1234,6 +1234,51 @@ static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3) gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); } +/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. */ +static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2) +{ + TCGv_i32 one = tcg_constant_i32(float32_one); + int op = float_muladd_halve_result; + gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + +static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2) +{ + TCGv_i64 one = tcg_constant_i64(float64_one); + int op = float_muladd_halve_result; + gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + +/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */ +static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2) +{ + TCGv_i32 one = tcg_constant_i32(float32_one); + int op = float_muladd_negate_c | float_muladd_halve_result; + gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + +static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2) +{ + TCGv_i64 one = tcg_constant_i64(float64_one); + int op = float_muladd_negate_c | float_muladd_halve_result; + gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + +/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */ +static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2) +{ + TCGv_i32 one = tcg_constant_i32(float32_one); + int op = float_muladd_negate_result | float_muladd_halve_result; + gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + +static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2) +{ + TCGv_i64 one = tcg_constant_i64(float64_one); + int op = float_muladd_negate_result | float_muladd_halve_result; + gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + static void gen_op_fpexception_im(DisasContext *dc, int ftt) { /* @@ -4710,6 +4755,10 @@ TRANS(FXNORs, VIS1, do_fff, a, tcg_gen_eqv_i32) TRANS(FORNOTs, VIS1, do_fff, a, tcg_gen_orc_i32) TRANS(FORs, VIS1, do_fff, a, tcg_gen_or_i32) +TRANS(FHADDs, VIS3, do_fff, a, gen_op_fhadds) +TRANS(FHSUBs, VIS3, do_fff, a, gen_op_fhsubs) +TRANS(FNHADDs, VIS3, do_fff, a, gen_op_fnhadds) + static bool do_env_fff(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32)) { @@ -4730,6 +4779,8 @@ TRANS(FADDs, ALL, do_env_fff, a, gen_helper_fadds) TRANS(FSUBs, ALL, do_env_fff, a, gen_helper_fsubs) TRANS(FMULs, ALL, do_env_fff, a, gen_helper_fmuls) TRANS(FDIVs, ALL, do_env_fff, a, gen_helper_fdivs) +TRANS(FNADDs, VIS3, do_env_fff, a, gen_helper_fnadds) +TRANS(FNMULs, VIS3, do_env_fff, a, gen_helper_fnmuls) static bool do_dff(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i32, TCGv_i32)) @@ -4827,6 +4878,10 @@ TRANS(FPACK32, VIS1, do_ddd, a, gen_op_fpack32) TRANS(FALIGNDATAg, VIS1, do_ddd, a, gen_op_faligndata) TRANS(BSHUFFLE, VIS2, do_ddd, a, gen_op_bshuffle) +TRANS(FHADDd, VIS3, do_ddd, a, gen_op_fhaddd) +TRANS(FHSUBd, VIS3, do_ddd, a, gen_op_fhsubd) +TRANS(FNHADDd, VIS3, do_ddd, a, gen_op_fnhaddd) + static bool do_rdd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv, TCGv_i64, TCGv_i64)) { @@ -4876,6 +4931,8 @@ TRANS(FADDd, ALL, do_env_ddd, a, gen_helper_faddd) TRANS(FSUBd, ALL, do_env_ddd, a, gen_helper_fsubd) TRANS(FMULd, ALL, do_env_ddd, a, gen_helper_fmuld) TRANS(FDIVd, ALL, do_env_ddd, a, gen_helper_fdivd) +TRANS(FNADDd, VIS3, do_env_ddd, a, gen_helper_fnaddd) +TRANS(FNMULd, VIS3, do_env_ddd, a, gen_helper_fnmuld) static bool trans_FsMULd(DisasContext *dc, arg_r_r_r *a) { @@ -4897,6 +4954,25 @@ static bool trans_FsMULd(DisasContext *dc, arg_r_r_r *a) return advance_pc(dc); } +static bool trans_FNsMULd(DisasContext *dc, arg_r_r_r *a) +{ + TCGv_i64 dst; + TCGv_i32 src1, src2; + + if (!avail_VIS3(dc)) { + return false; + } + if (gen_trap_ifnofpu(dc)) { + return true; + } + dst = tcg_temp_new_i64(); + src1 = gen_load_fpr_F(dc, a->rs1); + src2 = gen_load_fpr_F(dc, a->rs2); + gen_helper_fnsmuld(dst, tcg_env, src1, src2); + gen_store_fpr_D(dc, a->rd, dst); + return advance_pc(dc); +} + static bool do_ffff(DisasContext *dc, arg_r_r_r_r *a, void (*func)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32)) { From 1d3ed3d728f81dee4ae87028a8a3e9beb4fa4a17 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 15:23:35 -0700 Subject: [PATCH 16/38] target/sparc: Implement FLCMP Signed-off-by: Richard Henderson --- target/sparc/fop_helper.c | 46 +++++++++++++++++++++++++++++++++++++++ target/sparc/helper.h | 2 ++ target/sparc/insns.decode | 4 ++++ target/sparc/translate.c | 34 +++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+) diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c index 08b5f96f95..1b524c6d3c 100644 --- a/target/sparc/fop_helper.c +++ b/target/sparc/fop_helper.c @@ -490,6 +490,52 @@ uint32_t helper_fcmpeq(CPUSPARCState *env, Int128 src1, Int128 src2) return finish_fcmp(env, r, GETPC()); } +uint32_t helper_flcmps(float32 src1, float32 src2) +{ + /* + * FLCMP never raises an exception nor modifies any FSR fields. + * Perform the comparison with a dummy fp environment. + */ + float_status discard = { }; + FloatRelation r = float32_compare_quiet(src1, src2, &discard); + + switch (r) { + case float_relation_equal: + if (src2 == float32_zero && src1 != float32_zero) { + return 1; /* -0.0 < +0.0 */ + } + return 0; + case float_relation_less: + return 1; + case float_relation_greater: + return 0; + case float_relation_unordered: + return float32_is_any_nan(src2) ? 3 : 2; + } + g_assert_not_reached(); +} + +uint32_t helper_flcmpd(float64 src1, float64 src2) +{ + float_status discard = { }; + FloatRelation r = float64_compare_quiet(src1, src2, &discard); + + switch (r) { + case float_relation_equal: + if (src2 == float64_zero && src1 != float64_zero) { + return 1; /* -0.0 < +0.0 */ + } + return 0; + case float_relation_less: + return 1; + case float_relation_greater: + return 0; + case float_relation_unordered: + return float64_is_any_nan(src2) ? 3 : 2; + } + g_assert_not_reached(); +} + target_ulong cpu_get_fsr(CPUSPARCState *env) { target_ulong fsr = env->fsr | env->fsr_cexc_ftt; diff --git a/target/sparc/helper.h b/target/sparc/helper.h index 15f0907a1b..ab79954bb5 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -50,6 +50,8 @@ DEF_HELPER_FLAGS_3(fcmpd, TCG_CALL_NO_WG, i32, env, f64, f64) DEF_HELPER_FLAGS_3(fcmped, TCG_CALL_NO_WG, i32, env, f64, f64) DEF_HELPER_FLAGS_3(fcmpq, TCG_CALL_NO_WG, i32, env, i128, i128) DEF_HELPER_FLAGS_3(fcmpeq, TCG_CALL_NO_WG, i32, env, i128, i128) +DEF_HELPER_FLAGS_2(flcmps, TCG_CALL_NO_RWG_SE, i32, f32, f32) +DEF_HELPER_FLAGS_2(flcmpd, TCG_CALL_NO_RWG_SE, i32, f64, f64) DEF_HELPER_2(raise_exception, noreturn, env, int) DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 6ec3838865..de29996304 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -470,6 +470,10 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FZEROs 10 rd:5 110110 00000 0 0110 0001 00000 FONEd 10 ..... 110110 00000 0 0111 1110 00000 rd=%dfp_rd FONEs 10 rd:5 110110 00000 0 0111 1111 00000 + + FLCMPs 10 000 cc:2 110110 rs1:5 1 0101 0001 rs2:5 + FLCMPd 10 000 cc:2 110110 ..... 1 0101 0010 ..... \ + rs1=%dfp_rs1 rs2=%dfp_rs2 ] NCP 10 ----- 110110 ----- --------- ----- # v8 CPop1 } diff --git a/target/sparc/translate.c b/target/sparc/translate.c index d81b9ce5a8..db3a153c6e 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -5207,6 +5207,40 @@ static bool do_fcmpq(DisasContext *dc, arg_FCMPq *a, bool e) TRANS(FCMPq, ALL, do_fcmpq, a, false) TRANS(FCMPEq, ALL, do_fcmpq, a, true) +static bool trans_FLCMPs(DisasContext *dc, arg_FLCMPs *a) +{ + TCGv_i32 src1, src2; + + if (!avail_VIS3(dc)) { + return false; + } + if (gen_trap_ifnofpu(dc)) { + return true; + } + + src1 = gen_load_fpr_F(dc, a->rs1); + src2 = gen_load_fpr_F(dc, a->rs2); + gen_helper_flcmps(cpu_fcc[a->cc], src1, src2); + return advance_pc(dc); +} + +static bool trans_FLCMPd(DisasContext *dc, arg_FLCMPd *a) +{ + TCGv_i64 src1, src2; + + if (!avail_VIS3(dc)) { + return false; + } + if (gen_trap_ifnofpu(dc)) { + return true; + } + + src1 = gen_load_fpr_D(dc, a->rs1); + src2 = gen_load_fpr_D(dc, a->rs2); + gen_helper_flcmpd(cpu_fcc[a->cc], src1, src2); + return advance_pc(dc); +} + static void sparc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) { DisasContext *dc = container_of(dcbase, DisasContext, base); From d6ff1ccb45f9b228e20f74f6e6c801c88a2885b2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 15:40:39 -0700 Subject: [PATCH 17/38] target/sparc: Implement FMEAN16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/helper.h | 1 + target/sparc/insns.decode | 1 + target/sparc/translate.c | 30 ++++++++++++++++++++++++++++++ target/sparc/vis_helper.c | 21 +++++++++++++++++++++ 4 files changed, 53 insertions(+) diff --git a/target/sparc/helper.h b/target/sparc/helper.h index ab79954bb5..f1b84dc9b3 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -118,6 +118,7 @@ DEF_HELPER_FLAGS_2(cmask8, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(cmask16, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(cmask32, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fchksm16, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fmean16, TCG_CALL_NO_RWG_SE, i64, i64, i64) #define VIS_CMPHELPER(name) \ DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_NO_RWG_SE, \ i64, i64, i64) \ diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index de29996304..febd1a4a13 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -421,6 +421,7 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ PDIST 10 ..... 110110 ..... 0 0011 1110 ..... \ &r_r_r_r rd=%dfp_rd rs1=%dfp_rd rs2=%dfp_rs1 rs3=%dfp_rs2 + FMEAN16 10 ..... 110110 ..... 0 0100 0000 ..... @d_d_d FCHKSM16 10 ..... 110110 ..... 0 0100 0100 ..... @d_d_d FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @d_d_d FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @d_r_r diff --git a/target/sparc/translate.c b/target/sparc/translate.c index db3a153c6e..c3956f489b 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -815,8 +815,37 @@ static void gen_op_fchksm16(unsigned vece, uint32_t dofs, uint32_t aofs, }; tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &op); } + +static void gen_vec_fmean16(unsigned vece, TCGv_vec dst, + TCGv_vec src1, TCGv_vec src2) +{ + TCGv_vec t = tcg_temp_new_vec_matching(dst); + + tcg_gen_or_vec(vece, t, src1, src2); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(dst, vece, 1)); + tcg_gen_sari_vec(vece, src1, src1, 1); + tcg_gen_sari_vec(vece, src2, src2, 1); + tcg_gen_add_vec(vece, dst, src1, src2); + tcg_gen_add_vec(vece, dst, dst, t); +} + +static void gen_op_fmean16(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_add_vec, INDEX_op_sari_vec, + }; + static const GVecGen3 op = { + .fni8 = gen_helper_fmean16, + .fniv = gen_vec_fmean16, + .opt_opc = vecop_list, + .vece = MO_16, + }; + tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &op); +} #else #define gen_op_fchksm16 ({ qemu_build_not_reached(); NULL; }) +#define gen_op_fmean16 ({ qemu_build_not_reached(); NULL; }) #endif static void finishing_insn(DisasContext *dc) @@ -4844,6 +4873,7 @@ TRANS(FPADD32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_add) TRANS(FPSUB16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sub) TRANS(FPSUB32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sub) TRANS(FCHKSM16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fchksm16) +TRANS(FMEAN16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fmean16) static bool do_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i64, TCGv_i64)) diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index fa607375d2..6ef36755c3 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -412,3 +412,24 @@ uint64_t helper_fchksm16(uint64_t src1, uint64_t src2) return r.ll; } + +static inline int16_t do_fmean16(int16_t src1, int16_t src2) +{ + return (src1 + src2 + 1) / 2; +} + +uint64_t helper_fmean16(uint64_t src1, uint64_t src2) +{ + VIS64 r, s1, s2; + + s1.ll = src1; + s2.ll = src2; + r.ll = 0; + + r.VIS_SW64(0) = do_fmean16(s1.VIS_SW64(0), s2.VIS_SW64(0)); + r.VIS_SW64(1) = do_fmean16(s1.VIS_SW64(1), s2.VIS_SW64(1)); + r.VIS_SW64(2) = do_fmean16(s1.VIS_SW64(2), s2.VIS_SW64(2)); + r.VIS_SW64(3) = do_fmean16(s1.VIS_SW64(3), s2.VIS_SW64(3)); + + return r.ll; +} From bc3f14a9ed777ddab8c8915a9804f9b3ca90839d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 16:39:12 -0700 Subject: [PATCH 18/38] target/sparc: Implement FPADD64, FPSUB64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 2 ++ target/sparc/translate.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index febd1a4a13..70ca41a69a 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -441,10 +441,12 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FPADD16s 10 ..... 110110 ..... 0 0101 0001 ..... @r_r_r FPADD32 10 ..... 110110 ..... 0 0101 0010 ..... @d_d_d FPADD32s 10 ..... 110110 ..... 0 0101 0011 ..... @r_r_r + FPADD64 10 ..... 110110 ..... 0 0100 0010 ..... @d_d_d FPSUB16 10 ..... 110110 ..... 0 0101 0100 ..... @d_d_d FPSUB16s 10 ..... 110110 ..... 0 0101 0101 ..... @r_r_r FPSUB32 10 ..... 110110 ..... 0 0101 0110 ..... @d_d_d FPSUB32s 10 ..... 110110 ..... 0 0101 0111 ..... @r_r_r + FPSUB64 10 ..... 110110 ..... 0 0100 0110 ..... @d_d_d FNORd 10 ..... 110110 ..... 0 0110 0010 ..... @d_d_d FNORs 10 ..... 110110 ..... 0 0110 0011 ..... @r_r_r diff --git a/target/sparc/translate.c b/target/sparc/translate.c index c3956f489b..48cab59c07 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -4912,6 +4912,9 @@ TRANS(FHADDd, VIS3, do_ddd, a, gen_op_fhaddd) TRANS(FHSUBd, VIS3, do_ddd, a, gen_op_fhsubd) TRANS(FNHADDd, VIS3, do_ddd, a, gen_op_fnhaddd) +TRANS(FPADD64, VIS3B, do_ddd, a, tcg_gen_add_i64) +TRANS(FPSUB64, VIS3B, do_ddd, a, tcg_gen_sub_i64) + static bool do_rdd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv, TCGv_i64, TCGv_i64)) { From 0d1d3aaf6405f9ecf67af886c06f1f710b046563 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 20:54:48 -0700 Subject: [PATCH 19/38] target/sparc: Implement FPADDS, FPSUBS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 9 +++++ target/sparc/translate.c | 82 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 70ca41a69a..b6553362eb 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -448,6 +448,15 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FPSUB32s 10 ..... 110110 ..... 0 0101 0111 ..... @r_r_r FPSUB64 10 ..... 110110 ..... 0 0100 0110 ..... @d_d_d + FPADDS16 10 ..... 110110 ..... 0 0101 1000 ..... @d_d_d + FPADDS16s 10 ..... 110110 ..... 0 0101 1001 ..... @r_r_r + FPADDS32 10 ..... 110110 ..... 0 0101 1010 ..... @d_d_d + FPADDS32s 10 ..... 110110 ..... 0 0101 1011 ..... @r_r_r + FPSUBS16 10 ..... 110110 ..... 0 0101 1100 ..... @d_d_d + FPSUBS16s 10 ..... 110110 ..... 0 0101 1101 ..... @r_r_r + FPSUBS32 10 ..... 110110 ..... 0 0101 1110 ..... @d_d_d + FPSUBS32s 10 ..... 110110 ..... 0 0101 1111 ..... @r_r_r + FNORd 10 ..... 110110 ..... 0 0110 0010 ..... @d_d_d FNORs 10 ..... 110110 ..... 0 0110 0011 ..... @r_r_r FANDNOTd 10 ..... 110110 ..... 0 0110 0100 ..... @d_d_d # FANDNOT2d diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 48cab59c07..7a5e8e0a9a 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -698,6 +698,78 @@ static void gen_op_fpack32(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2) #endif } +static void gen_op_fpadds16s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2) +{ + TCGv_i32 t[2]; + + for (int i = 0; i < 2; i++) { + TCGv_i32 u = tcg_temp_new_i32(); + TCGv_i32 v = tcg_temp_new_i32(); + + tcg_gen_sextract_i32(u, src1, i * 16, 16); + tcg_gen_sextract_i32(v, src2, i * 16, 16); + tcg_gen_add_i32(u, u, v); + tcg_gen_smax_i32(u, u, tcg_constant_i32(INT16_MIN)); + tcg_gen_smin_i32(u, u, tcg_constant_i32(INT16_MAX)); + t[i] = u; + } + tcg_gen_deposit_i32(d, t[0], t[1], 16, 16); +} + +static void gen_op_fpsubs16s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2) +{ + TCGv_i32 t[2]; + + for (int i = 0; i < 2; i++) { + TCGv_i32 u = tcg_temp_new_i32(); + TCGv_i32 v = tcg_temp_new_i32(); + + tcg_gen_sextract_i32(u, src1, i * 16, 16); + tcg_gen_sextract_i32(v, src2, i * 16, 16); + tcg_gen_sub_i32(u, u, v); + tcg_gen_smax_i32(u, u, tcg_constant_i32(INT16_MIN)); + tcg_gen_smin_i32(u, u, tcg_constant_i32(INT16_MAX)); + t[i] = u; + } + tcg_gen_deposit_i32(d, t[0], t[1], 16, 16); +} + +static void gen_op_fpadds32s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2) +{ + TCGv_i32 r = tcg_temp_new_i32(); + TCGv_i32 t = tcg_temp_new_i32(); + TCGv_i32 v = tcg_temp_new_i32(); + TCGv_i32 z = tcg_constant_i32(0); + + tcg_gen_add_i32(r, src1, src2); + tcg_gen_xor_i32(t, src1, src2); + tcg_gen_xor_i32(v, r, src2); + tcg_gen_andc_i32(v, v, t); + + tcg_gen_setcond_i32(TCG_COND_GE, t, r, z); + tcg_gen_addi_i32(t, t, INT32_MAX); + + tcg_gen_movcond_i32(TCG_COND_LT, d, v, z, t, r); +} + +static void gen_op_fpsubs32s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2) +{ + TCGv_i32 r = tcg_temp_new_i32(); + TCGv_i32 t = tcg_temp_new_i32(); + TCGv_i32 v = tcg_temp_new_i32(); + TCGv_i32 z = tcg_constant_i32(0); + + tcg_gen_sub_i32(r, src1, src2); + tcg_gen_xor_i32(t, src1, src2); + tcg_gen_xor_i32(v, r, src1); + tcg_gen_and_i32(v, v, t); + + tcg_gen_setcond_i32(TCG_COND_GE, t, r, z); + tcg_gen_addi_i32(t, t, INT32_MAX); + + tcg_gen_movcond_i32(TCG_COND_LT, d, v, z, t, r); +} + static void gen_op_faligndata(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2) { #ifdef TARGET_SPARC64 @@ -4788,6 +4860,11 @@ TRANS(FHADDs, VIS3, do_fff, a, gen_op_fhadds) TRANS(FHSUBs, VIS3, do_fff, a, gen_op_fhsubs) TRANS(FNHADDs, VIS3, do_fff, a, gen_op_fnhadds) +TRANS(FPADDS16s, VIS3, do_fff, a, gen_op_fpadds16s) +TRANS(FPSUBS16s, VIS3, do_fff, a, gen_op_fpsubs16s) +TRANS(FPADDS32s, VIS3, do_fff, a, gen_op_fpadds32s) +TRANS(FPSUBS32s, VIS3, do_fff, a, gen_op_fpsubs32s) + static bool do_env_fff(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32)) { @@ -4875,6 +4952,11 @@ TRANS(FPSUB32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sub) TRANS(FCHKSM16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fchksm16) TRANS(FMEAN16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fmean16) +TRANS(FPADDS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_ssadd) +TRANS(FPADDS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_ssadd) +TRANS(FPSUBS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sssub) +TRANS(FPSUBS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sssub) + static bool do_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i64, TCGv_i64)) { From 669e077437d5782682e2ac4f1082fcbf102b5680 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 16:53:28 -0700 Subject: [PATCH 20/38] target/sparc: Implement FPCMPEQ8, FPCMPNE8, FPCMPULE8, FPCMPUGT8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/helper.h | 4 ++++ target/sparc/insns.decode | 5 +++++ target/sparc/translate.c | 9 +++++++++ target/sparc/vis_helper.c | 40 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/target/sparc/helper.h b/target/sparc/helper.h index f1b84dc9b3..ed295c01e0 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -128,6 +128,10 @@ VIS_CMPHELPER(cmpgt) VIS_CMPHELPER(cmpeq) VIS_CMPHELPER(cmple) VIS_CMPHELPER(cmpne) +DEF_HELPER_FLAGS_2(fcmpeq8, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fcmpne8, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fcmpule8, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fcmpugt8, TCG_CALL_NO_RWG_SE, i64, i64, i64) #endif #undef VIS_HELPER #undef VIS_CMPHELPER diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index b6553362eb..295fc36128 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -408,6 +408,11 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FPCMPGT32 10 ..... 110110 ..... 0 0010 1100 ..... @r_d_d FPCMPEQ32 10 ..... 110110 ..... 0 0010 1110 ..... @r_d_d + FPCMPULE8 10 ..... 110110 ..... 1 0010 0000 ..... @r_d_d + FPCMPUGT8 10 ..... 110110 ..... 1 0010 1000 ..... @r_d_d + FPCMPNE8 10 ..... 110110 ..... 1 0010 0010 ..... @r_d_d + FPCMPEQ8 10 ..... 110110 ..... 1 0010 1010 ..... @r_d_d + FMUL8x16 10 ..... 110110 ..... 0 0011 0001 ..... @d_r_d FMUL8x16AU 10 ..... 110110 ..... 0 0011 0011 ..... @d_r_r FMUL8x16AL 10 ..... 110110 ..... 0 0011 0101 ..... @d_r_r diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 7a5e8e0a9a..1da40f6db2 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -64,14 +64,18 @@ # define gen_helper_cmask8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_cmask16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_cmask32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpeq8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpeq16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpeq32 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpgt16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpgt32 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmple16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmple32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpne8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpne16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpne32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpule8 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpugt8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fdtox ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fexpand ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmul8sux16 ({ qemu_build_not_reached(); NULL; }) @@ -5025,6 +5029,11 @@ TRANS(FPCMPNE32, VIS1, do_rdd, a, gen_helper_fcmpne32) TRANS(FPCMPGT32, VIS1, do_rdd, a, gen_helper_fcmpgt32) TRANS(FPCMPEQ32, VIS1, do_rdd, a, gen_helper_fcmpeq32) +TRANS(FPCMPEQ8, VIS3B, do_rdd, a, gen_helper_fcmpeq8) +TRANS(FPCMPNE8, VIS3B, do_rdd, a, gen_helper_fcmpne8) +TRANS(FPCMPULE8, VIS3B, do_rdd, a, gen_helper_fcmpule8) +TRANS(FPCMPUGT8, VIS3B, do_rdd, a, gen_helper_fcmpugt8) + static bool do_env_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_env, TCGv_i64, TCGv_i64)) { diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index 6ef36755c3..5a5da17132 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -238,6 +238,46 @@ VIS_CMPHELPER(helper_fcmpeq, FCMPEQ) VIS_CMPHELPER(helper_fcmple, FCMPLE) VIS_CMPHELPER(helper_fcmpne, FCMPNE) +uint64_t helper_fcmpeq8(uint64_t src1, uint64_t src2) +{ + uint64_t a = src1 ^ src2; + uint64_t m = 0x7f7f7f7f7f7f7f7fULL; + uint64_t c = ~(((a & m) + m) | a | m); + + /* a.......b.......c.......d.......e.......f.......g.......h....... */ + c |= c << 7; + /* ab......bc......cd......de......ef......fg......gh......h....... */ + c |= c << 14; + /* abcd....bcde....cdef....defg....efgh....fgh.....gh......h....... */ + c |= c << 28; + /* abcdefghbcdefgh.cdefgh..defgh...efgh....fgh.....gh......h....... */ + return c >> 56; +} + +uint64_t helper_fcmpne8(uint64_t src1, uint64_t src2) +{ + return helper_fcmpeq8(src1, src2) ^ 0xff; +} + +uint64_t helper_fcmpule8(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 8; ++i) { + r |= (s1.VIS_B64(i) <= s2.VIS_B64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpugt8(uint64_t src1, uint64_t src2) +{ + return helper_fcmpule8(src1, src2) ^ 0xff; +} + uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2) { int i; From fbc5c8d4e8f10fdb780c450aa49b503e6d592cc6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 17:24:01 -0700 Subject: [PATCH 21/38] target/sparc: Implement FSLL, FSRL, FSRA, FSLAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/helper.h | 2 ++ target/sparc/insns.decode | 9 +++++++++ target/sparc/translate.c | 11 +++++++++++ target/sparc/vis_helper.c | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/target/sparc/helper.h b/target/sparc/helper.h index ed295c01e0..219f0e04c7 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -119,6 +119,8 @@ DEF_HELPER_FLAGS_2(cmask16, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(cmask32, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fchksm16, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fmean16, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fslas16, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fslas32, TCG_CALL_NO_RWG_SE, i64, i64, i64) #define VIS_CMPHELPER(name) \ DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_NO_RWG_SE, \ i64, i64, i64) \ diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 295fc36128..a5eefebfbc 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -408,6 +408,15 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FPCMPGT32 10 ..... 110110 ..... 0 0010 1100 ..... @r_d_d FPCMPEQ32 10 ..... 110110 ..... 0 0010 1110 ..... @r_d_d + FSLL16 10 ..... 110110 ..... 0 0010 0001 ..... @d_d_d + FSRL16 10 ..... 110110 ..... 0 0010 0011 ..... @d_d_d + FSLAS16 10 ..... 110110 ..... 0 0010 1001 ..... @d_d_d + FSRA16 10 ..... 110110 ..... 0 0010 1011 ..... @d_d_d + FSLL32 10 ..... 110110 ..... 0 0010 0101 ..... @d_d_d + FSRL32 10 ..... 110110 ..... 0 0010 0111 ..... @d_d_d + FSLAS32 10 ..... 110110 ..... 0 0010 1101 ..... @d_d_d + FSRA32 10 ..... 110110 ..... 0 0010 1111 ..... @d_d_d + FPCMPULE8 10 ..... 110110 ..... 1 0010 0000 ..... @r_d_d FPCMPUGT8 10 ..... 110110 ..... 1 0010 1000 ..... @r_d_d FPCMPNE8 10 ..... 110110 ..... 1 0010 0010 ..... @r_d_d diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 1da40f6db2..e144217347 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -83,6 +83,8 @@ # define gen_helper_fmul8x16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fpmerge ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fqtox ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fslas16 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fslas32 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fstox ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fxtod ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fxtoq ({ qemu_build_not_reached(); NULL; }) @@ -4961,6 +4963,13 @@ TRANS(FPADDS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_ssadd) TRANS(FPSUBS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sssub) TRANS(FPSUBS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sssub) +TRANS(FSLL16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_shlv) +TRANS(FSLL32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_shlv) +TRANS(FSRL16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_shrv) +TRANS(FSRL32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_shrv) +TRANS(FSRA16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sarv) +TRANS(FSRA32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sarv) + static bool do_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i64, TCGv_i64)) { @@ -5000,6 +5009,8 @@ TRANS(FNHADDd, VIS3, do_ddd, a, gen_op_fnhaddd) TRANS(FPADD64, VIS3B, do_ddd, a, tcg_gen_add_i64) TRANS(FPSUB64, VIS3B, do_ddd, a, tcg_gen_sub_i64) +TRANS(FSLAS16, VIS3, do_ddd, a, gen_helper_fslas16) +TRANS(FSLAS32, VIS3, do_ddd, a, gen_helper_fslas32) static bool do_rdd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv, TCGv_i64, TCGv_i64)) diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index 5a5da17132..c21522c533 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -473,3 +473,39 @@ uint64_t helper_fmean16(uint64_t src1, uint64_t src2) return r.ll; } + +uint64_t helper_fslas16(uint64_t src1, uint64_t src2) +{ + VIS64 r, s1, s2; + + s1.ll = src1; + s2.ll = src2; + r.ll = 0; + + for (int i = 0; i < 4; ++i) { + int t = s1.VIS_SW64(i) << (s2.VIS_W64(i) % 16); + t = MIN(t, INT16_MAX); + t = MAX(t, INT16_MIN); + r.VIS_SW64(i) = t; + } + + return r.ll; +} + +uint64_t helper_fslas32(uint64_t src1, uint64_t src2) +{ + VIS64 r, s1, s2; + + s1.ll = src1; + s2.ll = src2; + r.ll = 0; + + for (int i = 0; i < 2; ++i) { + int64_t t = (int64_t)(int32_t)s1.VIS_L64(i) << (s2.VIS_L64(i) % 32); + t = MIN(t, INT32_MAX); + t = MAX(t, INT32_MIN); + r.VIS_L64(i) = t; + } + + return r.ll; +} From 298c52f784ac0f6c09a4621609ac9008b7fa15f9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 17:38:34 -0700 Subject: [PATCH 22/38] target/sparc: Implement LDXEFSR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/fop_helper.c | 6 ++++++ target/sparc/helper.h | 1 + target/sparc/insns.decode | 1 + target/sparc/translate.c | 11 +++++++++-- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c index 1b524c6d3c..0b30665b51 100644 --- a/target/sparc/fop_helper.c +++ b/target/sparc/fop_helper.c @@ -602,3 +602,9 @@ void helper_set_fsr_nofcc_noftt(CPUSPARCState *env, uint32_t fsr) env->fsr_cexc_ftt |= fsr & FSR_CEXC_MASK; set_fsr_nonsplit(env, fsr); } + +void helper_set_fsr_nofcc(CPUSPARCState *env, uint32_t fsr) +{ + env->fsr_cexc_ftt = fsr & (FSR_CEXC_MASK | FSR_FTT_MASK); + set_fsr_nonsplit(env, fsr); +} diff --git a/target/sparc/helper.h b/target/sparc/helper.h index 219f0e04c7..4ae97866af 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -40,6 +40,7 @@ DEF_HELPER_FLAGS_4(ld_asi, TCG_CALL_NO_WG, i64, env, tl, int, i32) DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32) #endif DEF_HELPER_FLAGS_1(get_fsr, TCG_CALL_NO_WG_SE, tl, env) +DEF_HELPER_FLAGS_2(set_fsr_nofcc, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_2(set_fsr_nofcc_noftt, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_2(fsqrts, TCG_CALL_NO_WG, f32, env, f32) DEF_HELPER_FLAGS_2(fsqrtd, TCG_CALL_NO_WG, f64, env, f64) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index a5eefebfbc..fec055910e 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -589,6 +589,7 @@ STX 11 ..... 011110 ..... . ............. @r_r_i_asi # STXA LDF 11 ..... 100000 ..... . ............. @r_r_ri_na LDFSR 11 00000 100001 ..... . ............. @n_r_ri LDXFSR 11 00001 100001 ..... . ............. @n_r_ri +LDXEFSR 11 00011 100001 ..... . ............. @n_r_ri LDQF 11 ..... 100010 ..... . ............. @q_r_ri_na LDDF 11 ..... 100011 ..... . ............. @d_r_ri_na diff --git a/target/sparc/translate.c b/target/sparc/translate.c index e144217347..c40992df96 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -4458,7 +4458,7 @@ static bool trans_LDFSR(DisasContext *dc, arg_r_r_ri *a) return advance_pc(dc); } -static bool trans_LDXFSR(DisasContext *dc, arg_r_r_ri *a) +static bool do_ldxfsr(DisasContext *dc, arg_r_r_ri *a, bool entire) { #ifdef TARGET_SPARC64 TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm); @@ -4483,13 +4483,20 @@ static bool trans_LDXFSR(DisasContext *dc, arg_r_r_ri *a) tcg_gen_extract_i32(cpu_fcc[2], hi, FSR_FCC2_SHIFT - 32, 2); tcg_gen_extract_i32(cpu_fcc[3], hi, FSR_FCC3_SHIFT - 32, 2); - gen_helper_set_fsr_nofcc_noftt(tcg_env, lo); + if (entire) { + gen_helper_set_fsr_nofcc(tcg_env, lo); + } else { + gen_helper_set_fsr_nofcc_noftt(tcg_env, lo); + } return advance_pc(dc); #else return false; #endif } +TRANS(LDXFSR, 64, do_ldxfsr, a, false) +TRANS(LDXEFSR, VIS3B, do_ldxfsr, a, true) + static bool do_stfsr(DisasContext *dc, arg_r_r_ri *a, MemOp mop) { TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm); From 875ce3929aef9d0dcee67b506991ea84e505729b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 17:48:25 -0700 Subject: [PATCH 23/38] target/sparc: Implement LZCNT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 1 + target/sparc/translate.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index fec055910e..4766964893 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -389,6 +389,7 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ ADDXC 10 ..... 110110 ..... 0 0001 0001 ..... @r_r_r ADDXCcc 10 ..... 110110 ..... 0 0001 0011 ..... @r_r_r + LZCNT 10 ..... 110110 00000 0 0001 0111 ..... @r_r2 ALIGNADDR 10 ..... 110110 ..... 0 0001 1000 ..... @r_r_r ALIGNADDRL 10 ..... 110110 ..... 0 0001 1010 ..... @r_r_r diff --git a/target/sparc/translate.c b/target/sparc/translate.c index c40992df96..a12cc9f796 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -658,6 +658,11 @@ static void gen_op_popc(TCGv dst, TCGv src1, TCGv src2) tcg_gen_ctpop_tl(dst, src2); } +static void gen_op_lzcnt(TCGv dst, TCGv src) +{ + tcg_gen_clzi_tl(dst, src, TARGET_LONG_BITS); +} + #ifndef TARGET_SPARC64 static void gen_helper_array8(TCGv dst, TCGv src1, TCGv src2) { @@ -3873,6 +3878,19 @@ TRANS(EDGE16LN, VIS2, gen_edge, a, 16, 0, 1) TRANS(EDGE32N, VIS2, gen_edge, a, 32, 0, 0) TRANS(EDGE32LN, VIS2, gen_edge, a, 32, 0, 1) +static bool do_rr(DisasContext *dc, arg_r_r *a, + void (*func)(TCGv, TCGv)) +{ + TCGv dst = gen_dest_gpr(dc, a->rd); + TCGv src = gen_load_gpr(dc, a->rs); + + func(dst, src); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); +} + +TRANS(LZCNT, VIS3, do_rr, a, gen_op_lzcnt) + static bool do_rrr(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv, TCGv, TCGv)) { From 09b157e6283d02e02ec9f47d8d4a2fd0cd8612ce Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 18:07:35 -0700 Subject: [PATCH 24/38] target/sparc: Implement MOVsTOw, MOVdTOx, MOVwTOs, MOVxTOd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 6 ++++++ target/sparc/translate.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 4766964893..e0e9248982 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -498,6 +498,12 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FONEd 10 ..... 110110 00000 0 0111 1110 00000 rd=%dfp_rd FONEs 10 rd:5 110110 00000 0 0111 1111 00000 + MOVsTOuw 10 ..... 110110 00000 1 0001 0001 ..... @r_r2 + MOVsTOsw 10 ..... 110110 00000 1 0001 0011 ..... @r_r2 + MOVwTOs 10 ..... 110110 00000 1 0001 1001 ..... @r_r2 + MOVdTOx 10 ..... 110110 00000 1 0001 0000 ..... @r_d2 + MOVxTOd 10 ..... 110110 00000 1 0001 1000 ..... @d_r2 + FLCMPs 10 000 cc:2 110110 rs1:5 1 0101 0001 rs2:5 FLCMPd 10 000 cc:2 110110 ..... 1 0101 0010 ..... \ rs1=%dfp_rs1 rs2=%dfp_rs2 diff --git a/target/sparc/translate.c b/target/sparc/translate.c index a12cc9f796..496d490cdd 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -5401,6 +5401,42 @@ static bool trans_FLCMPd(DisasContext *dc, arg_FLCMPd *a) return advance_pc(dc); } +static bool do_movf2r(DisasContext *dc, arg_r_r *a, + int (*offset)(unsigned int), + void (*load)(TCGv, TCGv_ptr, tcg_target_long)) +{ + TCGv dst; + + if (gen_trap_ifnofpu(dc)) { + return true; + } + dst = gen_dest_gpr(dc, a->rd); + load(dst, tcg_env, offset(a->rs)); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); +} + +TRANS(MOVsTOsw, VIS3B, do_movf2r, a, gen_offset_fpr_F, tcg_gen_ld32s_tl) +TRANS(MOVsTOuw, VIS3B, do_movf2r, a, gen_offset_fpr_F, tcg_gen_ld32u_tl) +TRANS(MOVdTOx, VIS3B, do_movf2r, a, gen_offset_fpr_D, tcg_gen_ld_tl) + +static bool do_movr2f(DisasContext *dc, arg_r_r *a, + int (*offset)(unsigned int), + void (*store)(TCGv, TCGv_ptr, tcg_target_long)) +{ + TCGv src; + + if (gen_trap_ifnofpu(dc)) { + return true; + } + src = gen_load_gpr(dc, a->rs); + store(src, tcg_env, offset(a->rd)); + return advance_pc(dc); +} + +TRANS(MOVwTOs, VIS3B, do_movr2f, a, gen_offset_fpr_F, tcg_gen_st32_tl) +TRANS(MOVxTOd, VIS3B, do_movr2f, a, gen_offset_fpr_D, tcg_gen_st_tl) + static void sparc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) { DisasContext *dc = container_of(dcbase, DisasContext, base); From 7d5ebd8ffe241c57d6857ae75b67de6c6af3429c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 18:16:44 -0700 Subject: [PATCH 25/38] target/sparc: Implement PDISTN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 1 + target/sparc/translate.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index e0e9248982..09c8adca37 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -435,6 +435,7 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FPACKFIX 10 ..... 110110 00000 0 0011 1101 ..... @r_d2 PDIST 10 ..... 110110 ..... 0 0011 1110 ..... \ &r_r_r_r rd=%dfp_rd rs1=%dfp_rd rs2=%dfp_rs1 rs3=%dfp_rs2 + PDISTN 10 ..... 110110 ..... 0 0011 1111 ..... @r_d_d FMEAN16 10 ..... 110110 ..... 0 0100 0000 ..... @d_d_d FCHKSM16 10 ..... 110110 ..... 0 0100 0100 ..... @d_d_d diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 496d490cdd..2480eed1e7 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -817,6 +817,15 @@ static void gen_op_bshuffle(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2) #endif } +static void gen_op_pdistn(TCGv dst, TCGv_i64 src1, TCGv_i64 src2) +{ +#ifdef TARGET_SPARC64 + gen_helper_pdist(dst, tcg_constant_i64(0), src1, src2); +#else + g_assert_not_reached(); +#endif +} + static void gen_op_fmul8x16al(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) { tcg_gen_ext16s_i32(src2, src2); @@ -5070,6 +5079,8 @@ TRANS(FPCMPNE8, VIS3B, do_rdd, a, gen_helper_fcmpne8) TRANS(FPCMPULE8, VIS3B, do_rdd, a, gen_helper_fcmpule8) TRANS(FPCMPUGT8, VIS3B, do_rdd, a, gen_helper_fcmpugt8) +TRANS(PDISTN, VIS3, do_rdd, a, gen_op_pdistn) + static bool do_env_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_env, TCGv_i64, TCGv_i64)) { From 680af1b4a523cd7f15690cad7afb069e6efbbec0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 18:22:42 -0700 Subject: [PATCH 26/38] target/sparc: Implement UMULXHI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 1 + target/sparc/translate.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 09c8adca37..508175eccd 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -389,6 +389,7 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ ADDXC 10 ..... 110110 ..... 0 0001 0001 ..... @r_r_r ADDXCcc 10 ..... 110110 ..... 0 0001 0011 ..... @r_r_r + UMULXHI 10 ..... 110110 ..... 0 0001 0110 ..... @r_r_r LZCNT 10 ..... 110110 00000 0 0001 0111 ..... @r_r2 ALIGNADDR 10 ..... 110110 ..... 0 0001 1000 ..... @r_r_r diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 2480eed1e7..2a38152f58 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -581,6 +581,12 @@ static void gen_op_smul(TCGv dst, TCGv src1, TCGv src2) gen_op_multiply(dst, src1, src2, 1); } +static void gen_op_umulxhi(TCGv dst, TCGv src1, TCGv src2) +{ + TCGv discard = tcg_temp_new(); + tcg_gen_mulu2_tl(discard, dst, src1, src2); +} + static void gen_op_sdiv(TCGv dst, TCGv src1, TCGv src2) { #ifdef TARGET_SPARC64 @@ -3919,6 +3925,8 @@ TRANS(ARRAY32, VIS1, do_rrr, a, gen_op_array32) TRANS(ADDXC, VIS3, do_rrr, a, gen_op_addxc) TRANS(ADDXCcc, VIS3, do_rrr, a, gen_op_addxccc) +TRANS(UMULXHI, VIS3, do_rrr, a, gen_op_umulxhi) + static void gen_op_alignaddr(TCGv dst, TCGv s1, TCGv s2) { #ifdef TARGET_SPARC64 From 029b0283dfe64c38e48f9dd9aad0dc6d254c4ea4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 18:31:59 -0700 Subject: [PATCH 27/38] target/sparc: Implement XMULX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/helper.h | 2 ++ target/sparc/insns.decode | 2 ++ target/sparc/translate.c | 4 ++++ target/sparc/vis_helper.c | 11 +++++++++++ 4 files changed, 19 insertions(+) diff --git a/target/sparc/helper.h b/target/sparc/helper.h index 4ae97866af..fe0d8bc593 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -135,6 +135,8 @@ DEF_HELPER_FLAGS_2(fcmpeq8, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fcmpne8, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fcmpule8, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fcmpugt8, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(xmulx, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(xmulxhi, TCG_CALL_NO_RWG_SE, i64, i64, i64) #endif #undef VIS_HELPER #undef VIS_CMPHELPER diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 508175eccd..1d54de5367 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -391,6 +391,8 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ ADDXCcc 10 ..... 110110 ..... 0 0001 0011 ..... @r_r_r UMULXHI 10 ..... 110110 ..... 0 0001 0110 ..... @r_r_r LZCNT 10 ..... 110110 00000 0 0001 0111 ..... @r_r2 + XMULX 10 ..... 110110 ..... 1 0001 0101 ..... @r_r_r + XMULXHI 10 ..... 110110 ..... 1 0001 0110 ..... @r_r_r ALIGNADDR 10 ..... 110110 ..... 0 0001 1000 ..... @r_r_r ALIGNADDRL 10 ..... 110110 ..... 0 0001 1010 ..... @r_r_r diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 2a38152f58..971e7dae80 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -90,6 +90,8 @@ # define gen_helper_fxtoq ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fxtos ({ qemu_build_not_reached(); NULL; }) # define gen_helper_pdist ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_xmulx ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_xmulxhi ({ qemu_build_not_reached(); NULL; }) # define MAXTL_MASK 0 #endif @@ -5088,6 +5090,8 @@ TRANS(FPCMPULE8, VIS3B, do_rdd, a, gen_helper_fcmpule8) TRANS(FPCMPUGT8, VIS3B, do_rdd, a, gen_helper_fcmpugt8) TRANS(PDISTN, VIS3, do_rdd, a, gen_op_pdistn) +TRANS(XMULX, VIS3, do_rrr, a, gen_helper_xmulx) +TRANS(XMULXHI, VIS3, do_rrr, a, gen_helper_xmulxhi) static bool do_env_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_env, TCGv_i64, TCGv_i64)) diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index c21522c533..c927a054b8 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" +#include "crypto/clmul.h" target_ulong helper_array8(target_ulong rs1, target_ulong rs2) { @@ -509,3 +510,13 @@ uint64_t helper_fslas32(uint64_t src1, uint64_t src2) return r.ll; } + +uint64_t helper_xmulx(uint64_t src1, uint64_t src2) +{ + return int128_getlo(clmul_64(src1, src2)); +} + +uint64_t helper_xmulxhi(uint64_t src1, uint64_t src2) +{ + return int128_gethi(clmul_64(src1, src2)); +} From deadbb14ba7a9cbdabbd102f7bf470c0baf9f25a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 18:49:08 -0700 Subject: [PATCH 28/38] target/sparc: Enable VIS3 feature bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/elfload.c | 1 + target/sparc/cpu.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 6a1457346a..cb79580431 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1004,6 +1004,7 @@ static uint32_t get_elf_hwcap(void) r |= features & CPU_FEATURE_VIS1 ? HWCAP_SPARC_VIS : 0; r |= features & CPU_FEATURE_VIS2 ? HWCAP_SPARC_VIS2 : 0; r |= features & CPU_FEATURE_FMAF ? HWCAP_SPARC_FMAF : 0; + r |= features & CPU_FEATURE_VIS3 ? HWCAP_SPARC_VIS3 : 0; #endif return r; diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index ed9238a69d..8ea977b49f 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -550,6 +550,7 @@ static const char * const feature_name[] = { [CPU_FEATURE_BIT_VIS1] = "vis1", [CPU_FEATURE_BIT_VIS2] = "vis2", [CPU_FEATURE_BIT_FMAF] = "fmaf", + [CPU_FEATURE_BIT_VIS3] = "vis3", #else [CPU_FEATURE_BIT_MUL] = "mul", [CPU_FEATURE_BIT_DIV] = "div", @@ -880,6 +881,8 @@ static Property sparc_cpu_properties[] = { CPU_FEATURE_BIT_VIS2, false), DEFINE_PROP_BIT("fmaf", SPARCCPU, env.def.features, CPU_FEATURE_BIT_FMAF, false), + DEFINE_PROP_BIT("vis3", SPARCCPU, env.def.features, + CPU_FEATURE_BIT_VIS3, false), #else DEFINE_PROP_BIT("mul", SPARCCPU, env.def.features, CPU_FEATURE_BIT_MUL, false), From 68a414e99d438ff5e3e598d140c8f81638a8ea9e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 19:40:36 -0700 Subject: [PATCH 29/38] target/sparc: Implement IMA extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/elfload.c | 1 + target/sparc/cpu-feature.h.inc | 1 + target/sparc/cpu.c | 3 +++ target/sparc/insns.decode | 3 +++ target/sparc/translate.c | 24 ++++++++++++++++++++++++ 5 files changed, 32 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index cb79580431..0d4dc1f6d1 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1005,6 +1005,7 @@ static uint32_t get_elf_hwcap(void) r |= features & CPU_FEATURE_VIS2 ? HWCAP_SPARC_VIS2 : 0; r |= features & CPU_FEATURE_FMAF ? HWCAP_SPARC_FMAF : 0; r |= features & CPU_FEATURE_VIS3 ? HWCAP_SPARC_VIS3 : 0; + r |= features & CPU_FEATURE_IMA ? HWCAP_SPARC_IMA : 0; #endif return r; diff --git a/target/sparc/cpu-feature.h.inc b/target/sparc/cpu-feature.h.inc index 3913fb4a54..e2e6de9144 100644 --- a/target/sparc/cpu-feature.h.inc +++ b/target/sparc/cpu-feature.h.inc @@ -14,3 +14,4 @@ FEATURE(POWERDOWN) FEATURE(CASA) FEATURE(FMAF) FEATURE(VIS3) +FEATURE(IMA) diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 8ea977b49f..88da5254e8 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -551,6 +551,7 @@ static const char * const feature_name[] = { [CPU_FEATURE_BIT_VIS2] = "vis2", [CPU_FEATURE_BIT_FMAF] = "fmaf", [CPU_FEATURE_BIT_VIS3] = "vis3", + [CPU_FEATURE_BIT_IMA] = "ima", #else [CPU_FEATURE_BIT_MUL] = "mul", [CPU_FEATURE_BIT_DIV] = "div", @@ -883,6 +884,8 @@ static Property sparc_cpu_properties[] = { CPU_FEATURE_BIT_FMAF, false), DEFINE_PROP_BIT("vis3", SPARCCPU, env.def.features, CPU_FEATURE_BIT_VIS3, false), + DEFINE_PROP_BIT("ima", SPARCCPU, env.def.features, + CPU_FEATURE_BIT_IMA, false), #else DEFINE_PROP_BIT("mul", SPARCCPU, env.def.features, CPU_FEATURE_BIT_MUL, false), diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 1d54de5367..5d85e124ed 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -525,6 +525,9 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FNMSUBd 10 ..... 110111 ..... ..... 1010 ..... @d_d_d_d FNMADDs 10 ..... 110111 ..... ..... 1101 ..... @r_r_r_r FNMADDd 10 ..... 110111 ..... ..... 1110 ..... @d_d_d_d + + FPMADDX 10 ..... 110111 ..... ..... 0000 ..... @d_d_d_d + FPMADDXHI 10 ..... 110111 ..... ..... 0100 ..... @d_d_d_d ] NCP 10 ----- 110111 ----- --------- ----- # v8 CPop2 } diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 971e7dae80..640406570d 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -589,6 +589,26 @@ static void gen_op_umulxhi(TCGv dst, TCGv src1, TCGv src2) tcg_gen_mulu2_tl(discard, dst, src1, src2); } +static void gen_op_fpmaddx(TCGv_i64 dst, TCGv_i64 src1, + TCGv_i64 src2, TCGv_i64 src3) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_mul_i64(t, src1, src2); + tcg_gen_add_i64(dst, src3, t); +} + +static void gen_op_fpmaddxhi(TCGv_i64 dst, TCGv_i64 src1, + TCGv_i64 src2, TCGv_i64 src3) +{ + TCGv_i64 l = tcg_temp_new_i64(); + TCGv_i64 h = tcg_temp_new_i64(); + TCGv_i64 z = tcg_constant_i64(0); + + tcg_gen_mulu2_i64(l, h, src1, src2); + tcg_gen_add2_i64(l, dst, l, h, src3, z); +} + static void gen_op_sdiv(TCGv dst, TCGv src1, TCGv src2) { #ifdef TARGET_SPARC64 @@ -2405,6 +2425,7 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_FMAF(C) ((C)->def->features & CPU_FEATURE_FMAF) # define avail_GL(C) ((C)->def->features & CPU_FEATURE_GL) # define avail_HYPV(C) ((C)->def->features & CPU_FEATURE_HYPV) +# define avail_IMA(C) ((C)->def->features & CPU_FEATURE_IMA) # define avail_VIS1(C) ((C)->def->features & CPU_FEATURE_VIS1) # define avail_VIS2(C) ((C)->def->features & CPU_FEATURE_VIS2) # define avail_VIS3(C) ((C)->def->features & CPU_FEATURE_VIS3) @@ -2420,6 +2441,7 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_FMAF(C) false # define avail_GL(C) false # define avail_HYPV(C) false +# define avail_IMA(C) false # define avail_VIS1(C) false # define avail_VIS2(C) false # define avail_VIS3(C) false @@ -5202,6 +5224,8 @@ TRANS(FMADDd, FMAF, do_dddd, a, gen_op_fmaddd) TRANS(FMSUBd, FMAF, do_dddd, a, gen_op_fmsubd) TRANS(FNMSUBd, FMAF, do_dddd, a, gen_op_fnmsubd) TRANS(FNMADDd, FMAF, do_dddd, a, gen_op_fnmaddd) +TRANS(FPMADDX, IMA, do_dddd, a, gen_op_fpmaddx) +TRANS(FPMADDXHI, IMA, do_dddd, a, gen_op_fpmaddxhi) static bool do_env_qqq(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i128, TCGv_env, TCGv_i128, TCGv_i128)) From 90b1433da8d51ecf0ab36d4c61eec949ee2fffbb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 19:44:28 -0700 Subject: [PATCH 30/38] target/sparc: Add feature bit for VIS4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/cpu-feature.h.inc | 1 + target/sparc/translate.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/target/sparc/cpu-feature.h.inc b/target/sparc/cpu-feature.h.inc index e2e6de9144..be81005237 100644 --- a/target/sparc/cpu-feature.h.inc +++ b/target/sparc/cpu-feature.h.inc @@ -15,3 +15,4 @@ FEATURE(CASA) FEATURE(FMAF) FEATURE(VIS3) FEATURE(IMA) +FEATURE(VIS4) diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 640406570d..6fa0bb6ff5 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -2430,6 +2430,7 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_VIS2(C) ((C)->def->features & CPU_FEATURE_VIS2) # define avail_VIS3(C) ((C)->def->features & CPU_FEATURE_VIS3) # define avail_VIS3B(C) avail_VIS3(C) +# define avail_VIS4(C) ((C)->def->features & CPU_FEATURE_VIS4) #else # define avail_32(C) true # define avail_ASR17(C) ((C)->def->features & CPU_FEATURE_ASR17) @@ -2446,6 +2447,7 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_VIS2(C) false # define avail_VIS3(C) false # define avail_VIS3B(C) false +# define avail_VIS4(C) false #endif /* Default case for non jump instructions. */ From b2b48493362b9f77ca66fffb1464f7fc5a32c6e9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 20:00:10 -0700 Subject: [PATCH 31/38] target/sparc: Implement FALIGNDATAi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 1 + target/sparc/translate.c | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 5d85e124ed..0913fe7a86 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -446,6 +446,7 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @d_r_r BSHUFFLE 10 ..... 110110 ..... 0 0100 1100 ..... @d_d_d FEXPAND 10 ..... 110110 00000 0 0100 1101 ..... @d_r2 + FALIGNDATAi 10 ..... 110110 ..... 0 0100 1001 ..... @d_r_d FSRCd 10 ..... 110110 ..... 0 0111 0100 00000 @d_d1 # FSRC1d FSRCs 10 ..... 110110 ..... 0 0111 0101 00000 @r_r1 # FSRC1s diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 6fa0bb6ff5..f51ce2ff2c 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -809,7 +809,8 @@ static void gen_op_fpsubs32s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2) tcg_gen_movcond_i32(TCG_COND_LT, d, v, z, t, r); } -static void gen_op_faligndata(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2) +static void gen_op_faligndata_i(TCGv_i64 dst, TCGv_i64 s1, + TCGv_i64 s2, TCGv gsr) { #ifdef TARGET_SPARC64 TCGv t1, t2, shift; @@ -818,7 +819,7 @@ static void gen_op_faligndata(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2) t2 = tcg_temp_new(); shift = tcg_temp_new(); - tcg_gen_andi_tl(shift, cpu_gsr, 7); + tcg_gen_andi_tl(shift, gsr, 7); tcg_gen_shli_tl(shift, shift, 3); tcg_gen_shl_tl(t1, s1, shift); @@ -836,6 +837,11 @@ static void gen_op_faligndata(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2) #endif } +static void gen_op_faligndata_g(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2) +{ + gen_op_faligndata_i(dst, s1, s2, cpu_gsr); +} + static void gen_op_bshuffle(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2) { #ifdef TARGET_SPARC64 @@ -5068,7 +5074,7 @@ TRANS(FORNOTd, VIS1, do_ddd, a, tcg_gen_orc_i64) TRANS(FORd, VIS1, do_ddd, a, tcg_gen_or_i64) TRANS(FPACK32, VIS1, do_ddd, a, gen_op_fpack32) -TRANS(FALIGNDATAg, VIS1, do_ddd, a, gen_op_faligndata) +TRANS(FALIGNDATAg, VIS1, do_ddd, a, gen_op_faligndata_g) TRANS(BSHUFFLE, VIS2, do_ddd, a, gen_op_bshuffle) TRANS(FHADDd, VIS3, do_ddd, a, gen_op_fhaddd) @@ -5229,6 +5235,27 @@ TRANS(FNMADDd, FMAF, do_dddd, a, gen_op_fnmaddd) TRANS(FPMADDX, IMA, do_dddd, a, gen_op_fpmaddx) TRANS(FPMADDXHI, IMA, do_dddd, a, gen_op_fpmaddxhi) +static bool trans_FALIGNDATAi(DisasContext *dc, arg_r_r_r *a) +{ + TCGv_i64 dst, src1, src2; + TCGv src3; + + if (!avail_VIS4(dc)) { + return false; + } + if (gen_trap_ifnofpu(dc)) { + return true; + } + + dst = tcg_temp_new_i64(); + src1 = gen_load_fpr_D(dc, a->rd); + src2 = gen_load_fpr_D(dc, a->rs2); + src3 = gen_load_gpr(dc, a->rs1); + gen_op_faligndata_i(dst, src1, src2, src3); + gen_store_fpr_D(dc, a->rd, dst); + return advance_pc(dc); +} + static bool do_env_qqq(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i128, TCGv_env, TCGv_i128, TCGv_i128)) { From b99c1bbddd86d170029c3e7ad45bec55d603b927 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 21:04:27 -0700 Subject: [PATCH 32/38] target/sparc: Implement 8-bit FPADD, FPADDS, and FPADDUS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 9 +++++++++ target/sparc/translate.c | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 0913fe7a86..80579642d1 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -509,6 +509,15 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ MOVdTOx 10 ..... 110110 00000 1 0001 0000 ..... @r_d2 MOVxTOd 10 ..... 110110 00000 1 0001 1000 ..... @d_r2 + FPADD8 10 ..... 110110 ..... 1 0010 0100 ..... @d_d_d + FPADDS8 10 ..... 110110 ..... 1 0010 0110 ..... @d_d_d + FPADDUS8 10 ..... 110110 ..... 1 0010 0111 ..... @d_d_d + FPADDUS16 10 ..... 110110 ..... 1 0010 0011 ..... @d_d_d + FPSUB8 10 ..... 110110 ..... 1 0101 0100 ..... @d_d_d + FPSUBS8 10 ..... 110110 ..... 1 0101 0110 ..... @d_d_d + FPSUBUS8 10 ..... 110110 ..... 1 0101 0111 ..... @d_d_d + FPSUBUS16 10 ..... 110110 ..... 1 0101 0011 ..... @d_d_d + FLCMPs 10 000 cc:2 110110 rs1:5 1 0101 0001 rs2:5 FLCMPd 10 000 cc:2 110110 ..... 1 0101 0010 ..... \ rs1=%dfp_rs1 rs2=%dfp_rs2 diff --git a/target/sparc/translate.c b/target/sparc/translate.c index f51ce2ff2c..973c0cec07 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -5025,17 +5025,28 @@ static bool do_gvec_ddd(DisasContext *dc, arg_r_r_r *a, MemOp vece, return advance_pc(dc); } +TRANS(FPADD8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_add) TRANS(FPADD16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_add) TRANS(FPADD32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_add) + +TRANS(FPSUB8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_sub) TRANS(FPSUB16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sub) TRANS(FPSUB32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sub) + TRANS(FCHKSM16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fchksm16) TRANS(FMEAN16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fmean16) +TRANS(FPADDS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_ssadd) TRANS(FPADDS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_ssadd) TRANS(FPADDS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_ssadd) +TRANS(FPADDUS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_usadd) +TRANS(FPADDUS16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_usadd) + +TRANS(FPSUBS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_sssub) TRANS(FPSUBS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sssub) TRANS(FPSUBS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sssub) +TRANS(FPSUBUS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_ussub) +TRANS(FPSUBUS16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_ussub) TRANS(FSLL16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_shlv) TRANS(FSLL32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_shlv) From b3c934dd3457810b5c0810b0d85cf58dda53d8cd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 21:31:54 -0700 Subject: [PATCH 33/38] target/sparc: Implement VIS4 comparisons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VIS4 completes the set, adding missing signed 8-bit ops and missing unsigned 16 and 32-bit ops. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/helper.h | 12 +-- target/sparc/insns.decode | 6 ++ target/sparc/translate.c | 12 +++ target/sparc/vis_helper.c | 170 +++++++++++++++++++++++++++++--------- 4 files changed, 153 insertions(+), 47 deletions(-) diff --git a/target/sparc/helper.h b/target/sparc/helper.h index fe0d8bc593..134e519a37 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -122,19 +122,19 @@ DEF_HELPER_FLAGS_2(fchksm16, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fmean16, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fslas16, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fslas32, TCG_CALL_NO_RWG_SE, i64, i64, i64) -#define VIS_CMPHELPER(name) \ +#define VIS_CMPHELPER(name) \ + DEF_HELPER_FLAGS_2(f##name##8, TCG_CALL_NO_RWG_SE, \ + i64, i64, i64) \ DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_NO_RWG_SE, \ - i64, i64, i64) \ + i64, i64, i64) \ DEF_HELPER_FLAGS_2(f##name##32, TCG_CALL_NO_RWG_SE, \ i64, i64, i64) VIS_CMPHELPER(cmpgt) VIS_CMPHELPER(cmpeq) VIS_CMPHELPER(cmple) VIS_CMPHELPER(cmpne) -DEF_HELPER_FLAGS_2(fcmpeq8, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(fcmpne8, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(fcmpule8, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(fcmpugt8, TCG_CALL_NO_RWG_SE, i64, i64, i64) +VIS_CMPHELPER(cmpugt) +VIS_CMPHELPER(cmpule) DEF_HELPER_FLAGS_2(xmulx, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(xmulxhi, TCG_CALL_NO_RWG_SE, i64, i64, i64) #endif diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 80579642d1..be591171ad 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -425,6 +425,12 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FPCMPUGT8 10 ..... 110110 ..... 1 0010 1000 ..... @r_d_d FPCMPNE8 10 ..... 110110 ..... 1 0010 0010 ..... @r_d_d FPCMPEQ8 10 ..... 110110 ..... 1 0010 1010 ..... @r_d_d + FPCMPLE8 10 ..... 110110 ..... 0 0011 0100 ..... @r_d_d + FPCMPGT8 10 ..... 110110 ..... 0 0011 1100 ..... @r_d_d + FPCMPULE16 10 ..... 110110 ..... 1 0010 1110 ..... @r_d_d + FPCMPUGT16 10 ..... 110110 ..... 1 0010 1011 ..... @r_d_d + FPCMPULE32 10 ..... 110110 ..... 1 0010 1111 ..... @r_d_d + FPCMPUGT32 10 ..... 110110 ..... 1 0010 1100 ..... @r_d_d FMUL8x16 10 ..... 110110 ..... 0 0011 0001 ..... @d_r_d FMUL8x16AU 10 ..... 110110 ..... 0 0011 0011 ..... @d_r_r diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 973c0cec07..e856c811af 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -67,15 +67,21 @@ # define gen_helper_fcmpeq8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpeq16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpeq32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpgt8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpgt16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpgt32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmple8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmple16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmple32 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpne8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpne16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpne32 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpule8 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpule16 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpule32 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpugt8 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpugt16 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpugt32 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fdtox ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fexpand ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmul8sux16 ({ qemu_build_not_reached(); NULL; }) @@ -5119,16 +5125,22 @@ TRANS(FPCMPLE16, VIS1, do_rdd, a, gen_helper_fcmple16) TRANS(FPCMPNE16, VIS1, do_rdd, a, gen_helper_fcmpne16) TRANS(FPCMPGT16, VIS1, do_rdd, a, gen_helper_fcmpgt16) TRANS(FPCMPEQ16, VIS1, do_rdd, a, gen_helper_fcmpeq16) +TRANS(FPCMPULE16, VIS4, do_rdd, a, gen_helper_fcmpule16) +TRANS(FPCMPUGT16, VIS4, do_rdd, a, gen_helper_fcmpugt16) TRANS(FPCMPLE32, VIS1, do_rdd, a, gen_helper_fcmple32) TRANS(FPCMPNE32, VIS1, do_rdd, a, gen_helper_fcmpne32) TRANS(FPCMPGT32, VIS1, do_rdd, a, gen_helper_fcmpgt32) TRANS(FPCMPEQ32, VIS1, do_rdd, a, gen_helper_fcmpeq32) +TRANS(FPCMPULE32, VIS4, do_rdd, a, gen_helper_fcmpule32) +TRANS(FPCMPUGT32, VIS4, do_rdd, a, gen_helper_fcmpugt32) TRANS(FPCMPEQ8, VIS3B, do_rdd, a, gen_helper_fcmpeq8) TRANS(FPCMPNE8, VIS3B, do_rdd, a, gen_helper_fcmpne8) TRANS(FPCMPULE8, VIS3B, do_rdd, a, gen_helper_fcmpule8) TRANS(FPCMPUGT8, VIS3B, do_rdd, a, gen_helper_fcmpugt8) +TRANS(FPCMPLE8, VIS4, do_rdd, a, gen_helper_fcmple8) +TRANS(FPCMPGT8, VIS4, do_rdd, a, gen_helper_fcmpgt8) TRANS(PDISTN, VIS3, do_rdd, a, gen_op_pdistn) TRANS(XMULX, VIS3, do_rrr, a, gen_helper_xmulx) diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index c927a054b8..371f5445a1 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -66,6 +66,7 @@ target_ulong helper_array8(target_ulong rs1, target_ulong rs2) #define VIS_W64(n) w[3 - (n)] #define VIS_SW64(n) sw[3 - (n)] #define VIS_L64(n) l[1 - (n)] +#define VIS_SL64(n) sl[1 - (n)] #define VIS_B32(n) b[3 - (n)] #define VIS_W32(n) w[1 - (n)] #else @@ -74,6 +75,7 @@ target_ulong helper_array8(target_ulong rs1, target_ulong rs2) #define VIS_W64(n) w[n] #define VIS_SW64(n) sw[n] #define VIS_L64(n) l[n] +#define VIS_SL64(n) sl[n] #define VIS_B32(n) b[n] #define VIS_W32(n) w[n] #endif @@ -84,6 +86,7 @@ typedef union { uint16_t w[4]; int16_t sw[4]; uint32_t l[2]; + int32_t sl[2]; uint64_t ll; float64 d; } VIS64; @@ -198,47 +201,6 @@ uint64_t helper_fexpand(uint32_t src2) return d.ll; } -#define VIS_CMPHELPER(name, F) \ - uint64_t name##16(uint64_t src1, uint64_t src2) \ - { \ - VIS64 s, d; \ - \ - s.ll = src1; \ - d.ll = src2; \ - \ - d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \ - d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \ - d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \ - d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \ - d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \ - \ - return d.ll; \ - } \ - \ - uint64_t name##32(uint64_t src1, uint64_t src2) \ - { \ - VIS64 s, d; \ - \ - s.ll = src1; \ - d.ll = src2; \ - \ - d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \ - d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \ - d.VIS_L64(1) = 0; \ - \ - return d.ll; \ - } - -#define FCMPGT(a, b) ((a) > (b)) -#define FCMPEQ(a, b) ((a) == (b)) -#define FCMPLE(a, b) ((a) <= (b)) -#define FCMPNE(a, b) ((a) != (b)) - -VIS_CMPHELPER(helper_fcmpgt, FCMPGT) -VIS_CMPHELPER(helper_fcmpeq, FCMPEQ) -VIS_CMPHELPER(helper_fcmple, FCMPLE) -VIS_CMPHELPER(helper_fcmpne, FCMPNE) - uint64_t helper_fcmpeq8(uint64_t src1, uint64_t src2) { uint64_t a = src1 ^ src2; @@ -260,6 +222,25 @@ uint64_t helper_fcmpne8(uint64_t src1, uint64_t src2) return helper_fcmpeq8(src1, src2) ^ 0xff; } +uint64_t helper_fcmple8(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 8; ++i) { + r |= (s1.VIS_SB64(i) <= s2.VIS_SB64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpgt8(uint64_t src1, uint64_t src2) +{ + return helper_fcmple8(src1, src2) ^ 0xff; +} + uint64_t helper_fcmpule8(uint64_t src1, uint64_t src2) { VIS64 s1, s2; @@ -279,6 +260,113 @@ uint64_t helper_fcmpugt8(uint64_t src1, uint64_t src2) return helper_fcmpule8(src1, src2) ^ 0xff; } +uint64_t helper_fcmpeq16(uint64_t src1, uint64_t src2) +{ + uint64_t a = src1 ^ src2; + uint64_t m = 0x7fff7fff7fff7fffULL; + uint64_t c = ~(((a & m) + m) | a | m); + + /* a...............b...............c...............d............... */ + c |= c << 15; + /* ab..............bc..............cd..............d............... */ + c |= c << 30; + /* abcd............bcd.............cd..............d............... */ + return c >> 60; +} + +uint64_t helper_fcmpne16(uint64_t src1, uint64_t src2) +{ + return helper_fcmpeq16(src1, src2) ^ 0xf; +} + +uint64_t helper_fcmple16(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 4; ++i) { + r |= (s1.VIS_SW64(i) <= s2.VIS_SW64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpgt16(uint64_t src1, uint64_t src2) +{ + return helper_fcmple16(src1, src2) ^ 0xf; +} + +uint64_t helper_fcmpule16(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 4; ++i) { + r |= (s1.VIS_W64(i) <= s2.VIS_W64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpugt16(uint64_t src1, uint64_t src2) +{ + return helper_fcmpule16(src1, src2) ^ 0xf; +} + +uint64_t helper_fcmpeq32(uint64_t src1, uint64_t src2) +{ + uint64_t a = src1 ^ src2; + return ((uint32_t)a == 0) | (a >> 32 ? 0 : 2); +} + +uint64_t helper_fcmpne32(uint64_t src1, uint64_t src2) +{ + uint64_t a = src1 ^ src2; + return ((uint32_t)a != 0) | (a >> 32 ? 2 : 0); +} + +uint64_t helper_fcmple32(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 2; ++i) { + r |= (s1.VIS_SL64(i) <= s2.VIS_SL64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpgt32(uint64_t src1, uint64_t src2) +{ + return helper_fcmple32(src1, src2) ^ 3; +} + +uint64_t helper_fcmpule32(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 2; ++i) { + r |= (s1.VIS_L64(i) <= s2.VIS_L64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpugt32(uint64_t src1, uint64_t src2) +{ + return helper_fcmpule32(src1, src2) ^ 3; +} + uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2) { int i; From db11dfea83e35c534fef8a86603b72354be9d71b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 21:39:59 -0700 Subject: [PATCH 34/38] target/sparc: Implement FPMIN, FPMAX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 14 ++++++++++++++ target/sparc/translate.c | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index be591171ad..2ebee5a1ca 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -524,6 +524,20 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ FPSUBUS8 10 ..... 110110 ..... 1 0101 0111 ..... @d_d_d FPSUBUS16 10 ..... 110110 ..... 1 0101 0011 ..... @d_d_d + FPMIN8 10 ..... 110110 ..... 1 0001 1010 ..... @d_d_d + FPMIN16 10 ..... 110110 ..... 1 0001 1011 ..... @d_d_d + FPMIN32 10 ..... 110110 ..... 1 0001 1100 ..... @d_d_d + FPMINU8 10 ..... 110110 ..... 1 0101 1010 ..... @d_d_d + FPMINU16 10 ..... 110110 ..... 1 0101 1011 ..... @d_d_d + FPMINU32 10 ..... 110110 ..... 1 0101 1100 ..... @d_d_d + + FPMAX8 10 ..... 110110 ..... 1 0001 1101 ..... @d_d_d + FPMAX16 10 ..... 110110 ..... 1 0001 1110 ..... @d_d_d + FPMAX32 10 ..... 110110 ..... 1 0001 1111 ..... @d_d_d + FPMAXU8 10 ..... 110110 ..... 1 0101 1101 ..... @d_d_d + FPMAXU16 10 ..... 110110 ..... 1 0101 1110 ..... @d_d_d + FPMAXU32 10 ..... 110110 ..... 1 0101 1111 ..... @d_d_d + FLCMPs 10 000 cc:2 110110 rs1:5 1 0101 0001 rs2:5 FLCMPd 10 000 cc:2 110110 ..... 1 0101 0010 ..... \ rs1=%dfp_rs1 rs2=%dfp_rs2 diff --git a/target/sparc/translate.c b/target/sparc/translate.c index e856c811af..5bed23a00b 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -5061,6 +5061,20 @@ TRANS(FSRL32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_shrv) TRANS(FSRA16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sarv) TRANS(FSRA32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sarv) +TRANS(FPMIN8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_smin) +TRANS(FPMIN16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_smin) +TRANS(FPMIN32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_smin) +TRANS(FPMINU8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_umin) +TRANS(FPMINU16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_umin) +TRANS(FPMINU32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_umin) + +TRANS(FPMAX8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_smax) +TRANS(FPMAX16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_smax) +TRANS(FPMAX32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_smax) +TRANS(FPMAXU8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_umax) +TRANS(FPMAXU16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_umax) +TRANS(FPMAXU32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_umax) + static bool do_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i64, TCGv_i64)) { From 56f2ef9c7958320d574448f555cc3a82e500c485 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 21:44:01 -0700 Subject: [PATCH 35/38] target/sparc: Implement SUBXC, SUBXCcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 2 ++ target/sparc/translate.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 2ebee5a1ca..a7720560f8 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -447,6 +447,8 @@ FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ PDISTN 10 ..... 110110 ..... 0 0011 1111 ..... @r_d_d FMEAN16 10 ..... 110110 ..... 0 0100 0000 ..... @d_d_d + SUBXC 10 ..... 110110 ..... 0 0100 0001 ..... @r_r_r + SUBXCcc 10 ..... 110110 ..... 0 0100 0011 ..... @r_r_r FCHKSM16 10 ..... 110110 ..... 0 0100 0100 ..... @d_d_d FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @d_d_d FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @d_r_r diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 5bed23a00b..b10936a61a 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -515,6 +515,17 @@ static void gen_op_subccc(TCGv dst, TCGv src1, TCGv src2) gen_op_subcc_int(dst, src1, src2, gen_carry32()); } +static void gen_op_subxc(TCGv dst, TCGv src1, TCGv src2) +{ + tcg_gen_sub_tl(dst, src1, src2); + tcg_gen_sub_tl(dst, dst, cpu_cc_C); +} + +static void gen_op_subxccc(TCGv dst, TCGv src1, TCGv src2) +{ + gen_op_subcc_int(dst, src1, src2, cpu_cc_C); +} + static void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2) { TCGv zero = tcg_constant_tl(0); @@ -3963,6 +3974,9 @@ TRANS(ARRAY32, VIS1, do_rrr, a, gen_op_array32) TRANS(ADDXC, VIS3, do_rrr, a, gen_op_addxc) TRANS(ADDXCcc, VIS3, do_rrr, a, gen_op_addxccc) +TRANS(SUBXC, VIS4, do_rrr, a, gen_op_subxc) +TRANS(SUBXCcc, VIS4, do_rrr, a, gen_op_subxccc) + TRANS(UMULXHI, VIS3, do_rrr, a, gen_op_umulxhi) static void gen_op_alignaddr(TCGv dst, TCGv s1, TCGv s2) From 6fbc032cbc83ba80009c4a2a18e4d5578bc9ba35 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 22:12:24 -0700 Subject: [PATCH 36/38] target/sparc: Implement MWAIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/insns.decode | 1 + target/sparc/translate.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index a7720560f8..fbcb4f7aef 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -124,6 +124,7 @@ CALL 01 i:s30 WRTICK_CMPR 10 10111 110000 ..... . ............. @n_r_ri WRSTICK 10 11000 110000 ..... . ............. @n_r_ri WRSTICK_CMPR 10 11001 110000 ..... . ............. @n_r_ri + WRMWAIT 10 11100 110000 ..... . ............. @n_r_ri ] # Before v8, rs1==0 was WRY, and the rest executed as nop. [ diff --git a/target/sparc/translate.c b/target/sparc/translate.c index b10936a61a..6d0ad38fd5 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -3344,6 +3344,17 @@ static void do_wrpowerdown(DisasContext *dc, TCGv src) TRANS(WRPOWERDOWN, POWERDOWN, do_wr_special, a, supervisor(dc), do_wrpowerdown) +static void do_wrmwait(DisasContext *dc, TCGv src) +{ + /* + * TODO: This is a stub version of mwait, which merely recognizes + * interrupts immediately and does not wait. + */ + dc->base.is_jmp = DISAS_EXIT; +} + +TRANS(WRMWAIT, VIS4, do_wr_special, a, true, do_wrmwait) + static void do_wrpsr(DisasContext *dc, TCGv src) { gen_helper_wrpsr(tcg_env, src); From eeb3f592cb364f9d5c70c5525fd90e43b216012d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 22:13:06 -0700 Subject: [PATCH 37/38] target/sparc: Implement monitor ASIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ignore the "monitor" portion and treat them the same as their base ASIs. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/asi.h | 4 ++++ target/sparc/ldst_helper.c | 4 ++++ target/sparc/translate.c | 8 ++++++++ 3 files changed, 16 insertions(+) diff --git a/target/sparc/asi.h b/target/sparc/asi.h index a66829674b..14ffaa3842 100644 --- a/target/sparc/asi.h +++ b/target/sparc/asi.h @@ -144,6 +144,8 @@ * ASIs, "(4V)" designates SUN4V specific ASIs. "(NG4)" designates SPARC-T4 * and later ASIs. */ +#define ASI_MON_AIUP 0x12 /* (VIS4) Primary, user, monitor */ +#define ASI_MON_AIUS 0x13 /* (VIS4) Secondary, user, monitor */ #define ASI_REAL 0x14 /* Real address, cacheable */ #define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cacheable */ #define ASI_REAL_IO 0x15 /* Real address, non-cacheable */ @@ -257,6 +259,8 @@ #define ASI_UDBL_CONTROL_R 0x7f /* External UDB control regs rd low*/ #define ASI_INTR_R 0x7f /* IRQ vector dispatch read */ #define ASI_INTR_DATAN_R 0x7f /* (III) In irq vector data reg N */ +#define ASI_MON_P 0x84 /* (VIS4) Primary, monitor */ +#define ASI_MON_S 0x85 /* (VIS4) Secondary, monitor */ #define ASI_PIC 0xb0 /* (NG4) PIC registers */ #define ASI_PST8_P 0xc0 /* Primary, 8 8-bit, partial */ #define ASI_PST8_S 0xc1 /* Secondary, 8 8-bit, partial */ diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c index 7bdf99e0c0..2d48e98bf4 100644 --- a/target/sparc/ldst_helper.c +++ b/target/sparc/ldst_helper.c @@ -1395,6 +1395,10 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, case ASI_TWINX_PL: /* Primary, twinx, LE */ case ASI_TWINX_S: /* Secondary, twinx */ case ASI_TWINX_SL: /* Secondary, twinx, LE */ + case ASI_MON_P: + case ASI_MON_S: + case ASI_MON_AIUP: + case ASI_MON_AIUS: /* These are always handled inline. */ g_assert_not_reached(); diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 6d0ad38fd5..113639083b 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -1607,6 +1607,7 @@ static DisasASI resolve_asi(DisasContext *dc, int asi, MemOp memop) case ASI_BLK_AIUP_L_4V: case ASI_BLK_AIUP: case ASI_BLK_AIUPL: + case ASI_MON_AIUP: mem_idx = MMU_USER_IDX; break; case ASI_AIUS: /* As if user secondary */ @@ -1617,6 +1618,7 @@ static DisasASI resolve_asi(DisasContext *dc, int asi, MemOp memop) case ASI_BLK_AIUS_L_4V: case ASI_BLK_AIUS: case ASI_BLK_AIUSL: + case ASI_MON_AIUS: mem_idx = MMU_USER_SECONDARY_IDX; break; case ASI_S: /* Secondary */ @@ -1630,6 +1632,7 @@ static DisasASI resolve_asi(DisasContext *dc, int asi, MemOp memop) case ASI_FL8_SL: case ASI_FL16_S: case ASI_FL16_SL: + case ASI_MON_S: if (mem_idx == MMU_USER_IDX) { mem_idx = MMU_USER_SECONDARY_IDX; } else if (mem_idx == MMU_KERNEL_IDX) { @@ -1647,6 +1650,7 @@ static DisasASI resolve_asi(DisasContext *dc, int asi, MemOp memop) case ASI_FL8_PL: case ASI_FL16_P: case ASI_FL16_PL: + case ASI_MON_P: break; } switch (asi) { @@ -1664,6 +1668,10 @@ static DisasASI resolve_asi(DisasContext *dc, int asi, MemOp memop) case ASI_SL: case ASI_P: case ASI_PL: + case ASI_MON_P: + case ASI_MON_S: + case ASI_MON_AIUP: + case ASI_MON_AIUS: type = GET_ASI_DIRECT; break; case ASI_TWINX_REAL: From b12b72274320ce3ee516d963efd48766163cb240 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 Nov 2023 22:19:26 -0700 Subject: [PATCH 38/38] target/sparc: Enable VIS4 feature bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/sparc/cpu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 88da5254e8..9bacfb68cb 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -552,6 +552,7 @@ static const char * const feature_name[] = { [CPU_FEATURE_BIT_FMAF] = "fmaf", [CPU_FEATURE_BIT_VIS3] = "vis3", [CPU_FEATURE_BIT_IMA] = "ima", + [CPU_FEATURE_BIT_VIS4] = "vis4", #else [CPU_FEATURE_BIT_MUL] = "mul", [CPU_FEATURE_BIT_DIV] = "div", @@ -886,6 +887,8 @@ static Property sparc_cpu_properties[] = { CPU_FEATURE_BIT_VIS3, false), DEFINE_PROP_BIT("ima", SPARCCPU, env.def.features, CPU_FEATURE_BIT_IMA, false), + DEFINE_PROP_BIT("vis4", SPARCCPU, env.def.features, + CPU_FEATURE_BIT_VIS4, false), #else DEFINE_PROP_BIT("mul", SPARCCPU, env.def.features, CPU_FEATURE_BIT_MUL, false),