From ced5cfffeea22ef9003c95a83ce7c31e428702fb Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Sat, 5 Mar 2022 07:16:46 +0100 Subject: [PATCH 01/13] Use long endian options for ppc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC option pairs -mlittle/-mlittle-endian and -mbig/-mbig-endian are equivalent on the ppc64 architecture. However, Clang supports only the long version of the options. Use the longer form in configure to properly support both the GCC and Clang compilers. In addition, fix this issue in the tcg test configure. Signed-off-by: Miroslav Rezanina Reviewed-by: Greg Kurz Message-Id: <20220131091714.4825-1-mrezanin@redhat.com> Signed-off-by: Cédric Le Goater --- configure | 4 ++-- tests/tcg/configure.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configure b/configure index c56ed53ee3..81618708e4 100755 --- a/configure +++ b/configure @@ -630,10 +630,10 @@ case "$cpu" in ppc) CPU_CFLAGS="-m32" ;; ppc64) - CPU_CFLAGS="-m64 -mbig" ;; + CPU_CFLAGS="-m64 -mbig-endian" ;; ppc64le) cpu="ppc64" - CPU_CFLAGS="-m64 -mlittle" ;; + CPU_CFLAGS="-m64 -mlittle-endian" ;; s390) CPU_CFLAGS="-m31" ;; diff --git a/tests/tcg/configure.sh b/tests/tcg/configure.sh index 0663bd19f4..ed4b5ccb1f 100755 --- a/tests/tcg/configure.sh +++ b/tests/tcg/configure.sh @@ -64,9 +64,9 @@ fi : ${cross_cc_ppc="powerpc-linux-gnu-gcc"} : ${cross_cc_cflags_ppc="-m32"} : ${cross_cc_ppc64="powerpc64-linux-gnu-gcc"} -: ${cross_cc_cflags_ppc64="-m64 -mbig"} +: ${cross_cc_cflags_ppc64="-m64 -mbig-endian"} : ${cross_cc_ppc64le="$cross_cc_ppc64"} -: ${cross_cc_cflags_ppc64le="-m64 -mlittle"} +: ${cross_cc_cflags_ppc64le="-m64 -mlittle-endian"} : ${cross_cc_riscv64="riscv64-linux-gnu-gcc"} : ${cross_cc_s390x="s390x-linux-gnu-gcc"} : ${cross_cc_sh4="sh4-linux-gnu-gcc"} From d21939ca8b71a7c5479866e61a40d7b7b28a1bc0 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Sat, 5 Mar 2022 07:16:46 +0100 Subject: [PATCH 02/13] tests/tcg/ppc64le: use 
inline asm instead of __builtin_mtfsf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LLVM/Clang does not support __builtin_mtfsf. Acked-by: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: Matheus Ferst Message-Id: <20220304165417.1981159-2-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- tests/tcg/ppc64le/mtfsf.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/tcg/ppc64le/mtfsf.c b/tests/tcg/ppc64le/mtfsf.c index b3d31f3637..bed5b1afa4 100644 --- a/tests/tcg/ppc64le/mtfsf.c +++ b/tests/tcg/ppc64le/mtfsf.c @@ -1,8 +1,12 @@ #include +#include #include #include #include +#define MTFSF(FLM, FRB) asm volatile ("mtfsf %0, %1" :: "i" (FLM), "f" (FRB)) +#define MFFS(FRT) asm("mffs %0" : "=f" (FRT)) + #define FPSCR_VE 7 /* Floating-point invalid operation exception enable */ #define FPSCR_VXSOFT 10 /* Floating-point invalid operation exception (soft) */ #define FPSCR_FI 17 /* Floating-point fraction inexact */ @@ -21,10 +25,7 @@ void sigfpe_handler(int sig, siginfo_t *si, void *ucontext) int main(void) { - union { - double d; - long long ll; - } fpscr; + uint64_t fpscr; struct sigaction sa = { .sa_sigaction = sigfpe_handler, @@ -40,10 +41,9 @@ int main(void) prctl(PR_SET_FPEXC, PR_FP_EXC_PRECISE); /* First test if the FI bit is being set correctly */ - fpscr.ll = FP_FI; - __builtin_mtfsf(0b11111111, fpscr.d); - fpscr.d = __builtin_mffs(); - assert((fpscr.ll & FP_FI) != 0); + MTFSF(0b11111111, FP_FI); + MFFS(fpscr); + assert((fpscr & FP_FI) != 0); /* Then test if the deferred exception is being called correctly */ sigaction(SIGFPE, &sa, NULL); @@ -54,8 +54,7 @@ int main(void) * But if a different exception is chosen si_code check should * change accordingly. 
*/ - fpscr.ll = FP_VE | FP_VXSOFT; - __builtin_mtfsf(0b11111111, fpscr.d); + MTFSF(0b11111111, FP_VE | FP_VXSOFT); return 1; } From 4e4b5a3eacc51e60cdf2dff585702e560d674268 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Sat, 5 Mar 2022 07:16:46 +0100 Subject: [PATCH 03/13] target/ppc: change xs[n]madd[am]sp to use float64r32_muladd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change VSX Scalar Multiply-Add/Subtract Type-A/M Single Precision helpers to use float64r32_muladd. This method should correctly handle all rounding modes, so the workaround for float_round_nearest_even can be dropped. Reviewed-by: Richard Henderson Signed-off-by: Matheus Ferst Message-Id: <20220304165417.1981159-3-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- target/ppc/fpu_helper.c | 54 ++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 36 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 8f970288f5..2cad05c9cf 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2156,9 +2156,8 @@ VSX_TSQRT(xvtsqrtsp, 4, float32, VsrW(i), -126, 23) * maddflgs - flags for the float*muladd routine that control the * various forms (madd, msub, nmadd, nmsub) * sfprf - set FPRF - * r2sp - round intermediate double precision result to single precision */ -#define VSX_MADD(op, nels, tp, fld, maddflgs, sfprf, r2sp) \ +#define VSX_MADD(op, nels, tp, fld, maddflgs, sfprf) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ ppc_vsr_t *s1, ppc_vsr_t *s2, ppc_vsr_t *s3) \ { \ @@ -2170,20 +2169,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ for (i = 0; i < nels; i++) { \ float_status tstat = env->fp_status; \ set_float_exception_flags(0, &tstat); \ - if (r2sp && (tstat.float_rounding_mode == float_round_nearest_even)) {\ - /* \ - * Avoid double rounding errors by rounding the intermediate \ - * result to odd. 
\ - */ \ - set_float_rounding_mode(float_round_to_zero, &tstat); \ - t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, \ - maddflgs, &tstat); \ - t.fld |= (get_float_exception_flags(&tstat) & \ - float_flag_inexact) != 0; \ - } else { \ - t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, \ - maddflgs, &tstat); \ - } \ + t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, maddflgs, &tstat); \ env->fp_status.float_exception_flags |= tstat.float_exception_flags; \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ @@ -2191,10 +2177,6 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ sfprf, GETPC()); \ } \ \ - if (r2sp) { \ - t.fld = do_frsp(env, t.fld, GETPC()); \ - } \ - \ if (sfprf) { \ helper_compute_fprf_float64(env, t.fld); \ } \ @@ -2203,24 +2185,24 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ do_float_check_status(env, GETPC()); \ } -VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1, 0) -VSX_MADD(XSMSUBDP, 1, float64, VsrD(0), MSUB_FLGS, 1, 0) -VSX_MADD(XSNMADDDP, 1, float64, VsrD(0), NMADD_FLGS, 1, 0) -VSX_MADD(XSNMSUBDP, 1, float64, VsrD(0), NMSUB_FLGS, 1, 0) -VSX_MADD(XSMADDSP, 1, float64, VsrD(0), MADD_FLGS, 1, 1) -VSX_MADD(XSMSUBSP, 1, float64, VsrD(0), MSUB_FLGS, 1, 1) -VSX_MADD(XSNMADDSP, 1, float64, VsrD(0), NMADD_FLGS, 1, 1) -VSX_MADD(XSNMSUBSP, 1, float64, VsrD(0), NMSUB_FLGS, 1, 1) +VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1) +VSX_MADD(XSMSUBDP, 1, float64, VsrD(0), MSUB_FLGS, 1) +VSX_MADD(XSNMADDDP, 1, float64, VsrD(0), NMADD_FLGS, 1) +VSX_MADD(XSNMSUBDP, 1, float64, VsrD(0), NMSUB_FLGS, 1) +VSX_MADD(XSMADDSP, 1, float64r32, VsrD(0), MADD_FLGS, 1) +VSX_MADD(XSMSUBSP, 1, float64r32, VsrD(0), MSUB_FLGS, 1) +VSX_MADD(XSNMADDSP, 1, float64r32, VsrD(0), NMADD_FLGS, 1) +VSX_MADD(XSNMSUBSP, 1, float64r32, VsrD(0), NMSUB_FLGS, 1) -VSX_MADD(xvmadddp, 2, float64, VsrD(i), MADD_FLGS, 0, 0) -VSX_MADD(xvmsubdp, 2, float64, VsrD(i), MSUB_FLGS, 0, 0) -VSX_MADD(xvnmadddp, 2, float64, VsrD(i), NMADD_FLGS, 0, 0) 
-VSX_MADD(xvnmsubdp, 2, float64, VsrD(i), NMSUB_FLGS, 0, 0) +VSX_MADD(xvmadddp, 2, float64, VsrD(i), MADD_FLGS, 0) +VSX_MADD(xvmsubdp, 2, float64, VsrD(i), MSUB_FLGS, 0) +VSX_MADD(xvnmadddp, 2, float64, VsrD(i), NMADD_FLGS, 0) +VSX_MADD(xvnmsubdp, 2, float64, VsrD(i), NMSUB_FLGS, 0) -VSX_MADD(xvmaddsp, 4, float32, VsrW(i), MADD_FLGS, 0, 0) -VSX_MADD(xvmsubsp, 4, float32, VsrW(i), MSUB_FLGS, 0, 0) -VSX_MADD(xvnmaddsp, 4, float32, VsrW(i), NMADD_FLGS, 0, 0) -VSX_MADD(xvnmsubsp, 4, float32, VsrW(i), NMSUB_FLGS, 0, 0) +VSX_MADD(xvmaddsp, 4, float32, VsrW(i), MADD_FLGS, 0) +VSX_MADD(xvmsubsp, 4, float32, VsrW(i), MSUB_FLGS, 0) +VSX_MADD(xvnmaddsp, 4, float32, VsrW(i), NMADD_FLGS, 0) +VSX_MADD(xvnmsubsp, 4, float32, VsrW(i), NMSUB_FLGS, 0) /* * VSX_MADDQ - VSX floating point quad-precision muliply/add From 8189cb850728fd52cd98f5ee4640b5dc4e40239d Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Sat, 5 Mar 2022 07:16:46 +0100 Subject: [PATCH 04/13] tests/tcg/ppc64le: drop __int128 usage in bcdsub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using __int128 with inline asm constraints like "v" generates incorrect code when compiling with LLVM/Clang (e.g., only one doubleword of the VSR is loaded). Instead, use a GPR pair to pass the 128-bit value and load the VSR with mtvsrd/xxmrghd. 
Reviewed-by: Richard Henderson Signed-off-by: Matheus Ferst Message-Id: <20220304165417.1981159-4-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- tests/tcg/ppc64le/bcdsub.c | 123 +++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 65 deletions(-) diff --git a/tests/tcg/ppc64le/bcdsub.c b/tests/tcg/ppc64le/bcdsub.c index 8c188cae6d..12da19b78e 100644 --- a/tests/tcg/ppc64le/bcdsub.c +++ b/tests/tcg/ppc64le/bcdsub.c @@ -1,6 +1,7 @@ #include #include #include +#include #define CRF_LT (1 << 3) #define CRF_GT (1 << 2) @@ -8,24 +9,39 @@ #define CRF_SO (1 << 0) #define UNDEF 0 -#define BCDSUB(vra, vrb, ps) \ - asm ("bcdsub. %1,%2,%3,%4;" \ - "mfocrf %0,0b10;" \ - : "=r" (cr), "=v" (vrt) \ - : "v" (vra), "v" (vrb), "i" (ps) \ - : ); +/* + * Use GPR pairs to load the VSR values and place the resulting VSR and CR6 in + * th, tl, and cr. Note that we avoid newer instructions (e.g., mtvsrdd/mfvsrld) + * so we can run this test on POWER8 machines. + */ +#define BCDSUB(AH, AL, BH, BL, PS) \ + asm ("mtvsrd 32, %3\n\t" \ + "mtvsrd 33, %4\n\t" \ + "xxmrghd 32, 32, 33\n\t" \ + "mtvsrd 33, %5\n\t" \ + "mtvsrd 34, %6\n\t" \ + "xxmrghd 33, 33, 34\n\t" \ + "bcdsub. 0, 0, 1, %7\n\t" \ + "mfocrf %0, 0b10\n\t" \ + "mfvsrd %1, 32\n\t" \ + "xxswapd 32, 32\n\t" \ + "mfvsrd %2, 32\n\t" \ + : "=r" (cr), "=r" (th), "=r" (tl) \ + : "r" (AH), "r" (AL), "r" (BH), "r" (BL), "i" (PS) \ + : "v0", "v1", "v2"); -#define TEST(vra, vrb, ps, exp_res, exp_cr6) \ +#define TEST(AH, AL, BH, BL, PS, TH, TL, CR6) \ do { \ - __int128 vrt = 0; \ int cr = 0; \ - BCDSUB(vra, vrb, ps); \ - if (exp_res) \ - assert(vrt == exp_res); \ - assert((cr >> 4) == exp_cr6); \ + uint64_t th, tl; \ + BCDSUB(AH, AL, BH, BL, PS); \ + if (TH != UNDEF || TL != UNDEF) { \ + assert(tl == TL); \ + assert(th == TH); \ + } \ + assert((cr >> 4) == CR6); \ } while (0) - /* * Unbounded result is equal to zero: * sign = (PS) ? 
0b1111 : 0b1100 @@ -33,13 +49,13 @@ */ void test_bcdsub_eq(void) { - __int128 a, b; - /* maximum positive BCD value */ - a = b = (((__int128) 0x9999999999999999) << 64 | 0x999999999999999c); - - TEST(a, b, 0, 0xc, CRF_EQ); - TEST(a, b, 1, 0xf, CRF_EQ); + TEST(0x9999999999999999, 0x999999999999999c, + 0x9999999999999999, 0x999999999999999c, + 0, 0x0, 0xc, CRF_EQ); + TEST(0x9999999999999999, 0x999999999999999c, + 0x9999999999999999, 0x999999999999999c, + 1, 0x0, 0xf, CRF_EQ); } /* @@ -49,21 +65,16 @@ void test_bcdsub_eq(void) */ void test_bcdsub_gt(void) { - __int128 a, b, c; + /* maximum positive and negative one BCD values */ + TEST(0x9999999999999999, 0x999999999999999c, 0x0, 0x1d, 0, + 0x0, 0xc, (CRF_GT | CRF_SO)); + TEST(0x9999999999999999, 0x999999999999999c, 0x0, 0x1d, 1, + 0x0, 0xf, (CRF_GT | CRF_SO)); - /* maximum positive BCD value */ - a = (((__int128) 0x9999999999999999) << 64 | 0x999999999999999c); - - /* negative one BCD value */ - b = (__int128) 0x1d; - - TEST(a, b, 0, 0xc, (CRF_GT | CRF_SO)); - TEST(a, b, 1, 0xf, (CRF_GT | CRF_SO)); - - c = (((__int128) 0x9999999999999999) << 64 | 0x999999999999998c); - - TEST(c, b, 0, a, CRF_GT); - TEST(c, b, 1, (a | 0x3), CRF_GT); + TEST(0x9999999999999999, 0x999999999999998c, 0x0, 0x1d, 0, + 0x9999999999999999, 0x999999999999999c, CRF_GT); + TEST(0x9999999999999999, 0x999999999999998c, 0x0, 0x1d, 1, + 0x9999999999999999, 0x999999999999999f, CRF_GT); } /* @@ -73,45 +84,27 @@ void test_bcdsub_gt(void) */ void test_bcdsub_lt(void) { - __int128 a, b; + /* positive zero and positive one BCD values */ + TEST(0x0, 0xc, 0x0, 0x1c, 0, 0x0, 0x1d, CRF_LT); + TEST(0x0, 0xc, 0x0, 0x1c, 1, 0x0, 0x1d, CRF_LT); - /* positive zero BCD value */ - a = (__int128) 0xc; - - /* positive one BCD value */ - b = (__int128) 0x1c; - - TEST(a, b, 0, 0x1d, CRF_LT); - TEST(a, b, 1, 0x1d, CRF_LT); - - /* maximum negative BCD value */ - a = (((__int128) 0x9999999999999999) << 64 | 0x999999999999999d); - - /* positive one BCD value */ - b = 
(__int128) 0x1c; - - TEST(a, b, 0, 0xd, (CRF_LT | CRF_SO)); - TEST(a, b, 1, 0xd, (CRF_LT | CRF_SO)); + /* maximum negative and positive one BCD values */ + TEST(0x9999999999999999, 0x999999999999999d, 0x0, 0x1c, 0, + 0x0, 0xd, (CRF_LT | CRF_SO)); + TEST(0x9999999999999999, 0x999999999999999d, 0x0, 0x1c, 1, + 0x0, 0xd, (CRF_LT | CRF_SO)); } void test_bcdsub_invalid(void) { - __int128 a, b; + TEST(0x0, 0x1c, 0x0, 0xf00, 0, UNDEF, UNDEF, CRF_SO); + TEST(0x0, 0x1c, 0x0, 0xf00, 1, UNDEF, UNDEF, CRF_SO); - /* positive one BCD value */ - a = (__int128) 0x1c; - b = 0xf00; + TEST(0x0, 0xf00, 0x0, 0x1c, 0, UNDEF, UNDEF, CRF_SO); + TEST(0x0, 0xf00, 0x0, 0x1c, 1, UNDEF, UNDEF, CRF_SO); - TEST(a, b, 0, UNDEF, CRF_SO); - TEST(a, b, 1, UNDEF, CRF_SO); - - TEST(b, a, 0, UNDEF, CRF_SO); - TEST(b, a, 1, UNDEF, CRF_SO); - - a = 0xbad; - - TEST(a, b, 0, UNDEF, CRF_SO); - TEST(a, b, 1, UNDEF, CRF_SO); + TEST(0x0, 0xbad, 0x0, 0xf00, 0, UNDEF, UNDEF, CRF_SO); + TEST(0x0, 0xbad, 0x0, 0xf00, 1, UNDEF, UNDEF, CRF_SO); } int main(void) From 63c2b746bebef3ec9de4f29d5beeea4ee809c892 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Sat, 5 Mar 2022 07:16:46 +0100 Subject: [PATCH 05/13] tests/tcg/ppc64le: emit bcdsub with .long when needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on GCC docs[1], we use the '-mpower8-vector' flag at config-time to detect the toolchain support to the bcdsub instruction. LLVM/Clang supports this flag since version 3.6[2], but the instruction and related builtins were only added in LLVM 14[3]. In the absence of other means to detect this support at config-time, we resort to __has_builtin to identify the presence of __builtin_bcdsub at compile-time. If the builtin is not available, the instruction is emitted with a ".long". 
[1] https://gcc.gnu.org/onlinedocs/gcc-8.3.0/gcc/PowerPC-AltiVec_002fVSX-Built-in-Functions.html [2] https://github.com/llvm/llvm-project/commit/59eb767e11d4ffefb5f55409524e5c8416b2b0db [3] https://github.com/llvm/llvm-project/commit/c933c2eb334660c131f4afc9d194fafb0cec0423 Reviewed-by: Richard Henderson Signed-off-by: Matheus Ferst Message-Id: <20220304165417.1981159-5-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- tests/tcg/ppc64le/bcdsub.c | 71 ++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/tests/tcg/ppc64le/bcdsub.c b/tests/tcg/ppc64le/bcdsub.c index 12da19b78e..87c8c44a44 100644 --- a/tests/tcg/ppc64le/bcdsub.c +++ b/tests/tcg/ppc64le/bcdsub.c @@ -9,37 +9,48 @@ #define CRF_SO (1 << 0) #define UNDEF 0 -/* - * Use GPR pairs to load the VSR values and place the resulting VSR and CR6 in - * th, tl, and cr. Note that we avoid newer instructions (e.g., mtvsrdd/mfvsrld) - * so we can run this test on POWER8 machines. - */ -#define BCDSUB(AH, AL, BH, BL, PS) \ - asm ("mtvsrd 32, %3\n\t" \ - "mtvsrd 33, %4\n\t" \ - "xxmrghd 32, 32, 33\n\t" \ - "mtvsrd 33, %5\n\t" \ - "mtvsrd 34, %6\n\t" \ - "xxmrghd 33, 33, 34\n\t" \ - "bcdsub. 
0, 0, 1, %7\n\t" \ - "mfocrf %0, 0b10\n\t" \ - "mfvsrd %1, 32\n\t" \ - "xxswapd 32, 32\n\t" \ - "mfvsrd %2, 32\n\t" \ - : "=r" (cr), "=r" (th), "=r" (tl) \ - : "r" (AH), "r" (AL), "r" (BH), "r" (BL), "i" (PS) \ - : "v0", "v1", "v2"); +#ifdef __has_builtin +#if !__has_builtin(__builtin_bcdsub) +#define NO_BUILTIN_BCDSUB +#endif +#endif -#define TEST(AH, AL, BH, BL, PS, TH, TL, CR6) \ - do { \ - int cr = 0; \ - uint64_t th, tl; \ - BCDSUB(AH, AL, BH, BL, PS); \ - if (TH != UNDEF || TL != UNDEF) { \ - assert(tl == TL); \ - assert(th == TH); \ - } \ - assert((cr >> 4) == CR6); \ +#ifdef NO_BUILTIN_BCDSUB +#define BCDSUB(T, A, B, PS) \ + ".long 4 << 26 | (" #T ") << 21 | (" #A ") << 16 | (" #B ") << 11" \ + " | 1 << 10 | (" #PS ") << 9 | 65\n\t" +#else +#define BCDSUB(T, A, B, PS) "bcdsub. " #T ", " #A ", " #B ", " #PS "\n\t" +#endif + +#define TEST(AH, AL, BH, BL, PS, TH, TL, CR6) \ + do { \ + int cr = 0; \ + uint64_t th, tl; \ + /* \ + * Use GPR pairs to load the VSR values and place the resulting VSR and\ + * CR6 in th, tl, and cr. Note that we avoid newer instructions (e.g., \ + * mtvsrdd/mfvsrld) so we can run this test on POWER8 machines. 
\ + */ \ + asm ("mtvsrd 32, %3\n\t" \ + "mtvsrd 33, %4\n\t" \ + "xxmrghd 32, 32, 33\n\t" \ + "mtvsrd 33, %5\n\t" \ + "mtvsrd 34, %6\n\t" \ + "xxmrghd 33, 33, 34\n\t" \ + BCDSUB(0, 0, 1, PS) \ + "mfocrf %0, 0b10\n\t" \ + "mfvsrd %1, 32\n\t" \ + "xxswapd 32, 32\n\t" \ + "mfvsrd %2, 32\n\t" \ + : "=r" (cr), "=r" (th), "=r" (tl) \ + : "r" (AH), "r" (AL), "r" (BH), "r" (BL) \ + : "v0", "v1", "v2"); \ + if (TH != UNDEF || TL != UNDEF) { \ + assert(tl == TL); \ + assert(th == TH); \ + } \ + assert((cr >> 4) == CR6); \ } while (0) /* From 68455cf59394267363b090a1828b6c52994a4ee3 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Sat, 5 Mar 2022 07:16:46 +0100 Subject: [PATCH 06/13] tests/tcg/ppc64le: Use Altivec register names in clobber list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LLVM/Clang doesn't know the VSX registers when compiling with -mabi=elfv1. Use only registers >= 32 and list them with their Altivec name. Reviewed-by: Richard Henderson Signed-off-by: Matheus Ferst Message-Id: <20220304165417.1981159-6-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- tests/tcg/ppc64le/non_signalling_xscv.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/tcg/ppc64le/non_signalling_xscv.c b/tests/tcg/ppc64le/non_signalling_xscv.c index 91e25cad46..836df71ef0 100644 --- a/tests/tcg/ppc64le/non_signalling_xscv.c +++ b/tests/tcg/ppc64le/non_signalling_xscv.c @@ -6,16 +6,16 @@ #define TEST(INSN, B_HI, B_LO, T_HI, T_LO) \ do { \ uint64_t th, tl, bh = B_HI, bl = B_LO; \ - asm("mtvsrd 0, %2\n\t" \ - "mtvsrd 1, %3\n\t" \ - "xxmrghd 0, 0, 1\n\t" \ - INSN " 0, 0\n\t" \ - "mfvsrd %0, 0\n\t" \ - "xxswapd 0, 0\n\t" \ - "mfvsrd %1, 0\n\t" \ + asm("mtvsrd 32, %2\n\t" \ + "mtvsrd 33, %3\n\t" \ + "xxmrghd 32, 32, 33\n\t" \ + INSN " 32, 32\n\t" \ + "mfvsrd %0, 32\n\t" \ + "xxswapd 32, 32\n\t" \ + "mfvsrd %1, 32\n\t" \ : "=r" (th), "=r" (tl) \ : "r" (bh), "r" (bl) \ - : "vs0", "vs1"); \ + : 
"v0", "v1"); \ printf(INSN "(0x%016" PRIx64 "%016" PRIx64 ") = 0x%016" PRIx64 \ "%016" PRIx64 "\n", bh, bl, th, tl); \ assert(th == T_HI && tl == T_LO); \ From 0241ccb163b65ab568b667884bb99774b7c6fb42 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Sat, 5 Mar 2022 07:16:47 +0100 Subject: [PATCH 07/13] target/ppc: Fix vmul[eo]* instructions marked 2.07 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some ISA v2.03 Vector Multiply instructions marked to be ISA v2.07 only. This patch fixes it. Fixes: 80eca687c851 ("target/ppc: moved vector even and odd multiplication to decodetree") Reported-by: Howard Spoelstra Suggested-by: Fabiano Rosas Signed-off-by: Lucas Mateus Castro (alqotel) Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220304175156.2012315-2-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- target/ppc/translate/vmx-impl.c.inc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index f91bee839d..c5d02d13fe 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3141,14 +3141,14 @@ static bool trans_VMULLD(DisasContext *ctx, arg_VX *a) return true; } -TRANS_FLAGS2(ALTIVEC_207, VMULESB, do_vx_helper, gen_helper_VMULESB) -TRANS_FLAGS2(ALTIVEC_207, VMULOSB, do_vx_helper, gen_helper_VMULOSB) -TRANS_FLAGS2(ALTIVEC_207, VMULEUB, do_vx_helper, gen_helper_VMULEUB) -TRANS_FLAGS2(ALTIVEC_207, VMULOUB, do_vx_helper, gen_helper_VMULOUB) -TRANS_FLAGS2(ALTIVEC_207, VMULESH, do_vx_helper, gen_helper_VMULESH) -TRANS_FLAGS2(ALTIVEC_207, VMULOSH, do_vx_helper, gen_helper_VMULOSH) -TRANS_FLAGS2(ALTIVEC_207, VMULEUH, do_vx_helper, gen_helper_VMULEUH) -TRANS_FLAGS2(ALTIVEC_207, VMULOUH, do_vx_helper, gen_helper_VMULOUH) +TRANS_FLAGS(ALTIVEC, VMULESB, do_vx_helper, gen_helper_VMULESB) +TRANS_FLAGS(ALTIVEC, VMULOSB, do_vx_helper, 
gen_helper_VMULOSB) +TRANS_FLAGS(ALTIVEC, VMULEUB, do_vx_helper, gen_helper_VMULEUB) +TRANS_FLAGS(ALTIVEC, VMULOUB, do_vx_helper, gen_helper_VMULOUB) +TRANS_FLAGS(ALTIVEC, VMULESH, do_vx_helper, gen_helper_VMULESH) +TRANS_FLAGS(ALTIVEC, VMULOSH, do_vx_helper, gen_helper_VMULOSH) +TRANS_FLAGS(ALTIVEC, VMULEUH, do_vx_helper, gen_helper_VMULEUH) +TRANS_FLAGS(ALTIVEC, VMULOUH, do_vx_helper, gen_helper_VMULOUH) TRANS_FLAGS2(ALTIVEC_207, VMULESW, do_vx_helper, gen_helper_VMULESW) TRANS_FLAGS2(ALTIVEC_207, VMULOSW, do_vx_helper, gen_helper_VMULOSW) TRANS_FLAGS2(ALTIVEC_207, VMULEUW, do_vx_helper, gen_helper_VMULEUW) From 9f264141ec13b69ecaa569190121da4098ec8647 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Sat, 5 Mar 2022 07:16:47 +0100 Subject: [PATCH 08/13] target/ppc: use ext32u and deposit in do_vx_vmulhw_i64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: 29e9dfcf755e ("target/ppc: vmulh* instructions without helpers") Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220304175156.2012315-3-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- target/ppc/translate/vmx-impl.c.inc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index c5d02d13fe..8ea1d2c96a 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3162,19 +3162,16 @@ static void do_vx_vmulhw_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign) { TCGv_i64 hh, lh, temp; - uint64_t c; hh = tcg_temp_new_i64(); lh = tcg_temp_new_i64(); temp = tcg_temp_new_i64(); - c = 0xFFFFFFFF; - if (sign) { tcg_gen_ext32s_i64(lh, a); tcg_gen_ext32s_i64(temp, b); } else { - tcg_gen_andi_i64(lh, a, c); - tcg_gen_andi_i64(temp, b, c); + tcg_gen_ext32u_i64(lh, a); + tcg_gen_ext32u_i64(temp, b); } tcg_gen_mul_i64(lh, lh, temp); @@ -3188,8 +3185,7 @@ static void do_vx_vmulhw_i64(TCGv_i64 t, TCGv_i64 a, 
TCGv_i64 b, bool sign) tcg_gen_mul_i64(hh, hh, temp); tcg_gen_shri_i64(lh, lh, 32); - tcg_gen_andi_i64(hh, hh, c << 32); - tcg_gen_or_i64(t, hh, lh); + tcg_gen_deposit_i64(t, hh, lh, 0, 32); tcg_temp_free_i64(hh); tcg_temp_free_i64(lh); From 5460ca8490ef9be293301e115490e273967011d3 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Sat, 5 Mar 2022 07:16:47 +0100 Subject: [PATCH 09/13] target/ppc: use extract/extract2 to create vrlqnm mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: 4e272668406b ("target/ppc: implement vrlqnm") Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220304175156.2012315-4-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- target/ppc/translate/vmx-impl.c.inc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 8ea1d2c96a..8108e59d4d 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -1088,10 +1088,8 @@ static void do_vrlq_mask(TCGv_i64 mh, TCGv_i64 ml, TCGv_i64 b, TCGv_i64 e) tcg_gen_or_i64(tl, t1, tl); /* t = t >> 1 */ - tcg_gen_shli_i64(t0, th, 63); - tcg_gen_shri_i64(tl, tl, 1); + tcg_gen_extract2_i64(tl, tl, th, 1); tcg_gen_shri_i64(th, th, 1); - tcg_gen_or_i64(tl, t0, tl); /* m = m ^ t */ tcg_gen_xor_i64(mh, mh, th); @@ -1148,10 +1146,8 @@ static bool do_vector_rotl_quad(DisasContext *ctx, arg_VX *a, bool mask, tcg_gen_or_i64(t1, ah, t1); if (mask || insert) { - tcg_gen_shri_i64(n, vrb, 8); - tcg_gen_shri_i64(vrb, vrb, 16); - tcg_gen_andi_i64(n, n, 0x7f); - tcg_gen_andi_i64(vrb, vrb, 0x7f); + tcg_gen_extract_i64(n, vrb, 8, 7); + tcg_gen_extract_i64(vrb, vrb, 16, 7); do_vrlq_mask(ah, al, vrb, n); From 4e4dd9e7caf47ce4fa60985614fcfa60f32729c3 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Sat, 5 Mar 2022 07:16:47 +0100 Subject: [PATCH 10/13] target/ppc: use andc in vrlqmi MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: 7e5947df6e94 ("target/ppc: implement vrlqmi") Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220304175156.2012315-5-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- target/ppc/translate/vmx-impl.c.inc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 8108e59d4d..6101bca3fd 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -1157,10 +1157,8 @@ static bool do_vector_rotl_quad(DisasContext *ctx, arg_VX *a, bool mask, if (insert) { get_avr64(n, a->vrt, true); get_avr64(vrb, a->vrt, false); - tcg_gen_not_i64(ah, ah); - tcg_gen_not_i64(al, al); - tcg_gen_and_i64(n, n, ah); - tcg_gen_and_i64(vrb, vrb, al); + tcg_gen_andc_i64(n, n, ah); + tcg_gen_andc_i64(vrb, vrb, al); tcg_gen_or_i64(t0, t0, n); tcg_gen_or_i64(t1, t1, vrb); } From 618574ddf79a44ed3f4be7e21cb398beb4bdd294 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Sat, 5 Mar 2022 07:16:47 +0100 Subject: [PATCH 11/13] target/ppc: split XXGENPCV macros for readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: b090f4f1e3c9 ("target/ppc: Implement xxgenpcv[bhwd]m instruction") Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220304175156.2012315-6-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- target/ppc/int_helper.c | 28 +++++++++--- target/ppc/translate/vsx-impl.c.inc | 71 +++++++++++++++-------------- 2 files changed, 57 insertions(+), 42 deletions(-) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index b2b17bb1ca..492f34c499 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1072,7 +1072,7 @@ void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) *r = result; } -#define XXGENPCV(NAME, SZ) \ 
+#define XXGENPCV_BE_EXP(NAME, SZ) \ void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ { \ ppc_vsr_t tmp; \ @@ -1093,8 +1093,9 @@ void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ } \ \ *t = tmp; \ -} \ - \ +} + +#define XXGENPCV_BE_COMP(NAME, SZ) \ void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ { \ ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ @@ -1111,8 +1112,9 @@ void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ } \ \ *t = tmp; \ -} \ - \ +} + +#define XXGENPCV_LE_EXP(NAME, SZ) \ void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ { \ ppc_vsr_t tmp; \ @@ -1135,8 +1137,9 @@ void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ } \ \ *t = tmp; \ -} \ - \ +} + +#define XXGENPCV_LE_COMP(NAME, SZ) \ void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ { \ ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ @@ -1157,10 +1160,21 @@ void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ *t = tmp; \ } +#define XXGENPCV(NAME, SZ) \ + XXGENPCV_BE_EXP(NAME, SZ) \ + XXGENPCV_BE_COMP(NAME, SZ) \ + XXGENPCV_LE_EXP(NAME, SZ) \ + XXGENPCV_LE_COMP(NAME, SZ) \ + XXGENPCV(XXGENPCVBM, 1) XXGENPCV(XXGENPCVHM, 2) XXGENPCV(XXGENPCVWM, 4) XXGENPCV(XXGENPCVDM, 8) + +#undef XXGENPCV_BE_EXP +#undef XXGENPCV_BE_COMP +#undef XXGENPCV_LE_EXP +#undef XXGENPCV_LE_COMP #undef XXGENPCV #if defined(HOST_WORDS_BIGENDIAN) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 2ffeab5287..48a97b2d7e 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -1204,43 +1204,44 @@ static bool trans_XXPERMX(DisasContext *ctx, arg_8RR_XX4_uim3 *a) return true; } -#define XXGENPCV(NAME) \ -static bool trans_##NAME(DisasContext *ctx, arg_X_imm5 *a) \ -{ \ - TCGv_ptr xt, vrb; \ - \ - REQUIRE_INSNS_FLAGS2(ctx, ISA310); \ - REQUIRE_VSX(ctx); \ - \ - if (a->imm & ~0x3) { \ - gen_invalid(ctx); \ - return true; \ - 
} \ - \ - xt = gen_vsr_ptr(a->xt); \ - vrb = gen_avr_ptr(a->vrb); \ - \ - switch (a->imm) { \ - case 0b00000: /* Big-Endian expansion */ \ - glue(gen_helper_, glue(NAME, _be_exp))(xt, vrb); \ - break; \ - case 0b00001: /* Big-Endian compression */ \ - glue(gen_helper_, glue(NAME, _be_comp))(xt, vrb); \ - break; \ - case 0b00010: /* Little-Endian expansion */ \ - glue(gen_helper_, glue(NAME, _le_exp))(xt, vrb); \ - break; \ - case 0b00011: /* Little-Endian compression */ \ - glue(gen_helper_, glue(NAME, _le_comp))(xt, vrb); \ - break; \ - } \ - \ - tcg_temp_free_ptr(xt); \ - tcg_temp_free_ptr(vrb); \ - \ - return true; \ +typedef void (*xxgenpcv_genfn)(TCGv_ptr, TCGv_ptr); + +static bool do_xxgenpcv(DisasContext *ctx, arg_X_imm5 *a, + const xxgenpcv_genfn fn[4]) +{ + TCGv_ptr xt, vrb; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + + if (a->imm & ~0x3) { + gen_invalid(ctx); + return true; + } + + xt = gen_vsr_ptr(a->xt); + vrb = gen_avr_ptr(a->vrb); + + fn[a->imm](xt, vrb); + + tcg_temp_free_ptr(xt); + tcg_temp_free_ptr(vrb); + + return true; } +#define XXGENPCV(NAME) \ + static bool trans_##NAME(DisasContext *ctx, arg_X_imm5 *a) \ + { \ + static const xxgenpcv_genfn fn[4] = { \ + gen_helper_##NAME##_be_exp, \ + gen_helper_##NAME##_be_comp, \ + gen_helper_##NAME##_le_exp, \ + gen_helper_##NAME##_le_comp, \ + }; \ + return do_xxgenpcv(ctx, a, fn); \ + } + XXGENPCV(XXGENPCVBM) XXGENPCV(XXGENPCVHM) XXGENPCV(XXGENPCVWM) From e1428e5b575ffb6e5066d1769b10bd4b47f6770b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Colombo?= Date: Sat, 5 Mar 2022 07:16:48 +0100 Subject: [PATCH 12/13] target/ppc: Add missing helper_reset_fpstatus to VSX_MAX_MINC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: da499405aa ("target/ppc: Refactor VSX_MAX_MINC helper") Signed-off-by: Víctor Colombo Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220304175156.2012315-7-matheus.ferst@eldorado.org.br> 
Signed-off-by: Cédric Le Goater --- target/ppc/fpu_helper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 2cad05c9cf..7d34b88577 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2522,6 +2522,8 @@ void helper_##name(CPUPPCState *env, \ ppc_vsr_t t = { }; \ bool first; \ \ + helper_reset_fpstatus(env); \ + \ if (max) { \ first = tp##_le_quiet(xb->fld, xa->fld, &env->fp_status); \ } else { \ From a9eb50376ffb27a3f348b0bccf70a34b26be2a3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Colombo?= Date: Sat, 5 Mar 2022 07:16:48 +0100 Subject: [PATCH 13/13] target/ppc: Add missing helper_reset_fpstatus to helper_XVCVSPBF16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: 3909ff1fac ("target/ppc: Implement xvcvbf16spn and xvcvspbf16 instructions") Signed-off-by: Víctor Colombo Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220304175156.2012315-8-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- target/ppc/fpu_helper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 7d34b88577..bd12db960a 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2774,6 +2774,8 @@ void helper_XVCVSPBF16(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) ppc_vsr_t t = { }; int i, status; + helper_reset_fpstatus(env); + for (i = 0; i < 4; i++) { t.VsrH(2 * i + 1) = float32_to_bfloat16(xb->VsrW(i), &env->fp_status); }