From f73eb9484bf07812db21de985cb9c1e4ad3c82a9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 7 May 2022 07:48:26 +0200 Subject: [PATCH 01/34] pseries: allow setting stdout-path even on machines with a VGA -machine graphics=off is the usual way to tell the firmware or the OS that the user wants a serial console. The pseries machine however does not support this, and never adds the stdout-path node to the device tree if a VGA device is provided. This is in addition to the other magic behavior of VGA devices, which is to add a keyboard and mouse to the default USB bus. Split spapr->has_graphics in two variables so that the two behaviors can be separated: the USB devices remains the same, but the stdout-path is added even with "-device VGA -machine graphics=off". Reviewed-by: Daniel Henrique Barboza Signed-off-by: Paolo Bonzini Message-Id: <20220507054826.124936-1-pbonzini@redhat.com> Signed-off-by: Daniel Henrique Barboza --- hw/ppc/spapr.c | 12 ++++++++---- include/hw/ppc/spapr.h | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 6de800524a..d112b85b4f 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1066,7 +1066,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device", boot_device)); } - if (!spapr->has_graphics && stdout_path) { + if (spapr->want_stdout_path && stdout_path) { /* * "linux,stdout-path" and "stdout" properties are * deprecated by linux kernel. New platforms should only @@ -2712,6 +2712,7 @@ static void spapr_machine_init(MachineState *machine) const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; PCIHostState *phb; + bool has_vga; int i; MemoryRegion *sysmem = get_system_memory(); long load_limit, fw_size; @@ -2950,9 +2951,12 @@ static void spapr_machine_init(MachineState *machine) } /* Graphics */ - if (spapr_vga_init(phb->bus, &error_fatal)) { - spapr->has_graphics = true; + has_vga = spapr_vga_init(phb->bus, &error_fatal); + if (has_vga) { + spapr->want_stdout_path = !machine->enable_graphics; machine->usb |= defaults_enabled() && !machine->usb_disabled; + } else { + spapr->want_stdout_path = true; } if (machine->usb) { @@ -2962,7 +2966,7 @@ static void spapr_machine_init(MachineState *machine) pci_create_simple(phb->bus, -1, "nec-usb-xhci"); } - if (spapr->has_graphics) { + if (has_vga) { USBBus *usb_bus = usb_bus_find(-1); usb_create_simple(usb_bus, "usb-kbd"); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 14b01c3f59..072dda2c72 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -194,7 +194,7 @@ struct SpaprMachineState { Vof *vof; uint64_t rtc_offset; /* Now used only during incoming migration */ struct PPCTimebase tb; - bool has_graphics; + bool want_stdout_path; uint32_t vsmt; /* Virtual SMT mode (KVM's "core stride") */ /* Nested HV support (TCG only) */ From 1e665723e92cd3dae4d8943bf7bd1799a3b4a82a Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Thu, 5 May 2022 18:18:01 +0200 Subject: [PATCH 02/34] hw/ppc/e500: Remove unused BINARY_DEVICE_TREE_FILE Commit 28290f37e20cda27574f15be9e9499493e3d0fe8 'PPC: E500: Generate device tree on reset' improved device tree generation and made BINARY_DEVICE_TREE_FILE obsolete. Signed-off-by: Bernhard Beschow Reviewed-by: Daniel Henrique Barboza Message-Id: <20220505161805.11116-8-shentey@gmail.com> Signed-off-by: Daniel Henrique Barboza --- hw/ppc/e500.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 2bc3dce1fb..7f7f5b3452 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -47,7 +47,6 @@ #include "hw/irq.h" #define EPAPR_MAGIC (0x45504150) -#define BINARY_DEVICE_TREE_FILE "mpc8544ds.dtb" #define DTC_LOAD_PAD 0x1800000 #define DTC_PAD_MASK 0xFFFFF #define DTB_MAX_SIZE (8 * MiB) From 5bb55f3e3b00679519a83ffe688eae0e68e305a7 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 May 2022 16:55:36 +1000 Subject: [PATCH 03/34] spapr: Use address from elf parser for kernel address tl;dr: This allows Big Endian zImage booting via -kernel + x-vof=on. QEMU loads the kernel at 0x400000 by default which works most of the time as Linux kernels are relocatable, 64bit and compiled with "-pie" (position independent code). This works for a little endian zImage too. However a big endian zImage is compiled without -pie, is 32bit, linked to 0x4000000 so current QEMU ends up loading it at 0x4400000 but keeps spapr->kernel_addr unchanged so booting fails. This uses the kernel address returned from load_elf(). If the default kernel_addr is used, there is no change in behavior (as translate_kernel_address() takes care of this), which is: LE/BE vmlinux and LE zImage boot, BE zImage does not. If the VM created with "-machine kernel-addr=0,x-vof=on", then QEMU prints a warning and BE zImage boots. Note #1: SLOF (x-vof=off) still cannot boot a big endian zImage as SLOF enables MSR_SF for everything loaded by QEMU and this leads to early crash of 32bit zImage. Note #2: BE/LE vmlinux images set MSR_SF in early boot so these just work; a LE zImage restores MSR_SF after every CI call and we are lucky enough not to crash before the first CI call. Signed-off-by: Alexey Kardashevskiy Tested-by: Joel Stanley Reviewed-by: Fabiano Rosas Message-Id: <20220504065536.3534488-1-aik@ozlabs.ru> [danielhb: use PRIx64 instead of lx in warn_report] Signed-off-by: Daniel Henrique Barboza --- hw/ppc/spapr.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index d112b85b4f..fd4942e881 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2975,14 +2975,16 @@ static void spapr_machine_init(MachineState *machine) } if (kernel_filename) { + uint64_t loaded_addr = 0; + spapr->kernel_size = load_elf(kernel_filename, NULL, translate_kernel_address, spapr, - NULL, NULL, NULL, NULL, 1, + NULL, &loaded_addr, NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0); if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) { spapr->kernel_size = load_elf(kernel_filename, NULL, translate_kernel_address, spapr, - NULL, NULL, NULL, NULL, 0, + NULL, &loaded_addr, NULL, NULL, 0, PPC_ELF_MACHINE, 0, 0); spapr->kernel_le = spapr->kernel_size > 0; } @@ -2992,6 +2994,13 @@ static void spapr_machine_init(MachineState *machine) exit(1); } + if (spapr->kernel_addr != loaded_addr) { + warn_report("spapr: kernel_addr changed from 0x%"PRIx64 + " to 0x%"PRIx64, + spapr->kernel_addr, loaded_addr); + spapr->kernel_addr = loaded_addr; + } + /* load initrd */ if (initrd_filename) { /* Try to locate the initrd in the gap between the kernel From 162eec18c0aed28eda472bacf207b7b222894169 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 6 May 2022 15:51:24 +1000 Subject: [PATCH 04/34] spapr/docs: Add a few words about x-vof The alternative small firmware needs a few words of what it can and absolutely cannot do; this adds those words. Reviewed-by: Daniel Henrique Barboza Signed-off-by: Alexey Kardashevskiy Message-Id: <20220506055124.3822112-1-aik@ozlabs.ru> [danielhb: added linebreaks before and after table] Signed-off-by: Daniel Henrique Barboza --- docs/system/ppc/pseries.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/docs/system/ppc/pseries.rst b/docs/system/ppc/pseries.rst index d9b65ad4e8..3e1bbe6726 100644 --- a/docs/system/ppc/pseries.rst +++ b/docs/system/ppc/pseries.rst @@ -32,14 +32,43 @@ Missing devices Firmware ======== +The pSeries platform in QEMU comes with 2 firmwares: + `SLOF `_ (Slimline Open Firmware) is an implementation of the `IEEE 1275-1994, Standard for Boot (Initialization Configuration) Firmware: Core Requirements and Practices `_. +SLOF performs bus scanning, PCI resource allocation, provides the client +interface to boot from block devices and network. + QEMU includes a prebuilt image of SLOF which is updated when a more recent version is required. +VOF (Virtual Open Firmware) is a minimalistic firmware to work with +``-machine pseries,x-vof=on``. When enabled, the firmware acts as a slim +shim and QEMU implements parts of the IEEE 1275 Open Firmware interface. + +VOF does not have device drivers, does not do PCI resource allocation and +relies on ``-kernel`` used with Linux kernels recent enough (v5.4+) +to PCI resource assignment. It is ideal to use with petitboot. + +Booting via ``-kernel`` supports the following: + ++-------------------+-------------------+------------------+ +| kernel | pseries,x-vof=off | pseries,x-vof=on | ++===================+===================+==================+ +| vmlinux BE | ✓ | ✓ | ++-------------------+-------------------+------------------+ +| vmlinux LE | ✓ | ✓ | ++-------------------+-------------------+------------------+ +| zImage.pseries BE | x | ✓¹ | ++-------------------+-------------------+------------------+ +| zImage.pseries LE | ✓ | ✓ | ++-------------------+-------------------+------------------+ + +¹ must set kernel-addr=0 + Build directions ================ From c9f8004b6adf7020ba742d16b132e84ff6e57863 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Tue, 10 May 2022 20:54:39 -0300 Subject: [PATCH 05/34] mos6522: fix linking error when CONFIG_MOS6522 is not set When CONFIG_MOS6522 is not set, building ppc64-softmmu target fails: /usr/bin/ld: libqemu-ppc64-softmmu.fa.p/monitor_misc.c.o:(.data+0x1158): undefined reference to `hmp_info_via' Make devices configuration available in hmp-commands*.hx and check for CONFIG_MOS6522. Fixes: 409e9f7131e5 (mos6522: add "info via" HMP command for debugging) Signed-off-by: Murilo Opsfelder Araujo Cc: Mark Cave-Ayland Cc: Fabiano Rosas Cc: Thomas Huth Reviewed-by: Thomas Huth Message-Id: <20220510235439.54775-1-muriloo@linux.ibm.com> Signed-off-by: Daniel Henrique Barboza --- hmp-commands-info.hx | 2 +- monitor/misc.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index adfa085a9b..834bed089e 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -880,7 +880,7 @@ SRST Show intel SGX information. ERST -#if defined(TARGET_M68K) || defined(TARGET_PPC) +#if defined(CONFIG_MOS6522) { .name = "via", .args_type = "", diff --git a/monitor/misc.c b/monitor/misc.c index 6c5bb82d3b..3d2312ba8d 100644 --- a/monitor/misc.c +++ b/monitor/misc.c @@ -84,6 +84,9 @@ #include "hw/s390x/storage-attributes.h" #endif +/* Make devices configuration available for use in hmp-commands*.hx templates */ +#include CONFIG_DEVICES + /* file descriptors passed via SCM_RIGHTS */ typedef struct mon_fd_t mon_fd_t; struct mon_fd_t { From 4ddc104689b186c4e4ed30be59a54463501761cf Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Tue, 3 May 2022 13:39:04 -0300 Subject: [PATCH 06/34] target/ppc: Fix tlbie Commit 74c4912f097bab98 changed check_tlb_flush() to use tlb_flush_all_cpus_synced() instead of calling tlb_flush() on each CPU. However, as side effect of this, a CPU executing a ptesync after a tlbie will have its TLB flushed only after exiting its current Translation Block (TB). This causes memory accesses to invalid pages to succeed, if they happen to be on the same TB as the ptesync. To fix this, use tlb_flush_all_cpus() instead, that immediately flushes the TLB of the CPU executing the ptesync instruction. Fixes: 74c4912f097bab98 ("target/ppc: Fix synchronization of mttcg with broadcast TLB flushes") Signed-off-by: Leandro Lupori Reviewed-by: Fabiano Rosas Message-Id: <20220503163904.22575-1-leandro.lupori@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c index 6159a15b7b..12235ea2e9 100644 --- a/target/ppc/helper_regs.c +++ b/target/ppc/helper_regs.c @@ -292,7 +292,7 @@ void check_tlb_flush(CPUPPCState *env, bool global) if (global && (env->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) { env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH; env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH; - tlb_flush_all_cpus_synced(cs); + tlb_flush_all_cpus(cs); return; } From 3278677f6a5e048b17ea7623c107786448fdf6f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Colombo?= Date: Tue, 17 May 2022 13:15:20 -0300 Subject: [PATCH 07/34] target/ppc: Fix FPSCR.FI bit being cleared when it shouldn't MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Power ISA, the FI bit in FPSCR is non-sticky. This means that if an instruction is said to modify the FI bit, then it should be set or cleared depending on the result of the instruction. Otherwise, it should be kept as was before. However, the following inconsistency was found when comparing results from the hardware (tested on both a Power 9 processor and in Power 10 Mambo): (FI bit is set before the execution of the instruction) Hardware: xscmpeqdp(0xff..ff, 0xff..ff) = FI: SET -> SET QEMU: xscmpeqdp(0xff..ff, 0xff..ff) = FI: SET -> CLEARED As the FI bit is non-sticky, and xscmpeqdp does not list it as a field that is changed by the instruction, it should not be changed after its execution. This is happening to multiple instructions in the vsx implementations. If the ISA does not list the FI bit as altered for a particular instruction, then it should be kept as it was before the instruction. QEMU is not following this behavior. Affected instructions include: - xv* (all vsx-vector instructions); - xscmp*, xsmax*, xsmin*; - xstdivdp and similars; (to identify the affected instructions, just search in the ISA for the instructions that does not list FI in "Special Registers Altered") Most instructions use the function do_float_check_status() to commit changes in the inexact flag. So the fix is to add a parameter to it that will control if the bit FI should be changed or not. All users of do_float_check_status() are then modified to provide this argument, controlling if that specific instruction changes bit FI or not. Some macro helpers are responsible for both instructions that change and instructions that aren't suposed to change FI. This seems to always overlap with the sfprf flag. So, reuse this flag for this purpose when applicable. Signed-off-by: Víctor Colombo Reviewed-by: Richard Henderson Message-Id: <20220517161522.36132-2-victor.colombo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/cpu.h | 2 + target/ppc/fpu_helper.c | 122 +++++++++++++++++++++------------------- 2 files changed, 66 insertions(+), 58 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 48596cfb25..901ded79e9 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -735,6 +735,8 @@ enum { (1 << FPSCR_VXSOFT) | (1 << FPSCR_VXSQRT) | \ (1 << FPSCR_VXCVI)) +FIELD(FPSCR, FI, FPSCR_FI, 1) + #define FP_DRN2 (1ull << FPSCR_DRN2) #define FP_DRN1 (1ull << FPSCR_DRN1) #define FP_DRN0 (1ull << FPSCR_DRN0) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index f6c8318a71..f1ea4aa10e 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -370,7 +370,6 @@ static inline void float_inexact_excp(CPUPPCState *env) { CPUState *cs = env_cpu(env); - env->fpscr |= FP_FI; env->fpscr |= FP_XX; /* Update the floating-point exception summary */ env->fpscr |= FP_FX; @@ -462,7 +461,8 @@ void helper_fpscr_check_status(CPUPPCState *env) } } -static void do_float_check_status(CPUPPCState *env, uintptr_t raddr) +static void do_float_check_status(CPUPPCState *env, bool change_fi, + uintptr_t raddr) { CPUState *cs = env_cpu(env); int status = get_float_exception_flags(&env->fp_status); @@ -474,8 +474,10 @@ static void do_float_check_status(CPUPPCState *env, uintptr_t raddr) } if (status & float_flag_inexact) { float_inexact_excp(env); - } else { - env->fpscr &= ~FP_FI; /* clear the FPSCR[FI] bit */ + } + if (change_fi) { + env->fpscr = FIELD_DP64(env->fpscr, FPSCR, FI, + !!(status & float_flag_inexact)); } if (cs->exception_index == POWERPC_EXCP_PROGRAM && @@ -490,7 +492,7 @@ static void do_float_check_status(CPUPPCState *env, uintptr_t raddr) void helper_float_check_status(CPUPPCState *env) { - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); } void helper_reset_fpstatus(CPUPPCState *env) @@ -684,7 +686,7 @@ uint64_t helper_##op(CPUPPCState *env, uint64_t arg) \ } else { \ farg.d = cvtr(arg, &env->fp_status); \ } \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, true, GETPC()); \ return farg.ll; \ } @@ -710,7 +712,7 @@ static uint64_t do_fri(CPUPPCState *env, uint64_t arg, /* fri* does not set FPSCR[XX] */ set_float_exception_flags(flags & ~float_flag_inexact, &env->fp_status); - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); return arg; } @@ -1721,7 +1723,7 @@ void helper_##name(CPUPPCState *env, ppc_vsr_t *xt, \ } \ } \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_ADD_SUB(xsadddp, add, 1, float64, VsrD(0), 1, 0) @@ -1757,7 +1759,7 @@ void helper_xsaddqp(CPUPPCState *env, uint32_t opcode, helper_compute_fprf_float128(env, t.f128); *xt = t; - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); } /* @@ -1798,7 +1800,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_MUL(xsmuldp, 1, float64, VsrD(0), 1, 0) @@ -1828,7 +1830,7 @@ void helper_xsmulqp(CPUPPCState *env, uint32_t opcode, helper_compute_fprf_float128(env, t.f128); *xt = t; - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); } /* @@ -1872,7 +1874,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_DIV(xsdivdp, 1, float64, VsrD(0), 1, 0) @@ -1905,7 +1907,7 @@ void helper_xsdivqp(CPUPPCState *env, uint32_t opcode, helper_compute_fprf_float128(env, t.f128); *xt = t; - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); } /* @@ -1940,7 +1942,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_RE(xsredp, 1, float64, VsrD(0), 1, 0) @@ -1985,7 +1987,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_SQRT(xssqrtdp, 1, float64, VsrD(0), 1, 0) @@ -2029,7 +2031,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_RSQRTE(xsrsqrtedp, 1, float64, VsrD(0), 1, 0) @@ -2182,7 +2184,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ } \ } \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1) @@ -2234,7 +2236,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *s1, ppc_vsr_t *s2,\ \ helper_compute_fprf_float128(env, t.f128); \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, true, GETPC()); \ } VSX_MADDQ(XSMADDQP, MADD_FLGS, 0) @@ -2283,7 +2285,7 @@ VSX_MADDQ(XSNMSUBQPO, NMSUB_FLGS, 0) \ memset(xt, 0, sizeof(*xt)); \ memset(&xt->fld, -r, sizeof(xt->fld)); \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, false, GETPC()); \ } VSX_SCALAR_CMP(XSCMPEQDP, float64, eq, VsrD(0), 0) @@ -2319,7 +2321,7 @@ void helper_xscmpexpdp(CPUPPCState *env, uint32_t opcode, env->fpscr |= cc << FPSCR_FPCC; env->crf[BF(opcode)] = cc; - do_float_check_status(env, GETPC()); + do_float_check_status(env, false, GETPC()); } void helper_xscmpexpqp(CPUPPCState *env, uint32_t opcode, @@ -2348,7 +2350,7 @@ void helper_xscmpexpqp(CPUPPCState *env, uint32_t opcode, env->fpscr |= cc << FPSCR_FPCC; env->crf[BF(opcode)] = cc; - do_float_check_status(env, GETPC()); + do_float_check_status(env, false, GETPC()); } static inline void do_scalar_cmp(CPUPPCState *env, ppc_vsr_t *xa, ppc_vsr_t *xb, @@ -2401,7 +2403,7 @@ static inline void do_scalar_cmp(CPUPPCState *env, ppc_vsr_t *xa, ppc_vsr_t *xb, float_invalid_op_vxvc(env, 0, GETPC()); } - do_float_check_status(env, GETPC()); + do_float_check_status(env, false, GETPC()); } void helper_xscmpodp(CPUPPCState *env, uint32_t opcode, ppc_vsr_t *xa, @@ -2466,7 +2468,7 @@ static inline void do_scalar_cmpq(CPUPPCState *env, ppc_vsr_t *xa, float_invalid_op_vxvc(env, 0, GETPC()); } - do_float_check_status(env, GETPC()); + do_float_check_status(env, false, GETPC()); } void helper_xscmpoqp(CPUPPCState *env, uint32_t opcode, ppc_vsr_t *xa, @@ -2505,7 +2507,7 @@ void helper_##name(CPUPPCState *env, ppc_vsr_t *xt, \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, false, GETPC()); \ } VSX_MAX_MIN(xsmaxdp, maxnum, 1, float64, VsrD(0)) @@ -2688,7 +2690,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_CVT_FP_TO_FP(xscvspdp, 1, float32, float64, VsrW(0), VsrD(0), 1) @@ -2714,7 +2716,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_CVT_FP_TO_FP2(xvcvdpsp, 2, float64, float32, 0) @@ -2750,7 +2752,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode, \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, true, GETPC()); \ } VSX_CVT_FP_TO_FP_VECTOR(xscvdpqp, 1, float64, float128, VsrD(0), f128, 1) @@ -2785,7 +2787,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_CVT_FP_TO_FP_HP(xscvdphp, 1, float64, float16, VsrD(0), VsrH(3), 1) @@ -2810,7 +2812,7 @@ void helper_XVCVSPBF16(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) } *xt = t; - do_float_check_status(env, GETPC()); + do_float_check_status(env, false, GETPC()); } void helper_XSCVQPDP(CPUPPCState *env, uint32_t ro, ppc_vsr_t *xt, @@ -2833,7 +2835,7 @@ void helper_XSCVQPDP(CPUPPCState *env, uint32_t ro, ppc_vsr_t *xt, helper_compute_fprf_float64(env, t.VsrD(0)); *xt = t; - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); } uint64_t helper_xscvdpspn(CPUPPCState *env, uint64_t xb) @@ -2889,9 +2891,10 @@ uint64_t helper_xscvspdpn(CPUPPCState *env, uint64_t xb) * ttp - target type (int32, uint32, int64 or uint64) * sfld - source vsr_t field * tfld - target vsr_t field + * sfi - set FI * rnan - resulting NaN */ -#define VSX_CVT_FP_TO_INT(op, nels, stp, ttp, sfld, tfld, rnan) \ +#define VSX_CVT_FP_TO_INT(op, nels, stp, ttp, sfld, tfld, sfi, rnan) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ int all_flags = env->fp_status.float_exception_flags, flags; \ @@ -2910,20 +2913,23 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ \ *xt = t; \ env->fp_status.float_exception_flags = all_flags; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfi, GETPC()); \ } -VSX_CVT_FP_TO_INT(xscvdpsxds, 1, float64, int64, VsrD(0), VsrD(0), \ +VSX_CVT_FP_TO_INT(xscvdpsxds, 1, float64, int64, VsrD(0), VsrD(0), true, \ 0x8000000000000000ULL) -VSX_CVT_FP_TO_INT(xscvdpuxds, 1, float64, uint64, VsrD(0), VsrD(0), 0ULL) -VSX_CVT_FP_TO_INT(xvcvdpsxds, 2, float64, int64, VsrD(i), VsrD(i), \ +VSX_CVT_FP_TO_INT(xscvdpuxds, 1, float64, uint64, VsrD(0), VsrD(0), true, 0ULL) +VSX_CVT_FP_TO_INT(xvcvdpsxds, 2, float64, int64, VsrD(i), VsrD(i), false, \ 0x8000000000000000ULL) -VSX_CVT_FP_TO_INT(xvcvdpuxds, 2, float64, uint64, VsrD(i), VsrD(i), 0ULL) -VSX_CVT_FP_TO_INT(xvcvspsxds, 2, float32, int64, VsrW(2 * i), VsrD(i), \ +VSX_CVT_FP_TO_INT(xvcvdpuxds, 2, float64, uint64, VsrD(i), VsrD(i), false, \ + 0ULL) +VSX_CVT_FP_TO_INT(xvcvspsxds, 2, float32, int64, VsrW(2 * i), VsrD(i), false, \ 0x8000000000000000ULL) -VSX_CVT_FP_TO_INT(xvcvspsxws, 4, float32, int32, VsrW(i), VsrW(i), 0x80000000U) -VSX_CVT_FP_TO_INT(xvcvspuxds, 2, float32, uint64, VsrW(2 * i), VsrD(i), 0ULL) -VSX_CVT_FP_TO_INT(xvcvspuxws, 4, float32, uint32, VsrW(i), VsrW(i), 0U) +VSX_CVT_FP_TO_INT(xvcvspsxws, 4, float32, int32, VsrW(i), VsrW(i), false, \ + 0x80000000ULL) +VSX_CVT_FP_TO_INT(xvcvspuxds, 2, float32, uint64, VsrW(2 * i), VsrD(i), \ + false, 0ULL) +VSX_CVT_FP_TO_INT(xvcvspuxws, 4, float32, uint32, VsrW(i), VsrW(i), false, 0U) #define VSX_CVT_FP_TO_INT128(op, tp, rnan) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ @@ -2940,7 +2946,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, true, GETPC()); \ } VSX_CVT_FP_TO_INT128(XSCVQPUQZ, uint128, 0) @@ -2955,7 +2961,7 @@ VSX_CVT_FP_TO_INT128(XSCVQPSQZ, int128, 0x8000000000000000ULL); * words 0 and 1 (and words 2 and 3) of the result register, as * is required by this version of the architecture. */ -#define VSX_CVT_FP_TO_INT2(op, nels, stp, ttp, rnan) \ +#define VSX_CVT_FP_TO_INT2(op, nels, stp, ttp, sfi, rnan) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ int all_flags = env->fp_status.float_exception_flags, flags; \ @@ -2977,13 +2983,13 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ \ *xt = t; \ env->fp_status.float_exception_flags = all_flags; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfi, GETPC()); \ } -VSX_CVT_FP_TO_INT2(xscvdpsxws, 1, float64, int32, 0x80000000U) -VSX_CVT_FP_TO_INT2(xscvdpuxws, 1, float64, uint32, 0U) -VSX_CVT_FP_TO_INT2(xvcvdpsxws, 2, float64, int32, 0x80000000U) -VSX_CVT_FP_TO_INT2(xvcvdpuxws, 2, float64, uint32, 0U) +VSX_CVT_FP_TO_INT2(xscvdpsxws, 1, float64, int32, true, 0x80000000U) +VSX_CVT_FP_TO_INT2(xscvdpuxws, 1, float64, uint32, true, 0U) +VSX_CVT_FP_TO_INT2(xvcvdpsxws, 2, float64, int32, false, 0x80000000U) +VSX_CVT_FP_TO_INT2(xvcvdpuxws, 2, float64, uint32, false, 0U) /* * VSX_CVT_FP_TO_INT_VECTOR - VSX floating point to integer conversion @@ -3008,7 +3014,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode, \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, true, GETPC()); \ } VSX_CVT_FP_TO_INT_VECTOR(xscvqpsdz, float128, int64, f128, VsrD(0), \ @@ -3047,7 +3053,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_CVT_INT_TO_FP(xscvsxddp, 1, int64, float64, VsrD(0), VsrD(0), 1, 0) @@ -3073,7 +3079,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, false, GETPC()); \ } VSX_CVT_INT_TO_FP2(xvcvsxdsp, int64, float32) @@ -3085,7 +3091,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb)\ helper_reset_fpstatus(env); \ xt->f128 = tp##_to_float128(xb->s128, &env->fp_status); \ helper_compute_fprf_float128(env, xt->f128); \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, true, GETPC()); \ } VSX_CVT_INT128_TO_FP(XSCVUQQP, uint128); @@ -3109,7 +3115,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode, \ helper_compute_fprf_##ttp(env, t.tfld); \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, true, GETPC()); \ } VSX_CVT_INT_TO_FP_VECTOR(xscvsdqp, int64, float128, VsrD(0), f128) @@ -3167,7 +3173,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, GETPC()); \ + do_float_check_status(env, sfprf, GETPC()); \ } VSX_ROUND(xsrdpi, 1, float64, VsrD(0), float_round_ties_away, 1) @@ -3195,7 +3201,7 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb) uint64_t xt = do_frsp(env, xb, GETPC()); helper_compute_fprf_float64(env, xt); - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); return xt; } @@ -3355,7 +3361,7 @@ void helper_xsrqpi(CPUPPCState *env, uint32_t opcode, } helper_compute_fprf_float128(env, t.f128); - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); *xt = t; } @@ -3408,7 +3414,7 @@ void helper_xsrqpxp(CPUPPCState *env, uint32_t opcode, helper_compute_fprf_float128(env, t.f128); *xt = t; - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); } void helper_xssqrtqp(CPUPPCState *env, uint32_t opcode, @@ -3434,7 +3440,7 @@ void helper_xssqrtqp(CPUPPCState *env, uint32_t opcode, helper_compute_fprf_float128(env, t.f128); *xt = t; - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); } void helper_xssubqp(CPUPPCState *env, uint32_t opcode, @@ -3460,5 +3466,5 @@ void helper_xssubqp(CPUPPCState *env, uint32_t opcode, helper_compute_fprf_float128(env, t.f128); *xt = t; - do_float_check_status(env, GETPC()); + do_float_check_status(env, true, GETPC()); } From c582a1dbc8e44f5e976ce9c2ac4ce0bc38a33cae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Colombo?= Date: Tue, 17 May 2022 13:15:21 -0300 Subject: [PATCH 08/34] target/ppc: Fix FPSCR.FI changing in float_overflow_excp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes another not-so-clear situation in Power ISA regarding the inexact bits in FPSCR. The ISA states that: """ When Overflow Exception is disabled (OE=0) and an Overflow Exception occurs, the following actions are taken: ... 2. Inexact Exception is set XX <- 1 ... FI is set to 1 ... """ However, when tested on a Power 9 hardware, some instructions that trigger an OX don't set the FI bit: xvcvdpsp(0x4050533fcdb7b95ff8d561c40bf90996) = FI: CLEARED -> CLEARED xvnmsubmsp(0xf3c0c1fc8f3230, 0xbeaab9c5) = FI: CLEARED -> CLEARED (just a few examples. Other instructions are also affected) The root cause for this seems to be that only instructions that list the bit FI in the "Special Registers Altered" should modify it. QEMU is, today, not working like the hardware: xvcvdpsp(0x4050533fcdb7b95ff8d561c40bf90996) = FI: CLEARED -> SET xvnmsubmsp(0xf3c0c1fc8f3230, 0xbeaab9c5) = FI: CLEARED -> SET (all tests assume FI is cleared beforehand) Fix this by making float_overflow_excp() return float_flag_inexact if it should update the inexact flags. Signed-off-by: Víctor Colombo Reviewed-by: Richard Henderson Reviewed-by: Rashmica Gupta Message-Id: <20220517161522.36132-3-victor.colombo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/fpu_helper.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index f1ea4aa10e..88f9e756a5 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -329,24 +329,25 @@ static inline void float_zero_divide_excp(CPUPPCState *env, uintptr_t raddr) } } -static inline void float_overflow_excp(CPUPPCState *env) +static inline int float_overflow_excp(CPUPPCState *env) { CPUState *cs = env_cpu(env); env->fpscr |= FP_OX; /* Update the floating-point exception summary */ env->fpscr |= FP_FX; - if (env->fpscr & FP_OE) { + + bool overflow_enabled = !!(env->fpscr & FP_OE); + if (overflow_enabled) { /* XXX: should adjust the result */ /* Update the floating-point enabled exception summary */ env->fpscr |= FP_FEX; /* We must update the target FPR before raising the exception */ cs->exception_index = POWERPC_EXCP_PROGRAM; env->error_code = POWERPC_EXCP_FP | POWERPC_EXCP_FP_OX; - } else { - env->fpscr |= FP_XX; - env->fpscr |= FP_FI; } + + return overflow_enabled ? 0 : float_flag_inexact; } static inline void float_underflow_excp(CPUPPCState *env) @@ -468,7 +469,7 @@ static void do_float_check_status(CPUPPCState *env, bool change_fi, int status = get_float_exception_flags(&env->fp_status); if (status & float_flag_overflow) { - float_overflow_excp(env); + status |= float_overflow_excp(env); } else if (status & float_flag_underflow) { float_underflow_excp(env); } From dd657a35b429e9affa373a0e6162dd930854ba78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Colombo?= Date: Tue, 17 May 2022 13:15:22 -0300 Subject: [PATCH 09/34] target/ppc: Rename sfprf to sfifprf where it's also used as set fi flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bit FI fix used the sfprf flag as a flag for the set_fi parameter in do_float_check_status where applicable. Now, this patch rename this flag to sfifprf to state this dual usage. Signed-off-by: Víctor Colombo Reviewed-by: Richard Henderson Reviewed-by: Rashmica Gupta Message-Id: <20220517161522.36132-4-victor.colombo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/fpu_helper.c | 112 ++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 88f9e756a5..8592727792 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -1693,9 +1693,9 @@ uint32_t helper_efdcmpeq(CPUPPCState *env, uint64_t op1, uint64_t op2) * nels - number of elements (1, 2 or 4) * tp - type (float32 or float64) * fld - vsr_t field (VsrD(*) or VsrW(*)) - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_ADD_SUB(name, op, nels, tp, fld, sfprf, r2sp) \ +#define VSX_ADD_SUB(name, op, nels, tp, fld, sfifprf, r2sp) \ void helper_##name(CPUPPCState *env, ppc_vsr_t *xt, \ ppc_vsr_t *xa, ppc_vsr_t *xb) \ { \ @@ -1712,19 +1712,19 @@ void helper_##name(CPUPPCState *env, ppc_vsr_t *xt, \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ float_invalid_op_addsub(env, tstat.float_exception_flags, \ - sfprf, GETPC()); \ + sfifprf, GETPC()); \ } \ \ if (r2sp) { \ t.fld = do_frsp(env, t.fld, GETPC()); \ } \ \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_float64(env, t.fld); \ } \ } \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_ADD_SUB(xsadddp, add, 1, float64, VsrD(0), 1, 0) @@ -1769,9 +1769,9 @@ void helper_xsaddqp(CPUPPCState *env, uint32_t opcode, * nels - number of elements (1, 2 or 4) * tp - type (float32 or float64) * fld - vsr_t field (VsrD(*) or VsrW(*)) - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_MUL(op, nels, tp, fld, sfprf, r2sp) \ +#define VSX_MUL(op, nels, tp, fld, sfifprf, r2sp) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ ppc_vsr_t *xa, ppc_vsr_t *xb) \ { \ @@ -1788,20 +1788,20 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ float_invalid_op_mul(env, tstat.float_exception_flags, \ - sfprf, GETPC()); \ + sfifprf, GETPC()); \ } \ \ if (r2sp) { \ t.fld = do_frsp(env, t.fld, GETPC()); \ } \ \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_float64(env, t.fld); \ } \ } \ \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_MUL(xsmuldp, 1, float64, VsrD(0), 1, 0) @@ -1840,9 +1840,9 @@ void helper_xsmulqp(CPUPPCState *env, uint32_t opcode, * nels - number of elements (1, 2 or 4) * tp - type (float32 or float64) * fld - vsr_t field (VsrD(*) or VsrW(*)) - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_DIV(op, nels, tp, fld, sfprf, r2sp) \ +#define VSX_DIV(op, nels, tp, fld, sfifprf, r2sp) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ ppc_vsr_t *xa, ppc_vsr_t *xb) \ { \ @@ -1859,7 +1859,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ float_invalid_op_div(env, tstat.float_exception_flags, \ - sfprf, GETPC()); \ + sfifprf, GETPC()); \ } \ if (unlikely(tstat.float_exception_flags & float_flag_divbyzero)) { \ float_zero_divide_excp(env, GETPC()); \ @@ -1869,13 +1869,13 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ t.fld = do_frsp(env, t.fld, GETPC()); \ } \ \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_float64(env, t.fld); \ } \ } \ \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_DIV(xsdivdp, 1, float64, VsrD(0), 1, 0) @@ -1917,9 +1917,9 @@ void helper_xsdivqp(CPUPPCState *env, uint32_t opcode, * nels - number of elements (1, 2 or 4) * tp - type (float32 or float64) * fld - vsr_t field (VsrD(*) or VsrW(*)) - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_RE(op, nels, tp, fld, sfprf, r2sp) \ +#define VSX_RE(op, nels, tp, fld, sfifprf, r2sp) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ ppc_vsr_t t = { }; \ @@ -1937,13 +1937,13 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ t.fld = do_frsp(env, t.fld, GETPC()); \ } \ \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_float64(env, t.fld); \ } \ } \ \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_RE(xsredp, 1, float64, VsrD(0), 1, 0) @@ -1957,9 +1957,9 @@ VSX_RE(xvresp, 4, float32, VsrW(i), 0, 0) * nels - number of elements (1, 2 or 4) * tp - type (float32 or float64) * fld - vsr_t field (VsrD(*) or VsrW(*)) - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_SQRT(op, nels, tp, fld, sfprf, r2sp) \ +#define VSX_SQRT(op, nels, tp, fld, sfifprf, r2sp) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ ppc_vsr_t t = { }; \ @@ -1975,20 +1975,20 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ float_invalid_op_sqrt(env, tstat.float_exception_flags, \ - sfprf, GETPC()); \ + sfifprf, GETPC()); \ } \ \ if (r2sp) { \ t.fld = do_frsp(env, t.fld, GETPC()); \ } \ \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_float64(env, t.fld); \ } \ } \ \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_SQRT(xssqrtdp, 1, float64, VsrD(0), 1, 0) @@ -2002,9 +2002,9 @@ VSX_SQRT(xvsqrtsp, 4, float32, VsrW(i), 0, 0) * nels - number of elements (1, 2 or 4) * tp - type (float32 or float64) * fld - vsr_t field (VsrD(*) or VsrW(*)) - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_RSQRTE(op, nels, tp, fld, sfprf, r2sp) \ +#define VSX_RSQRTE(op, nels, tp, fld, sfifprf, r2sp) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ ppc_vsr_t t = { }; \ @@ -2020,19 +2020,19 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ env->fp_status.float_exception_flags |= tstat.float_exception_flags; \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ float_invalid_op_sqrt(env, tstat.float_exception_flags, \ - sfprf, GETPC()); \ + sfifprf, GETPC()); \ } \ if (r2sp) { \ t.fld = do_frsp(env, t.fld, GETPC()); \ } \ \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_float64(env, t.fld); \ } \ } \ \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_RSQRTE(xsrsqrtedp, 1, float64, VsrD(0), 1, 0) @@ -2158,9 +2158,9 @@ VSX_TSQRT(xvtsqrtsp, 4, float32, VsrW(i), -126, 23) * fld - vsr_t field (VsrD(*) or VsrW(*)) * maddflgs - flags for the float*muladd routine that control the * various forms (madd, msub, nmadd, nmsub) - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_MADD(op, nels, tp, fld, maddflgs, sfprf) \ +#define VSX_MADD(op, nels, tp, fld, maddflgs, sfifprf) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ ppc_vsr_t *s1, ppc_vsr_t *s2, ppc_vsr_t *s3) \ { \ @@ -2177,15 +2177,15 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ float_invalid_op_madd(env, tstat.float_exception_flags, \ - sfprf, GETPC()); \ + sfifprf, GETPC()); \ } \ \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_float64(env, t.fld); \ } \ } \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1) @@ -2670,9 +2670,9 @@ VSX_CMP(xvcmpnesp, 4, float32, VsrW(i), eq, 0, 0) * ttp - target type (float32 or float64) * sfld - source vsr_t field * tfld - target vsr_t field (f32 or f64) - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_CVT_FP_TO_FP(op, nels, stp, ttp, sfld, tfld, sfprf) \ +#define VSX_CVT_FP_TO_FP(op, nels, stp, ttp, sfld, tfld, sfifprf) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ ppc_vsr_t t = { }; \ @@ -2685,19 +2685,19 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ float_invalid_op_vxsnan(env, GETPC()); \ t.tfld = ttp##_snan_to_qnan(t.tfld); \ } \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_##ttp(env, t.tfld); \ } \ } \ \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_CVT_FP_TO_FP(xscvspdp, 1, float32, float64, VsrW(0), VsrD(0), 1) VSX_CVT_FP_TO_FP(xvcvspdp, 2, float32, float64, VsrW(2 * i), VsrD(i), 0) -#define VSX_CVT_FP_TO_FP2(op, nels, stp, ttp, sfprf) \ +#define VSX_CVT_FP_TO_FP2(op, nels, stp, ttp, sfifprf) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ ppc_vsr_t t = { }; \ @@ -2710,14 +2710,14 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ float_invalid_op_vxsnan(env, GETPC()); \ t.VsrW(2 * i) = ttp##_snan_to_qnan(t.VsrW(2 * i)); \ } \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_##ttp(env, t.VsrW(2 * i)); \ } \ t.VsrW(2 * i + 1) = t.VsrW(2 * i); \ } \ \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_CVT_FP_TO_FP2(xvcvdpsp, 2, float64, float32, 0) @@ -2733,9 +2733,9 @@ VSX_CVT_FP_TO_FP2(xscvdpsp, 1, float64, float32, 1) * tfld - target vsr_t field (f32 or f64) * sfprf - set FPRF */ -#define VSX_CVT_FP_TO_FP_VECTOR(op, nels, stp, ttp, sfld, tfld, sfprf) \ -void helper_##op(CPUPPCState *env, uint32_t opcode, \ - ppc_vsr_t *xt, ppc_vsr_t *xb) \ +#define VSX_CVT_FP_TO_FP_VECTOR(op, nels, stp, ttp, sfld, tfld, sfprf) \ +void helper_##op(CPUPPCState *env, uint32_t opcode, \ + ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ ppc_vsr_t t = *xt; \ int i; \ @@ -2767,9 +2767,9 @@ VSX_CVT_FP_TO_FP_VECTOR(xscvdpqp, 1, float64, float128, VsrD(0), f128, 1) * ttp - target type * sfld - source vsr_t field * tfld - target vsr_t field - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_CVT_FP_TO_FP_HP(op, nels, stp, ttp, sfld, tfld, sfprf) \ +#define VSX_CVT_FP_TO_FP_HP(op, nels, stp, ttp, sfld, tfld, sfifprf) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ ppc_vsr_t t = { }; \ @@ -2782,13 +2782,13 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ float_invalid_op_vxsnan(env, GETPC()); \ t.tfld = ttp##_snan_to_qnan(t.tfld); \ } \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_##ttp(env, t.tfld); \ } \ } \ \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_CVT_FP_TO_FP_HP(xscvdphp, 1, float64, float16, VsrD(0), VsrH(3), 1) @@ -3035,9 +3035,9 @@ VSX_CVT_FP_TO_INT_VECTOR(xscvqpuwz, float128, uint32, f128, VsrD(0), 0x0ULL) * sfld - source vsr_t field * tfld - target vsr_t field * jdef - definition of the j index (i or 2*i) - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_CVT_INT_TO_FP(op, nels, stp, ttp, sfld, tfld, sfprf, r2sp) \ +#define VSX_CVT_INT_TO_FP(op, nels, stp, ttp, sfld, tfld, sfifprf, r2sp)\ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ ppc_vsr_t t = { }; \ @@ -3048,13 +3048,13 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ if (r2sp) { \ t.tfld = do_frsp(env, t.tfld, GETPC()); \ } \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_float64(env, t.tfld); \ } \ } \ \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_CVT_INT_TO_FP(xscvsxddp, 1, int64, float64, VsrD(0), VsrD(0), 1, 0) @@ -3136,9 +3136,9 @@ VSX_CVT_INT_TO_FP_VECTOR(xscvudqp, uint64, float128, VsrD(0), f128) * tp - type (float32 or float64) * fld - vsr_t field (VsrD(*) or VsrW(*)) * rmode - rounding mode - * sfprf - set FPRF + * sfifprf - set FI and FPRF */ -#define VSX_ROUND(op, nels, tp, fld, rmode, sfprf) \ +#define VSX_ROUND(op, nels, tp, fld, rmode, sfifprf) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ { \ ppc_vsr_t t = { }; \ @@ -3158,7 +3158,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } else { \ t.fld = tp##_round_to_int(xb->fld, &env->fp_status); \ } \ - if (sfprf) { \ + if (sfifprf) { \ helper_compute_fprf_float64(env, t.fld); \ } \ } \ @@ -3174,7 +3174,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) \ } \ \ *xt = t; \ - do_float_check_status(env, sfprf, GETPC()); \ + do_float_check_status(env, sfifprf, GETPC()); \ } VSX_ROUND(xsrdpi, 1, float64, VsrD(0), float_round_ties_away, 1) From d5aa9e79048bfc42616949391591fe7947574e5d Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Mon, 23 May 2022 17:18:59 +0200 Subject: [PATCH 10/34] pnv/xive2: Don't overwrite PC registers when writing TCTXT registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When writing a register from the TCTXT memory region (4th page within the IC BAR), we were overwriting the Presentation Controller (PC) register at the same offset. It looks like a silly cut and paste error. We were somehow lucky: the TCTXT registers being touched are TCTXT_ENx/_SET/_RESET to enable physical threads and the PC registers at the same offset are either not used by our model or the update was harmless. Found through code inspection. Signed-off-by: Frederic Barrat Reviewed-by: Cédric Le Goater Message-Id: <20220523151859.72283-1-fbarrat@linux.ibm.com> Signed-off-by: Daniel Henrique Barboza --- hw/intc/pnv_xive2.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index 87303b4064..a39e070e82 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -1295,7 +1295,6 @@ static void pnv_xive2_ic_tctxt_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); - uint32_t reg = offset >> 3; switch (offset) { /* @@ -1322,8 +1321,6 @@ static void pnv_xive2_ic_tctxt_write(void *opaque, hwaddr offset, xive2_error(xive, "TCTXT: invalid write @%"HWADDR_PRIx, offset); return; } - - xive->pc_regs[reg] = val; } static const MemoryRegionOps pnv_xive2_ic_tctxt_ops = { From 8f6086044b4de2907ee9660690b709b488b3d55b Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:18 -0300 Subject: [PATCH 11/34] target/ppc: declare darn32/darn64 helpers with TCG_CALL_NO_RWG Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-2-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index aa6773c4a5..44eb6b7b7c 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -59,8 +59,8 @@ DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) -DEF_HELPER_0(darn32, tl) -DEF_HELPER_0(darn64, tl) +DEF_HELPER_FLAGS_0(darn32, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(darn64, TCG_CALL_NO_RWG, tl) #endif DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_NO_RWG_SE, i32, i32) From 9aa898b8977abca0af6b2ed9ec0f7cfa20b9d7f5 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:19 -0300 Subject: [PATCH 12/34] target/ppc: use TCG_CALL_NO_RWG in vector helpers without env Helpers of vector instructions without cpu_env as an argument do not access globals. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-3-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 162 ++++++++++++++++++++++---------------------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 44eb6b7b7c..da740ad9af 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -133,15 +133,15 @@ DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) #define dh_ctype_vsr ppc_vsr_t * #define dh_typecode_vsr dh_typecode_ptr -DEF_HELPER_3(vavgub, void, avr, avr, avr) -DEF_HELPER_3(vavguh, void, avr, avr, avr) -DEF_HELPER_3(vavguw, void, avr, avr, avr) -DEF_HELPER_3(vabsdub, void, avr, avr, avr) -DEF_HELPER_3(vabsduh, void, avr, avr, avr) -DEF_HELPER_3(vabsduw, void, avr, avr, avr) -DEF_HELPER_3(vavgsb, void, avr, avr, avr) -DEF_HELPER_3(vavgsh, void, avr, avr, avr) -DEF_HELPER_3(vavgsw, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vavgub, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vavguh, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vavguw, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vabsdub, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vabsduh, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vabsduw, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vavgsb, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vavgsh, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vavgsw, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_4(vcmpeqfp, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgefp, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtfp, void, env, avr, avr, avr) @@ -153,12 +153,12 @@ DEF_HELPER_4(vcmpeqfp_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgefp_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtfp_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpbfp_dot, void, env, avr, avr, avr) -DEF_HELPER_3(vmrglb, void, avr, avr, avr) -DEF_HELPER_3(vmrglh, void, avr, avr, avr) -DEF_HELPER_3(vmrglw, void, avr, avr, avr) -DEF_HELPER_3(vmrghb, void, avr, avr, avr) -DEF_HELPER_3(vmrghh, void, avr, avr, avr) -DEF_HELPER_3(vmrghw, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vmrglb, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vmrglh, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vmrglw, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vmrghb, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vmrghh, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vmrghw, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULESB, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULESH, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULESW, TCG_CALL_NO_RWG, void, avr, avr, avr) @@ -171,15 +171,15 @@ DEF_HELPER_FLAGS_3(VMULOSW, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULOUB, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULOUH, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULOUW, TCG_CALL_NO_RWG, void, avr, avr, avr) -DEF_HELPER_3(vslo, void, avr, avr, avr) -DEF_HELPER_3(vsro, void, avr, avr, avr) -DEF_HELPER_3(vsrv, void, avr, avr, avr) -DEF_HELPER_3(vslv, void, avr, avr, avr) -DEF_HELPER_3(vaddcuw, void, avr, avr, avr) -DEF_HELPER_2(vprtybw, void, avr, avr) -DEF_HELPER_2(vprtybd, void, avr, avr) -DEF_HELPER_2(vprtybq, void, avr, avr) -DEF_HELPER_3(vsubcuw, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vslo, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vslv, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vaddcuw, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_2(vprtybw, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vprtybd, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vprtybq, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_3(vsubcuw, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_5(vaddsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_5(vaddshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_5(vaddsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) @@ -192,19 +192,19 @@ DEF_HELPER_FLAGS_5(vadduws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_5(vsububs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_5(vsubuhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_5(vsubuws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_3(vadduqm, void, avr, avr, avr) -DEF_HELPER_4(vaddecuq, void, avr, avr, avr, avr) -DEF_HELPER_4(vaddeuqm, void, avr, avr, avr, avr) -DEF_HELPER_3(vaddcuq, void, avr, avr, avr) -DEF_HELPER_3(vsubuqm, void, avr, avr, avr) -DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr) -DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr) -DEF_HELPER_3(vsubcuq, void, avr, avr, avr) -DEF_HELPER_4(vsldoi, void, avr, avr, avr, i32) -DEF_HELPER_3(vextractub, void, avr, avr, i32) -DEF_HELPER_3(vextractuh, void, avr, avr, i32) -DEF_HELPER_3(vextractuw, void, avr, avr, i32) -DEF_HELPER_3(vextractd, void, avr, avr, i32) +DEF_HELPER_FLAGS_3(vadduqm, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_4(vaddecuq, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) +DEF_HELPER_FLAGS_4(vaddeuqm, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) +DEF_HELPER_FLAGS_3(vaddcuq, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vsubuqm, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_4(vsubecuq, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) +DEF_HELPER_FLAGS_4(vsubeuqm, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) +DEF_HELPER_FLAGS_3(vsubcuq, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_4(vsldoi, TCG_CALL_NO_RWG, void, avr, avr, avr, i32) +DEF_HELPER_FLAGS_3(vextractub, TCG_CALL_NO_RWG, void, avr, avr, i32) +DEF_HELPER_FLAGS_3(vextractuh, TCG_CALL_NO_RWG, void, avr, avr, i32) +DEF_HELPER_FLAGS_3(vextractuw, TCG_CALL_NO_RWG, void, avr, avr, i32) +DEF_HELPER_FLAGS_3(vextractd, TCG_CALL_NO_RWG, void, avr, avr, i32) DEF_HELPER_4(VINSBLX, void, env, avr, i64, tl) DEF_HELPER_4(VINSHLX, void, env, avr, i64, tl) DEF_HELPER_4(VINSWLX, void, env, avr, i64, tl) @@ -213,16 +213,16 @@ DEF_HELPER_FLAGS_2(VSTRIBL, TCG_CALL_NO_RWG, i32, avr, avr) DEF_HELPER_FLAGS_2(VSTRIBR, TCG_CALL_NO_RWG, i32, avr, avr) DEF_HELPER_FLAGS_2(VSTRIHL, TCG_CALL_NO_RWG, i32, avr, avr) DEF_HELPER_FLAGS_2(VSTRIHR, TCG_CALL_NO_RWG, i32, avr, avr) -DEF_HELPER_2(vnegw, void, avr, avr) -DEF_HELPER_2(vnegd, void, avr, avr) -DEF_HELPER_2(vupkhpx, void, avr, avr) -DEF_HELPER_2(vupklpx, void, avr, avr) -DEF_HELPER_2(vupkhsb, void, avr, avr) -DEF_HELPER_2(vupkhsh, void, avr, avr) -DEF_HELPER_2(vupkhsw, void, avr, avr) -DEF_HELPER_2(vupklsb, void, avr, avr) -DEF_HELPER_2(vupklsh, void, avr, avr) -DEF_HELPER_2(vupklsw, void, avr, avr) +DEF_HELPER_FLAGS_2(vnegw, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vnegd, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vupkhpx, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vupklpx, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vupkhsb, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vupkhsh, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vupkhsw, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vupklsb, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vupklsh, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vupklsw, TCG_CALL_NO_RWG, void, avr, avr) DEF_HELPER_5(vmsumubm, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmsummbm, void, env, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VPERM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) @@ -239,14 +239,14 @@ DEF_HELPER_4(vpkudus, void, env, avr, avr, avr) DEF_HELPER_4(vpkuhum, void, env, avr, avr, avr) DEF_HELPER_4(vpkuwum, void, env, avr, avr, avr) DEF_HELPER_4(vpkudum, void, env, avr, avr, avr) -DEF_HELPER_3(vpkpx, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vpkpx, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_5(vmhaddshs, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmhraddshs, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmsumuhm, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmsumuhs, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmsumshm, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmsumshs, void, env, avr, avr, avr, avr) -DEF_HELPER_4(vmladduhm, void, avr, avr, avr, avr) +DEF_HELPER_FLAGS_4(vmladduhm, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_FLAGS_2(mtvscr, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_1(mfvscr, TCG_CALL_NO_RWG, i32, env) DEF_HELPER_3(lvebx, void, env, avr, tl) @@ -289,43 +289,43 @@ DEF_HELPER_4(vcfsx, void, env, avr, avr, i32) DEF_HELPER_4(vctuxs, void, env, avr, avr, i32) DEF_HELPER_4(vctsxs, void, env, avr, avr, i32) -DEF_HELPER_2(vclzb, void, avr, avr) -DEF_HELPER_2(vclzh, void, avr, avr) -DEF_HELPER_2(vctzb, void, avr, avr) -DEF_HELPER_2(vctzh, void, avr, avr) -DEF_HELPER_2(vctzw, void, avr, avr) -DEF_HELPER_2(vctzd, void, avr, avr) -DEF_HELPER_2(vpopcntb, void, avr, avr) -DEF_HELPER_2(vpopcnth, void, avr, avr) -DEF_HELPER_2(vpopcntw, void, avr, avr) -DEF_HELPER_2(vpopcntd, void, avr, avr) -DEF_HELPER_1(vclzlsbb, tl, avr) -DEF_HELPER_1(vctzlsbb, tl, avr) -DEF_HELPER_3(vbpermd, void, avr, avr, avr) -DEF_HELPER_3(vbpermq, void, avr, avr, avr) -DEF_HELPER_3(vpmsumb, void, avr, avr, avr) -DEF_HELPER_3(vpmsumh, void, avr, avr, avr) -DEF_HELPER_3(vpmsumw, void, avr, avr, avr) -DEF_HELPER_3(vpmsumd, void, avr, avr, avr) -DEF_HELPER_2(vextublx, tl, tl, avr) -DEF_HELPER_2(vextuhlx, tl, tl, avr) -DEF_HELPER_2(vextuwlx, tl, tl, avr) -DEF_HELPER_2(vextubrx, tl, tl, avr) -DEF_HELPER_2(vextuhrx, tl, tl, avr) -DEF_HELPER_2(vextuwrx, tl, tl, avr) +DEF_HELPER_FLAGS_2(vclzb, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vclzh, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vctzb, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vctzh, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vctzw, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vctzd, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vpopcntb, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vpopcnth, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vpopcntw, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_2(vpopcntd, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_1(vclzlsbb, TCG_CALL_NO_RWG, tl, avr) +DEF_HELPER_FLAGS_1(vctzlsbb, TCG_CALL_NO_RWG, tl, avr) +DEF_HELPER_FLAGS_3(vbpermd, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vbpermq, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vpmsumb, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vpmsumh, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vpmsumw, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vpmsumd, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_2(vextublx, TCG_CALL_NO_RWG, tl, tl, avr) +DEF_HELPER_FLAGS_2(vextuhlx, TCG_CALL_NO_RWG, tl, tl, avr) +DEF_HELPER_FLAGS_2(vextuwlx, TCG_CALL_NO_RWG, tl, tl, avr) +DEF_HELPER_FLAGS_2(vextubrx, TCG_CALL_NO_RWG, tl, tl, avr) +DEF_HELPER_FLAGS_2(vextuhrx, TCG_CALL_NO_RWG, tl, tl, avr) +DEF_HELPER_FLAGS_2(vextuwrx, TCG_CALL_NO_RWG, tl, tl, avr) DEF_HELPER_5(VEXTDUBVLX, void, env, avr, avr, avr, tl) DEF_HELPER_5(VEXTDUHVLX, void, env, avr, avr, avr, tl) DEF_HELPER_5(VEXTDUWVLX, void, env, avr, avr, avr, tl) DEF_HELPER_5(VEXTDDVLX, void, env, avr, avr, avr, tl) -DEF_HELPER_2(vsbox, void, avr, avr) -DEF_HELPER_3(vcipher, void, avr, avr, avr) -DEF_HELPER_3(vcipherlast, void, avr, avr, avr) -DEF_HELPER_3(vncipher, void, avr, avr, avr) -DEF_HELPER_3(vncipherlast, void, avr, avr, avr) -DEF_HELPER_3(vshasigmaw, void, avr, avr, i32) -DEF_HELPER_3(vshasigmad, void, avr, avr, i32) -DEF_HELPER_4(vpermxor, void, avr, avr, avr, avr) +DEF_HELPER_FLAGS_2(vsbox, TCG_CALL_NO_RWG, void, avr, avr) +DEF_HELPER_FLAGS_3(vcipher, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vcipherlast, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vncipher, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vncipherlast, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(vshasigmaw, TCG_CALL_NO_RWG, void, avr, avr, i32) +DEF_HELPER_FLAGS_3(vshasigmad, TCG_CALL_NO_RWG, void, avr, avr, i32) +DEF_HELPER_FLAGS_4(vpermxor, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_4(bcdadd, i32, avr, avr, avr, i32) DEF_HELPER_4(bcdsub, i32, avr, avr, avr, i32) From 491bcaaa35a68157c0546c65fe6b91e65357b6c9 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:20 -0300 Subject: [PATCH 13/34] target/ppc: use TCG_CALL_NO_RWG in BCD helpers Helpers of BCD instructions only access the VSRs supplied by the TCGv_ptr arguments, no globals are accessed. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-4-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index da740ad9af..a02c4be906 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -327,21 +327,21 @@ DEF_HELPER_FLAGS_3(vshasigmaw, TCG_CALL_NO_RWG, void, avr, avr, i32) DEF_HELPER_FLAGS_3(vshasigmad, TCG_CALL_NO_RWG, void, avr, avr, i32) DEF_HELPER_FLAGS_4(vpermxor, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) -DEF_HELPER_4(bcdadd, i32, avr, avr, avr, i32) -DEF_HELPER_4(bcdsub, i32, avr, avr, avr, i32) -DEF_HELPER_3(bcdcfn, i32, avr, avr, i32) -DEF_HELPER_3(bcdctn, i32, avr, avr, i32) -DEF_HELPER_3(bcdcfz, i32, avr, avr, i32) -DEF_HELPER_3(bcdctz, i32, avr, avr, i32) -DEF_HELPER_3(bcdcfsq, i32, avr, avr, i32) -DEF_HELPER_3(bcdctsq, i32, avr, avr, i32) -DEF_HELPER_4(bcdcpsgn, i32, avr, avr, avr, i32) -DEF_HELPER_3(bcdsetsgn, i32, avr, avr, i32) -DEF_HELPER_4(bcds, i32, avr, avr, avr, i32) -DEF_HELPER_4(bcdus, i32, avr, avr, avr, i32) -DEF_HELPER_4(bcdsr, i32, avr, avr, avr, i32) -DEF_HELPER_4(bcdtrunc, i32, avr, avr, avr, i32) -DEF_HELPER_4(bcdutrunc, i32, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(bcdadd, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(bcdsub, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) +DEF_HELPER_FLAGS_3(bcdcfn, TCG_CALL_NO_RWG, i32, avr, avr, i32) +DEF_HELPER_FLAGS_3(bcdctn, TCG_CALL_NO_RWG, i32, avr, avr, i32) +DEF_HELPER_FLAGS_3(bcdcfz, TCG_CALL_NO_RWG, i32, avr, avr, i32) +DEF_HELPER_FLAGS_3(bcdctz, TCG_CALL_NO_RWG, i32, avr, avr, i32) +DEF_HELPER_FLAGS_3(bcdcfsq, TCG_CALL_NO_RWG, i32, avr, avr, i32) +DEF_HELPER_FLAGS_3(bcdctsq, TCG_CALL_NO_RWG, i32, avr, avr, i32) +DEF_HELPER_FLAGS_4(bcdcpsgn, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) +DEF_HELPER_FLAGS_3(bcdsetsgn, TCG_CALL_NO_RWG, i32, avr, avr, i32) +DEF_HELPER_FLAGS_4(bcds, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(bcdus, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(bcdsr, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(bcdtrunc, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) +DEF_HELPER_FLAGS_4(bcdutrunc, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) DEF_HELPER_4(xsadddp, void, env, vsr, vsr, vsr) DEF_HELPER_5(xsaddqp, void, env, i32, vsr, vsr, vsr) From f2454bfe73d1ae9e7561fc3c29e0d2729381d27c Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:21 -0300 Subject: [PATCH 14/34] target/ppc: use TCG_CALL_NO_RWG in VSX helpers without env Helpers of VSX instructions without cpu_env as an argument do not access globals. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-5-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index a02c4be906..f82c5bd0db 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -533,10 +533,10 @@ DEF_HELPER_FLAGS_5(XXPERMX, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, tl) DEF_HELPER_4(xxinsertw, void, env, vsr, vsr, i32) DEF_HELPER_3(xvxsigsp, void, env, vsr, vsr) DEF_HELPER_FLAGS_5(XXEVAL, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) -DEF_HELPER_5(XXBLENDVB, void, vsr, vsr, vsr, vsr, i32) -DEF_HELPER_5(XXBLENDVH, void, vsr, vsr, vsr, vsr, i32) -DEF_HELPER_5(XXBLENDVW, void, vsr, vsr, vsr, vsr, i32) -DEF_HELPER_5(XXBLENDVD, void, vsr, vsr, vsr, vsr, i32) +DEF_HELPER_FLAGS_5(XXBLENDVB, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) +DEF_HELPER_FLAGS_5(XXBLENDVH, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) +DEF_HELPER_FLAGS_5(XXBLENDVW, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) +DEF_HELPER_FLAGS_5(XXBLENDVD, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_2(efscfsi, i32, env, i32) DEF_HELPER_2(efscfui, i32, env, i32) From eb69a84bb07be94179eb2275cd9d4bed12afc66f Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:22 -0300 Subject: [PATCH 15/34] target/ppc: Use TCG_CALL_NO_RWG_SE in fsel helper fsel doesn't change FPSCR and CR1 is handled by gen_set_cr1_from_fpscr, so helper_fsel doesn't need the env argument and can be declared with TCG_CALL_NO_RWG_SE. We also take this opportunity to move the insn to decodetree. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-6-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/fpu_helper.c | 15 +++++++-------- target/ppc/helper.h | 2 +- target/ppc/insn32.decode | 7 +++++++ target/ppc/translate/fp-impl.c.inc | 30 ++++++++++++++++++++++++++++-- target/ppc/translate/fp-ops.c.inc | 1 - 5 files changed, 43 insertions(+), 12 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 8592727792..588448702f 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -919,18 +919,17 @@ float64 helper_frsqrtes(CPUPPCState *env, float64 arg) } /* fsel - fsel. */ -uint64_t helper_fsel(CPUPPCState *env, uint64_t arg1, uint64_t arg2, - uint64_t arg3) +uint64_t helper_FSEL(uint64_t a, uint64_t b, uint64_t c) { - CPU_DoubleU farg1; + CPU_DoubleU fa; - farg1.ll = arg1; + fa.ll = a; - if ((!float64_is_neg(farg1.d) || float64_is_zero(farg1.d)) && - !float64_is_any_nan(farg1.d)) { - return arg2; + if ((!float64_is_neg(fa.d) || float64_is_zero(fa.d)) && + !float64_is_any_nan(fa.d)) { + return c; } else { - return arg3; + return b; } } diff --git a/target/ppc/helper.h b/target/ppc/helper.h index f82c5bd0db..ddfa0308bc 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -120,7 +120,7 @@ DEF_HELPER_2(fre, i64, env, i64) DEF_HELPER_2(fres, i64, env, i64) DEF_HELPER_2(frsqrte, i64, env, i64) DEF_HELPER_2(frsqrtes, i64, env, i64) -DEF_HELPER_4(fsel, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_3(FSEL, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) DEF_HELPER_FLAGS_2(ftdiv, TCG_CALL_NO_RWG_SE, i32, i64, i64) DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 39372fe673..1d0b55bde3 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -17,6 +17,9 @@ # License along with this library; if not, see . # +&A frt fra frb frc rc:bool +@A ...... frt:5 fra:5 frb:5 frc:5 ..... rc:1 &A + &D rt ra si:int64_t @D ...... rt:5 ra:5 si:s16 &D @@ -308,6 +311,10 @@ STFDU 110111 ..... ...... ............... @D STFDX 011111 ..... ...... .... 1011010111 - @X STFDUX 011111 ..... ...... .... 1011110111 - @X +### Floating-Point Select Instruction + +FSEL 111111 ..... ..... ..... ..... 10111 . @A + ### Move To/From System Register Instructions SETBC 011111 ..... ..... ----- 0110000000 - @X_bi diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index cfb27bd020..f9b58b844e 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -222,8 +222,34 @@ static void gen_frsqrtes(DisasContext *ctx) tcg_temp_free_i64(t1); } -/* fsel */ -_GEN_FLOAT_ACB(sel, 0x3F, 0x17, 0, PPC_FLOAT_FSEL); +static bool trans_FSEL(DisasContext *ctx, arg_A *a) +{ + TCGv_i64 t0, t1, t2; + + REQUIRE_INSNS_FLAGS(ctx, FLOAT_FSEL); + REQUIRE_FPU(ctx); + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + + get_fpr(t0, a->fra); + get_fpr(t1, a->frb); + get_fpr(t2, a->frc); + + gen_helper_FSEL(t0, t0, t1, t2); + set_fpr(a->frt, t0); + if (a->rc) { + gen_set_cr1_from_fpscr(ctx); + } + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + + return true; +} + /* fsub - fsubs */ GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT); /* Optional: */ diff --git a/target/ppc/translate/fp-ops.c.inc b/target/ppc/translate/fp-ops.c.inc index 4260635a12..0538ab2d2d 100644 --- a/target/ppc/translate/fp-ops.c.inc +++ b/target/ppc/translate/fp-ops.c.inc @@ -24,7 +24,6 @@ GEN_FLOAT_AC(mul, 0x19, 0x0000F800, 1, PPC_FLOAT), GEN_FLOAT_BS(re, 0x3F, 0x18, 1, PPC_FLOAT_EXT), GEN_FLOAT_BS(res, 0x3B, 0x18, 1, PPC_FLOAT_FRES), GEN_FLOAT_BS(rsqrte, 0x3F, 0x1A, 1, PPC_FLOAT_FRSQRTE), -_GEN_FLOAT_ACB(sel, sel, 0x3F, 0x17, 0, 0, PPC_FLOAT_FSEL), GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT), GEN_FLOAT_ACB(madd, 0x1D, 1, PPC_FLOAT), GEN_FLOAT_ACB(msub, 0x1C, 1, PPC_FLOAT), From cf862bee0e0d31de4809ac7bacb1d65ff7e8b9ce Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:23 -0300 Subject: [PATCH 16/34] target/ppc: declare xscvspdpn helper with call flags Move xscvspdpn to decodetree, declare helper_xscvspdpn with TCG_CALL_NO_RWG_SE and drop the unused env argument. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-7-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/fpu_helper.c | 2 +- target/ppc/helper.h | 2 +- target/ppc/insn32.decode | 1 + target/ppc/translate/vsx-impl.c.inc | 22 +++++++++++++++++++++- target/ppc/translate/vsx-ops.c.inc | 1 - 5 files changed, 24 insertions(+), 4 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 588448702f..55ef4b5d48 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2878,7 +2878,7 @@ uint64_t helper_xscvdpspn(CPUPPCState *env, uint64_t xb) return (result << 32) | result; } -uint64_t helper_xscvspdpn(CPUPPCState *env, uint64_t xb) +uint64_t helper_XSCVSPDPN(uint64_t xb) { return helper_todouble(xb >> 32); } diff --git a/target/ppc/helper.h b/target/ppc/helper.h index ddfa0308bc..9be69fa91e 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -395,7 +395,7 @@ DEF_HELPER_3(XSCVSQQP, void, env, vsr, vsr) DEF_HELPER_3(xscvhpdp, void, env, vsr, vsr) DEF_HELPER_4(xscvsdqp, void, env, i32, vsr, vsr) DEF_HELPER_3(xscvspdp, void, env, vsr, vsr) -DEF_HELPER_2(xscvspdpn, i64, env, i64) +DEF_HELPER_FLAGS_1(XSCVSPDPN, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_3(xscvdpsxds, void, env, vsr, vsr) DEF_HELPER_3(xscvdpsxws, void, env, vsr, vsr) DEF_HELPER_3(xscvdpuxds, void, env, vsr, vsr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 1d0b55bde3..d4c2615b1a 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -708,6 +708,7 @@ XSCVUQQP 111111 ..... 00011 ..... 1101000100 - @X_tb XSCVSQQP 111111 ..... 01011 ..... 1101000100 - @X_tb XVCVBF16SPN 111100 ..... 10000 ..... 111011011 .. @XX2 XVCVSPBF16 111100 ..... 10001 ..... 111011011 .. @XX2 +XSCVSPDPN 111100 ..... ----- ..... 101001011 .. @XX2 ## VSX Vector Test Least-Significant Bit by Byte Instruction diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 3692740736..9b4f309d9d 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -1045,7 +1045,27 @@ GEN_VSX_HELPER_R2(xscvqpuwz, 0x04, 0x1A, 0x01, PPC2_ISA300) GEN_VSX_HELPER_X2(xscvhpdp, 0x16, 0x15, 0x10, PPC2_ISA300) GEN_VSX_HELPER_R2(xscvsdqp, 0x04, 0x1A, 0x0A, PPC2_ISA300) GEN_VSX_HELPER_X2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX) -GEN_VSX_HELPER_XT_XB_ENV(xscvspdpn, 0x16, 0x14, 0, PPC2_VSX207) + +bool trans_XSCVSPDPN(DisasContext *ctx, arg_XX2 *a) +{ + TCGv_i64 tmp; + + REQUIRE_INSNS_FLAGS2(ctx, VSX207); + REQUIRE_VSX(ctx); + + tmp = tcg_temp_new_i64(); + get_cpu_vsr(tmp, a->xb, true); + + gen_helper_XSCVSPDPN(tmp, tmp); + + set_cpu_vsr(a->xt, tmp, true); + set_cpu_vsr(a->xt, tcg_constant_i64(0), false); + + tcg_temp_free_i64(tmp); + + return true; +} + GEN_VSX_HELPER_X2(xscvdpsxds, 0x10, 0x15, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xscvdpsxws, 0x10, 0x05, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xscvdpuxds, 0x10, 0x14, 0, PPC2_VSX) diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index b8fd116728..52d7ab30cd 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -200,7 +200,6 @@ GEN_XX2FORM(xscvdpspn, 0x16, 0x10, PPC2_VSX207), GEN_XX2FORM_EO(xscvhpdp, 0x16, 0x15, 0x10, PPC2_ISA300), GEN_VSX_XFORM_300_EO(xscvsdqp, 0x04, 0x1A, 0x0A, 0x00000001), GEN_XX2FORM(xscvspdp, 0x12, 0x14, PPC2_VSX), -GEN_XX2FORM(xscvspdpn, 0x16, 0x14, PPC2_VSX207), GEN_XX2FORM(xscvdpsxds, 0x10, 0x15, PPC2_VSX), GEN_XX2FORM(xscvdpsxws, 0x10, 0x05, PPC2_VSX), GEN_XX2FORM(xscvdpuxds, 0x10, 0x14, PPC2_VSX), From c36ab970ac0ce257a6badb30f6a485a81c2289d2 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:24 -0300 Subject: [PATCH 17/34] target/ppc: declare xvxsigsp helper with call flags Move xvxsigsp to decodetree, declare helper_xvxsigsp with TCG_CALL_NO_RWG, and drop the unused env argument. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-8-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/fpu_helper.c | 2 +- target/ppc/helper.h | 2 +- target/ppc/insn32.decode | 4 ++++ target/ppc/translate/vsx-impl.c.inc | 18 +++++++++++++++++- target/ppc/translate/vsx-ops.c.inc | 1 - 5 files changed, 23 insertions(+), 4 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 55ef4b5d48..9489e06504 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -3205,7 +3205,7 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb) return xt; } -void helper_xvxsigsp(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) +void helper_XVXSIGSP(ppc_vsr_t *xt, ppc_vsr_t *xb) { ppc_vsr_t t = { }; uint32_t exp, i, fraction; diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 9be69fa91e..aed1b24fdb 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -531,7 +531,7 @@ DEF_HELPER_FLAGS_2(XXGENPCVDM_le_comp, TCG_CALL_NO_RWG, void, vsr, avr) DEF_HELPER_4(xxextractuw, void, env, vsr, vsr, i32) DEF_HELPER_FLAGS_5(XXPERMX, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, tl) DEF_HELPER_4(xxinsertw, void, env, vsr, vsr, i32) -DEF_HELPER_3(xvxsigsp, void, env, vsr, vsr) +DEF_HELPER_FLAGS_2(XVXSIGSP, TCG_CALL_NO_RWG, void, vsr, vsr) DEF_HELPER_FLAGS_5(XXEVAL, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_FLAGS_5(XXBLENDVB, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_FLAGS_5(XXBLENDVH, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index d4c2615b1a..483349ff6d 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -710,6 +710,10 @@ XVCVBF16SPN 111100 ..... 10000 ..... 111011011 .. @XX2 XVCVSPBF16 111100 ..... 10001 ..... 111011011 .. @XX2 XSCVSPDPN 111100 ..... ----- ..... 101001011 .. @XX2 +## VSX Binary Floating-Point Math Support Instructions + +XVXSIGSP 111100 ..... 01001 ..... 111011011 .. @XX2 + ## VSX Vector Test Least-Significant Bit by Byte Instruction XVTLSBB 111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 9b4f309d9d..ca11e2c4b8 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2151,7 +2151,23 @@ static void gen_xvxexpdp(DisasContext *ctx) tcg_temp_free_i64(xbl); } -GEN_VSX_HELPER_X2(xvxsigsp, 0x00, 0x04, 0, PPC2_ISA300) +static bool trans_XVXSIGSP(DisasContext *ctx, arg_XX2 *a) +{ + TCGv_ptr t, b; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VSX(ctx); + + t = gen_vsr_ptr(a->xt); + b = gen_vsr_ptr(a->xb); + + gen_helper_XVXSIGSP(t, b); + + tcg_temp_free_ptr(t); + tcg_temp_free_ptr(b); + + return true; +} static void gen_xvxsigdp(DisasContext *ctx) { diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index 52d7ab30cd..4524c5b02a 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -156,7 +156,6 @@ GEN_XX3FORM(xviexpdp, 0x00, 0x1F, PPC2_ISA300), GEN_XX2FORM_EO(xvxexpdp, 0x16, 0x1D, 0x00, PPC2_ISA300), GEN_XX2FORM_EO(xvxsigdp, 0x16, 0x1D, 0x01, PPC2_ISA300), GEN_XX2FORM_EO(xvxexpsp, 0x16, 0x1D, 0x08, PPC2_ISA300), -GEN_XX2FORM_EO(xvxsigsp, 0x16, 0x1D, 0x09, PPC2_ISA300), /* DCMX = bit[25] << 6 | bit[29] << 5 | bit[11:15] */ #define GEN_XX2FORM_DCMX(name, opc2, opc3, fl2) \ From 8f5eeee3f1f1e7da4a1bf1ecb5527071fde1b2d5 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:25 -0300 Subject: [PATCH 18/34] target/ppc: declare xxextractuw and xxinsertw helpers with call flags Move xxextractuw and xxinsertw to decodetree, declare both helpers with TCG_CALL_NO_RWG, and drop the unused env argument. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-9-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 4 +- target/ppc/insn32.decode | 9 ++++- target/ppc/int_helper.c | 6 +-- target/ppc/translate/vsx-impl.c.inc | 63 +++++++++++++---------------- target/ppc/translate/vsx-ops.c.inc | 2 - 5 files changed, 39 insertions(+), 45 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index aed1b24fdb..640a70cd5c 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -528,9 +528,9 @@ DEF_HELPER_FLAGS_2(XXGENPCVDM_be_exp, TCG_CALL_NO_RWG, void, vsr, avr) DEF_HELPER_FLAGS_2(XXGENPCVDM_be_comp, TCG_CALL_NO_RWG, void, vsr, avr) DEF_HELPER_FLAGS_2(XXGENPCVDM_le_exp, TCG_CALL_NO_RWG, void, vsr, avr) DEF_HELPER_FLAGS_2(XXGENPCVDM_le_comp, TCG_CALL_NO_RWG, void, vsr, avr) -DEF_HELPER_4(xxextractuw, void, env, vsr, vsr, i32) +DEF_HELPER_FLAGS_3(XXEXTRACTUW, TCG_CALL_NO_RWG, void, vsr, vsr, i32) DEF_HELPER_FLAGS_5(XXPERMX, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, tl) -DEF_HELPER_4(xxinsertw, void, env, vsr, vsr, i32) +DEF_HELPER_FLAGS_3(XXINSERTW, TCG_CALL_NO_RWG, void, vsr, vsr, i32) DEF_HELPER_FLAGS_2(XVXSIGSP, TCG_CALL_NO_RWG, void, vsr, vsr) DEF_HELPER_FLAGS_5(XXEVAL, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_FLAGS_5(XXBLENDVB, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 483349ff6d..435cf1320c 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -161,8 +161,10 @@ &XX2 xt xb @XX2 ...... ..... ..... ..... ......... .. &XX2 xt=%xx_xt xb=%xx_xb -&XX2_uim2 xt xb uim:uint8_t -@XX2_uim2 ...... ..... ... uim:2 ..... ......... .. &XX2_uim2 xt=%xx_xt xb=%xx_xb +&XX2_uim xt xb uim:uint8_t +@XX2_uim2 ...... ..... ... uim:2 ..... ......... .. &XX2_uim xt=%xx_xt xb=%xx_xb + +@XX2_uim4 ...... ..... . uim:4 ..... ......... .. &XX2_uim xt=%xx_xt xb=%xx_xb &XX2_bf_xb bf xb @XX2_bf_xb ...... bf:3 .. ..... ..... ......... . . &XX2_bf_xb xb=%xx_xb @@ -666,6 +668,9 @@ XXSPLTW 111100 ..... ---.. ..... 010100100 . . @XX2_uim2 ## VSX Permute Instructions +XXEXTRACTUW 111100 ..... - .... ..... 010100101 .. @XX2_uim4 +XXINSERTW 111100 ..... - .... ..... 010110101 .. @XX2_uim4 + XXPERM 111100 ..... ..... ..... 00011010 ... @XX3 XXPERMR 111100 ..... ..... ..... 00111010 ... @XX3 XXPERMDI 111100 ..... ..... ..... 0 .. 01010 ... @XX3_dm diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 8c1674510b..9a361ad241 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1647,8 +1647,7 @@ VSTRI(VSTRIHL, H, 8, true) VSTRI(VSTRIHR, H, 8, false) #undef VSTRI -void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, - ppc_vsr_t *xb, uint32_t index) +void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) { ppc_vsr_t t = { }; size_t es = sizeof(uint32_t); @@ -1663,8 +1662,7 @@ void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, *xt = t; } -void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, - ppc_vsr_t *xb, uint32_t index) +void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) { ppc_vsr_t t = *xt; size_t es = sizeof(uint32_t); diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index ca11e2c4b8..900c1a1ab2 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -1585,7 +1585,7 @@ static bool trans_XXSEL(DisasContext *ctx, arg_XX4 *a) return true; } -static bool trans_XXSPLTW(DisasContext *ctx, arg_XX2_uim2 *a) +static bool trans_XXSPLTW(DisasContext *ctx, arg_XX2_uim *a) { int tofs, bofs; @@ -1795,42 +1795,35 @@ static void gen_xxsldwi(DisasContext *ctx) tcg_temp_free_i64(xtl); } -#define VSX_EXTRACT_INSERT(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_ptr xt, xb; \ - TCGv_i32 t0; \ - TCGv_i64 t1; \ - uint8_t uimm = UIMM4(ctx->opcode); \ - \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - xt = gen_vsr_ptr(xT(ctx->opcode)); \ - xb = gen_vsr_ptr(xB(ctx->opcode)); \ - t0 = tcg_temp_new_i32(); \ - t1 = tcg_temp_new_i64(); \ - /* \ - * uimm > 15 out of bound and for \ - * uimm > 12 handle as per hardware in helper \ - */ \ - if (uimm > 15) { \ - tcg_gen_movi_i64(t1, 0); \ - set_cpu_vsr(xT(ctx->opcode), t1, true); \ - set_cpu_vsr(xT(ctx->opcode), t1, false); \ - return; \ - } \ - tcg_gen_movi_i32(t0, uimm); \ - gen_helper_##name(cpu_env, xt, xb, t0); \ - tcg_temp_free_ptr(xb); \ - tcg_temp_free_ptr(xt); \ - tcg_temp_free_i32(t0); \ - tcg_temp_free_i64(t1); \ +static bool do_vsx_extract_insert(DisasContext *ctx, arg_XX2_uim *a, + void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i32)) +{ + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_ptr xt, xb; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VSX(ctx); + + /* + * uim > 15 out of bound and for + * uim > 12 handle as per hardware in helper + */ + if (a->uim > 15) { + set_cpu_vsr(a->xt, zero, true); + set_cpu_vsr(a->xt, zero, false); + } else { + xt = gen_vsr_ptr(a->xt); + xb = gen_vsr_ptr(a->xb); + gen_helper(xt, xb, tcg_constant_i32(a->uim)); + tcg_temp_free_ptr(xb); + tcg_temp_free_ptr(xt); + } + + return true; } -VSX_EXTRACT_INSERT(xxextractuw) -VSX_EXTRACT_INSERT(xxinsertw) +TRANS(XXEXTRACTUW, do_vsx_extract_insert, gen_helper_XXEXTRACTUW) +TRANS(XXINSERTW, do_vsx_extract_insert, gen_helper_XXINSERTW) #ifdef TARGET_PPC64 static void gen_xsxexpdp(DisasContext *ctx) diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index 4524c5b02a..bff14bbece 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -320,5 +320,3 @@ VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207), GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX), GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX), GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00), -GEN_XX2FORM_EXT(xxextractuw, 0x0A, 0x0A, PPC2_ISA300), -GEN_XX2FORM_EXT(xxinsertw, 0x0A, 0x0B, PPC2_ISA300), From ffc2a2818a6ae321830de2a8c93c78d437c64171 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:26 -0300 Subject: [PATCH 19/34] target/ppc: introduce do_va_helper Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-10-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/translate/vmx-impl.c.inc | 32 +++++------------------------ 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 764ac45409..e66301c007 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -2553,20 +2553,17 @@ static void gen_vmladduhm(DisasContext *ctx) tcg_temp_free_ptr(rd); } -static bool trans_VPERM(DisasContext *ctx, arg_VA *a) +static bool do_va_helper(DisasContext *ctx, arg_VA *a, + void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) { TCGv_ptr vrt, vra, vrb, vrc; - - REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); vrt = gen_avr_ptr(a->vrt); vra = gen_avr_ptr(a->vra); vrb = gen_avr_ptr(a->vrb); vrc = gen_avr_ptr(a->rc); - - gen_helper_VPERM(vrt, vra, vrb, vrc); - + gen_helper(vrt, vra, vrb, vrc); tcg_temp_free_ptr(vrt); tcg_temp_free_ptr(vra); tcg_temp_free_ptr(vrb); @@ -2575,27 +2572,8 @@ static bool trans_VPERM(DisasContext *ctx, arg_VA *a) return true; } -static bool trans_VPERMR(DisasContext *ctx, arg_VA *a) -{ - TCGv_ptr vrt, vra, vrb, vrc; - - REQUIRE_INSNS_FLAGS2(ctx, ISA300); - REQUIRE_VECTOR(ctx); - - vrt = gen_avr_ptr(a->vrt); - vra = gen_avr_ptr(a->vra); - vrb = gen_avr_ptr(a->vrb); - vrc = gen_avr_ptr(a->rc); - - gen_helper_VPERMR(vrt, vra, vrb, vrc); - - tcg_temp_free_ptr(vrt); - tcg_temp_free_ptr(vra); - tcg_temp_free_ptr(vrb); - tcg_temp_free_ptr(vrc); - - return true; -} +TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM) +TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR) static bool trans_VSEL(DisasContext *ctx, arg_VA *a) { From b2dc03a5c3a2bbfdf74121cd10d007803ea61e34 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:27 -0300 Subject: [PATCH 20/34] target/ppc: declare vmsum[um]bm helpers with call flags Move vmsumubm and vmsummbm to decodetree, declare both helpers with TCG_CALL_NO_RWG, and drop the unused env argument. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-11-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 4 ++-- target/ppc/insn32.decode | 3 +++ target/ppc/int_helper.c | 6 ++---- target/ppc/translate/vmx-impl.c.inc | 5 ++++- target/ppc/translate/vmx-ops.c.inc | 2 -- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 640a70cd5c..f0761fe38d 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -223,8 +223,8 @@ DEF_HELPER_FLAGS_2(vupkhsw, TCG_CALL_NO_RWG, void, avr, avr) DEF_HELPER_FLAGS_2(vupklsb, TCG_CALL_NO_RWG, void, avr, avr) DEF_HELPER_FLAGS_2(vupklsh, TCG_CALL_NO_RWG, void, avr, avr) DEF_HELPER_FLAGS_2(vupklsw, TCG_CALL_NO_RWG, void, avr, avr) -DEF_HELPER_5(vmsumubm, void, env, avr, avr, avr, avr) -DEF_HELPER_5(vmsummbm, void, env, avr, avr, avr, avr) +DEF_HELPER_FLAGS_4(VMSUMUBM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) +DEF_HELPER_FLAGS_4(VMSUMMBM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VPERM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VPERMR, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_4(vpkshss, void, env, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 435cf1320c..fdb8d76456 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -599,6 +599,9 @@ VMULLD 000100 ..... ..... ..... 00111001001 @VX ## Vector Multiply-Sum Instructions +VMSUMUBM 000100 ..... ..... ..... ..... 100100 @VA +VMSUMMBM 000100 ..... ..... ..... ..... 100101 @VA + VMSUMCUD 000100 ..... ..... ..... ..... 010111 @VA VMSUMUDM 000100 ..... ..... ..... ..... 100011 @VA diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 9a361ad241..85a7442103 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -875,8 +875,7 @@ VMRG(w, u32, VsrW) #undef VMRG_DO #undef VMRG -void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, - ppc_avr_t *b, ppc_avr_t *c) +void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { int32_t prod[16]; int i; @@ -928,8 +927,7 @@ void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, } } -void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, - ppc_avr_t *b, ppc_avr_t *c) +void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { uint16_t prod[16]; int i; diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index e66301c007..4cbd724641 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -2587,9 +2587,12 @@ static bool trans_VSEL(DisasContext *ctx, arg_VA *a) return true; } -GEN_VAFORM_PAIRED(vmsumubm, vmsummbm, 18) GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19) GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20) + +TRANS_FLAGS(ALTIVEC, VMSUMUBM, do_va_helper, gen_helper_VMSUMUBM) +TRANS_FLAGS(ALTIVEC, VMSUMMBM, do_va_helper, gen_helper_VMSUMMBM) + GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23) GEN_VXFORM_NOA(vclzb, 1, 28) diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index d960648d52..5b85322c06 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -221,11 +221,9 @@ GEN_VXFORM_UIMM(vcfsx, 5, 13), GEN_VXFORM_UIMM(vctuxs, 5, 14), GEN_VXFORM_UIMM(vctsxs, 5, 15), - #define GEN_VAFORM_PAIRED(name0, name1, opc2) \ GEN_HANDLER(name0##_##name1, 0x04, opc2, 0xFF, 0x00000000, PPC_ALTIVEC) GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16), -GEN_VAFORM_PAIRED(vmsumubm, vmsummbm, 18), GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19), GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20), GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23), From 89a5a1aee2df29c311d11386923b750bf1ea6bc2 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:28 -0300 Subject: [PATCH 21/34] target/ppc: declare vmsumuh[ms] helper with call flags Move vmsumuhm and vmsumuhs to decodetree, declare vmsumuhm helper with TCG_CALL_NO_RWG, and drop the unused env argument. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-12-matheus.ferst@eldorado.org.br> [danielhb: added #undef VMSUMUHM to fix ppc64 build] Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 4 ++-- target/ppc/insn32.decode | 2 ++ target/ppc/int_helper.c | 5 ++--- target/ppc/translate/vmx-impl.c.inc | 24 ++++++++++++++++++++++-- target/ppc/translate/vmx-ops.c.inc | 1 - tcg/ppc/tcg-target.c.inc | 1 + 6 files changed, 29 insertions(+), 8 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index f0761fe38d..5127851f2c 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -242,8 +242,8 @@ DEF_HELPER_4(vpkudum, void, env, avr, avr, avr) DEF_HELPER_FLAGS_3(vpkpx, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_5(vmhaddshs, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmhraddshs, void, env, avr, avr, avr, avr) -DEF_HELPER_5(vmsumuhm, void, env, avr, avr, avr, avr) -DEF_HELPER_5(vmsumuhs, void, env, avr, avr, avr, avr) +DEF_HELPER_FLAGS_4(VMSUMUHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) +DEF_HELPER_5(VMSUMUHS, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmsumshm, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmsumshs, void, env, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(vmladduhm, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index fdb8d76456..43ea03c3e7 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -601,6 +601,8 @@ VMULLD 000100 ..... ..... ..... 00111001001 @VX VMSUMUBM 000100 ..... ..... ..... ..... 100100 @VA VMSUMMBM 000100 ..... ..... ..... ..... 100101 @VA +VMSUMUHM 000100 ..... ..... ..... ..... 100110 @VA +VMSUMUHS 000100 ..... ..... ..... ..... 100111 @VA VMSUMCUD 000100 ..... ..... ..... ..... 010111 @VA VMSUMUDM 000100 ..... ..... ..... ..... 100011 @VA diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 85a7442103..9285a1c2a1 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -942,8 +942,7 @@ void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) } } -void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, - ppc_avr_t *b, ppc_avr_t *c) +void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { uint32_t prod[8]; int i; @@ -957,7 +956,7 @@ void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, } } -void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, +void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { uint32_t prod[8]; diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 4cbd724641..da81296b96 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -2587,11 +2587,31 @@ static bool trans_VSEL(DisasContext *ctx, arg_VA *a) return true; } -GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19) GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20) - TRANS_FLAGS(ALTIVEC, VMSUMUBM, do_va_helper, gen_helper_VMSUMUBM) TRANS_FLAGS(ALTIVEC, VMSUMMBM, do_va_helper, gen_helper_VMSUMMBM) +TRANS_FLAGS(ALTIVEC, VMSUMUHM, do_va_helper, gen_helper_VMSUMUHM) + +static bool do_va_env_helper(DisasContext *ctx, arg_VA *a, + void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) +{ + TCGv_ptr vrt, vra, vrb, vrc; + REQUIRE_VECTOR(ctx); + + vrt = gen_avr_ptr(a->vrt); + vra = gen_avr_ptr(a->vra); + vrb = gen_avr_ptr(a->vrb); + vrc = gen_avr_ptr(a->rc); + gen_helper(cpu_env, vrt, vra, vrb, vrc); + tcg_temp_free_ptr(vrt); + tcg_temp_free_ptr(vra); + tcg_temp_free_ptr(vrb); + tcg_temp_free_ptr(vrc); + + return true; +} + +TRANS_FLAGS(ALTIVEC, VMSUMUHS, do_va_env_helper, gen_helper_VMSUMUHS) GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23) diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index 5b85322c06..15b3e06410 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -224,7 +224,6 @@ GEN_VXFORM_UIMM(vctsxs, 5, 15), #define GEN_VAFORM_PAIRED(name0, name1, opc2) \ GEN_HANDLER(name0##_##name1, 0x04, opc2, 0xFF, 0x00000000, PPC_ALTIVEC) GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16), -GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19), GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20), GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23), diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index cfcd121f9c..fc8ae47293 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -4008,3 +4008,4 @@ void tcg_register_jit(const void *buf, size_t buf_size) #undef VMULOUB #undef VMULOUH #undef VMULOUW +#undef VMSUMUHM From 6f52f731a666d8aeaac5a10b208e9ca6773fb1a6 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Tue, 17 May 2022 09:39:29 -0300 Subject: [PATCH 22/34] target/ppc: declare vmsumsh[ms] helper with call flags Move vmsumshm and vmsumshs to decodetree, declare vmsumshm helper with TCG_CALL_NO_RWG, and drop the unused env argument. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220517123929.284511-13-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 4 ++-- target/ppc/insn32.decode | 2 ++ target/ppc/int_helper.c | 5 ++--- target/ppc/translate/vmx-impl.c.inc | 3 ++- target/ppc/translate/vmx-ops.c.inc | 1 - 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 5127851f2c..5e43920b9e 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -244,8 +244,8 @@ DEF_HELPER_5(vmhaddshs, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmhraddshs, void, env, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VMSUMUHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_5(VMSUMUHS, void, env, avr, avr, avr, avr) -DEF_HELPER_5(vmsumshm, void, env, avr, avr, avr, avr) -DEF_HELPER_5(vmsumshs, void, env, avr, avr, avr, avr) +DEF_HELPER_FLAGS_4(VMSUMSHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) +DEF_HELPER_5(VMSUMSHS, void, env, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(vmladduhm, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_FLAGS_2(mtvscr, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_1(mfvscr, TCG_CALL_NO_RWG, i32, env) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 43ea03c3e7..f001c02a8c 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -601,6 +601,8 @@ VMULLD 000100 ..... ..... ..... 00111001001 @VX VMSUMUBM 000100 ..... ..... ..... ..... 100100 @VA VMSUMMBM 000100 ..... ..... ..... ..... 100101 @VA +VMSUMSHM 000100 ..... ..... ..... ..... 101000 @VA +VMSUMSHS 000100 ..... ..... ..... ..... 101001 @VA VMSUMUHM 000100 ..... ..... ..... ..... 100110 @VA VMSUMUHS 000100 ..... ..... ..... ..... 100111 @VA diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 9285a1c2a1..b9dd15d607 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -890,8 +890,7 @@ void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) } } -void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, - ppc_avr_t *b, ppc_avr_t *c) +void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { int32_t prod[8]; int i; @@ -905,7 +904,7 @@ void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, } } -void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, +void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { int32_t prod[8]; diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index da81296b96..d7524c3204 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -2587,9 +2587,9 @@ static bool trans_VSEL(DisasContext *ctx, arg_VA *a) return true; } -GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20) TRANS_FLAGS(ALTIVEC, VMSUMUBM, do_va_helper, gen_helper_VMSUMUBM) TRANS_FLAGS(ALTIVEC, VMSUMMBM, do_va_helper, gen_helper_VMSUMMBM) +TRANS_FLAGS(ALTIVEC, VMSUMSHM, do_va_helper, gen_helper_VMSUMSHM) TRANS_FLAGS(ALTIVEC, VMSUMUHM, do_va_helper, gen_helper_VMSUMUHM) static bool do_va_env_helper(DisasContext *ctx, arg_VA *a, @@ -2612,6 +2612,7 @@ static bool do_va_env_helper(DisasContext *ctx, arg_VA *a, } TRANS_FLAGS(ALTIVEC, VMSUMUHS, do_va_env_helper, gen_helper_VMSUMUHS) +TRANS_FLAGS(ALTIVEC, VMSUMSHS, do_va_env_helper, gen_helper_VMSUMSHS) GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23) diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index 15b3e06410..d7cc57868e 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -224,7 +224,6 @@ GEN_VXFORM_UIMM(vctsxs, 5, 15), #define GEN_VAFORM_PAIRED(name0, name1, opc2) \ GEN_HANDLER(name0##_##name1, 0x04, opc2, 0xFF, 0x00000000, PPC_ALTIVEC) GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16), -GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20), GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23), GEN_VXFORM_DUAL(vclzb, vpopcntb, 1, 28, PPC_NONE, PPC2_ALTIVEC_207), From fcb830af30adaef6219f6b712f24ec3e794cf2c8 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 19 May 2022 23:59:05 +1000 Subject: [PATCH 23/34] target/ppc: Fix eieio memory ordering semantics The generated eieio memory ordering semantics do not match the instruction definition in the architecture. Add a big comment to explain this strange instruction and correct the memory ordering behaviour. Signed-off: Nicholas Piggin Reviewed-by: Richard Henderson Message-Id: <20220519135908.21282-2-npiggin@gmail.com> Signed-off-by: Daniel Henrique Barboza --- target/ppc/translate.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index fa34f81c30..eb42f7e459 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -3513,7 +3513,32 @@ static void gen_stswx(DisasContext *ctx) /* eieio */ static void gen_eieio(DisasContext *ctx) { - TCGBar bar = TCG_MO_LD_ST; + TCGBar bar = TCG_MO_ALL; + + /* + * eieio has complex semanitcs. It provides memory ordering between + * operations in the set: + * - loads from CI memory. + * - stores to CI memory. + * - stores to WT memory. + * + * It separately also orders memory for operations in the set: + * - stores to cacheble memory. + * + * It also serializes instructions: + * - dcbt and dcbst. + * + * It separately serializes: + * - tlbie and tlbsync. + * + * And separately serializes: + * - slbieg, slbiag, and slbsync. + * + * The end result is that CI memory ordering requires TCG_MO_ALL + * and it is not possible to special-case more relaxed ordering for + * cacheable accesses. TCG_BAR_SC is required to provide this + * serialization. + */ /* * POWER9 has a eieio instruction variant using bit 6 as a hint to From 9d82353826422ed0e34ad76961fe0cae5f67e58e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 19 May 2022 23:59:06 +1000 Subject: [PATCH 24/34] tcg/ppc: ST_ST memory ordering is not provided with eieio eieio does not provide ordering between stores to CI memory and stores to cacheable memory so it can't be used as a general ST_ST barrier. Reviewed-by: Richard Henderson Signed-of-by: Nicholas Piggin Message-Id: <20220519135908.21282-3-npiggin@gmail.com> Signed-off-by: Daniel Henrique Barboza --- tcg/ppc/tcg-target.c.inc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index fc8ae47293..4750091c9c 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1836,8 +1836,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) a0 &= TCG_MO_ALL; if (a0 == TCG_MO_LD_LD) { insn = LWSYNC; - } else if (a0 == TCG_MO_ST_ST) { - insn = EIEIO; } tcg_out32(s, insn); } From fc879703f74851e3e861894a0c4a6902877d0c2c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 19 May 2022 23:59:07 +1000 Subject: [PATCH 25/34] tcg/ppc: Optimize memory ordering generation with lwsync lwsync orders more than just LD_LD, importantly it matches x86 and s390 default memory ordering. Signed-off-by: Nicholas Piggin Reviewed-by: Richard Henderson Message-Id: <20220519135908.21282-4-npiggin@gmail.com> Signed-off-by: Daniel Henrique Barboza --- tcg/ppc/tcg-target.c.inc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 4750091c9c..de4483e43b 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1832,11 +1832,14 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, static void tcg_out_mb(TCGContext *s, TCGArg a0) { - uint32_t insn = HWSYNC; - a0 &= TCG_MO_ALL; - if (a0 == TCG_MO_LD_LD) { + uint32_t insn; + + if (a0 & TCG_MO_ST_LD) { + insn = HWSYNC; + } else { insn = LWSYNC; } + tcg_out32(s, insn); } From 03abfd90cfb02aa08f44bbb7141b0aaaf69042ef Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 19 May 2022 23:59:08 +1000 Subject: [PATCH 26/34] target/ppc: Implement lwsync with weaker memory ordering This allows an x86 host to no-op lwsyncs, and ppc host can use lwsync rather than sync. Signed-off-by: Nicholas Piggin Reviewed-by: Richard Henderson Message-Id: <20220519135908.21282-5-npiggin@gmail.com> Signed-off-by: Daniel Henrique Barboza --- target/ppc/cpu.h | 4 +++- target/ppc/cpu_init.c | 13 +++++++------ target/ppc/machine.c | 3 ++- target/ppc/translate.c | 8 +++++++- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 901ded79e9..bf8f8aad2c 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -2273,6 +2273,8 @@ enum { PPC2_ISA300 = 0x0000000000080000ULL, /* POWER ISA 3.1 */ PPC2_ISA310 = 0x0000000000100000ULL, + /* lwsync instruction */ + PPC2_MEM_LWSYNC = 0x0000000000200000ULL, #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \ PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \ @@ -2281,7 +2283,7 @@ enum { PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \ PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \ PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206 | \ - PPC2_ISA300 | PPC2_ISA310) + PPC2_ISA300 | PPC2_ISA310 | PPC2_MEM_LWSYNC) }; /*****************************************************************************/ diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 527ad40fcb..0f891afa04 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5769,7 +5769,7 @@ POWERPC_FAMILY(970)(ObjectClass *oc, void *data) PPC_MEM_TLBIE | PPC_MEM_TLBSYNC | PPC_64B | PPC_ALTIVEC | PPC_SEGMENT_64B | PPC_SLBI; - pcc->insns_flags2 = PPC2_FP_CVT_S64; + pcc->insns_flags2 = PPC2_FP_CVT_S64 | PPC2_MEM_LWSYNC; pcc->msr_mask = (1ull << MSR_SF) | (1ull << MSR_VR) | (1ull << MSR_POW) | @@ -5846,7 +5846,7 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data) PPC_64B | PPC_POPCNTB | PPC_SEGMENT_64B | PPC_SLBI; - pcc->insns_flags2 = PPC2_FP_CVT_S64; + pcc->insns_flags2 = PPC2_FP_CVT_S64 | PPC2_MEM_LWSYNC; pcc->msr_mask = (1ull << MSR_SF) | (1ull << MSR_VR) | (1ull << MSR_POW) | @@ -5985,7 +5985,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | PPC2_FP_CVT_S64 | - PPC2_PM_ISA206; + PPC2_PM_ISA206 | PPC2_MEM_LWSYNC; pcc->msr_mask = (1ull << MSR_SF) | (1ull << MSR_VR) | (1ull << MSR_VSX) | @@ -6159,7 +6159,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 | - PPC2_TM | PPC2_PM_ISA206; + PPC2_TM | PPC2_PM_ISA206 | PPC2_MEM_LWSYNC; pcc->msr_mask = (1ull << MSR_SF) | (1ull << MSR_HV) | (1ull << MSR_TM) | @@ -6379,7 +6379,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 | - PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL; + PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_MEM_LWSYNC; pcc->msr_mask = (1ull << MSR_SF) | (1ull << MSR_HV) | (1ull << MSR_TM) | @@ -6596,7 +6596,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data) PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 | - PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_ISA310; + PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_ISA310 | + PPC2_MEM_LWSYNC; pcc->msr_mask = (1ull << MSR_SF) | (1ull << MSR_HV) | (1ull << MSR_TM) | diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 7104a5c67e..a7d9036c09 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -157,7 +157,8 @@ static int cpu_pre_save(void *opaque) | PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 - | PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 | PPC2_TM; + | PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 | PPC2_TM + | PPC2_MEM_LWSYNC; env->spr[SPR_LR] = env->lr; env->spr[SPR_CTR] = env->ctr; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index eb42f7e459..1d6daa4608 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -4041,8 +4041,13 @@ static void gen_stqcx_(DisasContext *ctx) /* sync */ static void gen_sync(DisasContext *ctx) { + TCGBar bar = TCG_MO_ALL; uint32_t l = (ctx->opcode >> 21) & 3; + if ((l == 1) && (ctx->insns_flags2 & PPC2_MEM_LWSYNC)) { + bar = TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST; + } + /* * We may need to check for a pending TLB flush. * @@ -4054,7 +4059,8 @@ static void gen_sync(DisasContext *ctx) if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { gen_check_tlb_flush(ctx, true); } - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + + tcg_gen_mb(bar | TCG_BAR_SC); } /* wait */ From a702c5339eda791b969ed531ce99456df7ca8451 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Tue, 24 May 2022 11:05:30 -0300 Subject: [PATCH 27/34] target/ppc: Implement xxm[tf]acc and xxsetaccz Implement the following PowerISA v3.1 instructions: xxmfacc: VSX Move From Accumulator xxmtacc: VSX Move To Accumulator xxsetaccz: VSX Set Accumulator to Zero The PowerISA 3.1 mentions that for the current version of the architecture, "the hardware implementation provides the effect of ACC[i] and VSRs 4*i to 4*i + 3 logically containing the same data" and "The Accumulators introduce no new logical state at this time" (page 501). For now it seems unnecessary to create new structures, so this patch just uses ACC[i] as VSRs 4*i to 4*i+3 and therefore move to and from accumulators are no-ops. Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220524140537.27451-2-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/cpu.h | 5 +++++ target/ppc/insn32.decode | 9 +++++++++ target/ppc/translate/vsx-impl.c.inc | 31 +++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index bf8f8aad2c..c865206827 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -2663,6 +2663,11 @@ static inline int vsr_full_offset(int i) return offsetof(CPUPPCState, vsr[i].u64[0]); } +static inline int acc_full_offset(int i) +{ + return vsr_full_offset(i * 4); +} + static inline int fpr_offset(int i) { return vsr64_offset(i, true); diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index f001c02a8c..c0f545ca38 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -154,6 +154,9 @@ &X_vrt_frbp vrt frbp @X_vrt_frbp ...... vrt:5 ..... ....0 .......... . &X_vrt_frbp frbp=%x_frbp +&X_a ra +@X_a ...... ra:3 .. ..... ..... .......... . &X_a + %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 @@ -734,3 +737,9 @@ XVTLSBB 111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb &XL_s s:uint8_t @XL_s ......-------------- s:1 .......... - &XL_s RFEBB 010011-------------- . 0010010010 - @XL_s + +## Accumulator Instructions + +XXMFACC 011111 ... -- 00000 ----- 0010110001 - @X_a +XXMTACC 011111 ... -- 00001 ----- 0010110001 - @X_a +XXSETACCZ 011111 ... -- 00011 ----- 0010110001 - @X_a diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 900c1a1ab2..235be360e2 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2816,6 +2816,37 @@ static bool trans_XVCVBF16SPN(DisasContext *ctx, arg_XX2 *a) return true; } + /* + * The PowerISA 3.1 mentions that for the current version of the + * architecture, "the hardware implementation provides the effect of + * ACC[i] and VSRs 4*i to 4*i + 3 logically containing the same data" + * and "The Accumulators introduce no new logical state at this time" + * (page 501). For now it seems unnecessary to create new structures, + * so ACC[i] is the same as VSRs 4*i to 4*i+3 and therefore + * move to and from accumulators are no-ops. + */ +static bool trans_XXMFACC(DisasContext *ctx, arg_X_a *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + return true; +} + +static bool trans_XXMTACC(DisasContext *ctx, arg_X_a *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + return true; +} + +static bool trans_XXSETACCZ(DisasContext *ctx, arg_X_a *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + tcg_gen_gvec_dup_imm(MO_64, acc_full_offset(a->ra), 64, 64, 0); + return true; +} + #undef GEN_XX2FORM #undef GEN_XX3FORM #undef GEN_XX2IFORM From 345531533f26df49e74f16dafc88408408173ece Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Tue, 24 May 2022 11:05:31 -0300 Subject: [PATCH 28/34] target/ppc: Implemented xvi*ger* instructions Implement the following PowerISA v3.1 instructions: xvi4ger8: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) xvi4ger8pp: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate xvi8ger4: VSX Vector 4-bit Signed Integer GER (rank-8 update) xvi8ger4pp: VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate xvi8ger4spp: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate xvi16ger2: VSX Vector 16-bit Signed Integer GER (rank-2 update) xvi16ger2pp: VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate xvi16ger2s: VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation xvi16ger2spp: VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220524140537.27451-3-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/cpu.h | 1 + target/ppc/helper.h | 13 +++ target/ppc/insn32.decode | 18 ++++ target/ppc/int_helper.c | 130 ++++++++++++++++++++++++++++ target/ppc/internal.h | 15 ++++ target/ppc/translate/vsx-impl.c.inc | 41 +++++++++ 6 files changed, 218 insertions(+) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index c865206827..dff3ca8222 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -238,6 +238,7 @@ typedef union _ppc_vsr_t { typedef ppc_vsr_t ppc_avr_t; typedef ppc_vsr_t ppc_fprp_t; +typedef ppc_vsr_t ppc_acc_t; #if !defined(CONFIG_USER_ONLY) /* Software TLB cache */ diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 5e43920b9e..1666797edf 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -133,6 +133,10 @@ DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) #define dh_ctype_vsr ppc_vsr_t * #define dh_typecode_vsr dh_typecode_ptr +#define dh_alias_acc ptr +#define dh_ctype_acc ppc_acc_t * +#define dh_typecode_acc dh_typecode_ptr + DEF_HELPER_FLAGS_3(vavgub, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vavguh, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vavguw, TCG_CALL_NO_RWG, void, avr, avr, avr) @@ -537,6 +541,15 @@ DEF_HELPER_FLAGS_5(XXBLENDVB, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_FLAGS_5(XXBLENDVH, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_FLAGS_5(XXBLENDVW, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_FLAGS_5(XXBLENDVD, TCG_CALL_NO_RWG, void, vsr, vsr, vsr, vsr, i32) +DEF_HELPER_5(XVI4GER8, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVI4GER8PP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVI8GER4, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVI8GER4PP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVI8GER4SPP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVI16GER2, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVI16GER2S, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVI16GER2PP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVI16GER2SPP, void, env, vsr, vsr, acc, i32) DEF_HELPER_2(efscfsi, i32, env, i32) DEF_HELPER_2(efscfui, i32, env, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index c0f545ca38..0e189fe2da 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -175,6 +175,12 @@ &XX3 xt xa xb @XX3 ...... ..... ..... ..... ........ ... &XX3 xt=%xx_xt xa=%xx_xa xb=%xx_xb +# 32 bit GER instructions have all mask bits considered 1 +&MMIRR_XX3 xa xb xt pmsk xmsk ymsk +%xx_at 23:3 +@XX3_at ...... ... .. ..... ..... ........ ... &MMIRR_XX3 xt=%xx_at xb=%xx_xb \ + pmsk=255 xmsk=15 ymsk=15 + &XX3_dm xt xa xb dm @XX3_dm ...... ..... ..... ..... . dm:2 ..... ... &XX3_dm xt=%xx_xt xa=%xx_xa xb=%xx_xb @@ -743,3 +749,15 @@ RFEBB 010011-------------- . 0010010010 - @XL_s XXMFACC 011111 ... -- 00000 ----- 0010110001 - @X_a XXMTACC 011111 ... -- 00001 ----- 0010110001 - @X_a XXSETACCZ 011111 ... -- 00011 ----- 0010110001 - @X_a + +## VSX GER instruction + +XVI4GER8 111011 ... -- ..... ..... 00100011 ..- @XX3_at xa=%xx_xa +XVI4GER8PP 111011 ... -- ..... ..... 00100010 ..- @XX3_at xa=%xx_xa +XVI8GER4 111011 ... -- ..... ..... 00000011 ..- @XX3_at xa=%xx_xa +XVI8GER4PP 111011 ... -- ..... ..... 00000010 ..- @XX3_at xa=%xx_xa +XVI16GER2 111011 ... -- ..... ..... 01001011 ..- @XX3_at xa=%xx_xa +XVI16GER2PP 111011 ... -- ..... ..... 01101011 ..- @XX3_at xa=%xx_xa +XVI8GER4SPP 111011 ... -- ..... ..... 01100011 ..- @XX3_at xa=%xx_xa +XVI16GER2S 111011 ... -- ..... ..... 00101011 ..- @XX3_at xa=%xx_xa +XVI16GER2SPP 111011 ... -- ..... ..... 00101010 ..- @XX3_at xa=%xx_xa diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index b9dd15d607..105b626d1b 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -782,6 +782,136 @@ VCT(uxs, cvtsduw, u32) VCT(sxs, cvtsdsw, s32) #undef VCT +typedef int64_t do_ger(uint32_t, uint32_t, uint32_t); + +static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask) +{ + int64_t psum = 0; + for (int i = 0; i < 8; i++, mask >>= 1) { + if (mask & 1) { + psum += sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); + } + } + return psum; +} + +static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask) +{ + int64_t psum = 0; + for (int i = 0; i < 4; i++, mask >>= 1) { + if (mask & 1) { + psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8); + } + } + return psum; +} + +static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask) +{ + int64_t psum = 0; + for (int i = 0; i < 2; i++, mask >>= 1) { + if (mask & 1) { + psum += sextract32(a, 16 * i, 16) * sextract32(b, 16 * i, 16); + } + } + return psum; +} + +static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, + uint32_t mask, bool sat, bool acc, do_ger ger) +{ + uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK), + xmsk = FIELD_EX32(mask, GER_MSK, XMSK), + ymsk = FIELD_EX32(mask, GER_MSK, YMSK); + uint8_t xmsk_bit, ymsk_bit; + int64_t psum; + int i, j; + for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { + for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { + if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { + psum = ger(a->VsrW(i), b->VsrW(j), pmsk); + if (acc) { + psum += at[i].VsrSW(j); + } + if (sat && psum > INT32_MAX) { + set_vscr_sat(env); + at[i].VsrSW(j) = INT32_MAX; + } else if (sat && psum < INT32_MIN) { + set_vscr_sat(env); + at[i].VsrSW(j) = INT32_MIN; + } else { + at[i].VsrSW(j) = (int32_t) psum; + } + } else { + at[i].VsrSW(j) = 0; + } + } + } +} + +QEMU_FLATTEN +void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + xviger(env, a, b, at, mask, false, false, ger_rank8); +} + +QEMU_FLATTEN +void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + xviger(env, a, b, at, mask, false, true, ger_rank8); +} + +QEMU_FLATTEN +void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + xviger(env, a, b, at, mask, false, false, ger_rank4); +} + +QEMU_FLATTEN +void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + xviger(env, a, b, at, mask, false, true, ger_rank4); +} + +QEMU_FLATTEN +void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + xviger(env, a, b, at, mask, true, true, ger_rank4); +} + +QEMU_FLATTEN +void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + xviger(env, a, b, at, mask, false, false, ger_rank2); +} + +QEMU_FLATTEN +void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + xviger(env, a, b, at, mask, true, false, ger_rank2); +} + +QEMU_FLATTEN +void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + xviger(env, a, b, at, mask, false, true, ger_rank2); +} + +QEMU_FLATTEN +void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + xviger(env, a, b, at, mask, true, true, ger_rank2); +} + target_ulong helper_vclzlsbb(ppc_avr_t *r) { target_ulong count = 0; diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 8094e0b033..2add128cd1 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -18,6 +18,8 @@ #ifndef PPC_INTERNAL_H #define PPC_INTERNAL_H +#include "hw/registerfields.h" + #define FUNC_MASK(name, ret_type, size, max_val) \ static inline ret_type name(uint##size##_t start, \ uint##size##_t end) \ @@ -291,4 +293,17 @@ G_NORETURN void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr, uintptr_t retaddr); #endif +FIELD(GER_MSK, XMSK, 0, 4) +FIELD(GER_MSK, YMSK, 4, 4) +FIELD(GER_MSK, PMSK, 8, 8) + +static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk) +{ + int msk = 0; + msk = FIELD_DP32(msk, GER_MSK, XMSK, xmsk); + msk = FIELD_DP32(msk, GER_MSK, YMSK, ymsk); + msk = FIELD_DP32(msk, GER_MSK, PMSK, pmsk); + return msk; +} + #endif /* PPC_INTERNAL_H */ diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 235be360e2..6026b203e0 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -17,6 +17,13 @@ static inline TCGv_ptr gen_vsr_ptr(int reg) return r; } +static inline TCGv_ptr gen_acc_ptr(int reg) +{ + TCGv_ptr r = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(r, cpu_env, acc_full_offset(reg)); + return r; +} + #define VSX_LOAD_SCALAR(name, operation) \ static void gen_##name(DisasContext *ctx) \ { \ @@ -2847,6 +2854,40 @@ static bool trans_XXSETACCZ(DisasContext *ctx, arg_X_a *a) return true; } +static bool do_ger(DisasContext *ctx, arg_MMIRR_XX3 *a, + void (*helper)(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32)) +{ + uint32_t mask; + TCGv_ptr xt, xa, xb; + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + if (unlikely((a->xa / 4 == a->xt) || (a->xb / 4 == a->xt))) { + gen_invalid(ctx); + return true; + } + + xt = gen_acc_ptr(a->xt); + xa = gen_vsr_ptr(a->xa); + xb = gen_vsr_ptr(a->xb); + + mask = ger_pack_masks(a->pmsk, a->ymsk, a->xmsk); + helper(cpu_env, xa, xb, xt, tcg_constant_i32(mask)); + tcg_temp_free_ptr(xt); + tcg_temp_free_ptr(xa); + tcg_temp_free_ptr(xb); + return true; +} + +TRANS(XVI4GER8, do_ger, gen_helper_XVI4GER8) +TRANS(XVI4GER8PP, do_ger, gen_helper_XVI4GER8PP) +TRANS(XVI8GER4, do_ger, gen_helper_XVI8GER4) +TRANS(XVI8GER4PP, do_ger, gen_helper_XVI8GER4PP) +TRANS(XVI8GER4SPP, do_ger, gen_helper_XVI8GER4SPP) +TRANS(XVI16GER2, do_ger, gen_helper_XVI16GER2) +TRANS(XVI16GER2PP, do_ger, gen_helper_XVI16GER2PP) +TRANS(XVI16GER2S, do_ger, gen_helper_XVI16GER2S) +TRANS(XVI16GER2SPP, do_ger, gen_helper_XVI16GER2SPP) + #undef GEN_XX2FORM #undef GEN_XX3FORM #undef GEN_XX2IFORM From 6d525ca972085fb82fe2dec04fdf257318f9e5a5 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Tue, 24 May 2022 11:05:32 -0300 Subject: [PATCH 29/34] target/ppc: Implemented pmxvi*ger* instructions Implement the following PowerISA v3.1 instructions: pmxvi4ger8: Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) pmxvi4ger8pp: Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate pmxvi8ger4: Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) pmxvi8ger4pp: Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate pmxvi8ger4spp: Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate pmxvi16ger2: Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) pmxvi16ger2pp: Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate pmxvi16ger2s: Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation pmxvi16ger2spp: Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220524140537.27451-4-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/insn64.decode | 30 +++++++++++++++++++++++++++++ target/ppc/translate/vsx-impl.c.inc | 10 ++++++++++ 2 files changed, 40 insertions(+) diff --git a/target/ppc/insn64.decode b/target/ppc/insn64.decode index 691e8fe6c0..0eed35c8cd 100644 --- a/target/ppc/insn64.decode +++ b/target/ppc/insn64.decode @@ -68,6 +68,15 @@ ...... ..... ..... ..... ..... .. .... \ &8RR_XX4_uim3 xt=%8rr_xx_xt xa=%8rr_xx_xa xb=%8rr_xx_xb xc=%8rr_xx_xc +# Format MMIRR:XX3 +&MMIRR_XX3 !extern xa xb xt pmsk xmsk ymsk +%xx3_xa 2:1 16:5 +%xx3_xb 1:1 11:5 +%xx3_at 23:3 +@MMIRR_XX3 ...... .. .... .. . . ........ xmsk:4 ymsk:4 \ + ...... ... .. ..... ..... ........ ... \ + &MMIRR_XX3 xa=%xx3_xa xb=%xx3_xb xt=%xx3_at + ### Fixed-Point Load Instructions PLBZ 000001 10 0--.-- .................. \ @@ -115,6 +124,27 @@ PSTFS 000001 10 0--.-- .................. \ PSTFD 000001 10 0--.-- .................. \ 110110 ..... ..... ................ @PLS_D +## VSX GER instruction + +PMXVI4GER8 000001 11 1001 -- - - pmsk:8 ........ \ + 111011 ... -- ..... ..... 00100011 ..- @MMIRR_XX3 +PMXVI4GER8PP 000001 11 1001 -- - - pmsk:8 ........ \ + 111011 ... -- ..... ..... 00100010 ..- @MMIRR_XX3 +PMXVI8GER4 000001 11 1001 -- - - pmsk:4 ---- ........ \ + 111011 ... -- ..... ..... 00000011 ..- @MMIRR_XX3 +PMXVI8GER4PP 000001 11 1001 -- - - pmsk:4 ---- ........ \ + 111011 ... -- ..... ..... 00000010 ..- @MMIRR_XX3 +PMXVI16GER2 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 01001011 ..- @MMIRR_XX3 +PMXVI16GER2PP 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 01101011 ..- @MMIRR_XX3 +PMXVI8GER4SPP 000001 11 1001 -- - - pmsk:4 ---- ........ \ + 111011 ... -- ..... ..... 01100011 ..- @MMIRR_XX3 +PMXVI16GER2S 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 00101011 ..- @MMIRR_XX3 +PMXVI16GER2SPP 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 00101010 ..- @MMIRR_XX3 + ### Prefixed No-operation Instruction @PNOP 000001 11 0000-- 000000000000000000 \ diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 6026b203e0..b10eded1da 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2888,6 +2888,16 @@ TRANS(XVI16GER2PP, do_ger, gen_helper_XVI16GER2PP) TRANS(XVI16GER2S, do_ger, gen_helper_XVI16GER2S) TRANS(XVI16GER2SPP, do_ger, gen_helper_XVI16GER2SPP) +TRANS64(PMXVI4GER8, do_ger, gen_helper_XVI4GER8) +TRANS64(PMXVI4GER8PP, do_ger, gen_helper_XVI4GER8PP) +TRANS64(PMXVI8GER4, do_ger, gen_helper_XVI8GER4) +TRANS64(PMXVI8GER4PP, do_ger, gen_helper_XVI8GER4PP) +TRANS64(PMXVI8GER4SPP, do_ger, gen_helper_XVI8GER4SPP) +TRANS64(PMXVI16GER2, do_ger, gen_helper_XVI16GER2) +TRANS64(PMXVI16GER2PP, do_ger, gen_helper_XVI16GER2PP) +TRANS64(PMXVI16GER2S, do_ger, gen_helper_XVI16GER2S) +TRANS64(PMXVI16GER2SPP, do_ger, gen_helper_XVI16GER2SPP) + #undef GEN_XX2FORM #undef GEN_XX3FORM #undef GEN_XX2IFORM From c29018cc7395fdb7efd5e21c6f6c4d9cfbcf9a3c Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Tue, 24 May 2022 11:05:33 -0300 Subject: [PATCH 30/34] target/ppc: Implemented xvf*ger* Implement the following PowerISA v3.1 instructions: xvf32ger: VSX Vector 32-bit Floating-Point GER (rank-1 update) xvf32gernn: VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate xvf32gernp: VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate xvf32gerpn: VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate xvf32gerpp: VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate xvf64ger: VSX Vector 64-bit Floating-Point GER (rank-1 update) xvf64gernn: VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate xvf64gernp: VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate xvf64gerpn: VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate xvf64gerpp: VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220524140537.27451-5-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/cpu.h | 4 + target/ppc/fpu_helper.c | 194 +++++++++++++++++++++++++++- target/ppc/helper.h | 10 ++ target/ppc/insn32.decode | 13 ++ target/ppc/translate/vsx-impl.c.inc | 12 ++ 5 files changed, 231 insertions(+), 2 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index dff3ca8222..40c779f246 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -2643,6 +2643,8 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx) #define VsrSW(i) s32[i] #define VsrD(i) u64[i] #define VsrSD(i) s64[i] +#define VsrSF(i) f32[i] +#define VsrDF(i) f64[i] #else #define VsrB(i) u8[15 - (i)] #define VsrSB(i) s8[15 - (i)] @@ -2652,6 +2654,8 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx) #define VsrSW(i) s32[3 - (i)] #define VsrD(i) u64[1 - (i)] #define VsrSD(i) s64[1 - (i)] +#define VsrSF(i) f32[3 - (i)] +#define VsrDF(i) f64[1 - (i)] #endif static inline int vsr64_offset(int i, bool high) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 9489e06504..712c71162c 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -414,7 +414,7 @@ void helper_store_fpscr(CPUPPCState *env, uint64_t val, uint32_t nibbles) ppc_store_fpscr(env, val); } -void helper_fpscr_check_status(CPUPPCState *env) +static void do_fpscr_check_status(CPUPPCState *env, uintptr_t raddr) { CPUState *cs = env_cpu(env); target_ulong fpscr = env->fpscr; @@ -455,13 +455,19 @@ void helper_fpscr_check_status(CPUPPCState *env) } cs->exception_index = POWERPC_EXCP_PROGRAM; env->error_code = error | POWERPC_EXCP_FP; + env->fpscr |= error ? FP_FEX : 0; /* Deferred floating-point exception after target FPSCR update */ if (fp_exceptions_enabled(env)) { raise_exception_err_ra(env, cs->exception_index, - env->error_code, GETPC()); + env->error_code, raddr); } } +void helper_fpscr_check_status(CPUPPCState *env) +{ + do_fpscr_check_status(env, GETPC()); +} + static void do_float_check_status(CPUPPCState *env, bool change_fi, uintptr_t raddr) { @@ -3468,3 +3474,187 @@ void helper_xssubqp(CPUPPCState *env, uint32_t opcode, *xt = t; do_float_check_status(env, true, GETPC()); } + +static inline void vsxger_excp(CPUPPCState *env, uintptr_t retaddr) +{ + /* + * XV*GER instructions execute and set the FPSCR as if exceptions + * are disabled and only at the end throw an exception + */ + target_ulong enable; + enable = env->fpscr & (FP_ENABLES | FP_FI | FP_FR); + env->fpscr &= ~(FP_ENABLES | FP_FI | FP_FR); + int status = get_float_exception_flags(&env->fp_status); + if (unlikely(status & float_flag_invalid)) { + if (status & float_flag_invalid_snan) { + float_invalid_op_vxsnan(env, 0); + } + if (status & float_flag_invalid_imz) { + float_invalid_op_vximz(env, false, 0); + } + if (status & float_flag_invalid_isi) { + float_invalid_op_vxisi(env, false, 0); + } + } + do_float_check_status(env, false, retaddr); + env->fpscr |= enable; + do_fpscr_check_status(env, retaddr); +} + +typedef void vsxger_zero(ppc_vsr_t *at, int, int); + +typedef void vsxger_muladd_f(ppc_vsr_t *, ppc_vsr_t *, ppc_vsr_t *, int, int, + int flags, float_status *s); + +static void vsxger_muladd32(ppc_vsr_t *at, ppc_vsr_t *a, ppc_vsr_t *b, int i, + int j, int flags, float_status *s) +{ + at[i].VsrSF(j) = float32_muladd(a->VsrSF(i), b->VsrSF(j), + at[i].VsrSF(j), flags, s); +} + +static void vsxger_mul32(ppc_vsr_t *at, ppc_vsr_t *a, ppc_vsr_t *b, int i, + int j, int flags, float_status *s) +{ + at[i].VsrSF(j) = float32_mul(a->VsrSF(i), b->VsrSF(j), s); +} + +static void vsxger_zero32(ppc_vsr_t *at, int i, int j) +{ + at[i].VsrSF(j) = float32_zero; +} + +static void vsxger_muladd64(ppc_vsr_t *at, ppc_vsr_t *a, ppc_vsr_t *b, int i, + int j, int flags, float_status *s) +{ + if (j >= 2) { + j -= 2; + at[i].VsrDF(j) = float64_muladd(a[i / 2].VsrDF(i % 2), b->VsrDF(j), + at[i].VsrDF(j), flags, s); + } +} + +static void vsxger_mul64(ppc_vsr_t *at, ppc_vsr_t *a, ppc_vsr_t *b, int i, + int j, int flags, float_status *s) +{ + if (j >= 2) { + j -= 2; + at[i].VsrDF(j) = float64_mul(a[i / 2].VsrDF(i % 2), b->VsrDF(j), s); + } +} + +static void vsxger_zero64(ppc_vsr_t *at, int i, int j) +{ + if (j >= 2) { + j -= 2; + at[i].VsrDF(j) = float64_zero; + } +} + +static void vsxger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask, bool acc, bool neg_mul, + bool neg_acc, vsxger_muladd_f mul, vsxger_muladd_f muladd, + vsxger_zero zero) +{ + int i, j, xmsk_bit, ymsk_bit, op_flags; + uint8_t xmsk = mask & 0x0F; + uint8_t ymsk = (mask >> 4) & 0x0F; + float_status *excp_ptr = &env->fp_status; + op_flags = (neg_acc ^ neg_mul) ? float_muladd_negate_c : 0; + op_flags |= (neg_mul) ? float_muladd_negate_result : 0; + helper_reset_fpstatus(env); + for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { + for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { + if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { + if (acc) { + muladd(at, a, b, i, j, op_flags, excp_ptr); + } else { + mul(at, a, b, i, j, op_flags, excp_ptr); + } + } else { + zero(at, i, j); + } + } + } + vsxger_excp(env, GETPC()); +} + +QEMU_FLATTEN +void helper_XVF32GER(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger(env, a, b, at, mask, false, false, false, vsxger_mul32, + vsxger_muladd32, vsxger_zero32); +} + +QEMU_FLATTEN +void helper_XVF32GERPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger(env, a, b, at, mask, true, false, false, vsxger_mul32, + vsxger_muladd32, vsxger_zero32); +} + +QEMU_FLATTEN +void helper_XVF32GERPN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger(env, a, b, at, mask, true, false, true, vsxger_mul32, + vsxger_muladd32, vsxger_zero32); +} + +QEMU_FLATTEN +void helper_XVF32GERNP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger(env, a, b, at, mask, true, true, false, vsxger_mul32, + vsxger_muladd32, vsxger_zero32); +} + +QEMU_FLATTEN +void helper_XVF32GERNN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger(env, a, b, at, mask, true, true, true, vsxger_mul32, + vsxger_muladd32, vsxger_zero32); +} + +QEMU_FLATTEN +void helper_XVF64GER(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger(env, a, b, at, mask, false, false, false, vsxger_mul64, + vsxger_muladd64, vsxger_zero64); +} + +QEMU_FLATTEN +void helper_XVF64GERPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger(env, a, b, at, mask, true, false, false, vsxger_mul64, + vsxger_muladd64, vsxger_zero64); +} + +QEMU_FLATTEN +void helper_XVF64GERPN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger(env, a, b, at, mask, true, false, true, vsxger_mul64, + vsxger_muladd64, vsxger_zero64); +} + +QEMU_FLATTEN +void helper_XVF64GERNP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger(env, a, b, at, mask, true, true, false, vsxger_mul64, + vsxger_muladd64, vsxger_zero64); +} + +QEMU_FLATTEN +void helper_XVF64GERNN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger(env, a, b, at, mask, true, true, true, vsxger_mul64, + vsxger_muladd64, vsxger_zero64); +} diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 1666797edf..f38cdbe1d8 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -550,6 +550,16 @@ DEF_HELPER_5(XVI16GER2, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVI16GER2S, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVI16GER2PP, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVI16GER2SPP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF32GER, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF32GERPP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF32GERPN, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF32GERNP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF32GERNN, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF64GER, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF64GERPP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF64GERPN, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF64GERNP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF64GERNN, void, env, vsr, vsr, acc, i32) DEF_HELPER_2(efscfsi, i32, env, i32) DEF_HELPER_2(efscfui, i32, env, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 0e189fe2da..6b644155ec 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -178,6 +178,7 @@ # 32 bit GER instructions have all mask bits considered 1 &MMIRR_XX3 xa xb xt pmsk xmsk ymsk %xx_at 23:3 +%xx_xa_pair 2:1 17:4 !function=times_2 @XX3_at ...... ... .. ..... ..... ........ ... &MMIRR_XX3 xt=%xx_at xb=%xx_xb \ pmsk=255 xmsk=15 ymsk=15 @@ -761,3 +762,15 @@ XVI16GER2PP 111011 ... -- ..... ..... 01101011 ..- @XX3_at xa=%xx_xa XVI8GER4SPP 111011 ... -- ..... ..... 01100011 ..- @XX3_at xa=%xx_xa XVI16GER2S 111011 ... -- ..... ..... 00101011 ..- @XX3_at xa=%xx_xa XVI16GER2SPP 111011 ... -- ..... ..... 00101010 ..- @XX3_at xa=%xx_xa + +XVF32GER 111011 ... -- ..... ..... 00011011 ..- @XX3_at xa=%xx_xa +XVF32GERPP 111011 ... -- ..... ..... 00011010 ..- @XX3_at xa=%xx_xa +XVF32GERPN 111011 ... -- ..... ..... 10011010 ..- @XX3_at xa=%xx_xa +XVF32GERNP 111011 ... -- ..... ..... 01011010 ..- @XX3_at xa=%xx_xa +XVF32GERNN 111011 ... -- ..... ..... 11011010 ..- @XX3_at xa=%xx_xa + +XVF64GER 111011 ... -- .... 0 ..... 00111011 ..- @XX3_at xa=%xx_xa_pair +XVF64GERPP 111011 ... -- .... 0 ..... 00111010 ..- @XX3_at xa=%xx_xa_pair +XVF64GERPN 111011 ... -- .... 0 ..... 10111010 ..- @XX3_at xa=%xx_xa_pair +XVF64GERNP 111011 ... -- .... 0 ..... 01111010 ..- @XX3_at xa=%xx_xa_pair +XVF64GERNN 111011 ... -- .... 0 ..... 11111010 ..- @XX3_at xa=%xx_xa_pair diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index b10eded1da..cc754ab175 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2898,6 +2898,18 @@ TRANS64(PMXVI16GER2PP, do_ger, gen_helper_XVI16GER2PP) TRANS64(PMXVI16GER2S, do_ger, gen_helper_XVI16GER2S) TRANS64(PMXVI16GER2SPP, do_ger, gen_helper_XVI16GER2SPP) +TRANS(XVF32GER, do_ger, gen_helper_XVF32GER) +TRANS(XVF32GERPP, do_ger, gen_helper_XVF32GERPP) +TRANS(XVF32GERPN, do_ger, gen_helper_XVF32GERPN) +TRANS(XVF32GERNP, do_ger, gen_helper_XVF32GERNP) +TRANS(XVF32GERNN, do_ger, gen_helper_XVF32GERNN) + +TRANS(XVF64GER, do_ger, gen_helper_XVF64GER) +TRANS(XVF64GERPP, do_ger, gen_helper_XVF64GERPP) +TRANS(XVF64GERPN, do_ger, gen_helper_XVF64GERPN) +TRANS(XVF64GERNP, do_ger, gen_helper_XVF64GERNP) +TRANS(XVF64GERNN, do_ger, gen_helper_XVF64GERNN) + #undef GEN_XX2FORM #undef GEN_XX3FORM #undef GEN_XX2IFORM From 2d9cba74ef72232189ac5611968c1b9045a3e1a7 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Tue, 24 May 2022 11:05:34 -0300 Subject: [PATCH 31/34] target/ppc: Implemented xvf16ger* Implement the following PowerISA v3.1 instructions: xvf16ger2: VSX Vector 16-bit Floating-Point GER (rank-2 update) xvf16ger2nn: VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate xvf16ger2np: VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Positive accumulate xvf16ger2pn: VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Negative accumulate xvf16ger2pp: VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Positive accumulate Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220524140537.27451-6-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/cpu.h | 3 + target/ppc/fpu_helper.c | 95 +++++++++++++++++++++++++++++ target/ppc/helper.h | 5 ++ target/ppc/insn32.decode | 6 ++ target/ppc/translate/vsx-impl.c.inc | 6 ++ 5 files changed, 115 insertions(+) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 40c779f246..6d78078f37 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -227,6 +227,7 @@ typedef union _ppc_vsr_t { int16_t s16[8]; int32_t s32[4]; int64_t s64[2]; + float16 f16[8]; float32 f32[4]; float64 f64[2]; float128 f128; @@ -2643,6 +2644,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx) #define VsrSW(i) s32[i] #define VsrD(i) u64[i] #define VsrSD(i) s64[i] +#define VsrHF(i) f16[i] #define VsrSF(i) f32[i] #define VsrDF(i) f64[i] #else @@ -2654,6 +2656,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx) #define VsrSW(i) s32[3 - (i)] #define VsrD(i) u64[1 - (i)] #define VsrSD(i) s64[1 - (i)] +#define VsrHF(i) f16[7 - (i)] #define VsrSF(i) f32[3 - (i)] #define VsrDF(i) f64[1 - (i)] #endif diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 712c71162c..a9b2ef370f 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -36,6 +36,15 @@ static inline float128 float128_snan_to_qnan(float128 x) #define float32_snan_to_qnan(x) ((x) | 0x00400000) #define float16_snan_to_qnan(x) ((x) | 0x0200) +static inline float32 bfp32_neg(float32 a) +{ + if (unlikely(float32_is_any_nan(a))) { + return a; + } else { + return float32_chs(a); + } +} + static inline bool fp_exceptions_enabled(CPUPPCState *env) { #ifdef CONFIG_USER_ONLY @@ -3501,6 +3510,57 @@ static inline void vsxger_excp(CPUPPCState *env, uintptr_t retaddr) do_fpscr_check_status(env, retaddr); } +typedef float64 extract_f16(float16, float_status *); + +static float64 extract_hf16(float16 in, float_status *fp_status) +{ + return float16_to_float64(in, true, fp_status); +} + +static void vsxger16(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask, bool acc, + bool neg_mul, bool neg_acc, extract_f16 extract) +{ + float32 r, aux_acc; + float64 psum, va, vb, vc, vd; + int i, j, xmsk_bit, ymsk_bit; + uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK), + xmsk = FIELD_EX32(mask, GER_MSK, XMSK), + ymsk = FIELD_EX32(mask, GER_MSK, YMSK); + float_status *excp_ptr = &env->fp_status; + for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { + for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { + if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { + va = !(pmsk & 2) ? float64_zero : + extract(a->VsrHF(2 * i), excp_ptr); + vb = !(pmsk & 2) ? float64_zero : + extract(b->VsrHF(2 * j), excp_ptr); + vc = !(pmsk & 1) ? float64_zero : + extract(a->VsrHF(2 * i + 1), excp_ptr); + vd = !(pmsk & 1) ? float64_zero : + extract(b->VsrHF(2 * j + 1), excp_ptr); + psum = float64_mul(va, vb, excp_ptr); + psum = float64r32_muladd(vc, vd, psum, 0, excp_ptr); + r = float64_to_float32(psum, excp_ptr); + if (acc) { + aux_acc = at[i].VsrSF(j); + if (neg_mul) { + r = bfp32_neg(r); + } + if (neg_acc) { + aux_acc = bfp32_neg(aux_acc); + } + r = float32_add(r, aux_acc, excp_ptr); + } + at[i].VsrSF(j) = r; + } else { + at[i].VsrSF(j) = float32_zero; + } + } + } + vsxger_excp(env, GETPC()); +} + typedef void vsxger_zero(ppc_vsr_t *at, int, int); typedef void vsxger_muladd_f(ppc_vsr_t *, ppc_vsr_t *, ppc_vsr_t *, int, int, @@ -3579,6 +3639,41 @@ static void vsxger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, vsxger_excp(env, GETPC()); } +QEMU_FLATTEN +void helper_XVF16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger16(env, a, b, at, mask, false, false, false, extract_hf16); +} + +QEMU_FLATTEN +void helper_XVF16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger16(env, a, b, at, mask, true, false, false, extract_hf16); +} + +QEMU_FLATTEN +void helper_XVF16GER2PN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger16(env, a, b, at, mask, true, false, true, extract_hf16); +} + +QEMU_FLATTEN +void helper_XVF16GER2NP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger16(env, a, b, at, mask, true, true, false, extract_hf16); +} + +QEMU_FLATTEN +void helper_XVF16GER2NN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger16(env, a, b, at, mask, true, true, true, extract_hf16); +} + QEMU_FLATTEN void helper_XVF32GER(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, uint32_t mask) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index f38cdbe1d8..4070c0891c 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -550,6 +550,11 @@ DEF_HELPER_5(XVI16GER2, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVI16GER2S, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVI16GER2PP, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVI16GER2SPP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF16GER2, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF16GER2PP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF16GER2PN, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF16GER2NP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVF16GER2NN, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVF32GER, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVF32GERPP, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVF32GERPN, void, env, vsr, vsr, acc, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 6b644155ec..b8e317159c 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -763,6 +763,12 @@ XVI8GER4SPP 111011 ... -- ..... ..... 01100011 ..- @XX3_at xa=%xx_xa XVI16GER2S 111011 ... -- ..... ..... 00101011 ..- @XX3_at xa=%xx_xa XVI16GER2SPP 111011 ... -- ..... ..... 00101010 ..- @XX3_at xa=%xx_xa +XVF16GER2 111011 ... -- ..... ..... 00010011 ..- @XX3_at xa=%xx_xa +XVF16GER2PP 111011 ... -- ..... ..... 00010010 ..- @XX3_at xa=%xx_xa +XVF16GER2PN 111011 ... -- ..... ..... 10010010 ..- @XX3_at xa=%xx_xa +XVF16GER2NP 111011 ... -- ..... ..... 01010010 ..- @XX3_at xa=%xx_xa +XVF16GER2NN 111011 ... -- ..... ..... 11010010 ..- @XX3_at xa=%xx_xa + XVF32GER 111011 ... -- ..... ..... 00011011 ..- @XX3_at xa=%xx_xa XVF32GERPP 111011 ... -- ..... ..... 00011010 ..- @XX3_at xa=%xx_xa XVF32GERPN 111011 ... -- ..... ..... 10011010 ..- @XX3_at xa=%xx_xa diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index cc754ab175..01978a585a 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2898,6 +2898,12 @@ TRANS64(PMXVI16GER2PP, do_ger, gen_helper_XVI16GER2PP) TRANS64(PMXVI16GER2S, do_ger, gen_helper_XVI16GER2S) TRANS64(PMXVI16GER2SPP, do_ger, gen_helper_XVI16GER2SPP) +TRANS(XVF16GER2, do_ger, gen_helper_XVF16GER2) +TRANS(XVF16GER2PP, do_ger, gen_helper_XVF16GER2PP) +TRANS(XVF16GER2PN, do_ger, gen_helper_XVF16GER2PN) +TRANS(XVF16GER2NP, do_ger, gen_helper_XVF16GER2NP) +TRANS(XVF16GER2NN, do_ger, gen_helper_XVF16GER2NN) + TRANS(XVF32GER, do_ger, gen_helper_XVF32GER) TRANS(XVF32GERPP, do_ger, gen_helper_XVF32GERPP) TRANS(XVF32GERPN, do_ger, gen_helper_XVF32GERPN) From 6f642338dc929a513ba5ba8f07a23894ba235bd0 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Tue, 24 May 2022 11:05:35 -0300 Subject: [PATCH 32/34] target/ppc: Implemented pmxvf*ger* Implement the following PowerISA v3.1 instructions: pmxvf16ger2: Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) pmxvf16ger2nn: Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate pmxvf16ger2np: Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Positive accumulate pmxvf16ger2pn: Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Negative accumulate pmxvf16ger2pp: Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Positive accumulate pmxvf32ger: Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) pmxvf32gernn: Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate pmxvf32gernp: Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate pmxvf32gerpn: Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate pmxvf32gerpp: Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate pmxvf64ger: Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) pmxvf64gernn: Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate pmxvf64gernp: Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate pmxvf64gerpn: Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate pmxvf64gerpp: Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220524140537.27451-7-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/insn64.decode | 38 +++++++++++++++++++++++++++++ target/ppc/translate/vsx-impl.c.inc | 18 ++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/target/ppc/insn64.decode b/target/ppc/insn64.decode index 0eed35c8cd..5ecc5c85bf 100644 --- a/target/ppc/insn64.decode +++ b/target/ppc/insn64.decode @@ -73,10 +73,15 @@ %xx3_xa 2:1 16:5 %xx3_xb 1:1 11:5 %xx3_at 23:3 +%xx3_xa_pair 2:1 17:4 !function=times_2 @MMIRR_XX3 ...... .. .... .. . . ........ xmsk:4 ymsk:4 \ ...... ... .. ..... ..... ........ ... \ &MMIRR_XX3 xa=%xx3_xa xb=%xx3_xb xt=%xx3_at +@MMIRR_XX3_NO_P ...... .. .... .. . . ........ xmsk:4 .... \ + ...... ... .. ..... ..... ........ ... \ + &MMIRR_XX3 xb=%xx3_xb xt=%xx3_at pmsk=1 + ### Fixed-Point Load Instructions PLBZ 000001 10 0--.-- .................. \ @@ -145,6 +150,39 @@ PMXVI16GER2S 000001 11 1001 -- - - pmsk:2 ------ ........ \ PMXVI16GER2SPP 000001 11 1001 -- - - pmsk:2 ------ ........ \ 111011 ... -- ..... ..... 00101010 ..- @MMIRR_XX3 +PMXVF16GER2 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 00010011 ..- @MMIRR_XX3 +PMXVF16GER2PP 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 00010010 ..- @MMIRR_XX3 +PMXVF16GER2PN 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 10010010 ..- @MMIRR_XX3 +PMXVF16GER2NP 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 01010010 ..- @MMIRR_XX3 +PMXVF16GER2NN 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 11010010 ..- @MMIRR_XX3 + +PMXVF32GER 000001 11 1001 -- - - -------- .... ymsk:4 \ + 111011 ... -- ..... ..... 00011011 ..- @MMIRR_XX3_NO_P xa=%xx3_xa +PMXVF32GERPP 000001 11 1001 -- - - -------- .... ymsk:4 \ + 111011 ... -- ..... ..... 00011010 ..- @MMIRR_XX3_NO_P xa=%xx3_xa +PMXVF32GERPN 000001 11 1001 -- - - -------- .... ymsk:4 \ + 111011 ... -- ..... ..... 10011010 ..- @MMIRR_XX3_NO_P xa=%xx3_xa +PMXVF32GERNP 000001 11 1001 -- - - -------- .... ymsk:4 \ + 111011 ... -- ..... ..... 01011010 ..- @MMIRR_XX3_NO_P xa=%xx3_xa +PMXVF32GERNN 000001 11 1001 -- - - -------- .... ymsk:4 \ + 111011 ... -- ..... ..... 11011010 ..- @MMIRR_XX3_NO_P xa=%xx3_xa + +PMXVF64GER 000001 11 1001 -- - - -------- .... ymsk:2 -- \ + 111011 ... -- ....0 ..... 00111011 ..- @MMIRR_XX3_NO_P xa=%xx3_xa_pair +PMXVF64GERPP 000001 11 1001 -- - - -------- .... ymsk:2 -- \ + 111011 ... -- ....0 ..... 00111010 ..- @MMIRR_XX3_NO_P xa=%xx3_xa_pair +PMXVF64GERPN 000001 11 1001 -- - - -------- .... ymsk:2 -- \ + 111011 ... -- ....0 ..... 10111010 ..- @MMIRR_XX3_NO_P xa=%xx3_xa_pair +PMXVF64GERNP 000001 11 1001 -- - - -------- .... ymsk:2 -- \ + 111011 ... -- ....0 ..... 01111010 ..- @MMIRR_XX3_NO_P xa=%xx3_xa_pair +PMXVF64GERNN 000001 11 1001 -- - - -------- .... ymsk:2 -- \ + 111011 ... -- ....0 ..... 11111010 ..- @MMIRR_XX3_NO_P xa=%xx3_xa_pair + ### Prefixed No-operation Instruction @PNOP 000001 11 0000-- 000000000000000000 \ diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 01978a585a..e79752899a 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2916,6 +2916,24 @@ TRANS(XVF64GERPN, do_ger, gen_helper_XVF64GERPN) TRANS(XVF64GERNP, do_ger, gen_helper_XVF64GERNP) TRANS(XVF64GERNN, do_ger, gen_helper_XVF64GERNN) +TRANS64(PMXVF16GER2, do_ger, gen_helper_XVF16GER2) +TRANS64(PMXVF16GER2PP, do_ger, gen_helper_XVF16GER2PP) +TRANS64(PMXVF16GER2PN, do_ger, gen_helper_XVF16GER2PN) +TRANS64(PMXVF16GER2NP, do_ger, gen_helper_XVF16GER2NP) +TRANS64(PMXVF16GER2NN, do_ger, gen_helper_XVF16GER2NN) + +TRANS64(PMXVF32GER, do_ger, gen_helper_XVF32GER) +TRANS64(PMXVF32GERPP, do_ger, gen_helper_XVF32GERPP) +TRANS64(PMXVF32GERPN, do_ger, gen_helper_XVF32GERPN) +TRANS64(PMXVF32GERNP, do_ger, gen_helper_XVF32GERNP) +TRANS64(PMXVF32GERNN, do_ger, gen_helper_XVF32GERNN) + +TRANS64(PMXVF64GER, do_ger, gen_helper_XVF64GER) +TRANS64(PMXVF64GERPP, do_ger, gen_helper_XVF64GERPP) +TRANS64(PMXVF64GERPN, do_ger, gen_helper_XVF64GERPN) +TRANS64(PMXVF64GERNP, do_ger, gen_helper_XVF64GERNP) +TRANS64(PMXVF64GERNN, do_ger, gen_helper_XVF64GERNN) + #undef GEN_XX2FORM #undef GEN_XX3FORM #undef GEN_XX2IFORM From 5724e131ca6cd970555e66fb47b90b287ffa8ea0 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Tue, 24 May 2022 11:05:36 -0300 Subject: [PATCH 33/34] target/ppc: Implemented [pm]xvbf16ger2* Implement the following PowerISA v3.1 instructions: xvbf16ger2: VSX Vector bfloat16 GER (rank-2 update) xvbf16ger2nn: VSX Vector bfloat16 GER (rank-2 update) Negative multiply, Negative accumulate xvbf16ger2np: VSX Vector bfloat16 GER (rank-2 update) Negative multiply, Positive accumulate xvbf16ger2pn: VSX Vector bfloat16 GER (rank-2 update) Positive multiply, Negative accumulate xvbf16ger2pp: VSX Vector bfloat16 GER (rank-2 update) Positive multiply, Positive accumulate pmxvbf16ger2: Prefixed Masked VSX Vector bfloat16 GER (rank-2 update) pmxvbf16ger2nn: Prefixed Masked VSX Vector bfloat16 GER (rank-2 update) Negative multiply, Negative accumulate pmxvbf16ger2np: Prefixed Masked VSX Vector bfloat16 GER (rank-2 update) Negative multiply, Positive accumulate pmxvbf16ger2pn: Prefixed Masked VSX Vector bfloat16 GER (rank-2 update) Positive multiply, Negative accumulate pmxvbf16ger2pp: Prefixed Masked VSX Vector bfloat16 GER (rank-2 update) Positive multiply, Positive accumulate Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220524140537.27451-8-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/fpu_helper.c | 40 +++++++++++++++++++++++++++++ target/ppc/helper.h | 5 ++++ target/ppc/insn32.decode | 6 +++++ target/ppc/insn64.decode | 11 ++++++++ target/ppc/translate/vsx-impl.c.inc | 12 +++++++++ 5 files changed, 74 insertions(+) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index a9b2ef370f..fed0ce420a 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -3517,6 +3517,11 @@ static float64 extract_hf16(float16 in, float_status *fp_status) return float16_to_float64(in, true, fp_status); } +static float64 extract_bf16(bfloat16 in, float_status *fp_status) +{ + return bfloat16_to_float64(in, fp_status); +} + static void vsxger16(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, uint32_t mask, bool acc, bool neg_mul, bool neg_acc, extract_f16 extract) @@ -3639,6 +3644,41 @@ static void vsxger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, vsxger_excp(env, GETPC()); } +QEMU_FLATTEN +void helper_XVBF16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger16(env, a, b, at, mask, false, false, false, extract_bf16); +} + +QEMU_FLATTEN +void helper_XVBF16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger16(env, a, b, at, mask, true, false, false, extract_bf16); +} + +QEMU_FLATTEN +void helper_XVBF16GER2PN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger16(env, a, b, at, mask, true, false, true, extract_bf16); +} + +QEMU_FLATTEN +void helper_XVBF16GER2NP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger16(env, a, b, at, mask, true, true, false, extract_bf16); +} + +QEMU_FLATTEN +void helper_XVBF16GER2NN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, + ppc_acc_t *at, uint32_t mask) +{ + vsxger16(env, a, b, at, mask, true, true, true, extract_bf16); +} + QEMU_FLATTEN void helper_XVF16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, uint32_t mask) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 4070c0891c..6233e28d85 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -555,6 +555,11 @@ DEF_HELPER_5(XVF16GER2PP, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVF16GER2PN, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVF16GER2NP, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVF16GER2NN, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVBF16GER2, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVBF16GER2PP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVBF16GER2PN, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVBF16GER2NP, void, env, vsr, vsr, acc, i32) +DEF_HELPER_5(XVBF16GER2NN, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVF32GER, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVF32GERPP, void, env, vsr, vsr, acc, i32) DEF_HELPER_5(XVF32GERPN, void, env, vsr, vsr, acc, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index b8e317159c..18a94fa3b5 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -763,6 +763,12 @@ XVI8GER4SPP 111011 ... -- ..... ..... 01100011 ..- @XX3_at xa=%xx_xa XVI16GER2S 111011 ... -- ..... ..... 00101011 ..- @XX3_at xa=%xx_xa XVI16GER2SPP 111011 ... -- ..... ..... 00101010 ..- @XX3_at xa=%xx_xa +XVBF16GER2 111011 ... -- ..... ..... 00110011 ..- @XX3_at xa=%xx_xa +XVBF16GER2PP 111011 ... -- ..... ..... 00110010 ..- @XX3_at xa=%xx_xa +XVBF16GER2PN 111011 ... -- ..... ..... 10110010 ..- @XX3_at xa=%xx_xa +XVBF16GER2NP 111011 ... -- ..... ..... 01110010 ..- @XX3_at xa=%xx_xa +XVBF16GER2NN 111011 ... -- ..... ..... 11110010 ..- @XX3_at xa=%xx_xa + XVF16GER2 111011 ... -- ..... ..... 00010011 ..- @XX3_at xa=%xx_xa XVF16GER2PP 111011 ... -- ..... ..... 00010010 ..- @XX3_at xa=%xx_xa XVF16GER2PN 111011 ... -- ..... ..... 10010010 ..- @XX3_at xa=%xx_xa diff --git a/target/ppc/insn64.decode b/target/ppc/insn64.decode index 5ecc5c85bf..de115c1943 100644 --- a/target/ppc/insn64.decode +++ b/target/ppc/insn64.decode @@ -150,6 +150,17 @@ PMXVI16GER2S 000001 11 1001 -- - - pmsk:2 ------ ........ \ PMXVI16GER2SPP 000001 11 1001 -- - - pmsk:2 ------ ........ \ 111011 ... -- ..... ..... 00101010 ..- @MMIRR_XX3 +PMXVBF16GER2 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 00110011 ..- @MMIRR_XX3 +PMXVBF16GER2PP 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 00110010 ..- @MMIRR_XX3 +PMXVBF16GER2PN 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 10110010 ..- @MMIRR_XX3 +PMXVBF16GER2NP 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 01110010 ..- @MMIRR_XX3 +PMXVBF16GER2NN 000001 11 1001 -- - - pmsk:2 ------ ........ \ + 111011 ... -- ..... ..... 11110010 ..- @MMIRR_XX3 + PMXVF16GER2 000001 11 1001 -- - - pmsk:2 ------ ........ \ 111011 ... -- ..... ..... 00010011 ..- @MMIRR_XX3 PMXVF16GER2PP 000001 11 1001 -- - - pmsk:2 ------ ........ \ diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index e79752899a..7acdbceec4 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2898,6 +2898,12 @@ TRANS64(PMXVI16GER2PP, do_ger, gen_helper_XVI16GER2PP) TRANS64(PMXVI16GER2S, do_ger, gen_helper_XVI16GER2S) TRANS64(PMXVI16GER2SPP, do_ger, gen_helper_XVI16GER2SPP) +TRANS(XVBF16GER2, do_ger, gen_helper_XVBF16GER2) +TRANS(XVBF16GER2PP, do_ger, gen_helper_XVBF16GER2PP) +TRANS(XVBF16GER2PN, do_ger, gen_helper_XVBF16GER2PN) +TRANS(XVBF16GER2NP, do_ger, gen_helper_XVBF16GER2NP) +TRANS(XVBF16GER2NN, do_ger, gen_helper_XVBF16GER2NN) + TRANS(XVF16GER2, do_ger, gen_helper_XVF16GER2) TRANS(XVF16GER2PP, do_ger, gen_helper_XVF16GER2PP) TRANS(XVF16GER2PN, do_ger, gen_helper_XVF16GER2PN) @@ -2916,6 +2922,12 @@ TRANS(XVF64GERPN, do_ger, gen_helper_XVF64GERPN) TRANS(XVF64GERNP, do_ger, gen_helper_XVF64GERNP) TRANS(XVF64GERNN, do_ger, gen_helper_XVF64GERNN) +TRANS64(PMXVBF16GER2, do_ger, gen_helper_XVBF16GER2) +TRANS64(PMXVBF16GER2PP, do_ger, gen_helper_XVBF16GER2PP) +TRANS64(PMXVBF16GER2PN, do_ger, gen_helper_XVBF16GER2PN) +TRANS64(PMXVBF16GER2NP, do_ger, gen_helper_XVBF16GER2NP) +TRANS64(PMXVBF16GER2NN, do_ger, gen_helper_XVBF16GER2NN) + TRANS64(PMXVF16GER2, do_ger, gen_helper_XVF16GER2) TRANS64(PMXVF16GER2PP, do_ger, gen_helper_XVF16GER2PP) TRANS64(PMXVF16GER2PN, do_ger, gen_helper_XVF16GER2PN) From 96c343cc774b52b010e464a219d13f8e55e1003f Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 24 May 2022 11:05:37 -0300 Subject: [PATCH 34/34] linux-user: Add PowerPC ISA 3.1 and MMA to hwcap These are new hwcap bits added for power10. Signed-off-by: Joel Stanley Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220524140537.27451-9-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- linux-user/elfload.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index acc21748f9..f7eae357f4 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -779,6 +779,8 @@ enum { QEMU_PPC_FEATURE2_DARN = 0x00200000, /* darn random number insn */ QEMU_PPC_FEATURE2_SCV = 0x00100000, /* scv syscall */ QEMU_PPC_FEATURE2_HTM_NO_SUSPEND = 0x00080000, /* TM w/o suspended state */ + QEMU_PPC_FEATURE2_ARCH_3_1 = 0x00040000, /* ISA 3.1 */ + QEMU_PPC_FEATURE2_MMA = 0x00020000, /* Matrix-Multiply Assist */ }; #define ELF_HWCAP get_elf_hwcap() @@ -836,6 +838,8 @@ static uint32_t get_elf_hwcap2(void) QEMU_PPC_FEATURE2_VEC_CRYPTO); GET_FEATURE2(PPC2_ISA300, QEMU_PPC_FEATURE2_ARCH_3_00 | QEMU_PPC_FEATURE2_DARN | QEMU_PPC_FEATURE2_HAS_IEEE128); + GET_FEATURE2(PPC2_ISA310, QEMU_PPC_FEATURE2_ARCH_3_1 | + QEMU_PPC_FEATURE2_MMA); #undef GET_FEATURE #undef GET_FEATURE2