From e9964c32ba3476db6190556293b754aa50a489d0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 26 Jul 2024 00:50:31 +1000 Subject: [PATCH 01/96] tests/tcg: Skip failing ppc64 multi-threaded tests In Gitlab CI, some ppc64 multi-threaded tcg tests crash when run in the clang-user job with an assertion failure in glibc that seems to indicate corruption: signals: allocatestack.c:223: allocate_stack: Assertion `powerof2 (pagesize_m1 + 1)' failed. Disable these tests for now. Signed-off-by: Nicholas Piggin --- tests/tcg/ppc64/Makefile.target | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/tcg/ppc64/Makefile.target b/tests/tcg/ppc64/Makefile.target index 8c3e4e4038..509a20be2b 100644 --- a/tests/tcg/ppc64/Makefile.target +++ b/tests/tcg/ppc64/Makefile.target @@ -11,6 +11,18 @@ config-cc.mak: Makefile -include config-cc.mak +# multi-threaded tests are known to fail (e.g., clang-user CI job) +# See: https://gitlab.com/qemu-project/qemu/-/issues/2456 +run-signals: signals + $(call skip-test, $<, "BROKEN (flaky with clang) ") +run-plugin-signals-with-%: + $(call skip-test, $<, "BROKEN (flaky with clang) ") + +run-threadcount: threadcount + $(call skip-test, $<, "BROKEN (flaky with clang) ") +run-plugin-threadcount-with-%: + $(call skip-test, $<, "BROKEN (flaky with clang) ") + ifneq ($(CROSS_CC_HAS_POWER8_VECTOR),) PPC64_TESTS=bcdsub non_signalling_xscv endif From 1a7a31aec4758d6fd89b60d88669f74f30cdb6bb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 6 May 2024 21:56:05 +1000 Subject: [PATCH 02/96] spapr: Migrate ail-mode-3 spapr cap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This cap did not add the migration code when it was introduced. This results in migration failure when changing the default using the command line. 
Cc: qemu-stable@nongnu.org Fixes: ccc5a4c5e10 ("spapr: Add SPAPR_CAP_AIL_MODE_3 for AIL mode 3 support for H_SET_MODE hcall") Reviewed-by: Harsh Prateek Bora Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Nicholas Piggin --- hw/ppc/spapr.c | 1 + hw/ppc/spapr_caps.c | 1 + include/hw/ppc/spapr.h | 1 + 3 files changed, 3 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 98fa3aa6a8..370d7c35d3 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2195,6 +2195,7 @@ static const VMStateDescription vmstate_spapr = { &vmstate_spapr_cap_fwnmi, &vmstate_spapr_fwnmi, &vmstate_spapr_cap_rpt_invalidate, + &vmstate_spapr_cap_ail_mode_3, &vmstate_spapr_cap_nested_papr, NULL } diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 0a15415a1d..2f74923560 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -974,6 +974,7 @@ SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER); SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST); SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI); SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE); +SPAPR_CAP_MIG_STATE(ail_mode_3, SPAPR_CAP_AIL_MODE_3); void spapr_caps_init(SpaprMachineState *spapr) { diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 4aaf23d28f..f6de3e9972 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -1004,6 +1004,7 @@ extern const VMStateDescription vmstate_spapr_cap_large_decr; extern const VMStateDescription vmstate_spapr_cap_ccf_assist; extern const VMStateDescription vmstate_spapr_cap_fwnmi; extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate; +extern const VMStateDescription vmstate_spapr_cap_ail_mode_3; extern const VMStateDescription vmstate_spapr_wdt; static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap) From 8af863f2bd976b937f7e3d38b2ab1813b2fa1d9d Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Mon, 8 Jul 2024 15:55:12 +0900 Subject: [PATCH 03/96] spapr: Free stdout path MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes LeakSanitizer warnings. Signed-off-by: Akihiko Odaki Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Nicholas Piggin --- hw/ppc/spapr_vof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c index 09f29be0b9..c02eaacfed 100644 --- a/hw/ppc/spapr_vof.c +++ b/hw/ppc/spapr_vof.c @@ -28,7 +28,7 @@ target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr, void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt) { - char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + g_autofree char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); vof_build_dt(fdt, spapr->vof); From 785c8637f9d2362a8addf4ded853d975955a9d6b Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Mon, 8 Jul 2024 15:55:13 +0900 Subject: [PATCH 04/96] ppc/vof: Fix unaligned FDT property access FDT properties are aligned by 4 bytes, not 8 bytes. Signed-off-by: Akihiko Odaki Reviewed-by: Peter Maydell Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Nicholas Piggin --- hw/ppc/vof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c index e3b430a81f..b5b6514d79 100644 --- a/hw/ppc/vof.c +++ b/hw/ppc/vof.c @@ -646,7 +646,7 @@ static void vof_dt_memory_available(void *fdt, GArray *claimed, uint64_t base) mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen); g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc)); if (sc == 2) { - mem0_end = be64_to_cpu(*(uint64_t *)(mem0_reg + sizeof(uint32_t) * ac)); + mem0_end = ldq_be_p(mem0_reg + sizeof(uint32_t) * ac); } else { mem0_end = be32_to_cpu(*(uint32_t *)(mem0_reg + sizeof(uint32_t) * ac)); } From c6a3d7bc9e3acf2431ac23ae6dbeb28aa92f873c Mon Sep 17 00:00:00 2001 From: Harsh Prateek Bora Date: Tue, 18 Jun 2024 13:53:52 +0530 Subject: [PATCH 05/96] accel/kvm: Introduce kvm_create_and_park_vcpu() helper There are distinct helpers for creating and parking a KVM vCPU. However, there can be cases where a platform needs to create and immediately park the vCPU during early stages of vcpu init which can later be reused when vcpu thread gets initialized. This would help detect failures with kvm_create_vcpu at an early stage. 
Suggested-by: Nicholas Piggin Reviewed-by: Nicholas Piggin Signed-off-by: Harsh Prateek Bora Signed-off-by: Nicholas Piggin --- accel/kvm/kvm-all.c | 12 ++++++++++++ include/sysemu/kvm.h | 8 ++++++++ 2 files changed, 20 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 67b773692f..e1d1386306 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -398,6 +398,18 @@ int kvm_create_vcpu(CPUState *cpu) return 0; } +int kvm_create_and_park_vcpu(CPUState *cpu) +{ + int ret = 0; + + ret = kvm_create_vcpu(cpu); + if (!ret) { + kvm_park_vcpu(cpu); + } + + return ret; +} + static int do_kvm_destroy_vcpu(CPUState *cpu) { KVMState *s = kvm_state; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index c4a914b3d8..9cf14ca3d5 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -338,6 +338,14 @@ void kvm_park_vcpu(CPUState *cpu); */ int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id); +/** + * kvm_create_and_park_vcpu - Create and park a KVM vCPU + * @cpu: QOM CPUState object for which KVM vCPU has to be created and parked. + * + * @returns: 0 when success, errno (<0) when failed. + */ +int kvm_create_and_park_vcpu(CPUState *cpu); + /* Arch specific hooks */ extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; From 18530e7c57dec3d82d02ed17038661e2005162c1 Mon Sep 17 00:00:00 2001 From: Harsh Prateek Bora Date: Tue, 18 Jun 2024 13:53:53 +0530 Subject: [PATCH 06/96] cpu-common.c: export cpu_get_free_index to be reused later This helper provides an easy way to identify the next available free cpu index which can be used for vcpu creation. Until now, this is being called at a very later stage and there is a need to be able to call it earlier (for now, with ppc64) hence the need to export. 
Suggested-by: Nicholas Piggin Reviewed-by: Nicholas Piggin Signed-off-by: Harsh Prateek Bora Signed-off-by: Nicholas Piggin --- cpu-common.c | 7 ++++--- include/exec/cpu-common.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cpu-common.c b/cpu-common.c index 7ae136f98c..6b262233a3 100644 --- a/cpu-common.c +++ b/cpu-common.c @@ -57,14 +57,12 @@ void cpu_list_unlock(void) qemu_mutex_unlock(&qemu_cpu_list_lock); } -static bool cpu_index_auto_assigned; -static int cpu_get_free_index(void) +int cpu_get_free_index(void) { CPUState *some_cpu; int max_cpu_index = 0; - cpu_index_auto_assigned = true; CPU_FOREACH(some_cpu) { if (some_cpu->cpu_index >= max_cpu_index) { max_cpu_index = some_cpu->cpu_index + 1; @@ -83,8 +81,11 @@ unsigned int cpu_list_generation_id_get(void) void cpu_list_add(CPUState *cpu) { + static bool cpu_index_auto_assigned; + QEMU_LOCK_GUARD(&qemu_cpu_list_lock); if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) { + cpu_index_auto_assigned = true; cpu->cpu_index = cpu_get_free_index(); assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX); } else { diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 240ee04369..2e1b499cb7 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -35,6 +35,8 @@ void cpu_list_lock(void); void cpu_list_unlock(void); unsigned int cpu_list_generation_id_get(void); +int cpu_get_free_index(void); + void tcg_iommu_init_notifier_list(CPUState *cpu); void tcg_iommu_free_notifier_list(CPUState *cpu); From cfb52d07f53aa916003d43f69c945c2b42bc6374 Mon Sep 17 00:00:00 2001 From: Harsh Prateek Bora Date: Tue, 18 Jun 2024 13:53:54 +0530 Subject: [PATCH 07/96] target/ppc: handle vcpu hotplug failure gracefully On ppc64, the PowerVM hypervisor runs with limited memory and a VCPU creation during hotplug may fail during kvm_ioctl for KVM_CREATE_VCPU, leading to termination of guest since errp is set to &error_fatal while calling kvm_init_vcpu. 
This unexpected behaviour can be avoided by pre-creating and parking the vcpu on success, or returning an error otherwise. This enables graceful error delivery for any vcpu hotplug failures while the guest can keep running. Also introducing KVM AccelCPUClass to init cpu_target_realize for kvm. Tested OK by repeatedly doing a hotplug/unplug of vcpus as below: #virsh setvcpus hotplug 40 #virsh setvcpus hotplug 70 error: internal error: unable to execute QEMU command 'device_add': kvmppc_cpu_realize: vcpu hotplug failed with -12 Signed-off-by: Harsh Prateek Bora Reported-by: Anushree Mathur Suggested-by: Shivaprasad G Bhat Suggested-by: Vaibhav Jain Tested-by: Anushree Mathur Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/kvm.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 2c3932200b..907dba60d1 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -48,6 +48,8 @@ #include "qemu/mmap-alloc.h" #include "elf.h" #include "sysemu/kvm_int.h" +#include "sysemu/kvm.h" +#include "hw/core/accel-cpu.h" #include CONFIG_DEVICES @@ -2346,6 +2348,30 @@ static void alter_insns(uint64_t *word, uint64_t flags, bool on) } } +static bool kvmppc_cpu_realize(CPUState *cs, Error **errp) +{ + int ret; + const char *vcpu_str = (cs->parent_obj.hotplugged == true) ? + "hotplug" : "create"; + cs->cpu_index = cpu_get_free_index(); + + POWERPC_CPU(cs)->vcpu_id = cs->cpu_index; + + /* create and park to fail gracefully in case vcpu hotplug fails */ + ret = kvm_create_and_park_vcpu(cs); + if (ret) { + /* + * This causes QEMU to terminate if initial CPU creation + * fails, and only CPU hotplug failure if the error happens + * there. 
+ */ + error_setg(errp, "%s: vcpu %s failed with %d", + __func__, vcpu_str, ret); + return false; + } + return true; +} + static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) { PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2966,3 +2992,23 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) void kvm_arch_accel_class_init(ObjectClass *oc) { } + +static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_target_realize = kvmppc_cpu_realize; +} + +static const TypeInfo kvm_cpu_accel_type_info = { + .name = ACCEL_CPU_NAME("kvm"), + + .parent = TYPE_ACCEL_CPU, + .class_init = kvm_cpu_accel_class_init, + .abstract = true, +}; +static void kvm_cpu_accel_register_types(void) +{ + type_register_static(&kvm_cpu_accel_type_info); +} +type_init(kvm_cpu_accel_register_types); From 2587a57dbb50257f296b5f28b889e54d0b64c394 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 19 Jul 2024 11:39:05 -0700 Subject: [PATCH 08/96] target/ppc/arch_dump: set prstatus pid to cpuid Every other architecture does this, and debuggers need it to be able to identify which prstatus note corresponds to which CPU. 
Reviewed-by: Thomas Huth Reviewed-by: Harsh Prateek Bora Signed-off-by: Omar Sandoval Signed-off-by: Nicholas Piggin --- target/ppc/arch_dump.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c index a8315659d9..f45474133a 100644 --- a/target/ppc/arch_dump.c +++ b/target/ppc/arch_dump.c @@ -47,9 +47,14 @@ struct PPCUserRegStruct { } QEMU_PACKED; struct PPCElfPrstatus { - char pad1[112]; + char pad1[32]; /* 32 == offsetof(struct elf_prstatus, pr_pid) */ + uint32_t pid; + char pad2[76]; /* 76 == offsetof(struct elf_prstatus, pr_reg) - + offsetof(struct elf_prstatus, pr_ppid) */ struct PPCUserRegStruct pr_reg; - char pad2[40]; + char pad3[40]; /* 40 == sizeof(struct elf_prstatus) - + offsetof(struct elf_prstatus, pr_reg) - + sizeof(struct user_pt_regs) */ } QEMU_PACKED; @@ -96,7 +101,7 @@ typedef struct NoteFuncArg { DumpState *state; } NoteFuncArg; -static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu, int id) { int i; reg_t cr; @@ -109,6 +114,7 @@ static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu) prstatus = &note->contents.prstatus; memset(prstatus, 0, sizeof(*prstatus)); + prstatus->pid = cpu_to_dump32(s, id); reg = &prstatus->pr_reg; for (i = 0; i < 32; i++) { @@ -127,7 +133,7 @@ static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu) reg->ccr = cpu_to_dump_reg(s, cr); } -static void ppc_write_elf_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu, int id) { int i; struct PPCElfFpregset *fpregset; @@ -146,7 +152,7 @@ static void ppc_write_elf_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu) fpregset->fpscr = cpu_to_dump_reg(s, cpu->env.fpscr); } -static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu, int id) { int i; 
struct PPCElfVmxregset *vmxregset; @@ -178,7 +184,7 @@ static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu) vmxregset->vscr.u32[3] = cpu_to_dump32(s, ppc_get_vscr(&cpu->env)); } -static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu, int id) { int i; struct PPCElfVsxregset *vsxregset; @@ -195,7 +201,7 @@ static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu) } } -static void ppc_write_elf_speregset(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_speregset(NoteFuncArg *arg, PowerPCCPU *cpu, int id) { struct PPCElfSperegset *speregset; Note *note = &arg->note; @@ -211,7 +217,7 @@ static void ppc_write_elf_speregset(NoteFuncArg *arg, PowerPCCPU *cpu) static const struct NoteFuncDescStruct { int contents_size; - void (*note_contents_func)(NoteFuncArg *arg, PowerPCCPU *cpu); + void (*note_contents_func)(NoteFuncArg *arg, PowerPCCPU *cpu, int id); } note_func[] = { {sizeof_field(Note, contents.prstatus), ppc_write_elf_prstatus}, {sizeof_field(Note, contents.fpregset), ppc_write_elf_fpregset}, @@ -282,7 +288,7 @@ static int ppc_write_all_elf_notes(const char *note_name, arg.note.hdr.n_descsz = cpu_to_dump32(s, nf->contents_size); strncpy(arg.note.name, note_name, sizeof(arg.note.name)); - (*nf->note_contents_func)(&arg, cpu); + (*nf->note_contents_func)(&arg, cpu, id); note_size = sizeof(arg.note) - sizeof(arg.note.contents) + nf->contents_size; From b9c0a2e01c0f38bdc4ba8f69cf298eeebfb3738b Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Wed, 5 Jun 2024 15:57:52 +0000 Subject: [PATCH 09/96] linux-header: PPC: KVM: Update one-reg ids for DEXCR, HASHKEYR and HASHPKEYR This is a placeholder change for these SPRs until the full linux header update. 
Signed-off-by: Shivaprasad G Bhat Signed-off-by: Nicholas Piggin --- linux-headers/asm-powerpc/kvm.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 1691297a76..eaeda00178 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -645,6 +645,9 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) #define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) +#define KVM_REG_PPC_DEXCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6) +#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7) +#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs From ca85beb4b783064781a3295feaa7b1a8645f2df9 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Wed, 5 Jun 2024 15:58:02 +0000 Subject: [PATCH 10/96] target/ppc/cpu_init: Synchronize DEXCR with KVM for migration The patch enables DEXCR migration by hooking with the "KVM one reg" ID KVM_REG_PPC_DEXCR. 
Signed-off-by: Shivaprasad G Bhat Signed-off-by: Nicholas Piggin --- target/ppc/cpu_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index cdada7987d..7c3ee80661 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5886,9 +5886,9 @@ static void register_power10_hash_sprs(CPUPPCState *env) static void register_power10_dexcr_sprs(CPUPPCState *env) { - spr_register(env, SPR_DEXCR, "DEXCR", + spr_register_kvm(env, SPR_DEXCR, "DEXCR", SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_write_generic, KVM_REG_PPC_DEXCR, 0); spr_register(env, SPR_UDEXCR, "UDEXCR", From 843b243f8620a92f5ff652550b61fc724e5d520c Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Wed, 5 Jun 2024 15:58:12 +0000 Subject: [PATCH 11/96] target/ppc/cpu_init: Synchronize HASHKEYR with KVM for migration The patch enables HASHKEYR migration by hooking with the "KVM one reg" ID KVM_REG_PPC_HASHKEYR. 
Signed-off-by: Shivaprasad G Bhat Signed-off-by: Nicholas Piggin --- target/ppc/cpu_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 7c3ee80661..d311b190c1 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5873,10 +5873,10 @@ static void register_power10_hash_sprs(CPUPPCState *env) ((uint64_t)g_rand_int(rand) << 32) | (uint64_t)g_rand_int(rand); g_rand_free(rand); #endif - spr_register(env, SPR_HASHKEYR, "HASHKEYR", + spr_register_kvm(env, SPR_HASHKEYR, "HASHKEYR", SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, - hashkeyr_initial_value); + KVM_REG_PPC_HASHKEYR, hashkeyr_initial_value); spr_register_hv(env, SPR_HASHPKEYR, "HASHPKEYR", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, From c0840b46d4c8483a93370434f9ea10b8a7b50bde Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Wed, 5 Jun 2024 15:58:22 +0000 Subject: [PATCH 12/96] target/ppc/cpu_init: Synchronize HASHPKEYR with KVM for migration The patch enables HASHPKEYR migration by hooking with the "KVM one reg" ID KVM_REG_PPC_HASHPKEYR. 
Signed-off-by: Shivaprasad G Bhat Signed-off-by: Nicholas Piggin --- target/ppc/cpu_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index d311b190c1..2e652f498e 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5877,11 +5877,11 @@ static void register_power10_hash_sprs(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, KVM_REG_PPC_HASHKEYR, hashkeyr_initial_value); - spr_register_hv(env, SPR_HASHPKEYR, "HASHPKEYR", + spr_register_kvm_hv(env, SPR_HASHPKEYR, "HASHPKEYR", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, - hashpkeyr_initial_value); + KVM_REG_PPC_HASHPKEYR, hashpkeyr_initial_value); } static void register_power10_dexcr_sprs(CPUPPCState *env) From 977e789c4a8ed813d4ab03f17ea20a575bf20cd1 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Thu, 2 May 2024 11:57:01 +0530 Subject: [PATCH 13/96] ppc/pnv: Update Power10's cfam id to use Power10 DD2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Power10 DD1.0 was dropped in: commit 8f054d9ee825 ("ppc: Drop support for POWER9 and POWER10 DD1 chips") Use the newer Power10 DD2 chips cfam id. 
Signed-off-by: Aditya Gupta Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 2 +- tests/qtest/pnv-xscom.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 6b41d1d2dd..13cebd6ab9 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -2087,7 +2087,7 @@ static void pnv_chip_power10_class_init(ObjectClass *klass, void *data) PnvChipClass *k = PNV_CHIP_CLASS(klass); static const int i2c_ports_per_engine[PNV10_CHIP_MAX_I2C] = {14, 14, 2, 16}; - k->chip_cfam_id = 0x120da04900008000ull; /* P10 DD1.0 (with NX) */ + k->chip_cfam_id = 0x220da04980000000ull; /* P10 DD2.0 (with NX) */ k->cores_mask = POWER10_CORE_MASK; k->chip_pir = pnv_chip_pir_p10; k->intc_create = pnv_chip_power10_intc_create; diff --git a/tests/qtest/pnv-xscom.h b/tests/qtest/pnv-xscom.h index 6f62941744..5aa1701ea7 100644 --- a/tests/qtest/pnv-xscom.h +++ b/tests/qtest/pnv-xscom.h @@ -56,7 +56,7 @@ static const PnvChip pnv_chips[] = { .chip_type = PNV_CHIP_POWER10, .cpu_model = "POWER10", .xscom_base = 0x000603fc00000000ull, - .cfam_id = 0x120da04900008000ull, + .cfam_id = 0x220da04980000000ull, .first_core = 0x0, .num_i2c = 4, }, From c6e07f03f7270799a26eb79e17ac40078ad94e5c Mon Sep 17 00:00:00 2001 From: Glenn Miles Date: Fri, 24 May 2024 13:24:14 -0500 Subject: [PATCH 14/96] ppc/pnv: Fix loss of LPC SERIRQ interrupts The LPC HC irq status register bits are set when an LPC IRQSER input is asserted. These irq status bits drive the PSI irq to the CPU interrupt controller. The LPC HC irq status bits are cleared by software writing to the register with 1's for the bits to clear. Existing register write was clearing the irq status bits even when the input was asserted, this results in interrupts being lost. This fix changes the behavior to keep track of the device IRQ status in internal state that is separate from the irq status register, and only allowing the irq status bits to be cleared if the associated input is not asserted. 
Signed-off-by: Glenn Miles [np: rebased before P9 PSI SERIRQ patch, adjust changelog/comments] Reviewed-by: Glenn Miles Signed-off-by: Nicholas Piggin --- hw/ppc/pnv_lpc.c | 22 +++++++++++++++++++--- include/hw/ppc/pnv_lpc.h | 3 +++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c index d692858bee..7d26b29487 100644 --- a/hw/ppc/pnv_lpc.c +++ b/hw/ppc/pnv_lpc.c @@ -505,7 +505,14 @@ static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val, pnv_lpc_eval_irqs(lpc); break; case LPC_HC_IRQSTAT: - lpc->lpc_hc_irqstat &= ~val; + /* + * This register is write-to-clear for the IRQSER (LPC device IRQ) + * status. However if the device has not de-asserted its interrupt + * that will just raise this IRQ status bit again. Model this by + * keeping track of the inputs and only clearing if the inputs are + * deasserted. + */ + lpc->lpc_hc_irqstat &= ~(val & ~lpc->lpc_hc_irq_inputs); pnv_lpc_eval_irqs(lpc); break; case LPC_HC_ERROR_ADDRESS: @@ -803,11 +810,20 @@ static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level) static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level) { PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t irq_bit = LPC_HC_IRQ_SERIRQ0 >> n; - /* The Naples HW latches the 1 levels, clearing is done by SW */ if (level) { - lpc->lpc_hc_irqstat |= LPC_HC_IRQ_SERIRQ0 >> n; + lpc->lpc_hc_irq_inputs |= irq_bit; + + /* + * The LPC HC in Naples and later latches LPC IRQ into a bit field in + * the IRQSTAT register, and that drives the PSI IRQ to the IC. + * Software clears this bit manually (see LPC_HC_IRQSTAT handler). 
+ */ + lpc->lpc_hc_irqstat |= irq_bit; pnv_lpc_eval_irqs(lpc); + } else { + lpc->lpc_hc_irq_inputs &= ~irq_bit; } } diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h index 5d22c45570..97c6872c3f 100644 --- a/include/hw/ppc/pnv_lpc.h +++ b/include/hw/ppc/pnv_lpc.h @@ -73,6 +73,9 @@ struct PnvLpcController { uint32_t opb_irq_pol; uint32_t opb_irq_input; + /* LPC device IRQ state */ + uint32_t lpc_hc_irq_inputs; + /* LPC HC registers */ uint32_t lpc_hc_fw_seg_idsel; uint32_t lpc_hc_fw_rd_acc_size; From 24c3caff995584342101a181af2eacd67129e5ec Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 10 May 2024 17:10:40 +1000 Subject: [PATCH 15/96] ppc/pnv: Implement POWER9 LPC PSI serirq outputs and auto-clear function The POWER8 LPC ISA device irqs all get combined and reported to the line connected to the PSI LPCHC irq. POWER9 changed this so only internal LPC host controller irqs use that line, and the device irqs get routed to 4 new lines connected to PSI SERIRQ0-3. POWER9 also introduced a new feature that automatically clears the irq status in the LPC host controller when EOI'ed, so software does not have to. The powernv OPAL (skiboot) firmware managed to work because the LPCHC irq handler scanned all LPC irqs and handled those including clearing status even on POWER9 systems. So LPC irqs worked despite OPAL thinking it was running in POWER9 mode. After this change, UART interrupts show up on serirq1 which is where OPAL routes them to: cat /proc/interrupts ... 20: 0 XIVE-IRQ 1048563 Level opal-psi#0:lpchc ... 25: 34 XIVE-IRQ 1048568 Level opal-psi#0:lpc_serirq_mux1 Whereas they previously turned up on lpchc. 
Reviewed-by: Glenn Miles Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 36 +++++++++-- hw/ppc/pnv_lpc.c | 132 +++++++++++++++++++++++++++++++-------- include/hw/ppc/pnv_lpc.h | 14 ++++- 3 files changed, 150 insertions(+), 32 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 13cebd6ab9..f56dcf6597 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -727,7 +727,8 @@ static ISABus *pnv_chip_power8_isa_create(PnvChip *chip, Error **errp) Pnv8Chip *chip8 = PNV8_CHIP(chip); qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip8->psi), PSIHB_IRQ_EXTERNAL); - qdev_connect_gpio_out(DEVICE(&chip8->lpc), 0, irq); + qdev_connect_gpio_out_named(DEVICE(&chip8->lpc), "LPCHC", 0, irq); + return pnv_lpc_isa_create(&chip8->lpc, true, errp); } @@ -736,25 +737,48 @@ static ISABus *pnv_chip_power8nvl_isa_create(PnvChip *chip, Error **errp) Pnv8Chip *chip8 = PNV8_CHIP(chip); qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip8->psi), PSIHB_IRQ_LPC_I2C); - qdev_connect_gpio_out(DEVICE(&chip8->lpc), 0, irq); + qdev_connect_gpio_out_named(DEVICE(&chip8->lpc), "LPCHC", 0, irq); + return pnv_lpc_isa_create(&chip8->lpc, false, errp); } static ISABus *pnv_chip_power9_isa_create(PnvChip *chip, Error **errp) { Pnv9Chip *chip9 = PNV9_CHIP(chip); - qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPCHC); + qemu_irq irq; + + irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPCHC); + qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "LPCHC", 0, irq); + + irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ0); + qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 0, irq); + irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ1); + qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 1, irq); + irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ2); + qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 2, irq); + irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ3); + qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), 
"SERIRQ", 3, irq); - qdev_connect_gpio_out(DEVICE(&chip9->lpc), 0, irq); return pnv_lpc_isa_create(&chip9->lpc, false, errp); } static ISABus *pnv_chip_power10_isa_create(PnvChip *chip, Error **errp) { Pnv10Chip *chip10 = PNV10_CHIP(chip); - qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPCHC); + qemu_irq irq; + + irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPCHC); + qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "LPCHC", 0, irq); + + irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPC_SIRQ0); + qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "SERIRQ", 0, irq); + irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPC_SIRQ1); + qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "SERIRQ", 1, irq); + irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPC_SIRQ2); + qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "SERIRQ", 2, irq); + irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPC_SIRQ3); + qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "SERIRQ", 3, irq); - qdev_connect_gpio_out(DEVICE(&chip10->lpc), 0, irq); return pnv_lpc_isa_create(&chip10->lpc, false, errp); } diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c index 7d26b29487..0f14e180af 100644 --- a/hw/ppc/pnv_lpc.c +++ b/hw/ppc/pnv_lpc.c @@ -64,6 +64,7 @@ enum { #define LPC_HC_IRQSER_START_4CLK 0x00000000 #define LPC_HC_IRQSER_START_6CLK 0x01000000 #define LPC_HC_IRQSER_START_8CLK 0x02000000 +#define LPC_HC_IRQSER_AUTO_CLEAR 0x00800000 #define LPC_HC_IRQMASK 0x34 /* same bit defs as LPC_HC_IRQSTAT */ #define LPC_HC_IRQSTAT 0x38 #define LPC_HC_IRQ_SERIRQ0 0x80000000 /* all bits down to ... 
*/ @@ -420,32 +421,90 @@ static const MemoryRegionOps pnv_lpc_mmio_ops = { .endianness = DEVICE_BIG_ENDIAN, }; +/* Program the POWER9 LPC irq to PSI serirq routing table */ +static void pnv_lpc_eval_serirq_routes(PnvLpcController *lpc) +{ + int irq; + + if (!lpc->psi_has_serirq) { + if ((lpc->opb_irq_route0 & PPC_BITMASK(8, 13)) || + (lpc->opb_irq_route1 & PPC_BITMASK(4, 31))) { + qemu_log_mask(LOG_GUEST_ERROR, + "OPB: setting serirq routing on POWER8 system, ignoring.\n"); + } + return; + } + + for (irq = 0; irq <= 13; irq++) { + int serirq = (lpc->opb_irq_route1 >> (31 - 5 - (irq * 2))) & 0x3; + lpc->irq_to_serirq_route[irq] = serirq; + } + + for (irq = 14; irq < ISA_NUM_IRQS; irq++) { + int serirq = (lpc->opb_irq_route0 >> (31 - 9 - (irq * 2))) & 0x3; + lpc->irq_to_serirq_route[irq] = serirq; + } +} + static void pnv_lpc_eval_irqs(PnvLpcController *lpc) { - bool lpc_to_opb_irq = false; + uint32_t active_irqs = 0; + + if (lpc->lpc_hc_irqstat & PPC_BITMASK32(16, 31)) { + qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented irqs in IRQSTAT: " + "0x%08"PRIx32"\n", lpc->lpc_hc_irqstat); + } - /* Update LPC controller to OPB line */ if (lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_EN) { - uint32_t irqs; - - irqs = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask; - lpc_to_opb_irq = (irqs != 0); + active_irqs = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask; } - /* We don't honor the polarity register, it's pointless and unused - * anyway - */ - if (lpc_to_opb_irq) { - lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC; - } else { - lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC; - } - - /* Update OPB internal latch */ - lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask; - /* Reflect the interrupt */ - qemu_set_irq(lpc->psi_irq, lpc->opb_irq_stat != 0); + if (!lpc->psi_has_serirq) { + /* + * POWER8 ORs all irqs together (also with LPCHC internal interrupt + * sources) and outputs a single line that raises the PSI LPCHC irq + * which then latches an OPB IRQ status register that sends the irq + * 
to PSI. + * + * We don't honor the polarity register, it's pointless and unused + * anyway + */ + if (active_irqs) { + lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC; + } else { + lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC; + } + + /* Update OPB internal latch */ + lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask; + + qemu_set_irq(lpc->psi_irq_lpchc, lpc->opb_irq_stat != 0); + } else { + /* + * POWER9 and POWER10 have routing fields in OPB master registers that + * send LPC irqs to 4 output lines that raise the PSI SERIRQ irqs. + * These don't appear to get latched into an OPB register like the + * LPCHC irqs. + * + * POWER9 LPC controller internal irqs still go via the OPB + * and LPCHC PSI irqs like P8, but we have no such internal sources + * modelled yet. + */ + bool serirq_out[4] = { false, false, false, false }; + int irq; + + for (irq = 0; irq < ISA_NUM_IRQS; irq++) { + if (active_irqs & (LPC_HC_IRQ_SERIRQ0 >> irq)) { + serirq_out[lpc->irq_to_serirq_route[irq]] = true; + } + } + + qemu_set_irq(lpc->psi_irq_serirq[0], serirq_out[0]); + qemu_set_irq(lpc->psi_irq_serirq[1], serirq_out[1]); + qemu_set_irq(lpc->psi_irq_serirq[2], serirq_out[2]); + qemu_set_irq(lpc->psi_irq_serirq[3], serirq_out[3]); + } } static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size) @@ -543,10 +602,10 @@ static uint64_t opb_master_read(void *opaque, hwaddr addr, unsigned size) uint64_t val = 0xfffffffffffffffful; switch (addr) { - case OPB_MASTER_LS_ROUTE0: /* TODO */ + case OPB_MASTER_LS_ROUTE0: val = lpc->opb_irq_route0; break; - case OPB_MASTER_LS_ROUTE1: /* TODO */ + case OPB_MASTER_LS_ROUTE1: val = lpc->opb_irq_route1; break; case OPB_MASTER_LS_IRQ_STAT: @@ -575,11 +634,15 @@ static void opb_master_write(void *opaque, hwaddr addr, PnvLpcController *lpc = opaque; switch (addr) { - case OPB_MASTER_LS_ROUTE0: /* TODO */ + case OPB_MASTER_LS_ROUTE0: lpc->opb_irq_route0 = val; + pnv_lpc_eval_serirq_routes(lpc); + pnv_lpc_eval_irqs(lpc); break; - case 
OPB_MASTER_LS_ROUTE1: /* TODO */ + case OPB_MASTER_LS_ROUTE1: lpc->opb_irq_route1 = val; + pnv_lpc_eval_serirq_routes(lpc); + pnv_lpc_eval_irqs(lpc); break; case OPB_MASTER_LS_IRQ_STAT: lpc->opb_irq_stat &= ~val; @@ -664,6 +727,8 @@ static void pnv_lpc_power9_realize(DeviceState *dev, Error **errp) PnvLpcClass *plc = PNV_LPC_GET_CLASS(dev); Error *local_err = NULL; + object_property_set_bool(OBJECT(lpc), "psi-serirq", true, &error_abort); + plc->parent_realize(dev, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -673,6 +738,9 @@ static void pnv_lpc_power9_realize(DeviceState *dev, Error **errp) /* P9 uses a MMIO region */ memory_region_init_io(&lpc->xscom_regs, OBJECT(lpc), &pnv_lpc_mmio_ops, lpc, "lpcm", PNV9_LPCM_SIZE); + + /* P9 LPC routes ISA irqs to 4 PSI SERIRQ lines */ + qdev_init_gpio_out_named(dev, lpc->psi_irq_serirq, "SERIRQ", 4); } static void pnv_lpc_power9_class_init(ObjectClass *klass, void *data) @@ -751,13 +819,19 @@ static void pnv_lpc_realize(DeviceState *dev, Error **errp) memory_region_add_subregion(&lpc->opb_mr, LPC_HC_REGS_OPB_ADDR, &lpc->lpc_hc_regs); - qdev_init_gpio_out(dev, &lpc->psi_irq, 1); + qdev_init_gpio_out_named(dev, &lpc->psi_irq_lpchc, "LPCHC", 1); } +static Property pnv_lpc_properties[] = { + DEFINE_PROP_BOOL("psi-serirq", PnvLpcController, psi_has_serirq, false), + DEFINE_PROP_END_OF_LIST(), +}; + static void pnv_lpc_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + device_class_set_props(dc, pnv_lpc_properties); dc->realize = pnv_lpc_realize; dc->desc = "PowerNV LPC Controller"; dc->user_creatable = false; @@ -803,7 +877,7 @@ static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level) } if (pnv->cpld_irqstate != old_state) { - qemu_set_irq(lpc->psi_irq, pnv->cpld_irqstate != 0); + qemu_set_irq(lpc->psi_irq_lpchc, pnv->cpld_irqstate != 0); } } @@ -824,6 +898,13 @@ static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level) pnv_lpc_eval_irqs(lpc); } else 
{ lpc->lpc_hc_irq_inputs &= ~irq_bit; + + /* POWER9 adds an auto-clear mode that clears IRQSTAT bits on EOI */ + if (lpc->psi_has_serirq && + (lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_AUTO_CLEAR)) { + lpc->lpc_hc_irqstat &= ~irq_bit; + pnv_lpc_eval_irqs(lpc); + } } } @@ -854,6 +935,7 @@ ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp) handler = pnv_lpc_isa_irq_handler; } + /* POWER has a 17th irq, QEMU only implements the 16 regular device irqs */ irqs = qemu_allocate_irqs(handler, lpc, ISA_NUM_IRQS); isa_bus_register_input_irqs(isa_bus, irqs); diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h index 97c6872c3f..e0fd5e4130 100644 --- a/include/hw/ppc/pnv_lpc.h +++ b/include/hw/ppc/pnv_lpc.h @@ -23,6 +23,7 @@ #include "exec/memory.h" #include "hw/ppc/pnv.h" #include "hw/qdev-core.h" +#include "hw/isa/isa.h" /* For ISA_NUM_IRQS */ #define TYPE_PNV_LPC "pnv-lpc" typedef struct PnvLpcClass PnvLpcClass; @@ -87,8 +88,19 @@ struct PnvLpcController { /* XSCOM registers */ MemoryRegion xscom_regs; + /* + * In P8, ISA irqs are combined with internal sources to drive the + * LPCHC interrupt output. P9 ISA irqs raise one of 4 lines that + * drive PSI SERIRQ irqs, routing according to OPB routing registers. + */ + bool psi_has_serirq; + /* PSI to generate interrupts */ - qemu_irq psi_irq; + qemu_irq psi_irq_lpchc; + + /* P9 serirq lines and irq routing table */ + qemu_irq psi_irq_serirq[4]; + int irq_to_serirq_route[ISA_NUM_IRQS]; }; struct PnvLpcClass { From 53f18b3ef2c3e898e7dae21a1f33f9e2f3eed764 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 23 Feb 2024 22:34:56 +1000 Subject: [PATCH 16/96] ppc/pnv: Begin a more complete ADU LPC model for POWER9/10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements a framework for an ADU unit model. The ADU unit actually implements XSCOM, which is the bridge between MMIO and PIB. 
However it also includes control and status registers and other functions that are exposed as PIB (xscom) registers. To keep things simple, pnv_xscom.c remains the XSCOM bridge implementation, and pnv_adu.c implements the ADU registers and other functions. So far, just the ADU no-op registers in the pnv_xscom.c default handler are moved over to the adu model. Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/meson.build | 1 + hw/ppc/pnv.c | 16 ++++++ hw/ppc/pnv_adu.c | 111 +++++++++++++++++++++++++++++++++++++ hw/ppc/pnv_xscom.c | 9 --- hw/ppc/trace-events | 4 ++ include/hw/ppc/pnv_adu.h | 25 +++++++++ include/hw/ppc/pnv_chip.h | 3 + include/hw/ppc/pnv_xscom.h | 6 ++ 8 files changed, 166 insertions(+), 9 deletions(-) create mode 100644 hw/ppc/pnv_adu.c create mode 100644 include/hw/ppc/pnv_adu.h diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index 3ebbf329bc..7cd9189869 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -42,6 +42,7 @@ endif ppc_ss.add(when: 'CONFIG_POWERNV', if_true: files( 'pnv.c', 'pnv_xscom.c', + 'pnv_adu.c', 'pnv_core.c', 'pnv_i2c.c', 'pnv_lpc.c', diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index f56dcf6597..689197cbb7 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -1551,6 +1551,7 @@ static void pnv_chip_power9_instance_init(Object *obj) PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); int i; + object_initialize_child(obj, "adu", &chip9->adu, TYPE_PNV_ADU); object_initialize_child(obj, "xive", &chip9->xive, TYPE_PNV_XIVE); object_property_add_alias(obj, "xive-fabric", OBJECT(&chip9->xive), "xive-fabric"); @@ -1661,6 +1662,13 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) return; } + /* ADU */ + if (!qdev_realize(DEVICE(&chip9->adu), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV9_XSCOM_ADU_BASE, + &chip9->adu.xscom_regs); + pnv_chip_quad_realize(chip9, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -1827,6 +1835,7 @@ static void 
pnv_chip_power10_instance_init(Object *obj) PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); int i; + object_initialize_child(obj, "adu", &chip10->adu, TYPE_PNV_ADU); object_initialize_child(obj, "xive", &chip10->xive, TYPE_PNV_XIVE2); object_property_add_alias(obj, "xive-fabric", OBJECT(&chip10->xive), "xive-fabric"); @@ -1919,6 +1928,13 @@ static void pnv_chip_power10_realize(DeviceState *dev, Error **errp) return; } + /* ADU */ + if (!qdev_realize(DEVICE(&chip10->adu), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV10_XSCOM_ADU_BASE, + &chip10->adu.xscom_regs); + pnv_chip_power10_quad_realize(chip10, &local_err); if (local_err) { error_propagate(errp, local_err); diff --git a/hw/ppc/pnv_adu.c b/hw/ppc/pnv_adu.c new file mode 100644 index 0000000000..8279bc8b26 --- /dev/null +++ b/hw/ppc/pnv_adu.c @@ -0,0 +1,111 @@ +/* + * QEMU PowerPC PowerNV ADU unit + * + * The ADU unit actually implements XSCOM, which is the bridge between MMIO + * and PIB. However it also includes control and status registers and other + * functions that are exposed as PIB (xscom) registers. + * + * To keep things simple, pnv_xscom.c remains the XSCOM bridge + * implementation, and pnv_adu.c implements the ADU registers and other + * functions. + * + * Copyright (c) 2024, IBM Corporation. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" + +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_adu.h" +#include "hw/ppc/pnv_chip.h" +#include "hw/ppc/pnv_xscom.h" +#include "trace.h" + +static uint64_t pnv_adu_xscom_read(void *opaque, hwaddr addr, unsigned width) +{ + uint32_t offset = addr >> 3; + uint64_t val = 0; + + switch (offset) { + case 0x18: /* Receive status reg */ + case 0x12: /* log register */ + case 0x13: /* error register */ + break; + + default: + qemu_log_mask(LOG_UNIMP, "ADU Unimplemented read register: Ox%08x\n", + offset); + } + + trace_pnv_adu_xscom_read(addr, val); + + return val; +} + +static void pnv_adu_xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned width) +{ + uint32_t offset = addr >> 3; + + trace_pnv_adu_xscom_write(addr, val); + + switch (offset) { + case 0x18: /* Receive status reg */ + case 0x12: /* log register */ + case 0x13: /* error register */ + break; + + default: + qemu_log_mask(LOG_UNIMP, "ADU Unimplemented write register: Ox%08x\n", + offset); + } +} + +const MemoryRegionOps pnv_adu_xscom_ops = { + .read = pnv_adu_xscom_read, + .write = pnv_adu_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_adu_realize(DeviceState *dev, Error **errp) +{ + PnvADU *adu = PNV_ADU(dev); + + /* XScom regions for ADU registers */ + pnv_xscom_region_init(&adu->xscom_regs, OBJECT(dev), + &pnv_adu_xscom_ops, adu, "xscom-adu", + PNV9_XSCOM_ADU_SIZE); +} + +static void pnv_adu_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pnv_adu_realize; + dc->desc = "PowerNV ADU"; + dc->user_creatable = false; +} + +static const TypeInfo pnv_adu_type_info = { + .name = TYPE_PNV_ADU, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvADU), + .class_init = 
pnv_adu_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } }, +}; + +static void pnv_adu_register_types(void) +{ + type_register_static(&pnv_adu_type_info); +} + +type_init(pnv_adu_register_types); diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c index a17816d072..d192bbe2c2 100644 --- a/hw/ppc/pnv_xscom.c +++ b/hw/ppc/pnv_xscom.c @@ -75,11 +75,6 @@ static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba) case PRD_P9_IPOLL_REG_MASK: case PRD_P9_IPOLL_REG_STATUS: - /* P9 xscom reset */ - case 0x0090018: /* Receive status reg */ - case 0x0090012: /* log register */ - case 0x0090013: /* error register */ - /* P8 xscom reset */ case 0x2020007: /* ADU stuff, log register */ case 0x2020009: /* ADU stuff, error register */ @@ -119,10 +114,6 @@ static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val) case 0x1010c03: /* PIBAM FIR MASK */ case 0x1010c04: /* PIBAM FIR MASK */ case 0x1010c05: /* PIBAM FIR MASK */ - /* P9 xscom reset */ - case 0x0090018: /* Receive status reg */ - case 0x0090012: /* log register */ - case 0x0090013: /* error register */ /* P8 xscom reset */ case 0x2020007: /* ADU stuff, log register */ diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index bf29bbfd4b..1f125ce841 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -95,6 +95,10 @@ vof_write(uint32_t ih, unsigned cb, const char *msg) "ih=0x%x [%u] \"%s\"" vof_avail(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 +# pnv_adu.c +pnv_adu_xscom_read(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64 +pnv_adu_xscom_write(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64 + # pnv_chiptod.c pnv_chiptod_xscom_read(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64 pnv_chiptod_xscom_write(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" 
PRIx64 diff --git a/include/hw/ppc/pnv_adu.h b/include/hw/ppc/pnv_adu.h new file mode 100644 index 0000000000..b5f308627b --- /dev/null +++ b/include/hw/ppc/pnv_adu.h @@ -0,0 +1,25 @@ +/* + * QEMU PowerPC PowerNV Emulation of some ADU behaviour + * + * Copyright (c) 2024, IBM Corporation. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef PPC_PNV_ADU_H +#define PPC_PNV_ADU_H + +#include "hw/ppc/pnv.h" +#include "hw/qdev-core.h" + +#define TYPE_PNV_ADU "pnv-adu" + +OBJECT_DECLARE_TYPE(PnvADU, PnvADUClass, PNV_ADU) + +struct PnvADU { + DeviceState xd; + + MemoryRegion xscom_regs; +}; + +#endif /* PPC_PNV_ADU_H */ diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h index a4ed17ac59..4eaa7d3999 100644 --- a/include/hw/ppc/pnv_chip.h +++ b/include/hw/ppc/pnv_chip.h @@ -2,6 +2,7 @@ #define PPC_PNV_CHIP_H #include "hw/pci-host/pnv_phb4.h" +#include "hw/ppc/pnv_adu.h" #include "hw/ppc/pnv_chiptod.h" #include "hw/ppc/pnv_core.h" #include "hw/ppc/pnv_homer.h" @@ -77,6 +78,7 @@ struct Pnv9Chip { PnvChip parent_obj; /*< public >*/ + PnvADU adu; PnvXive xive; Pnv9Psi psi; PnvLpcController lpc; @@ -110,6 +112,7 @@ struct Pnv10Chip { PnvChip parent_obj; /*< public >*/ + PnvADU adu; PnvXive2 xive; Pnv9Psi psi; PnvLpcController lpc; diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h index 6209e18492..e93d310e79 100644 --- a/include/hw/ppc/pnv_xscom.h +++ b/include/hw/ppc/pnv_xscom.h @@ -82,6 +82,9 @@ struct PnvXScomInterfaceClass { #define PNV_XSCOM_PBCQ_SPCI_BASE 0x9013c00 #define PNV_XSCOM_PBCQ_SPCI_SIZE 0x5 +#define PNV9_XSCOM_ADU_BASE 0x0090000 +#define PNV9_XSCOM_ADU_SIZE 0x55 + /* * Layout of the XSCOM PCB addresses (POWER 9) */ @@ -128,6 +131,9 @@ struct PnvXScomInterfaceClass { #define PNV9_XSCOM_PEC_PCI_STK1 0x140 #define PNV9_XSCOM_PEC_PCI_STK2 0x180 +#define PNV10_XSCOM_ADU_BASE 0x0090000 +#define PNV10_XSCOM_ADU_SIZE 0x55 + /* * Layout of the XSCOM PCB addresses (POWER 10) */ From 24bd283bccb334109f112839ab6867f0192045d6 
Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 17 Apr 2024 14:50:13 +1000 Subject: [PATCH 17/96] ppc/pnv: Implement ADU access to LPC space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of the functions of the ADU is indirect memory access engines that send and receive data via ADU registers. This implements the ADU LPC memory access functionality sufficiently for IBM proprietary firmware to access the UART and print characters to the serial port as it does on real hardware. This requires a linkage between adu and lpc, which allows adu to perform memory access in the lpc space. Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 4 ++ hw/ppc/pnv_adu.c | 95 ++++++++++++++++++++++++++++++++++++++++ hw/ppc/pnv_lpc.c | 12 ++--- include/hw/ppc/pnv_adu.h | 7 +++ include/hw/ppc/pnv_lpc.h | 5 +++ 5 files changed, 117 insertions(+), 6 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 689197cbb7..91ff1be21f 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -1663,6 +1663,8 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) } /* ADU */ + object_property_set_link(OBJECT(&chip9->adu), "lpc", OBJECT(&chip9->lpc), + &error_abort); if (!qdev_realize(DEVICE(&chip9->adu), NULL, errp)) { return; } @@ -1929,6 +1931,8 @@ static void pnv_chip_power10_realize(DeviceState *dev, Error **errp) } /* ADU */ + object_property_set_link(OBJECT(&chip10->adu), "lpc", OBJECT(&chip10->lpc), + &error_abort); if (!qdev_realize(DEVICE(&chip10->adu), NULL, errp)) { return; } diff --git a/hw/ppc/pnv_adu.c b/hw/ppc/pnv_adu.c index 8279bc8b26..81b7d6e526 100644 --- a/hw/ppc/pnv_adu.c +++ b/hw/ppc/pnv_adu.c @@ -21,11 +21,18 @@ #include "hw/ppc/pnv.h" #include "hw/ppc/pnv_adu.h" #include "hw/ppc/pnv_chip.h" +#include "hw/ppc/pnv_lpc.h" #include "hw/ppc/pnv_xscom.h" #include "trace.h" +#define ADU_LPC_BASE_REG 0x40 +#define ADU_LPC_CMD_REG 0x41 +#define ADU_LPC_DATA_REG 0x42 +#define 
ADU_LPC_STATUS_REG 0x43 + static uint64_t pnv_adu_xscom_read(void *opaque, hwaddr addr, unsigned width) { + PnvADU *adu = PNV_ADU(opaque); uint32_t offset = addr >> 3; uint64_t val = 0; @@ -34,6 +41,24 @@ static uint64_t pnv_adu_xscom_read(void *opaque, hwaddr addr, unsigned width) case 0x12: /* log register */ case 0x13: /* error register */ break; + case ADU_LPC_BASE_REG: + /* + * LPC Address Map in Pervasive ADU Workbook + * + * return PNV10_LPCM_BASE(chip) & PPC_BITMASK(8, 31); + * XXX: implement as class property, or get from LPC? + */ + qemu_log_mask(LOG_UNIMP, "ADU: LPC_BASE_REG is not implemented\n"); + break; + case ADU_LPC_CMD_REG: + val = adu->lpc_cmd_reg; + break; + case ADU_LPC_DATA_REG: + val = adu->lpc_data_reg; + break; + case ADU_LPC_STATUS_REG: + val = PPC_BIT(0); /* ack / done */ + break; default: qemu_log_mask(LOG_UNIMP, "ADU Unimplemented read register: Ox%08x\n", @@ -45,9 +70,30 @@ static uint64_t pnv_adu_xscom_read(void *opaque, hwaddr addr, unsigned width) return val; } +static bool lpc_cmd_read(PnvADU *adu) +{ + return !!(adu->lpc_cmd_reg & PPC_BIT(0)); +} + +static bool lpc_cmd_write(PnvADU *adu) +{ + return !lpc_cmd_read(adu); +} + +static uint32_t lpc_cmd_addr(PnvADU *adu) +{ + return (adu->lpc_cmd_reg & PPC_BITMASK(32, 63)) >> PPC_BIT_NR(63); +} + +static uint32_t lpc_cmd_size(PnvADU *adu) +{ + return (adu->lpc_cmd_reg & PPC_BITMASK(5, 11)) >> PPC_BIT_NR(11); +} + static void pnv_adu_xscom_write(void *opaque, hwaddr addr, uint64_t val, unsigned width) { + PnvADU *adu = PNV_ADU(opaque); uint32_t offset = addr >> 3; trace_pnv_adu_xscom_write(addr, val); @@ -58,6 +104,47 @@ static void pnv_adu_xscom_write(void *opaque, hwaddr addr, uint64_t val, case 0x13: /* error register */ break; + case ADU_LPC_BASE_REG: + qemu_log_mask(LOG_UNIMP, + "ADU: Changing LPC_BASE_REG is not implemented\n"); + break; + + case ADU_LPC_CMD_REG: + adu->lpc_cmd_reg = val; + if (lpc_cmd_read(adu)) { + uint32_t lpc_addr = lpc_cmd_addr(adu); + uint32_t lpc_size = 
lpc_cmd_size(adu); + uint64_t data = 0; + + pnv_lpc_opb_read(adu->lpc, lpc_addr, (void *)&data, lpc_size); + + /* + * ADU access is performed within 8-byte aligned sectors. Smaller + * access sizes don't get formatted to the least significant byte, + * but rather appear in the data reg at the same offset as the + * address in memory. This shifts them into that position. + */ + adu->lpc_data_reg = be64_to_cpu(data) >> ((lpc_addr & 7) * 8); + } + break; + + case ADU_LPC_DATA_REG: + adu->lpc_data_reg = val; + if (lpc_cmd_write(adu)) { + uint32_t lpc_addr = lpc_cmd_addr(adu); + uint32_t lpc_size = lpc_cmd_size(adu); + uint64_t data; + + data = cpu_to_be64(val) >> ((lpc_addr & 7) * 8); /* See above */ + pnv_lpc_opb_write(adu->lpc, lpc_addr, (void *)&data, lpc_size); + } + break; + + case ADU_LPC_STATUS_REG: + qemu_log_mask(LOG_UNIMP, + "ADU: Changing LPC_STATUS_REG is not implemented\n"); + break; + default: qemu_log_mask(LOG_UNIMP, "ADU Unimplemented write register: Ox%08x\n", offset); @@ -78,18 +165,26 @@ static void pnv_adu_realize(DeviceState *dev, Error **errp) { PnvADU *adu = PNV_ADU(dev); + assert(adu->lpc); + /* XScom regions for ADU registers */ pnv_xscom_region_init(&adu->xscom_regs, OBJECT(dev), &pnv_adu_xscom_ops, adu, "xscom-adu", PNV9_XSCOM_ADU_SIZE); } +static Property pnv_adu_properties[] = { + DEFINE_PROP_LINK("lpc", PnvADU, lpc, TYPE_PNV_LPC, PnvLpcController *), + DEFINE_PROP_END_OF_LIST(), +}; + static void pnv_adu_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = pnv_adu_realize; dc->desc = "PowerNV ADU"; + device_class_set_props(dc, pnv_adu_properties); dc->user_creatable = false; } diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c index 0f14e180af..f8aad955b5 100644 --- a/hw/ppc/pnv_lpc.c +++ b/hw/ppc/pnv_lpc.c @@ -236,16 +236,16 @@ int pnv_dt_lpc(PnvChip *chip, void *fdt, int root_offset, uint64_t lpcm_addr, * TODO: rework to use address_space_stq() and address_space_ldq() * instead. 
*/ -static bool opb_read(PnvLpcController *lpc, uint32_t addr, uint8_t *data, - int sz) +bool pnv_lpc_opb_read(PnvLpcController *lpc, uint32_t addr, + uint8_t *data, int sz) { /* XXX Handle access size limits and FW read caching here */ return !address_space_read(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED, data, sz); } -static bool opb_write(PnvLpcController *lpc, uint32_t addr, uint8_t *data, - int sz) +bool pnv_lpc_opb_write(PnvLpcController *lpc, uint32_t addr, + uint8_t *data, int sz) { /* XXX Handle access size limits here */ return !address_space_write(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED, @@ -277,7 +277,7 @@ static void pnv_lpc_do_eccb(PnvLpcController *lpc, uint64_t cmd) } if (cmd & ECCB_CTL_READ) { - success = opb_read(lpc, opb_addr, data, sz); + success = pnv_lpc_opb_read(lpc, opb_addr, data, sz); if (success) { lpc->eccb_stat_reg = ECCB_STAT_OP_DONE | (((uint64_t)data[0]) << 24 | @@ -294,7 +294,7 @@ static void pnv_lpc_do_eccb(PnvLpcController *lpc, uint64_t cmd) data[2] = lpc->eccb_data_reg >> 8; data[3] = lpc->eccb_data_reg; - success = opb_write(lpc, opb_addr, data, sz); + success = pnv_lpc_opb_write(lpc, opb_addr, data, sz); lpc->eccb_stat_reg = ECCB_STAT_OP_DONE; } /* XXX Which error bit (if any) to signal OPB error ? 
*/ diff --git a/include/hw/ppc/pnv_adu.h b/include/hw/ppc/pnv_adu.h index b5f308627b..f9dbd8c8b3 100644 --- a/include/hw/ppc/pnv_adu.h +++ b/include/hw/ppc/pnv_adu.h @@ -10,6 +10,7 @@ #define PPC_PNV_ADU_H #include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_lpc.h" #include "hw/qdev-core.h" #define TYPE_PNV_ADU "pnv-adu" @@ -19,6 +20,12 @@ OBJECT_DECLARE_TYPE(PnvADU, PnvADUClass, PNV_ADU) struct PnvADU { DeviceState xd; + /* LPCMC (LPC Master Controller) access engine */ + PnvLpcController *lpc; + uint64_t lpc_base_reg; + uint64_t lpc_cmd_reg; + uint64_t lpc_data_reg; + MemoryRegion xscom_regs; }; diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h index e0fd5e4130..174add4c53 100644 --- a/include/hw/ppc/pnv_lpc.h +++ b/include/hw/ppc/pnv_lpc.h @@ -109,6 +109,11 @@ struct PnvLpcClass { DeviceRealize parent_realize; }; +bool pnv_lpc_opb_read(PnvLpcController *lpc, uint32_t addr, + uint8_t *data, int sz); +bool pnv_lpc_opb_write(PnvLpcController *lpc, uint32_t addr, + uint8_t *data, int sz); + ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp); int pnv_dt_lpc(PnvChip *chip, void *fdt, int root_offset, uint64_t lpcm_addr, uint64_t lpcm_size); From 7f516cdeef6d62e78ee769855dff95666e6b8c1d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 6 Jul 2024 13:22:58 +1000 Subject: [PATCH 18/96] target/ppc: Fix msgsnd for POWER8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POWER8 (ISA v2.07S) introduced the doorbell facility, the msgsnd instruction behaved mostly like msgsndp, it was addressed by TIR and could only send interrupts between threads on the core. ISA v3.0 changed msgsnd to be addressed by PIR and can interrupt any thread in the system. msgsnd only implements the v3.0 semantics, which can make multi-threaded POWER8 hang when booting Linux (due to IPIs failing). This change adds v2.07 semantics. 
Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- target/ppc/excp_helper.c | 74 ++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 0cd542675f..c0120c8a88 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -2998,6 +2998,41 @@ static inline bool dbell_bcast_subproc(target_ulong rb) return (rb & DBELL_BRDCAST_MASK) == DBELL_BRDCAST_SUBPROC; } +/* + * Send an interrupt to a thread in the same core as env. + */ +static void msgsnd_core_tir(CPUPPCState *env, uint32_t target_tir, int irq) +{ + PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + uint32_t nr_threads = cs->nr_threads; + + if (!(env->flags & POWERPC_FLAG_SMT_1LPAR)) { + nr_threads = 1; /* msgsndp behaves as 1-thread in LPAR-per-thread mode*/ + } + + if (target_tir >= nr_threads) { + return; + } + + if (nr_threads == 1) { + ppc_set_irq(cpu, irq, 1); + } else { + CPUState *ccs; + + /* Does iothread need to be locked for walking CPU list?
*/ + bql_lock(); + THREAD_SIBLING_FOREACH(cs, ccs) { + PowerPCCPU *ccpu = POWERPC_CPU(ccs); + if (target_tir == ppc_cpu_tir(ccpu)) { + ppc_set_irq(ccpu, irq, 1); + break; + } + } + bql_unlock(); + } +} + void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb) { if (!dbell_type_server(rb)) { @@ -3018,6 +3053,13 @@ void helper_book3s_msgsnd(CPUPPCState *env, target_ulong rb) return; } + /* POWER8 msgsnd is like msgsndp (targets a thread within core) */ + if (!(env->insns_flags2 & PPC2_ISA300)) { + msgsnd_core_tir(env, rb & PPC_BITMASK(57, 63), PPC_INTERRUPT_HDOORBELL); + return; + } + + /* POWER9 and later msgsnd is a global (targets any thread) */ cpu = ppc_get_vcpu_by_pir(pir); if (!cpu) { return; @@ -3064,41 +3106,13 @@ void helper_book3s_msgclrp(CPUPPCState *env, target_ulong rb) */ void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb) { - CPUState *cs = env_cpu(env); - PowerPCCPU *cpu = env_archcpu(env); - CPUState *ccs; - uint32_t nr_threads = cs->nr_threads; - int ttir = rb & PPC_BITMASK(57, 63); - helper_hfscr_facility_check(env, HFSCR_MSGP, "msgsndp", HFSCR_IC_MSGP); - if (!(env->flags & POWERPC_FLAG_SMT_1LPAR)) { - nr_threads = 1; /* msgsndp behaves as 1-thread in LPAR-per-thread mode*/ - } - - if (!dbell_type_server(rb) || ttir >= nr_threads) { + if (!dbell_type_server(rb)) { return; } - if (nr_threads == 1) { - ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, 1); - return; - } - - /* Does iothread need to be locked for walking CPU list? 
*/ - bql_lock(); - THREAD_SIBLING_FOREACH(cs, ccs) { - PowerPCCPU *ccpu = POWERPC_CPU(ccs); - uint32_t thread_id = ppc_cpu_tir(ccpu); - - if (ttir == thread_id) { - ppc_set_irq(ccpu, PPC_INTERRUPT_DOORBELL, 1); - bql_unlock(); - return; - } - } - - g_assert_not_reached(); + msgsnd_core_tir(env, rb & PPC_BITMASK(57, 63), PPC_INTERRUPT_DOORBELL); } #endif /* TARGET_PPC64 */ From 060e61436794d13ede9a1d0eb2b1d0cf3b7cfcfd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 26 May 2024 15:04:05 +1000 Subject: [PATCH 19/96] ppc/pnv: Add pointer from PnvCPUState to PnvCore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helps move core state from CPU to core structures. Reviewed-by: Cédric Le Goater Reviewed-by: Harsh Prateek Bora Signed-off-by: Nicholas Piggin --- hw/ppc/pnv_core.c | 3 +++ include/hw/ppc/pnv_core.h | 1 + 2 files changed, 4 insertions(+) diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index f40ab721d6..2da271ffb6 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -278,6 +278,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) pc->threads = g_new(PowerPCCPU *, cc->nr_threads); for (i = 0; i < cc->nr_threads; i++) { PowerPCCPU *cpu; + PnvCPUState *pnv_cpu; obj = object_new(typename); cpu = POWERPC_CPU(obj); @@ -288,6 +289,8 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) object_property_add_child(OBJECT(pc), name, obj); cpu->machine_data = g_new0(PnvCPUState, 1); + pnv_cpu = pnv_cpu_state(cpu); + pnv_cpu->pnv_core = pc; object_unref(obj); } diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index c6d62fd145..29cab9dfd9 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -54,6 +54,7 @@ struct PnvCoreClass { #define PNV_CORE_TYPE_NAME(cpu_model) cpu_model PNV_CORE_TYPE_SUFFIX typedef struct PnvCPUState { + PnvCore *pnv_core; Object *intc; } PnvCPUState; From 0ca94b2f11223d41258e6a7a046e5ccde831de46 Mon Sep 17 00:00:00 2001 From: 
Nicholas Piggin Date: Fri, 24 May 2024 21:58:18 +1000 Subject: [PATCH 20/96] ppc/pnv: Move timebase state into PnvCore The timebase state machine is per-core state and can be driven by any thread in the core. It is currently implemented as a hack where the state is in a CPU structure and only thread 0's state is accessed by the chiptod, which limits programming the timebase side of the state machine to thread 0 of a core. Move the state out into PnvCore and share it among all threads. Reviewed-by: Harsh Prateek Bora Signed-off-by: Nicholas Piggin --- hw/ppc/pnv_chiptod.c | 7 ++--- include/hw/ppc/pnv_core.h | 17 ++++++++++ target/ppc/cpu.h | 21 ------------- target/ppc/timebase_helper.c | 60 +++++++++++++++++++++--------------- 4 files changed, 55 insertions(+), 50 deletions(-) diff --git a/hw/ppc/pnv_chiptod.c b/hw/ppc/pnv_chiptod.c index 3831a72101..1e41fe557a 100644 --- a/hw/ppc/pnv_chiptod.c +++ b/hw/ppc/pnv_chiptod.c @@ -364,8 +364,7 @@ static void pnv_chiptod_xscom_write(void *opaque, hwaddr addr, qemu_log_mask(LOG_GUEST_ERROR, "pnv_chiptod: xscom write reg" " TOD_MOVE_TOD_TO_TB_REG with no slave target\n"); } else { - PowerPCCPU *cpu = chiptod->slave_pc_target->threads[0]; - CPUPPCState *env = &cpu->env; + PnvCore *pc = chiptod->slave_pc_target; /* * Moving TOD to TB will set the TB of all threads in a @@ -377,8 +376,8 @@ static void pnv_chiptod_xscom_write(void *opaque, hwaddr addr, * thread 0.
*/ - if (env->pnv_tod_tbst.tb_ready_for_tod) { - env->pnv_tod_tbst.tod_sent_to_tb = 1; + if (pc->tod_state.tb_ready_for_tod) { + pc->tod_state.tod_sent_to_tb = 1; } else { qemu_log_mask(LOG_GUEST_ERROR, "pnv_chiptod: xscom write reg" " TOD_MOVE_TOD_TO_TB_REG with TB not ready to" diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index 29cab9dfd9..ffec8516ae 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -25,6 +25,20 @@ #include "hw/ppc/pnv.h" #include "qom/object.h" +/* Per-core ChipTOD / TimeBase state */ +typedef struct PnvCoreTODState { + int tb_ready_for_tod; /* core TB ready to receive TOD from chiptod */ + int tod_sent_to_tb; /* chiptod sent TOD to the core TB */ + + /* + * "Timers" for async TBST events are simulated by mfTFAC because TFAC + * is polled for such events. These are just used to ensure firmware + * performs the polling at least a few times. + */ + int tb_state_timer; + int tb_sync_pulse_timer; +} PnvCoreTODState; + #define TYPE_PNV_CORE "powernv-cpu-core" OBJECT_DECLARE_TYPE(PnvCore, PnvCoreClass, PNV_CORE) @@ -38,6 +52,9 @@ struct PnvCore { uint32_t pir; uint32_t hwid; uint64_t hrmor; + + PnvCoreTODState tod_state; + PnvChip *chip; MemoryRegion xscom_regs; diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 2015e603d4..c78d6ca91a 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1196,21 +1196,6 @@ DEXCR_ASPECT(SRAPD, 4) DEXCR_ASPECT(NPHIE, 5) DEXCR_ASPECT(PHIE, 6) -/*****************************************************************************/ -/* PowerNV ChipTOD and TimeBase State Machine */ -struct pnv_tod_tbst { - int tb_ready_for_tod; /* core TB ready to receive TOD from chiptod */ - int tod_sent_to_tb; /* chiptod sent TOD to the core TB */ - - /* - * "Timers" for async TBST events are simulated by mfTFAC because TFAC - * is polled for such events. These are just used to ensure firmware - * performs the polling at least a few times. 
- */ - int tb_state_timer; - int tb_sync_pulse_timer; -}; - /*****************************************************************************/ /* The whole PowerPC CPU context */ @@ -1291,12 +1276,6 @@ struct CPUArchState { uint32_t tlb_need_flush; /* Delayed flush needed */ #define TLB_NEED_LOCAL_FLUSH 0x1 #define TLB_NEED_GLOBAL_FLUSH 0x2 - -#if defined(TARGET_PPC64) - /* PowerNV chiptod / timebase facility state. */ - /* Would be nice to put these into PnvCore */ - struct pnv_tod_tbst pnv_tod_tbst; -#endif #endif /* Other registers */ diff --git a/target/ppc/timebase_helper.c b/target/ppc/timebase_helper.c index 39d397416e..b02535bbd5 100644 --- a/target/ppc/timebase_helper.c +++ b/target/ppc/timebase_helper.c @@ -217,7 +217,13 @@ void helper_store_booke_tsr(CPUPPCState *env, target_ulong val) store_booke_tsr(env, val); } -#if defined(TARGET_PPC64) +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) +/* + * qemu-user breaks with pnv headers, so they go under ifdefs for now. + * A clean up may be to move powernv specific registers and helpers into + * target/ppc/pnv_helper.c + */ +#include "hw/ppc/pnv_core.h" /* * POWER processor Timebase Facility */ @@ -298,8 +304,17 @@ static void write_tfmr(CPUPPCState *env, target_ulong val) } } +static PnvCoreTODState *cpu_get_tbst(PowerPCCPU *cpu) +{ + PnvCore *pc = pnv_cpu_state(cpu)->pnv_core; + + return &pc->tod_state; +} + static void tb_state_machine_step(CPUPPCState *env) { + PowerPCCPU *cpu = env_archcpu(env); + PnvCoreTODState *tod_state = cpu_get_tbst(cpu); uint64_t tfmr = env->spr[SPR_TFMR]; unsigned int tbst = tfmr_get_tb_state(tfmr); @@ -307,15 +322,15 @@ static void tb_state_machine_step(CPUPPCState *env) return; } - if (env->pnv_tod_tbst.tb_sync_pulse_timer) { - env->pnv_tod_tbst.tb_sync_pulse_timer--; + if (tod_state->tb_sync_pulse_timer) { + tod_state->tb_sync_pulse_timer--; } else { tfmr |= TFMR_TB_SYNC_OCCURED; write_tfmr(env, tfmr); } - if (env->pnv_tod_tbst.tb_state_timer) { - 
env->pnv_tod_tbst.tb_state_timer--; + if (tod_state->tb_state_timer) { + tod_state->tb_state_timer--; return; } @@ -332,20 +347,20 @@ static void tb_state_machine_step(CPUPPCState *env) } else if (tfmr & TFMR_MOVE_CHIP_TOD_TO_TB) { if (tbst == TBST_SYNC_WAIT) { tfmr = tfmr_new_tb_state(tfmr, TBST_GET_TOD); - env->pnv_tod_tbst.tb_state_timer = 3; + tod_state->tb_state_timer = 3; } else if (tbst == TBST_GET_TOD) { - if (env->pnv_tod_tbst.tod_sent_to_tb) { + if (tod_state->tod_sent_to_tb) { tfmr = tfmr_new_tb_state(tfmr, TBST_TB_RUNNING); tfmr &= ~TFMR_MOVE_CHIP_TOD_TO_TB; - env->pnv_tod_tbst.tb_ready_for_tod = 0; - env->pnv_tod_tbst.tod_sent_to_tb = 0; + tod_state->tb_ready_for_tod = 0; + tod_state->tod_sent_to_tb = 0; } } else { qemu_log_mask(LOG_GUEST_ERROR, "TFMR error: MOVE_CHIP_TOD_TO_TB " "state machine in invalid state 0x%x\n", tbst); tfmr = tfmr_new_tb_state(tfmr, TBST_TB_ERROR); tfmr |= TFMR_FIRMWARE_CONTROL_ERROR; - env->pnv_tod_tbst.tb_ready_for_tod = 0; + tod_state->tb_ready_for_tod = 0; } } @@ -361,6 +376,8 @@ target_ulong helper_load_tfmr(CPUPPCState *env) void helper_store_tfmr(CPUPPCState *env, target_ulong val) { + PowerPCCPU *cpu = env_archcpu(env); + PnvCoreTODState *tod_state = cpu_get_tbst(cpu); uint64_t tfmr = env->spr[SPR_TFMR]; uint64_t clear_on_write; unsigned int tbst = tfmr_get_tb_state(tfmr); @@ -384,14 +401,7 @@ void helper_store_tfmr(CPUPPCState *env, target_ulong val) * after the second mfspr. 
*/ tfmr &= ~TFMR_TB_SYNC_OCCURED; - env->pnv_tod_tbst.tb_sync_pulse_timer = 1; - - if (ppc_cpu_tir(env_archcpu(env)) != 0 && - (val & (TFMR_LOAD_TOD_MOD | TFMR_MOVE_CHIP_TOD_TO_TB))) { - qemu_log_mask(LOG_UNIMP, "TFMR timebase state machine can only be " - "driven by thread 0\n"); - goto out; - } + tod_state->tb_sync_pulse_timer = 1; if (((tfmr | val) & (TFMR_LOAD_TOD_MOD | TFMR_MOVE_CHIP_TOD_TO_TB)) == (TFMR_LOAD_TOD_MOD | TFMR_MOVE_CHIP_TOD_TO_TB)) { @@ -399,7 +409,7 @@ void helper_store_tfmr(CPUPPCState *env, target_ulong val) "MOVE_CHIP_TOD_TO_TB both set\n"); tfmr = tfmr_new_tb_state(tfmr, TBST_TB_ERROR); tfmr |= TFMR_FIRMWARE_CONTROL_ERROR; - env->pnv_tod_tbst.tb_ready_for_tod = 0; + tod_state->tb_ready_for_tod = 0; goto out; } @@ -413,8 +423,8 @@ void helper_store_tfmr(CPUPPCState *env, target_ulong val) tfmr &= ~TFMR_LOAD_TOD_MOD; tfmr &= ~TFMR_MOVE_CHIP_TOD_TO_TB; tfmr &= ~TFMR_FIRMWARE_CONTROL_ERROR; /* XXX: should this be cleared? */ - env->pnv_tod_tbst.tb_ready_for_tod = 0; - env->pnv_tod_tbst.tod_sent_to_tb = 0; + tod_state->tb_ready_for_tod = 0; + tod_state->tod_sent_to_tb = 0; goto out; } @@ -427,19 +437,19 @@ void helper_store_tfmr(CPUPPCState *env, target_ulong val) if (tfmr & TFMR_LOAD_TOD_MOD) { /* Wait for an arbitrary 3 mfspr until the next state transition. 
*/ - env->pnv_tod_tbst.tb_state_timer = 3; + tod_state->tb_state_timer = 3; } else if (tfmr & TFMR_MOVE_CHIP_TOD_TO_TB) { if (tbst == TBST_NOT_SET) { tfmr = tfmr_new_tb_state(tfmr, TBST_SYNC_WAIT); - env->pnv_tod_tbst.tb_ready_for_tod = 1; - env->pnv_tod_tbst.tb_state_timer = 3; /* arbitrary */ + tod_state->tb_ready_for_tod = 1; + tod_state->tb_state_timer = 3; /* arbitrary */ } else { qemu_log_mask(LOG_GUEST_ERROR, "TFMR error: MOVE_CHIP_TOD_TO_TB " "not in TB not set state 0x%x\n", tbst); tfmr = tfmr_new_tb_state(tfmr, TBST_TB_ERROR); tfmr |= TFMR_FIRMWARE_CONTROL_ERROR; - env->pnv_tod_tbst.tb_ready_for_tod = 0; + tod_state->tb_ready_for_tod = 0; } } From 60d30cff8472c0bf05a40b0f55221fb4efb768e2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 26 May 2024 21:24:00 +1000 Subject: [PATCH 21/96] target/ppc: Move SPR indirect registers into PnvCore SPRC/SPRD were recently added to all BookS CPUs supported, but they are only tested on POWER9 and POWER10, so restrict them to those CPUs. SPR indirect scratch registers are presently replicated per-CPU like SMT SPRs, but the PnvCore is a better place for them since they are restricted to P9/P10. Also add SPR indirect read access to core thread state for POWER9 since skiboot accesses that when booting to check for big-core mode. 
Signed-off-by: Nicholas Piggin --- include/hw/ppc/pnv_core.h | 1 + target/ppc/cpu.h | 3 -- target/ppc/cpu_init.c | 21 +++++++------ target/ppc/misc_helper.c | 65 ++++++++++++++++++++------------------- 4 files changed, 45 insertions(+), 45 deletions(-) diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index ffec8516ae..693acb189b 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -53,6 +53,7 @@ struct PnvCore { uint32_t hwid; uint64_t hrmor; + target_ulong scratch[8]; /* SPRC/SPRD indirect SCRATCH registers */ PnvCoreTODState tod_state; PnvChip *chip; diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index c78d6ca91a..95ba9e7590 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1253,9 +1253,6 @@ struct CPUArchState { ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */ struct CPUBreakpoint *ciabr_breakpoint; struct CPUWatchpoint *dawr0_watchpoint; - - /* POWER CPU regs/state */ - target_ulong scratch[8]; /* SCRATCH registers (shared across core) */ #endif target_ulong sr[32]; /* segment registers */ uint32_t nb_BATs; /* number of BATs */ diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 2e652f498e..42bb047b54 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5760,16 +5760,6 @@ static void register_power_common_book4_sprs(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_core_write_generic, 0x00000000); - spr_register_hv(env, SPR_POWER_SPRC, "SPRC", - SPR_NOACCESS, SPR_NOACCESS, - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_sprc, - 0x00000000); - spr_register_hv(env, SPR_POWER_SPRD, "SPRD", - SPR_NOACCESS, SPR_NOACCESS, - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_sprd, &spr_write_sprd, - 0x00000000); #endif } @@ -5803,6 +5793,17 @@ static void register_power8_book4_sprs(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, KVM_REG_PPC_WORT, 0); + /* SPRC/SPRD exist in earlier CPUs but only tested on POWER9/10 */ + 
spr_register_hv(env, SPR_POWER_SPRC, "SPRC", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_sprc, + 0x00000000); + spr_register_hv(env, SPR_POWER_SPRD, "SPRD", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_sprd, &spr_write_sprd, + 0x00000000); #endif } diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index fa47be2298..de7c8813ec 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -308,6 +308,13 @@ void helper_store_dpdes(CPUPPCState *env, target_ulong val) bql_unlock(); } +/* + * qemu-user breaks with pnv headers, so they go under ifdefs for now. + * A clean up may be to move powernv specific registers and helpers into + * target/ppc/pnv_helper.c + */ +#include "hw/ppc/pnv_core.h" + /* Indirect SCOM (SPRC/SPRD) access to SCRATCH0-7 are implemented. */ void helper_store_sprc(CPUPPCState *env, target_ulong val) { @@ -321,11 +328,18 @@ void helper_store_sprc(CPUPPCState *env, target_ulong val) target_ulong helper_load_sprd(CPUPPCState *env) { + /* + * SPRD is a HV-only register for Power CPUs, so this will only be + * accessed by powernv machines. 
+ */ + PowerPCCPU *cpu = env_archcpu(env); + PnvCore *pc = pnv_cpu_state(cpu)->pnv_core; target_ulong sprc = env->spr[SPR_POWER_SPRC]; - switch (sprc & 0x3c0) { - case 0: /* SCRATCH0-7 */ - return env->scratch[(sprc >> 3) & 0x7]; + switch (sprc & 0x3e0) { + case 0: /* SCRATCH0-3 */ + case 1: /* SCRATCH4-7 */ + return pc->scratch[(sprc >> 3) & 0x7]; default: qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x" TARGET_FMT_lx"\n", sprc); @@ -334,41 +348,28 @@ target_ulong helper_load_sprd(CPUPPCState *env) return 0; } -static void do_store_scratch(CPUPPCState *env, int nr, target_ulong val) -{ - CPUState *cs = env_cpu(env); - CPUState *ccs; - uint32_t nr_threads = cs->nr_threads; - - /* - * Log stores to SCRATCH, because some firmware uses these for debugging - * and logging, but they would normally be read by the BMC, which is - * not implemented in QEMU yet. This gives a way to get at the information. - * Could also dump these upon checkstop. - */ - qemu_log("SPRD write 0x" TARGET_FMT_lx " to SCRATCH%d\n", val, nr); - - if (nr_threads == 1) { - env->scratch[nr] = val; - return; - } - - THREAD_SIBLING_FOREACH(cs, ccs) { - CPUPPCState *cenv = &POWERPC_CPU(ccs)->env; - cenv->scratch[nr] = val; - } -} - void helper_store_sprd(CPUPPCState *env, target_ulong val) { target_ulong sprc = env->spr[SPR_POWER_SPRC]; + PowerPCCPU *cpu = env_archcpu(env); + PnvCore *pc = pnv_cpu_state(cpu)->pnv_core; + int nr; - switch (sprc & 0x3c0) { - case 0: /* SCRATCH0-7 */ - do_store_scratch(env, (sprc >> 3) & 0x7, val); + switch (sprc & 0x3e0) { + case 0: /* SCRATCH0-3 */ + case 1: /* SCRATCH4-7 */ + /* + * Log stores to SCRATCH, because some firmware uses these for + * debugging and logging, but they would normally be read by the BMC, + * which is not implemented in QEMU yet. This gives a way to get at the + * information. Could also dump these upon checkstop. 
+ */ + nr = (sprc >> 3) & 0x7; + qemu_log("SPRD write 0x" TARGET_FMT_lx " to SCRATCH%d\n", val, nr); + pc->scratch[nr] = val; break; default: - qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x" + qemu_log_mask(LOG_UNIMP, "mtSPRD: Unimplemented SPRC:0x" TARGET_FMT_lx"\n", sprc); break; } From d76cb5a53b04d52db1500e83bd6bdfbfeca44e4d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 24 May 2024 15:52:04 +1000 Subject: [PATCH 22/96] ppc/pnv: use class attribute to limit SMT threads for different machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a class attribute to specify the number of SMT threads per core permitted for different machines, 8 for powernv8 and 4 for powernv9/10. Reviewed-by: Cédric Le Goater Reviewed-by: Harsh Prateek Bora Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 12 +++++++++--- include/hw/ppc/pnv.h | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 91ff1be21f..a84e0e0e30 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -899,6 +899,7 @@ static void pnv_init(MachineState *machine) PnvMachineState *pnv = PNV_MACHINE(machine); MachineClass *mc = MACHINE_GET_CLASS(machine); PnvMachineClass *pmc = PNV_MACHINE_GET_CLASS(machine); + int max_smt_threads = pmc->max_smt_threads; char *fw_filename; long fw_size; uint64_t chip_ram_start = 0; @@ -997,17 +998,19 @@ static void pnv_init(MachineState *machine) pnv->num_chips = machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads); - if (machine->smp.threads > 8) { - error_report("Cannot support more than 8 threads/core " - "on a powernv machine"); + if (machine->smp.threads > max_smt_threads) { + error_report("Cannot support more than %d threads/core " + "on %s machine", max_smt_threads, mc->desc); exit(1); } + if (!is_power_of_2(machine->smp.threads)) { error_report("Cannot support %d threads/core on a powernv" "machine because it must be a power of 2", machine->smp.threads); 
exit(1); } + /* * TODO: should we decide on how many chips we can create based * on #cores and Venice vs. Murano vs. Naples chip type etc..., @@ -2490,6 +2493,7 @@ static void pnv_machine_power8_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); + pmc->max_smt_threads = 8; machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB); } @@ -2514,6 +2518,7 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); + pmc->max_smt_threads = 4; pmc->dt_power_mgt = pnv_dt_power_mgt; machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB); @@ -2538,6 +2543,7 @@ static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); + pmc->max_smt_threads = 4; pmc->dt_power_mgt = pnv_dt_power_mgt; xfc->match_nvt = pnv10_xive_match_nvt; diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index 476b136146..1993dededf 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -76,6 +76,7 @@ struct PnvMachineClass { /*< public >*/ const char *compat; int compat_size; + int max_smt_threads; void (*dt_power_mgt)(PnvMachineState *pnv, void *fdt); void (*i2c_init)(PnvMachineState *pnv); From 25de28220cedadac15021ec40047785f30e153fe Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 24 May 2024 11:54:09 +1000 Subject: [PATCH 23/96] ppc/pnv: Extend chip_pir class method to TIR as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chip_pir chip class method allows the platform to set the PIR processor identification register. Extend this to a more general ID function which also allows the TIR to be set. This is in preparation for "big core", which is a more complicated topology of cores and threads. 
Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 81 +++++++++++++++++++++++++-------------- hw/ppc/pnv_core.c | 10 +++-- include/hw/ppc/pnv_chip.h | 4 +- 3 files changed, 62 insertions(+), 33 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index a84e0e0e30..8827f729b1 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -154,7 +154,7 @@ static int pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) char *nodename; int cpus_offset = get_cpus_node(fdt); - pir = pnv_cc->chip_pir(chip, pc->hwid, 0); + pnv_cc->get_pir_tir(chip, pc->hwid, 0, &pir, NULL); nodename = g_strdup_printf("%s@%x", dc->fw_name, pir); offset = fdt_add_subnode(fdt, cpus_offset, nodename); @@ -236,7 +236,8 @@ static int pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) /* Build interrupt servers properties */ for (i = 0; i < smt_threads; i++) { - servers_prop[i] = cpu_to_be32(pnv_cc->chip_pir(chip, pc->hwid, i)); + pnv_cc->get_pir_tir(chip, pc->hwid, i, &pir, NULL); + servers_prop[i] = cpu_to_be32(pir); } _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", servers_prop, sizeof(*servers_prop) * smt_threads))); @@ -248,14 +249,17 @@ static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t hwid, uint32_t nr_threads) { PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); - uint32_t pir = pcc->chip_pir(chip, hwid, 0); - uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12); + uint32_t pir; + uint64_t addr; char *name; const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp"; uint32_t irange[2], i, rsize; uint64_t *reg; int offset; + pcc->get_pir_tir(chip, hwid, 0, &pir, NULL); + addr = PNV_ICP_BASE(chip) | (pir << 12); + irange[0] = cpu_to_be32(pir); irange[1] = cpu_to_be32(nr_threads); @@ -1106,10 +1110,16 @@ static void pnv_init(MachineState *machine) * 25:28 Core number * 29:31 Thread ID */ -static uint32_t pnv_chip_pir_p8(PnvChip *chip, uint32_t core_id, - uint32_t thread_id) +static void pnv_get_pir_tir_p8(PnvChip *chip, + uint32_t core_id, uint32_t thread_id, + uint32_t 
*pir, uint32_t *tir) { - return (chip->chip_id << 7) | (core_id << 3) | thread_id; + if (pir) { + *pir = (chip->chip_id << 7) | (core_id << 3) | thread_id; + } + if (tir) { + *tir = thread_id; + } } static void pnv_chip_power8_intc_create(PnvChip *chip, PowerPCCPU *cpu, @@ -1161,14 +1171,20 @@ static void pnv_chip_power8_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, * * We only care about the lower bits. uint32_t is fine for the moment. */ -static uint32_t pnv_chip_pir_p9(PnvChip *chip, uint32_t core_id, - uint32_t thread_id) +static void pnv_get_pir_tir_p9(PnvChip *chip, + uint32_t core_id, uint32_t thread_id, + uint32_t *pir, uint32_t *tir) { - if (chip->nr_threads == 8) { - return (chip->chip_id << 8) | ((thread_id & 1) << 2) | (core_id << 3) | - (thread_id >> 1); - } else { - return (chip->chip_id << 8) | (core_id << 2) | thread_id; + if (pir) { + if (chip->nr_threads == 8) { + *pir = (chip->chip_id << 8) | ((thread_id & 1) << 2) | + (core_id << 3) | (thread_id >> 1); + } else { + *pir = (chip->chip_id << 8) | (core_id << 2) | thread_id; + } + } + if (tir) { + *tir = thread_id; } } @@ -1183,14 +1199,20 @@ static uint32_t pnv_chip_pir_p9(PnvChip *chip, uint32_t core_id, * * We only care about the lower bits. uint32_t is fine for the moment. 
*/ -static uint32_t pnv_chip_pir_p10(PnvChip *chip, uint32_t core_id, - uint32_t thread_id) +static void pnv_get_pir_tir_p10(PnvChip *chip, + uint32_t core_id, uint32_t thread_id, + uint32_t *pir, uint32_t *tir) { - if (chip->nr_threads == 8) { - return (chip->chip_id << 8) | ((core_id / 4) << 4) | - ((core_id % 2) << 3) | thread_id; - } else { - return (chip->chip_id << 8) | (core_id << 2) | thread_id; + if (pir) { + if (chip->nr_threads == 8) { + *pir = (chip->chip_id << 8) | ((core_id / 4) << 4) | + ((core_id % 2) << 3) | thread_id; + } else { + *pir = (chip->chip_id << 8) | (core_id << 2) | thread_id; + } + } + if (tir) { + *tir = thread_id; } } @@ -1370,8 +1392,11 @@ static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp) int core_hwid = CPU_CORE(pnv_core)->core_id; for (j = 0; j < CPU_CORE(pnv_core)->nr_threads; j++) { - uint32_t pir = pcc->chip_pir(chip, core_hwid, j); - PnvICPState *icp = PNV_ICP(xics_icp_get(chip8->xics, pir)); + uint32_t pir; + PnvICPState *icp; + + pcc->get_pir_tir(chip, core_hwid, j, &pir, NULL); + icp = PNV_ICP(xics_icp_get(chip8->xics, pir)); memory_region_add_subregion(&chip8->icp_mmio, pir << 12, &icp->mmio); @@ -1483,7 +1508,7 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ k->cores_mask = POWER8E_CORE_MASK; k->num_phbs = 3; - k->chip_pir = pnv_chip_pir_p8; + k->get_pir_tir = pnv_get_pir_tir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; k->intc_destroy = pnv_chip_power8_intc_destroy; @@ -1507,7 +1532,7 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ k->cores_mask = POWER8_CORE_MASK; k->num_phbs = 3; - k->chip_pir = pnv_chip_pir_p8; + k->get_pir_tir = pnv_get_pir_tir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; k->intc_destroy = 
pnv_chip_power8_intc_destroy; @@ -1531,7 +1556,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ k->cores_mask = POWER8_CORE_MASK; k->num_phbs = 4; - k->chip_pir = pnv_chip_pir_p8; + k->get_pir_tir = pnv_get_pir_tir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; k->intc_destroy = pnv_chip_power8_intc_destroy; @@ -1814,7 +1839,7 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x220d104900008000ull; /* P9 Nimbus DD2.0 */ k->cores_mask = POWER9_CORE_MASK; - k->chip_pir = pnv_chip_pir_p9; + k->get_pir_tir = pnv_get_pir_tir_p9; k->intc_create = pnv_chip_power9_intc_create; k->intc_reset = pnv_chip_power9_intc_reset; k->intc_destroy = pnv_chip_power9_intc_destroy; @@ -2136,7 +2161,7 @@ static void pnv_chip_power10_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x220da04980000000ull; /* P10 DD2.0 (with NX) */ k->cores_mask = POWER10_CORE_MASK; - k->chip_pir = pnv_chip_pir_p10; + k->get_pir_tir = pnv_get_pir_tir_p10; k->intc_create = pnv_chip_power10_intc_create; k->intc_reset = pnv_chip_power10_intc_reset; k->intc_destroy = pnv_chip_power10_intc_destroy; diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 2da271ffb6..28ca61926d 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -227,8 +227,9 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp, { CPUPPCState *env = &cpu->env; int core_hwid; - ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; - ppc_spr_t *tir = &env->spr_cb[SPR_TIR]; + ppc_spr_t *pir_spr = &env->spr_cb[SPR_PIR]; + ppc_spr_t *tir_spr = &env->spr_cb[SPR_TIR]; + uint32_t pir, tir; Error *local_err = NULL; PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); @@ -244,8 +245,9 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp, core_hwid = object_property_get_uint(OBJECT(pc), "hwid", &error_abort); - 
tir->default_value = thread_index; - pir->default_value = pcc->chip_pir(pc->chip, core_hwid, thread_index); + pcc->get_pir_tir(pc->chip, core_hwid, thread_index, &pir, &tir); + pir_spr->default_value = pir; + tir_spr->default_value = tir; /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h index 4eaa7d3999..7d5d08bcdc 100644 --- a/include/hw/ppc/pnv_chip.h +++ b/include/hw/ppc/pnv_chip.h @@ -150,7 +150,9 @@ struct PnvChipClass { DeviceRealize parent_realize; - uint32_t (*chip_pir)(PnvChip *chip, uint32_t core_id, uint32_t thread_id); + /* Get PIR and TIR values for a CPU thread identified by core/thread id */ + void (*get_pir_tir)(PnvChip *chip, uint32_t core_id, uint32_t thread_id, + uint32_t *pir, uint32_t *tir); void (*intc_create)(PnvChip *chip, PowerPCCPU *cpu, Error **errp); void (*intc_reset)(PnvChip *chip, PowerPCCPU *cpu); void (*intc_destroy)(PnvChip *chip, PowerPCCPU *cpu); From feb37fdc821242d86c30bff33abd31bcce01e9e2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 16 May 2024 15:25:12 +1000 Subject: [PATCH 24/96] ppc: Add a core_index to CPUPPCState for SMT vCPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The way SMT thread siblings are matched is clunky, using hard-coded logic that checks the PIR SPR. Change that to use a new core_index variable in the CPUPPCState, where all siblings have the same core_index. CPU realize routines have flexibility in setting core/sibling topology. 
Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv_core.c | 2 ++ hw/ppc/spapr_cpu_core.c | 4 ++++ target/ppc/cpu.h | 9 +++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 28ca61926d..7bda29b9c7 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -249,6 +249,8 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp, pir_spr->default_value = pir; tir_spr->default_value = tir; + env->core_index = core_hwid; + /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index e7c9edd033..b228c1d498 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -300,11 +300,13 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp) g_autofree char *id = NULL; CPUState *cs; PowerPCCPU *cpu; + CPUPPCState *env; obj = object_new(scc->cpu_type); cs = CPU(obj); cpu = POWERPC_CPU(obj); + env = &cpu->env; /* * All CPUs start halted. CPU0 is unhalted from the machine level reset code * and the rest are explicitly started up by the guest using an RTAS call. 
@@ -315,6 +317,8 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp) return NULL; } + env->core_index = cc->core_id; + cpu->node_id = sc->node_id; id = g_strdup_printf("thread[%d]", i); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 95ba9e7590..7b52a9bb18 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1247,6 +1247,9 @@ struct CPUArchState { /* when a memory exception occurs, the access type is stored here */ int access_type; + /* For SMT processors */ + int core_index; + #if !defined(CONFIG_USER_ONLY) /* MMU context, only relevant for full system emulation */ #if defined(TARGET_PPC64) @@ -1402,12 +1405,10 @@ struct CPUArchState { uint64_t pmu_base_time; }; -#define _CORE_ID(cs) \ - (POWERPC_CPU(cs)->env.spr_cb[SPR_PIR].default_value & ~(cs->nr_threads - 1)) - #define THREAD_SIBLING_FOREACH(cs, cs_sibling) \ CPU_FOREACH(cs_sibling) \ - if (_CORE_ID(cs) == _CORE_ID(cs_sibling)) + if (POWERPC_CPU(cs)->env.core_index == \ + POWERPC_CPU(cs_sibling)->env.core_index) #define SET_FIT_PERIOD(a_, b_, c_, d_) \ do { \ From 50d8cfb949066e4466700e814a0e26719d70a951 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 24 May 2024 17:49:52 +1000 Subject: [PATCH 25/96] target/ppc: Add helpers to check for SMT sibling threads Add helpers for TCG code to determine if there are SMT siblings sharing per-core and per-lpar registers. This simplifies the callers and makes SMT register topology simpler to modify with later changes. 
Reviewed-by: Harsh Prateek Bora Signed-off-by: Nicholas Piggin --- target/ppc/cpu.h | 11 +++++++++++ target/ppc/cpu_init.c | 2 +- target/ppc/excp_helper.c | 17 +++++------------ target/ppc/misc_helper.c | 27 ++++++--------------------- target/ppc/timebase_helper.c | 20 +++++++------------- 5 files changed, 30 insertions(+), 47 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 7b52a9bb18..417b284318 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1512,6 +1512,17 @@ struct PowerPCCPUClass { int (*check_attn)(CPUPPCState *env); }; +static inline bool ppc_cpu_core_single_threaded(CPUState *cs) +{ + return cs->nr_threads == 1; +} + +static inline bool ppc_cpu_lpar_single_threaded(CPUState *cs) +{ + return !(POWERPC_CPU(cs)->env.flags & POWERPC_FLAG_SMT_1LPAR) || + ppc_cpu_core_single_threaded(cs); +} + ObjectClass *ppc_cpu_class_by_name(const char *name); PowerPCCPUClass *ppc_cpu_class_by_pvr(uint32_t pvr); PowerPCCPUClass *ppc_cpu_class_by_pvr_mask(uint32_t pvr); diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 42bb047b54..5ec87c56e4 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -6976,7 +6976,7 @@ static void ppc_cpu_realize(DeviceState *dev, Error **errp) pcc->parent_realize(dev, errp); - if (env_cpu(env)->nr_threads > 1) { + if (!ppc_cpu_core_single_threaded(cs)) { env->flags |= POWERPC_FLAG_SMT; } diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index c0120c8a88..f33fc36db2 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -3005,18 +3005,11 @@ static void msgsnd_core_tir(CPUPPCState *env, uint32_t target_tir, int irq) { PowerPCCPU *cpu = env_archcpu(env); CPUState *cs = env_cpu(env); - uint32_t nr_threads = cs->nr_threads; - if (!(env->flags & POWERPC_FLAG_SMT_1LPAR)) { - nr_threads = 1; /* msgsndp behaves as 1-thread in LPAR-per-thread mode*/ - } - - if (target_tir >= nr_threads) { - return; - } - - if (nr_threads == 1) { - ppc_set_irq(cpu, irq, 1); + if 
(ppc_cpu_lpar_single_threaded(cs)) { + if (target_tir == 0) { + ppc_set_irq(cpu, irq, 1); + } } else { CPUState *ccs; @@ -3071,7 +3064,7 @@ void helper_book3s_msgsnd(CPUPPCState *env, target_ulong rb) brdcast = true; } - if (cs->nr_threads == 1 || !brdcast) { + if (ppc_cpu_core_single_threaded(cs) || !brdcast) { ppc_set_irq(cpu, PPC_INTERRUPT_HDOORBELL, 1); return; } diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index de7c8813ec..9789d69664 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -48,9 +48,8 @@ void helper_spr_core_write_generic(CPUPPCState *env, uint32_t sprn, { CPUState *cs = env_cpu(env); CPUState *ccs; - uint32_t nr_threads = cs->nr_threads; - if (nr_threads == 1) { + if (ppc_cpu_core_single_threaded(cs)) { env->spr[sprn] = val; return; } @@ -195,7 +194,7 @@ void helper_store_ptcr(CPUPPCState *env, target_ulong val) return; } - if (cs->nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) { + if (ppc_cpu_lpar_single_threaded(cs)) { env->spr[SPR_PTCR] = val; tlb_flush(cs); } else { @@ -242,16 +241,12 @@ target_ulong helper_load_dpdes(CPUPPCState *env) { CPUState *cs = env_cpu(env); CPUState *ccs; - uint32_t nr_threads = cs->nr_threads; target_ulong dpdes = 0; helper_hfscr_facility_check(env, HFSCR_MSGP, "load DPDES", HFSCR_IC_MSGP); - if (!(env->flags & POWERPC_FLAG_SMT_1LPAR)) { - nr_threads = 1; /* DPDES behaves as 1-thread in LPAR-per-thread mode */ - } - - if (nr_threads == 1) { + /* DPDES behaves as 1-thread in LPAR-per-thread mode */ + if (ppc_cpu_lpar_single_threaded(cs)) { if (env->pending_interrupts & PPC_INTERRUPT_DOORBELL) { dpdes = 1; } @@ -278,21 +273,11 @@ void helper_store_dpdes(CPUPPCState *env, target_ulong val) PowerPCCPU *cpu = env_archcpu(env); CPUState *cs = env_cpu(env); CPUState *ccs; - uint32_t nr_threads = cs->nr_threads; helper_hfscr_facility_check(env, HFSCR_MSGP, "store DPDES", HFSCR_IC_MSGP); - if (!(env->flags & POWERPC_FLAG_SMT_1LPAR)) { - nr_threads = 1; /* DPDES behaves as 
1-thread in LPAR-per-thread mode */ - } - - if (val & ~(nr_threads - 1)) { - qemu_log_mask(LOG_GUEST_ERROR, "Invalid DPDES register value " - TARGET_FMT_lx"\n", val); - val &= (nr_threads - 1); /* Ignore the invalid bits */ - } - - if (nr_threads == 1) { + /* DPDES behaves as 1-thread in LPAR-per-thread mode */ + if (ppc_cpu_lpar_single_threaded(cs)) { ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & 0x1); return; } diff --git a/target/ppc/timebase_helper.c b/target/ppc/timebase_helper.c index b02535bbd5..d86112d60a 100644 --- a/target/ppc/timebase_helper.c +++ b/target/ppc/timebase_helper.c @@ -62,9 +62,8 @@ void helper_store_purr(CPUPPCState *env, target_ulong val) { CPUState *cs = env_cpu(env); CPUState *ccs; - uint32_t nr_threads = cs->nr_threads; - if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) { + if (ppc_cpu_lpar_single_threaded(cs)) { cpu_ppc_store_purr(env, val); return; } @@ -81,9 +80,8 @@ void helper_store_tbl(CPUPPCState *env, target_ulong val) { CPUState *cs = env_cpu(env); CPUState *ccs; - uint32_t nr_threads = cs->nr_threads; - if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) { + if (ppc_cpu_lpar_single_threaded(cs)) { cpu_ppc_store_tbl(env, val); return; } @@ -98,9 +96,8 @@ void helper_store_tbu(CPUPPCState *env, target_ulong val) { CPUState *cs = env_cpu(env); CPUState *ccs; - uint32_t nr_threads = cs->nr_threads; - if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) { + if (ppc_cpu_lpar_single_threaded(cs)) { cpu_ppc_store_tbu(env, val); return; } @@ -140,9 +137,8 @@ void helper_store_hdecr(CPUPPCState *env, target_ulong val) { CPUState *cs = env_cpu(env); CPUState *ccs; - uint32_t nr_threads = cs->nr_threads; - if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) { + if (ppc_cpu_lpar_single_threaded(cs)) { cpu_ppc_store_hdecr(env, val); return; } @@ -157,9 +153,8 @@ void helper_store_vtb(CPUPPCState *env, target_ulong val) { CPUState *cs = env_cpu(env); CPUState *ccs; - uint32_t nr_threads = 
cs->nr_threads; - if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) { + if (ppc_cpu_lpar_single_threaded(cs)) { cpu_ppc_store_vtb(env, val); return; } @@ -174,9 +169,8 @@ void helper_store_tbu40(CPUPPCState *env, target_ulong val) { CPUState *cs = env_cpu(env); CPUState *ccs; - uint32_t nr_threads = cs->nr_threads; - if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) { + if (ppc_cpu_lpar_single_threaded(cs)) { cpu_ppc_store_tbu40(env, val); return; } @@ -293,7 +287,7 @@ static void write_tfmr(CPUPPCState *env, target_ulong val) { CPUState *cs = env_cpu(env); - if (cs->nr_threads == 1) { + if (ppc_cpu_core_single_threaded(cs)) { env->spr[SPR_TFMR] = val; } else { CPUState *ccs; From 59c921f2297d6e293fde593432acf90d819e4d51 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 18 Jun 2024 12:56:53 +1000 Subject: [PATCH 26/96] ppc: Add has_smt_siblings property to CPUPPCState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The decision to branch out to a slower SMT path in instruction emulation will become a bit more complicated with the way that "big-core" topology will be implemented in subsequent changes. Hide these details from the wider CPU emulation code with a bool has_smt_siblings flag that can be set by machine initialisation. 
Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv_core.c | 3 +++ hw/ppc/spapr_cpu_core.c | 12 +++++++++--- target/ppc/cpu.h | 3 ++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 7bda29b9c7..8cfa94fbfa 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -288,6 +288,9 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) cpu = POWERPC_CPU(obj); pc->threads[i] = POWERPC_CPU(obj); + if (cc->nr_threads > 1) { + cpu->env.has_smt_siblings = true; + } snprintf(name, sizeof(name), "thread[%d]", i); object_property_add_child(OBJECT(pc), name, obj); diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index b228c1d498..56090abcd1 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -349,9 +349,15 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) qemu_register_reset(spapr_cpu_core_reset_handler, sc); sc->threads = g_new0(PowerPCCPU *, cc->nr_threads); for (i = 0; i < cc->nr_threads; i++) { - sc->threads[i] = spapr_create_vcpu(sc, i, errp); - if (!sc->threads[i] || - !spapr_realize_vcpu(sc->threads[i], spapr, sc, i, errp)) { + PowerPCCPU *cpu; + + cpu = spapr_create_vcpu(sc, i, errp); + sc->threads[i] = cpu; + if (cpu && cc->nr_threads > 1) { + cpu->env.has_smt_siblings = true; + } + + if (!cpu || !spapr_realize_vcpu(cpu, spapr, sc, i, errp)) { spapr_cpu_core_unrealize(dev); return; } diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 417b284318..321ed2da75 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1248,6 +1248,7 @@ struct CPUArchState { int access_type; /* For SMT processors */ + bool has_smt_siblings; int core_index; #if !defined(CONFIG_USER_ONLY) @@ -1514,7 +1515,7 @@ struct PowerPCCPUClass { static inline bool ppc_cpu_core_single_threaded(CPUState *cs) { - return cs->nr_threads == 1; + return !POWERPC_CPU(cs)->env.has_smt_siblings; } static inline bool ppc_cpu_lpar_single_threaded(CPUState *cs) From 
c26504afd5f5cca1addfab5222621bc32a28522f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 16 May 2024 15:25:12 +1000 Subject: [PATCH 27/96] ppc/pnv: Add a big-core mode that joins two regular cores POWER9 and POWER10 machines come in two variants, big-core and small-core. Big-core machines are SMT8 from software's point of view, but in the low level platform topology ("xscom registers and pervasive addressing"), these look more like a pair of small cores ganged together. Presently the way this is modelled is to create one SMT8 PnvCore and add special cases to xscom and pervasive for big-core mode that try to split this into two small cores, but this is becoming too complicated to manage. A better approach is to create 2 core structures and gang them together to look like an SMT8 core in TCG. Then the xscom and pervasive models mostly do not need to differentiate big and small core modes. This change adds initial mode bits and QEMU topology handling to split SMT8 cores into 2xSMT4 cores. 
Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 80 ++++++++++++++++++++++++++++++++------- hw/ppc/pnv_core.c | 8 +++- include/hw/ppc/pnv.h | 2 + include/hw/ppc/pnv_chip.h | 1 + include/hw/ppc/pnv_core.h | 1 + 5 files changed, 78 insertions(+), 14 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 8827f729b1..07a29411a6 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -1002,14 +1002,39 @@ static void pnv_init(MachineState *machine) pnv->num_chips = machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads); + if (pnv->big_core) { + if (machine->smp.threads % 2 == 1) { + error_report("Cannot support %d threads with big-core option " + "because it must be an even number", + machine->smp.threads); + exit(1); + } + max_smt_threads *= 2; + } + if (machine->smp.threads > max_smt_threads) { error_report("Cannot support more than %d threads/core " "on %s machine", max_smt_threads, mc->desc); + if (pmc->max_smt_threads == 4) { + error_report("(use big-core=on for 8 threads per core)"); + } exit(1); } + if (pnv->big_core) { + /* + * powernv models PnvCore as a SMT4 core. Big-core requires 2xPnvCore + * per core, so adjust topology here. pnv_dt_core() processor + * device-tree and TCG SMT code make the 2 cores appear as one big core + * from software point of view. pnv pervasive models and xscoms tend to + * see the big core as 2 small core halves. + */ + machine->smp.cores *= 2; + machine->smp.threads /= 2; + } + if (!is_power_of_2(machine->smp.threads)) { - error_report("Cannot support %d threads/core on a powernv" + error_report("Cannot support %d threads/core on a powernv " "machine because it must be a power of 2", machine->smp.threads); exit(1); @@ -1048,6 +1073,8 @@ static void pnv_init(MachineState *machine) &error_fatal); object_property_set_int(chip, "nr-threads", machine->smp.threads, &error_fatal); + object_property_set_bool(chip, "big-core", pnv->big_core, + &error_fatal); /* * The POWER8 machine use the XICS interrupt interface. 
* Propagate the XICS fabric to the chip and its controllers. @@ -1175,11 +1202,17 @@ static void pnv_get_pir_tir_p9(PnvChip *chip, uint32_t core_id, uint32_t thread_id, uint32_t *pir, uint32_t *tir) { - if (pir) { - if (chip->nr_threads == 8) { - *pir = (chip->chip_id << 8) | ((thread_id & 1) << 2) | - (core_id << 3) | (thread_id >> 1); - } else { + if (chip->big_core) { + /* Big-core interleaves thread ID between small-cores */ + thread_id <<= 1; + thread_id |= core_id & 1; + core_id >>= 1; + + if (pir) { + *pir = (chip->chip_id << 8) | (core_id << 3) | thread_id; + } + } else { + if (pir) { *pir = (chip->chip_id << 8) | (core_id << 2) | thread_id; } } @@ -1203,11 +1236,17 @@ static void pnv_get_pir_tir_p10(PnvChip *chip, uint32_t core_id, uint32_t thread_id, uint32_t *pir, uint32_t *tir) { - if (pir) { - if (chip->nr_threads == 8) { - *pir = (chip->chip_id << 8) | ((core_id / 4) << 4) | - ((core_id % 2) << 3) | thread_id; - } else { + if (chip->big_core) { + /* Big-core interleaves thread ID between small-cores */ + thread_id <<= 1; + thread_id |= core_id & 1; + core_id >>= 1; + + if (pir) { + *pir = (chip->chip_id << 8) | (core_id << 3) | thread_id; + } + } else { + if (pir) { *pir = (chip->chip_id << 8) | (core_id << 2) | thread_id; } } @@ -2180,7 +2219,8 @@ static void pnv_chip_power10_class_init(ObjectClass *klass, void *data) &k->parent_realize); } -static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp) +static void pnv_chip_core_sanitize(PnvMachineState *pnv, PnvChip *chip, + Error **errp) { PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); int cores_max; @@ -2201,6 +2241,17 @@ static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp) } chip->cores_mask &= pcc->cores_mask; + /* Ensure small-cores a paired up in big-core mode */ + if (pnv->big_core) { + uint64_t even_cores = chip->cores_mask & 0x5555555555555555ULL; + uint64_t odd_cores = chip->cores_mask & 0xaaaaaaaaaaaaaaaaULL; + + if (even_cores ^ (odd_cores >> 1)) { + error_setg(errp, 
"warning: unpaired cores in big-core mode !"); + return; + } + } + /* now that we have a sane layout, let check the number of cores */ cores_max = ctpop64(chip->cores_mask); if (chip->nr_cores > cores_max) { @@ -2224,7 +2275,7 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp) } /* Cores */ - pnv_chip_core_sanitize(chip, &error); + pnv_chip_core_sanitize(pnv, chip, &error); if (error) { error_propagate(errp, error); return; @@ -2255,6 +2306,8 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp) &error_fatal); object_property_set_int(OBJECT(pnv_core), "hrmor", pnv->fw_load_addr, &error_fatal); + object_property_set_bool(OBJECT(pnv_core), "big-core", chip->big_core, + &error_fatal); object_property_set_link(OBJECT(pnv_core), "chip", OBJECT(chip), &error_abort); qdev_realize(DEVICE(pnv_core), NULL, &error_fatal); @@ -2288,6 +2341,7 @@ static Property pnv_chip_properties[] = { DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1), DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0), DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1), + DEFINE_PROP_BOOL("big-core", PnvChip, big_core, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 8cfa94fbfa..6dc05534d7 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -249,7 +249,12 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp, pir_spr->default_value = pir; tir_spr->default_value = tir; - env->core_index = core_hwid; + if (pc->big_core) { + /* 2 "small cores" get the same core index for SMT operations */ + env->core_index = core_hwid >> 1; + } else { + env->core_index = core_hwid; + } /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); @@ -354,6 +359,7 @@ static void pnv_core_unrealize(DeviceState *dev) static Property pnv_core_properties[] = { DEFINE_PROP_UINT32("hwid", PnvCore, hwid, 0), DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0), + DEFINE_PROP_BOOL("big-core", PnvCore, 
big_core, false), DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index 1993dededf..283ddd50e7 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -101,6 +101,8 @@ struct PnvMachineState { PnvPnor *pnor; hwaddr fw_load_addr; + + bool big_core; }; PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t chip_id); diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h index 7d5d08bcdc..69d8273efe 100644 --- a/include/hw/ppc/pnv_chip.h +++ b/include/hw/ppc/pnv_chip.h @@ -27,6 +27,7 @@ struct PnvChip { uint64_t ram_start; uint64_t ram_size; + bool big_core; uint32_t nr_cores; uint32_t nr_threads; uint64_t cores_mask; diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index 693acb189b..50164e9e1f 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -49,6 +49,7 @@ struct PnvCore { /*< public >*/ PowerPCCPU **threads; + bool big_core; uint32_t pir; uint32_t hwid; uint64_t hrmor; From cf0eb929e59cb9074db7b197bb6782a2a47dddda Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Jul 2024 19:06:14 +1000 Subject: [PATCH 28/96] ppc/pnv: Allow for big-core differences in DT generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Device-tree building needs to account for big-core mode, because it is driven by qemu cores (small cores). Every second core should be skipped, and every core should describe threads for both small-cores that make up the big core. 
Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 07a29411a6..a1c2cbbc3f 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -141,9 +141,9 @@ static int pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) CPUPPCState *env = &cpu->env; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); PnvChipClass *pnv_cc = PNV_CHIP_GET_CLASS(chip); - g_autofree uint32_t *servers_prop = g_new(uint32_t, smt_threads); + uint32_t *servers_prop; int i; - uint32_t pir; + uint32_t pir, tir; uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), 0xffffffff, 0xffffffff}; uint32_t tbfreq = PNV_TIMEBASE_FREQ; @@ -154,7 +154,10 @@ static int pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) char *nodename; int cpus_offset = get_cpus_node(fdt); - pnv_cc->get_pir_tir(chip, pc->hwid, 0, &pir, NULL); + pnv_cc->get_pir_tir(chip, pc->hwid, 0, &pir, &tir); + + /* Only one DT node per (big) core */ + g_assert(tir == 0); nodename = g_strdup_printf("%s@%x", dc->fw_name, pir); offset = fdt_add_subnode(fdt, cpus_offset, nodename); @@ -235,12 +238,28 @@ static int pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) } /* Build interrupt servers properties */ - for (i = 0; i < smt_threads; i++) { - pnv_cc->get_pir_tir(chip, pc->hwid, i, &pir, NULL); - servers_prop[i] = cpu_to_be32(pir); + if (pc->big_core) { + servers_prop = g_new(uint32_t, smt_threads * 2); + for (i = 0; i < smt_threads; i++) { + pnv_cc->get_pir_tir(chip, pc->hwid, i, &pir, NULL); + servers_prop[i * 2] = cpu_to_be32(pir); + + pnv_cc->get_pir_tir(chip, pc->hwid + 1, i, &pir, NULL); + servers_prop[i * 2 + 1] = cpu_to_be32(pir); + } + _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(*servers_prop) * smt_threads + * 2))); + } else { + servers_prop = g_new(uint32_t, smt_threads); + for (i = 0; i < smt_threads; i++) { + pnv_cc->get_pir_tir(chip, 
pc->hwid, i, &pir, NULL); + servers_prop[i] = cpu_to_be32(pir); + } + _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(*servers_prop) * smt_threads))); } - _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", - servers_prop, sizeof(*servers_prop) * smt_threads))); + g_free(servers_prop); return offset; } @@ -389,6 +408,10 @@ static void pnv_chip_power9_dt_populate(PnvChip *chip, void *fdt) _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features_300, sizeof(pa_features_300)))); + + if (pnv_core->big_core) { + i++; /* Big-core groups two QEMU cores */ + } } if (chip->ram_size) { @@ -450,6 +473,10 @@ static void pnv_chip_power10_dt_populate(PnvChip *chip, void *fdt) _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features_31, sizeof(pa_features_31)))); + + if (pnv_core->big_core) { + i++; /* Big-core groups two QEMU cores */ + } } if (chip->ram_size) { From 27f61d1b0b708b4659894cd0677f65ebed6eaa0b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Jul 2024 18:37:25 +1000 Subject: [PATCH 29/96] ppc/pnv: Implement big-core PVR for Power9/10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Power9/10 CPUs have PVR[51] set in small-core mode and clear in big-core mode. This is used by skiboot firmware. PVR is not hypervisor-privileged but it is not so important for spapr to implement this because it's generally masked out of PVR matching code in kernels, and only used by firmware. 
Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 6dc05534d7..43cfeaa2d4 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -58,6 +58,10 @@ static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu) env->nip = 0x10; env->msr |= MSR_HVB; /* Hypervisor mode */ env->spr[SPR_HRMOR] = pc->hrmor; + if (pc->big_core) { + /* Clear "small core" bit on Power9/10 (this is set in default PVR) */ + env->spr[SPR_PVR] &= ~PPC_BIT(51); + } hreg_compute_hflags(env); ppc_maybe_interrupt(env); From 16ffcb3401ddb991ec746de05595ba62eae45a1b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Jul 2024 18:31:35 +1000 Subject: [PATCH 30/96] ppc/pnv: Implement Power9 CPU core thread state indirect register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Power9 CPUs have a core thread state register accessible via SPRC/SPRD indirect registers. This register includes a bit for big-core mode, which skiboot requires. Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- target/ppc/misc_helper.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 9789d69664..1b83971375 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -325,6 +325,23 @@ target_ulong helper_load_sprd(CPUPPCState *env) case 0: /* SCRATCH0-3 */ case 1: /* SCRATCH4-7 */ return pc->scratch[(sprc >> 3) & 0x7]; + + case 0x1e0: /* core thread state */ + if (env->excp_model == POWERPC_EXCP_POWER9) { + /* + * Only implement for POWER9 because skiboot uses it to check + * big-core mode. Other bits are unimplemented so we would + * prefer to get unimplemented message on POWER10 if it were + * used anywhere. 
+ */ + if (pc->big_core) { + return PPC_BIT(63); + } else { + return 0; + } + } + /* fallthru */ + default: qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x" TARGET_FMT_lx"\n", sprc); From 78be3218940c0902d165f42ad0cdcd38e66c5df2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 18 Jun 2024 13:09:54 +1000 Subject: [PATCH 31/96] ppc/pnv: Add POWER10 ChipTOD quirk for big-core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POWER10 has a quirk in its ChipTOD addressing that requires the even small-core to be selected even when programming the odd small-core. This allows skiboot chiptod init to run in big-core mode. Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 7 ++++++- hw/ppc/pnv_core.c | 2 ++ include/hw/ppc/pnv.h | 1 + include/hw/ppc/pnv_core.h | 7 +++++++ target/ppc/timebase_helper.c | 9 +++++++++ 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index a1c2cbbc3f..4605a49d28 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -2290,11 +2290,12 @@ static void pnv_chip_core_sanitize(PnvMachineState *pnv, PnvChip *chip, static void pnv_chip_core_realize(PnvChip *chip, Error **errp) { + PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); + PnvMachineClass *pmc = PNV_MACHINE_GET_CLASS(pnv); Error *error = NULL; PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); const char *typename = pnv_chip_core_typename(chip); int i, core_hwid; - PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); if (!object_class_by_name(typename)) { error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename); @@ -2335,8 +2336,11 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp) &error_fatal); object_property_set_bool(OBJECT(pnv_core), "big-core", chip->big_core, &error_fatal); + object_property_set_bool(OBJECT(pnv_core), "quirk-tb-big-core", + pmc->quirk_tb_big_core, &error_fatal); object_property_set_link(OBJECT(pnv_core), "chip", OBJECT(chip), 
&error_abort); + qdev_realize(DEVICE(pnv_core), NULL, &error_fatal); /* Each core has an XSCOM MMIO region */ @@ -2650,6 +2654,7 @@ static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); pmc->max_smt_threads = 4; + pmc->quirk_tb_big_core = true; pmc->dt_power_mgt = pnv_dt_power_mgt; xfc->match_nvt = pnv10_xive_match_nvt; diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 43cfeaa2d4..1783795b23 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -364,6 +364,8 @@ static Property pnv_core_properties[] = { DEFINE_PROP_UINT32("hwid", PnvCore, hwid, 0), DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0), DEFINE_PROP_BOOL("big-core", PnvCore, big_core, false), + DEFINE_PROP_BOOL("quirk-tb-big-core", PnvCore, tod_state.big_core_quirk, + false), DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index 283ddd50e7..c56d152889 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -77,6 +77,7 @@ struct PnvMachineClass { const char *compat; int compat_size; int max_smt_threads; + bool quirk_tb_big_core; void (*dt_power_mgt)(PnvMachineState *pnv, void *fdt); void (*i2c_init)(PnvMachineState *pnv); diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index 50164e9e1f..c8784777a4 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -27,6 +27,13 @@ /* Per-core ChipTOD / TimeBase state */ typedef struct PnvCoreTODState { + /* + * POWER10 DD2.0 - big core TFMR drives the state machine on the even + * small core. Skiboot has a workaround that targets the even small core + * for CHIPTOD_TO_TB ops. 
+ */ + bool big_core_quirk; + int tb_ready_for_tod; /* core TB ready to receive TOD from chiptod */ int tod_sent_to_tb; /* chiptod sent TOD to the core TB */ diff --git a/target/ppc/timebase_helper.c b/target/ppc/timebase_helper.c index d86112d60a..73120323b4 100644 --- a/target/ppc/timebase_helper.c +++ b/target/ppc/timebase_helper.c @@ -218,6 +218,7 @@ void helper_store_booke_tsr(CPUPPCState *env, target_ulong val) * target/ppc/pnv_helper.c */ #include "hw/ppc/pnv_core.h" +#include "hw/ppc/pnv_chip.h" /* * POWER processor Timebase Facility */ @@ -302,6 +303,14 @@ static PnvCoreTODState *cpu_get_tbst(PowerPCCPU *cpu) { PnvCore *pc = pnv_cpu_state(cpu)->pnv_core; + if (pc->big_core && pc->tod_state.big_core_quirk) { + /* Must operate on the even small core */ + int core_id = CPU_CORE(pc)->core_id; + if (core_id & 1) { + pc = pc->chip->cores[core_id & ~1]; + } + } + return &pc->tod_state; } From b1beb69231c8a6a04ec365614e67729ea9af7cbf Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 18 Jun 2024 12:45:34 +1000 Subject: [PATCH 32/96] ppc/pnv: Add big-core machine property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Big-core implementation is complete, so expose it as a machine property that may be set with big-core=on option on powernv9 and powernv10 machines. 
Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 61 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 4605a49d28..d19516c2d7 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -2581,6 +2581,34 @@ static int pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t format, return total_count; } +static bool pnv_machine_get_big_core(Object *obj, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + return pnv->big_core; +} + +static void pnv_machine_set_big_core(Object *obj, bool value, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + pnv->big_core = value; +} + +static bool pnv_machine_get_hb(Object *obj, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + + return !!pnv->fw_load_addr; +} + +static void pnv_machine_set_hb(Object *obj, bool value, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + + if (value) { + pnv->fw_load_addr = 0x8000000; + } +} + static void pnv_machine_power8_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -2632,6 +2660,12 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) pmc->dt_power_mgt = pnv_dt_power_mgt; machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB); + + object_class_property_add_bool(oc, "big-core", + pnv_machine_get_big_core, + pnv_machine_set_big_core); + object_class_property_set_description(oc, "big-core", + "Use big-core (aka fused-core) mode"); } static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data) @@ -2668,6 +2702,17 @@ static void pnv_machine_power10_class_init(ObjectClass *oc, void *data) pnv_machine_p10_common_class_init(oc, data); mc->desc = "IBM PowerNV (Non-Virtualized) POWER10"; + + /* + * This is the parent of POWER10 Rainier class, so properies go here + * rather than common init (which would add them to both parent and + * child which is invalid). 
+ */ + object_class_property_add_bool(oc, "big-core", + pnv_machine_get_big_core, + pnv_machine_set_big_core); + object_class_property_set_description(oc, "big-core", + "Use big-core (aka fused-core) mode"); } static void pnv_machine_p10_rainier_class_init(ObjectClass *oc, void *data) @@ -2680,22 +2725,6 @@ static void pnv_machine_p10_rainier_class_init(ObjectClass *oc, void *data) pmc->i2c_init = pnv_rainier_i2c_init; } -static bool pnv_machine_get_hb(Object *obj, Error **errp) -{ - PnvMachineState *pnv = PNV_MACHINE(obj); - - return !!pnv->fw_load_addr; -} - -static void pnv_machine_set_hb(Object *obj, bool value, Error **errp) -{ - PnvMachineState *pnv = PNV_MACHINE(obj); - - if (value) { - pnv->fw_load_addr = 0x8000000; - } -} - static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, run_on_cpu_data arg) { CPUPPCState *env = cpu_env(cs); From ca4f47752a14221a26cd2bf4710bb21ad2811a22 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 12 Jul 2024 13:16:44 +1000 Subject: [PATCH 33/96] ppc/pnv: Add a CPU nmi and resume function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Power CPUs have an execution control facility that can pause, resume, and cause NMIs, among other things. Add a function that will nmi a CPU and resume it if it was paused, in preparation for implementing the control facility. Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 14 +++++++++++++- include/hw/ppc/pnv.h | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index d19516c2d7..084cfb9984 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -2751,11 +2751,23 @@ static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, run_on_cpu_data arg) */ env->spr[SPR_SRR1] |= SRR1_WAKESCOM; } + if (arg.host_int == 1) { + cpu_resume(cs); + } +} + +/* + * Send a SRESET (NMI) interrupt to the CPU, and resume execution if it was + * paused. 
+ */ +void pnv_cpu_do_nmi_resume(CPUState *cs) +{ + async_run_on_cpu(cs, pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_HOST_INT(1)); } static void pnv_cpu_do_nmi(PnvChip *chip, PowerPCCPU *cpu, void *opaque) { - async_run_on_cpu(CPU(cpu), pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_NULL); + async_run_on_cpu(CPU(cpu), pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_HOST_INT(0)); } static void pnv_nmi(NMIState *n, int cpu_index, Error **errp) diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index c56d152889..b7858d310d 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -112,6 +112,8 @@ PnvChip *pnv_chip_add_phb(PnvChip *chip, PnvPHB *phb); #define PNV_FDT_ADDR 0x01000000 #define PNV_TIMEBASE_FREQ 512000000ULL +void pnv_cpu_do_nmi_resume(CPUState *cs); + /* * BMC helpers */ From c8891955086b2fa795efb7fa0e409e32f25e5447 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 16 May 2024 23:44:12 +1000 Subject: [PATCH 34/96] ppc/pnv: Implement POWER10 PC xscom registers for direct controls The PC unit in the processor core contains xscom registers that provide low level status and control of the CPU. This implements "direct controls", sufficient for skiboot firmware, which uses it to send NMI IPIs between CPUs. POWER10 is sufficiently different from POWER9 (particularly with respect to QME and special wakeup) that it is not trivial to implement POWER9 support by reusing the code. 
Signed-off-by: Nicholas Piggin --- hw/ppc/pnv_core.c | 89 ++++++++++++++++++++++++++++++++++++--- include/hw/ppc/pnv_core.h | 3 ++ 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 1783795b23..4484fe8c6a 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -185,16 +185,40 @@ static const MemoryRegionOps pnv_core_power9_xscom_ops = { */ #define PNV10_XSCOM_EC_CORE_THREAD_STATE 0x412 +#define PNV10_XSCOM_EC_CORE_THREAD_INFO 0x413 +#define PNV10_XSCOM_EC_CORE_DIRECT_CONTROLS 0x449 +#define PNV10_XSCOM_EC_CORE_RAS_STATUS 0x454 static uint64_t pnv_core_power10_xscom_read(void *opaque, hwaddr addr, unsigned int width) { + PnvCore *pc = PNV_CORE(opaque); + int nr_threads = CPU_CORE(pc)->nr_threads; + int i; uint32_t offset = addr >> 3; uint64_t val = 0; switch (offset) { case PNV10_XSCOM_EC_CORE_THREAD_STATE: - val = 0; + for (i = 0; i < nr_threads; i++) { + PowerPCCPU *cpu = pc->threads[i]; + CPUState *cs = CPU(cpu); + + if (cs->halted) { + val |= PPC_BIT(56 + i); + } + } + break; + case PNV10_XSCOM_EC_CORE_THREAD_INFO: + break; + case PNV10_XSCOM_EC_CORE_RAS_STATUS: + for (i = 0; i < nr_threads; i++) { + PowerPCCPU *cpu = pc->threads[i]; + CPUState *cs = CPU(cpu); + if (cs->stopped) { + val |= PPC_BIT(0 + 8 * i) | PPC_BIT(1 + 8 * i); + } + } break; default: qemu_log_mask(LOG_UNIMP, "%s: unimp read 0x%08x\n", __func__, @@ -207,9 +231,46 @@ static uint64_t pnv_core_power10_xscom_read(void *opaque, hwaddr addr, static void pnv_core_power10_xscom_write(void *opaque, hwaddr addr, uint64_t val, unsigned int width) { + PnvCore *pc = PNV_CORE(opaque); + int nr_threads = CPU_CORE(pc)->nr_threads; + int i; uint32_t offset = addr >> 3; switch (offset) { + case PNV10_XSCOM_EC_CORE_DIRECT_CONTROLS: + for (i = 0; i < nr_threads; i++) { + PowerPCCPU *cpu = pc->threads[i]; + CPUState *cs = CPU(cpu); + + if (val & PPC_BIT(7 + 8 * i)) { /* stop */ + val &= ~PPC_BIT(7 + 8 * i); + cpu_pause(cs); + } + if (val & PPC_BIT(6 + 8 * 
i)) { /* start */ + val &= ~PPC_BIT(6 + 8 * i); + cpu_resume(cs); + } + if (val & PPC_BIT(4 + 8 * i)) { /* sreset */ + val &= ~PPC_BIT(4 + 8 * i); + pnv_cpu_do_nmi_resume(cs); + } + if (val & PPC_BIT(3 + 8 * i)) { /* clear maint */ + /* + * Hardware has very particular cases for where clear maint + * must be used and where start must be used to resume a + * thread. These are not modelled exactly, just treat + * this and start the same. + */ + val &= ~PPC_BIT(3 + 8 * i); + cpu_resume(cs); + } + } + if (val) { + qemu_log_mask(LOG_UNIMP, "%s: unimp bits in DIRECT_CONTROLS " + "0x%016" PRIx64 "\n", __func__, val); + } + break; + default: qemu_log_mask(LOG_UNIMP, "%s: unimp write 0x%08x\n", __func__, offset); @@ -526,6 +587,7 @@ static const MemoryRegionOps pnv_quad_power10_xscom_ops = { static uint64_t pnv_qme_power10_xscom_read(void *opaque, hwaddr addr, unsigned int width) { + PnvQuad *eq = PNV_QUAD(opaque); uint32_t offset = addr >> 3; uint64_t val = -1; @@ -533,10 +595,14 @@ static uint64_t pnv_qme_power10_xscom_read(void *opaque, hwaddr addr, * Forth nibble selects the core within a quad, mask it to process read * for any core. 
*/ - switch (offset & ~0xf000) { - case P10_QME_SPWU_HYP: + switch (offset & ~PPC_BITMASK32(16, 19)) { case P10_QME_SSH_HYP: - return 0; + val = 0; + if (eq->special_wakeup_done) { + val |= PPC_BIT(1); /* SPWU DONE */ + val |= PPC_BIT(4); /* SSH SPWU DONE */ + } + break; default: qemu_log_mask(LOG_UNIMP, "%s: unimp read 0x%08x\n", __func__, offset); @@ -548,9 +614,22 @@ static uint64_t pnv_qme_power10_xscom_read(void *opaque, hwaddr addr, static void pnv_qme_power10_xscom_write(void *opaque, hwaddr addr, uint64_t val, unsigned int width) { + PnvQuad *eq = PNV_QUAD(opaque); uint32_t offset = addr >> 3; + bool set; + int i; - switch (offset) { + switch (offset & ~PPC_BITMASK32(16, 19)) { + case P10_QME_SPWU_HYP: + set = !!(val & PPC_BIT(0)); + eq->special_wakeup_done = set; + for (i = 0; i < 4; i++) { + /* These bits select cores in the quad */ + if (offset & PPC_BIT32(16 + i)) { + eq->special_wakeup[i] = set; + } + } + break; default: qemu_log_mask(LOG_UNIMP, "%s: unimp write 0x%08x\n", __func__, offset); diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index c8784777a4..1de79a818e 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -109,6 +109,9 @@ OBJECT_DECLARE_TYPE(PnvQuad, PnvQuadClass, PNV_QUAD) struct PnvQuad { DeviceState parent_obj; + bool special_wakeup_done; + bool special_wakeup[4]; + uint32_t quad_id; MemoryRegion xscom_regs; MemoryRegion xscom_qme_regs; From 3b5ea01e98a5e26c1adb13d966f334cb58680cf8 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 24 May 2024 15:02:46 +1000 Subject: [PATCH 35/96] ppc/pnv: Add an LPAR per core machine option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent POWER CPUs can operate in "LPAR per core" or "LPAR per thread" modes. In per-core mode, some SPRs and IPI doorbells are shared between threads in a core. In per-thread mode, supervisor and user state is not shared between threads. 
OpenPOWER systems after POWER8 use LPAR per thread mode, and it is required for KVM. Enterprise systems use LPAR per core mode, as they partition the machine by core. Implement a lpar-per-core machine option for powernv machines. This is fixed true for POWER8 machines, and defaults off for P9 and P10. With this change, powernv8 SMT now works sufficiently to run Linux, with a single socket. Multi-threaded KVM guests still have problems, as does multi-socket Linux boot. Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 38 ++++++++++++++++++++++++++++++++++++++ hw/ppc/pnv_core.c | 8 ++++++++ include/hw/ppc/pnv.h | 2 ++ include/hw/ppc/pnv_chip.h | 1 + include/hw/ppc/pnv_core.h | 1 + target/ppc/cpu_init.c | 3 ++- 6 files changed, 52 insertions(+), 1 deletion(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 084cfb9984..a3560d25b7 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -1026,6 +1026,11 @@ static void pnv_init(MachineState *machine) exit(1); } + /* Set lpar-per-core mode if lpar-per-thread is not supported */ + if (!pmc->has_lpar_per_thread) { + pnv->lpar_per_core = true; + } + pnv->num_chips = machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads); @@ -1102,6 +1107,8 @@ static void pnv_init(MachineState *machine) &error_fatal); object_property_set_bool(chip, "big-core", pnv->big_core, &error_fatal); + object_property_set_bool(chip, "lpar-per-core", pnv->lpar_per_core, + &error_fatal); /* * The POWER8 machine use the XICS interrupt interface. * Propagate the XICS fabric to the chip and its controllers. 
@@ -2338,6 +2345,8 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp) &error_fatal); object_property_set_bool(OBJECT(pnv_core), "quirk-tb-big-core", pmc->quirk_tb_big_core, &error_fatal); + object_property_set_bool(OBJECT(pnv_core), "lpar-per-core", + chip->lpar_per_core, &error_fatal); object_property_set_link(OBJECT(pnv_core), "chip", OBJECT(chip), &error_abort); @@ -2373,6 +2382,7 @@ static Property pnv_chip_properties[] = { DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0), DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1), DEFINE_PROP_BOOL("big-core", PnvChip, big_core, false), + DEFINE_PROP_BOOL("lpar-per-core", PnvChip, lpar_per_core, false), DEFINE_PROP_END_OF_LIST(), }; @@ -2593,6 +2603,18 @@ static void pnv_machine_set_big_core(Object *obj, bool value, Error **errp) pnv->big_core = value; } +static bool pnv_machine_get_lpar_per_core(Object *obj, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + return pnv->lpar_per_core; +} + +static void pnv_machine_set_lpar_per_core(Object *obj, bool value, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + pnv->lpar_per_core = value; +} + static bool pnv_machine_get_hb(Object *obj, Error **errp) { PnvMachineState *pnv = PNV_MACHINE(obj); @@ -2632,6 +2654,8 @@ static void pnv_machine_power8_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); pmc->max_smt_threads = 8; + /* POWER8 is always lpar-per-core mode */ + pmc->has_lpar_per_thread = false; machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB); } @@ -2657,6 +2681,7 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); pmc->max_smt_threads = 4; + pmc->has_lpar_per_thread = true; pmc->dt_power_mgt = pnv_dt_power_mgt; machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB); @@ -2666,6 +2691,12 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) 
pnv_machine_set_big_core); object_class_property_set_description(oc, "big-core", "Use big-core (aka fused-core) mode"); + + object_class_property_add_bool(oc, "lpar-per-core", + pnv_machine_get_lpar_per_core, + pnv_machine_set_lpar_per_core); + object_class_property_set_description(oc, "lpar-per-core", + "Use 1 LPAR per core mode"); } static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data) @@ -2688,6 +2719,7 @@ static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); pmc->max_smt_threads = 4; + pmc->has_lpar_per_thread = true; pmc->quirk_tb_big_core = true; pmc->dt_power_mgt = pnv_dt_power_mgt; @@ -2713,6 +2745,12 @@ static void pnv_machine_power10_class_init(ObjectClass *oc, void *data) pnv_machine_set_big_core); object_class_property_set_description(oc, "big-core", "Use big-core (aka fused-core) mode"); + + object_class_property_add_bool(oc, "lpar-per-core", + pnv_machine_get_lpar_per_core, + pnv_machine_set_lpar_per_core); + object_class_property_set_description(oc, "lpar-per-core", + "Use 1 LPAR per core mode"); } static void pnv_machine_p10_rainier_class_init(ObjectClass *oc, void *data) diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 4484fe8c6a..a30693990b 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -208,6 +208,9 @@ static uint64_t pnv_core_power10_xscom_read(void *opaque, hwaddr addr, val |= PPC_BIT(56 + i); } } + if (pc->lpar_per_core) { + val |= PPC_BIT(62); + } break; case PNV10_XSCOM_EC_CORE_THREAD_INFO: break; @@ -321,6 +324,10 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp, env->core_index = core_hwid; } + if (pc->lpar_per_core) { + cpu_ppc_set_1lpar(cpu); + } + /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); } @@ -427,6 +434,7 @@ static Property pnv_core_properties[] = { DEFINE_PROP_BOOL("big-core", PnvCore, big_core, false), DEFINE_PROP_BOOL("quirk-tb-big-core", PnvCore, 
tod_state.big_core_quirk, false), + DEFINE_PROP_BOOL("lpar-per-core", PnvCore, lpar_per_core, false), DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index b7858d310d..fcb6699150 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -77,6 +77,7 @@ struct PnvMachineClass { const char *compat; int compat_size; int max_smt_threads; + bool has_lpar_per_thread; bool quirk_tb_big_core; void (*dt_power_mgt)(PnvMachineState *pnv, void *fdt); @@ -104,6 +105,7 @@ struct PnvMachineState { hwaddr fw_load_addr; bool big_core; + bool lpar_per_core; }; PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t chip_id); diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h index 69d8273efe..ee1649babc 100644 --- a/include/hw/ppc/pnv_chip.h +++ b/include/hw/ppc/pnv_chip.h @@ -28,6 +28,7 @@ struct PnvChip { uint64_t ram_size; bool big_core; + bool lpar_per_core; uint32_t nr_cores; uint32_t nr_threads; uint64_t cores_mask; diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index 1de79a818e..d8afb4f95f 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -57,6 +57,7 @@ struct PnvCore { /*< public >*/ PowerPCCPU **threads; bool big_core; + bool lpar_per_core; uint32_t pir; uint32_t hwid; uint64_t hrmor; diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 5ec87c56e4..23881d09e9 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -6786,7 +6786,8 @@ void cpu_ppc_set_1lpar(PowerPCCPU *cpu) /* * pseries SMT means "LPAR per core" mode, e.g., msgsndp is usable - * between threads. + * between threads. powernv can be in either mode, and it mostly affects + * supervisor visible registers and instructions.
*/ if (env->flags & POWERPC_FLAG_SMT) { env->flags |= POWERPC_FLAG_SMT_1LPAR; From 117664a1e70e6adff1e4384702a9d8343597b5b9 Mon Sep 17 00:00:00 2001 From: Chalapathi V Date: Wed, 26 Jun 2024 04:05:23 -0500 Subject: [PATCH 36/96] ppc/pnv: Remove ppc target dependency from pnv_xscom.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this commit the target-specific dependency has been removed from include/hw/ppc/pnv_xscom.h so that pnv_xscom.h can be included outside hw/ppc. Signed-off-by: Chalapathi V Reviewed-by: Cédric Le Goater Reviewed-by: Caleb Schlossin Signed-off-by: Nicholas Piggin --- include/hw/ppc/pnv_xscom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h index e93d310e79..764b324a00 100644 --- a/include/hw/ppc/pnv_xscom.h +++ b/include/hw/ppc/pnv_xscom.h @@ -21,9 +21,9 @@ #define PPC_PNV_XSCOM_H #include "exec/memory.h" -#include "hw/ppc/pnv.h" typedef struct PnvXScomInterface PnvXScomInterface; +typedef struct PnvChip PnvChip; #define TYPE_PNV_XSCOM_INTERFACE "pnv-xscom-interface" #define PNV_XSCOM_INTERFACE(obj) \ From 29318db133d0b2523bda771f76aa50c08842527f Mon Sep 17 00:00:00 2001 From: Chalapathi V Date: Wed, 26 Jun 2024 04:05:24 -0500 Subject: [PATCH 37/96] hw/ssi: Add SPI model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SPI controller device model supports a connection to a single SPI responder. This provides access to SPI seeproms, TPM, flash device and an ADC controller. All SPI function control is mapped into the SPI register space to enable full control by firmware. In this commit the SPI configuration component is modelled, which contains all SPI configuration and status registers as well as the hold registers for data to be sent or having been received. The existing QEMU SSI framework is used and SSI_BUS is created.
Signed-off-by: Chalapathi V Reviewed-by: Caleb Schlossin Reviewed-by: Cédric Le Goater Reviewed-by: Glenn Miles [np: Fix FDT macro compile for qtest] Signed-off-by: Nicholas Piggin --- hw/ppc/Kconfig | 3 + hw/ssi/Kconfig | 4 + hw/ssi/meson.build | 1 + hw/ssi/pnv_spi.c | 215 ++++++++++++++++++++++++++++++++++ hw/ssi/trace-events | 6 + include/hw/ppc/pnv_xscom.h | 3 + include/hw/ssi/pnv_spi.h | 40 +++++++ include/hw/ssi/pnv_spi_regs.h | 67 +++++++++++ 8 files changed, 339 insertions(+) create mode 100644 hw/ssi/pnv_spi.c create mode 100644 include/hw/ssi/pnv_spi.h create mode 100644 include/hw/ssi/pnv_spi_regs.h diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig index 347212f4db..c235519881 100644 --- a/hw/ppc/Kconfig +++ b/hw/ppc/Kconfig @@ -39,6 +39,9 @@ config POWERNV select PCI_POWERNV select PCA9552 select PCA9554 + select SSI + select SSI_M25P80 + select PNV_SPI config PPC405 bool diff --git a/hw/ssi/Kconfig b/hw/ssi/Kconfig index 83ee53c1d0..8d180de7cf 100644 --- a/hw/ssi/Kconfig +++ b/hw/ssi/Kconfig @@ -24,3 +24,7 @@ config STM32F2XX_SPI config BCM2835_SPI bool select SSI + +config PNV_SPI + bool + select SSI diff --git a/hw/ssi/meson.build b/hw/ssi/meson.build index b999aeb027..b7ad7fca3b 100644 --- a/hw/ssi/meson.build +++ b/hw/ssi/meson.build @@ -12,3 +12,4 @@ system_ss.add(when: 'CONFIG_IMX', if_true: files('imx_spi.c')) system_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_spi.c')) system_ss.add(when: 'CONFIG_IBEX', if_true: files('ibex_spi_host.c')) system_ss.add(when: 'CONFIG_BCM2835_SPI', if_true: files('bcm2835_spi.c')) +system_ss.add(when: 'CONFIG_PNV_SPI', if_true: files('pnv_spi.c')) diff --git a/hw/ssi/pnv_spi.c b/hw/ssi/pnv_spi.c new file mode 100644 index 0000000000..468afdad07 --- /dev/null +++ b/hw/ssi/pnv_spi.c @@ -0,0 +1,215 @@ +/* + * QEMU PowerPC SPI model + * + * Copyright (c) 2024, IBM Corporation. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ssi/pnv_spi.h" +#include "hw/ssi/pnv_spi_regs.h" +#include "hw/ssi/ssi.h" +#include <libfdt.h> +#include "hw/irq.h" +#include "trace.h" + +/* + * Macro from include/hw/ppc/fdt.h + * fdt.h cannot be included here as it contain ppc target specific dependency. + */ +#define _FDT(exp) \ + do { \ + int _ret = (exp); \ + if (_ret < 0) { \ + qemu_log_mask(LOG_GUEST_ERROR, \ + "error creating device tree: %s: %s", \ + #exp, fdt_strerror(_ret)); \ + exit(1); \ + } \ + } while (0) + +static uint64_t pnv_spi_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvSpi *s = PNV_SPI(opaque); + uint32_t reg = addr >> 3; + uint64_t val = ~0ull; + + switch (reg) { + case ERROR_REG: + case SPI_CTR_CFG_REG: + case CONFIG_REG1: + case SPI_CLK_CFG_REG: + case SPI_MM_REG: + case SPI_XMIT_DATA_REG: + val = s->regs[reg]; + break; + case SPI_RCV_DATA_REG: + val = s->regs[reg]; + trace_pnv_spi_read_RDR(val); + s->status = SETFIELD(SPI_STS_RDR_FULL, s->status, 0); + break; + case SPI_SEQ_OP_REG: + val = 0; + for (int i = 0; i < PNV_SPI_REG_SIZE; i++) { + val = (val << 8) | s->seq_op[i]; + } + break; + case SPI_STS_REG: + val = s->status; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi_regs: Invalid xscom " + "read at 0x%" PRIx32 "\n", reg); + } + + trace_pnv_spi_read(addr, val); + return val; +} + +static void pnv_spi_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvSpi *s = PNV_SPI(opaque); + uint32_t reg = addr >> 3; + + trace_pnv_spi_write(addr, val); + + switch (reg) { + case ERROR_REG: + case SPI_CTR_CFG_REG: + case CONFIG_REG1: + case SPI_MM_REG: + case SPI_RCV_DATA_REG: + s->regs[reg] = val; + break; + case SPI_CLK_CFG_REG: + /* + * To reset the SPI controller write the sequence 0x5 0xA to + * reset_control field + */ + if ((GETFIELD(SPI_CLK_CFG_RST_CTRL,
s->regs[SPI_CLK_CFG_REG]) == 0x5) + && (GETFIELD(SPI_CLK_CFG_RST_CTRL, val) == 0xA)) { + /* SPI controller reset sequence completed, resetting */ + s->regs[reg] = SPI_CLK_CFG_HARD_RST; + } else { + s->regs[reg] = val; + } + break; + case SPI_XMIT_DATA_REG: + /* + * Writing to the transmit data register causes the transmit data + * register full status bit in the status register to be set. Writing + * when the transmit data register full status bit is already set + * causes a "Resource Not Available" condition. This is not possible + * in the model since writes to this register are not asynchronous to + * the operation sequence like it would be in hardware. + */ + s->regs[reg] = val; + trace_pnv_spi_write_TDR(val); + s->status = SETFIELD(SPI_STS_TDR_FULL, s->status, 1); + s->status = SETFIELD(SPI_STS_TDR_UNDERRUN, s->status, 0); + break; + case SPI_SEQ_OP_REG: + for (int i = 0; i < PNV_SPI_REG_SIZE; i++) { + s->seq_op[i] = (val >> (56 - i * 8)) & 0xFF; + } + break; + case SPI_STS_REG: + /* other fields are ignore_write */ + s->status = SETFIELD(SPI_STS_RDR_OVERRUN, s->status, + GETFIELD(SPI_STS_RDR, val)); + s->status = SETFIELD(SPI_STS_TDR_OVERRUN, s->status, + GETFIELD(SPI_STS_TDR, val)); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "pnv_spi_regs: Invalid xscom " + "write at 0x%" PRIx32 "\n", reg); + } + return; +} + +static const MemoryRegionOps pnv_spi_xscom_ops = { + .read = pnv_spi_xscom_read, + .write = pnv_spi_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static Property pnv_spi_properties[] = { + DEFINE_PROP_UINT32("spic_num", PnvSpi, spic_num, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_spi_realize(DeviceState *dev, Error **errp) +{ + PnvSpi *s = PNV_SPI(dev); + g_autofree char *name = g_strdup_printf(TYPE_PNV_SPI_BUS ".%d", + s->spic_num); + s->ssi_bus = ssi_create_bus(dev, name); + s->cs_line = 
g_new0(qemu_irq, 1); + qdev_init_gpio_out_named(DEVICE(s), s->cs_line, "cs", 1); + + /* spi scoms */ + pnv_xscom_region_init(&s->xscom_spic_regs, OBJECT(s), &pnv_spi_xscom_ops, + s, "xscom-spi", PNV10_XSCOM_PIB_SPIC_SIZE); +} + +static int pnv_spi_dt_xscom(PnvXScomInterface *dev, void *fdt, + int offset) +{ + PnvSpi *s = PNV_SPI(dev); + g_autofree char *name; + int s_offset; + const char compat[] = "ibm,power10-spi"; + uint32_t spic_pcba = PNV10_XSCOM_PIB_SPIC_BASE + + s->spic_num * PNV10_XSCOM_PIB_SPIC_SIZE; + uint32_t reg[] = { + cpu_to_be32(spic_pcba), + cpu_to_be32(PNV10_XSCOM_PIB_SPIC_SIZE) + }; + name = g_strdup_printf("pnv_spi@%x", spic_pcba); + s_offset = fdt_add_subnode(fdt, offset, name); + _FDT(s_offset); + + _FDT(fdt_setprop(fdt, s_offset, "reg", reg, sizeof(reg))); + _FDT(fdt_setprop(fdt, s_offset, "compatible", compat, sizeof(compat))); + _FDT((fdt_setprop_cell(fdt, s_offset, "spic_num#", s->spic_num))); + return 0; +} + +static void pnv_spi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xscomc = PNV_XSCOM_INTERFACE_CLASS(klass); + + xscomc->dt_xscom = pnv_spi_dt_xscom; + + dc->desc = "PowerNV SPI"; + dc->realize = pnv_spi_realize; + device_class_set_props(dc, pnv_spi_properties); +} + +static const TypeInfo pnv_spi_info = { + .name = TYPE_PNV_SPI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(PnvSpi), + .class_init = pnv_spi_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_spi_register_types(void) +{ + type_register_static(&pnv_spi_info); +} + +type_init(pnv_spi_register_types); diff --git a/hw/ssi/trace-events b/hw/ssi/trace-events index 7b5ad6a939..2cc29e1284 100644 --- a/hw/ssi/trace-events +++ b/hw/ssi/trace-events @@ -32,3 +32,9 @@ ibex_spi_host_reset(const char *msg) "%s" ibex_spi_host_transfer(uint32_t tx_data, uint32_t rx_data) "tx_data: 0x%" PRIx32 " rx_data: @0x%" PRIx32 ibex_spi_host_write(uint64_t 
addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size %u: 0x%" PRIx64 ibex_spi_host_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " size %u:" + +#pnv_spi.c +pnv_spi_read(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64 +pnv_spi_write(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64 +pnv_spi_read_RDR(uint64_t val) "data extracted = 0x%" PRIx64 +pnv_spi_write_TDR(uint64_t val) "being written, data written = 0x%" PRIx64 diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h index 764b324a00..648388a599 100644 --- a/include/hw/ppc/pnv_xscom.h +++ b/include/hw/ppc/pnv_xscom.h @@ -200,6 +200,9 @@ struct PnvXScomInterfaceClass { #define PNV10_XSCOM_PEC_PCI_BASE 0x8010800 /* index goes upwards ... */ #define PNV10_XSCOM_PEC_PCI_SIZE 0x200 +#define PNV10_XSCOM_PIB_SPIC_BASE 0xc0000 +#define PNV10_XSCOM_PIB_SPIC_SIZE 0x20 + void pnv_xscom_init(PnvChip *chip, uint64_t size, hwaddr addr); int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset, uint64_t xscom_base, uint64_t xscom_size, diff --git a/include/hw/ssi/pnv_spi.h b/include/hw/ssi/pnv_spi.h new file mode 100644 index 0000000000..833042b74b --- /dev/null +++ b/include/hw/ssi/pnv_spi.h @@ -0,0 +1,40 @@ +/* + * QEMU PowerPC SPI model + * + * Copyright (c) 2024, IBM Corporation. + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This model Supports a connection to a single SPI responder. + * Introduced for P10 to provide access to SPI seeproms, TPM, flash device + * and an ADC controller. 
+ */ + +#ifndef PPC_PNV_SPI_H +#define PPC_PNV_SPI_H + +#include "hw/ssi/ssi.h" +#include "hw/sysbus.h" + +#define TYPE_PNV_SPI "pnv-spi" +OBJECT_DECLARE_SIMPLE_TYPE(PnvSpi, PNV_SPI) + +#define PNV_SPI_REG_SIZE 8 +#define PNV_SPI_REGS 7 + +#define TYPE_PNV_SPI_BUS "pnv-spi-bus" +typedef struct PnvSpi { + SysBusDevice parent_obj; + + SSIBus *ssi_bus; + qemu_irq *cs_line; + MemoryRegion xscom_spic_regs; + /* SPI object number */ + uint32_t spic_num; + + /* SPI registers */ + uint64_t regs[PNV_SPI_REGS]; + uint8_t seq_op[PNV_SPI_REG_SIZE]; + uint64_t status; +} PnvSpi; +#endif /* PPC_PNV_SPI_H */ diff --git a/include/hw/ssi/pnv_spi_regs.h b/include/hw/ssi/pnv_spi_regs.h new file mode 100644 index 0000000000..5b6ff72d02 --- /dev/null +++ b/include/hw/ssi/pnv_spi_regs.h @@ -0,0 +1,67 @@ +/* + * QEMU PowerPC SPI model + * + * Copyright (c) 2024, IBM Corporation. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef PNV_SPI_CONTROLLER_REGS_H +#define PNV_SPI_CONTROLLER_REGS_H + +/* + * Macros from target/ppc/cpu.h + * These macros are copied from ppc target specific file target/ppc/cpu.h + * as target/ppc/cpu.h cannot be included here. 
+ */ +#define PPC_BIT(bit) (0x8000000000000000ULL >> (bit)) +#define PPC_BIT8(bit) (0x80 >> (bit)) +#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) +#define PPC_BITMASK8(bs, be) ((PPC_BIT8(bs) - PPC_BIT8(be)) | PPC_BIT8(bs)) +#define MASK_TO_LSH(m) (__builtin_ffsll(m) - 1) +#define GETFIELD(m, v) (((v) & (m)) >> MASK_TO_LSH(m)) +#define SETFIELD(m, v, val) \ + (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m))) + +/* Error Register */ +#define ERROR_REG 0x00 + +/* counter_config_reg */ +#define SPI_CTR_CFG_REG 0x01 + +/* config_reg */ +#define CONFIG_REG1 0x02 + +/* clock_config_reset_control_ecc_enable_reg */ +#define SPI_CLK_CFG_REG 0x03 +#define SPI_CLK_CFG_HARD_RST 0x0084000000000000; +#define SPI_CLK_CFG_RST_CTRL PPC_BITMASK(24, 27) + +/* memory_mapping_reg */ +#define SPI_MM_REG 0x04 + +/* transmit_data_reg */ +#define SPI_XMIT_DATA_REG 0x05 + +/* receive_data_reg */ +#define SPI_RCV_DATA_REG 0x06 + +/* sequencer_operation_reg */ +#define SPI_SEQ_OP_REG 0x07 + +/* status_reg */ +#define SPI_STS_REG 0x08 +#define SPI_STS_RDR_FULL PPC_BIT(0) +#define SPI_STS_RDR_OVERRUN PPC_BIT(1) +#define SPI_STS_RDR_UNDERRUN PPC_BIT(2) +#define SPI_STS_TDR_FULL PPC_BIT(4) +#define SPI_STS_TDR_OVERRUN PPC_BIT(5) +#define SPI_STS_TDR_UNDERRUN PPC_BIT(6) +#define SPI_STS_SEQ_FSM PPC_BITMASK(8, 15) +#define SPI_STS_SHIFTER_FSM PPC_BITMASK(16, 27) +#define SPI_STS_SEQ_INDEX PPC_BITMASK(28, 31) +#define SPI_STS_GEN_STATUS PPC_BITMASK(32, 63) +#define SPI_STS_RDR PPC_BITMASK(1, 3) +#define SPI_STS_TDR PPC_BITMASK(5, 7) + +#endif From b4cb930e40f172e2b28a9fbe0189e97469aad648 Mon Sep 17 00:00:00 2001 From: Chalapathi V Date: Wed, 26 Jun 2024 04:05:25 -0500 Subject: [PATCH 38/96] hw/ssi: Extend SPI model In this commit SPI shift engine and sequencer logic is implemented. Shift engine performs serialization and de-serialization according to the control by the sequencer and according to the setup defined in the configuration registers. 
Sequencer implements the main control logic and FSM to handle data transmit and data receive control of the shift engine. Signed-off-by: Chalapathi V Reviewed-by: Caleb Schlossin Reviewed-by: Glenn Miles Signed-off-by: Nicholas Piggin --- hw/ssi/pnv_spi.c | 1045 +++++++++++++++++++++++++++++++++ hw/ssi/trace-events | 15 + include/hw/ssi/pnv_spi.h | 27 + include/hw/ssi/pnv_spi_regs.h | 68 ++- 4 files changed, 1154 insertions(+), 1 deletion(-) diff --git a/hw/ssi/pnv_spi.c b/hw/ssi/pnv_spi.c index 468afdad07..cdff3f9621 100644 --- a/hw/ssi/pnv_spi.c +++ b/hw/ssi/pnv_spi.c @@ -17,6 +17,9 @@ #include "hw/irq.h" #include "trace.h" +#define PNV_SPI_OPCODE_LO_NIBBLE(x) (x & 0x0F) +#define PNV_SPI_MASKED_OPCODE(x) (x & 0xF0) + /* * Macro from include/hw/ppc/fdt.h * fdt.h cannot be included here as it contain ppc target specific dependency. @@ -32,6 +35,1040 @@ } \ } while (0) +/* PnvXferBuffer */ +typedef struct PnvXferBuffer { + + uint32_t len; + uint8_t *data; + +} PnvXferBuffer; + +/* pnv_spi_xfer_buffer_methods */ +static PnvXferBuffer *pnv_spi_xfer_buffer_new(void) +{ + PnvXferBuffer *payload = g_malloc0(sizeof(*payload)); + + return payload; +} + +static void pnv_spi_xfer_buffer_free(PnvXferBuffer *payload) +{ + free(payload->data); + free(payload); +} + +static uint8_t *pnv_spi_xfer_buffer_write_ptr(PnvXferBuffer *payload, + uint32_t offset, uint32_t length) +{ + if (payload->len < (offset + length)) { + payload->len = offset + length; + payload->data = g_realloc(payload->data, payload->len); + } + return &payload->data[offset]; +} + +static bool does_rdr_match(PnvSpi *s) +{ + /* + * According to spec, the mask bits that are 0 are compared and the + * bits that are 1 are ignored. 
+ */ + uint16_t rdr_match_mask = GETFIELD(SPI_MM_RDR_MATCH_MASK, + s->regs[SPI_MM_REG]); + uint16_t rdr_match_val = GETFIELD(SPI_MM_RDR_MATCH_VAL, + s->regs[SPI_MM_REG]); + + if ((~rdr_match_mask & rdr_match_val) == ((~rdr_match_mask) & + GETFIELD(PPC_BITMASK(48, 63), s->regs[SPI_RCV_DATA_REG]))) { + return true; + } + return false; +} + +static uint8_t get_from_offset(PnvSpi *s, uint8_t offset) +{ + uint8_t byte; + + /* + * Offset is an index between 0 and PNV_SPI_REG_SIZE - 1 + * Check the offset before using it. + */ + if (offset < PNV_SPI_REG_SIZE) { + byte = (s->regs[SPI_XMIT_DATA_REG] >> (56 - offset * 8)) & 0xFF; + } else { + /* + * Log an error and return a 0xFF since we have to assign something + * to byte before returning. + */ + qemu_log_mask(LOG_GUEST_ERROR, "Invalid offset = %d used to get byte " + "from TDR\n", offset); + byte = 0xff; + } + return byte; +} + +static uint8_t read_from_frame(PnvSpi *s, uint8_t *read_buf, uint8_t nr_bytes, + uint8_t ecc_count, uint8_t shift_in_count) +{ + uint8_t byte; + int count = 0; + + while (count < nr_bytes) { + shift_in_count++; + if ((ecc_count != 0) && + (shift_in_count == (PNV_SPI_REG_SIZE + ecc_count))) { + shift_in_count = 0; + } else { + byte = read_buf[count]; + trace_pnv_spi_shift_rx(byte, count); + s->regs[SPI_RCV_DATA_REG] = (s->regs[SPI_RCV_DATA_REG] << 8) | byte; + } + count++; + } /* end of while */ + return shift_in_count; +} + +static void spi_response(PnvSpi *s, int bits, PnvXferBuffer *rsp_payload) +{ + uint8_t ecc_count; + uint8_t shift_in_count; + + /* + * Processing here must handle: + * - Which bytes in the payload we should move to the RDR + * - Explicit mode counter configuration settings + * - RDR full and RDR overrun status + */ + + /* + * First check that the response payload is the exact same + * number of bytes as the request payload was + */ + if (rsp_payload->len != (s->N1_bytes + s->N2_bytes)) { + qemu_log_mask(LOG_GUEST_ERROR, "Invalid response payload size in " + "bytes, expected 
%d, got %d\n", + (s->N1_bytes + s->N2_bytes), rsp_payload->len); + } else { + uint8_t ecc_control; + trace_pnv_spi_rx_received(rsp_payload->len); + trace_pnv_spi_log_Ncounts(s->N1_bits, s->N1_bytes, s->N1_tx, + s->N1_rx, s->N2_bits, s->N2_bytes, s->N2_tx, s->N2_rx); + /* + * Adding an ECC count let's us know when we have found a payload byte + * that was shifted in but cannot be loaded into RDR. Bits 29-30 of + * clock_config_reset_control register equal to either 0b00 or 0b10 + * indicate that we are taking in data with ECC and either applying + * the ECC or discarding it. + */ + ecc_count = 0; + ecc_control = GETFIELD(SPI_CLK_CFG_ECC_CTRL, s->regs[SPI_CLK_CFG_REG]); + if (ecc_control == 0 || ecc_control == 2) { + ecc_count = 1; + } + /* + * Use the N1_rx and N2_rx counts to control shifting data from the + * payload into the RDR. Keep an overall count of the number of bytes + * shifted into RDR so we can discard every 9th byte when ECC is + * enabled. + */ + shift_in_count = 0; + /* Handle the N1 portion of the frame first */ + if (s->N1_rx != 0) { + trace_pnv_spi_rx_read_N1frame(); + shift_in_count = read_from_frame(s, &rsp_payload->data[0], + s->N1_bytes, ecc_count, shift_in_count); + } + /* Handle the N2 portion of the frame */ + if (s->N2_rx != 0) { + trace_pnv_spi_rx_read_N2frame(); + shift_in_count = read_from_frame(s, + &rsp_payload->data[s->N1_bytes], s->N2_bytes, + ecc_count, shift_in_count); + } + if ((s->N1_rx + s->N2_rx) > 0) { + /* + * Data was received so handle RDR status. + * It is easier to handle RDR_full and RDR_overrun status here + * since the RDR register's shift_byte_in method is called + * multiple times in a row. Controlling RDR status is done here + * instead of in the RDR scoped methods for that reason. + */ + if (GETFIELD(SPI_STS_RDR_FULL, s->status) == 1) { + /* + * Data was shifted into the RDR before having been read + * causing previous data to have been overrun. 
+ */ + s->status = SETFIELD(SPI_STS_RDR_OVERRUN, s->status, 1); + } else { + /* + * Set status to indicate that the received data register is + * full. This flag is only cleared once the RDR is unloaded. + */ + s->status = SETFIELD(SPI_STS_RDR_FULL, s->status, 1); + } + } + } /* end of else */ +} /* end of spi_response() */ + +static void transfer(PnvSpi *s, PnvXferBuffer *payload) +{ + uint32_t tx; + uint32_t rx; + PnvXferBuffer *rsp_payload = NULL; + + rsp_payload = pnv_spi_xfer_buffer_new(); + for (int offset = 0; offset < payload->len; offset += s->transfer_len) { + tx = 0; + for (int i = 0; i < s->transfer_len; i++) { + if ((offset + i) >= payload->len) { + tx <<= 8; + } else { + tx = (tx << 8) | payload->data[offset + i]; + } + } + rx = ssi_transfer(s->ssi_bus, tx); + for (int i = 0; i < s->transfer_len; i++) { + if ((offset + i) >= payload->len) { + break; + } + *(pnv_spi_xfer_buffer_write_ptr(rsp_payload, rsp_payload->len, 1)) = + (rx >> (8 * (s->transfer_len - 1) - i * 8)) & 0xFF; + } + } + if (rsp_payload != NULL) { + spi_response(s, s->N1_bits, rsp_payload); + } +} + +static inline uint8_t get_seq_index(PnvSpi *s) +{ + return GETFIELD(SPI_STS_SEQ_INDEX, s->status); +} + +static inline void next_sequencer_fsm(PnvSpi *s) +{ + uint8_t seq_index = get_seq_index(s); + s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, (seq_index + 1)); + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_INDEX_INCREMENT); +} + +/* + * Calculate the N1 counters based on passed in opcode and + * internal register values. + * The method assumes that the opcode is a Shift_N1 opcode + * and doesn't test it. + * The counters returned are: + * N1 bits: Number of bits in the payload data that are significant + * to the responder. + * N1_bytes: Total count of payload bytes for the N1 (portion of the) frame. 
+ * N1_tx: Total number of bytes taken from TDR for N1 + * N1_rx: Total number of bytes taken from the payload for N1 + */ +static void calculate_N1(PnvSpi *s, uint8_t opcode) +{ + /* + * Shift_N1 opcode form: 0x3M + * Implicit mode: + * If M != 0 the shift count is M bytes and M is the number of tx bytes. + * Forced Implicit mode: + * M is the shift count but tx and rx is determined by the count control + * register fields. Note that we only check for forced Implicit mode when + * M != 0 since the mode doesn't make sense when M = 0. + * Explicit mode: + * If M == 0 then shift count is number of bits defined in the + * Counter Configuration Register's shift_count_N1 field. + */ + if (PNV_SPI_OPCODE_LO_NIBBLE(opcode) == 0) { + /* Explicit mode */ + s->N1_bits = GETFIELD(SPI_CTR_CFG_N1, s->regs[SPI_CTR_CFG_REG]); + s->N1_bytes = (s->N1_bits + 7) / 8; + s->N1_tx = 0; + s->N1_rx = 0; + /* If tx count control for N1 is set, load the tx value */ + if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B2, s->regs[SPI_CTR_CFG_REG]) == 1) { + s->N1_tx = s->N1_bytes; + } + /* If rx count control for N1 is set, load the rx value */ + if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B3, s->regs[SPI_CTR_CFG_REG]) == 1) { + s->N1_rx = s->N1_bytes; + } + } else { + /* Implicit mode/Forced Implicit mode, use M field from opcode */ + s->N1_bytes = PNV_SPI_OPCODE_LO_NIBBLE(opcode); + s->N1_bits = s->N1_bytes * 8; + /* + * Assume that we are going to transmit the count + * (pure Implicit only) + */ + s->N1_tx = s->N1_bytes; + s->N1_rx = 0; + /* Let Forced Implicit mode have an effect on the counts */ + if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B1, s->regs[SPI_CTR_CFG_REG]) == 1) { + /* + * If Forced Implicit mode and count control doesn't + * indicate transmit then reset the tx count to 0 + */ + if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B2, + s->regs[SPI_CTR_CFG_REG]) == 0) { + s->N1_tx = 0; + } + /* If rx count control for N1 is set, load the rx value */ + if (GETFIELD(SPI_CTR_CFG_N1_CTRL_B3, + s->regs[SPI_CTR_CFG_REG]) == 1) { + 
s->N1_rx = s->N1_bytes; + } + } + } + /* + * Enforce an upper limit on the size of N1 that is equal to the known size + * of the shift register, 64 bits or 72 bits if ECC is enabled. + * If the size exceeds 72 bits it is a user error so log an error, + * cap the size at a max of 64 bits or 72 bits and set the sequencer FSM + * error bit. + */ + uint8_t ecc_control = GETFIELD(SPI_CLK_CFG_ECC_CTRL, + s->regs[SPI_CLK_CFG_REG]); + if (ecc_control == 0 || ecc_control == 2) { + if (s->N1_bytes > (PNV_SPI_REG_SIZE + 1)) { + qemu_log_mask(LOG_GUEST_ERROR, "Unsupported N1 shift size when " + "ECC enabled, bytes = 0x%x, bits = 0x%x\n", + s->N1_bytes, s->N1_bits); + s->N1_bytes = PNV_SPI_REG_SIZE + 1; + s->N1_bits = s->N1_bytes * 8; + } + } else if (s->N1_bytes > PNV_SPI_REG_SIZE) { + qemu_log_mask(LOG_GUEST_ERROR, "Unsupported N1 shift size, " + "bytes = 0x%x, bits = 0x%x\n", + s->N1_bytes, s->N1_bits); + s->N1_bytes = PNV_SPI_REG_SIZE; + s->N1_bits = s->N1_bytes * 8; + } +} /* end of calculate_N1 */ + +/* + * Shift_N1 operation handler method + */ +static bool operation_shiftn1(PnvSpi *s, uint8_t opcode, + PnvXferBuffer **payload, bool send_n1_alone) +{ + uint8_t n1_count; + bool stop = false; + + /* + * If there isn't a current payload left over from a stopped sequence + * create a new one. + */ + if (*payload == NULL) { + *payload = pnv_spi_xfer_buffer_new(); + } + /* + * Use a combination of N1 counters to build the N1 portion of the + * transmit payload. + * We only care about transmit at this time since the request payload + * only represents data going out on the controller output line. + * Leave mode specific considerations in the calculate function since + * all we really care about are counters that tell use exactly how + * many bytes are in the payload and how many of those bytes to + * include from the TDR into the payload. 
+ */ + calculate_N1(s, opcode); + trace_pnv_spi_log_Ncounts(s->N1_bits, s->N1_bytes, s->N1_tx, + s->N1_rx, s->N2_bits, s->N2_bytes, s->N2_tx, s->N2_rx); + /* + * Zero out the N2 counters here in case there is no N2 operation following + * the N1 operation in the sequencer. This keeps leftover N2 information + * from interfering with spi_response logic. + */ + s->N2_bits = 0; + s->N2_bytes = 0; + s->N2_tx = 0; + s->N2_rx = 0; + /* + * N1_bytes is the overall size of the N1 portion of the frame regardless of + * whether N1 is used for tx, rx or both. Loop over the size to build a + * payload that is N1_bytes long. + * N1_tx is the count of bytes to take from the TDR and "shift" into the + * frame which means append those bytes to the payload for the N1 portion + * of the frame. + * If N1_tx is 0 or if the count exceeds the size of the TDR append 0xFF to + * the frame until the overall N1 count is reached. + */ + n1_count = 0; + while (n1_count < s->N1_bytes) { + /* + * Assuming that if N1_tx is not equal to 0 then it is the same as + * N1_bytes. + */ + if ((s->N1_tx != 0) && (n1_count < PNV_SPI_REG_SIZE)) { + + if (GETFIELD(SPI_STS_TDR_FULL, s->status) == 1) { + /* + * Note that we are only appending to the payload IF the TDR + * is full otherwise we don't touch the payload because we are + * going to NOT send the payload and instead tell the sequencer + * that called us to stop and wait for a TDR write so we have + * data to load into the payload. + */ + uint8_t n1_byte = 0x00; + n1_byte = get_from_offset(s, n1_count); + trace_pnv_spi_tx_append("n1_byte", n1_byte, n1_count); + *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1)) = + n1_byte; + } else { + /* + * We hit a shift_n1 opcode TX but the TDR is empty, tell the + * sequencer to stop and break this loop. 
+ */ + trace_pnv_spi_sequencer_stop_requested("Shift N1" + "set for transmit but TDR is empty"); + stop = true; + break; + } + } else { + /* + * Cases here: + * - we are receiving during the N1 frame segment and the RDR + * is full so we need to stop until the RDR is read + * - we are transmitting and we don't care about RDR status + * since we won't be loading RDR during the frame segment. + * - we are receiving and the RDR is empty so we allow the operation + * to proceed. + */ + if ((s->N1_rx != 0) && (GETFIELD(SPI_STS_RDR_FULL, + s->status) == 1)) { + trace_pnv_spi_sequencer_stop_requested("shift N1" + "set for receive but RDR is full"); + stop = true; + break; + } else { + trace_pnv_spi_tx_append_FF("n1_byte"); + *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1)) + = 0xff; + } + } + n1_count++; + } /* end of while */ + /* + * If we are not stopping due to an empty TDR and we are doing an N1 TX + * and the TDR is full we need to clear the TDR_full status. + * Do this here instead of up in the loop above so we don't log the message + * in every loop iteration. + * Ignore the send_n1_alone flag, all that does is defer the TX until the N2 + * operation, which was found immediately after the current opcode. The TDR + * was unloaded and will be shifted so we have to clear the TDR_full status. + */ + if (!stop && (s->N1_tx != 0) && + (GETFIELD(SPI_STS_TDR_FULL, s->status) == 1)) { + s->status = SETFIELD(SPI_STS_TDR_FULL, s->status, 0); + } + /* + * There are other reasons why the shifter would stop, such as a TDR empty + * or RDR full condition with N1 set to receive. If we haven't stopped due + * to either one of those conditions then check if the send_n1_alone flag is + * equal to False, indicating the next opcode is an N2 operation, AND if + * the N2 counter reload switch (bit 0 of the N2 count control field) is + * set. 
This condition requires a pacing write to "kick" off the N2 + * shift which includes the N1 shift as well when send_n1_alone is False. + */ + if (!stop && !send_n1_alone && + (GETFIELD(SPI_CTR_CFG_N2_CTRL_B0, s->regs[SPI_CTR_CFG_REG]) == 1)) { + trace_pnv_spi_sequencer_stop_requested("N2 counter reload " + "active, stop N1 shift, TDR_underrun set to 1"); + stop = true; + s->status = SETFIELD(SPI_STS_TDR_UNDERRUN, s->status, 1); + } + /* + * If send_n1_alone is set AND we have a full TDR then this is the first and + * last payload to send and we don't have an N2 frame segment to add to the + * payload. + */ + if (send_n1_alone && !stop) { + /* We have a TX and a full TDR or an RX and an empty RDR */ + trace_pnv_spi_tx_request("Shifting N1 frame", (*payload)->len); + transfer(s, *payload); + /* The N1 frame shift is complete so reset the N1 counters */ + s->N2_bits = 0; + s->N2_bytes = 0; + s->N2_tx = 0; + s->N2_rx = 0; + pnv_spi_xfer_buffer_free(*payload); + *payload = NULL; + } + return stop; +} /* end of operation_shiftn1() */ + +/* + * Calculate the N2 counters based on passed in opcode and + * internal register values. + * The method assumes that the opcode is a Shift_N2 opcode + * and doesn't test it. + * The counters returned are: + * N2 bits: Number of bits in the payload data that are significant + * to the responder. + * N2_bytes: Total count of payload bytes for the N2 frame. + * N2_tx: Total number of bytes taken from TDR for N2 + * N2_rx: Total number of bytes taken from the payload for N2 + */ +static void calculate_N2(PnvSpi *s, uint8_t opcode) +{ + /* + * Shift_N2 opcode form: 0x4M + * Implicit mode: + * If M!=0 the shift count is M bytes and M is the number of rx bytes. + * Forced Implicit mode: + * M is the shift count but tx and rx is determined by the count control + * register fields. Note that we only check for Forced Implicit mode when + * M != 0 since the mode doesn't make sense when M = 0. 
+ * Explicit mode: + * If M==0 then shift count is number of bits defined in the + * Counter Configuration Register's shift_count_N2 field. + */ + if (PNV_SPI_OPCODE_LO_NIBBLE(opcode) == 0) { + /* Explicit mode */ + s->N2_bits = GETFIELD(SPI_CTR_CFG_N2, s->regs[SPI_CTR_CFG_REG]); + s->N2_bytes = (s->N2_bits + 7) / 8; + s->N2_tx = 0; + s->N2_rx = 0; + /* If tx count control for N2 is set, load the tx value */ + if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B2, s->regs[SPI_CTR_CFG_REG]) == 1) { + s->N2_tx = s->N2_bytes; + } + /* If rx count control for N2 is set, load the rx value */ + if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B3, s->regs[SPI_CTR_CFG_REG]) == 1) { + s->N2_rx = s->N2_bytes; + } + } else { + /* Implicit mode/Forced Implicit mode, use M field from opcode */ + s->N2_bytes = PNV_SPI_OPCODE_LO_NIBBLE(opcode); + s->N2_bits = s->N2_bytes * 8; + /* Assume that we are going to receive the count */ + s->N2_rx = s->N2_bytes; + s->N2_tx = 0; + /* Let Forced Implicit mode have an effect on the counts */ + if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B1, s->regs[SPI_CTR_CFG_REG]) == 1) { + /* + * If Forced Implicit mode and count control doesn't + * indicate a receive then reset the rx count to 0 + */ + if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B3, + s->regs[SPI_CTR_CFG_REG]) == 0) { + s->N2_rx = 0; + } + /* If tx count control for N2 is set, load the tx value */ + if (GETFIELD(SPI_CTR_CFG_N2_CTRL_B2, + s->regs[SPI_CTR_CFG_REG]) == 1) { + s->N2_tx = s->N2_bytes; + } + } + } + /* + * Enforce an upper limit on the size of N2 that is equal to the + * known size of the shift register, 64 bits or 72 bits if ECC + * is enabled. + * If the size exceeds 72 bits it is a user error so log an error, + * cap the size at a max of 64 bits or 72 bits and set the sequencer FSM + * error bit.
+ */ + uint8_t ecc_control = GETFIELD(SPI_CLK_CFG_ECC_CTRL, + s->regs[SPI_CLK_CFG_REG]); + if (ecc_control == 0 || ecc_control == 2) { + if (s->N2_bytes > (PNV_SPI_REG_SIZE + 1)) { + /* Unsupported N2 shift size when ECC enabled */ + s->N2_bytes = PNV_SPI_REG_SIZE + 1; + s->N2_bits = s->N2_bytes * 8; + } + } else if (s->N2_bytes > PNV_SPI_REG_SIZE) { + /* Unsupported N2 shift size */ + s->N2_bytes = PNV_SPI_REG_SIZE; + s->N2_bits = s->N2_bytes * 8; + } +} /* end of calculate_N2 */ + +/* + * Shift_N2 operation handler method + */ + +static bool operation_shiftn2(PnvSpi *s, uint8_t opcode, + PnvXferBuffer **payload) +{ + uint8_t n2_count; + bool stop = false; + + /* + * If there isn't a current payload left over from a stopped sequence + * create a new one. + */ + if (*payload == NULL) { + *payload = pnv_spi_xfer_buffer_new(); + } + /* + * Use a combination of N2 counters to build the N2 portion of the + * transmit payload. + */ + calculate_N2(s, opcode); + trace_pnv_spi_log_Ncounts(s->N1_bits, s->N1_bytes, s->N1_tx, + s->N1_rx, s->N2_bits, s->N2_bytes, s->N2_tx, s->N2_rx); + /* + * The only difference between this code and the code for shift N1 is + * that this code has to account for the possible presence of N1 transmit + * bytes already taken from the TDR. + * If there are bytes to be transmitted for the N2 portion of the frame + * and there are still bytes in TDR that have not been copied into the + * TX data of the payload, this code will handle transmitting those + * remaining bytes. + * If for some reason the transmit count(s) add up to more than the size + * of the TDR we will just append 0xFF to the transmit payload data until + * the payload is N1 + N2 bytes long. + */ + n2_count = 0; + while (n2_count < s->N2_bytes) { + /* + * If the RDR is full and we need to RX just bail out, letting the + * code continue will end up building the payload twice in the same + * buffer since RDR full causes a sequence stop and restart. 
+ */ + if ((s->N2_rx != 0) && + (GETFIELD(SPI_STS_RDR_FULL, s->status) == 1)) { + trace_pnv_spi_sequencer_stop_requested("shift N2 set" + "for receive but RDR is full"); + stop = true; + break; + } + if ((s->N2_tx != 0) && ((s->N1_tx + n2_count) < + PNV_SPI_REG_SIZE)) { + /* Always append data for the N2 segment if it is set for TX */ + uint8_t n2_byte = 0x00; + n2_byte = get_from_offset(s, (s->N1_tx + n2_count)); + trace_pnv_spi_tx_append("n2_byte", n2_byte, (s->N1_tx + n2_count)); + *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1)) + = n2_byte; + } else { + /* + * Regardless of whether or not N2 is set for TX or RX, we need + * the number of bytes in the payload to match the overall length + * of the operation. + */ + trace_pnv_spi_tx_append_FF("n2_byte"); + *(pnv_spi_xfer_buffer_write_ptr(*payload, (*payload)->len, 1)) + = 0xff; + } + n2_count++; + } /* end of while */ + if (!stop) { + /* We have a TX and a full TDR or an RX and an empty RDR */ + trace_pnv_spi_tx_request("Shifting N2 frame", (*payload)->len); + transfer(s, *payload); + /* + * If we are doing an N2 TX and the TDR is full we need to clear the + * TDR_full status. Do this here instead of up in the loop above so we + * don't log the message in every loop iteration. + */ + if ((s->N2_tx != 0) && + (GETFIELD(SPI_STS_TDR_FULL, s->status) == 1)) { + s->status = SETFIELD(SPI_STS_TDR_FULL, s->status, 0); + } + /* + * The N2 frame shift is complete so reset the N2 counters. + * Reset the N1 counters also in case the frame was a combination of + * N1 and N2 segments. + */ + s->N2_bits = 0; + s->N2_bytes = 0; + s->N2_tx = 0; + s->N2_rx = 0; + s->N1_bits = 0; + s->N1_bytes = 0; + s->N1_tx = 0; + s->N1_rx = 0; + pnv_spi_xfer_buffer_free(*payload); + *payload = NULL; + } + return stop; +} /* end of operation_shiftn2()*/ + +static void operation_sequencer(PnvSpi *s) +{ + /* + * Loop through each sequencer operation ID and perform the requested + * operations. 
+ * Flag for indicating if we should send the N1 frame or wait to combine + * it with a following N2 frame. + */ + bool send_n1_alone = true; + bool stop = false; /* Flag to stop the sequencer */ + uint8_t opcode = 0; + uint8_t masked_opcode = 0; + + /* + * PnvXferBuffer for containing the payload of the SPI frame. + * This is a static because there are cases where a sequence has to stop + * and wait for the target application to unload the RDR. This can occur + * during a sequence where N1 is not sent alone and is instead combined with + * N2 since the N1 tx length + the N2 tx length is less than the size of + * the TDR, so the partially built payload must be kept across calls. + */ + static PnvXferBuffer *payload; + + if (payload == NULL) { + payload = pnv_spi_xfer_buffer_new(); + } + /* + * Clear the sequencer FSM error bit - general_SPI_status[3] + * before starting a sequence. + */ + s->status = SETFIELD(SPI_STS_GEN_STATUS_B3, s->status, 0); + /* + * If the FSM is idle set the sequencer index to 0 + * (new/restarted sequence) + */ + if (GETFIELD(SPI_STS_SEQ_FSM, s->status) == SEQ_STATE_IDLE) { + s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, 0); + } + /* + * There are only 8 possible operation IDs to iterate through though + * some operations may cause more than one frame to be sequenced. + */ + while (get_seq_index(s) < NUM_SEQ_OPS) { + opcode = s->seq_op[get_seq_index(s)]; + /* Set sequencer state to decode */ + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_DECODE); + /* + * Only the upper nibble of the operation ID is needed to know what + * kind of operation is requested. + */ + masked_opcode = PNV_SPI_MASKED_OPCODE(opcode); + switch (masked_opcode) { + /* + * Increment the operation index in each case instead of just + * once at the end in case an operation like the branch + * operation needs to change the index.
+ */ + case SEQ_OP_STOP: + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE); + /* A stop operation in any position stops the sequencer */ + trace_pnv_spi_sequencer_op("STOP", get_seq_index(s)); + + stop = true; + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_IDLE); + s->loop_counter_1 = 0; + s->loop_counter_2 = 0; + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_IDLE); + break; + + case SEQ_OP_SELECT_SLAVE: + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE); + trace_pnv_spi_sequencer_op("SELECT_SLAVE", get_seq_index(s)); + /* + * This device currently only supports a single responder + * connection at position 0. De-selecting a responder is fine + * and expected at the end of a sequence but selecting any + * responder other than 0 should cause an error. + */ + s->responder_select = PNV_SPI_OPCODE_LO_NIBBLE(opcode); + if (s->responder_select == 0) { + trace_pnv_spi_shifter_done(); + qemu_set_irq(s->cs_line[0], 1); + s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, + (get_seq_index(s) + 1)); + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_DONE); + } else if (s->responder_select != 1) { + qemu_log_mask(LOG_GUEST_ERROR, "Slave selection other than 1 " + "not supported, select = 0x%x\n", + s->responder_select); + trace_pnv_spi_sequencer_stop_requested("invalid " + "responder select"); + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_IDLE); + stop = true; + } else { + /* + * Only allow an FSM_START state when a responder is + * selected + */ + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_START); + trace_pnv_spi_shifter_stating(); + qemu_set_irq(s->cs_line[0], 0); + /* + * A Shift_N2 operation is only valid after a Shift_N1 + * according to the spec. The spec doesn't say if that means + * immediately after or just after at any point. 
We will track + * the occurrence of a Shift_N1 to enforce this requirement in + * the most generic way possible by assuming that the rule + * applies once a valid responder select has occurred. + */ + s->shift_n1_done = false; + next_sequencer_fsm(s); + } + break; + + case SEQ_OP_SHIFT_N1: + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE); + trace_pnv_spi_sequencer_op("SHIFT_N1", get_seq_index(s)); + /* + * Only allow a shift_n1 when the state is not IDLE or DONE. + * In either of those two cases the sequencer is not in a proper + * state to perform shift operations because the sequencer has: + * - processed a responder deselect (DONE) + * - processed a stop opcode (IDLE) + * - encountered an error (IDLE) + */ + if ((GETFIELD(SPI_STS_SHIFTER_FSM, s->status) == FSM_IDLE) || + (GETFIELD(SPI_STS_SHIFTER_FSM, s->status) == FSM_DONE)) { + qemu_log_mask(LOG_GUEST_ERROR, "Shift_N1 not allowed in " + "shifter state = 0x%llx", GETFIELD( + SPI_STS_SHIFTER_FSM, s->status)); + /* + * Set sequencer FSM error bit 3 (general_SPI_status[3]) + * in status reg. + */ + s->status = SETFIELD(SPI_STS_GEN_STATUS_B3, s->status, 1); + trace_pnv_spi_sequencer_stop_requested("invalid shifter state"); + stop = true; + } else { + /* + * Look for the special case where there is a shift_n1 set for + * transmit and it is followed by a shift_n2 set for transmit + * AND the combined transmit length of the two operations is + * less than or equal to the size of the TDR register. In this + * case we want to use both this current shift_n1 opcode and the + * following shift_n2 opcode to assemble the frame for + * transmission to the responder without requiring a refill of + * the TDR between the two operations. 
+ */ + if (PNV_SPI_MASKED_OPCODE(s->seq_op[get_seq_index(s) + 1]) + == SEQ_OP_SHIFT_N2) { + send_n1_alone = false; + } + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, + FSM_SHIFT_N1); + stop = operation_shiftn1(s, opcode, &payload, send_n1_alone); + if (stop) { + /* + * The operation code says to stop, this can occur if: + * (1) RDR is full and the N1 shift is set for receive + * (2) TDR was empty at the time of the N1 shift so we need + * to wait for data. + * (3) Neither 1 nor 2 are occurring and we aren't sending + * N1 alone and N2 counter reload is set (bit 0 of the N2 + * counter reload field). In this case TDR_underrun will + * be set and the Payload has been loaded so it is + * ok to advance the sequencer. + */ + if (GETFIELD(SPI_STS_TDR_UNDERRUN, s->status)) { + s->shift_n1_done = true; + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, + FSM_SHIFT_N2); + s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, + (get_seq_index(s) + 1)); + } else { + /* + * This is case (1) or (2) so the sequencer needs to + * wait and NOT go to the next sequence yet. + */ + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, + FSM_WAIT); + } + } else { + /* Ok to move on to the next index */ + s->shift_n1_done = true; + next_sequencer_fsm(s); + } + } + break; + + case SEQ_OP_SHIFT_N2: + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE); + trace_pnv_spi_sequencer_op("SHIFT_N2", get_seq_index(s)); + if (!s->shift_n1_done) { + qemu_log_mask(LOG_GUEST_ERROR, "Shift_N2 is not allowed if a " + "Shift_N1 is not done, shifter state = 0x%llx", + GETFIELD(SPI_STS_SHIFTER_FSM, s->status)); + /* + * In case the sequencer actually stops if an N2 shift is + * requested before any N1 shift is done. Set sequencer FSM + * error bit 3 (general_SPI_status[3]) in status reg.
+ */ + s->status = SETFIELD(SPI_STS_GEN_STATUS_B3, s->status, 1); + trace_pnv_spi_sequencer_stop_requested("shift_n2 " + "w/no shift_n1 done"); + stop = true; + } else { + /* Ok to do a Shift_N2 */ + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, + FSM_SHIFT_N2); + stop = operation_shiftn2(s, opcode, &payload); + /* + * If the operation code says to stop set the shifter state to + * wait and stop + */ + if (stop) { + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, + FSM_WAIT); + } else { + /* Ok to move on to the next index */ + next_sequencer_fsm(s); + } + } + break; + + case SEQ_OP_BRANCH_IFNEQ_RDR: + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE); + trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_RDR", get_seq_index(s)); + /* + * The memory mapping register RDR match value is compared against + * the 16 rightmost bytes of the RDR (potentially with masking). + * Since this comparison is performed against the contents of the + * RDR then a receive must have previously occurred otherwise + * there is no data to compare and the operation cannot be + * completed and will stop the sequencer until RDR full is set to + * 1. + */ + if (GETFIELD(SPI_STS_RDR_FULL, s->status) == 1) { + bool rdr_matched = false; + rdr_matched = does_rdr_match(s); + if (rdr_matched) { + trace_pnv_spi_RDR_match("success"); + /* A match occurred, increment the sequencer index. */ + next_sequencer_fsm(s); + } else { + trace_pnv_spi_RDR_match("failed"); + /* + * Branch the sequencer to the index coded into the op + * code. + */ + s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, + PNV_SPI_OPCODE_LO_NIBBLE(opcode)); + } + /* + * Regardless of where the branch ended up we want the + * sequencer to continue shifting so we have to clear + * RDR_full. 
+ */ + s->status = SETFIELD(SPI_STS_RDR_FULL, s->status, 0); + } else { + trace_pnv_spi_sequencer_stop_requested("RDR not" + "full for 0x6x opcode"); + stop = true; + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_WAIT); + } + break; + + case SEQ_OP_TRANSFER_TDR: + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE); + qemu_log_mask(LOG_GUEST_ERROR, "Transfer TDR is not supported\n"); + next_sequencer_fsm(s); + break; + + case SEQ_OP_BRANCH_IFNEQ_INC_1: + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE); + trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_INC_1", get_seq_index(s)); + /* + * The spec says the loop should execute count compare + 1 times. + * However we learned from engineering that we really only loop + * count_compare times, count compare = 0 makes this op code a + * no-op + */ + if (s->loop_counter_1 != + GETFIELD(SPI_CTR_CFG_CMP1, s->regs[SPI_CTR_CFG_REG])) { + /* + * Next index is the lower nibble of the branch operation ID, + * mask off all but the first three bits so we don't try to + * access beyond the sequencer_operation_reg boundary. + */ + s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, + PNV_SPI_OPCODE_LO_NIBBLE(opcode)); + s->loop_counter_1++; + } else { + /* Continue to next index if loop counter is reached */ + next_sequencer_fsm(s); + } + break; + + case SEQ_OP_BRANCH_IFNEQ_INC_2: + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE); + trace_pnv_spi_sequencer_op("BRANCH_IFNEQ_INC_2", get_seq_index(s)); + uint8_t condition2 = GETFIELD(SPI_CTR_CFG_CMP2, + s->regs[SPI_CTR_CFG_REG]); + /* + * The spec says the loop should execute count compare + 1 times. 
+ * However we learned from engineering that we really only loop + * count_compare times, count compare = 0 makes this op code a + * no-op + */ + if (s->loop_counter_2 != condition2) { + /* + * Next index is the lower nibble of the branch operation ID, + * mask off all but the first three bits so we don't try to + * access beyond the sequencer_operation_reg boundary. + */ + s->status = SETFIELD(SPI_STS_SEQ_INDEX, + s->status, PNV_SPI_OPCODE_LO_NIBBLE(opcode)); + s->loop_counter_2++; + } else { + /* Continue to next index if loop counter is reached */ + next_sequencer_fsm(s); + } + break; + + default: + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_EXECUTE); + /* Ignore unsupported operations. */ + next_sequencer_fsm(s); + break; + } /* end of switch */ + /* + * If we used all 8 opcodes without seeing a 00 - STOP in the sequence + * we need to go ahead and end things as if there was a STOP at the + * end. + */ + if (get_seq_index(s) == NUM_SEQ_OPS) { + /* All 8 opcodes completed, sequencer idling */ + s->status = SETFIELD(SPI_STS_SHIFTER_FSM, s->status, FSM_IDLE); + s->status = SETFIELD(SPI_STS_SEQ_INDEX, s->status, 0); + s->loop_counter_1 = 0; + s->loop_counter_2 = 0; + s->status = SETFIELD(SPI_STS_SEQ_FSM, s->status, SEQ_STATE_IDLE); + break; + } + /* Break the loop if a stop was requested */ + if (stop) { + break; + } + } /* end of while */ + return; +} /* end of operation_sequencer() */ + +/* + * The SPIC engine and its internal sequencer can be interrupted and reset by + * a hardware signal, the sbe_spicst_hard_reset bits from Pervasive + * Miscellaneous Register of sbe_register_bo device. + * Reset immediately aborts any SPI transaction in progress and returns the + * sequencer and state machines to idle state. + * The configuration register values are not changed. The status register is + * not reset. The engine registers are not reset. + * The SPIC engine reset does not have any effect on the attached devices.
+ * Reset handling of any attached devices is beyond the scope of the engine. + */ +static void do_reset(DeviceState *dev) +{ + PnvSpi *s = PNV_SPI(dev); + + trace_pnv_spi_reset(); + + /* Reset all N1 and N2 counters, and other constants */ + s->N2_bits = 0; + s->N2_bytes = 0; + s->N2_tx = 0; + s->N2_rx = 0; + s->N1_bits = 0; + s->N1_bytes = 0; + s->N1_tx = 0; + s->N1_rx = 0; + s->loop_counter_1 = 0; + s->loop_counter_2 = 0; + /* Disconnected from responder */ + qemu_set_irq(s->cs_line[0], 1); +} + static uint64_t pnv_spi_xscom_read(void *opaque, hwaddr addr, unsigned size) { PnvSpi *s = PNV_SPI(opaque); @@ -51,6 +1088,10 @@ static uint64_t pnv_spi_xscom_read(void *opaque, hwaddr addr, unsigned size) val = s->regs[reg]; trace_pnv_spi_read_RDR(val); s->status = SETFIELD(SPI_STS_RDR_FULL, s->status, 0); + if (GETFIELD(SPI_STS_SHIFTER_FSM, s->status) == FSM_WAIT) { + trace_pnv_spi_start_sequencer(); + operation_sequencer(s); + } break; case SPI_SEQ_OP_REG: val = 0; @@ -112,6 +1153,8 @@ static void pnv_spi_xscom_write(void *opaque, hwaddr addr, trace_pnv_spi_write_TDR(val); s->status = SETFIELD(SPI_STS_TDR_FULL, s->status, 1); s->status = SETFIELD(SPI_STS_TDR_UNDERRUN, s->status, 0); + trace_pnv_spi_start_sequencer(); + operation_sequencer(s); break; case SPI_SEQ_OP_REG: for (int i = 0; i < PNV_SPI_REG_SIZE; i++) { @@ -144,6 +1187,7 @@ static const MemoryRegionOps pnv_spi_xscom_ops = { static Property pnv_spi_properties[] = { DEFINE_PROP_UINT32("spic_num", PnvSpi, spic_num, 0), + DEFINE_PROP_UINT8("transfer_len", PnvSpi, transfer_len, 4), DEFINE_PROP_END_OF_LIST(), }; @@ -193,6 +1237,7 @@ static void pnv_spi_class_init(ObjectClass *klass, void *data) dc->desc = "PowerNV SPI"; dc->realize = pnv_spi_realize; + dc->reset = do_reset; device_class_set_props(dc, pnv_spi_properties); } diff --git a/hw/ssi/trace-events b/hw/ssi/trace-events index 2cc29e1284..089d269994 100644 --- a/hw/ssi/trace-events +++ b/hw/ssi/trace-events @@ -38,3 +38,18 @@ pnv_spi_read(uint64_t addr, 
uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64 pnv_spi_write(uint64_t addr, uint64_t val) "addr 0x%" PRIx64 " val 0x%" PRIx64 pnv_spi_read_RDR(uint64_t val) "data extracted = 0x%" PRIx64 pnv_spi_write_TDR(uint64_t val) "being written, data written = 0x%" PRIx64 +pnv_spi_start_sequencer(void) "" +pnv_spi_reset(void) "spic engine sequencer configuration and spi communication" +pnv_spi_sequencer_op(const char* op, uint8_t index) "%s at index = 0x%x" +pnv_spi_shifter_stating(void) "pull CS line low" +pnv_spi_shifter_done(void) "pull the CS line high" +pnv_spi_log_Ncounts(uint8_t N1_bits, uint8_t N1_bytes, uint8_t N1_tx, uint8_t N1_rx, uint8_t N2_bits, uint8_t N2_bytes, uint8_t N2_tx, uint8_t N2_rx) "N1_bits = %d, N1_bytes = %d, N1_tx = %d, N1_rx = %d, N2_bits = %d, N2_bytes = %d, N2_tx = %d, N2_rx = %d" +pnv_spi_tx_append(const char* frame, uint8_t byte, uint8_t tdr_index) "%s = 0x%2.2x to payload from TDR at index %d" +pnv_spi_tx_append_FF(const char* frame) "%s to Payload" +pnv_spi_tx_request(const char* frame, uint32_t payload_len) "%s, payload len = %d" +pnv_spi_rx_received(uint32_t payload_len) "payload len = %d" +pnv_spi_rx_read_N1frame(void) "" +pnv_spi_rx_read_N2frame(void) "" +pnv_spi_shift_rx(uint8_t byte, uint32_t index) "byte = 0x%2.2x into RDR from payload index %d" +pnv_spi_sequencer_stop_requested(const char* reason) "due to %s" +pnv_spi_RDR_match(const char* result) "%s" diff --git a/include/hw/ssi/pnv_spi.h b/include/hw/ssi/pnv_spi.h index 833042b74b..8815f67d45 100644 --- a/include/hw/ssi/pnv_spi.h +++ b/include/hw/ssi/pnv_spi.h @@ -8,6 +8,14 @@ * This model Supports a connection to a single SPI responder. * Introduced for P10 to provide access to SPI seeproms, TPM, flash device * and an ADC controller. + * + * All SPI function control is mapped into the SPI register space to enable + * full control by firmware. + * + * SPI Controller has sequencer and shift engine. 
The SPI shift engine + * performs serialization and de-serialization according to the control by + * the sequencer and according to the setup defined in the configuration + * registers and the SPI sequencer implements the main control logic. */ #ifndef PPC_PNV_SPI_H @@ -31,6 +39,25 @@ typedef struct PnvSpi { MemoryRegion xscom_spic_regs; /* SPI object number */ uint32_t spic_num; + uint8_t transfer_len; + uint8_t responder_select; + /* To verify if shift_n1 happens prior to shift_n2 */ + bool shift_n1_done; + /* Loop counter for branch operation opcode Ex/Fx */ + uint8_t loop_counter_1; + uint8_t loop_counter_2; + /* N1/N2_bits specifies the size of the N1/N2 segment of a frame in bits.*/ + uint8_t N1_bits; + uint8_t N2_bits; + /* Number of bytes in a payload for the N1/N2 frame segment.*/ + uint8_t N1_bytes; + uint8_t N2_bytes; + /* Number of N1/N2 bytes marked for transmit */ + uint8_t N1_tx; + uint8_t N2_tx; + /* Number of N1/N2 bytes marked for receive */ + uint8_t N1_rx; + uint8_t N2_rx; /* SPI registers */ uint64_t regs[PNV_SPI_REGS]; diff --git a/include/hw/ssi/pnv_spi_regs.h b/include/hw/ssi/pnv_spi_regs.h index 5b6ff72d02..596e2c1911 100644 --- a/include/hw/ssi/pnv_spi_regs.h +++ b/include/hw/ssi/pnv_spi_regs.h @@ -28,6 +28,17 @@ /* counter_config_reg */ #define SPI_CTR_CFG_REG 0x01 +#define SPI_CTR_CFG_N1 PPC_BITMASK(0, 7) +#define SPI_CTR_CFG_N2 PPC_BITMASK(8, 15) +#define SPI_CTR_CFG_CMP1 PPC_BITMASK(24, 31) +#define SPI_CTR_CFG_CMP2 PPC_BITMASK(32, 39) +#define SPI_CTR_CFG_N1_CTRL_B1 PPC_BIT(49) +#define SPI_CTR_CFG_N1_CTRL_B2 PPC_BIT(50) +#define SPI_CTR_CFG_N1_CTRL_B3 PPC_BIT(51) +#define SPI_CTR_CFG_N2_CTRL_B0 PPC_BIT(52) +#define SPI_CTR_CFG_N2_CTRL_B1 PPC_BIT(53) +#define SPI_CTR_CFG_N2_CTRL_B2 PPC_BIT(54) +#define SPI_CTR_CFG_N2_CTRL_B3 PPC_BIT(55) /* config_reg */ #define CONFIG_REG1 0x02 @@ -36,9 +47,13 @@ #define SPI_CLK_CFG_REG 0x03 #define SPI_CLK_CFG_HARD_RST 0x0084000000000000; #define SPI_CLK_CFG_RST_CTRL PPC_BITMASK(24, 27) +#define 
SPI_CLK_CFG_ECC_EN PPC_BIT(28) +#define SPI_CLK_CFG_ECC_CTRL PPC_BITMASK(29, 30) /* memory_mapping_reg */ #define SPI_MM_REG 0x04 +#define SPI_MM_RDR_MATCH_VAL PPC_BITMASK(32, 47) +#define SPI_MM_RDR_MATCH_MASK PPC_BITMASK(48, 63) /* transmit_data_reg */ #define SPI_XMIT_DATA_REG 0x05 @@ -60,8 +75,59 @@ #define SPI_STS_SEQ_FSM PPC_BITMASK(8, 15) #define SPI_STS_SHIFTER_FSM PPC_BITMASK(16, 27) #define SPI_STS_SEQ_INDEX PPC_BITMASK(28, 31) -#define SPI_STS_GEN_STATUS PPC_BITMASK(32, 63) +#define SPI_STS_GEN_STATUS_B3 PPC_BIT(35) #define SPI_STS_RDR PPC_BITMASK(1, 3) #define SPI_STS_TDR PPC_BITMASK(5, 7) +/* + * Shifter states + * + * These are the same values defined for the Shifter FSM field of the + * status register. It's a 12 bit field so we will represent it as three + * nibbles in the constants. + * + * These are shifter_fsm values + * + * Status reg bits 16-27 -> field bits 0-11 + * bits 0,1,2,5 unused/reserved + * bit 4 crc shift in (unused) + * bit 8 crc shift out (unused) + */ + +#define FSM_DONE 0x100 /* bit 3 */ +#define FSM_SHIFT_N2 0x020 /* bit 6 */ +#define FSM_WAIT 0x010 /* bit 7 */ +#define FSM_SHIFT_N1 0x004 /* bit 9 */ +#define FSM_START 0x002 /* bit 10 */ +#define FSM_IDLE 0x001 /* bit 11 */ + +/* + * Sequencer states + * + * These are sequencer_fsm values + * + * Status reg bits 8-15 -> field bits 0-7 + * bits 0-3 unused/reserved + * + */ +#define SEQ_STATE_INDEX_INCREMENT 0x08 /* bit 4 */ +#define SEQ_STATE_EXECUTE 0x04 /* bit 5 */ +#define SEQ_STATE_DECODE 0x02 /* bit 6 */ +#define SEQ_STATE_IDLE 0x01 /* bit 7 */ + +/* + * These are the supported sequencer operations. + * Only the upper nibble is significant because for many operations + * the lower nibble is a variable specific to the operation. 
+ */ +#define SEQ_OP_STOP 0x00 +#define SEQ_OP_SELECT_SLAVE 0x10 +#define SEQ_OP_SHIFT_N1 0x30 +#define SEQ_OP_SHIFT_N2 0x40 +#define SEQ_OP_BRANCH_IFNEQ_RDR 0x60 +#define SEQ_OP_TRANSFER_TDR 0xC0 +#define SEQ_OP_BRANCH_IFNEQ_INC_1 0xE0 +#define SEQ_OP_BRANCH_IFNEQ_INC_2 0xF0 +#define NUM_SEQ_OPS 8 + #endif From 8d970f4162b8a388eef08ee37dab47a650e390ab Mon Sep 17 00:00:00 2001 From: Chalapathi V Date: Wed, 26 Jun 2024 04:05:26 -0500 Subject: [PATCH 39/96] hw/block: Add Microchip's 25CSM04 to m25p80 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Microchip's 25CSM04 Serial EEPROM to m25p80. 25CSM04 provides 4 Mbits of Serial EEPROM utilizing the Serial Peripheral Interface (SPI) compatible bus. The device is organized as 524288 bytes of 8 bits each (512Kbyte) and is optimized for use in consumer and industrial applications where reliable and dependable nonvolatile memory storage is essential. Signed-off-by: Chalapathi V Reviewed-by: Glenn Miles Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/block/m25p80.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 9e99107b42..0b94af3653 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -357,6 +357,9 @@ static const FlashPartInfo known_devices[] = { .sfdp_read = m25p80_sfdp_w25q512jv }, { INFO("w25q01jvq", 0xef4021, 0, 64 << 10, 2048, ER_4K), .sfdp_read = m25p80_sfdp_w25q01jvq }, + + /* Microchip */ + { INFO("25csm04", 0x29cc00, 0x100, 64 << 10, 8, 0) }, }; typedef enum { From bb44dc48628e9168f16c460f778bbef7a91d7708 Mon Sep 17 00:00:00 2001 From: Chalapathi V Date: Wed, 26 Jun 2024 04:05:27 -0500 Subject: [PATCH 40/96] hw/ppc: SPI controller wiring to P10 chip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this commit, create SPI controller on p10 chip and connect cs irq. The QOM tree of pnv-spi and seeprom are. 
/machine (powernv10-machine) /chip[0] (power10_v2.0-pnv-chip) /pib_spic[2] (pnv-spi) /pnv-spi-bus.2 (SSI) /xscom-spi[0] (memory-region) /machine (powernv10-machine) /peripheral-anon (container) /device[0] (25csm04) /WP#[0] (irq) /ssi-gpio-cs[0] (irq) (qemu) qom-get /machine/peripheral-anon /device[76] "parent_bus" "/machine/chip[0]/pib_spic[2]/pnv-spi-bus.2" Signed-off-by: Chalapathi V Reviewed-by: Glenn Miles Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 21 ++++++++++++++++++++- hw/ssi/pnv_spi.c | 8 ++++++++ include/hw/ppc/pnv_chip.h | 3 +++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index a3560d25b7..3526852685 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -1962,6 +1962,11 @@ static void pnv_chip_power10_instance_init(Object *obj) for (i = 0; i < pcc->i2c_num_engines; i++) { object_initialize_child(obj, "i2c[*]", &chip10->i2c[i], TYPE_PNV_I2C); } + + for (i = 0; i < PNV10_CHIP_MAX_PIB_SPIC; i++) { + object_initialize_child(obj, "pib_spic[*]", &chip10->pib_spic[i], + TYPE_PNV_SPI); + } } static void pnv_chip_power10_quad_realize(Pnv10Chip *chip10, Error **errp) @@ -2185,7 +2190,21 @@ static void pnv_chip_power10_realize(DeviceState *dev, Error **errp) qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_SBE_I2C)); } - + /* PIB SPI Controller */ + for (i = 0; i < PNV10_CHIP_MAX_PIB_SPIC; i++) { + object_property_set_int(OBJECT(&chip10->pib_spic[i]), "spic_num", + i, &error_fatal); + /* pib_spic[2] connected to 25csm04 which implements 1 byte transfer */ + object_property_set_int(OBJECT(&chip10->pib_spic[i]), "transfer_len", + (i == 2) ? 
1 : 4, &error_fatal); + if (!sysbus_realize(SYS_BUS_DEVICE(OBJECT + (&chip10->pib_spic[i])), errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV10_XSCOM_PIB_SPIC_BASE + + i * PNV10_XSCOM_PIB_SPIC_SIZE, + &chip10->pib_spic[i].xscom_spic_regs); + } } static void pnv_rainier_i2c_init(PnvMachineState *pnv) diff --git a/hw/ssi/pnv_spi.c b/hw/ssi/pnv_spi.c index cdff3f9621..c1297ab733 100644 --- a/hw/ssi/pnv_spi.c +++ b/hw/ssi/pnv_spi.c @@ -1051,9 +1051,17 @@ static void operation_sequencer(PnvSpi *s) static void do_reset(DeviceState *dev) { PnvSpi *s = PNV_SPI(dev); + DeviceState *ssi_dev; trace_pnv_spi_reset(); + /* Connect cs irq */ + ssi_dev = ssi_get_cs(s->ssi_bus, 0); + if (ssi_dev) { + qemu_irq cs_line = qdev_get_gpio_in_named(ssi_dev, SSI_GPIO_CS, 0); + qdev_connect_gpio_out_named(DEVICE(s), "cs", 0, cs_line); + } + /* Reset all N1 and N2 counters, and other constants */ s->N2_bits = 0; s->N2_bytes = 0; diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h index ee1649babc..de34cbdc96 100644 --- a/include/hw/ppc/pnv_chip.h +++ b/include/hw/ppc/pnv_chip.h @@ -7,6 +7,7 @@ #include "hw/ppc/pnv_core.h" #include "hw/ppc/pnv_homer.h" #include "hw/ppc/pnv_n1_chiplet.h" +#include "hw/ssi/pnv_spi.h" #include "hw/ppc/pnv_lpc.h" #include "hw/ppc/pnv_occ.h" #include "hw/ppc/pnv_psi.h" @@ -123,6 +124,8 @@ struct Pnv10Chip { PnvSBE sbe; PnvHomer homer; PnvN1Chiplet n1_chiplet; +#define PNV10_CHIP_MAX_PIB_SPIC 6 + PnvSpi pib_spic[PNV10_CHIP_MAX_PIB_SPIC]; uint32_t nr_quads; PnvQuad *quads; From 533074918727c5fafb11a033fcccaac11ee0227b Mon Sep 17 00:00:00 2001 From: Chalapathi V Date: Wed, 26 Jun 2024 04:05:28 -0500 Subject: [PATCH 41/96] tests/qtest: Add pnv-spi-seeprom qtest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this commit, write a qtest, pnv-spi-seeprom-test, to check the SPI transactions between the SPI controller and the seeprom device.
Signed-off-by: Chalapathi V Acked-by: Cédric Le Goater Reviewed-by: Caleb Schlossin Signed-off-by: Nicholas Piggin --- tests/qtest/meson.build | 1 + tests/qtest/pnv-spi-seeprom-test.c | 110 +++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 tests/qtest/pnv-spi-seeprom-test.c diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index e7ab2a4312..2f0d3ef080 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -171,6 +171,7 @@ qtests_ppc64 = \ qtests_ppc + \ (config_all_devices.has_key('CONFIG_PSERIES') ? ['device-plug-test'] : []) + \ (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) + \ + (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-spi-seeprom-test'] : []) + \ (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-host-i2c-test'] : []) + \ (config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) + \ (slirp.found() ? ['pxe-test'] : []) + \ diff --git a/tests/qtest/pnv-spi-seeprom-test.c b/tests/qtest/pnv-spi-seeprom-test.c new file mode 100644 index 0000000000..57f20af76e --- /dev/null +++ b/tests/qtest/pnv-spi-seeprom-test.c @@ -0,0 +1,110 @@ +/* + * QTest testcase for PowerNV 10 Seeprom Communications + * + * Copyright (c) 2024, IBM Corporation. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include <unistd.h> +#include "qemu/osdep.h" +#include "libqtest.h" +#include "qemu/bswap.h" +#include "hw/ssi/pnv_spi_regs.h" +#include "pnv-xscom.h" + +#define FLASH_SIZE (512 * 1024) +#define SPIC2_XSCOM_BASE 0xc0040 + +/* To transmit READ opcode and address */ +#define READ_OP_TDR_DATA 0x0300010000000000 +/* + * N1 shift - tx 4 bytes (transmit opcode and address) + * N2 shift - tx and rx 8 bytes.
+ */ +#define READ_OP_COUNTER_CONFIG 0x2040000000002b00 +/* SEQ_OP_SELECT_RESPONDER - N1 Shift - N2 Shift * 5 - SEQ_OP_STOP */ +#define READ_OP_SEQUENCER 0x1130404040404010 + +/* To transmit WREN(Set Write Enable Latch in status0 register) opcode */ +#define WRITE_OP_WREN 0x0600000000000000 +/* To transmit WRITE opcode, address and data */ +#define WRITE_OP_TDR_DATA 0x0300010012345678 +/* N1 shift - tx 8 bytes (transmit opcode, address and data) */ +#define WRITE_OP_COUNTER_CONFIG 0x4000000000002000 +/* SEQ_OP_SELECT_RESPONDER - N1 Shift - SEQ_OP_STOP */ +#define WRITE_OP_SEQUENCER 0x1130100000000000 + +static void pnv_spi_xscom_write(QTestState *qts, const PnvChip *chip, + uint32_t reg, uint64_t val) +{ + uint32_t pcba = SPIC2_XSCOM_BASE + reg; + qtest_writeq(qts, pnv_xscom_addr(chip, pcba), val); +} + +static uint64_t pnv_spi_xscom_read(QTestState *qts, const PnvChip *chip, + uint32_t reg) +{ + uint32_t pcba = SPIC2_XSCOM_BASE + reg; + return qtest_readq(qts, pnv_xscom_addr(chip, pcba)); +} + +static void spi_seeprom_transaction(QTestState *qts, const PnvChip *chip) +{ + /* SPI transactions to SEEPROM to read from SEEPROM image */ + pnv_spi_xscom_write(qts, chip, SPI_CTR_CFG_REG, READ_OP_COUNTER_CONFIG); + pnv_spi_xscom_write(qts, chip, SPI_SEQ_OP_REG, READ_OP_SEQUENCER); + pnv_spi_xscom_write(qts, chip, SPI_XMIT_DATA_REG, READ_OP_TDR_DATA); + pnv_spi_xscom_write(qts, chip, SPI_XMIT_DATA_REG, 0); + /* Read 5*8 bytes from SEEPROM at 0x100 */ + uint64_t rdr_val = pnv_spi_xscom_read(qts, chip, SPI_RCV_DATA_REG); + g_test_message("RDR READ = 0x%" PRIx64, rdr_val); + rdr_val = pnv_spi_xscom_read(qts, chip, SPI_RCV_DATA_REG); + rdr_val = pnv_spi_xscom_read(qts, chip, SPI_RCV_DATA_REG); + rdr_val = pnv_spi_xscom_read(qts, chip, SPI_RCV_DATA_REG); + rdr_val = pnv_spi_xscom_read(qts, chip, SPI_RCV_DATA_REG); + g_test_message("RDR READ = 0x%" PRIx64, rdr_val); + + /* SPI transactions to SEEPROM to write to SEEPROM image */ + pnv_spi_xscom_write(qts, chip, SPI_CTR_CFG_REG, 
WRITE_OP_COUNTER_CONFIG); + /* Set Write Enable Latch bit of status0 register */ + pnv_spi_xscom_write(qts, chip, SPI_SEQ_OP_REG, WRITE_OP_SEQUENCER); + pnv_spi_xscom_write(qts, chip, SPI_XMIT_DATA_REG, WRITE_OP_WREN); + /* write 8 bytes to SEEPROM at 0x100 */ + pnv_spi_xscom_write(qts, chip, SPI_SEQ_OP_REG, WRITE_OP_SEQUENCER); + pnv_spi_xscom_write(qts, chip, SPI_XMIT_DATA_REG, WRITE_OP_TDR_DATA); +} + +static void test_spi_seeprom(const void *data) +{ + const PnvChip *chip = data; + QTestState *qts = NULL; + g_autofree char *tmp_path = NULL; + int ret; + int fd; + + /* Create a temporary raw image */ + fd = g_file_open_tmp("qtest-seeprom-XXXXXX", &tmp_path, NULL); + g_assert(fd >= 0); + ret = ftruncate(fd, FLASH_SIZE); + g_assert(ret == 0); + close(fd); + + qts = qtest_initf("-machine powernv10 -smp 2,cores=2," + "threads=1 -accel tcg,thread=single -nographic " + "-blockdev node-name=pib_spic2,driver=file," + "filename=%s -device 25csm04,bus=pnv-spi-bus.2,cs=0," + "drive=pib_spic2", tmp_path); + spi_seeprom_transaction(qts, chip); + qtest_quit(qts); + unlink(tmp_path); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + char *tname = g_strdup_printf("pnv-xscom/spi-seeprom/%s", + pnv_chips[3].cpu_model); + qtest_add_data_func(tname, &pnv_chips[3], test_spi_seeprom); + g_free(tname); + return g_test_run(); +} From 8c01b2e1f7e34e6444abb59a544a52192393e798 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 24 Jul 2024 16:21:20 -0500 Subject: [PATCH 42/96] pnv/xive2: XIVE2 Cache Watch, Cache Flush and Sync Injection support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XIVE offers a 'cache watch facility', which allows software to read/update a potentially cached table entry with no software lock. There's one such facility in the Virtualization Controller (VC) to update the ESB and END entries and one in the Presentation Controller (PC) to update the NVP/NVG/NVC entries. 
Each facility has 4 cache watch engines to control the updates and firmware can request an available engine by querying the hardware 'watch_assign' register of the VC or PC. The engine is then reserved and is released after the data is updated by reading the 'watch_spec' register (which also allows to check for a conflict during the update). If no engine is available, the special value 0xFF is returned and firmware is expected to repeat the request until an engine becomes available. Signed-off-by: Frederic Barrat Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/pnv_xive2.c | 251 +++++++++++++++++++++++++++++++++------ hw/intc/pnv_xive2_regs.h | 90 ++++++++++++++ 2 files changed, 305 insertions(+), 36 deletions(-) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index 2fb4fa29d4..af9ab68fc6 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -329,40 +329,48 @@ static int pnv_xive2_write_end(Xive2Router *xrtr, uint8_t blk, uint32_t idx, word_number); } -static int pnv_xive2_end_update(PnvXive2 *xive) +static int pnv_xive2_end_update(PnvXive2 *xive, uint8_t watch_engine) { - uint8_t blk = GETFIELD(VC_ENDC_WATCH_BLOCK_ID, - xive->vc_regs[(VC_ENDC_WATCH0_SPEC >> 3)]); - uint32_t idx = GETFIELD(VC_ENDC_WATCH_INDEX, - xive->vc_regs[(VC_ENDC_WATCH0_SPEC >> 3)]); - int i; + uint8_t blk; + uint32_t idx; + int i, spec_reg, data_reg; uint64_t endc_watch[4]; + assert(watch_engine < ARRAY_SIZE(endc_watch)); + + spec_reg = (VC_ENDC_WATCH0_SPEC + watch_engine * 0x40) >> 3; + data_reg = (VC_ENDC_WATCH0_DATA0 + watch_engine * 0x40) >> 3; + blk = GETFIELD(VC_ENDC_WATCH_BLOCK_ID, xive->vc_regs[spec_reg]); + idx = GETFIELD(VC_ENDC_WATCH_INDEX, xive->vc_regs[spec_reg]); + for (i = 0; i < ARRAY_SIZE(endc_watch); i++) { - endc_watch[i] = - cpu_to_be64(xive->vc_regs[(VC_ENDC_WATCH0_DATA0 >> 3) + i]); + endc_watch[i] = cpu_to_be64(xive->vc_regs[data_reg + i]); } return pnv_xive2_vst_write(xive, VST_END, blk, idx, 
endc_watch, XIVE_VST_WORD_ALL); } -static void pnv_xive2_end_cache_load(PnvXive2 *xive) +static void pnv_xive2_end_cache_load(PnvXive2 *xive, uint8_t watch_engine) { - uint8_t blk = GETFIELD(VC_ENDC_WATCH_BLOCK_ID, - xive->vc_regs[(VC_ENDC_WATCH0_SPEC >> 3)]); - uint32_t idx = GETFIELD(VC_ENDC_WATCH_INDEX, - xive->vc_regs[(VC_ENDC_WATCH0_SPEC >> 3)]); + uint8_t blk; + uint32_t idx; uint64_t endc_watch[4] = { 0 }; - int i; + int i, spec_reg, data_reg; + + assert(watch_engine < ARRAY_SIZE(endc_watch)); + + spec_reg = (VC_ENDC_WATCH0_SPEC + watch_engine * 0x40) >> 3; + data_reg = (VC_ENDC_WATCH0_DATA0 + watch_engine * 0x40) >> 3; + blk = GETFIELD(VC_ENDC_WATCH_BLOCK_ID, xive->vc_regs[spec_reg]); + idx = GETFIELD(VC_ENDC_WATCH_INDEX, xive->vc_regs[spec_reg]); if (pnv_xive2_vst_read(xive, VST_END, blk, idx, endc_watch)) { xive2_error(xive, "VST: no END entry %x/%x !?", blk, idx); } for (i = 0; i < ARRAY_SIZE(endc_watch); i++) { - xive->vc_regs[(VC_ENDC_WATCH0_DATA0 >> 3) + i] = - be64_to_cpu(endc_watch[i]); + xive->vc_regs[data_reg + i] = be64_to_cpu(endc_watch[i]); } } @@ -379,40 +387,48 @@ static int pnv_xive2_write_nvp(Xive2Router *xrtr, uint8_t blk, uint32_t idx, word_number); } -static int pnv_xive2_nvp_update(PnvXive2 *xive) +static int pnv_xive2_nvp_update(PnvXive2 *xive, uint8_t watch_engine) { - uint8_t blk = GETFIELD(PC_NXC_WATCH_BLOCK_ID, - xive->pc_regs[(PC_NXC_WATCH0_SPEC >> 3)]); - uint32_t idx = GETFIELD(PC_NXC_WATCH_INDEX, - xive->pc_regs[(PC_NXC_WATCH0_SPEC >> 3)]); - int i; + uint8_t blk; + uint32_t idx; + int i, spec_reg, data_reg; uint64_t nxc_watch[4]; + assert(watch_engine < ARRAY_SIZE(nxc_watch)); + + spec_reg = (PC_NXC_WATCH0_SPEC + watch_engine * 0x40) >> 3; + data_reg = (PC_NXC_WATCH0_DATA0 + watch_engine * 0x40) >> 3; + blk = GETFIELD(PC_NXC_WATCH_BLOCK_ID, xive->pc_regs[spec_reg]); + idx = GETFIELD(PC_NXC_WATCH_INDEX, xive->pc_regs[spec_reg]); + for (i = 0; i < ARRAY_SIZE(nxc_watch); i++) { - nxc_watch[i] = - 
cpu_to_be64(xive->pc_regs[(PC_NXC_WATCH0_DATA0 >> 3) + i]); + nxc_watch[i] = cpu_to_be64(xive->pc_regs[data_reg + i]); } return pnv_xive2_vst_write(xive, VST_NVP, blk, idx, nxc_watch, XIVE_VST_WORD_ALL); } -static void pnv_xive2_nvp_cache_load(PnvXive2 *xive) +static void pnv_xive2_nvp_cache_load(PnvXive2 *xive, uint8_t watch_engine) { - uint8_t blk = GETFIELD(PC_NXC_WATCH_BLOCK_ID, - xive->pc_regs[(PC_NXC_WATCH0_SPEC >> 3)]); - uint32_t idx = GETFIELD(PC_NXC_WATCH_INDEX, - xive->pc_regs[(PC_NXC_WATCH0_SPEC >> 3)]); + uint8_t blk; + uint32_t idx; uint64_t nxc_watch[4] = { 0 }; - int i; + int i, spec_reg, data_reg; + + assert(watch_engine < ARRAY_SIZE(nxc_watch)); + + spec_reg = (PC_NXC_WATCH0_SPEC + watch_engine * 0x40) >> 3; + data_reg = (PC_NXC_WATCH0_DATA0 + watch_engine * 0x40) >> 3; + blk = GETFIELD(PC_NXC_WATCH_BLOCK_ID, xive->pc_regs[spec_reg]); + idx = GETFIELD(PC_NXC_WATCH_INDEX, xive->pc_regs[spec_reg]); if (pnv_xive2_vst_read(xive, VST_NVP, blk, idx, nxc_watch)) { xive2_error(xive, "VST: no NVP entry %x/%x !?", blk, idx); } for (i = 0; i < ARRAY_SIZE(nxc_watch); i++) { - xive->pc_regs[(PC_NXC_WATCH0_DATA0 >> 3) + i] = - be64_to_cpu(nxc_watch[i]); + xive->pc_regs[data_reg + i] = be64_to_cpu(nxc_watch[i]); } } @@ -964,12 +980,70 @@ static const MemoryRegionOps pnv_xive2_ic_cq_ops = { }, }; +static uint8_t pnv_xive2_cache_watch_assign(uint64_t engine_mask, + uint64_t *state) +{ + uint8_t val = 0xFF; + int i; + + for (i = 3; i >= 0; i--) { + if (BIT(i) & engine_mask) { + if (!(BIT(i) & *state)) { + *state |= BIT(i); + val = 3 - i; + break; + } + } + } + return val; +} + +static void pnv_xive2_cache_watch_release(uint64_t *state, uint8_t watch_engine) +{ + uint8_t engine_bit = 3 - watch_engine; + + if (*state & BIT(engine_bit)) { + *state &= ~BIT(engine_bit); + } +} + +static uint8_t pnv_xive2_endc_cache_watch_assign(PnvXive2 *xive) +{ + uint64_t engine_mask = GETFIELD(VC_ENDC_CFG_CACHE_WATCH_ASSIGN, + xive->vc_regs[VC_ENDC_CFG >> 3]); + uint64_t state = 
xive->vc_regs[VC_ENDC_WATCH_ASSIGN >> 3]; + uint8_t val; + + /* + * We keep track of which engines are currently busy in the + * VC_ENDC_WATCH_ASSIGN register directly. When the firmware reads + * the register, we don't return its value but the ID of an engine + * it can use. + * There are 4 engines. 0xFF means no engine is available. + */ + val = pnv_xive2_cache_watch_assign(engine_mask, &state); + if (val != 0xFF) { + xive->vc_regs[VC_ENDC_WATCH_ASSIGN >> 3] = state; + } + return val; +} + +static void pnv_xive2_endc_cache_watch_release(PnvXive2 *xive, + uint8_t watch_engine) +{ + uint64_t state = xive->vc_regs[VC_ENDC_WATCH_ASSIGN >> 3]; + + pnv_xive2_cache_watch_release(&state, watch_engine); + xive->vc_regs[VC_ENDC_WATCH_ASSIGN >> 3] = state; +} + static uint64_t pnv_xive2_ic_vc_read(void *opaque, hwaddr offset, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); uint64_t val = 0; uint32_t reg = offset >> 3; + uint8_t watch_engine; switch (offset) { /* @@ -1000,24 +1074,44 @@ static uint64_t pnv_xive2_ic_vc_read(void *opaque, hwaddr offset, val = xive->vc_regs[reg]; break; + case VC_ENDC_WATCH_ASSIGN: + val = pnv_xive2_endc_cache_watch_assign(xive); + break; + + case VC_ENDC_CFG: + val = xive->vc_regs[reg]; + break; + /* * END cache updates */ case VC_ENDC_WATCH0_SPEC: + case VC_ENDC_WATCH1_SPEC: + case VC_ENDC_WATCH2_SPEC: + case VC_ENDC_WATCH3_SPEC: + watch_engine = (offset - VC_ENDC_WATCH0_SPEC) >> 6; xive->vc_regs[reg] &= ~(VC_ENDC_WATCH_FULL | VC_ENDC_WATCH_CONFLICT); + pnv_xive2_endc_cache_watch_release(xive, watch_engine); val = xive->vc_regs[reg]; break; case VC_ENDC_WATCH0_DATA0: + case VC_ENDC_WATCH1_DATA0: + case VC_ENDC_WATCH2_DATA0: + case VC_ENDC_WATCH3_DATA0: /* * Load DATA registers from cache with data requested by the * SPEC register */ - pnv_xive2_end_cache_load(xive); + watch_engine = (offset - VC_ENDC_WATCH0_DATA0) >> 6; + pnv_xive2_end_cache_load(xive, watch_engine); val = xive->vc_regs[reg]; break; case VC_ENDC_WATCH0_DATA1 ... 
VC_ENDC_WATCH0_DATA3: + case VC_ENDC_WATCH1_DATA1 ... VC_ENDC_WATCH1_DATA3: + case VC_ENDC_WATCH2_DATA1 ... VC_ENDC_WATCH2_DATA3: + case VC_ENDC_WATCH3_DATA1 ... VC_ENDC_WATCH3_DATA3: val = xive->vc_regs[reg]; break; @@ -1063,6 +1157,7 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, { PnvXive2 *xive = PNV_XIVE2(opaque); uint32_t reg = offset >> 3; + uint8_t watch_engine; switch (offset) { /* @@ -1095,19 +1190,32 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, /* EAS update */ break; + case VC_ENDC_CFG: + break; + /* * END cache updates */ case VC_ENDC_WATCH0_SPEC: + case VC_ENDC_WATCH1_SPEC: + case VC_ENDC_WATCH2_SPEC: + case VC_ENDC_WATCH3_SPEC: val &= ~VC_ENDC_WATCH_CONFLICT; /* HW will set this bit */ break; case VC_ENDC_WATCH0_DATA1 ... VC_ENDC_WATCH0_DATA3: + case VC_ENDC_WATCH1_DATA1 ... VC_ENDC_WATCH1_DATA3: + case VC_ENDC_WATCH2_DATA1 ... VC_ENDC_WATCH2_DATA3: + case VC_ENDC_WATCH3_DATA1 ... VC_ENDC_WATCH3_DATA3: break; case VC_ENDC_WATCH0_DATA0: + case VC_ENDC_WATCH1_DATA0: + case VC_ENDC_WATCH2_DATA0: + case VC_ENDC_WATCH3_DATA0: /* writing to DATA0 triggers the cache write */ + watch_engine = (offset - VC_ENDC_WATCH0_DATA0) >> 6; xive->vc_regs[reg] = val; - pnv_xive2_end_update(xive); + pnv_xive2_end_update(xive, watch_engine); break; @@ -1157,12 +1265,43 @@ static const MemoryRegionOps pnv_xive2_ic_vc_ops = { }, }; +static uint8_t pnv_xive2_nxc_cache_watch_assign(PnvXive2 *xive) +{ + uint64_t engine_mask = GETFIELD(PC_NXC_PROC_CONFIG_WATCH_ASSIGN, + xive->pc_regs[PC_NXC_PROC_CONFIG >> 3]); + uint64_t state = xive->pc_regs[PC_NXC_WATCH_ASSIGN >> 3]; + uint8_t val; + + /* + * We keep track of which engines are currently busy in the + * PC_NXC_WATCH_ASSIGN register directly. When the firmware reads + * the register, we don't return its value but the ID of an engine + * it can use. + * There are 4 engines. 0xFF means no engine is available. 
+ */ + val = pnv_xive2_cache_watch_assign(engine_mask, &state); + if (val != 0xFF) { + xive->pc_regs[PC_NXC_WATCH_ASSIGN >> 3] = state; + } + return val; +} + +static void pnv_xive2_nxc_cache_watch_release(PnvXive2 *xive, + uint8_t watch_engine) +{ + uint64_t state = xive->pc_regs[PC_NXC_WATCH_ASSIGN >> 3]; + + pnv_xive2_cache_watch_release(&state, watch_engine); + xive->pc_regs[PC_NXC_WATCH_ASSIGN >> 3] = state; +} + static uint64_t pnv_xive2_ic_pc_read(void *opaque, hwaddr offset, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); uint64_t val = -1; uint32_t reg = offset >> 3; + uint8_t watch_engine; switch (offset) { /* @@ -1173,24 +1312,44 @@ static uint64_t pnv_xive2_ic_pc_read(void *opaque, hwaddr offset, val = xive->pc_regs[reg]; break; + case PC_NXC_WATCH_ASSIGN: + val = pnv_xive2_nxc_cache_watch_assign(xive); + break; + + case PC_NXC_PROC_CONFIG: + val = xive->pc_regs[reg]; + break; + /* * cache updates */ case PC_NXC_WATCH0_SPEC: + case PC_NXC_WATCH1_SPEC: + case PC_NXC_WATCH2_SPEC: + case PC_NXC_WATCH3_SPEC: + watch_engine = (offset - PC_NXC_WATCH0_SPEC) >> 6; xive->pc_regs[reg] &= ~(PC_NXC_WATCH_FULL | PC_NXC_WATCH_CONFLICT); + pnv_xive2_nxc_cache_watch_release(xive, watch_engine); val = xive->pc_regs[reg]; break; case PC_NXC_WATCH0_DATA0: + case PC_NXC_WATCH1_DATA0: + case PC_NXC_WATCH2_DATA0: + case PC_NXC_WATCH3_DATA0: /* * Load DATA registers from cache with data requested by the * SPEC register */ - pnv_xive2_nvp_cache_load(xive); + watch_engine = (offset - PC_NXC_WATCH0_DATA0) >> 6; + pnv_xive2_nvp_cache_load(xive, watch_engine); val = xive->pc_regs[reg]; break; case PC_NXC_WATCH0_DATA1 ... PC_NXC_WATCH0_DATA3: + case PC_NXC_WATCH1_DATA1 ... PC_NXC_WATCH1_DATA3: + case PC_NXC_WATCH2_DATA1 ... PC_NXC_WATCH2_DATA3: + case PC_NXC_WATCH3_DATA1 ... 
PC_NXC_WATCH3_DATA3: val = xive->pc_regs[reg]; break; @@ -1219,6 +1378,7 @@ static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset, { PnvXive2 *xive = PNV_XIVE2(opaque); uint32_t reg = offset >> 3; + uint8_t watch_engine; switch (offset) { @@ -1231,19 +1391,32 @@ static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset, case PC_VSD_TABLE_DATA: break; + case PC_NXC_PROC_CONFIG: + break; + /* * cache updates */ case PC_NXC_WATCH0_SPEC: + case PC_NXC_WATCH1_SPEC: + case PC_NXC_WATCH2_SPEC: + case PC_NXC_WATCH3_SPEC: val &= ~PC_NXC_WATCH_CONFLICT; /* HW will set this bit */ break; case PC_NXC_WATCH0_DATA1 ... PC_NXC_WATCH0_DATA3: + case PC_NXC_WATCH1_DATA1 ... PC_NXC_WATCH1_DATA3: + case PC_NXC_WATCH2_DATA1 ... PC_NXC_WATCH2_DATA3: + case PC_NXC_WATCH3_DATA1 ... PC_NXC_WATCH3_DATA3: break; case PC_NXC_WATCH0_DATA0: + case PC_NXC_WATCH1_DATA0: + case PC_NXC_WATCH2_DATA0: + case PC_NXC_WATCH3_DATA0: /* writing to DATA0 triggers the cache write */ + watch_engine = (offset - PC_NXC_WATCH0_DATA0) >> 6; xive->pc_regs[reg] = val; - pnv_xive2_nvp_update(xive); + pnv_xive2_nvp_update(xive, watch_engine); break; /* case PC_NXC_FLUSH_CTRL: */ @@ -1814,6 +1987,12 @@ static void pnv_xive2_reset(void *dev) xive->cq_regs[CQ_XIVE_CFG >> 3] |= SETFIELD(CQ_XIVE_CFG_HYP_HARD_BLOCK_ID, 0ull, xive->chip->chip_id); + /* VC and PC cache watch assign mechanism */ + xive->vc_regs[VC_ENDC_CFG >> 3] = + SETFIELD(VC_ENDC_CFG_CACHE_WATCH_ASSIGN, 0ull, 0b0111); + xive->pc_regs[PC_NXC_PROC_CONFIG >> 3] = + SETFIELD(PC_NXC_PROC_CONFIG_WATCH_ASSIGN, 0ull, 0b0111); + /* Set default page size to 64k */ xive->ic_shift = xive->esb_shift = xive->end_shift = 16; xive->nvc_shift = xive->nvpg_shift = xive->tm_shift = 16; diff --git a/hw/intc/pnv_xive2_regs.h b/hw/intc/pnv_xive2_regs.h index 7165dc8704..f8e4a677c6 100644 --- a/hw/intc/pnv_xive2_regs.h +++ b/hw/intc/pnv_xive2_regs.h @@ -283,6 +283,15 @@ #define VC_ENDC_SYNC_QUEUE_HARD PPC_BIT(6) #define VC_QUEUE_COUNT 7 +/* ENDC cache watch assign 
*/ +#define X_VC_ENDC_WATCH_ASSIGN 0x186 +#define VC_ENDC_WATCH_ASSIGN 0x430 + +/* ENDC configuration register */ +#define X_VC_ENDC_CFG 0x188 +#define VC_ENDC_CFG 0x440 +#define VC_ENDC_CFG_CACHE_WATCH_ASSIGN PPC_BITMASK(32, 35) + /* ENDC cache watch specification 0 */ #define X_VC_ENDC_WATCH0_SPEC 0x1A0 #define VC_ENDC_WATCH0_SPEC 0x500 @@ -302,6 +311,42 @@ #define VC_ENDC_WATCH0_DATA2 0x530 #define VC_ENDC_WATCH0_DATA3 0x538 +/* ENDC cache watch 1 */ +#define X_VC_ENDC_WATCH1_SPEC 0x1A8 +#define VC_ENDC_WATCH1_SPEC 0x540 +#define X_VC_ENDC_WATCH1_DATA0 0x1AC +#define X_VC_ENDC_WATCH1_DATA1 0x1AD +#define X_VC_ENDC_WATCH1_DATA2 0x1AE +#define X_VC_ENDC_WATCH1_DATA3 0x1AF +#define VC_ENDC_WATCH1_DATA0 0x560 +#define VC_ENDC_WATCH1_DATA1 0x568 +#define VC_ENDC_WATCH1_DATA2 0x570 +#define VC_ENDC_WATCH1_DATA3 0x578 + +/* ENDC cache watch 2 */ +#define X_VC_ENDC_WATCH2_SPEC 0x1B0 +#define VC_ENDC_WATCH2_SPEC 0x580 +#define X_VC_ENDC_WATCH2_DATA0 0x1B4 +#define X_VC_ENDC_WATCH2_DATA1 0x1B5 +#define X_VC_ENDC_WATCH2_DATA2 0x1B6 +#define X_VC_ENDC_WATCH2_DATA3 0x1B7 +#define VC_ENDC_WATCH2_DATA0 0x5A0 +#define VC_ENDC_WATCH2_DATA1 0x5A8 +#define VC_ENDC_WATCH2_DATA2 0x5B0 +#define VC_ENDC_WATCH2_DATA3 0x5B8 + +/* ENDC cache watch 3 */ +#define X_VC_ENDC_WATCH3_SPEC 0x1B8 +#define VC_ENDC_WATCH3_SPEC 0x5C0 +#define X_VC_ENDC_WATCH3_DATA0 0x1BC +#define X_VC_ENDC_WATCH3_DATA1 0x1BD +#define X_VC_ENDC_WATCH3_DATA2 0x1BE +#define X_VC_ENDC_WATCH3_DATA3 0x1BF +#define VC_ENDC_WATCH3_DATA0 0x5E0 +#define VC_ENDC_WATCH3_DATA1 0x5E8 +#define VC_ENDC_WATCH3_DATA2 0x5F0 +#define VC_ENDC_WATCH3_DATA3 0x5F8 + /* * PC LSB1 */ @@ -358,6 +403,15 @@ #define PC_NXC_FLUSH_POLL_BLOCK_ID_MASK PPC_BITMASK(36, 39) #define PC_NXC_FLUSH_POLL_OFFSET_MASK PPC_BITMASK(40, 63) /* 24-bit */ +/* NxC Cache watch assign */ +#define X_PC_NXC_WATCH_ASSIGN 0x286 +#define PC_NXC_WATCH_ASSIGN 0x430 + +/* NxC Proc config */ +#define X_PC_NXC_PROC_CONFIG 0x28A +#define PC_NXC_PROC_CONFIG 0x450 +#define 
PC_NXC_PROC_CONFIG_WATCH_ASSIGN PPC_BITMASK(0, 3) + /* NxC Cache Watch 0 Specification */ #define X_PC_NXC_WATCH0_SPEC 0x2A0 #define PC_NXC_WATCH0_SPEC 0x500 @@ -381,6 +435,42 @@ #define PC_NXC_WATCH0_DATA2 0x530 #define PC_NXC_WATCH0_DATA3 0x538 +/* NxC Cache Watch 1 */ +#define X_PC_NXC_WATCH1_SPEC 0x2A8 +#define PC_NXC_WATCH1_SPEC 0x540 +#define X_PC_NXC_WATCH1_DATA0 0x2AC +#define X_PC_NXC_WATCH1_DATA1 0x2AD +#define X_PC_NXC_WATCH1_DATA2 0x2AE +#define X_PC_NXC_WATCH1_DATA3 0x2AF +#define PC_NXC_WATCH1_DATA0 0x560 +#define PC_NXC_WATCH1_DATA1 0x568 +#define PC_NXC_WATCH1_DATA2 0x570 +#define PC_NXC_WATCH1_DATA3 0x578 + +/* NxC Cache Watch 2 */ +#define X_PC_NXC_WATCH2_SPEC 0x2B0 +#define PC_NXC_WATCH2_SPEC 0x580 +#define X_PC_NXC_WATCH2_DATA0 0x2B4 +#define X_PC_NXC_WATCH2_DATA1 0x2B5 +#define X_PC_NXC_WATCH2_DATA2 0x2B6 +#define X_PC_NXC_WATCH2_DATA3 0x2B7 +#define PC_NXC_WATCH2_DATA0 0x5A0 +#define PC_NXC_WATCH2_DATA1 0x5A8 +#define PC_NXC_WATCH2_DATA2 0x5B0 +#define PC_NXC_WATCH2_DATA3 0x5B8 + +/* NxC Cache Watch 3 */ +#define X_PC_NXC_WATCH3_SPEC 0x2B8 +#define PC_NXC_WATCH3_SPEC 0x5C0 +#define X_PC_NXC_WATCH3_DATA0 0x2BC +#define X_PC_NXC_WATCH3_DATA1 0x2BD +#define X_PC_NXC_WATCH3_DATA2 0x2BE +#define X_PC_NXC_WATCH3_DATA3 0x2BF +#define PC_NXC_WATCH3_DATA0 0x5E0 +#define PC_NXC_WATCH3_DATA1 0x5E8 +#define PC_NXC_WATCH3_DATA2 0x5F0 +#define PC_NXC_WATCH3_DATA3 0x5F8 + /* * TCTXT Registers */ From 64770efd668e61128f30d6d50861c7a85ba12ec5 Mon Sep 17 00:00:00 2001 From: Michael Kowal Date: Wed, 24 Jul 2024 16:21:21 -0500 Subject: [PATCH 43/96] pnv/xive2: Structure/define alignment changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Made changes to some structure and define elements to ease review in next patchset. 
Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/pnv_xive2.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index af9ab68fc6..08b6da78fb 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -45,16 +45,16 @@ typedef struct XiveVstInfo { static const XiveVstInfo vst_infos[] = { - [VST_EAS] = { "EAT", sizeof(Xive2Eas), 16 }, - [VST_ESB] = { "ESB", 1, 16 }, - [VST_END] = { "ENDT", sizeof(Xive2End), 16 }, + [VST_EAS] = { "EAT", sizeof(Xive2Eas), 16 }, + [VST_ESB] = { "ESB", 1, 16 }, + [VST_END] = { "ENDT", sizeof(Xive2End), 16 }, - [VST_NVP] = { "NVPT", sizeof(Xive2Nvp), 16 }, - [VST_NVG] = { "NVGT", sizeof(Xive2Nvgc), 16 }, - [VST_NVC] = { "NVCT", sizeof(Xive2Nvgc), 16 }, + [VST_NVP] = { "NVPT", sizeof(Xive2Nvp), 16 }, + [VST_NVG] = { "NVGT", sizeof(Xive2Nvgc), 16 }, + [VST_NVC] = { "NVCT", sizeof(Xive2Nvgc), 16 }, - [VST_IC] = { "IC", 1 /* ? */ , 16 }, /* Topology # */ - [VST_SYNC] = { "SYNC", 1 /* ? */ , 16 }, /* Topology # */ + [VST_IC] = { "IC", 1, /* ? */ 16 }, /* Topology # */ + [VST_SYNC] = { "SYNC", 1, /* ? 
*/ 16 }, /* Topology # */ /* * This table contains the backing store pages for the interrupt @@ -1720,13 +1720,13 @@ static const MemoryRegionOps pnv_xive2_ic_lsi_ops = { /* * Sync MMIO page (write only) */ -#define PNV_XIVE2_SYNC_IPI 0x000 -#define PNV_XIVE2_SYNC_HW 0x080 -#define PNV_XIVE2_SYNC_NxC 0x100 -#define PNV_XIVE2_SYNC_INT 0x180 -#define PNV_XIVE2_SYNC_OS_ESC 0x200 -#define PNV_XIVE2_SYNC_POOL_ESC 0x280 -#define PNV_XIVE2_SYNC_HARD_ESC 0x300 +#define PNV_XIVE2_SYNC_IPI 0x000 +#define PNV_XIVE2_SYNC_HW 0x080 +#define PNV_XIVE2_SYNC_NxC 0x100 +#define PNV_XIVE2_SYNC_INT 0x180 +#define PNV_XIVE2_SYNC_OS_ESC 0x200 +#define PNV_XIVE2_SYNC_POOL_ESC 0x280 +#define PNV_XIVE2_SYNC_HARD_ESC 0x300 static uint64_t pnv_xive2_ic_sync_read(void *opaque, hwaddr offset, unsigned size) From 76125c0132f27f0b4ba1b71d19027aba1fe62fd9 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 24 Jul 2024 16:21:22 -0500 Subject: [PATCH 44/96] pnv/xive: Support cache flush and queue sync inject with notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds support for writing a completion notification byte in memory whenever a cache flush or queue sync inject operation is requested by software. QEMU does not cache any of the XIVE data that is in memory and therefore it simply writes the completion notification byte at the time that the operation is requested. 
Co-authored-by: Glenn Miles Signed-off-by: Glenn Miles Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/pnv_xive2.c | 154 +++++++++++++++++++++++++++++++++++++- hw/intc/pnv_xive2_regs.h | 16 ++++ include/hw/ppc/pnv_chip.h | 1 + 3 files changed, 169 insertions(+), 2 deletions(-) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index 08b6da78fb..3dbbfddacb 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -25,6 +25,7 @@ #include "hw/ppc/ppc.h" #include "hw/qdev-properties.h" #include "sysemu/reset.h" +#include "sysemu/qtest.h" #include @@ -32,6 +33,16 @@ #undef XIVE2_DEBUG +/* XIVE Sync or Flush Notification Block */ +typedef struct XiveSfnBlock { + uint8_t bytes[32]; +} XiveSfnBlock; + +/* XIVE Thread Sync or Flush Notification Area */ +typedef struct XiveThreadNA { + XiveSfnBlock topo[16]; +} XiveThreadNA; + /* * Virtual structures table (VST) */ @@ -54,7 +65,7 @@ static const XiveVstInfo vst_infos[] = { [VST_NVC] = { "NVCT", sizeof(Xive2Nvgc), 16 }, [VST_IC] = { "IC", 1, /* ? */ 16 }, /* Topology # */ - [VST_SYNC] = { "SYNC", 1, /* ? */ 16 }, /* Topology # */ + [VST_SYNC] = { "SYNC", sizeof(XiveThreadNA), 16 }, /* Topology # */ /* * This table contains the backing store pages for the interrupt @@ -329,6 +340,73 @@ static int pnv_xive2_write_end(Xive2Router *xrtr, uint8_t blk, uint32_t idx, word_number); } +static inline int pnv_xive2_get_current_pir(PnvXive2 *xive) +{ + if (!qtest_enabled()) { + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); + return ppc_cpu_pir(cpu); + } + return 0; +} + +/* + * After SW injects a Queue Sync or Cache Flush operation, HW will notify + * SW of the completion of the operation by writing a byte of all 1's (0xff) + * to a specific memory location. The memory location is calculated by first + * looking up a base address in the SYNC VSD using the Topology ID of the + * originating thread as the "block" number. 
This points to a + * 64k block of memory that is further divided into 128 512 byte chunks of + * memory, which is indexed by the thread id of the requesting thread. + * Finally, this 512 byte chunk of memory is divided into 16 32 byte + * chunks which are indexed by the topology id of the targeted IC's chip. + * The values below are the offsets into that 32 byte chunk of memory for + * each type of cache flush or queue sync operation. + */ +#define PNV_XIVE2_QUEUE_IPI 0x00 +#define PNV_XIVE2_QUEUE_HW 0x01 +#define PNV_XIVE2_QUEUE_NXC 0x02 +#define PNV_XIVE2_QUEUE_INT 0x03 +#define PNV_XIVE2_QUEUE_OS 0x04 +#define PNV_XIVE2_QUEUE_POOL 0x05 +#define PNV_XIVE2_QUEUE_HARD 0x06 +#define PNV_XIVE2_CACHE_ENDC 0x08 +#define PNV_XIVE2_CACHE_ESBC 0x09 +#define PNV_XIVE2_CACHE_EASC 0x0a +#define PNV_XIVE2_QUEUE_NXC_LD_LCL_NCO 0x10 +#define PNV_XIVE2_QUEUE_NXC_LD_LCL_CO 0x11 +#define PNV_XIVE2_QUEUE_NXC_ST_LCL_NCI 0x12 +#define PNV_XIVE2_QUEUE_NXC_ST_LCL_CI 0x13 +#define PNV_XIVE2_QUEUE_NXC_ST_RMT_NCI 0x14 +#define PNV_XIVE2_QUEUE_NXC_ST_RMT_CI 0x15 +#define PNV_XIVE2_CACHE_NXC 0x18 + +static int pnv_xive2_inject_notify(PnvXive2 *xive, int type) +{ + uint64_t addr; + int pir = pnv_xive2_get_current_pir(xive); + int thread_nr = PNV10_PIR2THREAD(pir); + int thread_topo_id = PNV10_PIR2CHIP(pir); + int ic_topo_id = xive->chip->chip_id; + uint64_t offset = ic_topo_id * sizeof(XiveSfnBlock); + uint8_t byte = 0xff; + MemTxResult result; + + /* Retrieve the address of requesting thread's notification area */ + addr = pnv_xive2_vst_addr(xive, VST_SYNC, thread_topo_id, thread_nr); + + if (!addr) { + xive2_error(xive, "VST: no SYNC entry %x/%x !?", + thread_topo_id, thread_nr); + return -1; + } + + address_space_stb(&address_space_memory, addr + offset + type, byte, + MEMTXATTRS_UNSPECIFIED, &result); + assert(result == MEMTX_OK); + + return 0; +} + static int pnv_xive2_end_update(PnvXive2 *xive, uint8_t watch_engine) { uint8_t blk; @@ -1178,6 +1256,10 @@ static void 
pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, /* ESB update */ break; + case VC_ESBC_FLUSH_INJECT: + pnv_xive2_inject_notify(xive, PNV_XIVE2_CACHE_ESBC); + break; + case VC_ESBC_CFG: break; @@ -1190,6 +1272,10 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, /* EAS update */ break; + case VC_EASC_FLUSH_INJECT: + pnv_xive2_inject_notify(xive, PNV_XIVE2_CACHE_EASC); + break; + case VC_ENDC_CFG: break; @@ -1224,6 +1310,10 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, xive->vc_regs[VC_ENDC_FLUSH_CTRL >> 3] |= VC_ENDC_FLUSH_CTRL_POLL_VALID; break; + case VC_ENDC_FLUSH_INJECT: + pnv_xive2_inject_notify(xive, PNV_XIVE2_CACHE_ENDC); + break; + /* * Indirect invalidation */ @@ -1424,6 +1514,10 @@ static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset, xive->pc_regs[PC_NXC_FLUSH_CTRL >> 3] |= PC_NXC_FLUSH_CTRL_POLL_VALID; break; + case PC_NXC_FLUSH_INJECT: + pnv_xive2_inject_notify(xive, PNV_XIVE2_CACHE_NXC); + break; + /* * Indirect invalidation */ @@ -1727,6 +1821,12 @@ static const MemoryRegionOps pnv_xive2_ic_lsi_ops = { #define PNV_XIVE2_SYNC_OS_ESC 0x200 #define PNV_XIVE2_SYNC_POOL_ESC 0x280 #define PNV_XIVE2_SYNC_HARD_ESC 0x300 +#define PNV_XIVE2_SYNC_NXC_LD_LCL_NCO 0x800 +#define PNV_XIVE2_SYNC_NXC_LD_LCL_CO 0x880 +#define PNV_XIVE2_SYNC_NXC_ST_LCL_NCI 0x900 +#define PNV_XIVE2_SYNC_NXC_ST_LCL_CI 0x980 +#define PNV_XIVE2_SYNC_NXC_ST_RMT_NCI 0xA00 +#define PNV_XIVE2_SYNC_NXC_ST_RMT_CI 0xA80 static uint64_t pnv_xive2_ic_sync_read(void *opaque, hwaddr offset, unsigned size) @@ -1738,22 +1838,72 @@ static uint64_t pnv_xive2_ic_sync_read(void *opaque, hwaddr offset, return -1; } +/* + * The sync MMIO space spans two pages. The lower page is used for + * queue sync "poll" requests while the upper page is used for queue + * sync "inject" requests. Inject requests require the HW to write + * a byte of all 1's to a predetermined location in memory in order + * to signal completion of the request.
Both pages have the same + * layout, so it is easiest to handle both with a single function. + */ static void pnv_xive2_ic_sync_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); + int inject_type; + hwaddr pg_offset_mask = (1ull << xive->ic_shift) - 1; - switch (offset) { + /* adjust offset for inject page */ + hwaddr adj_offset = offset & pg_offset_mask; + + switch (adj_offset) { case PNV_XIVE2_SYNC_IPI: + inject_type = PNV_XIVE2_QUEUE_IPI; + break; case PNV_XIVE2_SYNC_HW: + inject_type = PNV_XIVE2_QUEUE_HW; + break; case PNV_XIVE2_SYNC_NxC: + inject_type = PNV_XIVE2_QUEUE_NXC; + break; case PNV_XIVE2_SYNC_INT: + inject_type = PNV_XIVE2_QUEUE_INT; + break; case PNV_XIVE2_SYNC_OS_ESC: + inject_type = PNV_XIVE2_QUEUE_OS; + break; case PNV_XIVE2_SYNC_POOL_ESC: + inject_type = PNV_XIVE2_QUEUE_POOL; + break; case PNV_XIVE2_SYNC_HARD_ESC: + inject_type = PNV_XIVE2_QUEUE_HARD; + break; + case PNV_XIVE2_SYNC_NXC_LD_LCL_NCO: + inject_type = PNV_XIVE2_QUEUE_NXC_LD_LCL_NCO; + break; + case PNV_XIVE2_SYNC_NXC_LD_LCL_CO: + inject_type = PNV_XIVE2_QUEUE_NXC_LD_LCL_CO; + break; + case PNV_XIVE2_SYNC_NXC_ST_LCL_NCI: + inject_type = PNV_XIVE2_QUEUE_NXC_ST_LCL_NCI; + break; + case PNV_XIVE2_SYNC_NXC_ST_LCL_CI: + inject_type = PNV_XIVE2_QUEUE_NXC_ST_LCL_CI; + break; + case PNV_XIVE2_SYNC_NXC_ST_RMT_NCI: + inject_type = PNV_XIVE2_QUEUE_NXC_ST_RMT_NCI; + break; + case PNV_XIVE2_SYNC_NXC_ST_RMT_CI: + inject_type = PNV_XIVE2_QUEUE_NXC_ST_RMT_CI; break; default: xive2_error(xive, "SYNC: invalid write @%"HWADDR_PRIx, offset); + return; + } + + /* Write Queue Sync notification byte if writing to sync inject page */ + if ((offset & ~pg_offset_mask) != 0) { + pnv_xive2_inject_notify(xive, inject_type); } } diff --git a/hw/intc/pnv_xive2_regs.h b/hw/intc/pnv_xive2_regs.h index f8e4a677c6..ca05255d20 100644 --- a/hw/intc/pnv_xive2_regs.h +++ b/hw/intc/pnv_xive2_regs.h @@ -232,6 +232,10 @@ #define VC_ESBC_FLUSH_POLL_BLOCK_ID_MASK 
PPC_BITMASK(32, 35) #define VC_ESBC_FLUSH_POLL_OFFSET_MASK PPC_BITMASK(36, 63) /* 28-bit */ +/* ESBC cache flush inject register */ +#define X_VC_ESBC_FLUSH_INJECT 0x142 +#define VC_ESBC_FLUSH_INJECT 0x210 + /* ESBC configuration */ #define X_VC_ESBC_CFG 0x148 #define VC_ESBC_CFG 0x240 @@ -250,6 +254,10 @@ #define VC_EASC_FLUSH_POLL_BLOCK_ID_MASK PPC_BITMASK(32, 35) #define VC_EASC_FLUSH_POLL_OFFSET_MASK PPC_BITMASK(36, 63) /* 28-bit */ +/* EASC flush inject register */ +#define X_VC_EASC_FLUSH_INJECT 0x162 +#define VC_EASC_FLUSH_INJECT 0x310 + /* * VC2 */ @@ -270,6 +278,10 @@ #define VC_ENDC_FLUSH_POLL_BLOCK_ID_MASK PPC_BITMASK(36, 39) #define VC_ENDC_FLUSH_POLL_OFFSET_MASK PPC_BITMASK(40, 63) /* 24-bit */ +/* ENDC flush inject register */ +#define X_VC_ENDC_FLUSH_INJECT 0x182 +#define VC_ENDC_FLUSH_INJECT 0x410 + /* ENDC Sync done */ #define X_VC_ENDC_SYNC_DONE 0x184 #define VC_ENDC_SYNC_DONE 0x420 @@ -403,6 +415,10 @@ #define PC_NXC_FLUSH_POLL_BLOCK_ID_MASK PPC_BITMASK(36, 39) #define PC_NXC_FLUSH_POLL_OFFSET_MASK PPC_BITMASK(40, 63) /* 24-bit */ +/* NxC Cache flush inject */ +#define X_PC_NXC_FLUSH_INJECT 0x282 +#define PC_NXC_FLUSH_INJECT 0x410 + /* NxC Cache watch assign */ #define X_PC_NXC_WATCH_ASSIGN 0x286 #define PC_NXC_WATCH_ASSIGN 0x430 diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h index de34cbdc96..24ce37a9c8 100644 --- a/include/hw/ppc/pnv_chip.h +++ b/include/hw/ppc/pnv_chip.h @@ -139,6 +139,7 @@ struct Pnv10Chip { #define PNV10_PIR2FUSEDCORE(pir) (((pir) >> 3) & 0xf) #define PNV10_PIR2CHIP(pir) (((pir) >> 8) & 0x7f) +#define PNV10_PIR2THREAD(pir) (((pir) & 0x7f)) struct PnvChipClass { /*< private >*/ From d6d5f5c0347b124319ff9c0a43358bdae1d7ea26 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 24 Jul 2024 16:21:23 -0500 Subject: [PATCH 45/96] pnv/xive2: Add NVG and NVC to cache watch facility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cache watch facility uses the same 
register interface to handle entries in the NVP, NVG and NVC tables. A bit-field in the 'watchX specification' register tells the table type. So far, that bit-field was not read and the code assumed a read/write to the NVP table. This patch allows to read/write entries in the NVG and NVC table as well. Signed-off-by: Frederic Barrat Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/pnv_xive2.c | 49 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index 3dbbfddacb..561e61682e 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -465,10 +465,30 @@ static int pnv_xive2_write_nvp(Xive2Router *xrtr, uint8_t blk, uint32_t idx, word_number); } -static int pnv_xive2_nvp_update(PnvXive2 *xive, uint8_t watch_engine) +static int pnv_xive2_nxc_to_table_type(uint8_t nxc_type, uint32_t *table_type) { - uint8_t blk; - uint32_t idx; + switch (nxc_type) { + case PC_NXC_WATCH_NXC_NVP: + *table_type = VST_NVP; + break; + case PC_NXC_WATCH_NXC_NVG: + *table_type = VST_NVG; + break; + case PC_NXC_WATCH_NXC_NVC: + *table_type = VST_NVC; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "XIVE: invalid table type for nxc operation\n"); + return -1; + } + return 0; +} + +static int pnv_xive2_nxc_update(PnvXive2 *xive, uint8_t watch_engine) +{ + uint8_t blk, nxc_type; + uint32_t idx, table_type = -1; int i, spec_reg, data_reg; uint64_t nxc_watch[4]; @@ -476,21 +496,24 @@ static int pnv_xive2_nvp_update(PnvXive2 *xive, uint8_t watch_engine) spec_reg = (PC_NXC_WATCH0_SPEC + watch_engine * 0x40) >> 3; data_reg = (PC_NXC_WATCH0_DATA0 + watch_engine * 0x40) >> 3; + nxc_type = GETFIELD(PC_NXC_WATCH_NXC_TYPE, xive->pc_regs[spec_reg]); blk = GETFIELD(PC_NXC_WATCH_BLOCK_ID, xive->pc_regs[spec_reg]); idx = GETFIELD(PC_NXC_WATCH_INDEX, xive->pc_regs[spec_reg]); + assert(!pnv_xive2_nxc_to_table_type(nxc_type, &table_type)); + for (i = 0; 
i < ARRAY_SIZE(nxc_watch); i++) { nxc_watch[i] = cpu_to_be64(xive->pc_regs[data_reg + i]); } - return pnv_xive2_vst_write(xive, VST_NVP, blk, idx, nxc_watch, + return pnv_xive2_vst_write(xive, table_type, blk, idx, nxc_watch, XIVE_VST_WORD_ALL); } -static void pnv_xive2_nvp_cache_load(PnvXive2 *xive, uint8_t watch_engine) +static void pnv_xive2_nxc_cache_load(PnvXive2 *xive, uint8_t watch_engine) { - uint8_t blk; - uint32_t idx; + uint8_t blk, nxc_type; + uint32_t idx, table_type = -1; uint64_t nxc_watch[4] = { 0 }; int i, spec_reg, data_reg; @@ -498,11 +521,15 @@ static void pnv_xive2_nvp_cache_load(PnvXive2 *xive, uint8_t watch_engine) spec_reg = (PC_NXC_WATCH0_SPEC + watch_engine * 0x40) >> 3; data_reg = (PC_NXC_WATCH0_DATA0 + watch_engine * 0x40) >> 3; + nxc_type = GETFIELD(PC_NXC_WATCH_NXC_TYPE, xive->pc_regs[spec_reg]); blk = GETFIELD(PC_NXC_WATCH_BLOCK_ID, xive->pc_regs[spec_reg]); idx = GETFIELD(PC_NXC_WATCH_INDEX, xive->pc_regs[spec_reg]); - if (pnv_xive2_vst_read(xive, VST_NVP, blk, idx, nxc_watch)) { - xive2_error(xive, "VST: no NVP entry %x/%x !?", blk, idx); + assert(!pnv_xive2_nxc_to_table_type(nxc_type, &table_type)); + + if (pnv_xive2_vst_read(xive, table_type, blk, idx, nxc_watch)) { + xive2_error(xive, "VST: no NXC entry %x/%x in %s table!?", + blk, idx, vst_infos[table_type].name); } for (i = 0; i < ARRAY_SIZE(nxc_watch); i++) { @@ -1432,7 +1459,7 @@ static uint64_t pnv_xive2_ic_pc_read(void *opaque, hwaddr offset, * SPEC register */ watch_engine = (offset - PC_NXC_WATCH0_DATA0) >> 6; - pnv_xive2_nvp_cache_load(xive, watch_engine); + pnv_xive2_nxc_cache_load(xive, watch_engine); val = xive->pc_regs[reg]; break; @@ -1506,7 +1533,7 @@ static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset, /* writing to DATA0 triggers the cache write */ watch_engine = (offset - PC_NXC_WATCH0_DATA0) >> 6; xive->pc_regs[reg] = val; - pnv_xive2_nvp_update(xive, watch_engine); + pnv_xive2_nxc_update(xive, watch_engine); break; /* case PC_NXC_FLUSH_CTRL: */ From 
1775b7d1091452dab24ef23ddc1b7c1943a5e9e4 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 24 Jul 2024 16:21:24 -0500 Subject: [PATCH 46/96] pnv/xive2: Configure Virtualization Structure Tables through the PC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both the virtualization layer (VC) and presentation layer (PC) need to be configured to access the VSTs. Since the information is redundant, the xive model combines both into one set of tables and only the definitions going through the VC are kept. The definitions through the PC are ignored. That works well as long as firmware calls the VC for all the tables. For the NVG and NVC tables, it can make sense to only configure them with the PC, since they are only used by the presenter. So this patch allows firmware to configure the VST tables through the PC as well. The definitions are still shared, since the VST tables can be set through both the VC and/or PC, they are dynamically re-mapped in memory by first deleting the memory subregion. Signed-off-by: Frederic Barrat Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/pnv_xive2.c | 47 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index 561e61682e..33e76633b5 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -762,6 +762,9 @@ static void pnv_xive2_vst_set_exclusive(PnvXive2 *xive, uint8_t type, * entries provisioned by FW (such as skiboot) and resize the * ESB window accordingly. */ + if (memory_region_is_mapped(&xsrc->esb_mmio)) { + memory_region_del_subregion(&xive->esb_mmio, &xsrc->esb_mmio); + } if (!(VSD_INDIRECT & vsd)) { memory_region_set_size(&xsrc->esb_mmio, vst_tsize * SBE_PER_BYTE * (1ull << xsrc->esb_shift)); @@ -777,6 +780,9 @@ static void pnv_xive2_vst_set_exclusive(PnvXive2 *xive, uint8_t type, /* * Backing store pages for the END. 
*/ + if (memory_region_is_mapped(&end_xsrc->esb_mmio)) { + memory_region_del_subregion(&xive->end_mmio, &end_xsrc->esb_mmio); + } if (!(VSD_INDIRECT & vsd)) { memory_region_set_size(&end_xsrc->esb_mmio, (vst_tsize / info->size) * (1ull << end_xsrc->esb_shift)); @@ -801,13 +807,10 @@ static void pnv_xive2_vst_set_exclusive(PnvXive2 *xive, uint8_t type, * Both PC and VC sub-engines are configured as each use the Virtual * Structure Tables */ -static void pnv_xive2_vst_set_data(PnvXive2 *xive, uint64_t vsd) +static void pnv_xive2_vst_set_data(PnvXive2 *xive, uint64_t vsd, + uint8_t type, uint8_t blk) { uint8_t mode = GETFIELD(VSD_MODE, vsd); - uint8_t type = GETFIELD(VC_VSD_TABLE_SELECT, - xive->vc_regs[VC_VSD_TABLE_ADDR >> 3]); - uint8_t blk = GETFIELD(VC_VSD_TABLE_ADDRESS, - xive->vc_regs[VC_VSD_TABLE_ADDR >> 3]); uint64_t vst_addr = vsd & VSD_ADDRESS_MASK; if (type > VST_ERQ) { @@ -842,6 +845,16 @@ static void pnv_xive2_vst_set_data(PnvXive2 *xive, uint64_t vsd) } } +static void pnv_xive2_vc_vst_set_data(PnvXive2 *xive, uint64_t vsd) +{ + uint8_t type = GETFIELD(VC_VSD_TABLE_SELECT, + xive->vc_regs[VC_VSD_TABLE_ADDR >> 3]); + uint8_t blk = GETFIELD(VC_VSD_TABLE_ADDRESS, + xive->vc_regs[VC_VSD_TABLE_ADDR >> 3]); + + pnv_xive2_vst_set_data(xive, vsd, type, blk); +} + /* * MMIO handlers */ @@ -1271,7 +1284,7 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, case VC_VSD_TABLE_ADDR: break; case VC_VSD_TABLE_DATA: - pnv_xive2_vst_set_data(xive, val); + pnv_xive2_vc_vst_set_data(xive, val); break; /* @@ -1490,6 +1503,16 @@ static uint64_t pnv_xive2_ic_pc_read(void *opaque, hwaddr offset, return val; } +static void pnv_xive2_pc_vst_set_data(PnvXive2 *xive, uint64_t vsd) +{ + uint8_t type = GETFIELD(PC_VSD_TABLE_SELECT, + xive->pc_regs[PC_VSD_TABLE_ADDR >> 3]); + uint8_t blk = GETFIELD(PC_VSD_TABLE_ADDRESS, + xive->pc_regs[PC_VSD_TABLE_ADDR >> 3]); + + pnv_xive2_vst_set_data(xive, vsd, type, blk); +} + static void pnv_xive2_ic_pc_write(void *opaque, hwaddr 
offset, uint64_t val, unsigned size) { @@ -1500,12 +1523,18 @@ static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset, switch (offset) { /* - * VSD table settings. Only taken into account in the VC - * sub-engine because the Xive2Router model combines both VC and PC - * sub-engines + * VSD table settings. + * The Xive2Router model combines both VC and PC sub-engines. We + * allow to configure the tables through both, for the rare cases + * where a table only really needs to be configured for one of + * them (e.g. the NVG table for the presenter). It assumes that + * firmware passes the same address to the VC and PC when tables + * are defined for both, which seems acceptable. */ case PC_VSD_TABLE_ADDR: + break; case PC_VSD_TABLE_DATA: + pnv_xive2_pc_vst_set_data(xive, val); break; case PC_NXC_PROC_CONFIG: From 9d7188a2ba6e520934691612915afb98c10823c5 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 24 Jul 2024 16:21:25 -0500 Subject: [PATCH 47/96] pnv/xive2: Enable VST NVG and NVC index compression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable NVG and NVC VST tables for index compression which indicates the number of bits the address is shifted to the right for the table accesses. The compression values are defined as: 0000 - No compression 0001 - 1 bit shift 0010 - 2 bit shift .... 
1000 - 8 bit shift 1001-1111 - No compression Signed-off-by: Frederic Barrat Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/pnv_xive2.c | 20 ++++++++++++++++++++ hw/intc/pnv_xive2_regs.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index 33e76633b5..c3b5bfe61f 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -217,6 +217,20 @@ static uint64_t pnv_xive2_vst_addr_indirect(PnvXive2 *xive, uint32_t type, return pnv_xive2_vst_addr_direct(xive, type, vsd, (idx % vst_per_page)); } +static uint8_t pnv_xive2_nvc_table_compress_shift(PnvXive2 *xive) +{ + uint8_t shift = GETFIELD(PC_NXC_PROC_CONFIG_NVC_TABLE_COMPRESS, + xive->pc_regs[PC_NXC_PROC_CONFIG >> 3]); + return shift > 8 ? 0 : shift; +} + +static uint8_t pnv_xive2_nvg_table_compress_shift(PnvXive2 *xive) +{ + uint8_t shift = GETFIELD(PC_NXC_PROC_CONFIG_NVG_TABLE_COMPRESS, + xive->pc_regs[PC_NXC_PROC_CONFIG >> 3]); + return shift > 8 ? 0 : shift; +} + static uint64_t pnv_xive2_vst_addr(PnvXive2 *xive, uint32_t type, uint8_t blk, uint32_t idx) { @@ -238,6 +252,12 @@ static uint64_t pnv_xive2_vst_addr(PnvXive2 *xive, uint32_t type, uint8_t blk, return xive ? 
pnv_xive2_vst_addr(xive, type, blk, idx) : 0; } + if (type == VST_NVG) { + idx >>= pnv_xive2_nvg_table_compress_shift(xive); + } else if (type == VST_NVC) { + idx >>= pnv_xive2_nvc_table_compress_shift(xive); + } + if (VSD_INDIRECT & vsd) { return pnv_xive2_vst_addr_indirect(xive, type, vsd, idx); } diff --git a/hw/intc/pnv_xive2_regs.h b/hw/intc/pnv_xive2_regs.h index ca05255d20..e8b87b3d2c 100644 --- a/hw/intc/pnv_xive2_regs.h +++ b/hw/intc/pnv_xive2_regs.h @@ -427,6 +427,8 @@ #define X_PC_NXC_PROC_CONFIG 0x28A #define PC_NXC_PROC_CONFIG 0x450 #define PC_NXC_PROC_CONFIG_WATCH_ASSIGN PPC_BITMASK(0, 3) +#define PC_NXC_PROC_CONFIG_NVG_TABLE_COMPRESS PPC_BITMASK(32, 35) +#define PC_NXC_PROC_CONFIG_NVC_TABLE_COMPRESS PPC_BITMASK(36, 39) /* NxC Cache Watch 0 Specification */ #define X_PC_NXC_WATCH0_SPEC 0x2A0 From 4c81813e25d24ece49141572ad5f07d8efe7bf4d Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 24 Jul 2024 16:21:26 -0500 Subject: [PATCH 48/96] pnv/xive2: Set Translation Table for the NVC port space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set Translation Table for the NVC port space is missing. The xive model doesn't take into account the remapping of IO operations via the Set Translation Table but firmware is allowed to define it for the Notify Virtual Crowd (NVC), like it's already done for the other VST tables. 
Signed-off-by: Frederic Barrat Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/pnv_xive2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index c3b5bfe61f..08b9166a09 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -722,6 +722,7 @@ static int pnv_xive2_stt_set_data(PnvXive2 *xive, uint64_t val) case CQ_TAR_NVPG: case CQ_TAR_ESB: case CQ_TAR_END: + case CQ_TAR_NVC: xive->tables[tsel][entry] = val; break; default: From fa414eb6655228e274811ade0c7bcddb88acaee5 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 24 Jul 2024 16:21:27 -0500 Subject: [PATCH 49/96] pnv/xive2: Fail VST entry address computation if table has no VSD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fail VST entry address computation if firmware doesn't define a descriptor for one of the Virtualization Structure Tables (VST), since there's no point in trying to compute the address of its entry. Abort the operation and log an error.
Signed-off-by: Frederic Barrat Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/pnv_xive2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index 08b9166a09..9fbd44f974 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -244,6 +244,11 @@ static uint64_t pnv_xive2_vst_addr(PnvXive2 *xive, uint32_t type, uint8_t blk, } vsd = xive->vsds[type][blk]; + if (vsd == 0) { + xive2_error(xive, "VST: vsd == 0 block id %d for VST %s %d !?", + blk, info->name, idx); + return 0; + } /* Remote VST access */ if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) { From 96c674bf08365ae0ffa2b960a12718bf2ca90079 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 24 Jul 2024 16:21:28 -0500 Subject: [PATCH 50/96] pnv/xive2: Move xive2_nvp_pic_print_info() to xive2.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moving xive2_nvp_pic_print_info() to align with the other "pic_print_info" functions. 
Signed-off-by: Frederic Barrat Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/pnv_xive2.c | 27 --------------------------- hw/intc/xive2.c | 26 ++++++++++++++++++++++++++ include/hw/ppc/xive2_regs.h | 2 ++ 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index 9fbd44f974..78609105a8 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -2436,33 +2436,6 @@ static void pnv_xive2_register_types(void) type_init(pnv_xive2_register_types) -static void xive2_nvp_pic_print_info(Xive2Nvp *nvp, uint32_t nvp_idx, - GString *buf) -{ - uint8_t eq_blk = xive_get_field32(NVP2_W5_VP_END_BLOCK, nvp->w5); - uint32_t eq_idx = xive_get_field32(NVP2_W5_VP_END_INDEX, nvp->w5); - - if (!xive2_nvp_is_valid(nvp)) { - return; - } - - g_string_append_printf(buf, " %08x end:%02x/%04x IPB:%02x", - nvp_idx, eq_blk, eq_idx, - xive_get_field32(NVP2_W2_IPB, nvp->w2)); - /* - * When the NVP is HW controlled, more fields are updated - */ - if (xive2_nvp_is_hw(nvp)) { - g_string_append_printf(buf, " CPPR:%02x", - xive_get_field32(NVP2_W2_CPPR, nvp->w2)); - if (xive2_nvp_is_co(nvp)) { - g_string_append_printf(buf, " CO:%04x", - xive_get_field32(NVP2_W1_CO_THRID, nvp->w1)); - } - } - g_string_append_c(buf, '\n'); -} - /* * If the table is direct, we can compute the number of PQ entries * provisioned by FW. 
diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c index 3e7238c663..ac914b3d1c 100644 --- a/hw/intc/xive2.c +++ b/hw/intc/xive2.c @@ -137,6 +137,32 @@ void xive2_end_eas_pic_print_info(Xive2End *end, uint32_t end_idx, (uint32_t) xive_get_field64(EAS2_END_DATA, eas->w)); } +void xive2_nvp_pic_print_info(Xive2Nvp *nvp, uint32_t nvp_idx, GString *buf) +{ + uint8_t eq_blk = xive_get_field32(NVP2_W5_VP_END_BLOCK, nvp->w5); + uint32_t eq_idx = xive_get_field32(NVP2_W5_VP_END_INDEX, nvp->w5); + + if (!xive2_nvp_is_valid(nvp)) { + return; + } + + g_string_append_printf(buf, " %08x end:%02x/%04x IPB:%02x", + nvp_idx, eq_blk, eq_idx, + xive_get_field32(NVP2_W2_IPB, nvp->w2)); + /* + * When the NVP is HW controlled, more fields are updated + */ + if (xive2_nvp_is_hw(nvp)) { + g_string_append_printf(buf, " CPPR:%02x", + xive_get_field32(NVP2_W2_CPPR, nvp->w2)); + if (xive2_nvp_is_co(nvp)) { + g_string_append_printf(buf, " CO:%04x", + xive_get_field32(NVP2_W1_CO_THRID, nvp->w1)); + } + } + g_string_append_c(buf, '\n'); +} + static void xive2_end_enqueue(Xive2End *end, uint32_t data) { uint64_t qaddr_base = xive2_end_qaddr(end); diff --git a/include/hw/ppc/xive2_regs.h b/include/hw/ppc/xive2_regs.h index 4e5e17cd89..ec5d6ec2d6 100644 --- a/include/hw/ppc/xive2_regs.h +++ b/include/hw/ppc/xive2_regs.h @@ -194,6 +194,8 @@ static inline uint32_t xive2_nvp_blk(uint32_t cam_line) return (cam_line >> XIVE2_NVP_SHIFT) & 0xf; } +void xive2_nvp_pic_print_info(Xive2Nvp *nvp, uint32_t nvp_idx, GString *buf); + /* * Notification Virtual Group or Crowd (NVG/NVC) */ From 6adb007357752ff665fde7dd43e5e0afabe7dcbc Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 24 Jul 2024 16:21:29 -0500 Subject: [PATCH 51/96] pnv/xive2: Refine TIMA 'info pic' output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In XIVE Gen 2 there were some minor changes to the TIMA header that were updated when printed. 
Signed-off-by: Frederic Barrat Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/xive.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 70f11f993b..5a02dd8e02 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -692,9 +692,15 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, GString *buf) } } - g_string_append_printf(buf, "CPU[%04x]: " - "QW NSR CPPR IPB LSMFB ACK# INC AGE PIPR W2\n", - cpu_index); + if (xive_presenter_get_config(tctx->xptr) & XIVE_PRESENTER_GEN1_TIMA_OS) { + g_string_append_printf(buf, "CPU[%04x]: " + "QW NSR CPPR IPB LSMFB ACK# INC AGE PIPR" + " W2\n", cpu_index); + } else { + g_string_append_printf(buf, "CPU[%04x]: " + "QW NSR CPPR IPB LSMFB - LGS T PIPR" + " W2\n", cpu_index); + } for (i = 0; i < XIVE_TM_RING_COUNT; i++) { char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]); From 5fc9c71724559be273ee1b68c65ffe45e1386e3c Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 24 Jul 2024 16:21:30 -0500 Subject: [PATCH 52/96] pnv/xive2: Dump more END state with 'info pic' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Additional END state 'info pic' information is added. The 'ignore', 'crowd' and 'precluded escalation control' bits of an Event Notification Descriptor are all used when delivering an interrupt targeting a VP-group or crowd.
Signed-off-by: Frederic Barrat Signed-off-by: Michael Kowal Reviewed-by: Cédric Le Goater Signed-off-by: Nicholas Piggin --- hw/intc/xive2.c | 7 +++++-- include/hw/ppc/xive2_regs.h | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c index ac914b3d1c..1f150685bf 100644 --- a/hw/intc/xive2.c +++ b/hw/intc/xive2.c @@ -89,7 +89,7 @@ void xive2_end_pic_print_info(Xive2End *end, uint32_t end_idx, GString *buf) pq = xive_get_field32(END2_W1_ESn, end->w1); g_string_append_printf(buf, - " %08x %c%c %c%c%c%c%c%c%c%c%c%c " + " %08x %c%c %c%c%c%c%c%c%c%c%c%c%c %c%c " "prio:%d nvp:%02x/%04x", end_idx, pq & XIVE_ESB_VAL_P ? 'P' : '-', @@ -98,12 +98,15 @@ void xive2_end_pic_print_info(Xive2End *end, uint32_t end_idx, GString *buf) xive2_end_is_enqueue(end) ? 'q' : '-', xive2_end_is_notify(end) ? 'n' : '-', xive2_end_is_backlog(end) ? 'b' : '-', + xive2_end_is_precluded_escalation(end) ? 'p' : '-', xive2_end_is_escalate(end) ? 'e' : '-', xive2_end_is_escalate_end(end) ? 'N' : '-', xive2_end_is_uncond_escalation(end) ? 'u' : '-', xive2_end_is_silent_escalation(end) ? 's' : '-', xive2_end_is_firmware1(end) ? 'f' : '-', xive2_end_is_firmware2(end) ? 'F' : '-', + xive2_end_is_ignore(end) ? 'i' : '-', + xive2_end_is_crowd(end) ? 
'c' : '-', priority, nvp_blk, nvp_idx); if (qaddr_base) { @@ -676,7 +679,7 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, } found = xive_presenter_notify(xrtr->xfb, format, nvp_blk, nvp_idx, - xive_get_field32(END2_W6_IGNORE, end.w7), + xive2_end_is_ignore(&end), priority, xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7)); diff --git a/include/hw/ppc/xive2_regs.h b/include/hw/ppc/xive2_regs.h index ec5d6ec2d6..4349d009d0 100644 --- a/include/hw/ppc/xive2_regs.h +++ b/include/hw/ppc/xive2_regs.h @@ -97,6 +97,7 @@ typedef struct Xive2End { uint32_t w6; #define END2_W6_FORMAT_BIT PPC_BIT32(0) #define END2_W6_IGNORE PPC_BIT32(1) +#define END2_W6_CROWD PPC_BIT32(2) #define END2_W6_VP_BLOCK PPC_BITMASK32(4, 7) #define END2_W6_VP_OFFSET PPC_BITMASK32(8, 31) #define END2_W6_VP_OFFSET_GEN1 PPC_BITMASK32(13, 31) @@ -111,6 +112,8 @@ typedef struct Xive2End { #define xive2_end_is_notify(end) \ (be32_to_cpu((end)->w0) & END2_W0_UCOND_NOTIFY) #define xive2_end_is_backlog(end) (be32_to_cpu((end)->w0) & END2_W0_BACKLOG) +#define xive2_end_is_precluded_escalation(end) \ + (be32_to_cpu((end)->w0) & END2_W0_PRECL_ESC_CTL) #define xive2_end_is_escalate(end) \ (be32_to_cpu((end)->w0) & END2_W0_ESCALATE_CTL) #define xive2_end_is_uncond_escalation(end) \ @@ -123,6 +126,10 @@ typedef struct Xive2End { (be32_to_cpu((end)->w0) & END2_W0_FIRMWARE1) #define xive2_end_is_firmware2(end) \ (be32_to_cpu((end)->w0) & END2_W0_FIRMWARE2) +#define xive2_end_is_ignore(end) \ + (be32_to_cpu((end)->w6) & END2_W6_IGNORE) +#define xive2_end_is_crowd(end) \ + (be32_to_cpu((end)->w6) & END2_W6_CROWD) static inline uint64_t xive2_end_qaddr(Xive2End *end) { From a7e10fab78d91d7d0dee60ce1e4d1b28365d570f Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Thu, 23 May 2024 15:14:53 +0530 Subject: [PATCH 53/96] target/ppc: Move VMX integer add/sub saturate insns to decodetree. 
Moving the following instructions to decodetree specification : v{add,sub}{u,s}{b,h,w}s : VX-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Reviewed-by: Richard Henderson Signed-off-by: Chinmay Rath Signed-off-by: Nicholas Piggin --- target/ppc/helper.h | 24 +-- target/ppc/insn32.decode | 16 ++ target/ppc/int_helper.c | 22 +-- target/ppc/translate/vmx-impl.c.inc | 238 ++++++++++++++++++++-------- target/ppc/translate/vmx-ops.c.inc | 19 +-- 5 files changed, 220 insertions(+), 99 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 4fa089cbf9..dd7526bc2e 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -203,18 +203,18 @@ DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vslv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VPRTYBQ, TCG_CALL_NO_RWG, void, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddubs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vadduhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vadduws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsububs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubuhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubuws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSBS, TCG_CALL_NO_RWG, 
void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_3(VADDUQM, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_4(VADDECUQ, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VADDEUQM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index ee33141476..8988fca60e 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -832,6 +832,14 @@ VADDCUW 000100 ..... ..... ..... 00110000000 @VX VADDCUQ 000100 ..... ..... ..... 00101000000 @VX VADDUQM 000100 ..... ..... ..... 00100000000 @VX +VADDSBS 000100 ..... ..... ..... 01100000000 @VX +VADDSHS 000100 ..... ..... ..... 01101000000 @VX +VADDSWS 000100 ..... ..... ..... 01110000000 @VX + +VADDUBS 000100 ..... ..... ..... 01000000000 @VX +VADDUHS 000100 ..... ..... ..... 01001000000 @VX +VADDUWS 000100 ..... ..... ..... 01010000000 @VX + VADDEUQM 000100 ..... ..... ..... ..... 111100 @VA VADDECUQ 000100 ..... ..... ..... ..... 111101 @VA @@ -839,6 +847,14 @@ VSUBCUW 000100 ..... ..... ..... 10110000000 @VX VSUBCUQ 000100 ..... ..... ..... 10101000000 @VX VSUBUQM 000100 ..... ..... ..... 
10100000000 @VX +VSUBSBS 000100 ..... ..... ..... 11100000000 @VX +VSUBSHS 000100 ..... ..... ..... 11101000000 @VX +VSUBSWS 000100 ..... ..... ..... 11110000000 @VX + +VSUBUBS 000100 ..... ..... ..... 11000000000 @VX +VSUBUHS 000100 ..... ..... ..... 11001000000 @VX +VSUBUWS 000100 ..... ..... ..... 11010000000 @VX + VSUBECUQ 000100 ..... ..... ..... ..... 111111 @VA VSUBEUQM 000100 ..... ..... ..... ..... 111110 @VA diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 2c6b633d65..ef4b2e75d6 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -541,7 +541,7 @@ VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); } #define VARITHSAT_DO(name, op, optype, cvt, element) \ - void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ + void helper_V##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ { \ int sat = 0; \ @@ -555,17 +555,17 @@ VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); } \ } #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ - VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ - VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) + VARITHSAT_DO(ADDS##suffix##S, +, optype, cvt, element) \ + VARITHSAT_DO(SUBS##suffix##S, -, optype, cvt, element) #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ - VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ - VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) -VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) -VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) -VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) -VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) -VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) -VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) + VARITHSAT_DO(ADDU##suffix##S, +, optype, cvt, element) \ + VARITHSAT_DO(SUBU##suffix##S, -, optype, cvt, element) +VARITHSAT_SIGNED(B, s8, int16_t, cvtshsb) +VARITHSAT_SIGNED(H, s16, int32_t, cvtswsh) +VARITHSAT_SIGNED(W, s32, int64_t, cvtsdsw) 
+VARITHSAT_UNSIGNED(B, u8, uint16_t, cvtshub) +VARITHSAT_UNSIGNED(H, u16, uint32_t, cvtswuh) +VARITHSAT_UNSIGNED(W, u32, uint64_t, cvtsduw) #undef VARITHSAT_CASE #undef VARITHSAT_DO #undef VARITHSAT_SIGNED diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 8084af75cc..fdb283c1d4 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -1047,58 +1047,6 @@ TRANS(VRLQ, do_vector_rotl_quad, false, false) TRANS(VRLQNM, do_vector_rotl_quad, true, false) TRANS(VRLQMI, do_vector_rotl_quad, false, true) -#define GEN_VXFORM_SAT(NAME, VECE, NORM, SAT, OPC2, OPC3) \ -static void glue(glue(gen_, NAME), _vec)(unsigned vece, TCGv_vec t, \ - TCGv_vec sat, TCGv_vec a, \ - TCGv_vec b) \ -{ \ - TCGv_vec x = tcg_temp_new_vec_matching(t); \ - glue(glue(tcg_gen_, NORM), _vec)(VECE, x, a, b); \ - glue(glue(tcg_gen_, SAT), _vec)(VECE, t, a, b); \ - tcg_gen_cmp_vec(TCG_COND_NE, VECE, x, x, t); \ - tcg_gen_or_vec(VECE, sat, sat, x); \ -} \ -static void glue(gen_, NAME)(DisasContext *ctx) \ -{ \ - static const TCGOpcode vecop_list[] = { \ - glue(glue(INDEX_op_, NORM), _vec), \ - glue(glue(INDEX_op_, SAT), _vec), \ - INDEX_op_cmp_vec, 0 \ - }; \ - static const GVecGen4 g = { \ - .fniv = glue(glue(gen_, NAME), _vec), \ - .fno = glue(gen_helper_, NAME), \ - .opt_opc = vecop_list, \ - .write_aofs = true, \ - .vece = VECE, \ - }; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - tcg_gen_gvec_4(avr_full_offset(rD(ctx->opcode)), \ - offsetof(CPUPPCState, vscr_sat), \ - avr_full_offset(rA(ctx->opcode)), \ - avr_full_offset(rB(ctx->opcode)), \ - 16, 16, &g); \ -} - -GEN_VXFORM_SAT(vaddubs, MO_8, add, usadd, 0, 8); -GEN_VXFORM_DUAL_EXT(vaddubs, PPC_ALTIVEC, PPC_NONE, 0, \ - vmul10uq, PPC_NONE, PPC2_ISA300, 0x0000F800) -GEN_VXFORM_SAT(vadduhs, MO_16, add, usadd, 0, 9); -GEN_VXFORM_DUAL(vadduhs, PPC_ALTIVEC, PPC_NONE, \ - vmul10euq, PPC_NONE, PPC2_ISA300) 
-GEN_VXFORM_SAT(vadduws, MO_32, add, usadd, 0, 10); -GEN_VXFORM_SAT(vaddsbs, MO_8, add, ssadd, 0, 12); -GEN_VXFORM_SAT(vaddshs, MO_16, add, ssadd, 0, 13); -GEN_VXFORM_SAT(vaddsws, MO_32, add, ssadd, 0, 14); -GEN_VXFORM_SAT(vsububs, MO_8, sub, ussub, 0, 24); -GEN_VXFORM_SAT(vsubuhs, MO_16, sub, ussub, 0, 25); -GEN_VXFORM_SAT(vsubuws, MO_32, sub, ussub, 0, 26); -GEN_VXFORM_SAT(vsubsbs, MO_8, sub, sssub, 0, 28); -GEN_VXFORM_SAT(vsubshs, MO_16, sub, sssub, 0, 29); -GEN_VXFORM_SAT(vsubsws, MO_32, sub, sssub, 0, 30); GEN_VXFORM_TRANS(vsl, 2, 7); GEN_VXFORM_TRANS(vsr, 2, 11); GEN_VXFORM_ENV(vpkuhum, 7, 0); @@ -2641,26 +2589,14 @@ static void gen_xpnd04_2(DisasContext *ctx) } } - -GEN_VXFORM_DUAL(vsubsws, PPC_ALTIVEC, PPC_NONE, \ - xpnd04_2, PPC_NONE, PPC2_ISA300) - GEN_VXFORM_DUAL(vsububm, PPC_ALTIVEC, PPC_NONE, \ bcdadd, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM_DUAL(vsububs, PPC_ALTIVEC, PPC_NONE, \ - bcdadd, PPC_NONE, PPC2_ALTIVEC_207) GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \ bcdsub, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \ - bcdsub, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM_DUAL(vaddshs, PPC_ALTIVEC, PPC_NONE, \ - bcdcpsgn, PPC_NONE, PPC2_ISA300) GEN_VXFORM_DUAL(vsubudm, PPC2_ALTIVEC_207, PPC_NONE, \ bcds, PPC_NONE, PPC2_ISA300) GEN_VXFORM_DUAL(vsubuwm, PPC_ALTIVEC, PPC_NONE, \ bcdus, PPC_NONE, PPC2_ISA300) -GEN_VXFORM_DUAL(vsubsbs, PPC_ALTIVEC, PPC_NONE, \ - bcdtrunc, PPC_NONE, PPC2_ISA300) static void gen_vsbox(DisasContext *ctx) { @@ -2937,6 +2873,180 @@ static bool do_vx_vaddsubcuw(DisasContext *ctx, arg_VX *a, int add) TRANS(VSUBCUW, do_vx_vaddsubcuw, 0) TRANS(VADDCUW, do_vx_vaddsubcuw, 1) +/* Integer Add/Sub Saturate Instructions */ +static inline void do_vadd_vsub_sat +( + unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b, + void (*norm_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec), + void (*sat_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + norm_op(vece, x, 
a, b); + sat_op(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); +} + +static void gen_vadd_sat_u(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + do_vadd_vsub_sat(vece, t, sat, a, b, tcg_gen_add_vec, tcg_gen_usadd_vec); +} + +static void gen_vadd_sat_s(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + do_vadd_vsub_sat(vece, t, sat, a, b, tcg_gen_add_vec, tcg_gen_ssadd_vec); +} + +static void gen_vsub_sat_u(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + do_vadd_vsub_sat(vece, t, sat, a, b, tcg_gen_sub_vec, tcg_gen_ussub_vec); +} + +static void gen_vsub_sat_s(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + do_vadd_vsub_sat(vece, t, sat, a, b, tcg_gen_sub_vec, tcg_gen_sssub_vec); +} + +/* + * Signed/Unsigned add/sub helper ops for byte/halfword/word + * GVecGen4 struct variants. + */ +static const TCGOpcode vecop_list_sub_u[] = { + INDEX_op_sub_vec, INDEX_op_ussub_vec, INDEX_op_cmp_vec, 0 +}; +static const TCGOpcode vecop_list_sub_s[] = { + INDEX_op_sub_vec, INDEX_op_sssub_vec, INDEX_op_cmp_vec, 0 +}; +static const TCGOpcode vecop_list_add_u[] = { + INDEX_op_add_vec, INDEX_op_usadd_vec, INDEX_op_cmp_vec, 0 +}; +static const TCGOpcode vecop_list_add_s[] = { + INDEX_op_add_vec, INDEX_op_ssadd_vec, INDEX_op_cmp_vec, 0 +}; + +static const GVecGen4 op_vsububs = { + .fniv = gen_vsub_sat_u, + .fno = gen_helper_VSUBUBS, + .opt_opc = vecop_list_sub_u, + .write_aofs = true, + .vece = MO_8 +}; + +static const GVecGen4 op_vaddubs = { + .fniv = gen_vadd_sat_u, + .fno = gen_helper_VADDUBS, + .opt_opc = vecop_list_add_u, + .write_aofs = true, + .vece = MO_8 +}; + +static const GVecGen4 op_vsubuhs = { + .fniv = gen_vsub_sat_u, + .fno = gen_helper_VSUBUHS, + .opt_opc = vecop_list_sub_u, + .write_aofs = true, + .vece = MO_16 +}; + +static const GVecGen4 op_vadduhs = { + .fniv = gen_vadd_sat_u, + .fno = gen_helper_VADDUHS, + .opt_opc = 
vecop_list_add_u, + .write_aofs = true, + .vece = MO_16 +}; + +static const GVecGen4 op_vsubuws = { + .fniv = gen_vsub_sat_u, + .fno = gen_helper_VSUBUWS, + .opt_opc = vecop_list_sub_u, + .write_aofs = true, + .vece = MO_32 +}; + +static const GVecGen4 op_vadduws = { + .fniv = gen_vadd_sat_u, + .fno = gen_helper_VADDUWS, + .opt_opc = vecop_list_add_u, + .write_aofs = true, + .vece = MO_32 +}; + +static const GVecGen4 op_vsubsbs = { + .fniv = gen_vsub_sat_s, + .fno = gen_helper_VSUBSBS, + .opt_opc = vecop_list_sub_s, + .write_aofs = true, + .vece = MO_8 +}; + +static const GVecGen4 op_vaddsbs = { + .fniv = gen_vadd_sat_s, + .fno = gen_helper_VADDSBS, + .opt_opc = vecop_list_add_s, + .write_aofs = true, + .vece = MO_8 +}; + +static const GVecGen4 op_vsubshs = { + .fniv = gen_vsub_sat_s, + .fno = gen_helper_VSUBSHS, + .opt_opc = vecop_list_sub_s, + .write_aofs = true, + .vece = MO_16 +}; + +static const GVecGen4 op_vaddshs = { + .fniv = gen_vadd_sat_s, + .fno = gen_helper_VADDSHS, + .opt_opc = vecop_list_add_s, + .write_aofs = true, + .vece = MO_16 +}; + +static const GVecGen4 op_vsubsws = { + .fniv = gen_vsub_sat_s, + .fno = gen_helper_VSUBSWS, + .opt_opc = vecop_list_sub_s, + .write_aofs = true, + .vece = MO_32 +}; + +static const GVecGen4 op_vaddsws = { + .fniv = gen_vadd_sat_s, + .fno = gen_helper_VADDSWS, + .opt_opc = vecop_list_add_s, + .write_aofs = true, + .vece = MO_32 +}; + +static bool do_vx_vadd_vsub_sat(DisasContext *ctx, arg_VX *a, const GVecGen4 *op) +{ + REQUIRE_VECTOR(ctx); + tcg_gen_gvec_4(avr_full_offset(a->vrt), offsetof(CPUPPCState, vscr_sat), + avr_full_offset(a->vra), avr_full_offset(a->vrb), + 16, 16, op); + + return true; +} + +TRANS_FLAGS(ALTIVEC, VSUBUBS, do_vx_vadd_vsub_sat, &op_vsububs) +TRANS_FLAGS(ALTIVEC, VSUBUHS, do_vx_vadd_vsub_sat, &op_vsubuhs) +TRANS_FLAGS(ALTIVEC, VSUBUWS, do_vx_vadd_vsub_sat, &op_vsubuws) +TRANS_FLAGS(ALTIVEC, VSUBSBS, do_vx_vadd_vsub_sat, &op_vsubsbs) +TRANS_FLAGS(ALTIVEC, VSUBSHS, do_vx_vadd_vsub_sat, 
&op_vsubshs) +TRANS_FLAGS(ALTIVEC, VSUBSWS, do_vx_vadd_vsub_sat, &op_vsubsws) +TRANS_FLAGS(ALTIVEC, VADDUBS, do_vx_vadd_vsub_sat, &op_vaddubs) +TRANS_FLAGS(ALTIVEC, VADDUHS, do_vx_vadd_vsub_sat, &op_vadduhs) +TRANS_FLAGS(ALTIVEC, VADDUWS, do_vx_vadd_vsub_sat, &op_vadduws) +TRANS_FLAGS(ALTIVEC, VADDSBS, do_vx_vadd_vsub_sat, &op_vaddsbs) +TRANS_FLAGS(ALTIVEC, VADDSHS, do_vx_vadd_vsub_sat, &op_vaddshs) +TRANS_FLAGS(ALTIVEC, VADDSWS, do_vx_vadd_vsub_sat, &op_vaddsws) + static bool do_vx_vmuleo(DisasContext *ctx, arg_VX *a, bool even, void (*gen_mul)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) { diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index 7bb11b0549..e28958a126 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -54,18 +54,13 @@ GEN_VXFORM(vsro, 6, 17), GEN_VXFORM(xpnd04_1, 0, 22), GEN_VXFORM_300(bcdsr, 0, 23), GEN_VXFORM_300(bcdsr, 0, 31), -GEN_VXFORM_DUAL(vaddubs, vmul10uq, 0, 8, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM_DUAL(vadduhs, vmul10euq, 0, 9, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM(vadduws, 0, 10), -GEN_VXFORM(vaddsbs, 0, 12), -GEN_VXFORM_DUAL(vaddshs, bcdcpsgn, 0, 13, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM(vaddsws, 0, 14), -GEN_VXFORM_DUAL(vsububs, bcdadd, 0, 24, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM_DUAL(vsubuhs, bcdsub, 0, 25, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM(vsubuws, 0, 26), -GEN_VXFORM_DUAL(vsubsbs, bcdtrunc, 0, 28, PPC_ALTIVEC, PPC2_ISA300), -GEN_VXFORM(vsubshs, 0, 29), -GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_300_EXT(vmul10uq, 0, 8, 0x0000F800), +GEN_VXFORM_300(vmul10euq, 0, 9), +GEN_VXFORM_300(bcdcpsgn, 0, 13), +GEN_VXFORM_207(bcdadd, 0, 24), +GEN_VXFORM_207(bcdsub, 0, 25), +GEN_VXFORM_300(bcdtrunc, 0, 28), +GEN_VXFORM_300(xpnd04_2, 0, 30), GEN_VXFORM_300(bcdtrunc, 0, 20), GEN_VXFORM_300(bcdutrunc, 0, 21), GEN_VXFORM(vsl, 2, 7), From 8fc7b63adaa860c81119f6f8cd6cc981504bfb7b Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Thu, 23 May 2024 
15:14:54 +0530 Subject: [PATCH 54/96] target/ppc: Improve VMX integer add/sub saturate instructions. No need for a full comparison; xor produces non-zero bits for QC just fine. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson Signed-off-by: Nicholas Piggin --- target/ppc/translate/vmx-impl.c.inc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index fdb283c1d4..152bcde0e3 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -2876,15 +2876,15 @@ TRANS(VADDCUW, do_vx_vaddsubcuw, 1) /* Integer Add/Sub Saturate Instructions */ static inline void do_vadd_vsub_sat ( - unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b, + unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b, void (*norm_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec), void (*sat_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) { TCGv_vec x = tcg_temp_new_vec_matching(t); norm_op(vece, x, a, b); sat_op(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); } static void gen_vadd_sat_u(unsigned vece, TCGv_vec t, TCGv_vec sat, @@ -2916,16 +2916,16 @@ static void gen_vsub_sat_s(unsigned vece, TCGv_vec t, TCGv_vec sat, * GVecGen4 struct variants. 
*/ static const TCGOpcode vecop_list_sub_u[] = { - INDEX_op_sub_vec, INDEX_op_ussub_vec, INDEX_op_cmp_vec, 0 + INDEX_op_sub_vec, INDEX_op_ussub_vec, 0 }; static const TCGOpcode vecop_list_sub_s[] = { - INDEX_op_sub_vec, INDEX_op_sssub_vec, INDEX_op_cmp_vec, 0 + INDEX_op_sub_vec, INDEX_op_sssub_vec, 0 }; static const TCGOpcode vecop_list_add_u[] = { - INDEX_op_add_vec, INDEX_op_usadd_vec, INDEX_op_cmp_vec, 0 + INDEX_op_add_vec, INDEX_op_usadd_vec, 0 }; static const TCGOpcode vecop_list_add_s[] = { - INDEX_op_add_vec, INDEX_op_ssadd_vec, INDEX_op_cmp_vec, 0 + INDEX_op_add_vec, INDEX_op_ssadd_vec, 0 }; static const GVecGen4 op_vsububs = { From 48eda6abfd7ebf3a21c699d8b13d7506b877d1b7 Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Thu, 23 May 2024 15:18:19 +0530 Subject: [PATCH 55/96] target/ppc: Move ISA300 flag check out of do_helper_XX3. Moving PPC2_ISA300 flag check out of do_helper_XX3 method in vsx-impl.c.inc so that the helper can be used with other instructions as well. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson Signed-off-by: Nicholas Piggin --- target/ppc/translate/vsx-impl.c.inc | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 0266f09119..6025119e5b 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2712,8 +2712,6 @@ static bool do_helper_XX3(DisasContext *ctx, arg_XX3 *a, void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) { TCGv_ptr xt, xa, xb; - - REQUIRE_INSNS_FLAGS2(ctx, ISA300); REQUIRE_VSX(ctx); xt = gen_vsr_ptr(a->xt); @@ -2724,13 +2722,13 @@ static bool do_helper_XX3(DisasContext *ctx, arg_XX3 *a, return true; } -TRANS(XSCMPEQDP, do_helper_XX3, gen_helper_XSCMPEQDP) -TRANS(XSCMPGEDP, do_helper_XX3, gen_helper_XSCMPGEDP) -TRANS(XSCMPGTDP, do_helper_XX3, gen_helper_XSCMPGTDP) -TRANS(XSMAXCDP, do_helper_XX3, gen_helper_XSMAXCDP) -TRANS(XSMINCDP, do_helper_XX3, 
gen_helper_XSMINCDP) -TRANS(XSMAXJDP, do_helper_XX3, gen_helper_XSMAXJDP) -TRANS(XSMINJDP, do_helper_XX3, gen_helper_XSMINJDP) +TRANS_FLAGS2(ISA300, XSCMPEQDP, do_helper_XX3, gen_helper_XSCMPEQDP) +TRANS_FLAGS2(ISA300, XSCMPGEDP, do_helper_XX3, gen_helper_XSCMPGEDP) +TRANS_FLAGS2(ISA300, XSCMPGTDP, do_helper_XX3, gen_helper_XSCMPGTDP) +TRANS_FLAGS2(ISA300, XSMAXCDP, do_helper_XX3, gen_helper_XSMAXCDP) +TRANS_FLAGS2(ISA300, XSMINCDP, do_helper_XX3, gen_helper_XSMINCDP) +TRANS_FLAGS2(ISA300, XSMAXJDP, do_helper_XX3, gen_helper_XSMAXJDP) +TRANS_FLAGS2(ISA300, XSMINJDP, do_helper_XX3, gen_helper_XSMINJDP) static bool do_helper_X(arg_X *a, void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) From 638f6d553af70ae5a7dc26200d35e385d649ff75 Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Thu, 23 May 2024 15:18:20 +0530 Subject: [PATCH 56/96] target/ppc: Move VSX arithmetic and max/min insns to decodetree. Moving the following instructions to decodetree specification: x{s, v}{add, sub, mul, div}{s, d}p : XX3-form xs{max, min}dp, xv{max, min}{s, d}p : XX3-form The changes were verfied by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. 
Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson Signed-off-by: Nicholas Piggin --- target/ppc/fpu_helper.c | 44 ++++++++++---------- target/ppc/helper.h | 44 ++++++++++---------- target/ppc/insn32.decode | 30 ++++++++++++++ target/ppc/translate/vsx-impl.c.inc | 63 +++++++++++++---------------- target/ppc/translate/vsx-ops.c.inc | 22 ---------- 5 files changed, 101 insertions(+), 102 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 51bce99fd5..3f2e4f5827 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -1599,14 +1599,14 @@ void helper_##name(CPUPPCState *env, ppc_vsr_t *xt, \ do_float_check_status(env, sfifprf, GETPC()); \ } -VSX_ADD_SUB(xsadddp, add, 1, float64, VsrD(0), 1, 0) -VSX_ADD_SUB(xsaddsp, add, 1, float64, VsrD(0), 1, 1) -VSX_ADD_SUB(xvadddp, add, 2, float64, VsrD(i), 0, 0) -VSX_ADD_SUB(xvaddsp, add, 4, float32, VsrW(i), 0, 0) -VSX_ADD_SUB(xssubdp, sub, 1, float64, VsrD(0), 1, 0) -VSX_ADD_SUB(xssubsp, sub, 1, float64, VsrD(0), 1, 1) -VSX_ADD_SUB(xvsubdp, sub, 2, float64, VsrD(i), 0, 0) -VSX_ADD_SUB(xvsubsp, sub, 4, float32, VsrW(i), 0, 0) +VSX_ADD_SUB(XSADDDP, add, 1, float64, VsrD(0), 1, 0) +VSX_ADD_SUB(XSADDSP, add, 1, float64, VsrD(0), 1, 1) +VSX_ADD_SUB(XVADDDP, add, 2, float64, VsrD(i), 0, 0) +VSX_ADD_SUB(XVADDSP, add, 4, float32, VsrW(i), 0, 0) +VSX_ADD_SUB(XSSUBDP, sub, 1, float64, VsrD(0), 1, 0) +VSX_ADD_SUB(XSSUBSP, sub, 1, float64, VsrD(0), 1, 1) +VSX_ADD_SUB(XVSUBDP, sub, 2, float64, VsrD(i), 0, 0) +VSX_ADD_SUB(XVSUBSP, sub, 4, float32, VsrW(i), 0, 0) void helper_xsaddqp(CPUPPCState *env, uint32_t opcode, ppc_vsr_t *xt, ppc_vsr_t *xa, ppc_vsr_t *xb) @@ -1676,10 +1676,10 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ do_float_check_status(env, sfifprf, GETPC()); \ } -VSX_MUL(xsmuldp, 1, float64, VsrD(0), 1, 0) -VSX_MUL(xsmulsp, 1, float64, VsrD(0), 1, 1) -VSX_MUL(xvmuldp, 2, float64, VsrD(i), 0, 0) -VSX_MUL(xvmulsp, 4, float32, VsrW(i), 0, 0) +VSX_MUL(XSMULDP, 1, float64, 
VsrD(0), 1, 0) +VSX_MUL(XSMULSP, 1, float64, VsrD(0), 1, 1) +VSX_MUL(XVMULDP, 2, float64, VsrD(i), 0, 0) +VSX_MUL(XVMULSP, 4, float32, VsrW(i), 0, 0) void helper_xsmulqp(CPUPPCState *env, uint32_t opcode, ppc_vsr_t *xt, ppc_vsr_t *xa, ppc_vsr_t *xb) @@ -1750,10 +1750,10 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ do_float_check_status(env, sfifprf, GETPC()); \ } -VSX_DIV(xsdivdp, 1, float64, VsrD(0), 1, 0) -VSX_DIV(xsdivsp, 1, float64, VsrD(0), 1, 1) -VSX_DIV(xvdivdp, 2, float64, VsrD(i), 0, 0) -VSX_DIV(xvdivsp, 4, float32, VsrW(i), 0, 0) +VSX_DIV(XSDIVDP, 1, float64, VsrD(0), 1, 0) +VSX_DIV(XSDIVSP, 1, float64, VsrD(0), 1, 1) +VSX_DIV(XVDIVDP, 2, float64, VsrD(i), 0, 0) +VSX_DIV(XVDIVSP, 4, float32, VsrW(i), 0, 0) void helper_xsdivqp(CPUPPCState *env, uint32_t opcode, ppc_vsr_t *xt, ppc_vsr_t *xa, ppc_vsr_t *xb) @@ -2383,12 +2383,12 @@ void helper_##name(CPUPPCState *env, ppc_vsr_t *xt, \ do_float_check_status(env, false, GETPC()); \ } -VSX_MAX_MIN(xsmaxdp, maxnum, 1, float64, VsrD(0)) -VSX_MAX_MIN(xvmaxdp, maxnum, 2, float64, VsrD(i)) -VSX_MAX_MIN(xvmaxsp, maxnum, 4, float32, VsrW(i)) -VSX_MAX_MIN(xsmindp, minnum, 1, float64, VsrD(0)) -VSX_MAX_MIN(xvmindp, minnum, 2, float64, VsrD(i)) -VSX_MAX_MIN(xvminsp, minnum, 4, float32, VsrW(i)) +VSX_MAX_MIN(XSMAXDP, maxnum, 1, float64, VsrD(0)) +VSX_MAX_MIN(XVMAXDP, maxnum, 2, float64, VsrD(i)) +VSX_MAX_MIN(XVMAXSP, maxnum, 4, float32, VsrW(i)) +VSX_MAX_MIN(XSMINDP, minnum, 1, float64, VsrD(0)) +VSX_MAX_MIN(XVMINDP, minnum, 2, float64, VsrD(i)) +VSX_MAX_MIN(XVMINSP, minnum, 4, float32, VsrW(i)) #define VSX_MAX_MINC(name, max, tp, fld) \ void helper_##name(CPUPPCState *env, \ diff --git a/target/ppc/helper.h b/target/ppc/helper.h index dd7526bc2e..13f20bb243 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -364,12 +364,12 @@ DEF_HELPER_FLAGS_4(bcdsr, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) DEF_HELPER_FLAGS_4(bcdtrunc, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) DEF_HELPER_FLAGS_4(bcdutrunc, 
TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) -DEF_HELPER_4(xsadddp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSADDDP, void, env, vsr, vsr, vsr) DEF_HELPER_5(xsaddqp, void, env, i32, vsr, vsr, vsr) -DEF_HELPER_4(xssubdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmuldp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSSUBDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMULDP, void, env, vsr, vsr, vsr) DEF_HELPER_5(xsmulqp, void, env, i32, vsr, vsr, vsr) -DEF_HELPER_4(xsdivdp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSDIVDP, void, env, vsr, vsr, vsr) DEF_HELPER_5(xsdivqp, void, env, i32, vsr, vsr, vsr) DEF_HELPER_3(xsredp, void, env, vsr, vsr) DEF_HELPER_3(xssqrtdp, void, env, vsr, vsr) @@ -392,8 +392,8 @@ DEF_HELPER_4(xscmpodp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpudp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpoqp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpuqp, void, env, i32, vsr, vsr) -DEF_HELPER_4(xsmaxdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmindp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMAXDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMINDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XSMAXCDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XSMINCDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XSMAXJDP, void, env, vsr, vsr, vsr) @@ -439,10 +439,10 @@ DEF_HELPER_4(xsrqpxp, void, env, i32, vsr, vsr) DEF_HELPER_4(xssqrtqp, void, env, i32, vsr, vsr) DEF_HELPER_5(xssubqp, void, env, i32, vsr, vsr, vsr) -DEF_HELPER_4(xsaddsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xssubsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmulsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsdivsp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSADDSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSSUBSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMULSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSDIVSP, void, env, vsr, vsr, vsr) DEF_HELPER_3(xsresp, void, env, vsr, vsr) DEF_HELPER_2(xsrsp, i64, env, i64) DEF_HELPER_3(xssqrtsp, void, env, vsr, vsr) @@ -461,10 +461,10 @@ DEF_HELPER_5(XSNMADDQPO, void, env, vsr, vsr, vsr, 
vsr) DEF_HELPER_5(XSNMSUBQP, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(XSNMSUBQPO, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_4(xvadddp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvsubdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvmuldp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvdivdp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVADDDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVSUBDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMULDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVDIVDP, void, env, vsr, vsr, vsr) DEF_HELPER_3(xvredp, void, env, vsr, vsr) DEF_HELPER_3(xvsqrtdp, void, env, vsr, vsr) DEF_HELPER_3(xvrsqrtedp, void, env, vsr, vsr) @@ -474,8 +474,8 @@ DEF_HELPER_5(xvmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvmsubdp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_4(xvmaxdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvmindp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr) DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) @@ -495,10 +495,10 @@ DEF_HELPER_3(xvrdpim, void, env, vsr, vsr) DEF_HELPER_3(xvrdpip, void, env, vsr, vsr) DEF_HELPER_3(xvrdpiz, void, env, vsr, vsr) -DEF_HELPER_4(xvaddsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvsubsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvmulsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvdivsp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVADDSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVSUBSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMULSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVDIVSP, void, env, vsr, vsr, vsr) DEF_HELPER_3(xvresp, void, env, vsr, vsr) DEF_HELPER_3(xvsqrtsp, void, env, vsr, vsr) DEF_HELPER_3(xvrsqrtesp, void, env, vsr, vsr) @@ -508,8 +508,8 @@ 
DEF_HELPER_5(xvmaddsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvmsubsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmaddsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubsp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_4(xvmaxsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvminsp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMAXSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMINSP, void, env, vsr, vsr, vsr) DEF_HELPER_FLAGS_4(xvcmpeqsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_FLAGS_4(xvcmpgesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_FLAGS_4(xvcmpgtsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 8988fca60e..1301e5bbc0 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -1004,6 +1004,28 @@ XVNEGSP 111100 ..... 00000 ..... 110111001 .. @XX2 XVCPSGNDP 111100 ..... ..... ..... 11110000 ... @XX3 XVCPSGNSP 111100 ..... ..... ..... 11010000 ... @XX3 +## VSX Binary Floating-Point Arithmetic Instructions + +XSADDSP 111100 ..... ..... ..... 00000000 ... @XX3 +XSSUBSP 111100 ..... ..... ..... 00001000 ... @XX3 +XSMULSP 111100 ..... ..... ..... 00010000 ... @XX3 +XSDIVSP 111100 ..... ..... ..... 00011000 ... @XX3 + +XSADDDP 111100 ..... ..... ..... 00100000 ... @XX3 +XSSUBDP 111100 ..... ..... ..... 00101000 ... @XX3 +XSMULDP 111100 ..... ..... ..... 00110000 ... @XX3 +XSDIVDP 111100 ..... ..... ..... 00111000 ... @XX3 + +XVADDSP 111100 ..... ..... ..... 01000000 ... @XX3 +XVSUBSP 111100 ..... ..... ..... 01001000 ... @XX3 +XVMULSP 111100 ..... ..... ..... 01010000 ... @XX3 +XVDIVSP 111100 ..... ..... ..... 01011000 ... @XX3 + +XVADDDP 111100 ..... ..... ..... 01100000 ... @XX3 +XVSUBDP 111100 ..... ..... ..... 01101000 ... @XX3 +XVMULDP 111100 ..... ..... ..... 01110000 ... @XX3 +XVDIVDP 111100 ..... ..... ..... 01111000 ... @XX3 + ## VSX Scalar Multiply-Add Instructions XSMADDADP 111100 ..... ..... ..... 00100001 . . . 
@XX3 @@ -1073,6 +1095,14 @@ XSCMPEQQP 111111 ..... ..... ..... 0001000100 - @X XSCMPGEQP 111111 ..... ..... ..... 0011000100 - @X XSCMPGTQP 111111 ..... ..... ..... 0011100100 - @X +XSMAXDP 111100 ..... ..... ..... 10100000 ... @XX3 +XSMINDP 111100 ..... ..... ..... 10101000 ... @XX3 + +XVMAXSP 111100 ..... ..... ..... 11000000 ... @XX3 +XVMINSP 111100 ..... ..... ..... 11001000 ... @XX3 +XVMAXDP 111100 ..... ..... ..... 11100000 ... @XX3 +XVMINDP 111100 ..... ..... ..... 11101000 ... @XX3 + ## VSX Binary Floating-Point Convert Instructions XSCVQPDP 111111 ..... 10100 ..... 1101000100 . @X_tb_rc diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 6025119e5b..0d16e0f02b 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -864,20 +864,6 @@ static void gen_##name(DisasContext *ctx) \ gen_helper_##name(tcg_env, opc); \ } -#define GEN_VSX_HELPER_X3(name, op1, op2, inval, type) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_ptr xt, xa, xb; \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - xt = gen_vsr_ptr(xT(ctx->opcode)); \ - xa = gen_vsr_ptr(xA(ctx->opcode)); \ - xb = gen_vsr_ptr(xB(ctx->opcode)); \ - gen_helper_##name(tcg_env, xt, xa, xb); \ -} - #define GEN_VSX_HELPER_X2(name, op1, op2, inval, type) \ static void gen_##name(DisasContext *ctx) \ { \ @@ -983,12 +969,8 @@ static void gen_##name(DisasContext *ctx) \ set_cpu_vsr(xT(ctx->opcode), tcg_constant_i64(0), false); \ } -GEN_VSX_HELPER_X3(xsadddp, 0x00, 0x04, 0, PPC2_VSX) GEN_VSX_HELPER_R3(xsaddqp, 0x04, 0x00, 0, PPC2_ISA300) -GEN_VSX_HELPER_X3(xssubdp, 0x00, 0x05, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xsmuldp, 0x00, 0x06, 0, PPC2_VSX) GEN_VSX_HELPER_R3(xsmulqp, 0x04, 0x01, 0, PPC2_ISA300) -GEN_VSX_HELPER_X3(xsdivdp, 0x00, 0x07, 0, PPC2_VSX) GEN_VSX_HELPER_R3(xsdivqp, 0x04, 0x11, 0, PPC2_ISA300) GEN_VSX_HELPER_X2(xsredp, 0x14, 0x05, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xssqrtdp, 
0x16, 0x04, 0, PPC2_VSX) @@ -1001,8 +983,6 @@ GEN_VSX_HELPER_X2_AB(xscmpodp, 0x0C, 0x05, 0, PPC2_VSX) GEN_VSX_HELPER_X2_AB(xscmpudp, 0x0C, 0x04, 0, PPC2_VSX) GEN_VSX_HELPER_R2_AB(xscmpoqp, 0x04, 0x04, 0, PPC2_VSX) GEN_VSX_HELPER_R2_AB(xscmpuqp, 0x04, 0x14, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xsmaxdp, 0x00, 0x14, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xsmindp, 0x00, 0x15, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xscvdphp, 0x16, 0x15, 0x11, PPC2_ISA300) GEN_VSX_HELPER_X2(xscvdpsp, 0x12, 0x10, 0, PPC2_VSX) GEN_VSX_HELPER_R2(xscvdpqp, 0x04, 0x1A, 0x16, PPC2_ISA300) @@ -1233,27 +1213,17 @@ GEN_VSX_HELPER_R2(xsrqpi, 0x05, 0x00, 0, PPC2_ISA300) GEN_VSX_HELPER_R2(xsrqpxp, 0x05, 0x01, 0, PPC2_ISA300) GEN_VSX_HELPER_R2(xssqrtqp, 0x04, 0x19, 0x1B, PPC2_ISA300) GEN_VSX_HELPER_R3(xssubqp, 0x04, 0x10, 0, PPC2_ISA300) -GEN_VSX_HELPER_X3(xsaddsp, 0x00, 0x00, 0, PPC2_VSX207) -GEN_VSX_HELPER_X3(xssubsp, 0x00, 0x01, 0, PPC2_VSX207) -GEN_VSX_HELPER_X3(xsmulsp, 0x00, 0x02, 0, PPC2_VSX207) -GEN_VSX_HELPER_X3(xsdivsp, 0x00, 0x03, 0, PPC2_VSX207) GEN_VSX_HELPER_X2(xsresp, 0x14, 0x01, 0, PPC2_VSX207) GEN_VSX_HELPER_X2(xssqrtsp, 0x16, 0x00, 0, PPC2_VSX207) GEN_VSX_HELPER_X2(xsrsqrtesp, 0x14, 0x00, 0, PPC2_VSX207) GEN_VSX_HELPER_X2(xscvsxdsp, 0x10, 0x13, 0, PPC2_VSX207) GEN_VSX_HELPER_X2(xscvuxdsp, 0x10, 0x12, 0, PPC2_VSX207) -GEN_VSX_HELPER_X3(xvadddp, 0x00, 0x0C, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvmuldp, 0x00, 0x0E, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvdivdp, 0x00, 0x0F, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvredp, 0x14, 0x0D, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvrsqrtedp, 0x14, 0x0C, 0, PPC2_VSX) GEN_VSX_HELPER_X2_AB(xvtdivdp, 0x14, 0x0F, 0, PPC2_VSX) GEN_VSX_HELPER_X1(xvtsqrtdp, 0x14, 0x0E, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvmaxdp, 0x00, 0x1C, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvmindp, 0x00, 0x1D, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvcvdpsp, 0x12, 0x18, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvcvdpsxds, 0x10, 0x1D, 0, PPC2_VSX) 
GEN_VSX_HELPER_X2(xvcvdpsxws, 0x10, 0x0D, 0, PPC2_VSX) @@ -1269,17 +1239,11 @@ GEN_VSX_HELPER_X2(xvrdpim, 0x12, 0x0F, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvrdpip, 0x12, 0x0E, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvrdpiz, 0x12, 0x0D, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvaddsp, 0x00, 0x08, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvsubsp, 0x00, 0x09, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvmulsp, 0x00, 0x0A, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvdivsp, 0x00, 0x0B, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvresp, 0x14, 0x09, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvrsqrtesp, 0x14, 0x08, 0, PPC2_VSX) GEN_VSX_HELPER_X2_AB(xvtdivsp, 0x14, 0x0B, 0, PPC2_VSX) GEN_VSX_HELPER_X1(xvtsqrtsp, 0x14, 0x0A, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvmaxsp, 0x00, 0x18, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xvminsp, 0x00, 0x19, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvcvspdp, 0x12, 0x1C, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvcvhpsp, 0x16, 0x1D, 0x18, PPC2_ISA300) GEN_VSX_HELPER_X2(xvcvsphp, 0x16, 0x1D, 0x19, PPC2_ISA300) @@ -2730,6 +2694,33 @@ TRANS_FLAGS2(ISA300, XSMINCDP, do_helper_XX3, gen_helper_XSMINCDP) TRANS_FLAGS2(ISA300, XSMAXJDP, do_helper_XX3, gen_helper_XSMAXJDP) TRANS_FLAGS2(ISA300, XSMINJDP, do_helper_XX3, gen_helper_XSMINJDP) +TRANS_FLAGS2(VSX207, XSADDSP, do_helper_XX3, gen_helper_XSADDSP) +TRANS_FLAGS2(VSX207, XSSUBSP, do_helper_XX3, gen_helper_XSSUBSP) +TRANS_FLAGS2(VSX207, XSMULSP, do_helper_XX3, gen_helper_XSMULSP) +TRANS_FLAGS2(VSX207, XSDIVSP, do_helper_XX3, gen_helper_XSDIVSP) + +TRANS_FLAGS2(VSX, XSADDDP, do_helper_XX3, gen_helper_XSADDDP) +TRANS_FLAGS2(VSX, XSSUBDP, do_helper_XX3, gen_helper_XSSUBDP) +TRANS_FLAGS2(VSX, XSMULDP, do_helper_XX3, gen_helper_XSMULDP) +TRANS_FLAGS2(VSX, XSDIVDP, do_helper_XX3, gen_helper_XSDIVDP) + +TRANS_FLAGS2(VSX, XVADDSP, do_helper_XX3, gen_helper_XVADDSP) +TRANS_FLAGS2(VSX, XVSUBSP, do_helper_XX3, gen_helper_XVSUBSP) +TRANS_FLAGS2(VSX, XVMULSP, do_helper_XX3, gen_helper_XVMULSP) +TRANS_FLAGS2(VSX, XVDIVSP, do_helper_XX3, gen_helper_XVDIVSP) + 
+TRANS_FLAGS2(VSX, XVADDDP, do_helper_XX3, gen_helper_XVADDDP) +TRANS_FLAGS2(VSX, XVSUBDP, do_helper_XX3, gen_helper_XVSUBDP) +TRANS_FLAGS2(VSX, XVMULDP, do_helper_XX3, gen_helper_XVMULDP) +TRANS_FLAGS2(VSX, XVDIVDP, do_helper_XX3, gen_helper_XVDIVDP) + +TRANS_FLAGS2(VSX, XSMAXDP, do_helper_XX3, gen_helper_XSMAXDP) +TRANS_FLAGS2(VSX, XSMINDP, do_helper_XX3, gen_helper_XSMINDP) +TRANS_FLAGS2(VSX, XVMAXSP, do_helper_XX3, gen_helper_XVMAXSP) +TRANS_FLAGS2(VSX, XVMINSP, do_helper_XX3, gen_helper_XVMINSP) +TRANS_FLAGS2(VSX, XVMAXDP, do_helper_XX3, gen_helper_XVMAXDP) +TRANS_FLAGS2(VSX, XVMINDP, do_helper_XX3, gen_helper_XVMINDP) + static bool do_helper_X(arg_X *a, void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) { diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index a3ba094d62..18510d757d 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -153,12 +153,8 @@ GEN_XX2FORM_EO(xvxexpdp, 0x16, 0x1D, 0x00, PPC2_ISA300), GEN_XX2FORM_EO(xvxsigdp, 0x16, 0x1D, 0x01, PPC2_ISA300), GEN_XX2FORM_EO(xvxexpsp, 0x16, 0x1D, 0x08, PPC2_ISA300), -GEN_XX3FORM(xsadddp, 0x00, 0x04, PPC2_VSX), GEN_VSX_XFORM_300(xsaddqp, 0x04, 0x00, 0x0), -GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX), -GEN_XX3FORM(xsmuldp, 0x00, 0x06, PPC2_VSX), GEN_VSX_XFORM_300(xsmulqp, 0x04, 0x01, 0x0), -GEN_XX3FORM(xsdivdp, 0x00, 0x07, PPC2_VSX), GEN_XX2FORM(xsredp, 0x14, 0x05, PPC2_VSX), GEN_XX2FORM(xssqrtdp, 0x16, 0x04, PPC2_VSX), GEN_XX2FORM(xsrsqrtedp, 0x14, 0x04, PPC2_VSX), @@ -170,8 +166,6 @@ GEN_XX2IFORM(xscmpodp, 0x0C, 0x05, PPC2_VSX), GEN_XX2IFORM(xscmpudp, 0x0C, 0x04, PPC2_VSX), GEN_VSX_XFORM_300(xscmpoqp, 0x04, 0x04, 0x00600001), GEN_VSX_XFORM_300(xscmpuqp, 0x04, 0x14, 0x00600001), -GEN_XX3FORM(xsmaxdp, 0x00, 0x14, PPC2_VSX), -GEN_XX3FORM(xsmindp, 0x00, 0x15, PPC2_VSX), GEN_XX2FORM_EO(xscvdphp, 0x16, 0x15, 0x11, PPC2_ISA300), GEN_XX2FORM(xscvdpsp, 0x12, 0x10, PPC2_VSX), GEN_XX2FORM(xscvdpspn, 0x16, 0x10, PPC2_VSX207), @@ -191,10 
+185,6 @@ GEN_XX2FORM(xsrdpim, 0x12, 0x07, PPC2_VSX), GEN_XX2FORM(xsrdpip, 0x12, 0x06, PPC2_VSX), GEN_XX2FORM(xsrdpiz, 0x12, 0x05, PPC2_VSX), -GEN_XX3FORM(xsaddsp, 0x00, 0x00, PPC2_VSX207), -GEN_XX3FORM(xssubsp, 0x00, 0x01, PPC2_VSX207), -GEN_XX3FORM(xsmulsp, 0x00, 0x02, PPC2_VSX207), -GEN_XX3FORM(xsdivsp, 0x00, 0x03, PPC2_VSX207), GEN_VSX_XFORM_300(xsdivqp, 0x04, 0x11, 0x0), GEN_XX2FORM(xsresp, 0x14, 0x01, PPC2_VSX207), GEN_XX2FORM(xsrsp, 0x12, 0x11, PPC2_VSX207), @@ -203,10 +193,6 @@ GEN_XX2FORM(xsrsqrtesp, 0x14, 0x00, PPC2_VSX207), GEN_XX2FORM(xscvsxdsp, 0x10, 0x13, PPC2_VSX207), GEN_XX2FORM(xscvuxdsp, 0x10, 0x12, PPC2_VSX207), -GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX), -GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX), -GEN_XX3FORM(xvmuldp, 0x00, 0x0E, PPC2_VSX), -GEN_XX3FORM(xvdivdp, 0x00, 0x0F, PPC2_VSX), GEN_XX2FORM(xvredp, 0x14, 0x0D, PPC2_VSX), GEN_XX2FORM(xvsqrtdp, 0x16, 0x0C, PPC2_VSX), GEN_XX2FORM(xvrsqrtedp, 0x14, 0x0C, PPC2_VSX), @@ -220,8 +206,6 @@ GEN_XX3FORM_NAME(xvnmadddp, "xvnmaddadp", 0x04, 0x1C, PPC2_VSX), GEN_XX3FORM_NAME(xvnmadddp, "xvnmaddmdp", 0x04, 0x1D, PPC2_VSX), GEN_XX3FORM_NAME(xvnmsubdp, "xvnmsubadp", 0x04, 0x1E, PPC2_VSX), GEN_XX3FORM_NAME(xvnmsubdp, "xvnmsubmdp", 0x04, 0x1F, PPC2_VSX), -GEN_XX3FORM(xvmaxdp, 0x00, 0x1C, PPC2_VSX), -GEN_XX3FORM(xvmindp, 0x00, 0x1D, PPC2_VSX), GEN_XX3_RC_FORM(xvcmpeqdp, 0x0C, 0x0C, PPC2_VSX), GEN_XX3_RC_FORM(xvcmpgtdp, 0x0C, 0x0D, PPC2_VSX), GEN_XX3_RC_FORM(xvcmpgedp, 0x0C, 0x0E, PPC2_VSX), @@ -241,10 +225,6 @@ GEN_XX2FORM(xvrdpim, 0x12, 0x0F, PPC2_VSX), GEN_XX2FORM(xvrdpip, 0x12, 0x0E, PPC2_VSX), GEN_XX2FORM(xvrdpiz, 0x12, 0x0D, PPC2_VSX), -GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX), -GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX), -GEN_XX3FORM(xvmulsp, 0x00, 0x0A, PPC2_VSX), -GEN_XX3FORM(xvdivsp, 0x00, 0x0B, PPC2_VSX), GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX), GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX), GEN_XX2FORM(xvrsqrtesp, 0x14, 0x08, PPC2_VSX), @@ -258,8 +238,6 @@ GEN_XX3FORM_NAME(xvnmaddsp, 
"xvnmaddasp", 0x04, 0x18, PPC2_VSX), GEN_XX3FORM_NAME(xvnmaddsp, "xvnmaddmsp", 0x04, 0x19, PPC2_VSX), GEN_XX3FORM_NAME(xvnmsubsp, "xvnmsubasp", 0x04, 0x1A, PPC2_VSX), GEN_XX3FORM_NAME(xvnmsubsp, "xvnmsubmsp", 0x04, 0x1B, PPC2_VSX), -GEN_XX3FORM(xvmaxsp, 0x00, 0x18, PPC2_VSX), -GEN_XX3FORM(xvminsp, 0x00, 0x19, PPC2_VSX), GEN_XX3_RC_FORM(xvcmpeqsp, 0x0C, 0x08, PPC2_VSX), GEN_XX3_RC_FORM(xvcmpgtsp, 0x0C, 0x09, PPC2_VSX), GEN_XX3_RC_FORM(xvcmpgesp, 0x0C, 0x0A, PPC2_VSX), From c1167a9257a22433a16e223a1209c9c72836edee Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Thu, 23 May 2024 15:18:21 +0530 Subject: [PATCH 57/96] target/ppc: Move VSX logical instructions to decodetree. Moving the following instructions to decodetree specification : xxl{and, andc, or, orc, nor, xor, nand, eqv} : XX3-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson Signed-off-by: Nicholas Piggin --- target/ppc/insn32.decode | 11 +++++++++ target/ppc/translate/vsx-impl.c.inc | 37 +++++++++++++---------------- target/ppc/translate/vsx-ops.c.inc | 11 --------- 3 files changed, 28 insertions(+), 31 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 1301e5bbc0..4f86b175f1 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -1138,6 +1138,17 @@ XXMFACC 011111 ... -- 00000 ----- 0010110001 - @X_a XXMTACC 011111 ... -- 00001 ----- 0010110001 - @X_a XXSETACCZ 011111 ... -- 00011 ----- 0010110001 - @X_a +## VSX Vector Logical instructions + +XXLAND 111100 ..... ..... ..... 10000010 ... @XX3 +XXLANDC 111100 ..... ..... ..... 10001010 ... @XX3 +XXLOR 111100 ..... ..... ..... 10010010 ... @XX3 +XXLXOR 111100 ..... ..... ..... 10011010 ... @XX3 +XXLNOR 111100 ..... ..... ..... 10100010 ... @XX3 +XXLEQV 111100 ..... ..... ..... 10111010 ... @XX3 +XXLNAND 111100 ..... ..... ..... 10110010 ... 
@XX3 +XXLORC 111100 ..... ..... ..... 10101010 ... @XX3 + ## VSX GER instruction XVI4GER8 111011 ... -- ..... ..... 00100011 ..- @XX3_at xa=%xx_xa diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 0d16e0f02b..a769f199ce 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -1573,26 +1573,24 @@ static void gen_xxbrw(DisasContext *ctx) set_cpu_vsr(xT(ctx->opcode), xtl, false); } -#define VSX_LOGICAL(name, vece, tcg_op) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - tcg_op(vece, vsr_full_offset(xT(ctx->opcode)), \ - vsr_full_offset(xA(ctx->opcode)), \ - vsr_full_offset(xB(ctx->opcode)), 16, 16); \ - } +static bool do_logical_op(DisasContext *ctx, arg_XX3 *a, unsigned vece, + void (*helper)(unsigned, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t)) +{ + REQUIRE_VSX(ctx); + helper(vece, vsr_full_offset(a->xt), + vsr_full_offset(a->xa), + vsr_full_offset(a->xb), 16, 16); + return true; +} -VSX_LOGICAL(xxland, MO_64, tcg_gen_gvec_and) -VSX_LOGICAL(xxlandc, MO_64, tcg_gen_gvec_andc) -VSX_LOGICAL(xxlor, MO_64, tcg_gen_gvec_or) -VSX_LOGICAL(xxlxor, MO_64, tcg_gen_gvec_xor) -VSX_LOGICAL(xxlnor, MO_64, tcg_gen_gvec_nor) -VSX_LOGICAL(xxleqv, MO_64, tcg_gen_gvec_eqv) -VSX_LOGICAL(xxlnand, MO_64, tcg_gen_gvec_nand) -VSX_LOGICAL(xxlorc, MO_64, tcg_gen_gvec_orc) +TRANS_FLAGS2(VSX, XXLAND, do_logical_op, MO_64, tcg_gen_gvec_and); +TRANS_FLAGS2(VSX, XXLANDC, do_logical_op, MO_64, tcg_gen_gvec_andc); +TRANS_FLAGS2(VSX, XXLOR, do_logical_op, MO_64, tcg_gen_gvec_or); +TRANS_FLAGS2(VSX, XXLXOR, do_logical_op, MO_64, tcg_gen_gvec_xor); +TRANS_FLAGS2(VSX, XXLNOR, do_logical_op, MO_64, tcg_gen_gvec_nor); +TRANS_FLAGS2(VSX207, XXLEQV, do_logical_op, MO_64, tcg_gen_gvec_eqv); +TRANS_FLAGS2(VSX207, XXLNAND, do_logical_op, MO_64, tcg_gen_gvec_nand); +TRANS_FLAGS2(VSX207, XXLORC, do_logical_op, 
MO_64, tcg_gen_gvec_orc); #define VSX_XXMRG(name, high) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -2899,4 +2897,3 @@ TRANS64(PMXVF64GERNN, do_ger, gen_helper_XVF64GERNN) #undef GEN_XX2IFORM #undef GEN_XX3_RC_FORM #undef GEN_XX3FORM_DM -#undef VSX_LOGICAL diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index 18510d757d..3c0a70cb7c 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -263,17 +263,6 @@ GEN_XX2FORM_EO(xvcvhpsp, 0x16, 0x1D, 0x18, PPC2_ISA300), GEN_XX2FORM_EO(xvcvsphp, 0x16, 0x1D, 0x19, PPC2_ISA300), GEN_XX2FORM_EO(xxbrq, 0x16, 0x1D, 0x1F, PPC2_ISA300), -#define VSX_LOGICAL(name, opc2, opc3, fl2) \ -GEN_XX3FORM(name, opc2, opc3, fl2) - -VSX_LOGICAL(xxland, 0x8, 0x10, PPC2_VSX), -VSX_LOGICAL(xxlandc, 0x8, 0x11, PPC2_VSX), -VSX_LOGICAL(xxlor, 0x8, 0x12, PPC2_VSX), -VSX_LOGICAL(xxlxor, 0x8, 0x13, PPC2_VSX), -VSX_LOGICAL(xxlnor, 0x8, 0x14, PPC2_VSX), -VSX_LOGICAL(xxleqv, 0x8, 0x17, PPC2_VSX207), -VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207), -VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207), GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX), GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX), GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00), From cff278c9fa4ed6a8c2e5d2aba6c490e6252a6825 Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Tue, 18 Jun 2024 14:28:28 +0530 Subject: [PATCH 58/96] target/ppc: Moving VSX scalar storage access insns to decodetree. Moving the following instructions to decodetree specification : lxs{d, iwa, ibz, ihz, iwz, sp}x : X-form stxs{d, ib, ih, iw, sp}x : X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. 
Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson Signed-off-by: Nicholas Piggin --- target/ppc/insn32.decode | 13 +++++ target/ppc/translate/vsx-impl.c.inc | 79 +++++++++++++---------------- target/ppc/translate/vsx-ops.c.inc | 11 ---- 3 files changed, 49 insertions(+), 54 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 4f86b175f1..f2661df918 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -993,6 +993,19 @@ STXVRHX 011111 ..... ..... ..... 0010101101 . @X_TSX STXVRWX 011111 ..... ..... ..... 0011001101 . @X_TSX STXVRDX 011111 ..... ..... ..... 0011101101 . @X_TSX +LXSDX 011111 ..... ..... ..... 1001001100 . @X_TSX +LXSIWAX 011111 ..... ..... ..... 0001001100 . @X_TSX +LXSIBZX 011111 ..... ..... ..... 1100001101 . @X_TSX +LXSIHZX 011111 ..... ..... ..... 1100101101 . @X_TSX +LXSIWZX 011111 ..... ..... ..... 0000001100 . @X_TSX +LXSSPX 011111 ..... ..... ..... 1000001100 . @X_TSX + +STXSDX 011111 ..... ..... ..... 1011001100 . @X_TSX +STXSIBX 011111 ..... ..... ..... 1110001101 . @X_TSX +STXSIHX 011111 ..... ..... ..... 1110101101 . @X_TSX +STXSIWX 011111 ..... ..... ..... 0010001100 . @X_TSX +STXSSPX 011111 ..... ..... ..... 1010001100 . @X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 111100 ..... 00000 ..... 111011001 .. 
@XX2 diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index a769f199ce..de2a26a213 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -24,30 +24,27 @@ static inline TCGv_ptr gen_acc_ptr(int reg) return r; } -#define VSX_LOAD_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(); \ - gen_set_access_type(ctx, ACCESS_INT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##operation(ctx, t0, EA); \ - set_cpu_vsr(xT(ctx->opcode), t0, true); \ - /* NOTE: cpu_vsrl is undefined */ \ +static bool do_lxs(DisasContext *ctx, arg_X *a, + void (*op)(DisasContext *, TCGv_i64, TCGv)) +{ + TCGv EA; + TCGv_i64 t0; + REQUIRE_VSX(ctx); + t0 = tcg_temp_new_i64(); + gen_set_access_type(ctx, ACCESS_INT); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); + op(ctx, t0, EA); + set_cpu_vsr(a->rt, t0, true); + /* NOTE: cpu_vsrl is undefined */ + return true; } -VSX_LOAD_SCALAR(lxsdx, ld64_i64) -VSX_LOAD_SCALAR(lxsiwax, ld32s_i64) -VSX_LOAD_SCALAR(lxsibzx, ld8u_i64) -VSX_LOAD_SCALAR(lxsihzx, ld16u_i64) -VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64) -VSX_LOAD_SCALAR(lxsspx, ld32fs) +TRANS_FLAGS2(VSX, LXSDX, do_lxs, gen_qemu_ld64_i64); +TRANS_FLAGS2(VSX207, LXSIWAX, do_lxs, gen_qemu_ld32s_i64); +TRANS_FLAGS2(ISA300, LXSIBZX, do_lxs, gen_qemu_ld8u_i64); +TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); +TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); +TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); static void gen_lxvd2x(DisasContext *ctx) { @@ -266,29 +263,25 @@ VSX_VECTOR_LOAD_STORE_LENGTH(stxvl) VSX_VECTOR_LOAD_STORE_LENGTH(stxvll) #endif -#define VSX_STORE_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->vsx_enabled)) { \ - 
gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(); \ - gen_set_access_type(ctx, ACCESS_INT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - get_cpu_vsr(t0, xS(ctx->opcode), true); \ - gen_qemu_##operation(ctx, t0, EA); \ +static bool do_stxs(DisasContext *ctx, arg_X *a, + void (*op)(DisasContext *, TCGv_i64, TCGv)) +{ + TCGv EA; + TCGv_i64 t0; + REQUIRE_VSX(ctx); + t0 = tcg_temp_new_i64(); + gen_set_access_type(ctx, ACCESS_INT); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); + get_cpu_vsr(t0, a->rt, true); + op(ctx, t0, EA); + return true; } -VSX_STORE_SCALAR(stxsdx, st64_i64) - -VSX_STORE_SCALAR(stxsibx, st8_i64) -VSX_STORE_SCALAR(stxsihx, st16_i64) -VSX_STORE_SCALAR(stxsiwx, st32_i64) -VSX_STORE_SCALAR(stxsspx, st32fs) +TRANS_FLAGS2(VSX, STXSDX, do_stxs, gen_qemu_st64_i64); +TRANS_FLAGS2(ISA300, STXSIBX, do_stxs, gen_qemu_st8_i64); +TRANS_FLAGS2(ISA300, STXSIHX, do_stxs, gen_qemu_st16_i64); +TRANS_FLAGS2(VSX207, STXSIWX, do_stxs, gen_qemu_st32_i64); +TRANS_FLAGS2(VSX207, STXSSPX, do_stxs, gen_qemu_st32fs); static void gen_stxvd2x(DisasContext *ctx) { diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index 3c0a70cb7c..d44cb55836 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -1,9 +1,3 @@ -GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(lxsiwzx, 0x1F, 0x0C, 0x00, 0, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(lxsibzx, 0x1F, 0x0D, 0x18, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(lxsihzx, 0x1F, 0x0D, 0x19, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(lxsspx, 0x1F, 0x0C, 0x10, 0, PPC_NONE, PPC2_VSX207), GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(lxvwsx, 0x1F, 0x0C, 0x0B, 0, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX), @@ -15,11 +9,6 @@ GEN_HANDLER_E(lxvl, 0x1F, 0x0D, 0x08, 0, 
PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(lxvll, 0x1F, 0x0D, 0x09, 0, PPC_NONE, PPC2_ISA300), #endif -GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(stxsihx, 0x1F, 0xD, 0x1D, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(stxsiwx, 0x1F, 0xC, 0x04, 0, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(stxsspx, 0x1F, 0xC, 0x14, 0, PPC_NONE, PPC2_VSX207), GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(stxvw4x, 0x1F, 0xC, 0x1C, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(stxvh8x, 0x1F, 0x0C, 0x1D, 0, PPC_NONE, PPC2_ISA300), From 29df8d950e20f5caeec137fa20bc1245fb9f702e Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Tue, 18 Jun 2024 14:28:29 +0530 Subject: [PATCH 59/96] target/ppc: Move VSX vector with length storage access insns to decodetree. Moving the following instructions to decodetree specification : {l, st}xvl(l) : X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also added a new function do_ea_calc_ra to calculate the effective address : EA <- (RA == 0) ? 0 : GPR[RA], which is now used by the above-said insns, and shall be used later by (p){lx, stx}vp insns. 
Reviewed-by: Richard Henderson Signed-off-by: Chinmay Rath [np: Fix 32-bit build] Signed-off-by: Nicholas Piggin --- target/ppc/helper.h | 8 +-- target/ppc/insn32.decode | 6 ++ target/ppc/mem_helper.c | 8 +-- target/ppc/translate.c | 17 ++++++ target/ppc/translate/vsx-impl.c.inc | 90 ++++++++++++++++++++--------- target/ppc/translate/vsx-ops.c.inc | 8 --- 6 files changed, 94 insertions(+), 43 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 13f20bb243..85be749004 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -277,10 +277,10 @@ DEF_HELPER_3(STVEBX, void, env, avr, tl) DEF_HELPER_3(STVEHX, void, env, avr, tl) DEF_HELPER_3(STVEWX, void, env, avr, tl) #if defined(TARGET_PPC64) -DEF_HELPER_4(lxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(lxvll, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvll, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVLL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVLL, void, env, tl, vsr, tl) #endif DEF_HELPER_4(vsumsws, void, env, avr, avr, avr) DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index f2661df918..e87b034159 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -1006,6 +1006,12 @@ STXSIHX 011111 ..... ..... ..... 1110101101 . @X_TSX STXSIWX 011111 ..... ..... ..... 0010001100 . @X_TSX STXSSPX 011111 ..... ..... ..... 1010001100 . @X_TSX +LXVL 011111 ..... ..... ..... 0100001101 . @X_TSX +LXVLL 011111 ..... ..... ..... 0100101101 . @X_TSX + +STXVL 011111 ..... ..... ..... 0110001101 . @X_TSX +STXVLL 011111 ..... ..... ..... 0110101101 . @X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 111100 ..... 00000 ..... 111011001 .. 
@XX2 diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index 953dd08d5d..51b137febd 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -475,8 +475,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ *xt = t; \ } -VSX_LXVL(lxvl, 0) -VSX_LXVL(lxvll, 1) +VSX_LXVL(LXVL, 0) +VSX_LXVL(LXVLL, 1) #undef VSX_LXVL #define VSX_STXVL(name, lj) \ @@ -504,8 +504,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ } \ } -VSX_STXVL(stxvl, 0) -VSX_STXVL(stxvll, 1) +VSX_STXVL(STXVL, 0) +VSX_STXVL(STXVLL, 1) #undef VSX_STXVL #undef GET_NB #endif /* TARGET_PPC64 */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index cba943a49d..46aabce82b 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -2543,6 +2543,7 @@ static inline void gen_align_no_le(DisasContext *ctx) (ctx->opcode & 0x03FF0000) | POWERPC_EXCP_ALIGN_LE); } +/* EA <- {(ra == 0) ? 0 : GPR[ra]} + displ */ static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ) { TCGv ea = tcg_temp_new(); @@ -2557,6 +2558,22 @@ static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ) return ea; } +#if defined(TARGET_PPC64) +/* EA <- (ra == 0) ? 
0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ + TCGv EA = tcg_temp_new(); + if (!ra) { + tcg_gen_movi_tl(EA, 0); + } else if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); + } else { + tcg_gen_mov_tl(EA, cpu_gpr[ra]); + } + return EA; +} +#endif + /*** Integer load ***/ #define DEF_MEMOP(op) ((op) | ctx->default_tcg_memop_mask) #define BSWAP_MEMOP(op) ((op) | (ctx->default_tcg_memop_mask ^ MO_BSWAP)) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index de2a26a213..46bab49215 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -232,36 +232,72 @@ static void gen_lxvb16x(DisasContext *ctx) set_cpu_vsr(xT(ctx->opcode), xtl, false); } -#ifdef TARGET_PPC64 -#define VSX_VECTOR_LOAD_STORE_LENGTH(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv EA; \ - TCGv_ptr xt; \ - \ - if (xT(ctx->opcode) < 32) { \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - } else { \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - } \ - EA = tcg_temp_new(); \ - xt = gen_vsr_ptr(xT(ctx->opcode)); \ - gen_set_access_type(ctx, ACCESS_INT); \ - gen_addr_register(ctx, EA); \ - gen_helper_##name(tcg_env, EA, xt, cpu_gpr[rB(ctx->opcode)]); \ +#if defined(TARGET_PPC64) +static bool do_ld_st_vl(DisasContext *ctx, arg_X *a, + void (*helper)(TCGv_ptr, TCGv, TCGv_ptr, TCGv)) +{ + TCGv EA; + TCGv_ptr xt; + if (a->rt < 32) { + REQUIRE_VSX(ctx); + } else { + REQUIRE_VECTOR(ctx); + } + xt = gen_vsr_ptr(a->rt); + gen_set_access_type(ctx, ACCESS_INT); + EA = do_ea_calc_ra(ctx, a->ra); + helper(tcg_env, EA, xt, cpu_gpr[a->rb]); + return true; +} +#endif + +static bool trans_LXVL(DisasContext *ctx, arg_LXVL *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + return do_ld_st_vl(ctx, a, gen_helper_LXVL); +#else + 
qemu_build_not_reached(); +#endif + return true; } -VSX_VECTOR_LOAD_STORE_LENGTH(lxvl) -VSX_VECTOR_LOAD_STORE_LENGTH(lxvll) -VSX_VECTOR_LOAD_STORE_LENGTH(stxvl) -VSX_VECTOR_LOAD_STORE_LENGTH(stxvll) +static bool trans_LXVLL(DisasContext *ctx, arg_LXVLL *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + return do_ld_st_vl(ctx, a, gen_helper_LXVLL); +#else + qemu_build_not_reached(); #endif + return true; +} + +static bool trans_STXVL(DisasContext *ctx, arg_STXVL *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + return do_ld_st_vl(ctx, a, gen_helper_STXVL); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_STXVLL(DisasContext *ctx, arg_STXVLL *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + return do_ld_st_vl(ctx, a, gen_helper_STXVLL); +#else + qemu_build_not_reached(); +#endif + return true; +} static bool do_stxs(DisasContext *ctx, arg_X *a, void (*op)(DisasContext *, TCGv_i64, TCGv)) diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index d44cb55836..7f4326c974 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -4,19 +4,11 @@ GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300), -#if defined(TARGET_PPC64) -GEN_HANDLER_E(lxvl, 0x1F, 0x0D, 0x08, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(lxvll, 0x1F, 0x0D, 0x09, 0, PPC_NONE, PPC2_ISA300), -#endif GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(stxvw4x, 0x1F, 0xC, 0x1C, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(stxvh8x, 0x1F, 0x0C, 0x1D, 0, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(stxvb16x, 0x1F, 0x0C, 0x1F, 0, PPC_NONE, PPC2_ISA300), -#if 
defined(TARGET_PPC64) -GEN_HANDLER_E(stxvl, 0x1F, 0x0D, 0x0C, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(stxvll, 0x1F, 0x0D, 0x0D, 0, PPC_NONE, PPC2_ISA300), -#endif GEN_HANDLER_E(mfvsrwz, 0x1F, 0x13, 0x03, 0x0000F800, PPC_NONE, PPC2_VSX207), GEN_HANDLER_E(mtvsrwa, 0x1F, 0x13, 0x06, 0x0000F800, PPC_NONE, PPC2_VSX207), From 7419dc5b2b5bcc929d91e8920692041a8f6d1977 Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Tue, 18 Jun 2024 14:28:30 +0530 Subject: [PATCH 60/96] target/ppc: Move VSX vector storage access insns to decodetree. Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson Signed-off-by: Nicholas Piggin --- target/ppc/insn32.decode | 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 ++++++++++++---------------- target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index e87b034159..77869cfb33 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -1006,9 +1006,19 @@ STXSIHX 011111 ..... ..... ..... 1110101101 . @X_TSX STXSIWX 011111 ..... ..... ..... 0010001100 . @X_TSX STXSSPX 011111 ..... ..... ..... 1010001100 . @X_TSX +LXVB16X 011111 ..... ..... ..... 1101101100 . @X_TSX +LXVD2X 011111 ..... ..... ..... 1101001100 . @X_TSX +LXVH8X 011111 ..... ..... ..... 1100101100 . @X_TSX +LXVW4X 011111 ..... ..... ..... 1100001100 . @X_TSX +LXVDSX 011111 ..... ..... ..... 0101001100 . @X_TSX +LXVWSX 011111 ..... ..... ..... 0101101100 . @X_TSX LXVL 011111 ..... ..... ..... 0100001101 . @X_TSX LXVLL 011111 ..... ..... ..... 0100101101 . @X_TSX +STXVB16X 011111 ..... ..... ..... 1111101100 . @X_TSX +STXVD2X 011111 ..... ..... ..... 1111001100 . 
@X_TSX +STXVH8X 011111 ..... ..... ..... 1110101100 . @X_TSX +STXVW4X 011111 ..... ..... ..... 1110001100 . @X_TSX STXVL 011111 ..... ..... ..... 0110001101 . @X_TSX STXVLL 011111 ..... ..... ..... 0110101101 . @X_TSX diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 46bab49215..e0fb4bad92 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -46,41 +46,37 @@ TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); -static void gen_lxvd2x(DisasContext *ctx) +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); - set_cpu_vsr(xT(ctx->opcode), t0, true); + set_cpu_vsr(a->rt, t0, true); tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); - set_cpu_vsr(xT(ctx->opcode), t0, false); + set_cpu_vsr(a->rt, t0, false); + return true; } -static void gen_lxvw4x(DisasContext *ctx) +static bool trans_LXVW4X(DisasContext *ctx, arg_LXVW4X *a) { TCGv EA; - TCGv_i64 xth; - TCGv_i64 xtl; - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } + TCGv_i64 xth, xtl; + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, VSX); + xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); - gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - - gen_addr_reg_index(ctx, EA); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); if (ctx->le_mode) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -97,55 +93,45 @@ static void gen_lxvw4x(DisasContext *ctx) tcg_gen_addi_tl(EA, 
EA, 8); tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); } - set_cpu_vsr(xT(ctx->opcode), xth, true); - set_cpu_vsr(xT(ctx->opcode), xtl, false); + set_cpu_vsr(a->rt, xth, true); + set_cpu_vsr(a->rt, xtl, false); + return true; } -static void gen_lxvwsx(DisasContext *ctx) +static bool trans_LXVWSX(DisasContext *ctx, arg_LXVWSX *a) { TCGv EA; TCGv_i32 data; - if (xT(ctx->opcode) < 32) { - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } + if (a->rt < 32) { + REQUIRE_VSX(ctx); } else { - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } + REQUIRE_VECTOR(ctx); } + REQUIRE_INSNS_FLAGS2(ctx, ISA300); gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - - gen_addr_reg_index(ctx, EA); - + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); data = tcg_temp_new_i32(); tcg_gen_qemu_ld_i32(data, EA, ctx->mem_idx, DEF_MEMOP(MO_UL)); - tcg_gen_gvec_dup_i32(MO_UL, vsr_full_offset(xT(ctx->opcode)), 16, 16, data); + tcg_gen_gvec_dup_i32(MO_UL, vsr_full_offset(a->rt), 16, 16, data); + return true; } -static void gen_lxvdsx(DisasContext *ctx) +static bool trans_LXVDSX(DisasContext *ctx, arg_LXVDSX *a) { TCGv EA; TCGv_i64 data; - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, VSX); gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - - gen_addr_reg_index(ctx, EA); - + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); data = tcg_temp_new_i64(); tcg_gen_qemu_ld_i64(data, EA, ctx->mem_idx, DEF_MEMOP(MO_UQ)); - tcg_gen_gvec_dup_i64(MO_UQ, vsr_full_offset(xT(ctx->opcode)), 16, 16, data); + tcg_gen_gvec_dup_i64(MO_UQ, vsr_full_offset(a->rt), 16, 16, data); + return true; } static void gen_bswap16x8(TCGv_i64 outh, TCGv_i64 outl, @@ -184,52 +170,47 @@ static void gen_bswap32x4(TCGv_i64 outh, TCGv_i64 outl, tcg_gen_deposit_i64(outl, outl, lo, 32, 32); } -static void gen_lxvh8x(DisasContext *ctx) +static bool 
trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a) { TCGv EA; - TCGv_i64 xth; - TCGv_i64 xtl; + TCGv_i64 xth, xtl; + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); - - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ); tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); if (ctx->le_mode) { gen_bswap16x8(xth, xtl, xth, xtl); } - set_cpu_vsr(xT(ctx->opcode), xth, true); - set_cpu_vsr(xT(ctx->opcode), xtl, false); + set_cpu_vsr(a->rt, xth, true); + set_cpu_vsr(a->rt, xtl, false); + return true; } -static void gen_lxvb16x(DisasContext *ctx) +static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a) { TCGv EA; - TCGv_i64 xth; - TCGv_i64 xtl; + TCGv_i64 xth, xtl; + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ); tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); - set_cpu_vsr(xT(ctx->opcode), xth, true); - set_cpu_vsr(xT(ctx->opcode), xtl, false); + set_cpu_vsr(a->rt, xth, true); + set_cpu_vsr(a->rt, xtl, false); + return true; } #if defined(TARGET_PPC64) @@ -319,42 +300,39 @@ TRANS_FLAGS2(ISA300, STXSIHX, do_stxs, gen_qemu_st16_i64); TRANS_FLAGS2(VSX207, STXSIWX, do_stxs, gen_qemu_st32_i64); TRANS_FLAGS2(VSX207, STXSSPX, do_stxs, gen_qemu_st32fs); -static void gen_stxvd2x(DisasContext *ctx) +static bool trans_STXVD2X(DisasContext *ctx, arg_STXVD2X *a) { TCGv EA; TCGv_i64 t0; - if 
(unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - get_cpu_vsr(t0, xS(ctx->opcode), true); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); + get_cpu_vsr(t0, a->rt, true); gen_qemu_st64_i64(ctx, t0, EA); tcg_gen_addi_tl(EA, EA, 8); - get_cpu_vsr(t0, xS(ctx->opcode), false); + get_cpu_vsr(t0, a->rt, false); gen_qemu_st64_i64(ctx, t0, EA); + return true; } -static void gen_stxvw4x(DisasContext *ctx) +static bool trans_STXVW4X(DisasContext *ctx, arg_STXVW4X *a) { TCGv EA; - TCGv_i64 xsh; - TCGv_i64 xsl; + TCGv_i64 xsh, xsl; + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, VSX); - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } xsh = tcg_temp_new_i64(); xsl = tcg_temp_new_i64(); - get_cpu_vsr(xsh, xS(ctx->opcode), true); - get_cpu_vsr(xsl, xS(ctx->opcode), false); + get_cpu_vsr(xsh, a->rt, true); + get_cpu_vsr(xsl, a->rt, false); gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); if (ctx->le_mode) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -371,25 +349,23 @@ static void gen_stxvw4x(DisasContext *ctx) tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); } + return true; } -static void gen_stxvh8x(DisasContext *ctx) +static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a) { TCGv EA; - TCGv_i64 xsh; - TCGv_i64 xsl; + TCGv_i64 xsh, xsl; + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } xsh = tcg_temp_new_i64(); xsl = tcg_temp_new_i64(); - get_cpu_vsr(xsh, xS(ctx->opcode), true); - get_cpu_vsr(xsl, xS(ctx->opcode), false); + get_cpu_vsr(xsh, a->rt, true); + get_cpu_vsr(xsl, 
a->rt, false); gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); if (ctx->le_mode) { TCGv_i64 outh = tcg_temp_new_i64(); TCGv_i64 outl = tcg_temp_new_i64(); @@ -403,28 +379,27 @@ static void gen_stxvh8x(DisasContext *ctx) tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); } + return true; } -static void gen_stxvb16x(DisasContext *ctx) +static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a) { TCGv EA; - TCGv_i64 xsh; - TCGv_i64 xsl; + TCGv_i64 xsh, xsl; + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } xsh = tcg_temp_new_i64(); xsl = tcg_temp_new_i64(); - get_cpu_vsr(xsh, xS(ctx->opcode), true); - get_cpu_vsr(xsl, xS(ctx->opcode), false); + get_cpu_vsr(xsh, a->rt, true); + get_cpu_vsr(xsl, a->rt, false); gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEUQ); tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); + return true; } static void gen_mfvsrwz(DisasContext *ctx) diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index 7f4326c974..91cde088bc 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -1,15 +1,3 @@ -GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(lxvwsx, 0x1F, 0x0C, 0x0B, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300), - -GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(stxvw4x, 0x1F, 0xC, 
0x1C, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(stxvh8x, 0x1F, 0x0C, 0x1D, 0, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(stxvb16x, 0x1F, 0x0C, 0x1F, 0, PPC_NONE, PPC2_ISA300), - GEN_HANDLER_E(mfvsrwz, 0x1F, 0x13, 0x03, 0x0000F800, PPC_NONE, PPC2_VSX207), GEN_HANDLER_E(mtvsrwa, 0x1F, 0x13, 0x06, 0x0000F800, PPC_NONE, PPC2_VSX207), GEN_HANDLER_E(mtvsrwz, 0x1F, 0x13, 0x07, 0x0000F800, PPC_NONE, PPC2_VSX207), From e77d736d2a069d462b686f2207df06859abb9ace Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Tue, 18 Jun 2024 14:28:31 +0530 Subject: [PATCH 61/96] target/ppc: Move VSX fp compare insns to decodetree. Moving the following instructions to decodetree specification: xvcmp{eq, gt, ge, ne}{s, d}p : XX3-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson Signed-off-by: Nicholas Piggin --- target/ppc/fpu_helper.c | 16 +++++----- target/ppc/helper.h | 16 +++++----- target/ppc/insn32.decode | 12 ++++++++ target/ppc/translate/vsx-impl.c.inc | 46 +++++++++++++---------------- target/ppc/translate/vsx-ops.c.inc | 18 ----------- 5 files changed, 48 insertions(+), 60 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 3f2e4f5827..230466a87f 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2527,14 +2527,14 @@ uint32_t helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ return crf6; \ } -VSX_CMP(xvcmpeqdp, 2, float64, VsrD(i), eq, 0, 1) -VSX_CMP(xvcmpgedp, 2, float64, VsrD(i), le, 1, 1) -VSX_CMP(xvcmpgtdp, 2, float64, VsrD(i), lt, 1, 1) -VSX_CMP(xvcmpnedp, 2, float64, VsrD(i), eq, 0, 0) -VSX_CMP(xvcmpeqsp, 4, float32, VsrW(i), eq, 0, 1) -VSX_CMP(xvcmpgesp, 4, float32, VsrW(i), le, 1, 1) -VSX_CMP(xvcmpgtsp, 4, float32, VsrW(i), lt, 1, 1) -VSX_CMP(xvcmpnesp, 4, float32, VsrW(i), eq, 0, 0) +VSX_CMP(XVCMPEQDP, 2, float64, VsrD(i), eq, 0, 1) +VSX_CMP(XVCMPGEDP, 2, float64, 
VsrD(i), le, 1, 1) +VSX_CMP(XVCMPGTDP, 2, float64, VsrD(i), lt, 1, 1) +VSX_CMP(XVCMPNEDP, 2, float64, VsrD(i), eq, 0, 0) +VSX_CMP(XVCMPEQSP, 4, float32, VsrW(i), eq, 0, 1) +VSX_CMP(XVCMPGESP, 4, float32, VsrW(i), le, 1, 1) +VSX_CMP(XVCMPGTSP, 4, float32, VsrW(i), lt, 1, 1) +VSX_CMP(XVCMPNESP, 4, float32, VsrW(i), eq, 0, 0) /* * VSX_CVT_FP_TO_FP - VSX floating point/floating point conversion diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 85be749004..5a77e761bd 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -476,10 +476,10 @@ DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvdpsp, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxds, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxws, void, env, vsr, vsr) @@ -510,10 +510,10 @@ DEF_HELPER_5(xvnmaddsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXSP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINSP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, 
vsr) +DEF_HELPER_FLAGS_4(XVCMPEQSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvspdp, void, env, vsr, vsr) DEF_HELPER_3(xvcvsphp, void, env, vsr, vsr) DEF_HELPER_3(xvcvhpsp, void, env, vsr, vsr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 77869cfb33..e53fd2840d 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -241,6 +241,9 @@ &XX3 xt xa xb @XX3 ...... ..... ..... ..... ........ ... &XX3 xt=%xx_xt xa=%xx_xa xb=%xx_xb +&XX3_rc xt xa xb rc:bool +@XX3_rc ...... ..... ..... ..... rc:1 ....... ... &XX3_rc xt=%xx_xt xa=%xx_xa xb=%xx_xb + # 32 bit GER instructions have all mask bits considered 1 &MMIRR_XX3 xa xb xt pmsk xmsk ymsk %xx_at 23:3 @@ -1124,6 +1127,15 @@ XSCMPEQQP 111111 ..... ..... ..... 0001000100 - @X XSCMPGEQP 111111 ..... ..... ..... 0011000100 - @X XSCMPGTQP 111111 ..... ..... ..... 0011100100 - @X +XVCMPEQSP 111100 ..... ..... ..... . 1000011 ... @XX3_rc +XVCMPGTSP 111100 ..... ..... ..... . 1001011 ... @XX3_rc +XVCMPGESP 111100 ..... ..... ..... . 1010011 ... @XX3_rc +XVCMPNESP 111100 ..... ..... ..... . 1011011 ... @XX3_rc +XVCMPEQDP 111100 ..... ..... ..... . 1100011 ... @XX3_rc +XVCMPGTDP 111100 ..... ..... ..... . 1101011 ... @XX3_rc +XVCMPGEDP 111100 ..... ..... ..... . 1110011 ... @XX3_rc +XVCMPNEDP 111100 ..... ..... ..... . 1111011 ... @XX3_rc + XSMAXDP 111100 ..... ..... ..... 10100000 ... @XX3 XSMINDP 111100 ..... ..... ..... 10101000 ... 
@XX3 diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index e0fb4bad92..26ebf3fedf 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -792,34 +792,28 @@ static bool do_xvcpsgn(DisasContext *ctx, arg_XX3 *a, unsigned vece) TRANS(XVCPSGNSP, do_xvcpsgn, MO_32) TRANS(XVCPSGNDP, do_xvcpsgn, MO_64) -#define VSX_CMP(name, op1, op2, inval, type) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i32 ignored; \ - TCGv_ptr xt, xa, xb; \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - xt = gen_vsr_ptr(xT(ctx->opcode)); \ - xa = gen_vsr_ptr(xA(ctx->opcode)); \ - xb = gen_vsr_ptr(xB(ctx->opcode)); \ - if ((ctx->opcode >> (31 - 21)) & 1) { \ - gen_helper_##name(cpu_crf[6], tcg_env, xt, xa, xb); \ - } else { \ - ignored = tcg_temp_new_i32(); \ - gen_helper_##name(ignored, tcg_env, xt, xa, xb); \ - } \ +static bool do_cmp(DisasContext *ctx, arg_XX3_rc *a, + void (*helper)(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) +{ + TCGv_i32 dest; + TCGv_ptr xt, xa, xb; + REQUIRE_VSX(ctx); + xt = gen_vsr_ptr(a->xt); + xa = gen_vsr_ptr(a->xa); + xb = gen_vsr_ptr(a->xb); + dest = a->rc ? 
cpu_crf[6] : tcg_temp_new_i32(); + helper(dest, tcg_env, xt, xa, xb); + return true; } -VSX_CMP(xvcmpeqdp, 0x0C, 0x0C, 0, PPC2_VSX) -VSX_CMP(xvcmpgedp, 0x0C, 0x0E, 0, PPC2_VSX) -VSX_CMP(xvcmpgtdp, 0x0C, 0x0D, 0, PPC2_VSX) -VSX_CMP(xvcmpnedp, 0x0C, 0x0F, 0, PPC2_ISA300) -VSX_CMP(xvcmpeqsp, 0x0C, 0x08, 0, PPC2_VSX) -VSX_CMP(xvcmpgesp, 0x0C, 0x0A, 0, PPC2_VSX) -VSX_CMP(xvcmpgtsp, 0x0C, 0x09, 0, PPC2_VSX) -VSX_CMP(xvcmpnesp, 0x0C, 0x0B, 0, PPC2_VSX) +TRANS_FLAGS2(VSX, XVCMPEQSP, do_cmp, gen_helper_XVCMPEQSP); +TRANS_FLAGS2(VSX, XVCMPGTSP, do_cmp, gen_helper_XVCMPGTSP); +TRANS_FLAGS2(VSX, XVCMPGESP, do_cmp, gen_helper_XVCMPGESP); +TRANS_FLAGS2(ISA300, XVCMPNESP, do_cmp, gen_helper_XVCMPNESP); +TRANS_FLAGS2(VSX, XVCMPEQDP, do_cmp, gen_helper_XVCMPEQDP); +TRANS_FLAGS2(VSX, XVCMPGTDP, do_cmp, gen_helper_XVCMPGTDP); +TRANS_FLAGS2(VSX, XVCMPGEDP, do_cmp, gen_helper_XVCMPGEDP); +TRANS_FLAGS2(ISA300, XVCMPNEDP, do_cmp, gen_helper_XVCMPNEDP); static bool trans_XSCVQPDP(DisasContext *ctx, arg_X_tb_rc *a) { diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index 91cde088bc..e553b5b8fa 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -43,16 +43,6 @@ GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 1, PPC_NONE, fl2), \ GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 1, PPC_NONE, fl2), \ GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 1, PPC_NONE, fl2) -#define GEN_XX3_RC_FORM(name, opc2, opc3, fl2) \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x00, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x00, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x00, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x00, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x10, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x10, 0, PPC_NONE, fl2), \ 
-GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x10, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x10, 0, PPC_NONE, fl2) - #define GEN_XX3FORM_DM(name, opc2, opc3) \ GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ @@ -175,10 +165,6 @@ GEN_XX3FORM_NAME(xvnmadddp, "xvnmaddadp", 0x04, 0x1C, PPC2_VSX), GEN_XX3FORM_NAME(xvnmadddp, "xvnmaddmdp", 0x04, 0x1D, PPC2_VSX), GEN_XX3FORM_NAME(xvnmsubdp, "xvnmsubadp", 0x04, 0x1E, PPC2_VSX), GEN_XX3FORM_NAME(xvnmsubdp, "xvnmsubmdp", 0x04, 0x1F, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpeqdp, 0x0C, 0x0C, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpgtdp, 0x0C, 0x0D, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpgedp, 0x0C, 0x0E, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpnedp, 0x0C, 0x0F, PPC2_ISA300), GEN_XX2FORM(xvcvdpsp, 0x12, 0x18, PPC2_VSX), GEN_XX2FORM(xvcvdpsxds, 0x10, 0x1D, PPC2_VSX), GEN_XX2FORM(xvcvdpsxws, 0x10, 0x0D, PPC2_VSX), @@ -207,10 +193,6 @@ GEN_XX3FORM_NAME(xvnmaddsp, "xvnmaddasp", 0x04, 0x18, PPC2_VSX), GEN_XX3FORM_NAME(xvnmaddsp, "xvnmaddmsp", 0x04, 0x19, PPC2_VSX), GEN_XX3FORM_NAME(xvnmsubsp, "xvnmsubasp", 0x04, 0x1A, PPC2_VSX), GEN_XX3FORM_NAME(xvnmsubsp, "xvnmsubmsp", 0x04, 0x1B, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpeqsp, 0x0C, 0x08, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpgtsp, 0x0C, 0x09, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpgesp, 0x0C, 0x0A, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpnesp, 0x0C, 0x0B, PPC2_ISA300), GEN_XX2FORM(xvcvspdp, 0x12, 0x1C, PPC2_VSX), GEN_XX2FORM(xvcvspsxds, 0x10, 0x19, PPC2_VSX), GEN_XX2FORM(xvcvspsxws, 0x10, 0x09, PPC2_VSX), From bf15bf0a1d07913f22e9e82a0b829d45efc69195 Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Tue, 9 Jul 2024 17:13:39 +0530 Subject: [PATCH 62/96] target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc. Those functions are used to ld/st data to and from Altivec registers, in 64 bits chunks, and are only used in vmx-impl.c.inc file, hence the clean-up movement. 
Reviewed-by: Richard Henderson Signed-off-by: Chinmay Rath Signed-off-by: Nicholas Piggin --- target/ppc/translate.c | 10 ---------- target/ppc/translate/vmx-impl.c.inc | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 46aabce82b..71513ba964 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -5558,16 +5558,6 @@ static inline void set_fpr(int regno, TCGv_i64 src) tcg_gen_st_i64(tcg_constant_i64(0), tcg_env, vsr64_offset(regno, false)); } -static inline void get_avr64(TCGv_i64 dst, int regno, bool high) -{ - tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); -} - -static inline void set_avr64(int regno, TCGv_i64 src, bool high) -{ - tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); -} - /* * Helpers for decodetree used by !function for decoding arguments. */ diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 152bcde0e3..a182d2cf81 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -14,6 +14,16 @@ static inline TCGv_ptr gen_avr_ptr(int reg) return r; } +static inline void get_avr64(TCGv_i64 dst, int regno, bool high) +{ + tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); +} + +static inline void set_avr64(int regno, TCGv_i64 src, bool high) +{ + tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; From acbdee4588d972b8553b2c5c9ec4c17c2fe399a7 Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Tue, 9 Jul 2024 17:13:40 +0530 Subject: [PATCH 63/96] target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128. Updated instructions {l, st}vx to use tcg_gen_qemu_ld/st_i128, instead of using 64 bits loads/stores in succession. Introduced functions {get, set}_avr_full in vmx-impl.c.inc to facilitate the above, and potential future usage. 
Reviewed-by: Richard Henderson Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath Signed-off-by: Nicholas Piggin --- target/ppc/translate/vmx-impl.c.inc | 42 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index a182d2cf81..70d0ad2e71 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -24,25 +24,29 @@ static inline void set_avr64(int regno, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); } +static inline void get_avr_full(TCGv_i128 dst, int regno) +{ + tcg_gen_ld_i128(dst, tcg_env, avr_full_offset(regno)); +} + +static inline void set_avr_full(int regno, TCGv_i128 src) +{ + tcg_gen_st_i128(src, tcg_env, avr_full_offset(regno)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; - TCGv_i64 avr; + TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); - avr = tcg_temp_new_i64(); + avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); - /* - * We only need to swap high and low halves. gen_qemu_ld64_i64 - * does necessary 64-bit byteswap already. 
- */ - gen_qemu_ld64_i64(ctx, avr, EA); - set_avr64(a->rt, avr, !ctx->le_mode); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_ld64_i64(ctx, avr, EA); - set_avr64(a->rt, avr, ctx->le_mode); + tcg_gen_qemu_ld_i128(avr, EA, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR)); + set_avr_full(a->rt, avr); return true; } @@ -56,22 +60,16 @@ static bool trans_LVXL(DisasContext *ctx, arg_LVXL *a) static bool trans_STVX(DisasContext *ctx, arg_STVX *a) { TCGv EA; - TCGv_i64 avr; + TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); - avr = tcg_temp_new_i64(); + avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); - /* - * We only need to swap high and low halves. gen_qemu_st64_i64 - * does necessary 64-bit byteswap already. - */ - get_avr64(avr, a->rt, !ctx->le_mode); - gen_qemu_st64_i64(ctx, avr, EA); - tcg_gen_addi_tl(EA, EA, 8); - get_avr64(avr, a->rt, ctx->le_mode); - gen_qemu_st64_i64(ctx, avr, EA); + get_avr_full(avr, a->rt); + tcg_gen_qemu_st_i128(avr, EA, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR)); return true; } From 625b58fde83d226c6ea61004edad04858481c0e4 Mon Sep 17 00:00:00 2001 From: Chinmay Rath Date: Tue, 9 Jul 2024 17:13:41 +0530 Subject: [PATCH 64/96] target/ppc: Update VSX storage access insns to use tcg_gen_qemu_ld/st_i128. Updated many VSX instructions to use tcg_gen_qemu_ld/st_i128, instead of using tcg_gen_qemu_ld/st_i64 consecutively. Introduced functions {get,set}_vsr_full to facilitate the above & for future use.
Reviewed-by: Richard Henderson Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath Signed-off-by: Nicholas Piggin --- target/ppc/translate/vsx-impl.c.inc | 74 +++++++++++++---------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 26ebf3fedf..40a87ddc4a 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -10,6 +10,16 @@ static inline void set_cpu_vsr(int n, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, vsr64_offset(n, high)); } +static inline void get_vsr_full(TCGv_i128 dst, int reg) +{ + tcg_gen_ld_i128(dst, tcg_env, vsr_full_offset(reg)); +} + +static inline void set_vsr_full(int reg, TCGv_i128 src) +{ + tcg_gen_st_i128(src, tcg_env, vsr_full_offset(reg)); +} + static inline TCGv_ptr gen_vsr_ptr(int reg) { TCGv_ptr r = tcg_temp_new_ptr(); @@ -196,20 +206,17 @@ static bool trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a) static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a) { TCGv EA; - TCGv_i64 xth, xtl; + TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); - xth = tcg_temp_new_i64(); - xtl = tcg_temp_new_i64(); + data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); - tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ); - tcg_gen_addi_tl(EA, EA, 8); - tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); - set_cpu_vsr(a->rt, xth, true); - set_cpu_vsr(a->rt, xtl, false); + tcg_gen_qemu_ld_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); + set_vsr_full(a->rt, data); return true; } @@ -385,20 +392,17 @@ static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a) static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a) { TCGv EA; - TCGv_i64 xsh, xsl; + TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); - xsh = tcg_temp_new_i64(); - xsl = tcg_temp_new_i64(); - get_cpu_vsr(xsh, a->rt, 
true); - get_cpu_vsr(xsl, a->rt, false); + data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); - tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEUQ); - tcg_gen_addi_tl(EA, EA, 8); - tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); + get_vsr_full(data, a->rt); + tcg_gen_qemu_st_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); return true; } @@ -2175,13 +2179,13 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, int rt, bool store, bool paired) { TCGv ea; - TCGv_i64 xt; + TCGv_i128 data; MemOp mop; int rt1, rt2; - xt = tcg_temp_new_i64(); + data = tcg_temp_new_i128(); - mop = DEF_MEMOP(MO_UQ); + mop = DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR); gen_set_access_type(ctx, ACCESS_INT); ea = do_ea_calc(ctx, ra, displ); @@ -2195,32 +2199,20 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, } if (store) { - get_cpu_vsr(xt, rt1, !ctx->le_mode); - tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); - gen_addr_add(ctx, ea, ea, 8); - get_cpu_vsr(xt, rt1, ctx->le_mode); - tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); + get_vsr_full(data, rt1); + tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); if (paired) { - gen_addr_add(ctx, ea, ea, 8); - get_cpu_vsr(xt, rt2, !ctx->le_mode); - tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); - gen_addr_add(ctx, ea, ea, 8); - get_cpu_vsr(xt, rt2, ctx->le_mode); - tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); + gen_addr_add(ctx, ea, ea, 16); + get_vsr_full(data, rt2); + tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); } } else { - tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); - set_cpu_vsr(rt1, xt, !ctx->le_mode); - gen_addr_add(ctx, ea, ea, 8); - tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); - set_cpu_vsr(rt1, xt, ctx->le_mode); + tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); + set_vsr_full(rt1, data); if (paired) { - gen_addr_add(ctx, ea, ea, 8); - tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); - set_cpu_vsr(rt2, xt, 
!ctx->le_mode); - gen_addr_add(ctx, ea, ea, 8); - tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); - set_cpu_vsr(rt2, xt, ctx->le_mode); + gen_addr_add(ctx, ea, ea, 16); + tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); + set_vsr_full(rt2, data); } } return true; From 698faf3304805d8951bb0de0ab08f5d2dbeae7cf Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:36 +0200 Subject: [PATCH 65/96] target/ppc: Reorganise and rename ppc_hash32_pp_prot() Reorganise ppc_hash32_pp_prot() swapping the if legs so it does not test for negative first and clean up to make it shorter. Also rename it to ppc_hash32_prot(). Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu-hash32.c | 2 +- target/ppc/mmu-hash32.h | 43 ++++++++++++++++------------------------- target/ppc/mmu_common.c | 2 +- 3 files changed, 19 insertions(+), 28 deletions(-) diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index d5f2057eb1..8a446c8a7d 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -45,7 +45,7 @@ static int ppc_hash32_pte_prot(int mmu_idx, key = !!(mmuidx_pr(mmu_idx) ? 
(sr & SR32_KP) : (sr & SR32_KS)); pp = pte.pte1 & HPTE32_R_PP; - return ppc_hash32_pp_prot(key, pp, !!(sr & SR32_NX)); + return ppc_hash32_prot(key, pp, !!(sr & SR32_NX)); } static target_ulong hash32_bat_size(int mmu_idx, diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h index f0ce6951b4..bc4eedbecc 100644 --- a/target/ppc/mmu-hash32.h +++ b/target/ppc/mmu-hash32.h @@ -102,49 +102,40 @@ static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu, stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); } -static inline int ppc_hash32_pp_prot(bool key, int pp, bool nx) +static inline int ppc_hash32_prot(bool key, int pp, bool nx) { int prot; - if (key == 0) { - switch (pp) { - case 0x0: - case 0x1: - case 0x2: - prot = PAGE_READ | PAGE_WRITE; - break; - - case 0x3: - prot = PAGE_READ; - break; - - default: - abort(); - } - } else { + if (key) { switch (pp) { case 0x0: prot = 0; break; - case 0x1: case 0x3: prot = PAGE_READ; break; - case 0x2: prot = PAGE_READ | PAGE_WRITE; break; - default: - abort(); + g_assert_not_reached(); + } + } else { + switch (pp) { + case 0x0: + case 0x1: + case 0x2: + prot = PAGE_READ | PAGE_WRITE; + break; + case 0x3: + prot = PAGE_READ; + break; + default: + g_assert_not_reached(); } } - if (nx == 0) { - prot |= PAGE_EXEC; - } - - return prot; + return nx ? 
prot : prot | PAGE_EXEC; } typedef struct { diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index e2542694f0..08c5b61f76 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -120,7 +120,7 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, } /* Keep the matching PTE information */ ctx->raddr = pte1; - ctx->prot = ppc_hash32_pp_prot(ctx->key, pp, ctx->nx); + ctx->prot = ppc_hash32_prot(ctx->key, pp, ctx->nx); if (check_prot_access_type(ctx->prot, access_type)) { /* Access granted */ qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); From 5a902297eedfcef267e525df1e0cf64d95a7d885 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:37 +0200 Subject: [PATCH 66/96] target/ppc/mmu_common.c: Remove local name for a constant The mmask local variable is a less descriptive local name for a constant. Drop it and use the constant directly in the two places it is needed. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 08c5b61f76..2618cdec6a 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -98,7 +98,7 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, target_ulong pte1, int h, MMUAccessType access_type) { - target_ulong ptem, mmask; + target_ulong ptem; int ret, pteh, ptev, pp; ret = -1; @@ -108,12 +108,11 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, if (ptev && h == pteh) { /* Check vsid & api */ ptem = pte0 & PTE_PTEM_MASK; - mmask = PTE_CHECK_MASK; pp = pte1 & 0x00000003; if (ptem == ctx->ptem) { if (ctx->raddr != (hwaddr)-1ULL) { /* all matches should have equal RPN, WIMG & PP */ - if ((ctx->raddr & mmask) != (pte1 & mmask)) { + if ((ctx->raddr & PTE_CHECK_MASK) != (pte1 & PTE_CHECK_MASK)) { qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n"); 
return -3; } From 15465dd8b9e29d2c18d5eecfa0ba0fbdc6c8e511 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:38 +0200 Subject: [PATCH 67/96] target/ppc/mmu_common.c: Remove single use local variable The ptem variable in ppc6xx_tlb_pte_check() is used only once, simplify by removing it as the value is already clear itself without adding a local name for it. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 2618cdec6a..371ec24485 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -98,7 +98,6 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, target_ulong pte1, int h, MMUAccessType access_type) { - target_ulong ptem; int ret, pteh, ptev, pp; ret = -1; @@ -107,9 +106,8 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, pteh = (pte0 >> 6) & 1; if (ptev && h == pteh) { /* Check vsid & api */ - ptem = pte0 & PTE_PTEM_MASK; pp = pte1 & 0x00000003; - if (ptem == ctx->ptem) { + if ((pte0 & PTE_PTEM_MASK) == ctx->ptem) { if (ctx->raddr != (hwaddr)-1ULL) { /* all matches should have equal RPN, WIMG & PP */ if ((ctx->raddr & PTE_CHECK_MASK) != (pte1 & PTE_CHECK_MASK)) { From f6b50257c7c7297be6bcafe8ff977a38c965d0c0 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:39 +0200 Subject: [PATCH 68/96] target/ppc/mmu_common.c: Remove single use local variable The ptev variable in ppc6xx_tlb_pte_check() is used only once and just obfuscates an otherwise clear value. Get rid of it. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 371ec24485..16578f7fa5 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -98,13 +98,12 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, target_ulong pte1, int h, MMUAccessType access_type) { - int ret, pteh, ptev, pp; + int ret, pteh, pp; ret = -1; /* Check validity and table match */ - ptev = pte_is_valid(pte0); pteh = (pte0 >> 6) & 1; - if (ptev && h == pteh) { + if (pte_is_valid(pte0) && h == pteh) { /* Check vsid & api */ pp = pte1 & 0x00000003; if ((pte0 & PTE_PTEM_MASK) == ctx->ptem) { From 3208c36ad34213eee36b1427d8cb944cfa0a192c Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:40 +0200 Subject: [PATCH 69/96] target/ppc/mmu_common.c: Remove another single use local variable In ppc6xx_tlb_pte_check() the pteh variable is used only once to compare to the h parameter of the function. Inline its value and use pteh name for the function parameter which is more descriptive. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 16578f7fa5..b21f52290f 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -95,15 +95,14 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr, } static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, - target_ulong pte1, int h, + target_ulong pte1, int pteh, MMUAccessType access_type) { - int ret, pteh, pp; + int ret, pp; ret = -1; /* Check validity and table match */ - pteh = (pte0 >> 6) & 1; - if (pte_is_valid(pte0) && h == pteh) { + if (pte_is_valid(pte0) && ((pte0 >> 6) & 1) == pteh) { /* Check vsid & api */ pp = pte1 & 0x00000003; if ((pte0 & PTE_PTEM_MASK) == ctx->ptem) { From 7ee01cf8632e0666f933a88ddd97315cc17cc4e0 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:41 +0200 Subject: [PATCH 70/96] target/ppc/mmu_common.c: Remove yet another single use local variable In ppc6xx_tlb_pte_check() the pp variable is used only once to pass it to a function parameter with the same name. Remove the local and inline the value. Also use named constant for the hex value to make it clearer. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index b21f52290f..799d2ced9b 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -98,13 +98,12 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, target_ulong pte1, int pteh, MMUAccessType access_type) { - int ret, pp; + int ret; ret = -1; /* Check validity and table match */ if (pte_is_valid(pte0) && ((pte0 >> 6) & 1) == pteh) { /* Check vsid & api */ - pp = pte1 & 0x00000003; if ((pte0 & PTE_PTEM_MASK) == ctx->ptem) { if (ctx->raddr != (hwaddr)-1ULL) { /* all matches should have equal RPN, WIMG & PP */ @@ -115,7 +114,7 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, } /* Keep the matching PTE information */ ctx->raddr = pte1; - ctx->prot = ppc_hash32_prot(ctx->key, pp, ctx->nx); + ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, ctx->nx); if (check_prot_access_type(ctx->prot, access_type)) { /* Access granted */ qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); From 0e65cea1bd33d4f2917b272c3b03e0eeb8e7b6fd Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:42 +0200 Subject: [PATCH 71/96] target/ppc/mmu_common.c: Return directly in ppc6xx_tlb_pte_check() Instead of using a local ret variable return directly and remove the local. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 799d2ced9b..a5ae11394d 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -98,9 +98,6 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, target_ulong pte1, int pteh, MMUAccessType access_type) { - int ret; - - ret = -1; /* Check validity and table match */ if (pte_is_valid(pte0) && ((pte0 >> 6) & 1) == pteh) { /* Check vsid & api */ @@ -118,16 +115,15 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, if (check_prot_access_type(ctx->prot, access_type)) { /* Access granted */ qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); - ret = 0; + return 0; } else { /* Access right violation */ qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n"); - ret = -2; + return -2; } } } - - return ret; + return -1; } static int pte_update_flags(mmu_ctx_t *ctx, target_ulong *pte1p, From 9e2d6802b5c7d15d0d82bb7c9370ebd3df7492ac Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:43 +0200 Subject: [PATCH 72/96] target/ppc/mmu_common.c: Simplify ppc6xx_tlb_pte_check() Invert conditions to avoid deep nested ifs and return early instead. Remove some obvious comments that don't add more clarity. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 43 ++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index a5ae11394d..28adb3ca10 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -99,31 +99,26 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, MMUAccessType access_type) { /* Check validity and table match */ - if (pte_is_valid(pte0) && ((pte0 >> 6) & 1) == pteh) { - /* Check vsid & api */ - if ((pte0 & PTE_PTEM_MASK) == ctx->ptem) { - if (ctx->raddr != (hwaddr)-1ULL) { - /* all matches should have equal RPN, WIMG & PP */ - if ((ctx->raddr & PTE_CHECK_MASK) != (pte1 & PTE_CHECK_MASK)) { - qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n"); - return -3; - } - } - /* Keep the matching PTE information */ - ctx->raddr = pte1; - ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, ctx->nx); - if (check_prot_access_type(ctx->prot, access_type)) { - /* Access granted */ - qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); - return 0; - } else { - /* Access right violation */ - qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n"); - return -2; - } - } + if (!pte_is_valid(pte0) || ((pte0 >> 6) & 1) != pteh || + (pte0 & PTE_PTEM_MASK) != ctx->ptem) { + return -1; + } + /* all matches should have equal RPN, WIMG & PP */ + if (ctx->raddr != (hwaddr)-1ULL && + (ctx->raddr & PTE_CHECK_MASK) != (pte1 & PTE_CHECK_MASK)) { + qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n"); + return -3; + } + /* Keep the matching PTE information */ + ctx->raddr = pte1; + ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, ctx->nx); + if (check_prot_access_type(ctx->prot, access_type)) { + qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); + return 0; + } else { + qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n"); + return -2; } - return -1; } static int 
pte_update_flags(mmu_ctx_t *ctx, target_ulong *pte1p, From 0e2d7fc817678d8eedd8cae33bb7c191887466c8 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:44 +0200 Subject: [PATCH 73/96] target/ppc/mmu_common.c: Remove unused field from mmu_ctx_t The eaddr field of mmu_ctx_t is set once but never used so can be removed. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 28adb3ca10..0a07023f48 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -40,7 +40,6 @@ /* Context used internally during MMU translations */ typedef struct { hwaddr raddr; /* Real address */ - hwaddr eaddr; /* Effective address */ int prot; /* Protection bits */ hwaddr hash[2]; /* Pagetable hash values */ target_ulong ptem; /* Virtual segment ID | API */ @@ -348,7 +347,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, /* Perform segment based translation when no BATs matched */ pr = FIELD_EX64(env->msr, MSR, PR); - ctx->eaddr = eaddr; sr = env->sr[eaddr >> 28]; ctx->key = (((sr & 0x20000000) && pr) || From f6f8838b055d231ee4bfc31ddaac95fae20834b6 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:45 +0200 Subject: [PATCH 74/96] target/ppc/mmu_common.c: Remove hash field from mmu_ctx_t Return hash value via a parameter and remove it from mmu_ctx_t.
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 0a07023f48..e3537c63c0 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -41,7 +41,6 @@ typedef struct { hwaddr raddr; /* Real address */ int prot; /* Protection bits */ - hwaddr hash[2]; /* Pagetable hash values */ target_ulong ptem; /* Virtual segment ID | API */ int key; /* Access key */ int nx; /* Non-execute area */ @@ -331,7 +330,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, } static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong eaddr, + target_ulong eaddr, hwaddr *hashp, MMUAccessType access_type, int type) { PowerPCCPU *cpu = env_archcpu(env); @@ -379,8 +378,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, qemu_log_mask(CPU_LOG_MMU, "htab_base " HWADDR_FMT_plx " htab_mask " HWADDR_FMT_plx " hash " HWADDR_FMT_plx "\n", ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash); - ctx->hash[0] = hash; - ctx->hash[1] = ~hash; + *hashp = hash; /* Initialize real address with an invalid value */ ctx->raddr = (hwaddr)-1ULL; @@ -761,8 +759,8 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; mmu_ctx_t ctx; - int type; - int ret; + hwaddr hash = 0; /* init to 0 to avoid used uninit warning */ + int type, ret; if (ppc_real_mode_xlate(cpu, eaddr, access_type, raddrp, psizep, protp)) { return true; @@ -779,9 +777,8 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, } ctx.prot = 0; - ctx.hash[0] = 0; - ctx.hash[1] = 0; - ret = mmu6xx_get_physical_address(env, &ctx, eaddr, access_type, type); + ret = mmu6xx_get_physical_address(env, &ctx, eaddr, &hash, + access_type, type); if (ret == 0) { *raddrp = ctx.raddr; *protp = ctx.prot; @@ -834,9 
+831,9 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, tlb_miss: env->error_code |= ctx.key << 19; env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) + - get_pteg_offset32(cpu, ctx.hash[0]); + get_pteg_offset32(cpu, hash); env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) + - get_pteg_offset32(cpu, ctx.hash[1]); + get_pteg_offset32(cpu, ~hash); break; case -2: /* Access rights violation */ From f8e0cc94192bd040421ff704a8efdc3a83391ffe Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:46 +0200 Subject: [PATCH 75/96] target/ppc/mmu_common.c: Remove pte_update_flags() This function is used only once, its return value is ignored and one of its parameters is a return value from a previous call. It is better to inline it in the caller and remove it. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index e3537c63c0..c4902b7632 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -119,39 +119,14 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, } } -static int pte_update_flags(mmu_ctx_t *ctx, target_ulong *pte1p, - int ret, MMUAccessType access_type) -{ - int store = 0; - - /* Update page flags */ - if (!(*pte1p & 0x00000100)) { - /* Update accessed flag */ - *pte1p |= 0x00000100; - store = 1; - } - if (!(*pte1p & 0x00000080)) { - if (access_type == MMU_DATA_STORE && ret == 0) { - /* Update changed flag */ - *pte1p |= 0x00000080; - store = 1; - } else { - /* Force page fault for first write access */ - ctx->prot &= ~PAGE_WRITE; - } - } - - return store; -} - /* Software driven TLB helpers */ static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx, target_ulong eaddr, MMUAccessType access_type) { ppc6xx_tlb_t *tlb; - int nr, best, way; - int ret; + target_ulong *pte1p; + int nr, 
best, way, ret; best = -1; ret = -1; /* No TLB found */ @@ -204,7 +179,17 @@ done: " prot=%01x ret=%d\n", ctx->raddr & TARGET_PAGE_MASK, ctx->prot, ret); /* Update page flags */ - pte_update_flags(ctx, &env->tlb.tlb6[best].pte1, ret, access_type); + pte1p = &env->tlb.tlb6[best].pte1; + *pte1p |= 0x00000100; /* Update accessed flag */ + if (!(*pte1p & 0x00000080)) { + if (access_type == MMU_DATA_STORE && ret == 0) { + /* Update changed flag */ + *pte1p |= 0x00000080; + } else { + /* Force page fault for first write access */ + ctx->prot &= ~PAGE_WRITE; + } + } } #if defined(DUMP_PAGE_TABLES) if (qemu_loglevel_mask(CPU_LOG_MMU)) { From 691cf34f216141138bb0289735a762dd7d812137 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:47 +0200 Subject: [PATCH 76/96] target/ppc/mmu_common.c: Remove nx field from mmu_ctx_t Pass it as a parameter instead. Also use named constants instead of hex values when extracting bits from SR. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index c4902b7632..9f402a979d 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -43,7 +43,6 @@ typedef struct { int prot; /* Protection bits */ target_ulong ptem; /* Virtual segment ID | API */ int key; /* Access key */ - int nx; /* Non-execute area */ } mmu_ctx_t; void ppc_store_sdr1(CPUPPCState *env, target_ulong value) @@ -94,7 +93,7 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr, static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, target_ulong pte1, int pteh, - MMUAccessType access_type) + MMUAccessType access_type, bool nx) { /* Check validity and table match */ if (!pte_is_valid(pte0) || ((pte0 >> 6) & 1) != pteh || @@ -109,7 +108,7 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, } /* Keep the 
matching PTE information */ ctx->raddr = pte1; - ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, ctx->nx); + ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, nx); if (check_prot_access_type(ctx->prot, access_type)) { qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); return 0; @@ -121,8 +120,9 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, /* Software driven TLB helpers */ -static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong eaddr, MMUAccessType access_type) +static int ppc6xx_tlb_check(CPUPPCState *env, + mmu_ctx_t *ctx, target_ulong eaddr, + MMUAccessType access_type, bool nx) { ppc6xx_tlb_t *tlb; target_ulong *pte1p; @@ -150,7 +150,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx, access_type == MMU_DATA_STORE ? 'S' : 'L', access_type == MMU_INST_FETCH ? 'I' : 'D'); switch (ppc6xx_tlb_pte_check(ctx, tlb->pte0, tlb->pte1, - 0, access_type)) { + 0, access_type, nx)) { case -2: /* Access violation */ ret = -2; @@ -322,7 +322,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, hwaddr hash; target_ulong vsid, sr, pgidx; int ds, target_page_bits; - bool pr; + bool pr, nx; /* First try to find a BAT entry if there are any */ if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) { @@ -336,8 +336,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, ctx->key = (((sr & 0x20000000) && pr) || ((sr & 0x40000000) && !pr)) ? 1 : 0; ds = sr & 0x80000000 ? 1 : 0; - ctx->nx = sr & 0x10000000 ? 
1 : 0; - vsid = sr & 0x00FFFFFF; + nx = sr & SR32_NX; + vsid = sr & SR32_VSID; target_page_bits = TARGET_PAGE_BITS; qemu_log_mask(CPU_LOG_MMU, "Check segment v=" TARGET_FMT_lx " %d " TARGET_FMT_lx @@ -352,10 +352,10 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, ctx->ptem = (vsid << 7) | (pgidx >> 10); qemu_log_mask(CPU_LOG_MMU, "pte segment: key=%d ds %d nx %d vsid " - TARGET_FMT_lx "\n", ctx->key, ds, ctx->nx, vsid); + TARGET_FMT_lx "\n", ctx->key, ds, nx, vsid); if (!ds) { /* Check if instruction fetch is allowed, if needed */ - if (type == ACCESS_CODE && ctx->nx) { + if (type == ACCESS_CODE && nx) { qemu_log_mask(CPU_LOG_MMU, "No access allowed\n"); return -3; } @@ -368,7 +368,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, /* Initialize real address with an invalid value */ ctx->raddr = (hwaddr)-1ULL; /* Software TLB search */ - return ppc6xx_tlb_check(env, ctx, eaddr, access_type); + return ppc6xx_tlb_check(env, ctx, eaddr, access_type, nx); } /* Direct-store segment : absolutely *BUGGY* for now */ From aaf5845b87c6acb7f3e95ea8b45947f98c3fdc7e Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:48 +0200 Subject: [PATCH 77/96] target/ppc/mmu_common.c: Convert local variable to bool In mmu6xx_get_physical_address() ds is used as bool, declare it as such. Also use named constant instead of hex value. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 9f402a979d..5145bde7f9 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -321,8 +321,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, PowerPCCPU *cpu = env_archcpu(env); hwaddr hash; target_ulong vsid, sr, pgidx; - int ds, target_page_bits; - bool pr, nx; + int target_page_bits; + bool pr, ds, nx; /* First try to find a BAT entry if there are any */ if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) { @@ -335,7 +335,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, sr = env->sr[eaddr >> 28]; ctx->key = (((sr & 0x20000000) && pr) || ((sr & 0x40000000) && !pr)) ? 1 : 0; - ds = sr & 0x80000000 ? 1 : 0; + ds = sr & SR32_T; nx = sr & SR32_NX; vsid = sr & SR32_VSID; target_page_bits = TARGET_PAGE_BITS; From 8abd6d4288f69627f49d1e6e228d2f0d9d490c21 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:49 +0200 Subject: [PATCH 78/96] target/ppc/mmu_common.c: Remove single use local variable In mmu6xx_get_physical_address() target_page_bits local is declared only to use TARGET_PAGE_BITS once. Drop the unneeded variable. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 5145bde7f9..0152e8d875 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -321,7 +321,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, PowerPCCPU *cpu = env_archcpu(env); hwaddr hash; target_ulong vsid, sr, pgidx; - int target_page_bits; bool pr, ds, nx; /* First try to find a BAT entry if there are any */ @@ -338,7 +337,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, ds = sr & SR32_T; nx = sr & SR32_NX; vsid = sr & SR32_VSID; - target_page_bits = TARGET_PAGE_BITS; qemu_log_mask(CPU_LOG_MMU, "Check segment v=" TARGET_FMT_lx " %d " TARGET_FMT_lx " nip=" TARGET_FMT_lx " lr=" TARGET_FMT_lx @@ -347,7 +345,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, (int)FIELD_EX64(env->msr, MSR, IR), (int)FIELD_EX64(env->msr, MSR, DR), pr ? 1 : 0, access_type == MMU_DATA_STORE, type); - pgidx = (eaddr & ~SEGMENT_MASK_256M) >> target_page_bits; + pgidx = (eaddr & ~SEGMENT_MASK_256M) >> TARGET_PAGE_BITS; hash = vsid ^ pgidx; ctx->ptem = (vsid << 7) | (pgidx >> 10); From 40df08d2239f00abacdd58a39c34a66264ec91a9 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:50 +0200 Subject: [PATCH 79/96] target/ppc/mmu_common.c: Simplify a switch statement In mmu6xx_get_physical_address() the switch handles all cases so the default is never reached and can be dropped. Also group together cases which just return -4. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 0152e8d875..b2993e8563 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -375,15 +375,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, case ACCESS_INT: /* Integer load/store : only access allowed */ break; - case ACCESS_CODE: - /* No code fetch is allowed in direct-store areas */ - return -4; - case ACCESS_FLOAT: - /* Floating point load/store */ - return -4; - case ACCESS_RES: - /* lwarx, ldarx or srwcx. */ - return -4; case ACCESS_CACHE: /* * dcba, dcbt, dcbtst, dcbf, dcbi, dcbst, dcbz, or icbi @@ -393,12 +384,10 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, */ ctx->raddr = eaddr; return 0; - case ACCESS_EXT: - /* eciwx or ecowx */ - return -4; - default: - qemu_log_mask(CPU_LOG_MMU, "ERROR: instruction should not need address" - " translation\n"); + case ACCESS_CODE: /* No code fetch is allowed in direct-store areas */ + case ACCESS_FLOAT: /* Floating point load/store */ + case ACCESS_RES: /* lwarx, ldarx or srwcx. */ + case ACCESS_EXT: /* eciwx or ecowx */ return -4; } if ((access_type == MMU_DATA_STORE || ctx->key != 1) && From 0ce61ffaf1c573fdbd2079214499b435b71a1b83 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:51 +0200 Subject: [PATCH 80/96] target/ppc/mmu_common.c: Inline and remove ppc6xx_tlb_pte_check() This function is only called once and we can make the caller simpler by inlining it. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 71 +++++++++++++---------------------------- 1 file changed, 22 insertions(+), 49 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index b2993e8563..784e833ff2 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -91,33 +91,6 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr, return nr; } -static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, - target_ulong pte1, int pteh, - MMUAccessType access_type, bool nx) -{ - /* Check validity and table match */ - if (!pte_is_valid(pte0) || ((pte0 >> 6) & 1) != pteh || - (pte0 & PTE_PTEM_MASK) != ctx->ptem) { - return -1; - } - /* all matches should have equal RPN, WIMG & PP */ - if (ctx->raddr != (hwaddr)-1ULL && - (ctx->raddr & PTE_CHECK_MASK) != (pte1 & PTE_CHECK_MASK)) { - qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n"); - return -3; - } - /* Keep the matching PTE information */ - ctx->raddr = pte1; - ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, nx); - if (check_prot_access_type(ctx->prot, access_type)) { - qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); - return 0; - } else { - qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n"); - return -2; - } -} - /* Software driven TLB helpers */ static int ppc6xx_tlb_check(CPUPPCState *env, @@ -149,32 +122,32 @@ static int ppc6xx_tlb_check(CPUPPCState *env, tlb->EPN, eaddr, tlb->pte1, access_type == MMU_DATA_STORE ? 'S' : 'L', access_type == MMU_INST_FETCH ? 
'I' : 'D'); - switch (ppc6xx_tlb_pte_check(ctx, tlb->pte0, tlb->pte1, - 0, access_type, nx)) { - case -2: - /* Access violation */ - ret = -2; - best = nr; - break; - case -1: /* No match */ - case -3: /* TLB inconsistency */ - default: - break; - case 0: - /* access granted */ - /* - * XXX: we should go on looping to check all TLBs - * consistency but we can speed-up the whole thing as - * the result would be undefined if TLBs are not - * consistent. - */ + /* Check validity and table match */ + if (!pte_is_valid(tlb->pte0) || ((tlb->pte0 >> 6) & 1) != 0 || + (tlb->pte0 & PTE_PTEM_MASK) != ctx->ptem) { + continue; + } + /* all matches should have equal RPN, WIMG & PP */ + if (ctx->raddr != (hwaddr)-1ULL && + (ctx->raddr & PTE_CHECK_MASK) != (tlb->pte1 & PTE_CHECK_MASK)) { + qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n"); + /* TLB inconsistency */ + continue; + } + /* Keep the matching PTE information */ + best = nr; + ctx->raddr = tlb->pte1; + ctx->prot = ppc_hash32_prot(ctx->key, tlb->pte1 & HPTE32_R_PP, nx); + if (check_prot_access_type(ctx->prot, access_type)) { + qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); ret = 0; - best = nr; - goto done; + break; + } else { + qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n"); + ret = -2; } } if (best != -1) { -done: qemu_log_mask(CPU_LOG_MMU, "found TLB at addr " HWADDR_FMT_plx " prot=%01x ret=%d\n", ctx->raddr & TARGET_PAGE_MASK, ctx->prot, ret); From cab21e2ecb917abec597a184fd44d479b4bb1e66 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:53 +0200 Subject: [PATCH 81/96] target/ppc/mmu_common.c: Remove ptem field from mmu_ctx_t Instead of passing around ptem in context use it once in the same function so it can be removed from mmu_ctx_t. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 784e833ff2..339df377e8 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -41,7 +41,6 @@ typedef struct { hwaddr raddr; /* Real address */ int prot; /* Protection bits */ - target_ulong ptem; /* Virtual segment ID | API */ int key; /* Access key */ } mmu_ctx_t; @@ -95,16 +94,18 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr, static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx, target_ulong eaddr, - MMUAccessType access_type, bool nx) + MMUAccessType access_type, target_ulong ptem, + bool nx) { ppc6xx_tlb_t *tlb; target_ulong *pte1p; int nr, best, way, ret; + bool is_code = (access_type == MMU_INST_FETCH); best = -1; ret = -1; /* No TLB found */ for (way = 0; way < env->nb_ways; way++) { - nr = ppc6xx_tlb_getnum(env, eaddr, way, access_type == MMU_INST_FETCH); + nr = ppc6xx_tlb_getnum(env, eaddr, way, is_code); tlb = &env->tlb.tlb6[nr]; /* This test "emulates" the PTE index match for hardware TLBs */ if ((eaddr & TARGET_PAGE_MASK) != tlb->EPN) { @@ -124,7 +125,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env, access_type == MMU_INST_FETCH ? 'I' : 'D'); /* Check validity and table match */ if (!pte_is_valid(tlb->pte0) || ((tlb->pte0 >> 6) & 1) != 0 || - (tlb->pte0 & PTE_PTEM_MASK) != ctx->ptem) { + (tlb->pte0 & PTE_PTEM_MASK) != ptem) { continue; } /* all matches should have equal RPN, WIMG & PP */ @@ -164,6 +165,10 @@ static int ppc6xx_tlb_check(CPUPPCState *env, } } } + if (ret == -1) { + int r = is_code ? 
SPR_ICMP : SPR_DCMP; + env->spr[r] = ptem; + } #if defined(DUMP_PAGE_TABLES) if (qemu_loglevel_mask(CPU_LOG_MMU)) { CPUState *cs = env_cpu(env); @@ -293,7 +298,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, { PowerPCCPU *cpu = env_archcpu(env); hwaddr hash; - target_ulong vsid, sr, pgidx; + target_ulong vsid, sr, pgidx, ptem; bool pr, ds, nx; /* First try to find a BAT entry if there are any */ @@ -320,7 +325,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, access_type == MMU_DATA_STORE, type); pgidx = (eaddr & ~SEGMENT_MASK_256M) >> TARGET_PAGE_BITS; hash = vsid ^ pgidx; - ctx->ptem = (vsid << 7) | (pgidx >> 10); + ptem = (vsid << 7) | (pgidx >> 10); /* Virtual segment ID | API */ qemu_log_mask(CPU_LOG_MMU, "pte segment: key=%d ds %d nx %d vsid " TARGET_FMT_lx "\n", ctx->key, ds, nx, vsid); @@ -339,7 +344,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, /* Initialize real address with an invalid value */ ctx->raddr = (hwaddr)-1ULL; /* Software TLB search */ - return ppc6xx_tlb_check(env, ctx, eaddr, access_type, nx); + return ppc6xx_tlb_check(env, ctx, eaddr, access_type, ptem, nx); } /* Direct-store segment : absolutely *BUGGY* for now */ @@ -741,7 +746,7 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, cs->exception_index = POWERPC_EXCP_IFTLB; env->error_code = 1 << 18; env->spr[SPR_IMISS] = eaddr; - env->spr[SPR_ICMP] = 0x80000000 | ctx.ptem; + env->spr[SPR_ICMP] |= 0x80000000; goto tlb_miss; case -2: /* Access rights violation */ @@ -772,7 +777,7 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, env->error_code = 0; } env->spr[SPR_DMISS] = eaddr; - env->spr[SPR_DCMP] = 0x80000000 | ctx.ptem; + env->spr[SPR_DCMP] |= 0x80000000; tlb_miss: env->error_code |= ctx.key << 19; env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) + From 719a1da19ee67cfd3ef5b50f778d0204daaeb0b2 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:54 +0200 Subject: 
[PATCH 82/96] target/ppc: Add function to get protection key for hash32 MMU Add a function to get key bit from SR and use it instead of open coded version. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu-hash32.c | 9 ++++++--- target/ppc/mmu-hash32.h | 5 +++++ target/ppc/mmu_common.c | 3 +-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index 8a446c8a7d..93559447ff 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -42,7 +42,7 @@ static int ppc_hash32_pte_prot(int mmu_idx, { unsigned pp, key; - key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS)); + key = ppc_hash32_key(mmuidx_pr(mmu_idx), sr); pp = pte.pte1 & HPTE32_R_PP; return ppc_hash32_prot(key, pp, !!(sr & SR32_NX)); @@ -145,7 +145,6 @@ static bool ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr, { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - int key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS)); qemu_log_mask(CPU_LOG_MMU, "direct store...\n"); @@ -206,7 +205,11 @@ static bool ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr, cpu_abort(cs, "ERROR: insn should not need address translation\n"); } - *prot = key ? PAGE_READ | PAGE_WRITE : PAGE_READ; + if (ppc_hash32_key(mmuidx_pr(mmu_idx), sr)) { + *prot = PAGE_READ | PAGE_WRITE; + } else { + *prot = PAGE_READ; + } if (check_prot_access_type(*prot, access_type)) { *raddr = eaddr; return true; diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h index bc4eedbecc..5902cf8333 100644 --- a/target/ppc/mmu-hash32.h +++ b/target/ppc/mmu-hash32.h @@ -102,6 +102,11 @@ static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu, stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); } +static inline bool ppc_hash32_key(bool pr, target_ulong sr) +{ + return pr ? 
(sr & SR32_KP) : (sr & SR32_KS); +} + static inline int ppc_hash32_prot(bool key, int pp, bool nx) { int prot; diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 339df377e8..1ed2f45ac7 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -310,8 +310,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, pr = FIELD_EX64(env->msr, MSR, PR); sr = env->sr[eaddr >> 28]; - ctx->key = (((sr & 0x20000000) && pr) || - ((sr & 0x40000000) && !pr)) ? 1 : 0; + ctx->key = ppc_hash32_key(pr, sr); ds = sr & SR32_T; nx = sr & SR32_NX; vsid = sr & SR32_VSID; From 620ba617df15ae0bce3be794c870525e329ab78c Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:55 +0200 Subject: [PATCH 83/96] target/ppc/mmu-hash32.c: Inline and remove ppc_hash32_pte_prot() This is used only once and can be inlined. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu-hash32.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index 93559447ff..160311de87 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -37,17 +37,6 @@ # define LOG_BATS(...) do { } while (0) #endif -static int ppc_hash32_pte_prot(int mmu_idx, - target_ulong sr, ppc_hash_pte32_t pte) -{ - unsigned pp, key; - - key = ppc_hash32_key(mmuidx_pr(mmu_idx), sr); - pp = pte.pte1 & HPTE32_R_PP; - - return ppc_hash32_prot(key, pp, !!(sr & SR32_NX)); -} - static target_ulong hash32_bat_size(int mmu_idx, target_ulong batu, target_ulong batl) { @@ -341,10 +330,10 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; target_ulong sr; - hwaddr pte_offset; + hwaddr pte_offset, raddr; ppc_hash_pte32_t pte; + bool key; int prot; - hwaddr raddr; /* There are no hash32 large pages. 
*/ *psizep = TARGET_PAGE_BITS; @@ -426,8 +415,8 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, "found PTE at offset %08" HWADDR_PRIx "\n", pte_offset); /* 7. Check access permissions */ - - prot = ppc_hash32_pte_prot(mmu_idx, sr, pte); + key = ppc_hash32_key(mmuidx_pr(mmu_idx), sr); + prot = ppc_hash32_prot(key, pte.pte1 & HPTE32_R_PP, sr & SR32_NX); if (!check_prot_access_type(prot, access_type)) { /* Access right violation */ From fa7f2cb91b8f6805ec5d8581ca067ac83acc287e Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:56 +0200 Subject: [PATCH 84/96] target/ppc/mmu_common.c: Init variable in function that relies on it The ppc6xx_tlb_check() relies on the caller to initialise raddr field in ctx. Move this init from the only caller into the function. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 1ed2f45ac7..fe321ab49c 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -102,6 +102,8 @@ static int ppc6xx_tlb_check(CPUPPCState *env, int nr, best, way, ret; bool is_code = (access_type == MMU_INST_FETCH); + /* Initialize real address with an invalid value */ + ctx->raddr = (hwaddr)-1ULL; best = -1; ret = -1; /* No TLB found */ for (way = 0; way < env->nb_ways; way++) { @@ -340,8 +342,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash); *hashp = hash; - /* Initialize real address with an invalid value */ - ctx->raddr = (hwaddr)-1ULL; /* Software TLB search */ return ppc6xx_tlb_check(env, ctx, eaddr, access_type, ptem, nx); } From da5c1d20e9d63575cb358158895a0efa55682c35 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:57 +0200 Subject: [PATCH 85/96] target/ppc/mmu_common.c: Remove key field 
from mmu_ctx_t Pass it as a function parameter and remove it from mmu_ctx_t. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index fe321ab49c..be09c3b1a3 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -41,7 +41,6 @@ typedef struct { hwaddr raddr; /* Real address */ int prot; /* Protection bits */ - int key; /* Access key */ } mmu_ctx_t; void ppc_store_sdr1(CPUPPCState *env, target_ulong value) @@ -95,7 +94,7 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr, static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx, target_ulong eaddr, MMUAccessType access_type, target_ulong ptem, - bool nx) + bool key, bool nx) { ppc6xx_tlb_t *tlb; target_ulong *pte1p; @@ -140,7 +139,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env, /* Keep the matching PTE information */ best = nr; ctx->raddr = tlb->pte1; - ctx->prot = ppc_hash32_prot(ctx->key, tlb->pte1 & HPTE32_R_PP, nx); + ctx->prot = ppc_hash32_prot(key, tlb->pte1 & HPTE32_R_PP, nx); if (check_prot_access_type(ctx->prot, access_type)) { qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); ret = 0; @@ -295,13 +294,14 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, } static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong eaddr, hwaddr *hashp, + target_ulong eaddr, + hwaddr *hashp, bool *keyp, MMUAccessType access_type, int type) { PowerPCCPU *cpu = env_archcpu(env); hwaddr hash; target_ulong vsid, sr, pgidx, ptem; - bool pr, ds, nx; + bool key, pr, ds, nx; /* First try to find a BAT entry if there are any */ if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) { @@ -312,7 +312,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, pr = FIELD_EX64(env->msr, MSR, PR); sr = env->sr[eaddr >> 
28]; - ctx->key = ppc_hash32_key(pr, sr); + key = ppc_hash32_key(pr, sr); + *keyp = key; ds = sr & SR32_T; nx = sr & SR32_NX; vsid = sr & SR32_VSID; @@ -329,7 +330,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, ptem = (vsid << 7) | (pgidx >> 10); /* Virtual segment ID | API */ qemu_log_mask(CPU_LOG_MMU, "pte segment: key=%d ds %d nx %d vsid " - TARGET_FMT_lx "\n", ctx->key, ds, nx, vsid); + TARGET_FMT_lx "\n", key, ds, nx, vsid); if (!ds) { /* Check if instruction fetch is allowed, if needed */ if (type == ACCESS_CODE && nx) { @@ -343,7 +344,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, *hashp = hash; /* Software TLB search */ - return ppc6xx_tlb_check(env, ctx, eaddr, access_type, ptem, nx); + return ppc6xx_tlb_check(env, ctx, eaddr, access_type, ptem, key, nx); } /* Direct-store segment : absolutely *BUGGY* for now */ @@ -367,8 +368,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, case ACCESS_EXT: /* eciwx or ecowx */ return -4; } - if ((access_type == MMU_DATA_STORE || ctx->key != 1) && - (access_type == MMU_DATA_LOAD || ctx->key != 0)) { + if ((access_type == MMU_DATA_STORE || !key) && + (access_type == MMU_DATA_LOAD || key)) { ctx->raddr = eaddr; return 2; } @@ -709,6 +710,7 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, CPUPPCState *env = &cpu->env; mmu_ctx_t ctx; hwaddr hash = 0; /* init to 0 to avoid used uninit warning */ + bool key; int type, ret; if (ppc_real_mode_xlate(cpu, eaddr, access_type, raddrp, psizep, protp)) { @@ -726,7 +728,7 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, } ctx.prot = 0; - ret = mmu6xx_get_physical_address(env, &ctx, eaddr, &hash, + ret = mmu6xx_get_physical_address(env, &ctx, eaddr, &hash, &key, access_type, type); if (ret == 0) { *raddrp = ctx.raddr; @@ -778,7 +780,7 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, env->spr[SPR_DMISS] = eaddr; env->spr[SPR_DCMP] |= 0x80000000; tlb_miss: - 
env->error_code |= ctx.key << 19; + env->error_code |= key << 19; env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) + get_pteg_offset32(cpu, hash); env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) + From aa781c102a445e1007a307a972fed24c66b9c24c Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:58 +0200 Subject: [PATCH 86/96] target/ppc/mmu_common.c: Stop using ctx in ppc6xx_tlb_check() Pass raddr and prot in function parameters instead. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index be09c3b1a3..ede409eb99 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -91,10 +91,9 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr, /* Software driven TLB helpers */ -static int ppc6xx_tlb_check(CPUPPCState *env, - mmu_ctx_t *ctx, target_ulong eaddr, - MMUAccessType access_type, target_ulong ptem, - bool key, bool nx) +static int ppc6xx_tlb_check(CPUPPCState *env, hwaddr *raddr, int *prot, + target_ulong eaddr, MMUAccessType access_type, + target_ulong ptem, bool key, bool nx) { ppc6xx_tlb_t *tlb; target_ulong *pte1p; @@ -102,7 +101,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env, bool is_code = (access_type == MMU_INST_FETCH); /* Initialize real address with an invalid value */ - ctx->raddr = (hwaddr)-1ULL; + *raddr = (hwaddr)-1ULL; best = -1; ret = -1; /* No TLB found */ for (way = 0; way < env->nb_ways; way++) { @@ -130,17 +129,17 @@ static int ppc6xx_tlb_check(CPUPPCState *env, continue; } /* all matches should have equal RPN, WIMG & PP */ - if (ctx->raddr != (hwaddr)-1ULL && - (ctx->raddr & PTE_CHECK_MASK) != (tlb->pte1 & PTE_CHECK_MASK)) { + if (*raddr != (hwaddr)-1ULL && + (*raddr & PTE_CHECK_MASK) != (tlb->pte1 & PTE_CHECK_MASK)) { qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n"); /* TLB inconsistency */ 
continue; } /* Keep the matching PTE information */ best = nr; - ctx->raddr = tlb->pte1; - ctx->prot = ppc_hash32_prot(key, tlb->pte1 & HPTE32_R_PP, nx); - if (check_prot_access_type(ctx->prot, access_type)) { + *raddr = tlb->pte1; + *prot = ppc_hash32_prot(key, tlb->pte1 & HPTE32_R_PP, nx); + if (check_prot_access_type(*prot, access_type)) { qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); ret = 0; break; @@ -152,7 +151,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env, if (best != -1) { qemu_log_mask(CPU_LOG_MMU, "found TLB at addr " HWADDR_FMT_plx " prot=%01x ret=%d\n", - ctx->raddr & TARGET_PAGE_MASK, ctx->prot, ret); + *raddr & TARGET_PAGE_MASK, *prot, ret); /* Update page flags */ pte1p = &env->tlb.tlb6[best].pte1; *pte1p |= 0x00000100; /* Update accessed flag */ @@ -162,7 +161,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env, *pte1p |= 0x00000080; } else { /* Force page fault for first write access */ - ctx->prot &= ~PAGE_WRITE; + *prot &= ~PAGE_WRITE; } } } @@ -344,7 +343,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, *hashp = hash; /* Software TLB search */ - return ppc6xx_tlb_check(env, ctx, eaddr, access_type, ptem, key, nx); + return ppc6xx_tlb_check(env, &ctx->raddr, &ctx->prot, eaddr, + access_type, ptem, key, nx); } /* Direct-store segment : absolutely *BUGGY* for now */ From 68bf3a7bbc64d75d6a78a7d31b6b26e343a96320 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:12:59 +0200 Subject: [PATCH 87/96] target/ppc/mmu_common.c: Rename function parameter Rename parameter of get_bat_6xx_tlb() from virtual to eaddr to match other functions. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index ede409eb99..110936ca83 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -221,7 +221,7 @@ static inline void bat_size_prot(CPUPPCState *env, target_ulong *blp, } static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong virtual, MMUAccessType access_type) + target_ulong eaddr, MMUAccessType access_type) { target_ulong *BATlt, *BATut, *BATu, *BATl; target_ulong BEPIl, BEPIu, bl; @@ -230,7 +230,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, bool ifetch = access_type == MMU_INST_FETCH; qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT v " TARGET_FMT_lx "\n", __func__, - ifetch ? 'I' : 'D', virtual); + ifetch ? 'I' : 'D', eaddr); if (ifetch) { BATlt = env->IBAT[1]; BATut = env->IBAT[0]; @@ -246,15 +246,15 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, bat_size_prot(env, &bl, &valid, &prot, BATu, BATl); qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n", __func__, - ifetch ? 'I' : 'D', i, virtual, *BATu, *BATl); - if ((virtual & 0xF0000000) == BEPIu && - ((virtual & 0x0FFE0000) & ~bl) == BEPIl) { + ifetch ? 
'I' : 'D', i, eaddr, *BATu, *BATl); + if ((eaddr & 0xF0000000) == BEPIu && + ((eaddr & 0x0FFE0000) & ~bl) == BEPIl) { /* BAT matches */ if (valid != 0) { /* Get physical address */ ctx->raddr = (*BATl & 0xF0000000) | - ((virtual & 0x0FFE0000 & bl) | (*BATl & 0x0FFE0000)) | - (virtual & 0x0001F000); + ((eaddr & 0x0FFE0000 & bl) | (*BATl & 0x0FFE0000)) | + (eaddr & 0x0001F000); /* Compute access rights */ ctx->prot = prot; if (check_prot_access_type(ctx->prot, access_type)) { @@ -273,7 +273,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, if (ret < 0) { if (qemu_log_enabled()) { qemu_log_mask(CPU_LOG_MMU, "no BAT match for " - TARGET_FMT_lx ":\n", virtual); + TARGET_FMT_lx ":\n", eaddr); for (i = 0; i < 4; i++) { BATu = &BATut[i]; BATl = &BATlt[i]; @@ -284,7 +284,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, " BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n\t" TARGET_FMT_lx " " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__, ifetch ? 'I' : 'D', - i, virtual, *BATu, *BATl, BEPIu, BEPIl, bl); + i, eaddr, *BATu, *BATl, BEPIu, BEPIl, bl); } } } From 6ca35e8763e9c37a9cedd28d286f78fbbf45968c Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:13:00 +0200 Subject: [PATCH 88/96] target/ppc/mmu_common.c: Use defines instead of numeric constants Replace some BAT related constants with defines from mmu-hash32.h Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 110936ca83..aa002bba35 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -201,7 +201,7 @@ static inline void bat_size_prot(CPUPPCState *env, target_ulong *blp, target_ulong bl; int pp, valid, prot; - bl = (*BATu & 0x00001FFC) << 15; + bl = (*BATu & BATU32_BL) << 15; valid = 0; prot = 0; if ((!FIELD_EX64(env->msr, MSR, PR) && (*BATu 
& 0x00000002)) || @@ -241,19 +241,19 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, for (i = 0; i < env->nb_BATs; i++) { BATu = &BATut[i]; BATl = &BATlt[i]; - BEPIu = *BATu & 0xF0000000; - BEPIl = *BATu & 0x0FFE0000; + BEPIu = *BATu & BATU32_BEPIU; + BEPIl = *BATu & BATU32_BEPIL; bat_size_prot(env, &bl, &valid, &prot, BATu, BATl); qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n", __func__, ifetch ? 'I' : 'D', i, eaddr, *BATu, *BATl); - if ((eaddr & 0xF0000000) == BEPIu && - ((eaddr & 0x0FFE0000) & ~bl) == BEPIl) { + if ((eaddr & BATU32_BEPIU) == BEPIu && + ((eaddr & BATU32_BEPIL) & ~bl) == BEPIl) { /* BAT matches */ if (valid != 0) { /* Get physical address */ - ctx->raddr = (*BATl & 0xF0000000) | - ((eaddr & 0x0FFE0000 & bl) | (*BATl & 0x0FFE0000)) | + ctx->raddr = (*BATl & BATU32_BEPIU) | + ((eaddr & BATU32_BEPIL & bl) | (*BATl & BATU32_BEPIL)) | (eaddr & 0x0001F000); /* Compute access rights */ ctx->prot = prot; @@ -277,9 +277,9 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, for (i = 0; i < 4; i++) { BATu = &BATut[i]; BATl = &BATlt[i]; - BEPIu = *BATu & 0xF0000000; - BEPIl = *BATu & 0x0FFE0000; - bl = (*BATu & 0x00001FFC) << 15; + BEPIu = *BATu & BATU32_BEPIU; + BEPIl = *BATu & BATU32_BEPIL; + bl = (*BATu & BATU32_BL) << 15; qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n\t" TARGET_FMT_lx " " TARGET_FMT_lx " " @@ -520,9 +520,9 @@ static void mmu6xx_dump_BATs(CPUPPCState *env, int type) for (i = 0; i < env->nb_BATs; i++) { BATu = &BATut[i]; BATl = &BATlt[i]; - BEPIu = *BATu & 0xF0000000; - BEPIl = *BATu & 0x0FFE0000; - bl = (*BATu & 0x00001FFC) << 15; + BEPIu = *BATu & BATU32_BEPIU; + BEPIl = *BATu & BATU32_BEPIL; + bl = (*BATu & BATU32_BL) << 15; qemu_printf("%s BAT%d BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n\t" TARGET_FMT_lx " " TARGET_FMT_lx " " TARGET_FMT_lx "\n", From 
d323338629588ea985c68384642169045ca0e16d Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:13:01 +0200 Subject: [PATCH 89/96] target/ppc: Remove bat_size_prot() There is already a hash32_bat_prot() function that does most of this and the rest can be inlined. Export hash32_bat_prot() and rename it to ppc_hash32_bat_prot() to match other functions and use it in get_bat_6xx_tlb(). Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu-hash32.c | 18 +------------- target/ppc/mmu-hash32.h | 14 +++++++++++ target/ppc/mmu_common.c | 52 ++++++++++------------------------------- 3 files changed, 27 insertions(+), 57 deletions(-) diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index 160311de87..6f0f0bbb00 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -48,22 +48,6 @@ static target_ulong hash32_bat_size(int mmu_idx, return BATU32_BEPI & ~((batu & BATU32_BL) << 15); } -static int hash32_bat_prot(PowerPCCPU *cpu, - target_ulong batu, target_ulong batl) -{ - int pp, prot; - - prot = 0; - pp = batl & BATL32_PP; - if (pp != 0) { - prot = PAGE_READ | PAGE_EXEC; - if (pp == 0x2) { - prot |= PAGE_WRITE; - } - } - return prot; -} - static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea, MMUAccessType access_type, int *prot, int mmu_idx) @@ -95,7 +79,7 @@ static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea, if (mask && ((ea & mask) == (batu & BATU32_BEPI))) { hwaddr raddr = (batl & mask) | (ea & ~mask); - *prot = hash32_bat_prot(cpu, batu, batl); + *prot = ppc_hash32_bat_prot(batu, batl); return raddr & TARGET_PAGE_MASK; } diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h index 5902cf8333..bd75f7d647 100644 --- a/target/ppc/mmu-hash32.h +++ b/target/ppc/mmu-hash32.h @@ -143,6 +143,20 @@ static inline int ppc_hash32_prot(bool key, int pp, bool nx) return nx ? 
prot : prot | PAGE_EXEC; } +static inline int ppc_hash32_bat_prot(target_ulong batu, target_ulong batl) +{ + int prot = 0; + int pp = batl & BATL32_PP; + + if (pp) { + prot = PAGE_READ | PAGE_EXEC; + if (pp == 0x2) { + prot |= PAGE_WRITE; + } + } + return prot; +} + typedef struct { uint32_t pte0, pte1; } ppc_hash_pte32_t; diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index aa002bba35..624ed51a92 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -193,40 +193,13 @@ static int ppc6xx_tlb_check(CPUPPCState *env, hwaddr *raddr, int *prot, return ret; } -/* Perform BAT hit & translation */ -static inline void bat_size_prot(CPUPPCState *env, target_ulong *blp, - int *validp, int *protp, target_ulong *BATu, - target_ulong *BATl) -{ - target_ulong bl; - int pp, valid, prot; - - bl = (*BATu & BATU32_BL) << 15; - valid = 0; - prot = 0; - if ((!FIELD_EX64(env->msr, MSR, PR) && (*BATu & 0x00000002)) || - (FIELD_EX64(env->msr, MSR, PR) && (*BATu & 0x00000001))) { - valid = 1; - pp = *BATl & 0x00000003; - if (pp != 0) { - prot = PAGE_READ | PAGE_EXEC; - if (pp == 0x2) { - prot |= PAGE_WRITE; - } - } - } - *blp = bl; - *validp = valid; - *protp = prot; -} - static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong eaddr, MMUAccessType access_type) + target_ulong eaddr, MMUAccessType access_type, + bool pr) { target_ulong *BATlt, *BATut, *BATu, *BATl; target_ulong BEPIl, BEPIu, bl; - int i, valid, prot; - int ret = -1; + int i, ret = -1; bool ifetch = access_type == MMU_INST_FETCH; qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT v " TARGET_FMT_lx "\n", __func__, @@ -243,20 +216,19 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, BATl = &BATlt[i]; BEPIu = *BATu & BATU32_BEPIU; BEPIl = *BATu & BATU32_BEPIL; - bat_size_prot(env, &bl, &valid, &prot, BATu, BATl); qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n", __func__, ifetch ? 
'I' : 'D', i, eaddr, *BATu, *BATl); - if ((eaddr & BATU32_BEPIU) == BEPIu && - ((eaddr & BATU32_BEPIL) & ~bl) == BEPIl) { - /* BAT matches */ - if (valid != 0) { + bl = (*BATu & BATU32_BL) << 15; + if ((!pr && (*BATu & BATU32_VS)) || (pr && (*BATu & BATU32_VP))) { + if ((eaddr & BATU32_BEPIU) == BEPIu && + ((eaddr & BATU32_BEPIL) & ~bl) == BEPIl) { /* Get physical address */ ctx->raddr = (*BATl & BATU32_BEPIU) | ((eaddr & BATU32_BEPIL & bl) | (*BATl & BATU32_BEPIL)) | (eaddr & 0x0001F000); /* Compute access rights */ - ctx->prot = prot; + ctx->prot = ppc_hash32_bat_prot(*BATu, *BATl); if (check_prot_access_type(ctx->prot, access_type)) { qemu_log_mask(CPU_LOG_MMU, "BAT %d match: r " HWADDR_FMT_plx " prot=%c%c\n", i, ctx->raddr, @@ -300,16 +272,16 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, PowerPCCPU *cpu = env_archcpu(env); hwaddr hash; target_ulong vsid, sr, pgidx, ptem; - bool key, pr, ds, nx; + bool key, ds, nx; + bool pr = FIELD_EX64(env->msr, MSR, PR); /* First try to find a BAT entry if there are any */ - if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) { + if (env->nb_BATs && + get_bat_6xx_tlb(env, ctx, eaddr, access_type, pr) == 0) { return 0; } /* Perform segment based translation when no BATs matched */ - pr = FIELD_EX64(env->msr, MSR, PR); - sr = env->sr[eaddr >> 28]; key = ppc_hash32_key(pr, sr); *keyp = key; From 7e590cf6160ab2d3b626f67312f605b7e410e82d Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:13:02 +0200 Subject: [PATCH 90/96] target/ppc/mmu_common.c: Stop using ctx in get_bat_6xx_tlb() Pass raddr and prot in function parameters instead Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 624ed51a92..4770b43630 100644 --- a/target/ppc/mmu_common.c +++ 
b/target/ppc/mmu_common.c @@ -193,7 +193,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env, hwaddr *raddr, int *prot, return ret; } -static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, +static int get_bat_6xx_tlb(CPUPPCState *env, hwaddr *raddr, int *prot, target_ulong eaddr, MMUAccessType access_type, bool pr) { @@ -224,16 +224,16 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, if ((eaddr & BATU32_BEPIU) == BEPIu && ((eaddr & BATU32_BEPIL) & ~bl) == BEPIl) { /* Get physical address */ - ctx->raddr = (*BATl & BATU32_BEPIU) | + *raddr = (*BATl & BATU32_BEPIU) | ((eaddr & BATU32_BEPIL & bl) | (*BATl & BATU32_BEPIL)) | (eaddr & 0x0001F000); /* Compute access rights */ - ctx->prot = ppc_hash32_bat_prot(*BATu, *BATl); - if (check_prot_access_type(ctx->prot, access_type)) { + *prot = ppc_hash32_bat_prot(*BATu, *BATl); + if (check_prot_access_type(*prot, access_type)) { qemu_log_mask(CPU_LOG_MMU, "BAT %d match: r " HWADDR_FMT_plx - " prot=%c%c\n", i, ctx->raddr, - ctx->prot & PAGE_READ ? 'R' : '-', - ctx->prot & PAGE_WRITE ? 'W' : '-'); + " prot=%c%c\n", i, *raddr, + *prot & PAGE_READ ? 'R' : '-', + *prot & PAGE_WRITE ? 'W' : '-'); ret = 0; } else { ret = -2; @@ -277,7 +277,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, /* First try to find a BAT entry if there are any */ if (env->nb_BATs && - get_bat_6xx_tlb(env, ctx, eaddr, access_type, pr) == 0) { + get_bat_6xx_tlb(env, &ctx->raddr, &ctx->prot, eaddr, + access_type, pr) == 0) { return 0; } From bfb5a5eee5cfbe9f248472f7489fed241a2dab21 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:13:03 +0200 Subject: [PATCH 91/96] target/ppc/mmu_common.c: Remove mmu_ctx_t Completely get rid of mmu_ctx_t after converting the remaining functions to pass raddr and prot without the context struct. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu_common.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 4770b43630..60f8736210 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -37,12 +37,6 @@ /* #define DUMP_PAGE_TABLES */ -/* Context used internally during MMU translations */ -typedef struct { - hwaddr raddr; /* Real address */ - int prot; /* Protection bits */ -} mmu_ctx_t; - void ppc_store_sdr1(CPUPPCState *env, target_ulong value) { PowerPCCPU *cpu = env_archcpu(env); @@ -264,8 +258,8 @@ static int get_bat_6xx_tlb(CPUPPCState *env, hwaddr *raddr, int *prot, return ret; } -static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong eaddr, +static int mmu6xx_get_physical_address(CPUPPCState *env, hwaddr *raddr, + int *prot, target_ulong eaddr, hwaddr *hashp, bool *keyp, MMUAccessType access_type, int type) { @@ -277,8 +271,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, /* First try to find a BAT entry if there are any */ if (env->nb_BATs && - get_bat_6xx_tlb(env, &ctx->raddr, &ctx->prot, eaddr, - access_type, pr) == 0) { + get_bat_6xx_tlb(env, raddr, prot, eaddr, access_type, pr) == 0) { return 0; } @@ -316,7 +309,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, *hashp = hash; /* Software TLB search */ - return ppc6xx_tlb_check(env, &ctx->raddr, &ctx->prot, eaddr, + return ppc6xx_tlb_check(env, raddr, prot, eaddr, access_type, ptem, key, nx); } @@ -333,7 +326,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, * Should make the instruction do no-op. 
As it already do * no-op, it's quite easy :-) */ - ctx->raddr = eaddr; + *raddr = eaddr; return 0; case ACCESS_CODE: /* No code fetch is allowed in direct-store areas */ case ACCESS_FLOAT: /* Floating point load/store */ @@ -343,7 +336,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, } if ((access_type == MMU_DATA_STORE || !key) && (access_type == MMU_DATA_LOAD || key)) { - ctx->raddr = eaddr; + *raddr = eaddr; return 2; } return -2; @@ -681,7 +674,6 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - mmu_ctx_t ctx; hwaddr hash = 0; /* init to 0 to avoid used uninit warning */ bool key; int type, ret; @@ -700,12 +692,9 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, type = ACCESS_INT; } - ctx.prot = 0; - ret = mmu6xx_get_physical_address(env, &ctx, eaddr, &hash, &key, + ret = mmu6xx_get_physical_address(env, raddrp, protp, eaddr, &hash, &key, access_type, type); if (ret == 0) { - *raddrp = ctx.raddr; - *protp = ctx.prot; *psizep = TARGET_PAGE_BITS; return true; } else if (!guest_visible) { From 51993bef122896b29d1be218d536b6b3211cf2f1 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:13:04 +0200 Subject: [PATCH 92/96] target/ppc/mmu-hash32.c: Inline and remove ppc_hash32_pte_raddr() This function is used only once and does not add more clarity than doing it inline. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu-hash32.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index 6f0f0bbb00..c4de1647e2 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -298,15 +298,6 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu, return pte_offset; } -static hwaddr ppc_hash32_pte_raddr(target_ulong sr, ppc_hash_pte32_t pte, - target_ulong eaddr) -{ - hwaddr rpn = pte.pte1 & HPTE32_R_RPN; - hwaddr mask = ~TARGET_PAGE_MASK; - - return (rpn & ~mask) | (eaddr & mask); -} - bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, hwaddr *raddrp, int *psizep, int *protp, int mmu_idx, bool guest_visible) @@ -440,11 +431,12 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, */ prot &= ~PAGE_WRITE; } - } + } + *protp = prot; /* 9. Determine the real address from the PTE */ - - *raddrp = ppc_hash32_pte_raddr(sr, pte, eaddr); - *protp = prot; + *raddrp = pte.pte1 & HPTE32_R_RPN; + *raddrp &= TARGET_PAGE_MASK; + *raddrp |= eaddr & ~TARGET_PAGE_MASK; return true; } From 9eb0530033ac3a52fcca055213bc512e4e29b954 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:13:05 +0200 Subject: [PATCH 93/96] target/ppc/mmu-hash32.c: Move get_pteg_offset32() to the header This function is a simple shared function, move it to other similar static inline functions in the header. 
Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu-hash32.c | 7 ------- target/ppc/mmu-hash32.h | 6 +++++- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index c4de1647e2..44b16142ab 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -201,13 +201,6 @@ static bool ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr, return false; } -hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash) -{ - target_ulong mask = ppc_hash32_hpt_mask(cpu); - - return (hash * HASH_PTEG_SIZE_32) & mask; -} - static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off, bool secondary, target_ulong ptem, ppc_hash_pte32_t *pte) diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h index bd75f7d647..2838de031c 100644 --- a/target/ppc/mmu-hash32.h +++ b/target/ppc/mmu-hash32.h @@ -3,7 +3,6 @@ #ifndef CONFIG_USER_ONLY -hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash); bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, hwaddr *raddrp, int *psizep, int *protp, int mmu_idx, bool guest_visible); @@ -102,6 +101,11 @@ static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu, stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); } +static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash) +{ + return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu); +} + static inline bool ppc_hash32_key(bool pr, target_ulong sr) { return pr ? 
(sr & SR32_KP) : (sr & SR32_KS); From 14a43ab3335afb3f68ca103739405178abe070ea Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:13:06 +0200 Subject: [PATCH 94/96] target/ppc: Unexport some functions from mmu-book3s-v3.h The ppc_hash64_hpt_base() and ppc_hash64_hpt_mask() functions are mostly used by mmu-hash64.c only but there is one call to ppc_hash64_hpt_mask() in hw/ppc/spapr_vhyp_mmu.c in a helper function that can be moved to mmu-hash64.c which allows these functions to be removed from the header. Signed-off-by: BALATON Zoltan Reviewed-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- hw/ppc/spapr_vhyp_mmu.c | 21 ++++------------ target/ppc/mmu-book3s-v3.h | 40 ------------------------------- target/ppc/mmu-hash64.c | 49 ++++++++++++++++++++++++++++++++++++++ target/ppc/mmu-hash64.h | 1 + 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/hw/ppc/spapr_vhyp_mmu.c b/hw/ppc/spapr_vhyp_mmu.c index b3dd8b3a59..2d41d7f77b 100644 --- a/hw/ppc/spapr_vhyp_mmu.c +++ b/hw/ppc/spapr_vhyp_mmu.c @@ -15,19 +15,6 @@ #include "helper_regs.h" #include "hw/ppc/spapr.h" #include "mmu-hash64.h" -#include "mmu-book3s-v3.h" - - -static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) -{ - /* - * hash value/pteg group index is normalized by HPT mask - */ - if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { - return false; - } - return true; -} static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, target_ulong *args) @@ -70,7 +57,7 @@ static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, pteh &= ~0x60ULL; - if (!valid_ptex(cpu, ptex)) { + if (!ppc_hash64_valid_ptex(cpu, ptex)) { return H_PARAMETER; } @@ -119,7 +106,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu const ppc_hash_pte64_t *hptes; target_ulong v, r; - if (!valid_ptex(cpu, ptex)) { + if (!ppc_hash64_valid_ptex(cpu, ptex)) { return REMOVE_PARM; } @@ -250,7 +237,7 @@ static target_ulong 
h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, const ppc_hash_pte64_t *hptes; target_ulong v, r; - if (!valid_ptex(cpu, ptex)) { + if (!ppc_hash64_valid_ptex(cpu, ptex)) { return H_PARAMETER; } @@ -287,7 +274,7 @@ static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, int i, ridx, n_entries = 1; const ppc_hash_pte64_t *hptes; - if (!valid_ptex(cpu, ptex)) { + if (!ppc_hash64_valid_ptex(cpu, ptex)) { return H_PARAMETER; } diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h index f3f7993958..263ce55c1f 100644 --- a/target/ppc/mmu-book3s-v3.h +++ b/target/ppc/mmu-book3s-v3.h @@ -83,46 +83,6 @@ static inline bool ppc64_v3_radix(PowerPCCPU *cpu) return !!(cpu->env.spr[SPR_LPCR] & LPCR_HR); } -static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu) -{ - uint64_t base; - - if (cpu->vhyp) { - return 0; - } - if (cpu->env.mmu_model == POWERPC_MMU_3_00) { - ppc_v3_pate_t pate; - - if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) { - return 0; - } - base = pate.dw0; - } else { - base = cpu->env.spr[SPR_SDR1]; - } - return base & SDR_64_HTABORG; -} - -static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) -{ - uint64_t base; - - if (cpu->vhyp) { - return cpu->vhyp_class->hpt_mask(cpu->vhyp); - } - if (cpu->env.mmu_model == POWERPC_MMU_3_00) { - ppc_v3_pate_t pate; - - if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) { - return 0; - } - base = pate.dw0; - } else { - base = cpu->env.spr[SPR_SDR1]; - } - return (1ULL << ((base & SDR_64_HTABSIZE) + 18 - 7)) - 1; -} - #endif /* TARGET_PPC64 */ #endif /* CONFIG_USER_ONLY */ diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index cbc8efa0c3..7bc0323f26 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -508,6 +508,46 @@ static int ppc_hash64_amr_prot(PowerPCCPU *cpu, ppc_hash_pte64_t pte) return prot; } +static hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu) +{ + uint64_t base; + + if (cpu->vhyp) { + return 0; + } + if (cpu->env.mmu_model 
== POWERPC_MMU_3_00) { + ppc_v3_pate_t pate; + + if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) { + return 0; + } + base = pate.dw0; + } else { + base = cpu->env.spr[SPR_SDR1]; + } + return base & SDR_64_HTABORG; +} + +static hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) +{ + uint64_t base; + + if (cpu->vhyp) { + return cpu->vhyp_class->hpt_mask(cpu->vhyp); + } + if (cpu->env.mmu_model == POWERPC_MMU_3_00) { + ppc_v3_pate_t pate; + + if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) { + return 0; + } + base = pate.dw0; + } else { + base = cpu->env.spr[SPR_SDR1]; + } + return (1ULL << ((base & SDR_64_HTABSIZE) + 18 - 7)) - 1; +} + const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, hwaddr ptex, int n) { @@ -545,6 +585,15 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, false, n * HASH_PTE_SIZE_64); } +bool ppc_hash64_valid_ptex(PowerPCCPU *cpu, target_ulong ptex) +{ + /* hash value/pteg group index is normalized by HPT mask */ + if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { + return false; + } + return true; +} + static unsigned hpte_page_shift(const PPCHash64SegmentPageSizes *sps, uint64_t pte0, uint64_t pte1) { diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h index de653fcae5..ae8d4b37ae 100644 --- a/target/ppc/mmu-hash64.h +++ b/target/ppc/mmu-hash64.h @@ -120,6 +120,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, hwaddr ptex, int n); void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, hwaddr ptex, int n); +bool ppc_hash64_valid_ptex(PowerPCCPU *cpu, target_ulong ptex); static inline uint64_t ppc_hash64_hpte0(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, int i) From b864074ce074006f9bcc66a11cd4205355abb9ac Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:13:07 +0200 Subject: [PATCH 95/96] target/ppc/mmu-radix64: Remove externally unused parts from header Move the parts not needed outside of 
mmu-radix64.c from the header to the C file to leave only parts in the header that need to be exported. Also drop the unneeded include of this header. Signed-off-by: BALATON Zoltan Acked-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu-book3s-v3.c | 1 - target/ppc/mmu-radix64.c | 49 +++++++++++++++++++++++++++++++++++ target/ppc/mmu-radix64.h | 53 +------------------------------------- 3 files changed, 50 insertions(+), 53 deletions(-) diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c index c8f69b3df9..a812cb5113 100644 --- a/target/ppc/mmu-book3s-v3.c +++ b/target/ppc/mmu-book3s-v3.c @@ -21,7 +21,6 @@ #include "cpu.h" #include "mmu-hash64.h" #include "mmu-book3s-v3.h" -#include "mmu-radix64.h" bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t *entry) { diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 5a02e4963b..cf9619e847 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -29,6 +29,37 @@ #include "mmu-radix64.h" #include "mmu-book3s-v3.h" +/* Radix Partition Table Entry Fields */ +#define PATE1_R_PRTB 0x0FFFFFFFFFFFF000 +#define PATE1_R_PRTS 0x000000000000001F + +/* Radix Process Table Entry Fields */ +#define PRTBE_R_GET_RTS(rts) \ + ((((rts >> 58) & 0x18) | ((rts >> 5) & 0x7)) + 31) +#define PRTBE_R_RPDB 0x0FFFFFFFFFFFFF00 +#define PRTBE_R_RPDS 0x000000000000001F + +/* Radix Page Directory/Table Entry Fields */ +#define R_PTE_VALID 0x8000000000000000 +#define R_PTE_LEAF 0x4000000000000000 +#define R_PTE_SW0 0x2000000000000000 +#define R_PTE_RPN 0x01FFFFFFFFFFF000 +#define R_PTE_SW1 0x0000000000000E00 +#define R_GET_SW(sw) (((sw >> 58) & 0x8) | ((sw >> 9) & 0x7)) +#define R_PTE_R 0x0000000000000100 +#define R_PTE_C 0x0000000000000080 +#define R_PTE_ATT 0x0000000000000030 +#define R_PTE_ATT_NORMAL 0x0000000000000000 +#define R_PTE_ATT_SAO 0x0000000000000010 +#define R_PTE_ATT_NI_IO 0x0000000000000020 +#define R_PTE_ATT_TOLERANT_IO 0x0000000000000030 +#define
R_PTE_EAA_PRIV 0x0000000000000008 +#define R_PTE_EAA_R 0x0000000000000004 +#define R_PTE_EAA_RW 0x0000000000000002 +#define R_PTE_EAA_X 0x0000000000000001 +#define R_PDE_NLB PRTBE_R_RPDB +#define R_PDE_NLS PRTBE_R_RPDS + static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState *env, vaddr eaddr, uint64_t *lpid, uint64_t *pid) @@ -180,6 +211,24 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type, } } +static int ppc_radix64_get_prot_eaa(uint64_t pte) +{ + return (pte & R_PTE_EAA_R ? PAGE_READ : 0) | + (pte & R_PTE_EAA_RW ? PAGE_READ | PAGE_WRITE : 0) | + (pte & R_PTE_EAA_X ? PAGE_EXEC : 0); +} + +static int ppc_radix64_get_prot_amr(const PowerPCCPU *cpu) +{ + const CPUPPCState *env = &cpu->env; + int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */ + int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 IAMR63:62 */ + + return (amr & 0x2 ? 0 : PAGE_WRITE) | /* Access denied if bit is set */ + (amr & 0x1 ? 0 : PAGE_READ) | + (iamr & 0x1 ? 
0 : PAGE_EXEC); +} + static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, uint64_t pte, int *fault_cause, int *prot, int mmu_idx, bool partition_scoped) diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h index c5c04a1527..6620b3d648 100644 --- a/target/ppc/mmu-radix64.h +++ b/target/ppc/mmu-radix64.h @@ -3,7 +3,7 @@ #ifndef CONFIG_USER_ONLY -#include "exec/page-protection.h" +#ifdef TARGET_PPC64 /* Radix Quadrants */ #define R_EADDR_MASK 0x3FFFFFFFFFFFFFFF @@ -14,61 +14,10 @@ #define R_EADDR_QUADRANT2 0x8000000000000000 #define R_EADDR_QUADRANT3 0xC000000000000000 -/* Radix Partition Table Entry Fields */ -#define PATE1_R_PRTB 0x0FFFFFFFFFFFF000 -#define PATE1_R_PRTS 0x000000000000001F - -/* Radix Process Table Entry Fields */ -#define PRTBE_R_GET_RTS(rts) \ - ((((rts >> 58) & 0x18) | ((rts >> 5) & 0x7)) + 31) -#define PRTBE_R_RPDB 0x0FFFFFFFFFFFFF00 -#define PRTBE_R_RPDS 0x000000000000001F - -/* Radix Page Directory/Table Entry Fields */ -#define R_PTE_VALID 0x8000000000000000 -#define R_PTE_LEAF 0x4000000000000000 -#define R_PTE_SW0 0x2000000000000000 -#define R_PTE_RPN 0x01FFFFFFFFFFF000 -#define R_PTE_SW1 0x0000000000000E00 -#define R_GET_SW(sw) (((sw >> 58) & 0x8) | ((sw >> 9) & 0x7)) -#define R_PTE_R 0x0000000000000100 -#define R_PTE_C 0x0000000000000080 -#define R_PTE_ATT 0x0000000000000030 -#define R_PTE_ATT_NORMAL 0x0000000000000000 -#define R_PTE_ATT_SAO 0x0000000000000010 -#define R_PTE_ATT_NI_IO 0x0000000000000020 -#define R_PTE_ATT_TOLERANT_IO 0x0000000000000030 -#define R_PTE_EAA_PRIV 0x0000000000000008 -#define R_PTE_EAA_R 0x0000000000000004 -#define R_PTE_EAA_RW 0x0000000000000002 -#define R_PTE_EAA_X 0x0000000000000001 -#define R_PDE_NLB PRTBE_R_RPDB -#define R_PDE_NLS PRTBE_R_RPDS - -#ifdef TARGET_PPC64 - bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, hwaddr *raddr, int *psizep, int *protp, int mmu_idx, bool guest_visible); -static inline int ppc_radix64_get_prot_eaa(uint64_t 
pte) -{ - return (pte & R_PTE_EAA_R ? PAGE_READ : 0) | - (pte & R_PTE_EAA_RW ? PAGE_READ | PAGE_WRITE : 0) | - (pte & R_PTE_EAA_X ? PAGE_EXEC : 0); -} - -static inline int ppc_radix64_get_prot_amr(const PowerPCCPU *cpu) -{ - const CPUPPCState *env = &cpu->env; - int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */ - int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 IAMR63:62 */ - - return (amr & 0x2 ? 0 : PAGE_WRITE) | /* Access denied if bit is set */ - (amr & 0x1 ? 0 : PAGE_READ) | - (iamr & 0x1 ? 0 : PAGE_EXEC); -} - #endif /* TARGET_PPC64 */ #endif /* CONFIG_USER_ONLY */ From d741ecffd2ca260ce7875a4596f17736b5ccb7c3 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 27 May 2024 01:13:08 +0200 Subject: [PATCH 96/96] target/ppc: Remove includes from mmu-book3s-v3.h Drop includes from the header that are not needed by the header itself and only include them from the C files that really need them. Signed-off-by: BALATON Zoltan Acked-by: Nicholas Piggin Signed-off-by: Nicholas Piggin --- target/ppc/mmu-book3s-v3.h | 3 --- target/ppc/mmu-hash64.c | 1 + target/ppc/mmu-radix64.c | 1 + 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h index 263ce55c1f..be66e26604 100644 --- a/target/ppc/mmu-book3s-v3.h +++ b/target/ppc/mmu-book3s-v3.h @@ -20,9 +20,6 @@ #ifndef PPC_MMU_BOOK3S_V3_H #define PPC_MMU_BOOK3S_V3_H -#include "mmu-hash64.h" -#include "mmu-books.h" - #ifndef CONFIG_USER_ONLY /* diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index 7bc0323f26..5e1983e334 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -31,6 +31,7 @@ #include "hw/hw.h" #include "internal.h" #include "mmu-book3s-v3.h" +#include "mmu-books.h" #include "helper_regs.h" #ifdef CONFIG_TCG diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index cf9619e847..be7a45f254 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -28,6 +28,7 @@
#include "internal.h" #include "mmu-radix64.h" #include "mmu-book3s-v3.h" +#include "mmu-books.h" /* Radix Partition Table Entry Fields */ #define PATE1_R_PRTB 0x0FFFFFFFFFFFF000