From 62c7ec3488fe0dcbabffd543f458914e27736115 Mon Sep 17 00:00:00 2001 From: Aaron Lindsay OS Date: Fri, 15 Feb 2019 09:56:38 +0000 Subject: [PATCH 01/25] target/arm: Fix CRn to be 14 for PMEVTYPER/PMEVCNTR This bug was introduced in: commit 5ecdd3e47cadae83a62dc92b472f1fe163b56f59 target/arm: Finish implementation of PM[X]EVCNTR and PM[X]EVTYPER Signed-off-by: Aaron Lindsay Reported-by: Laurent Desnogues Reviewed-by: Laurent Desnogues Message-id: 20190205135129.19338-1-aaron@os.amperecomputing.com Signed-off-by: Peter Maydell --- target/arm/helper.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index 520ceea7a4..bd9f6050ec 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5855,25 +5855,25 @@ void register_cp_regs_for_features(ARMCPU *cpu) char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i); char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i); ARMCPRegInfo pmev_regs[] = { - { .name = pmevcntr_name, .cp = 15, .crn = 15, + { .name = pmevcntr_name, .cp = 15, .crn = 14, .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, .accessfn = pmreg_access }, { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 15, .crm = 8 | (3 & (i >> 3)), + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)), .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, .type = ARM_CP_IO, .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, .raw_readfn = pmevcntr_rawread, .raw_writefn = pmevcntr_rawwrite }, - { .name = pmevtyper_name, .cp = 15, .crn = 15, + { .name = pmevtyper_name, .cp = 15, .crn = 14, .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, .accessfn = pmreg_access }, { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 15, .crm = 12 | (3 & (i >> 3)), + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)), .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, .type = ARM_CP_IO, .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, From 831a2fca343ebcd6651eab9102bd7a36b77da65d Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 15 Feb 2019 09:56:38 +0000 Subject: [PATCH 02/25] target/arm: Implement HACR_EL2 HACR_EL2 is a register with IMPDEF behaviour, which allows implementation specific trapping to EL2. Implement it as RAZ/WI, since QEMU's implementation has no extra traps. This also matches what h/w implementations like Cortex-A53 and A57 do. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20190205181218.8995-1-peter.maydell@linaro.org --- target/arm/helper.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/target/arm/helper.c b/target/arm/helper.c index bd9f6050ec..e1ef2f3523 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -4434,6 +4434,9 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0, .access = PL2_RW, @@ -4666,6 +4669,9 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), .writefn = hcr_writelow }, + { .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, From abd5abc58c5d4c9bd23427b0998a44eb87ed47a2 Mon Sep 17 00:00:00 2001 From: Catherine Ho Date: Fri, 15 Feb 2019 09:56:38 +0000 Subject: [PATCH 03/25] target/arm: Fix int128_make128 lo, hi order in paired_cmpxchg64_be The lo,hi order is different from the comments. And in commit 1ec182c33379 ("target/arm: Convert to HAVE_CMPXCHG128"), it changes the original code logic. So just restore the old code logic before this commit: do_paired_cmpxchg64_be(): cmpv = int128_make128(env->exclusive_high, env->exclusive_val); newv = int128_make128(new_hi, new_lo); This fixes a bug that would only be visible for big-endian AArch64 guest code. Fixes: 1ec182c33379 ("target/arm: Convert to HAVE_CMPXCHG128") Signed-off-by: Catherine Ho Reviewed-by: Richard Henderson Message-id: 1548985244-24523-1-git-send-email-catherine.hecx@gmail.com [PMM: added note that bug only affects BE guests] Signed-off-by: Peter Maydell --- target/arm/helper-a64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 101fa6d3ea..70850e564d 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -583,8 +583,8 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, * High and low need to be switched here because this is not actually a * 128bit store but two doublewords stored consecutively */ - Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - Int128 newv = int128_make128(new_lo, new_hi); + Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val); + Int128 newv = int128_make128(new_hi, new_lo); Int128 oldv; uintptr_t ra = GETPC(); uint64_t o0, o1; From b5bd7440422bb66deaceb812bb9287a6a3cdf10c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 15 Feb 2019 09:56:38 +0000 Subject: [PATCH 04/25] target/arm: relax permission checks for HWCAP_CPUID registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although technically not visible to userspace the kernel does make them visible via a trap and emulate ABI. We provide a new permission mask (PL0U_R) which maps to PL0_R for CONFIG_USER builds and adjust the minimum permission check accordingly. Signed-off-by: Alex Bennée Message-id: 20190205190224.2198-2-alex.bennee@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/cpu.h | 12 ++++++++++++ target/arm/helper.c | 6 +++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 47238e4245..c92c097b44 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -2226,6 +2226,18 @@ static inline bool cptype_valid(int cptype) #define PL0_R (0x02 | PL1_R) #define PL0_W (0x01 | PL1_W) +/* + * For user-mode some registers are accessible to EL0 via a kernel + * trap-and-emulate ABI. In this case we define the read permissions + * as actually being PL0_R. However some bits of any given register + * may still be masked. + */ +#ifdef CONFIG_USER_ONLY +#define PL0U_R PL0_R +#else +#define PL0U_R PL1_R +#endif + #define PL3_RW (PL3_R | PL3_W) #define PL2_RW (PL2_R | PL2_W) #define PL1_RW (PL1_R | PL1_W) diff --git a/target/arm/helper.c b/target/arm/helper.c index e1ef2f3523..88cf497603 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6857,7 +6857,11 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, if (r->state != ARM_CP_STATE_AA32) { int mask = 0; switch (r->opc1) { - case 0: case 1: case 2: + case 0: + /* min_EL EL1, but some accessible to EL0 via kernel ABI */ + mask = PL0U_R | PL1_RW; + break; + case 1: case 2: /* min_EL EL1 */ mask = PL1_RW; break; From 6c5c0fec29bbfe36c64eca1edfd8455be46b77c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 15 Feb 2019 09:56:38 +0000 Subject: [PATCH 05/25] target/arm: expose CPUID registers to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A number of CPUID registers are exposed to userspace by modern Linux kernels thanks to the "ARM64 CPU Feature Registers" ABI. For QEMU's user-mode emulation we don't need to emulate the kernels trap but just return the value the trap would have done. To avoid too much #ifdef hackery we process ARMCPRegInfo with a new helper (modify_arm_cp_regs) before defining the registers. The modify routine is driven by a simple data structure which describes which bits are exported and which are fixed. Signed-off-by: Alex Bennée Message-id: 20190205190224.2198-3-alex.bennee@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/cpu.h | 21 ++++++++++++++++ target/arm/helper.c | 59 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index c92c097b44..7c31e5a2d1 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -2464,6 +2464,27 @@ static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs) } const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp); +/* + * Definition of an ARM co-processor register as viewed from + * userspace. This is used for presenting sanitised versions of + * registers to userspace when emulating the Linux AArch64 CPU + * ID/feature ABI (advertised as HWCAP_CPUID). + */ +typedef struct ARMCPRegUserSpaceInfo { + /* Name of register */ + const char *name; + + /* Only some bits are exported to user space */ + uint64_t exported_bits; + + /* Fixed bits are applied after the mask */ + uint64_t fixed_bits; +} ARMCPRegUserSpaceInfo; + +#define REGUSERINFO_SENTINEL { .name = NULL } + +void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods); + /* CPWriteFn that can be used to implement writes-ignored behaviour */ void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value); diff --git a/target/arm/helper.c b/target/arm/helper.c index 88cf497603..b2abaf5b22 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6109,6 +6109,30 @@ void register_cp_regs_for_features(ARMCPU *cpu) .resetvalue = cpu->pmceid1 }, REGINFO_SENTINEL }; +#ifdef CONFIG_USER_ONLY + ARMCPRegUserSpaceInfo v8_user_idregs[] = { + { .name = "ID_AA64PFR0_EL1", + .exported_bits = 0x000f000f00ff0000, + .fixed_bits = 0x0000000000000011 }, + { .name = "ID_AA64PFR1_EL1", + .exported_bits = 0x00000000000000f0 }, + { .name = "ID_AA64ZFR0_EL1" }, + { .name = "ID_AA64MMFR0_EL1", + .fixed_bits = 0x00000000ff000000 }, + { .name = "ID_AA64MMFR1_EL1" }, + { .name = "ID_AA64DFR0_EL1", + .fixed_bits = 0x0000000000000006 }, + { .name = "ID_AA64DFR1_EL1" }, + { .name = "ID_AA64AFR0_EL1" }, + { .name = "ID_AA64AFR1_EL1" }, + { .name = "ID_AA64ISAR0_EL1", + .exported_bits = 0x00fffffff0fffff0 }, + { .name = "ID_AA64ISAR1_EL1", + .exported_bits = 0x000000f0ffffffff }, + REGUSERINFO_SENTINEL + }; + modify_arm_cp_regs(v8_idregs, v8_user_idregs); +#endif /* RVBAR_EL1 is only implemented if EL1 is the highest EL */ if (!arm_feature(env, ARM_FEATURE_EL3) && !arm_feature(env, ARM_FEATURE_EL2)) { @@ -6385,6 +6409,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_OVERRIDE }; +#ifdef CONFIG_USER_ONLY + ARMCPRegUserSpaceInfo id_v8_user_midr_cp_reginfo[] = { + { .name = "MIDR_EL1", + .exported_bits = 0x00000000ffffffff }, + { .name = "REVIDR_EL1" }, + REGUSERINFO_SENTINEL + }; + modify_arm_cp_regs(id_v8_midr_cp_reginfo, id_v8_user_midr_cp_reginfo); +#endif if (arm_feature(env, ARM_FEATURE_OMAPCP) || arm_feature(env, ARM_FEATURE_STRONGARM)) { ARMCPRegInfo *r; @@ -6966,6 +6999,32 @@ void define_arm_cp_regs_with_opaque(ARMCPU *cpu, } } +/* + * Modify ARMCPRegInfo for access from userspace. + * + * This is a data driven modification directed by + * ARMCPRegUserSpaceInfo. All registers become ARM_CP_CONST as + * user-space cannot alter any values and dynamic values pertaining to + * execution state are hidden from user space view anyway. + */ +void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods) +{ + const ARMCPRegUserSpaceInfo *m; + ARMCPRegInfo *r; + + for (m = mods; m->name; m++) { + for (r = regs; r->type != ARM_CP_SENTINEL; r++) { + if (strcmp(r->name, m->name) == 0) { + r->type = ARM_CP_CONST; + r->access = PL0U_R; + r->resetvalue &= m->exported_bits; + r->resetvalue |= m->fixed_bits; + break; + } + } + } +} + const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp) { return g_hash_table_lookup(cpregs, &encoded_cp); From 522641660c3de64ed8322b8636c58625cd564a3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 15 Feb 2019 09:56:38 +0000 Subject: [PATCH 06/25] target/arm: expose MPIDR_EL1 to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As this is a single register we could expose it with a simple ifdef but we use the existing modify_arm_cp_regs mechanism for consistency. Signed-off-by: Alex Bennée Message-id: 20190205190224.2198-4-alex.bennee@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/helper.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index b2abaf5b22..77c7305694 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -3657,13 +3657,6 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri) return mpidr_read_val(env); } -static const ARMCPRegInfo mpidr_cp_reginfo[] = { - { .name = "MPIDR", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5, - .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW }, - REGINFO_SENTINEL -}; - static const ARMCPRegInfo lpae_cp_reginfo[] = { /* NOP AMAIR0/1 */ { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH, @@ -6451,6 +6444,20 @@ void register_cp_regs_for_features(ARMCPU *cpu) } if (arm_feature(env, ARM_FEATURE_MPIDR)) { + ARMCPRegInfo mpidr_cp_reginfo[] = { + { .name = "MPIDR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5, + .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW }, + REGINFO_SENTINEL + }; +#ifdef CONFIG_USER_ONLY + ARMCPRegUserSpaceInfo mpidr_user_cp_reginfo[] = { + { .name = "MPIDR_EL1", + .fixed_bits = 0x0000000080000000 }, + REGUSERINFO_SENTINEL + }; + modify_arm_cp_regs(mpidr_cp_reginfo, mpidr_user_cp_reginfo); +#endif define_arm_cp_regs(cpu, mpidr_cp_reginfo); } From d040242effe47850060d2ef1c461ff637d88a84d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 15 Feb 2019 09:56:39 +0000 Subject: [PATCH 07/25] target/arm: expose remaining CPUID registers as RAZ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a whole bunch more registers in the CPUID space which are currently not used but are exposed as RAZ. To avoid too much duplication we expand ARMCPRegUserSpaceInfo to understand glob patterns so we only need one entry to tweak whole ranges of registers. Signed-off-by: Alex Bennée Message-id: 20190205190224.2198-5-alex.bennee@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/cpu.h | 3 +++ target/arm/helper.c | 26 +++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 7c31e5a2d1..f0334413ec 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -2474,6 +2474,9 @@ typedef struct ARMCPRegUserSpaceInfo { /* Name of register */ const char *name; + /* Is the name actually a glob pattern */ + bool is_glob; + /* Only some bits are exported to user space */ uint64_t exported_bits; diff --git a/target/arm/helper.c b/target/arm/helper.c index 77c7305694..5ac335f598 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6109,19 +6109,27 @@ void register_cp_regs_for_features(ARMCPU *cpu) .fixed_bits = 0x0000000000000011 }, { .name = "ID_AA64PFR1_EL1", .exported_bits = 0x00000000000000f0 }, + { .name = "ID_AA64PFR*_EL1_RESERVED", + .is_glob = true }, { .name = "ID_AA64ZFR0_EL1" }, { .name = "ID_AA64MMFR0_EL1", .fixed_bits = 0x00000000ff000000 }, { .name = "ID_AA64MMFR1_EL1" }, + { .name = "ID_AA64MMFR*_EL1_RESERVED", + .is_glob = true }, { .name = "ID_AA64DFR0_EL1", .fixed_bits = 0x0000000000000006 }, { .name = "ID_AA64DFR1_EL1" }, - { .name = "ID_AA64AFR0_EL1" }, - { .name = "ID_AA64AFR1_EL1" }, + { .name = "ID_AA64DFR*_EL1_RESERVED", + .is_glob = true }, + { .name = "ID_AA64AFR*", + .is_glob = true }, { .name = "ID_AA64ISAR0_EL1", .exported_bits = 0x00fffffff0fffff0 }, { .name = "ID_AA64ISAR1_EL1", .exported_bits = 0x000000f0ffffffff }, + { .name = "ID_AA64ISAR*_EL1_RESERVED", + .is_glob = true }, REGUSERINFO_SENTINEL }; modify_arm_cp_regs(v8_idregs, v8_user_idregs); @@ -7020,8 +7028,17 @@ void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods) ARMCPRegInfo *r; for (m = mods; m->name; m++) { + GPatternSpec *pat = NULL; + if (m->is_glob) { + pat = g_pattern_spec_new(m->name); + } for (r = regs; r->type != ARM_CP_SENTINEL; r++) { - if (strcmp(r->name, m->name) == 0) { + if (pat && g_pattern_match_string(pat, r->name)) { + r->type = ARM_CP_CONST; + r->access = PL0U_R; + r->resetvalue = 0; + /* continue */ + } else if (strcmp(r->name, m->name) == 0) { r->type = ARM_CP_CONST; r->access = PL0U_R; r->resetvalue &= m->exported_bits; @@ -7029,6 +7046,9 @@ void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods) break; } } + if (pat) { + g_pattern_spec_free(pat); + } } } From 37020ff1539800b367fd69c1386e0d841f48b1ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 15 Feb 2019 09:56:39 +0000 Subject: [PATCH 08/25] linux-user/elfload: enable HWCAP_CPUID for AArch64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace programs should (in theory) query the ELF HWCAP before probing these registers. Now we have implemented them all make it public. Signed-off-by: Alex Bennée Reviewed-by: Richard Henderson Message-id: 20190205190224.2198-6-alex.bennee@linaro.org Signed-off-by: Peter Maydell --- linux-user/elfload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 775a36ccdd..3a50d587ff 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -580,6 +580,7 @@ static uint32_t get_elf_hwcap(void) hwcaps |= ARM_HWCAP_A64_FP; hwcaps |= ARM_HWCAP_A64_ASIMD; + hwcaps |= ARM_HWCAP_A64_CPUID; /* probe for the extra features */ #define GET_FEATURE_ID(feat, hwcap) \ From 823e1b3818f9b10b824ddcd756983b6e2fa68730 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 15 Feb 2019 09:56:39 +0000 Subject: [PATCH 09/25] arm: Allow system registers for KVM guests to be changed by QEMU code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the moment the Arm implementations of kvm_arch_{get,put}_registers() don't support having QEMU change the values of system registers (aka coprocessor registers for AArch32). This is because although kvm_arch_get_registers() calls write_list_to_cpustate() to update the CPU state struct fields (so QEMU code can read the values in the usual way), kvm_arch_put_registers() does not call write_cpustate_to_list(), meaning that any changes to the CPU state struct fields will not be passed back to KVM. The rationale for this design is documented in a comment in the AArch32 kvm_arch_put_registers() -- writing the values in the cpregs list into the CPU state struct is "lossy" because the write of a register might not succeed, and so if we blindly copy the CPU state values back again we will incorrectly change register values for the guest. The assumption was that no QEMU code would need to write to the registers. However, when we implemented debug support for KVM guests, we broke that assumption: the code to handle "set the guest up to take a breakpoint exception" does so by updating various guest registers including ESR_EL1. Support this by making kvm_arch_put_registers() synchronize CPU state back into the list. We sync only those registers where the initial write succeeds, which should be sufficient. Signed-off-by: Peter Maydell Reviewed-by: Alex Bennée Tested-by: Alex Bennée Tested-by: Dongjiu Geng --- target/arm/cpu.h | 9 ++++++++- target/arm/helper.c | 27 +++++++++++++++++++++++++-- target/arm/kvm32.c | 20 ++------------------ target/arm/kvm64.c | 2 ++ target/arm/machine.c | 2 +- 5 files changed, 38 insertions(+), 22 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index f0334413ec..bfc05c796a 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -2535,18 +2535,25 @@ bool write_list_to_cpustate(ARMCPU *cpu); /** * write_cpustate_to_list: * @cpu: ARMCPU + * @kvm_sync: true if this is for syncing back to KVM * * For each register listed in the ARMCPU cpreg_indexes list, write * its value from the ARMCPUState structure into the cpreg_values list. * This is used to copy info from TCG's working data structures into * KVM or for outbound migration. * + * @kvm_sync is true if we are doing this in order to sync the + * register state back to KVM. In this case we will only update + * values in the list if the previous list->cpustate sync actually + * successfully wrote the CPU state. Otherwise we will keep the value + * that is in the list. + * * Returns: true if all register values were read correctly, * false if some register was unknown or could not be read. * Note that we do not stop early on failure -- we will attempt * reading all registers in the list. */ -bool write_cpustate_to_list(ARMCPU *cpu); +bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); #define ARM_CPUID_TI915T 0x54029152 #define ARM_CPUID_TI925T 0x54029252 diff --git a/target/arm/helper.c b/target/arm/helper.c index 5ac335f598..7653aa6a50 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -264,7 +264,7 @@ static bool raw_accessors_invalid(const ARMCPRegInfo *ri) return true; } -bool write_cpustate_to_list(ARMCPU *cpu) +bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) { /* Write the coprocessor state from cpu->env to the (index,value) list. */ int i; @@ -273,6 +273,7 @@ bool write_cpustate_to_list(ARMCPU *cpu) for (i = 0; i < cpu->cpreg_array_len; i++) { uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]); const ARMCPRegInfo *ri; + uint64_t newval; ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); if (!ri) { @@ -282,7 +283,29 @@ bool write_cpustate_to_list(ARMCPU *cpu) if (ri->type & ARM_CP_NO_RAW) { continue; } - cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri); + + newval = read_raw_cp_reg(&cpu->env, ri); + if (kvm_sync) { + /* + * Only sync if the previous list->cpustate sync succeeded. + * Rather than tracking the success/failure state for every + * item in the list, we just recheck "does the raw write we must + * have made in write_list_to_cpustate() read back OK" here. + */ + uint64_t oldval = cpu->cpreg_values[i]; + + if (oldval == newval) { + continue; + } + + write_raw_cp_reg(&cpu->env, ri, oldval); + if (read_raw_cp_reg(&cpu->env, ri) != oldval) { + continue; + } + + write_raw_cp_reg(&cpu->env, ri, newval); + } + cpu->cpreg_values[i] = newval; } return ok; } diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index bd51eb43c8..a75e04cc8f 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -387,24 +387,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } - /* Note that we do not call write_cpustate_to_list() - * here, so we are only writing the tuple list back to - * KVM. This is safe because nothing can change the - * CPUARMState cp15 fields (in particular gdb accesses cannot) - * and so there are no changes to sync. In fact syncing would - * be wrong at this point: for a constant register where TCG and - * KVM disagree about its value, the preceding write_list_to_cpustate() - * would not have had any effect on the CPUARMState value (since the - * register is read-only), and a write_cpustate_to_list() here would - * then try to write the TCG value back into KVM -- this would either - * fail or incorrectly change the value the guest sees. - * - * If we ever want to allow the user to modify cp15 registers via - * the gdb stub, we would need to be more clever here (for instance - * tracking the set of registers kvm_arch_get_registers() successfully - * managed to update the CPUARMState with, and only allowing those - * to be written back up into the kernel). - */ + write_cpustate_to_list(cpu, true); + if (!write_list_to_kvmstate(cpu, level)) { return EINVAL; } diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 089af9c5f0..e3ba149248 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -838,6 +838,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + write_cpustate_to_list(cpu, true); + if (!write_list_to_kvmstate(cpu, level)) { return EINVAL; } diff --git a/target/arm/machine.c b/target/arm/machine.c index b292549614..124192bfc2 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -630,7 +630,7 @@ static int cpu_pre_save(void *opaque) abort(); } } else { - if (!write_cpustate_to_list(cpu)) { + if (!write_cpustate_to_list(cpu, false)) { /* This should never fail. */ abort(); } From 6ea564872238a25de2bdac8a61c485df6bcce9d6 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 15 Feb 2019 09:56:39 +0000 Subject: [PATCH 10/25] MAINTAINERS: Remove Peter Crosthwaite from various entries Peter Crosthwaite hasn't had the bandwidth to do code review or other QEMU work for some time now -- remove his email address from MAINTAINERS file entries so we don't bombard him with patch emails. Signed-off-by: Peter Maydell Message-id: 20190207181422.4907-1-peter.maydell@linaro.org --- MAINTAINERS | 4 ---- 1 file changed, 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e170a4c733..ffb029f63a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -110,7 +110,6 @@ Guest CPU cores (TCG): ---------------------- Overall L: qemu-devel@nongnu.org -M: Peter Crosthwaite M: Richard Henderson R: Paolo Bonzini S: Maintained @@ -1345,7 +1344,6 @@ F: tests/virtio-scsi-test.c T: git https://github.com/bonzini/qemu.git scsi-next SSI -M: Peter Crosthwaite M: Alistair Francis S: Maintained F: hw/ssi/* @@ -1356,7 +1354,6 @@ F: tests/m25p80-test.c Xilinx SPI M: Alistair Francis -M: Peter Crosthwaite S: Maintained F: hw/ssi/xilinx_* @@ -1766,7 +1763,6 @@ F: qom/cpu.c F: include/qom/cpu.h Device Tree -M: Peter Crosthwaite M: Alexander Graf S: Maintained F: device_tree.c From 935fe442dc234c7b3fa52d346ced7a614696107e Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 15 Feb 2019 09:56:39 +0000 Subject: [PATCH 11/25] hw/intc/armv7m_nvic: Allow byte accesses to SHPR1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code for handling the NVIC SHPR1 register intends to permit byte and halfword accesses (as the architecture requires). However the 'case' line for it only lists the base address of the register, so attempts to access bytes other than the first one end up in the "bad write" default logic. This bug was added accidentally when we split out the SHPR1 logic from SHPR2 and SHPR3 to support v6M. Fixes: 7c9140afd594 ("nvic: Handle ARMv6-M SCS reserved registers") Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé --- The Zephyr RTOS happens to access SHPR1 byte at a time, which is how I spotted this. --- hw/intc/armv7m_nvic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 790a3d9584..ab822f4251 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -1841,7 +1841,7 @@ static MemTxResult nvic_sysreg_read(void *opaque, hwaddr addr, } } break; - case 0xd18: /* System Handler Priority (SHPR1) */ + case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */ if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) { val = 0; break; @@ -1956,7 +1956,7 @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr, } nvic_irq_update(s); return MEMTX_OK; - case 0xd18: /* System Handler Priority (SHPR1) */ + case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */ if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) { return MEMTX_OK; } From 5007c904e158aaaf97e65338e52f5ef9e8df0944 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 15 Feb 2019 09:56:39 +0000 Subject: [PATCH 12/25] hw/arm/armsse: Fix miswiring of expansion IRQs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 91c1e9fcbd7548db368 where we added dual-CPU support to the ARMSSE, we set up the wiring of the expansion IRQs via nested loops: the outer loop on 'i' loops for each CPU, and the inner loop on 'j' loops for each interrupt. Fix a typo which meant we were wiring every expansion IRQ line to external IRQ 0 on CPU 0 and to external IRQ 1 on CPU 1. Fixes: 91c1e9fcbd7548db368 ("hw/arm/armsse: Support dual-CPU configuration") Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé --- hw/arm/armsse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c index 5d53071a5a..9a8c49547d 100644 --- a/hw/arm/armsse.c +++ b/hw/arm/armsse.c @@ -565,7 +565,7 @@ static void armsse_realize(DeviceState *dev, Error **errp) /* Connect EXP_IRQ/EXP_CPUn_IRQ GPIOs to the NVIC's lines 32 and up */ s->exp_irqs[i] = g_new(qemu_irq, s->exp_numirq); for (j = 0; j < s->exp_numirq; j++) { - s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, i + 32); + s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, j + 32); } if (i == 0) { gpioname = g_strdup("EXP_IRQ"); From 2900847ff4c862887af750935a875059615f509a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:39 +0000 Subject: [PATCH 13/25] target/arm: Rely on optimization within tcg_gen_gvec_or Since we're now handling a == b generically, we no longer need to do it by hand within target/arm/. Reviewed-by: David Gibson Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 6 +----- target/arm/translate-sve.c | 6 +----- target/arm/translate.c | 12 +++--------- 3 files changed, 5 insertions(+), 19 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index e002251ac6..a12bfac719 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -10648,11 +10648,7 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0); return; case 2: /* ORR */ - if (rn == rm) { /* MOV */ - gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); - } + gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); return; case 3: /* ORN */ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0); diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index b15b615ceb..3a2eb51566 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -280,11 +280,7 @@ static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a) static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a) { - if (a->rn == a->rm) { /* MOV */ - return do_mov_z(s, a->rd, a->rn); - } else { - return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm); - } + return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm); } static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a) diff --git a/target/arm/translate.c b/target/arm/translate.c index 66cf28c8cb..9d2dba7ed2 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -6294,15 +6294,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); break; - case 2: - if (rn == rm) { - /* VMOV */ - tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size); - } else { - /* VORR */ - tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } + case 2: /* VORR */ + tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); break; case 3: /* VORN */ tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs, From 264d2a481a6c34dfda53be3fbea66116bcef9c5a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:40 +0000 Subject: [PATCH 14/25] target/arm: Use vector minmax expanders for aarch64 Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index a12bfac719..fd5ceb6613 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -10948,6 +10948,20 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) } switch (opcode) { + case 0x0c: /* SMAX, UMAX */ + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size); + } + return; + case 0x0d: /* SMIN, UMIN */ + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); + } + return; case 0x10: /* ADD, SUB */ if (u) { gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); @@ -11109,27 +11123,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genenvfn = fns[size][u]; break; } - case 0xc: /* SMAX, UMAX */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_max_s8, gen_helper_neon_max_u8 }, - { gen_helper_neon_max_s16, gen_helper_neon_max_u16 }, - { tcg_gen_smax_i32, tcg_gen_umax_i32 }, - }; - genfn = fns[size][u]; - break; - } - - case 0xd: /* SMIN, UMIN */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_min_s8, gen_helper_neon_min_u8 }, - { gen_helper_neon_min_s16, gen_helper_neon_min_u16 }, - { tcg_gen_smin_i32, tcg_gen_umin_i32 }, - }; - genfn = fns[size][u]; - break; - } case 0xe: /* SABD, UABD */ case 0xf: /* SABA, UABA */ { From 6f2782218230bbb33fa22f9a2f73f8a570046007 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:40 +0000 Subject: [PATCH 15/25] target/arm: Use vector minmax expanders for aarch32 Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index 9d2dba7ed2..df1cd3fa3e 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -6368,6 +6368,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); return 0; + + case NEON_3R_VMAX: + if (u) { + tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { + tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + return 0; + case NEON_3R_VMIN: + if (u) { + tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { + tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + return 0; } if (size == 3) { @@ -6533,12 +6552,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VQRSHL: GEN_NEON_INTEGER_OP_ENV(qrshl); break; - case NEON_3R_VMAX: - GEN_NEON_INTEGER_OP(max); - break; - case NEON_3R_VMIN: - GEN_NEON_INTEGER_OP(min); - break; case NEON_3R_VABD: GEN_NEON_INTEGER_OP(abd); break; From 9ecd3c5c1651fa7f9adbedff4806a2da0b50490c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:40 +0000 Subject: [PATCH 16/25] target/arm: Use tcg integer min/max primitives for neon The 32-bit PMIN/PMAX has been decomposed to scalars, and so can be trivially expanded inline. Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-5-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/translate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index df1cd3fa3e..f0101d2788 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -4760,10 +4760,10 @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1) } /* 32-bit pairwise ops end up the same as the elementwise versions. */ -#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32 -#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32 -#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32 -#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32 +#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 +#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 +#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 +#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 #define GEN_NEON_INTEGER_OP_ENV(name) do { \ switch ((size << 1) | u) { \ From a5c5dc53c4688efc149b235361d2d49869e77139 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:40 +0000 Subject: [PATCH 17/25] target/arm: Remove neon min/max helpers These are now unused. Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-6-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/helper.h | 12 ------------ target/arm/neon_helper.c | 12 ------------ 2 files changed, 24 deletions(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index 53a38188c6..9874c35ea9 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -276,18 +276,6 @@ DEF_HELPER_2(neon_cge_s16, i32, i32, i32) DEF_HELPER_2(neon_cge_u32, i32, i32, i32) DEF_HELPER_2(neon_cge_s32, i32, i32, i32) -DEF_HELPER_2(neon_min_u8, i32, i32, i32) -DEF_HELPER_2(neon_min_s8, i32, i32, i32) -DEF_HELPER_2(neon_min_u16, i32, i32, i32) -DEF_HELPER_2(neon_min_s16, i32, i32, i32) -DEF_HELPER_2(neon_min_u32, i32, i32, i32) -DEF_HELPER_2(neon_min_s32, i32, i32, i32) -DEF_HELPER_2(neon_max_u8, i32, i32, i32) -DEF_HELPER_2(neon_max_s8, i32, i32, i32) -DEF_HELPER_2(neon_max_u16, i32, i32, i32) -DEF_HELPER_2(neon_max_s16, i32, i32, i32) -DEF_HELPER_2(neon_max_u32, i32, i32, i32) -DEF_HELPER_2(neon_max_s32, i32, i32, i32) DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c index c2c6491a83..3249005b62 100644 --- a/target/arm/neon_helper.c +++ b/target/arm/neon_helper.c @@ -581,12 +581,6 @@ NEON_VOP(cge_u32, neon_u32, 1) #undef NEON_FN #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 -NEON_VOP(min_s8, neon_s8, 4) -NEON_VOP(min_u8, neon_u8, 4) -NEON_VOP(min_s16, neon_s16, 2) -NEON_VOP(min_u16, neon_u16, 2) -NEON_VOP(min_s32, neon_s32, 1) -NEON_VOP(min_u32, neon_u32, 1) NEON_POP(pmin_s8, neon_s8, 4) NEON_POP(pmin_u8, neon_u8, 4) NEON_POP(pmin_s16, neon_s16, 2) @@ -594,12 +588,6 @@ NEON_POP(pmin_u16, neon_u16, 2) #undef NEON_FN #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2 -NEON_VOP(max_s8, neon_s8, 4) -NEON_VOP(max_u8, neon_u8, 4) -NEON_VOP(max_s16, neon_s16, 2) -NEON_VOP(max_u16, neon_u16, 2) -NEON_VOP(max_s32, neon_s32, 1) -NEON_VOP(max_u32, neon_u32, 1) NEON_POP(pmax_s8, neon_s8, 4) NEON_POP(pmax_u8, neon_u8, 4) NEON_POP(pmax_s16, neon_s16, 2) From b0a909a48a08cbba05d65af620ddd3aac29135ee Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:40 +0000 Subject: [PATCH 18/25] target/arm: Fix vfp_gdb_get/set_reg vs FPSCR The components of this register is stored in several different locations. Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-7-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index 7653aa6a50..8eedce113c 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -81,7 +81,7 @@ static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) } switch (reg - nregs) { case 0: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSID]); return 4; - case 1: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSCR]); return 4; + case 1: stl_p(buf, vfp_get_fpscr(env)); return 4; case 2: stl_p(buf, env->vfp.xregs[ARM_VFP_FPEXC]); return 4; } return 0; @@ -107,7 +107,7 @@ static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) } switch (reg - nregs) { case 0: env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); return 4; - case 1: env->vfp.xregs[ARM_VFP_FPSCR] = ldl_p(buf); return 4; + case 1: vfp_set_fpscr(env, ldl_p(buf)); return 4; case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); return 4; } return 0; From ec527e4eeccc31e3beadf3b61b66c61bbd873811 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:40 +0000 Subject: [PATCH 19/25] target/arm: Fix arm_cpu_dump_state vs FPSCR Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-8-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index f0101d2788..9b426f4271 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -13641,7 +13641,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, i * 2 + 1, (uint32_t)(v >> 32), i, v); } - cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]); + cpu_fprintf(f, "FPSCR: %08x\n", vfp_get_fpscr(env)); } } From 55a889456ef78f3f9b8eae9846c2f1453b1dd77b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:40 +0000 Subject: [PATCH 20/25] target/arm: Split out flags setting from vfp compares Minimize the code within a macro by splitting out a helper function. Use deposit32 instead of manual bit manipulation. Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-9-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/helper.c | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index 8eedce113c..28e45f0f0b 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -12871,31 +12871,40 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env) return float64_sqrt(a, &env->vfp.fp_status); } +static void softfloat_to_vfp_compare(CPUARMState *env, int cmp) +{ + uint32_t flags; + switch (cmp) { + case float_relation_equal: + flags = 0x6; + break; + case float_relation_less: + flags = 0x8; + break; + case float_relation_greater: + flags = 0x2; + break; + case float_relation_unordered: + flags = 0x3; + break; + default: + g_assert_not_reached(); + } + env->vfp.xregs[ARM_VFP_FPSCR] = + deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags); +} + /* XXX: check quiet/signaling case */ #define DO_VFP_cmp(p, type) \ void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \ { \ - uint32_t flags; \ - switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \ - case 0: flags = 0x6; break; \ - case -1: flags = 0x8; break; \ - case 1: flags = 0x2; break; \ - default: case 2: flags = 0x3; break; \ - } \ - env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ - | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ + softfloat_to_vfp_compare(env, \ + type ## _compare_quiet(a, b, &env->vfp.fp_status)); \ } \ void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \ { \ - uint32_t flags; \ - switch(type ## _compare(a, b, &env->vfp.fp_status)) { \ - case 0: flags = 0x6; break; \ - case -1: flags = 0x8; break; \ - case 1: flags = 0x2; break; \ - default: case 2: flags = 0x3; break; \ - } \ - env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ - | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ + softfloat_to_vfp_compare(env, \ + type ## _compare(a, b, &env->vfp.fp_status)); \ } DO_VFP_cmp(s, float32) DO_VFP_cmp(d, float64) From 18aaa59c622208743565307668a2100ab24f7de9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:41 +0000 Subject: [PATCH 21/25] target/arm: Fix set of bits kept in xregs[ARM_VFP_FPSCR] Given that we mask bits properly on set, there is no reason to mask them again on get. We failed to clear the exception status bits, 0x9f, which means that the wrong value would be returned on get. Except in the (probably normal) case in which the set clears all of the bits. Simplify the code in set to also clear the RES0 bits. Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-10-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/helper.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index 28e45f0f0b..d4b7eca30a 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -12707,7 +12707,7 @@ uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) int i; uint32_t fpscr; - fpscr = (env->vfp.xregs[ARM_VFP_FPSCR] & 0xffc8ffff) + fpscr = env->vfp.xregs[ARM_VFP_FPSCR] | (env->vfp.vec_len << 16) | (env->vfp.vec_stride << 20); @@ -12749,7 +12749,7 @@ static inline int vfp_exceptbits_to_host(int target_bits) void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) { int i; - uint32_t changed; + uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR]; /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */ if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) { @@ -12758,12 +12758,13 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) /* * We don't implement trapped exception handling, so the - * trap enable bits are all RAZ/WI (not RES0!) + * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) + * + * If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC + * (which are stored in fp_status), and the other RES0 bits + * in between, then we clear all of the low 16 bits. */ - val &= ~(FPCR_IDE | FPCR_IXE | FPCR_UFE | FPCR_OFE | FPCR_DZE | FPCR_IOE); - - changed = env->vfp.xregs[ARM_VFP_FPSCR]; - env->vfp.xregs[ARM_VFP_FPSCR] = (val & 0xffc8ffff); + env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xffc80000; env->vfp.vec_len = (val >> 16) & 7; env->vfp.vec_stride = (val >> 20) & 3; From a4d5846245c5e029e5aa3945a9bda1de1c3fedbf Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:41 +0000 Subject: [PATCH 22/25] target/arm: Split out FPSCR.QC to a vector field Change the representation of this field such that it is easy to set from vector code. Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-11-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/cpu.h | 5 ++++- target/arm/helper.c | 19 +++++++++++++++---- target/arm/neon_helper.c | 2 +- target/arm/vec_helper.c | 2 +- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index bfc05c796a..84ae6849c2 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -577,11 +577,13 @@ typedef struct CPUARMState { ARMPredicateReg preg_tmp; #endif - uint32_t xregs[16]; /* We store these fpcsr fields separately for convenience. */ + uint32_t qc[4] QEMU_ALIGNED(16); int vec_len; int vec_stride; + uint32_t xregs[16]; + /* Scratch space for aa32 neon expansion. */ uint32_t scratch[8]; @@ -1427,6 +1429,7 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val); #define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */ #define FPCR_FZ (1 << 24) /* Flush-to-zero enable bit */ #define FPCR_DN (1 << 25) /* Default NaN enable bit */ +#define FPCR_QC (1 << 27) /* Cumulative saturation bit */ static inline uint32_t vfp_get_fpsr(CPUARMState *env) { diff --git a/target/arm/helper.c b/target/arm/helper.c index d4b7eca30a..55e9b77bb1 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -12704,8 +12704,7 @@ static inline int vfp_exceptbits_from_host(int host_bits) uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) { - int i; - uint32_t fpscr; + uint32_t i, fpscr; fpscr = env->vfp.xregs[ARM_VFP_FPSCR] | (env->vfp.vec_len << 16) @@ -12716,8 +12715,11 @@ uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) /* FZ16 does not generate an input denormal exception. */ i |= (get_float_exception_flags(&env->vfp.fp_status_f16) & ~float_flag_input_denormal); - fpscr |= vfp_exceptbits_from_host(i); + + i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3]; + fpscr |= i ? FPCR_QC : 0; + return fpscr; } @@ -12764,10 +12766,19 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) * (which are stored in fp_status), and the other RES0 bits * in between, then we clear all of the low 16 bits. */ - env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xffc80000; + env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000; env->vfp.vec_len = (val >> 16) & 7; env->vfp.vec_stride = (val >> 20) & 3; + /* + * The bit we set within fpscr_q is arbitrary; the register as a + * whole being zero/non-zero is what counts. + */ + env->vfp.qc[0] = val & FPCR_QC; + env->vfp.qc[1] = 0; + env->vfp.qc[2] = 0; + env->vfp.qc[3] = 0; + changed ^= val; if (changed & (3 << 22)) { i = (val >> 22) & 3; diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c index 3249005b62..ed1c6fc41c 100644 --- a/target/arm/neon_helper.c +++ b/target/arm/neon_helper.c @@ -15,7 +15,7 @@ #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) -#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q +#define SET_QC() env->vfp.qc[0] = 1 #define NEON_TYPE1(name, type) \ typedef struct \ diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 37f338732e..65a18af4e0 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -36,7 +36,7 @@ #define H4(x) (x) #endif -#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q +#define SET_QC() env->vfp.qc[0] = 1 static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) { From 89e68b575e138d0af1435f11a8ffcd8779c237bd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:41 +0000 Subject: [PATCH 23/25] target/arm: Use vector operations for saturation For same-sign saturation, we have tcg vector operations. We can compute the QC bit by comparing the saturated value against the unsaturated value. Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-12-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/helper.h | 33 +++++++ target/arm/translate-a64.c | 36 ++++---- target/arm/translate.c | 172 +++++++++++++++++++++++++++++++------ target/arm/translate.h | 4 + target/arm/vec_helper.c | 130 ++++++++++++++++++++++++++++ 5 files changed, 331 insertions(+), 44 deletions(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index 9874c35ea9..923e8e1525 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -641,6 +641,39 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index fd5ceb6613..af8e4fd4be 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -10948,6 +10948,22 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) } switch (opcode) { + case 0x01: /* SQADD, UQADD */ + tcg_gen_gvec_4(vec_full_reg_offset(s, rd), + offsetof(CPUARMState, vfp.qc), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s), + (u ? uqadd_op : sqadd_op) + size); + return; + case 0x05: /* SQSUB, UQSUB */ + tcg_gen_gvec_4(vec_full_reg_offset(s, rd), + offsetof(CPUARMState, vfp.qc), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s), + (u ? uqsub_op : sqsub_op) + size); + return; case 0x0c: /* SMAX, UMAX */ if (u) { gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); @@ -11043,16 +11059,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn = fns[size][u]; break; } - case 0x1: /* SQADD, UQADD */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, - { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, - { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, - }; - genenvfn = fns[size][u]; - break; - } case 0x2: /* SRHADD, URHADD */ { static NeonGenTwoOpFn * const fns[3][2] = { @@ -11073,16 +11079,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn = fns[size][u]; break; } - case 0x5: /* SQSUB, UQSUB */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, - { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, - { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, - }; - genenvfn = fns[size][u]; - break; - } case 0x8: /* SSHL, USHL */ { static NeonGenTwoOpFn * const fns[3][2] = { diff --git a/target/arm/translate.c b/target/arm/translate.c index 9b426f4271..dac737f6ca 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -6148,6 +6148,142 @@ const GVecGen3 cmtst_op[4] = { .vece = MO_64 }, }; +static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_add_vec(vece, x, a, b); + tcg_gen_usadd_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); + tcg_temp_free_vec(x); +} + +const GVecGen4 uqadd_op[4] = { + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_b, + .opc = INDEX_op_usadd_vec, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_h, + .opc = INDEX_op_usadd_vec, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_s, + .opc = INDEX_op_usadd_vec, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_d, + .opc = INDEX_op_usadd_vec, + .write_aofs = true, + .vece = MO_64 }, +}; + +static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_add_vec(vece, x, a, b); + tcg_gen_ssadd_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); + tcg_temp_free_vec(x); +} + +const GVecGen4 sqadd_op[4] = { + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_b, + .opc = INDEX_op_ssadd_vec, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_h, + .opc = INDEX_op_ssadd_vec, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_s, + .opc = INDEX_op_ssadd_vec, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_d, + .opc = INDEX_op_ssadd_vec, + .write_aofs = true, + .vece = MO_64 }, +}; + +static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_sub_vec(vece, x, a, b); + tcg_gen_ussub_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); + tcg_temp_free_vec(x); +} + +const GVecGen4 uqsub_op[4] = { + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_b, + .opc = INDEX_op_ussub_vec, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_h, + .opc = INDEX_op_ussub_vec, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_s, + .opc = INDEX_op_ussub_vec, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_d, + .opc = INDEX_op_ussub_vec, + .write_aofs = true, + .vece = MO_64 }, +}; + +static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_sub_vec(vece, x, a, b); + tcg_gen_sssub_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); + tcg_temp_free_vec(x); +} + +const GVecGen4 sqsub_op[4] = { + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_b, + .opc = INDEX_op_sssub_vec, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_h, + .opc = INDEX_op_sssub_vec, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_s, + .opc = INDEX_op_sssub_vec, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_d, + .opc = INDEX_op_sssub_vec, + .write_aofs = true, + .vece = MO_64 }, +}; + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -6331,6 +6467,18 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } return 0; + case NEON_3R_VQADD: + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, vec_size, vec_size, + (u ? uqadd_op : sqadd_op) + size); + break; + + case NEON_3R_VQSUB: + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, vec_size, vec_size, + (u ? uqsub_op : sqsub_op) + size); + break; + case NEON_3R_VMUL: /* VMUL */ if (u) { /* Polynomial case allows only P8 and is handled below. */ @@ -6395,24 +6543,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) neon_load_reg64(cpu_V0, rn + pass); neon_load_reg64(cpu_V1, rm + pass); switch (op) { - case NEON_3R_VQADD: - if (u) { - gen_helper_neon_qadd_u64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } else { - gen_helper_neon_qadd_s64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } - break; - case NEON_3R_VQSUB: - if (u) { - gen_helper_neon_qsub_u64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } else { - gen_helper_neon_qsub_s64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } - break; case NEON_3R_VSHL: if (u) { gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); @@ -6528,18 +6658,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VHADD: GEN_NEON_INTEGER_OP(hadd); break; - case NEON_3R_VQADD: - GEN_NEON_INTEGER_OP_ENV(qadd); - break; case NEON_3R_VRHADD: GEN_NEON_INTEGER_OP(rhadd); break; case NEON_3R_VHSUB: GEN_NEON_INTEGER_OP(hsub); break; - case NEON_3R_VQSUB: - GEN_NEON_INTEGER_OP_ENV(qsub); - break; case NEON_3R_VSHL: GEN_NEON_INTEGER_OP(shl); break; diff --git a/target/arm/translate.h b/target/arm/translate.h index 17748ddfb9..f25fe75685 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -214,6 +214,10 @@ extern const GVecGen2i ssra_op[4]; extern const GVecGen2i usra_op[4]; extern const GVecGen2i sri_op[4]; extern const GVecGen2i sli_op[4]; +extern const GVecGen4 uqadd_op[4]; +extern const GVecGen4 sqadd_op[4]; +extern const GVecGen4 uqsub_op[4]; +extern const GVecGen4 sqsub_op[4]; void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); /* diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 65a18af4e0..10f17e4b5c 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -766,3 +766,133 @@ DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) DO_FMLA_IDX(gvec_fmla_idx_d, float64, ) #undef DO_FMLA_IDX + +#define DO_SAT(NAME, WTYPE, TYPEN, TYPEM, OP, MIN, MAX) \ +void HELPER(NAME)(void *vd, void *vq, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + TYPEN *d = vd, *n = vn; TYPEM *m = vm; \ + bool q = false; \ + for (i = 0; i < oprsz / sizeof(TYPEN); i++) { \ + WTYPE dd = (WTYPE)n[i] OP m[i]; \ + if (dd < MIN) { \ + dd = MIN; \ + q = true; \ + } else if (dd > MAX) { \ + dd = MAX; \ + q = true; \ + } \ + d[i] = dd; \ + } \ + if (q) { \ + uint32_t *qc = vq; \ + qc[0] = 1; \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SAT(gvec_uqadd_b, int, uint8_t, uint8_t, +, 0, UINT8_MAX) +DO_SAT(gvec_uqadd_h, int, uint16_t, uint16_t, +, 0, UINT16_MAX) +DO_SAT(gvec_uqadd_s, int64_t, uint32_t, uint32_t, +, 0, UINT32_MAX) + +DO_SAT(gvec_sqadd_b, int, int8_t, int8_t, +, INT8_MIN, INT8_MAX) +DO_SAT(gvec_sqadd_h, int, int16_t, int16_t, +, INT16_MIN, INT16_MAX) +DO_SAT(gvec_sqadd_s, int64_t, int32_t, int32_t, +, INT32_MIN, INT32_MAX) + +DO_SAT(gvec_uqsub_b, int, uint8_t, uint8_t, -, 0, UINT8_MAX) +DO_SAT(gvec_uqsub_h, int, uint16_t, uint16_t, -, 0, UINT16_MAX) +DO_SAT(gvec_uqsub_s, int64_t, uint32_t, uint32_t, -, 0, UINT32_MAX) + +DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX) +DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX) +DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX) + +#undef DO_SAT + +void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + uint64_t nn = n[i], mm = m[i], dd = nn + mm; + if (dd < nn) { + dd = UINT64_MAX; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_uqsub_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + uint64_t nn = n[i], mm = m[i], dd = nn - mm; + if (nn < mm) { + dd = 0; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_sqadd_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + int64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + int64_t nn = n[i], mm = m[i], dd = nn + mm; + if (((dd ^ nn) & ~(nn ^ mm)) & INT64_MIN) { + dd = (nn >> 63) ^ ~INT64_MIN; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + int64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + int64_t nn = n[i], mm = m[i], dd = nn - mm; + if (((dd ^ nn) & (nn ^ mm)) & INT64_MIN) { + dd = (nn >> 63) ^ ~INT64_MIN; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} From d8efe78e8039511b95c23d75bb48eca6873fbb0f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Feb 2019 09:56:41 +0000 Subject: [PATCH 24/25] target/arm: Add missing clear_tail calls Fortunately, the functions affected are so far only called from SVE, so there is no tail to be cleared. But as we convert more of AdvSIMD to gvec, this will matter. Signed-off-by: Richard Henderson Message-id: 20190209033847.9014-13-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/vec_helper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 10f17e4b5c..dfc635cf9a 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -638,6 +638,7 @@ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ d[i] = FUNC(n[i], stat); \ } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ } DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16) @@ -688,6 +689,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ d[i] = FUNC(n[i], m[i], stat); \ } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ } DO_3OP(gvec_fadd_h, float16_add, float16) From 0f8b09b22234460cb5b8766a25066cf6b5f06842 Mon Sep 17 00:00:00 2001 From: Sandra Loosemore Date: Fri, 15 Feb 2019 09:56:41 +0000 Subject: [PATCH 25/25] gdbstub: Send a reply to the vKill packet. Per the GDB remote protocol documentation https://sourceware.org/gdb/current/onlinedocs/gdb/Packets.html#index-vKill-packet the debug stub is expected to send a reply to the 'vKill' packet. At least some versions of GDB crash if the gdb stub simply exits without sending a reply. This patch fixes QEMU's gdb stub to conform to the expected behavior. Note that QEMU's existing handling of the legacy 'k' packet is correct: in that case GDB does not expect a reply, and QEMU does not send one. Signed-off-by: Sandra Loosemore Message-id: 1550008033-26540-1-git-send-email-sandra@codesourcery.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- gdbstub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gdbstub.c b/gdbstub.c index ff19579452..bc774ae992 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -1361,6 +1361,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf) break; } else if (strncmp(p, "Kill;", 5) == 0) { /* Kill the target */ + put_packet(s, "OK"); error_report("QEMU: Terminated via GDBstub"); exit(0); } else {