From bf559ee4025adaf9713e22def862d31f1db5994e Mon Sep 17 00:00:00 2001 From: Kunkun Jiang Date: Wed, 31 Mar 2021 14:47:13 +0800 Subject: [PATCH 01/43] hw/arm/smmuv3: Support 16K translation granule The driver can query some bits in SMMUv3 IDR5 to learn which translation granules are supported. Arm recommends that SMMUv3 implementations support at least 4K and 64K granules. But in the vSMMUv3, there seems to be no reason not to support the 16K translation granule. In addition, if 16K is not supported, vSVA will fail to be enabled in the future for a 16K guest kernel. So it's better to support it. Signed-off-by: Kunkun Jiang Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Peter Maydell --- hw/arm/smmuv3.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index 8705612535..228dc54b0b 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -259,8 +259,9 @@ static void smmuv3_init_regs(SMMUv3State *s) s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1); s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, 1); - /* 4K and 64K granule support */ + /* 4K, 16K and 64K granule support */ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1); + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */ @@ -503,7 +504,8 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) tg = CD_TG(cd, i); tt->granule_sz = tg2granule(tg, i); - if ((tt->granule_sz != 12 && tt->granule_sz != 16) || CD_ENDI(cd)) { + if ((tt->granule_sz != 12 && tt->granule_sz != 14 && + tt->granule_sz != 16) || CD_ENDI(cd)) { goto bad_cd; }
From 8196fe9d83d6519128b514f332418bae06513970 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 8 Apr 2021 17:24:02 +0100 Subject: [PATCH 02/43] target/arm: Make Thumb store insns UNDEF for Rn==1111 The Arm ARM specifies that for Thumb encodings of the various plain store insns, if the Rn field is 1111 then we must UNDEF. This is different from the Arm encodings, where this case is either UNPREDICTABLE or has well-defined behaviour. The exclusive stores, store-release and STRD do not have this UNDEF case for any encoding. Enforce the UNDEF for this case in the Thumb plain store insns.
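As a standalone illustration of the rule (an editorial sketch, not part of the patch; the helper name is invented for this note):

#include <stdbool.h>

/*
 * Sketch only: the decode rule stated above, as a predicate.
 * "thumb" mirrors DisasContext::thumb; "rn" is the decoded Rn field.
 */
static bool plain_store_rn_valid(bool thumb, int rn)
{
    /* Thumb encodings of plain stores with Rn == 0b1111 must UNDEF. */
    return !(thumb && rn == 15);
}

In the diff below this is implemented by returning false from the trans functions, which makes the decoder treat the encoding as unallocated, i.e. UNDEF.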
Fixes: https://bugs.launchpad.net/qemu/+bug/1922887 Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20210408162402.5822-1-peter.maydell@linaro.org --- target/arm/translate.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/target/arm/translate.c b/target/arm/translate.c index 7103da2d7a..68809e08f0 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -6476,6 +6476,14 @@ static bool op_store_rr(DisasContext *s, arg_ldst_rr *a, ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite; TCGv_i32 addr, tmp; + /* + * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it + * is either UNPREDICTABLE or has defined behaviour + */ + if (s->thumb && a->rn == 15) { + return false; + } + addr = op_addr_rr_pre(s, a); tmp = load_reg(s, a->rt); @@ -6620,6 +6628,14 @@ static bool op_store_ri(DisasContext *s, arg_ldst_ri *a, ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite; TCGv_i32 addr, tmp; + /* + * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it + * is either UNPREDICTABLE or has defined behaviour + */ + if (s->thumb && a->rn == 15) { + return false; + } + addr = op_addr_ri_pre(s, a); tmp = load_reg(s, a->rt); From 98f96050aacb1f48956413832ae36392169801a1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Apr 2021 11:30:58 -0700 Subject: [PATCH 03/43] target/arm: Fix mte_checkN We were incorrectly assuming that only the first byte of an MTE access is checked against the tags. But per the ARM, unaligned accesses are pre-decomposed into single-byte accesses. So by the time we reach the actual MTE check in the ARM pseudocode, all accesses are aligned. Therefore, the first failure is always either the first byte of the access, or the first byte of the granule. In addition, some of the arithmetic is off for last-first -> count. This does not become directly visible until a later patch that passes single bytes into this function, so ptr == ptr_last. Buglink: https://bugs.launchpad.net/bugs/1921948 Signed-off-by: Richard Henderson Message-id: 20210416183106.1516563-2-richard.henderson@linaro.org Reviewed-by: Peter Maydell [PMM: tweaked a comment] Signed-off-by: Peter Maydell --- target/arm/mte_helper.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 8be17e1b70..010d1c2e99 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -757,10 +757,10 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra) { int mmu_idx, ptr_tag, bit55; - uint64_t ptr_last, ptr_end, prev_page, next_page; - uint64_t tag_first, tag_end; - uint64_t tag_byte_first, tag_byte_end; - uint32_t esize, total, tag_count, tag_size, n, c; + uint64_t ptr_last, prev_page, next_page; + uint64_t tag_first, tag_last; + uint64_t tag_byte_first, tag_byte_last; + uint32_t total, tag_count, tag_size, n, c; uint8_t *mem1, *mem2; MMUAccessType type; @@ -779,29 +779,27 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; - esize = FIELD_EX32(desc, MTEDESC, ESIZE); total = FIELD_EX32(desc, MTEDESC, TSIZE); - /* Find the addr of the end of the access, and of the last element. */ - ptr_end = ptr + total; - ptr_last = ptr_end - esize; + /* Find the addr of the end of the access */ + ptr_last = ptr + total - 1; /* Round the bounds to the tag granule, and compute the number of tags. 
*/ tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); - tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE); - tag_count = (tag_end - tag_first) / TAG_GRANULE; + tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE); + tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1; /* Round the bounds to twice the tag granule, and compute the bytes. */ tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE); - tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE); + tag_byte_last = QEMU_ALIGN_DOWN(ptr_last, 2 * TAG_GRANULE); /* Locate the page boundaries. */ prev_page = ptr & TARGET_PAGE_MASK; next_page = prev_page + TARGET_PAGE_SIZE; - if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) { + if (likely(tag_last - prev_page <= TARGET_PAGE_SIZE)) { /* Memory access stays on one page. */ - tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE); + tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1; mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total, MMU_DATA_LOAD, tag_size, ra); if (!mem1) { @@ -815,9 +813,9 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr, MMU_DATA_LOAD, tag_size, ra); - tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE); + tag_size = ((tag_byte_last - next_page) / (2 * TAG_GRANULE)) + 1; mem2 = allocation_tag_mem(env, mmu_idx, next_page, type, - ptr_end - next_page, + ptr_last - next_page + 1, MMU_DATA_LOAD, tag_size, ra); /* @@ -838,15 +836,13 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, } /* - * If we failed, we know which granule. Compute the element that - * is first in that granule, and signal failure on that element. + * If we failed, we know which granule. For the first granule, the + * failure address is @ptr, the first byte accessed. Otherwise the + * failure address is the first byte of the nth granule. */ if (unlikely(n < tag_count)) { - uint64_t fail_ofs; - - fail_ofs = tag_first + n * TAG_GRANULE - ptr; - fail_ofs = ROUND_UP(fail_ofs, esize); - mte_check_fail(env, desc, ptr + fail_ofs, ra); + uint64_t fault = (n == 0 ? ptr : tag_first + n * TAG_GRANULE); + mte_check_fail(env, desc, fault, ra); } done:
From f8c8a8606071b2966f83ebaccc69714ac3cad548 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Apr 2021 11:30:59 -0700 Subject: [PATCH 04/43] target/arm: Split out mte_probe_int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split out a helper function from mte_checkN to perform all of the checking and address manipulation. So far, just use this in mte_checkN itself. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20210416183106.1516563-3-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/mte_helper.c | 52 +++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 010d1c2e99..2b5331f8db 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -753,33 +753,45 @@ static int checkN(uint8_t *mem, int odd, int cmp, int count) return n; } -uint64_t mte_checkN(CPUARMState *env, uint32_t desc, - uint64_t ptr, uintptr_t ra) +/** + * mte_probe_int() - helper for mte_probe and mte_check + * @env: CPU environment + * @desc: MTEDESC descriptor + * @ptr: virtual address of the base of the access + * @fault: return virtual address of the first check failure + * + * Internal routine for both mte_probe and mte_check.
+ * Return zero on failure, filling in *fault. + * Return negative on trivial success for tbi disabled. + * Return positive on success with tbi enabled. + */ +static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, + uintptr_t ra, uint32_t total, uint64_t *fault) { int mmu_idx, ptr_tag, bit55; uint64_t ptr_last, prev_page, next_page; uint64_t tag_first, tag_last; uint64_t tag_byte_first, tag_byte_last; - uint32_t total, tag_count, tag_size, n, c; + uint32_t tag_count, tag_size, n, c; uint8_t *mem1, *mem2; MMUAccessType type; bit55 = extract64(ptr, 55, 1); + *fault = ptr; /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ if (unlikely(!tbi_check(desc, bit55))) { - return ptr; + return -1; } ptr_tag = allocation_tag_from_addr(ptr); if (tcma_check(desc, bit55, ptr_tag)) { - goto done; + return 1; } mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; - total = FIELD_EX32(desc, MTEDESC, TSIZE); /* Find the addr of the end of the access */ ptr_last = ptr + total - 1; @@ -803,7 +815,7 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total, MMU_DATA_LOAD, tag_size, ra); if (!mem1) { - goto done; + return 1; } /* Perform all of the comparisons. */ n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count); @@ -829,23 +841,39 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, } if (n == c) { if (!mem2) { - goto done; + return 1; } n += checkN(mem2, 0, ptr_tag, tag_count - c); } } + if (likely(n == tag_count)) { + return 1; + } + /* * If we failed, we know which granule. For the first granule, the * failure address is @ptr, the first byte accessed. Otherwise the * failure address is the first byte of the nth granule. */ - if (unlikely(n < tag_count)) { - uint64_t fault = (n == 0 ? ptr : tag_first + n * TAG_GRANULE); - mte_check_fail(env, desc, fault, ra); + if (n > 0) { + *fault = tag_first + n * TAG_GRANULE; } + return 0; +} - done: +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + uint64_t fault; + uint32_t total = FIELD_EX32(desc, MTEDESC, TSIZE); + int ret = mte_probe_int(env, desc, ptr, ra, total, &fault); + + if (unlikely(ret == 0)) { + mte_check_fail(env, desc, fault, ra); + } else if (ret < 0) { + return ptr; + } return useronly_clean_ptr(ptr); } From 4a09a21345e8adb4734ecb5be59bac9c4d82aa85 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Apr 2021 11:31:00 -0700 Subject: [PATCH 05/43] target/arm: Fix unaligned checks for mte_check1, mte_probe1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were incorrectly assuming that only the first byte of an MTE access is checked against the tags. But per the ARM, unaligned accesses are pre-decomposed into single-byte accesses. So by the time we reach the actual MTE check in the ARM pseudocode, all accesses are aligned. We cannot tell a priori whether or not a given scalar access is aligned, therefore we must at least check. Use mte_probe_int, which is already set up for checking multiple granules. 
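To see why even a "single" scalar access needs the multi-granule machinery, here is a minimal worked sketch (editorial, not part of the patch) of the tag_first/tag_last/tag_count arithmetic that mte_probe_int already performs, assuming QEMU's 16-byte TAG_GRANULE:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define TAG_GRANULE 16ULL

int main(void)
{
    /* An unaligned 8-byte load starting 12 bytes into a granule. */
    uint64_t ptr = 0x100c;
    uint64_t ptr_last = ptr + 8 - 1;                  /* 0x1013 */
    uint64_t tag_first = ptr & ~(TAG_GRANULE - 1);    /* 0x1000 */
    uint64_t tag_last = ptr_last & ~(TAG_GRANULE - 1);/* 0x1010 */
    uint64_t tag_count = (tag_last - tag_first) / TAG_GRANULE + 1;

    /* Prints 2: the access straddles two granules, so both tags matter. */
    printf("granules checked: %" PRIu64 "\n", tag_count);
    return 0;
}

This is exactly the situation the mte-5 test added later in the series exercises with a load at granule offset 12.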
Buglink: https://bugs.launchpad.net/bugs/1921948 Tested-by: Alex Bennée Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20210416183106.1516563-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/mte_helper.c | 109 +++++++++++++--------------------------- 1 file changed, 35 insertions(+), 74 deletions(-) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 2b5331f8db..0ee0397eb2 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -617,80 +617,6 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc, } } -/* - * Perform an MTE checked access for a single logical or atomic access. - */ -static bool mte_probe1_int(CPUARMState *env, uint32_t desc, uint64_t ptr, - uintptr_t ra, int bit55) -{ - int mem_tag, mmu_idx, ptr_tag, size; - MMUAccessType type; - uint8_t *mem; - - ptr_tag = allocation_tag_from_addr(ptr); - - if (tcma_check(desc, bit55, ptr_tag)) { - return true; - } - - mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); - type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; - size = FIELD_EX32(desc, MTEDESC, ESIZE); - - mem = allocation_tag_mem(env, mmu_idx, ptr, type, size, - MMU_DATA_LOAD, 1, ra); - if (!mem) { - return true; - } - - mem_tag = load_tag1(ptr, mem); - return ptr_tag == mem_tag; -} - -/* - * No-fault version of mte_check1, to be used by SVE for MemSingleNF. - * Returns false if the access is Checked and the check failed. This - * is only intended to probe the tag -- the validity of the page must - * be checked beforehand. - */ -bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr) -{ - int bit55 = extract64(ptr, 55, 1); - - /* If TBI is disabled, the access is unchecked. */ - if (unlikely(!tbi_check(desc, bit55))) { - return true; - } - - return mte_probe1_int(env, desc, ptr, 0, bit55); -} - -uint64_t mte_check1(CPUARMState *env, uint32_t desc, - uint64_t ptr, uintptr_t ra) -{ - int bit55 = extract64(ptr, 55, 1); - - /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ - if (unlikely(!tbi_check(desc, bit55))) { - return ptr; - } - - if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) { - mte_check_fail(env, desc, ptr, ra); - } - - return useronly_clean_ptr(ptr); -} - -uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) -{ - return mte_check1(env, desc, ptr, GETPC()); -} - -/* - * Perform an MTE checked access for multiple logical accesses. - */ - /** * checkN: * @tag: tag memory to test @@ -882,6 +808,41 @@ uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr) return mte_checkN(env, desc, ptr, GETPC()); } +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + uint64_t fault; + uint32_t total = FIELD_EX32(desc, MTEDESC, ESIZE); + int ret = mte_probe_int(env, desc, ptr, ra, total, &fault); + + if (unlikely(ret == 0)) { + mte_check_fail(env, desc, fault, ra); + } else if (ret < 0) { + return ptr; + } + return useronly_clean_ptr(ptr); +} + +uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return mte_check1(env, desc, ptr, GETPC()); +} + +/* + * No-fault version of mte_check1, to be used by SVE for MemSingleNF. + * Returns false if the access is Checked and the check failed. This + * is only intended to probe the tag -- the validity of the page must + * be checked beforehand. 
+ */ +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + uint64_t fault; + uint32_t total = FIELD_EX32(desc, MTEDESC, ESIZE); + int ret = mte_probe_int(env, desc, ptr, 0, total, &fault); + + return ret != 0; +} + /* * Perform an MTE checked access for DC_ZVA. */
From 09641ef93112c45bc32cf86a4999d0e0532909c3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Apr 2021 11:31:01 -0700 Subject: [PATCH 06/43] test/tcg/aarch64: Add mte-5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Buglink: https://bugs.launchpad.net/bugs/1921948 Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20210416183106.1516563-5-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- tests/tcg/aarch64/Makefile.target | 2 +- tests/tcg/aarch64/mte-5.c | 44 +++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 tests/tcg/aarch64/mte-5.c diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target index 05b2622bfc..928357b10a 100644 --- a/tests/tcg/aarch64/Makefile.target +++ b/tests/tcg/aarch64/Makefile.target @@ -37,7 +37,7 @@ AARCH64_TESTS += bti-2 # MTE Tests ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_ARMV8_MTE),) -AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-6 +AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6 mte-%: CFLAGS += -march=armv8.5-a+memtag endif diff --git a/tests/tcg/aarch64/mte-5.c b/tests/tcg/aarch64/mte-5.c new file mode 100644 index 0000000000..6dbd6ab3ea --- /dev/null +++ b/tests/tcg/aarch64/mte-5.c @@ -0,0 +1,44 @@ +/* + * Memory tagging, faulting unaligned access. + * + * Copyright (c) 2021 Linaro Ltd + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "mte.h" + +void pass(int sig, siginfo_t *info, void *uc) +{ + assert(info->si_code == SEGV_MTESERR); + exit(0); +} + +int main(int ac, char **av) +{ + struct sigaction sa; + void *p0, *p1, *p2; + long excl = 1; + + enable_mte(PR_MTE_TCF_SYNC); + p0 = alloc_mte_mem(sizeof(*p0)); + + /* Create two differently tagged pointers. */ + asm("irg %0,%1,%2" : "=r"(p1) : "r"(p0), "r"(excl)); + asm("gmi %0,%1,%0" : "+r"(excl) : "r" (p1)); + assert(excl != 1); + asm("irg %0,%1,%2" : "=r"(p2) : "r"(p0), "r"(excl)); + assert(p1 != p2); + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = pass; + sa.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &sa, NULL); + + /* Store two different tags in sequential granules. */ + asm("stg %0, [%0]" : : "r"(p1)); + asm("stg %0, [%0]" : : "r"(p2 + 16)); + + /* Perform an unaligned load crossing the granules. */ + asm volatile("ldr %0, [%1]" : "=r"(p0) : "r"(p1 + 12)); + abort(); +}
From 28f3250306e52ae94df9faab93b9d0167fe6b587 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Apr 2021 11:31:02 -0700 Subject: [PATCH 07/43] target/arm: Replace MTEDESC ESIZE+TSIZE with SIZEM1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After recent changes, mte_checkN does not use ESIZE, and mte_check1 never used TSIZE. We can combine the two into a single field: SIZEM1. Choose to pass size - 1 because size == 0 is never used, our immediate need in mte_probe_int is for the address of the last byte (ptr + size - 1), and since almost all operations are powers of 2, this makes the immediate constant one bit smaller.
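A quick worked example of that last claim (editorial sketch, not part of the patch): for a power-of-two access size, encoding size - 1 needs one fewer bit than encoding size.

#include <assert.h>

/* Number of significant bits needed to represent v. */
static int bits_needed(unsigned v)
{
    int n = 0;
    while (v) {
        n++;
        v >>= 1;
    }
    return n;
}

int main(void)
{
    /* A 16-byte access: encoding the size needs 5 bits (0b10000)... */
    assert(bits_needed(16) == 5);
    /* ...but encoding size - 1 needs only 4 (0b1111). */
    assert(bits_needed(16 - 1) == 4);
    return 0;
}

This matters because the whole descriptor is packed into a 32-bit immediate; see the MTEDESC FIELD definitions in the diff below.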
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20210416183106.1516563-6-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/internals.h | 4 ++-- target/arm/mte_helper.c | 18 ++++++++---------- target/arm/translate-a64.c | 5 ++--- target/arm/translate-sve.c | 5 ++--- 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/target/arm/internals.h b/target/arm/internals.h index f11bd32696..2c77f2d50f 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -26,6 +26,7 @@ #define TARGET_ARM_INTERNALS_H #include "hw/registerfields.h" +#include "tcg/tcg-gvec-desc.h" #include "syndrome.h" /* register banks for CPU modes */ @@ -1142,8 +1143,7 @@ FIELD(MTEDESC, MIDX, 0, 4) FIELD(MTEDESC, TBI, 4, 2) FIELD(MTEDESC, TCMA, 6, 2) FIELD(MTEDESC, WRITE, 8, 1) -FIELD(MTEDESC, ESIZE, 9, 5) -FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */ +FIELD(MTEDESC, SIZEM1, 9, SIMD_DATA_BITS - 9) /* size - 1 */ bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr); uint64_t mte_check1(CPUARMState *env, uint32_t desc, diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 0ee0397eb2..804057d3f6 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -692,13 +692,13 @@ static int checkN(uint8_t *mem, int odd, int cmp, int count) * Return positive on success with tbi enabled. */ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, - uintptr_t ra, uint32_t total, uint64_t *fault) + uintptr_t ra, uint64_t *fault) { int mmu_idx, ptr_tag, bit55; uint64_t ptr_last, prev_page, next_page; uint64_t tag_first, tag_last; uint64_t tag_byte_first, tag_byte_last; - uint32_t tag_count, tag_size, n, c; + uint32_t sizem1, tag_count, tag_size, n, c; uint8_t *mem1, *mem2; MMUAccessType type; @@ -718,9 +718,10 @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; + sizem1 = FIELD_EX32(desc, MTEDESC, SIZEM1); /* Find the addr of the end of the access */ - ptr_last = ptr + total - 1; + ptr_last = ptr + sizem1; /* Round the bounds to the tag granule, and compute the number of tags. */ tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); @@ -738,7 +739,7 @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, if (likely(tag_last - prev_page <= TARGET_PAGE_SIZE)) { /* Memory access stays on one page. 
*/ tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1; - mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total, + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1, MMU_DATA_LOAD, tag_size, ra); if (!mem1) { return 1; @@ -792,8 +793,7 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra) { uint64_t fault; - uint32_t total = FIELD_EX32(desc, MTEDESC, TSIZE); - int ret = mte_probe_int(env, desc, ptr, ra, total, &fault); + int ret = mte_probe_int(env, desc, ptr, ra, &fault); if (unlikely(ret == 0)) { mte_check_fail(env, desc, fault, ra); @@ -812,8 +812,7 @@ uint64_t mte_check1(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra) { uint64_t fault; - uint32_t total = FIELD_EX32(desc, MTEDESC, ESIZE); - int ret = mte_probe_int(env, desc, ptr, ra, total, &fault); + int ret = mte_probe_int(env, desc, ptr, ra, &fault); if (unlikely(ret == 0)) { mte_check_fail(env, desc, fault, ra); @@ -837,8 +836,7 @@ uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr) { uint64_t fault; - uint32_t total = FIELD_EX32(desc, MTEDESC, ESIZE); - int ret = mte_probe_int(env, desc, ptr, 0, total, &fault); + int ret = mte_probe_int(env, desc, ptr, 0, &fault); return ret != 0; } diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 0b42e53500..3af00ae90e 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -272,7 +272,7 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1); tcg_desc = tcg_const_i32(desc); ret = new_tmp_a64(s); @@ -306,8 +306,7 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize); - desc = FIELD_DP32(desc, MTEDESC, TSIZE, total_size); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); tcg_desc = tcg_const_i32(desc); ret = new_tmp_a64(s); diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 0eefb61214..5179c1f836 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4509,8 +4509,7 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz); - desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1); desc <<= SVE_MTEDESC_SHIFT; } else { addr = clean_data_tbi(s, addr); @@ -5189,7 +5188,7 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1); desc <<= SVE_MTEDESC_SHIFT; } desc = simd_desc(vsz, vsz, desc | scale); From bd47b61c5ebc8c5f696910644125a28115d3f6ea Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: 
Fri, 16 Apr 2021 11:31:03 -0700 Subject: [PATCH 08/43] target/arm: Merge mte_check1, mte_checkN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mte_check1 and mte_checkN functions are now identical. Drop mte_check1 and rename mte_checkN to mte_check. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20210416183106.1516563-7-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-a64.h | 3 +-- target/arm/internals.h | 5 +---- target/arm/mte_helper.c | 26 +++----------------------- target/arm/sve_helper.c | 14 +++++++------- target/arm/translate-a64.c | 4 ++-- 5 files changed, 14 insertions(+), 38 deletions(-) diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index c139fa81f9..7b706571bb 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -104,8 +104,7 @@ DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) -DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64) -DEF_HELPER_FLAGS_3(mte_checkN, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(mte_check, TCG_CALL_NO_WG, i64, env, i32, i64) DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64) DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32) diff --git a/target/arm/internals.h b/target/arm/internals.h index 2c77f2d50f..af1db2cd9c 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1146,10 +1146,7 @@ FIELD(MTEDESC, WRITE, 8, 1) FIELD(MTEDESC, SIZEM1, 9, SIMD_DATA_BITS - 9) /* size - 1 */ bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr); -uint64_t mte_check1(CPUARMState *env, uint32_t desc, - uint64_t ptr, uintptr_t ra); -uint64_t mte_checkN(CPUARMState *env, uint32_t desc, - uint64_t ptr, uintptr_t ra); +uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra); static inline int allocation_tag_from_addr(uint64_t ptr) { diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 804057d3f6..c91d561ce3 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -789,8 +789,7 @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, return 0; } -uint64_t mte_checkN(CPUARMState *env, uint32_t desc, - uint64_t ptr, uintptr_t ra) +uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra) { uint64_t fault; int ret = mte_probe_int(env, desc, ptr, ra, &fault); @@ -803,28 +802,9 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, return useronly_clean_ptr(ptr); } -uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr) +uint64_t HELPER(mte_check)(CPUARMState *env, uint32_t desc, uint64_t ptr) { - return mte_checkN(env, desc, ptr, GETPC()); -} - -uint64_t mte_check1(CPUARMState *env, uint32_t desc, - uint64_t ptr, uintptr_t ra) -{ - uint64_t fault; - int ret = mte_probe_int(env, desc, ptr, ra, &fault); - - if (unlikely(ret == 0)) { - mte_check_fail(env, desc, fault, ra); - } else if (ret < 0) { - return ptr; - } - return useronly_clean_ptr(ptr); -} - -uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) -{ - return mte_check1(env, desc, ptr, GETPC()); + return mte_check(env, desc, ptr, GETPC()); } /* diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index fd6c58f96a..b63ddfc7f9 100644 --- a/target/arm/sve_helper.c +++ 
b/target/arm/sve_helper.c @@ -4442,7 +4442,7 @@ static void sve_cont_ldst_mte_check1(SVEContLdSt *info, CPUARMState *env, uintptr_t ra) { sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, - mtedesc, ra, mte_check1); + mtedesc, ra, mte_check); } static void sve_cont_ldst_mte_checkN(SVEContLdSt *info, CPUARMState *env, @@ -4451,7 +4451,7 @@ static void sve_cont_ldst_mte_checkN(SVEContLdSt *info, CPUARMState *env, uintptr_t ra) { sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, - mtedesc, ra, mte_checkN); + mtedesc, ra, mte_check); } @@ -4826,7 +4826,7 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, if (fault == FAULT_FIRST) { /* Trapping mte check for the first-fault element. */ if (mtedesc) { - mte_check1(env, mtedesc, addr + mem_off, retaddr); + mte_check(env, mtedesc, addr + mem_off, retaddr); } /* @@ -5373,7 +5373,7 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, info.attrs, BP_MEM_READ, retaddr); } if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { - mte_check1(env, mtedesc, addr, retaddr); + mte_check(env, mtedesc, addr, retaddr); } host_fn(&scratch, reg_off, info.host); } else { @@ -5386,7 +5386,7 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, BP_MEM_READ, retaddr); } if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { - mte_check1(env, mtedesc, addr, retaddr); + mte_check(env, mtedesc, addr, retaddr); } tlb_fn(env, &scratch, reg_off, addr, retaddr); } @@ -5552,7 +5552,7 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, */ addr = base + (off_fn(vm, reg_off) << scale); if (mtedesc) { - mte_check1(env, mtedesc, addr, retaddr); + mte_check(env, mtedesc, addr, retaddr); } tlb_fn(env, vd, reg_off, addr, retaddr); @@ -5773,7 +5773,7 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, } if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { - mte_check1(env, mtedesc, addr, retaddr); + mte_check(env, mtedesc, addr, retaddr); } } i += 1; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 3af00ae90e..a68d5dd5d1 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -276,7 +276,7 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, tcg_desc = tcg_const_i32(desc); ret = new_tmp_a64(s); - gen_helper_mte_check1(ret, cpu_env, tcg_desc, addr); + gen_helper_mte_check(ret, cpu_env, tcg_desc, addr); tcg_temp_free_i32(tcg_desc); return ret; @@ -310,7 +310,7 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, tcg_desc = tcg_const_i32(desc); ret = new_tmp_a64(s); - gen_helper_mte_checkN(ret, cpu_env, tcg_desc, addr); + gen_helper_mte_check(ret, cpu_env, tcg_desc, addr); tcg_temp_free_i32(tcg_desc); return ret; From d304d280b3167289323f3f2c6c2fbfa1dfe8d1d7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Apr 2021 11:31:04 -0700 Subject: [PATCH 09/43] target/arm: Rename mte_probe1 to mte_probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For consistency with the mte_check1 + mte_checkN merge to mte_check, rename the probe function as well. 
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20210416183106.1516563-8-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/internals.h | 2 +- target/arm/mte_helper.c | 6 +++--- target/arm/sve_helper.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/target/arm/internals.h b/target/arm/internals.h index af1db2cd9c..886db56b58 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1145,7 +1145,7 @@ FIELD(MTEDESC, TCMA, 6, 2) FIELD(MTEDESC, WRITE, 8, 1) FIELD(MTEDESC, SIZEM1, 9, SIMD_DATA_BITS - 9) /* size - 1 */ -bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr); +bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr); uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra); static inline int allocation_tag_from_addr(uint64_t ptr) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index c91d561ce3..a6fccc6e69 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -121,7 +121,7 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, * exception for inaccessible pages, and resolves the virtual address * into the softmmu tlb. * - * When RA == 0, this is for mte_probe1. The page is expected to be + * When RA == 0, this is for mte_probe. The page is expected to be * valid. Indicate to probe_access_flags no-fault, then assert that * we received a valid page. */ @@ -808,12 +808,12 @@ uint64_t HELPER(mte_check)(CPUARMState *env, uint32_t desc, uint64_t ptr) } /* - * No-fault version of mte_check1, to be used by SVE for MemSingleNF. + * No-fault version of mte_check, to be used by SVE for MemSingleNF. * Returns false if the access is Checked and the check failed. This * is only intended to probe the tag -- the validity of the page must * be checked beforehand. */ -bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr) +bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr) { uint64_t fault; int ret = mte_probe_int(env, desc, ptr, 0, &fault); diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index b63ddfc7f9..982240d104 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -4869,7 +4869,7 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, /* Watchpoint hit, see below. */ goto do_fault; } - if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) { goto do_fault; } /* @@ -4919,7 +4919,7 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, & BP_MEM_READ)) { goto do_fault; } - if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) { goto do_fault; } host_fn(vd, reg_off, host + mem_off); @@ -5588,7 +5588,7 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, } if (mtedesc && arm_tlb_mte_tagged(&info.attrs) && - !mte_probe1(env, mtedesc, addr)) { + !mte_probe(env, mtedesc, addr)) { goto fault; } From 4c3310c73f7349f1aabae55a7babd6419eeb1d04 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Apr 2021 11:31:05 -0700 Subject: [PATCH 10/43] target/arm: Simplify sve mte checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that mte_check1 and mte_checkN have been merged, we can merge sve_cont_ldst_mte_check1 and sve_cont_ldst_mte_checkN. 
Which means that we can eliminate the function pointer into sve_ldN_r and sve_stN_r, calling sve_cont_ldst_mte_check directly. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20210416183106.1516563-9-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/sve_helper.c | 84 +++++++++++++---------------------------- 1 file changed, 26 insertions(+), 58 deletions(-) diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 982240d104..c068dfa0d5 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -4382,13 +4382,9 @@ static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, #endif } -typedef uint64_t mte_check_fn(CPUARMState *, uint32_t, uint64_t, uintptr_t); - -static inline QEMU_ALWAYS_INLINE -void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env, - uint64_t *vg, target_ulong addr, int esize, - int msize, uint32_t mtedesc, uintptr_t ra, - mte_check_fn *check) +static void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, int esize, + int msize, uint32_t mtedesc, uintptr_t ra) { intptr_t mem_off, reg_off, reg_last; @@ -4405,7 +4401,7 @@ void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env, uint64_t pg = vg[reg_off >> 6]; do { if ((pg >> (reg_off & 63)) & 1) { - check(env, mtedesc, addr, ra); + mte_check(env, mtedesc, addr, ra); } reg_off += esize; mem_off += msize; @@ -4422,7 +4418,7 @@ void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env, uint64_t pg = vg[reg_off >> 6]; do { if ((pg >> (reg_off & 63)) & 1) { - check(env, mtedesc, addr, ra); + mte_check(env, mtedesc, addr, ra); } reg_off += esize; mem_off += msize; @@ -4431,30 +4427,6 @@ void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env, } } -typedef void sve_cont_ldst_mte_check_fn(SVEContLdSt *info, CPUARMState *env, - uint64_t *vg, target_ulong addr, - int esize, int msize, uint32_t mtedesc, - uintptr_t ra); - -static void sve_cont_ldst_mte_check1(SVEContLdSt *info, CPUARMState *env, - uint64_t *vg, target_ulong addr, - int esize, int msize, uint32_t mtedesc, - uintptr_t ra) -{ - sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, - mtedesc, ra, mte_check); -} - -static void sve_cont_ldst_mte_checkN(SVEContLdSt *info, CPUARMState *env, - uint64_t *vg, target_ulong addr, - int esize, int msize, uint32_t mtedesc, - uintptr_t ra) -{ - sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, - mtedesc, ra, mte_check); -} - - /* * Common helper for all contiguous 1,2,3,4-register predicated stores. */ @@ -4463,8 +4435,7 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, uint32_t desc, const uintptr_t retaddr, const int esz, const int msz, const int N, uint32_t mtedesc, sve_ldst1_host_fn *host_fn, - sve_ldst1_tlb_fn *tlb_fn, - sve_cont_ldst_mte_check_fn *mte_check_fn) + sve_ldst1_tlb_fn *tlb_fn) { const unsigned rd = simd_data(desc); const intptr_t reg_max = simd_oprsz(desc); @@ -4493,9 +4464,9 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, * Handle mte checks for all active elements. * Since TBI must be set for MTE, !mtedesc => !mte_active. 
*/ - if (mte_check_fn && mtedesc) { - mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, - mtedesc, retaddr); + if (mtedesc) { + sve_cont_ldst_mte_check(&info, env, vg, addr, 1 << esz, N << msz, + mtedesc, retaddr); } flags = info.page[0].flags | info.page[1].flags; @@ -4621,8 +4592,7 @@ void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, mtedesc = 0; } - sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, - N == 1 ? sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); + sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn); } #define DO_LD1_1(NAME, ESZ) \ @@ -4630,7 +4600,7 @@ void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0, \ - sve_##NAME##_host, sve_##NAME##_tlb, NULL); \ + sve_##NAME##_host, sve_##NAME##_tlb); \ } \ void HELPER(sve_##NAME##_r_mte)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ @@ -4644,22 +4614,22 @@ void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ - sve_##NAME##_le_host, sve_##NAME##_le_tlb, NULL); \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ } \ void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ - sve_##NAME##_be_host, sve_##NAME##_be_tlb, NULL); \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ } \ void HELPER(sve_##NAME##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint32_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ } \ void HELPER(sve_##NAME##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint32_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ @@ -4693,7 +4663,7 @@ void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0, \ - sve_ld1bb_host, sve_ld1bb_tlb, NULL); \ + sve_ld1bb_host, sve_ld1bb_tlb); \ } \ void HELPER(sve_ld##N##bb_r_mte)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ @@ -4707,13 +4677,13 @@ void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ - sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb, NULL); \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ } \ void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ - sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb, NULL); \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ } \ void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ @@ -5090,8 +5060,7 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, const uintptr_t retaddr, const int esz, const int msz, const int N, uint32_t mtedesc, sve_ldst1_host_fn *host_fn, - sve_ldst1_tlb_fn *tlb_fn, - sve_cont_ldst_mte_check_fn *mte_check_fn) + sve_ldst1_tlb_fn *tlb_fn) { const unsigned rd = simd_data(desc); const intptr_t reg_max = simd_oprsz(desc); @@ -5117,9 +5086,9 @@ void 
sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, * Handle mte checks for all active elements. * Since TBI must be set for MTE, !mtedesc => !mte_active. */ - if (mte_check_fn && mtedesc) { - mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, - mtedesc, retaddr); + if (mtedesc) { + sve_cont_ldst_mte_check(&info, env, vg, addr, 1 << esz, N << msz, + mtedesc, retaddr); } flags = info.page[0].flags | info.page[1].flags; @@ -5233,8 +5202,7 @@ void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, mtedesc = 0; } - sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, - N == 1 ? sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); + sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn); } #define DO_STN_1(N, NAME, ESZ) \ @@ -5242,7 +5210,7 @@ void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0, \ - sve_st1##NAME##_host, sve_st1##NAME##_tlb, NULL); \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ } \ void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ @@ -5256,13 +5224,13 @@ void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ - sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb, NULL); \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ } \ void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ - sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb, NULL); \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ } \ void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ From 33e74c3172defc841692b4281d2dbd8f8a966e17 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Apr 2021 11:31:06 -0700 Subject: [PATCH 11/43] target/arm: Remove log2_esize parameter to gen_mte_checkN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The log2_esize parameter is not used except trivially. Drop the parameter and the deferral to gen_mte_check1. This fixes a bug in that the parameters as documented in the header file were the reverse from those in the implementation. Which meant that translate-sve.c was passing the parameters in the wrong order. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20210416183106.1516563-10-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 15 +++++++-------- target/arm/translate-a64.h | 2 +- target/arm/translate-sve.c | 4 ++-- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index a68d5dd5d1..f35a5e8174 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -295,9 +295,9 @@ TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, * For MTE, check multiple logical sequential accesses. 
*/ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int log2_esize, int total_size) + bool tag_checked, int size) { - if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) { + if (tag_checked && s->mte_active[0]) { TCGv_i32 tcg_desc; TCGv_i64 ret; int desc = 0; @@ -306,7 +306,7 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1); tcg_desc = tcg_const_i32(desc); ret = new_tmp_a64(s); @@ -315,7 +315,7 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, return ret; } - return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize); + return clean_data_tbi(s, addr); } typedef struct DisasCompare64 { @@ -2965,8 +2965,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) } clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, - (wback || rn != 31) && !set_tag, - size, 2 << size); + (wback || rn != 31) && !set_tag, 2 << size); if (is_vector) { if (is_load) { @@ -3713,7 +3712,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) * promote consecutive little-endian elements below. */ clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, - size, total); + total); /* * Consecutive little-endian elements from a single register @@ -3866,7 +3865,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) tcg_rn = cpu_reg_sp(s, rn); clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, - scale, total); + total); tcg_ebytes = tcg_const_i64(1 << scale); for (xs = 0; xs < selem; xs++) { diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index 3668b671dd..868d355048 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -44,7 +44,7 @@ TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, bool tag_checked, int log2_size); TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int count, int log2_esize); + bool tag_checked, int size); /* We should have at some point before trying to access an FP register * done the necessary access check, so assert that diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 5179c1f836..584c4d047c 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4264,7 +4264,7 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) dirty_addr = tcg_temp_new_i64(); tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); - clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); tcg_temp_free_i64(dirty_addr); /* @@ -4352,7 +4352,7 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) dirty_addr = tcg_temp_new_i64(); tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); - clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); tcg_temp_free_i64(dirty_addr); /* Note that unpredicated load/store of vector/predicate registers From a736cbc303f3c3f79b7b0b09e0dd4e18c8bcf94c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:27 
-0700 Subject: [PATCH 12/43] target/arm: Fix decode of align in VLDST_single The encoding of size = 2 and size = 3 had the incorrect decode for align, overlapping the stride field. This error was hidden by what should have been unnecessary masking in translate. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/neon-ls.decode | 4 ++-- target/arm/translate-neon.c.inc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/target/arm/neon-ls.decode b/target/arm/neon-ls.decode index c17f5019e3..0a2a0e15db 100644 --- a/target/arm/neon-ls.decode +++ b/target/arm/neon-ls.decode @@ -46,7 +46,7 @@ VLD_all_lanes 1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \ VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 00 n:2 reg_idx:3 align:1 rm:4 \ vd=%vd_dp size=0 stride=1 -VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 01 n:2 reg_idx:2 align:2 rm:4 \ +VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 01 n:2 reg_idx:2 . align:1 rm:4 \ vd=%vd_dp size=1 stride=%imm1_5_p1 -VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 10 n:2 reg_idx:1 align:3 rm:4 \ +VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 10 n:2 reg_idx:1 . align:2 rm:4 \ vd=%vd_dp size=2 stride=%imm1_6_p1 diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index f6c68e30ab..0e5828744b 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -606,7 +606,7 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) switch (nregs) { case 1: if (((a->align & (1 << a->size)) != 0) || - (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) { + (a->size == 2 && (a->align == 1 || a->align == 2))) { return false; } break; @@ -621,7 +621,7 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) } break; case 4: - if ((a->size == 2) && ((a->align & 3) == 3)) { + if (a->size == 2 && a->align == 3) { return false; } break; From 6a01eab7d823d8ae7430015e27922370f4bf9107 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:28 -0700 Subject: [PATCH 13/43] target/arm: Rename TBFLAG_A32, SCTLR_B We're about to rearrange the macro expansion surrounding tbflags, and this field name will be expanded using the bit definition of the same name, resulting in a token pasting error. So SCTLR_B -> SCTLR__B in the 3 uses, and document it. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 2 +- target/arm/helper.c | 2 +- target/arm/translate.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 193a49ec7f..304e0a6af3 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3423,7 +3423,7 @@ FIELD(TBFLAG_A32, VECSTRIDE, 12, 2) /* Not cached. */ */ FIELD(TBFLAG_A32, XSCALE_CPAR, 12, 2) FIELD(TBFLAG_A32, VFPEN, 14, 1) /* Partially cached, minus FPEXC. 
*/ -FIELD(TBFLAG_A32, SCTLR_B, 15, 1) +FIELD(TBFLAG_A32, SCTLR__B, 15, 1) /* Cannot overlap with SCTLR_B */ FIELD(TBFLAG_A32, HSTR_ACTIVE, 16, 1) /* * Indicates whether cp register reads and writes by guest code should access diff --git a/target/arm/helper.c b/target/arm/helper.c index d9220be7c5..556b9d4f0a 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -13003,7 +13003,7 @@ static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el, bool sctlr_b = arm_sctlr_b(env); if (sctlr_b) { - flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, 1); + flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR__B, 1); } if (arm_cpu_data_is_big_endian_a32(env, sctlr_b)) { flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1); diff --git a/target/arm/translate.c b/target/arm/translate.c index 68809e08f0..2de4252953 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -8895,7 +8895,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE; dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL); - dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B); + dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR__B); dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE); dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS); dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN); From ae6eb1e9b3ccc211d96261a5c650e6650b508aa6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:29 -0700 Subject: [PATCH 14/43] target/arm: Rename TBFLAG_ANY, PSTATE_SS We're about to rearrange the macro expansion surrounding tbflags, and this field name will be expanded using the bit definition of the same name, resulting in a token pasting error. So PSTATE_SS -> PSTATE__SS in the uses, and document it. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 2 +- target/arm/helper.c | 4 ++-- target/arm/translate-a64.c | 2 +- target/arm/translate.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 304e0a6af3..4cbf2db3e3 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3396,7 +3396,7 @@ typedef ARMCPU ArchCPU; */ FIELD(TBFLAG_ANY, AARCH64_STATE, 31, 1) FIELD(TBFLAG_ANY, SS_ACTIVE, 30, 1) -FIELD(TBFLAG_ANY, PSTATE_SS, 29, 1) /* Not cached. */ +FIELD(TBFLAG_ANY, PSTATE__SS, 29, 1) /* Not cached. */ FIELD(TBFLAG_ANY, BE_DATA, 28, 1) FIELD(TBFLAG_ANY, MMUIDX, 24, 4) /* Target EL if we take a floating-point-disabled exception */ diff --git a/target/arm/helper.c b/target/arm/helper.c index 556b9d4f0a..cd8dec126f 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -13333,11 +13333,11 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, * 0 x Inactive (the TB flag for SS is always 0) * 1 0 Active-pending * 1 1 Active-not-pending - * SS_ACTIVE is set in hflags; PSTATE_SS is computed every TB. + * SS_ACTIVE is set in hflags; PSTATE__SS is computed every TB. 
*/ if (FIELD_EX32(flags, TBFLAG_ANY, SS_ACTIVE) && (env->pstate & PSTATE_SS)) { - flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1); + flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE__SS, 1); } *pflags = flags; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index f35a5e8174..64b3a5200c 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -14733,7 +14733,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, * end the TB */ dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE); - dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS); + dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE__SS); dc->is_ldex = false; dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL); diff --git a/target/arm/translate.c b/target/arm/translate.c index 2de4252953..271c53dadb 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -8925,7 +8925,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) * end the TB */ dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE); - dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS); + dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE__SS); dc->is_ldex = false; dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK; From a729a46b05ab09e473cd757ee7a62373a175fa62 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:30 -0700 Subject: [PATCH 15/43] target/arm: Add wrapper macros for accessing tbflags We're about to split tbflags into two parts. These macros will ensure that the correct part is used with the correct set of bits. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-5-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 22 +++++++++- target/arm/helper-a64.c | 2 +- target/arm/helper.c | 85 +++++++++++++++++--------------------- target/arm/translate-a64.c | 36 ++++++++-------- target/arm/translate.c | 48 ++++++++++----------- 5 files changed, 101 insertions(+), 92 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 4cbf2db3e3..b798ff8115 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3462,6 +3462,26 @@ FIELD(TBFLAG_A64, TCMA, 16, 2) FIELD(TBFLAG_A64, MTE_ACTIVE, 18, 1) FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1) +/* + * Helpers for using the above. 
+ */ +#define DP_TBFLAG_ANY(DST, WHICH, VAL) \ + (DST = FIELD_DP32(DST, TBFLAG_ANY, WHICH, VAL)) +#define DP_TBFLAG_A64(DST, WHICH, VAL) \ + (DST = FIELD_DP32(DST, TBFLAG_A64, WHICH, VAL)) +#define DP_TBFLAG_A32(DST, WHICH, VAL) \ + (DST = FIELD_DP32(DST, TBFLAG_A32, WHICH, VAL)) +#define DP_TBFLAG_M32(DST, WHICH, VAL) \ + (DST = FIELD_DP32(DST, TBFLAG_M32, WHICH, VAL)) +#define DP_TBFLAG_AM32(DST, WHICH, VAL) \ + (DST = FIELD_DP32(DST, TBFLAG_AM32, WHICH, VAL)) + +#define EX_TBFLAG_ANY(IN, WHICH) FIELD_EX32(IN, TBFLAG_ANY, WHICH) +#define EX_TBFLAG_A64(IN, WHICH) FIELD_EX32(IN, TBFLAG_A64, WHICH) +#define EX_TBFLAG_A32(IN, WHICH) FIELD_EX32(IN, TBFLAG_A32, WHICH) +#define EX_TBFLAG_M32(IN, WHICH) FIELD_EX32(IN, TBFLAG_M32, WHICH) +#define EX_TBFLAG_AM32(IN, WHICH) FIELD_EX32(IN, TBFLAG_AM32, WHICH) + /** * cpu_mmu_index: * @env: The cpu environment @@ -3472,7 +3492,7 @@ FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1) */ static inline int cpu_mmu_index(CPUARMState *env, bool ifetch) { - return FIELD_EX32(env->hflags, TBFLAG_ANY, MMUIDX); + return EX_TBFLAG_ANY(env->hflags, MMUIDX); } static inline bool bswap_code(bool sctlr_b) diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 061c8ff846..9cc3b066e2 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -1020,7 +1020,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) * the hflags rebuild, since we can pull the composite TBII field * from there. */ - tbii = FIELD_EX32(env->hflags, TBFLAG_A64, TBII); + tbii = EX_TBFLAG_A64(env->hflags, TBII); if ((tbii >> extract64(new_pc, 55, 1)) & 1) { /* TBI is enabled. */ int core_mmu_idx = cpu_mmu_index(env, false); diff --git a/target/arm/helper.c b/target/arm/helper.c index cd8dec126f..2769e6fd35 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -12987,12 +12987,11 @@ ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env) static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el, ARMMMUIdx mmu_idx, uint32_t flags) { - flags = FIELD_DP32(flags, TBFLAG_ANY, FPEXC_EL, fp_el); - flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX, - arm_to_core_mmu_idx(mmu_idx)); + DP_TBFLAG_ANY(flags, FPEXC_EL, fp_el); + DP_TBFLAG_ANY(flags, MMUIDX, arm_to_core_mmu_idx(mmu_idx)); if (arm_singlestep_active(env)) { - flags = FIELD_DP32(flags, TBFLAG_ANY, SS_ACTIVE, 1); + DP_TBFLAG_ANY(flags, SS_ACTIVE, 1); } return flags; } @@ -13003,12 +13002,12 @@ static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el, bool sctlr_b = arm_sctlr_b(env); if (sctlr_b) { - flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR__B, 1); + DP_TBFLAG_A32(flags, SCTLR__B, 1); } if (arm_cpu_data_is_big_endian_a32(env, sctlr_b)) { - flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1); + DP_TBFLAG_ANY(flags, BE_DATA, 1); } - flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env)); + DP_TBFLAG_A32(flags, NS, !access_secure_reg(env)); return rebuild_hflags_common(env, fp_el, mmu_idx, flags); } @@ -13019,7 +13018,7 @@ static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el, uint32_t flags = 0; if (arm_v7m_is_handler_mode(env)) { - flags = FIELD_DP32(flags, TBFLAG_M32, HANDLER, 1); + DP_TBFLAG_M32(flags, HANDLER, 1); } /* @@ -13030,7 +13029,7 @@ static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el, if (arm_feature(env, ARM_FEATURE_V8) && !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) && (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) { - flags = FIELD_DP32(flags, TBFLAG_M32, STACKCHECK, 1); + DP_TBFLAG_M32(flags, STACKCHECK, 1); } return rebuild_hflags_common_32(env, fp_el, 
mmu_idx, flags); @@ -13040,8 +13039,7 @@ static uint32_t rebuild_hflags_aprofile(CPUARMState *env) { int flags = 0; - flags = FIELD_DP32(flags, TBFLAG_ANY, DEBUG_TARGET_EL, - arm_debug_target_el(env)); + DP_TBFLAG_ANY(flags, DEBUG_TARGET_EL, arm_debug_target_el(env)); return flags; } @@ -13051,12 +13049,12 @@ static uint32_t rebuild_hflags_a32(CPUARMState *env, int fp_el, uint32_t flags = rebuild_hflags_aprofile(env); if (arm_el_is_aa64(env, 1)) { - flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1); + DP_TBFLAG_A32(flags, VFPEN, 1); } if (arm_current_el(env) < 2 && env->cp15.hstr_el2 && (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { - flags = FIELD_DP32(flags, TBFLAG_A32, HSTR_ACTIVE, 1); + DP_TBFLAG_A32(flags, HSTR_ACTIVE, 1); } return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); @@ -13071,14 +13069,14 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, uint64_t sctlr; int tbii, tbid; - flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1); + DP_TBFLAG_ANY(flags, AARCH64_STATE, 1); /* Get control bits for tagged addresses. */ tbid = aa64_va_parameter_tbi(tcr, mmu_idx); tbii = tbid & ~aa64_va_parameter_tbid(tcr, mmu_idx); - flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii); - flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid); + DP_TBFLAG_A64(flags, TBII, tbii); + DP_TBFLAG_A64(flags, TBID, tbid); if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { int sve_el = sve_exception_el(env, el); @@ -13093,14 +13091,14 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, } else { zcr_len = sve_zcr_len_for_el(env, el); } - flags = FIELD_DP32(flags, TBFLAG_A64, SVEEXC_EL, sve_el); - flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len); + DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el); + DP_TBFLAG_A64(flags, ZCR_LEN, zcr_len); } sctlr = regime_sctlr(env, stage1); if (arm_cpu_data_is_big_endian_a64(el, sctlr)) { - flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1); + DP_TBFLAG_ANY(flags, BE_DATA, 1); } if (cpu_isar_feature(aa64_pauth, env_archcpu(env))) { @@ -13111,14 +13109,14 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, * The decision of which action to take is left to a helper. */ if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) { - flags = FIELD_DP32(flags, TBFLAG_A64, PAUTH_ACTIVE, 1); + DP_TBFLAG_A64(flags, PAUTH_ACTIVE, 1); } } if (cpu_isar_feature(aa64_bti, env_archcpu(env))) { /* Note that SCTLR_EL[23].BT == SCTLR_BT1. */ if (sctlr & (el == 0 ? SCTLR_BT0 : SCTLR_BT1)) { - flags = FIELD_DP32(flags, TBFLAG_A64, BT, 1); + DP_TBFLAG_A64(flags, BT, 1); } } @@ -13130,7 +13128,7 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, case ARMMMUIdx_SE10_1: case ARMMMUIdx_SE10_1_PAN: /* TODO: ARMv8.3-NV */ - flags = FIELD_DP32(flags, TBFLAG_A64, UNPRIV, 1); + DP_TBFLAG_A64(flags, UNPRIV, 1); break; case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: @@ -13141,7 +13139,7 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, * gated by HCR_EL2.<E2H,TGE> == '11', and so is LDTR. */ if (env->cp15.hcr_el2 & HCR_TGE) { - flags = FIELD_DP32(flags, TBFLAG_A64, UNPRIV, 1); + DP_TBFLAG_A64(flags, UNPRIV, 1); } break; default: @@ -13159,24 +13157,23 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, * 4) If no Allocation Tag Access, then all accesses are Unchecked.
*/ if (allocation_tag_access_enabled(env, el, sctlr)) { - flags = FIELD_DP32(flags, TBFLAG_A64, ATA, 1); + DP_TBFLAG_A64(flags, ATA, 1); if (tbid && !(env->pstate & PSTATE_TCO) && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) { - flags = FIELD_DP32(flags, TBFLAG_A64, MTE_ACTIVE, 1); + DP_TBFLAG_A64(flags, MTE_ACTIVE, 1); } } /* And again for unprivileged accesses, if required. */ - if (FIELD_EX32(flags, TBFLAG_A64, UNPRIV) + if (EX_TBFLAG_A64(flags, UNPRIV) && tbid && !(env->pstate & PSTATE_TCO) && (sctlr & SCTLR_TCF0) && allocation_tag_access_enabled(env, 0, sctlr)) { - flags = FIELD_DP32(flags, TBFLAG_A64, MTE0_ACTIVE, 1); + DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1); } /* Cache TCMA as well as TBI. */ - flags = FIELD_DP32(flags, TBFLAG_A64, TCMA, - aa64_va_parameter_tcma(tcr, mmu_idx)); + DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); } return rebuild_hflags_common(env, fp_el, mmu_idx, flags); @@ -13272,10 +13269,10 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, *cs_base = 0; assert_hflags_rebuild_correctly(env); - if (FIELD_EX32(flags, TBFLAG_ANY, AARCH64_STATE)) { + if (EX_TBFLAG_ANY(flags, AARCH64_STATE)) { *pc = env->pc; if (cpu_isar_feature(aa64_bti, env_archcpu(env))) { - flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype); + DP_TBFLAG_A64(flags, BTYPE, env->btype); } } else { *pc = env->regs[15]; @@ -13284,7 +13281,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, if (arm_feature(env, ARM_FEATURE_M_SECURITY) && FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) != env->v7m.secure) { - flags = FIELD_DP32(flags, TBFLAG_M32, FPCCR_S_WRONG, 1); + DP_TBFLAG_M32(flags, FPCCR_S_WRONG, 1); } if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) && @@ -13296,12 +13293,12 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, * active FP context; we must create a new FP context before * executing any FP insn. */ - flags = FIELD_DP32(flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED, 1); + DP_TBFLAG_M32(flags, NEW_FP_CTXT_NEEDED, 1); } bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK; if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) { - flags = FIELD_DP32(flags, TBFLAG_M32, LSPACT, 1); + DP_TBFLAG_M32(flags, LSPACT, 1); } } else { /* @@ -13309,21 +13306,18 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, * Note that VECLEN+VECSTRIDE are RES0 for M-profile. */ if (arm_feature(env, ARM_FEATURE_XSCALE)) { - flags = FIELD_DP32(flags, TBFLAG_A32, - XSCALE_CPAR, env->cp15.c15_cpar); + DP_TBFLAG_A32(flags, XSCALE_CPAR, env->cp15.c15_cpar); } else { - flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN, - env->vfp.vec_len); - flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE, - env->vfp.vec_stride); + DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len); + DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride); } if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) { - flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1); + DP_TBFLAG_A32(flags, VFPEN, 1); } } - flags = FIELD_DP32(flags, TBFLAG_AM32, THUMB, env->thumb); - flags = FIELD_DP32(flags, TBFLAG_AM32, CONDEXEC, env->condexec_bits); + DP_TBFLAG_AM32(flags, THUMB, env->thumb); + DP_TBFLAG_AM32(flags, CONDEXEC, env->condexec_bits); } /* @@ -13335,9 +13329,8 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, * 1 1 Active-not-pending * SS_ACTIVE is set in hflags; PSTATE__SS is computed every TB. 
*/ - if (FIELD_EX32(flags, TBFLAG_ANY, SS_ACTIVE) && - (env->pstate & PSTATE_SS)) { - flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE__SS, 1); + if (EX_TBFLAG_ANY(flags, SS_ACTIVE) && (env->pstate & PSTATE_SS)) { + DP_TBFLAG_ANY(flags, PSTATE__SS, 1); } *pflags = flags; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 64b3a5200c..05d83a5f7a 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -14684,28 +14684,28 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, !arm_el_is_aa64(env, 3); dc->thumb = 0; dc->sctlr_b = 0; - dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE; + dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE; dc->condexec_mask = 0; dc->condexec_cond = 0; - core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX); + core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); - dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII); - dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID); - dc->tcma = FIELD_EX32(tb_flags, TBFLAG_A64, TCMA); + dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); + dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); + dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); #if !defined(CONFIG_USER_ONLY) dc->user = (dc->current_el == 0); #endif - dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL); - dc->sve_excp_el = FIELD_EX32(tb_flags, TBFLAG_A64, SVEEXC_EL); - dc->sve_len = (FIELD_EX32(tb_flags, TBFLAG_A64, ZCR_LEN) + 1) * 16; - dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE); - dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT); - dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE); - dc->unpriv = FIELD_EX32(tb_flags, TBFLAG_A64, UNPRIV); - dc->ata = FIELD_EX32(tb_flags, TBFLAG_A64, ATA); - dc->mte_active[0] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE_ACTIVE); - dc->mte_active[1] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE0_ACTIVE); + dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); + dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); + dc->sve_len = (EX_TBFLAG_A64(tb_flags, ZCR_LEN) + 1) * 16; + dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); + dc->bt = EX_TBFLAG_A64(tb_flags, BT); + dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); + dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); + dc->ata = EX_TBFLAG_A64(tb_flags, ATA); + dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); + dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; @@ -14732,10 +14732,10 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, * emit code to generate a software step exception * end the TB */ - dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE); - dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE__SS); + dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); + dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); dc->is_ldex = false; - dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL); + dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL); /* Bound the number of insns to execute to those left on the page. 
*/ bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; diff --git a/target/arm/translate.c b/target/arm/translate.c index 271c53dadb..5c21e98d24 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -8864,46 +8864,42 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) */ dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3); - dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB); - dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE; - condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC); + dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB); + dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE; + condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC); dc->condexec_mask = (condexec & 0xf) << 1; dc->condexec_cond = condexec >> 4; - core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX); + core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx); dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); #if !defined(CONFIG_USER_ONLY) dc->user = (dc->current_el == 0); #endif - dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL); + dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); if (arm_feature(env, ARM_FEATURE_M)) { dc->vfp_enabled = 1; dc->be_data = MO_TE; - dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER); + dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER); dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) && regime_is_secure(env, dc->mmu_idx); - dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK); - dc->v8m_fpccr_s_wrong = - FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG); + dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK); + dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG); dc->v7m_new_fp_ctxt_needed = - FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED); - dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT); + EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED); + dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT); } else { - dc->be_data = - FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? 
MO_BE : MO_LE; - dc->debug_target_el = - FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL); - dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR__B); - dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE); - dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS); - dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN); + dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL); + dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B); + dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE); + dc->ns = EX_TBFLAG_A32(tb_flags, NS); + dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN); if (arm_feature(env, ARM_FEATURE_XSCALE)) { - dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR); + dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR); } else { - dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN); - dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE); + dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN); + dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE); } } dc->cp_regs = cpu->cp_regs; @@ -8924,8 +8920,8 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) * emit code to generate a software step exception * end the TB */ - dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE); - dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE__SS); + dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); + dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); dc->is_ldex = false; dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK; @@ -9364,11 +9360,11 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) DisasContext dc = { }; const TranslatorOps *ops = &arm_translator_ops; - if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) { + if (EX_TBFLAG_AM32(tb->flags, THUMB)) { ops = &thumb_translator_ops; } #ifdef TARGET_AARCH64 - if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) { + if (EX_TBFLAG_ANY(tb->flags, AARCH64_STATE)) { ops = &aarch64_translator_ops; } #endif From 3902bfc6f06144016b8b25f5b6fb2211e85406fc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:31 -0700 Subject: [PATCH 16/43] target/arm: Introduce CPUARMTBFlags In preparation for splitting tb->flags across multiple fields, introduce a structure to hold the value(s). So far this only migrates the one uint32_t and fixes all of the places that require adjustment to match. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-6-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 26 ++++++++++++--------- target/arm/helper.c | 48 +++++++++++++++++++++----------------- target/arm/translate-a64.c | 2 +- target/arm/translate.c | 7 +++--- target/arm/translate.h | 11 +++++++++ 5 files changed, 57 insertions(+), 37 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index b798ff8115..79af9a7c62 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -225,6 +225,10 @@ typedef struct ARMPACKey { } ARMPACKey; #endif +/* See the commentary above the TBFLAG field definitions. */ +typedef struct CPUARMTBFlags { + uint32_t flags; +} CPUARMTBFlags; typedef struct CPUARMState { /* Regs for current mode. */ @@ -253,7 +257,7 @@ typedef struct CPUARMState { uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */ /* Cached TBFLAGS state. See below for which bits are included. */ - uint32_t hflags; + CPUARMTBFlags hflags; /* Frequently accessed CPSR bits are stored separately for efficiency. This contains all the other bits. 
Use cpsr_{read,write} to access @@ -3466,21 +3470,21 @@ FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1) * Helpers for using the above. */ #define DP_TBFLAG_ANY(DST, WHICH, VAL) \ - (DST = FIELD_DP32(DST, TBFLAG_ANY, WHICH, VAL)) + (DST.flags = FIELD_DP32(DST.flags, TBFLAG_ANY, WHICH, VAL)) #define DP_TBFLAG_A64(DST, WHICH, VAL) \ - (DST = FIELD_DP32(DST, TBFLAG_A64, WHICH, VAL)) + (DST.flags = FIELD_DP32(DST.flags, TBFLAG_A64, WHICH, VAL)) #define DP_TBFLAG_A32(DST, WHICH, VAL) \ - (DST = FIELD_DP32(DST, TBFLAG_A32, WHICH, VAL)) + (DST.flags = FIELD_DP32(DST.flags, TBFLAG_A32, WHICH, VAL)) #define DP_TBFLAG_M32(DST, WHICH, VAL) \ - (DST = FIELD_DP32(DST, TBFLAG_M32, WHICH, VAL)) + (DST.flags = FIELD_DP32(DST.flags, TBFLAG_M32, WHICH, VAL)) #define DP_TBFLAG_AM32(DST, WHICH, VAL) \ - (DST = FIELD_DP32(DST, TBFLAG_AM32, WHICH, VAL)) + (DST.flags = FIELD_DP32(DST.flags, TBFLAG_AM32, WHICH, VAL)) -#define EX_TBFLAG_ANY(IN, WHICH) FIELD_EX32(IN, TBFLAG_ANY, WHICH) -#define EX_TBFLAG_A64(IN, WHICH) FIELD_EX32(IN, TBFLAG_A64, WHICH) -#define EX_TBFLAG_A32(IN, WHICH) FIELD_EX32(IN, TBFLAG_A32, WHICH) -#define EX_TBFLAG_M32(IN, WHICH) FIELD_EX32(IN, TBFLAG_M32, WHICH) -#define EX_TBFLAG_AM32(IN, WHICH) FIELD_EX32(IN, TBFLAG_AM32, WHICH) +#define EX_TBFLAG_ANY(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_ANY, WHICH) +#define EX_TBFLAG_A64(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_A64, WHICH) +#define EX_TBFLAG_A32(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_A32, WHICH) +#define EX_TBFLAG_M32(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_M32, WHICH) +#define EX_TBFLAG_AM32(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_AM32, WHICH) /** * cpu_mmu_index: diff --git a/target/arm/helper.c b/target/arm/helper.c index 2769e6fd35..f564e59084 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -12984,8 +12984,9 @@ ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env) } #endif -static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el, - ARMMMUIdx mmu_idx, uint32_t flags) +static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el, + ARMMMUIdx mmu_idx, + CPUARMTBFlags flags) { DP_TBFLAG_ANY(flags, FPEXC_EL, fp_el); DP_TBFLAG_ANY(flags, MMUIDX, arm_to_core_mmu_idx(mmu_idx)); @@ -12996,8 +12997,9 @@ static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el, return flags; } -static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el, - ARMMMUIdx mmu_idx, uint32_t flags) +static CPUARMTBFlags rebuild_hflags_common_32(CPUARMState *env, int fp_el, + ARMMMUIdx mmu_idx, + CPUARMTBFlags flags) { bool sctlr_b = arm_sctlr_b(env); @@ -13012,10 +13014,10 @@ static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el, return rebuild_hflags_common(env, fp_el, mmu_idx, flags); } -static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el, - ARMMMUIdx mmu_idx) +static CPUARMTBFlags rebuild_hflags_m32(CPUARMState *env, int fp_el, + ARMMMUIdx mmu_idx) { - uint32_t flags = 0; + CPUARMTBFlags flags = {}; if (arm_v7m_is_handler_mode(env)) { DP_TBFLAG_M32(flags, HANDLER, 1); @@ -13035,18 +13037,18 @@ static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el, return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); } -static uint32_t rebuild_hflags_aprofile(CPUARMState *env) +static CPUARMTBFlags rebuild_hflags_aprofile(CPUARMState *env) { - int flags = 0; + CPUARMTBFlags flags = {}; DP_TBFLAG_ANY(flags, DEBUG_TARGET_EL, arm_debug_target_el(env)); return flags; } -static uint32_t rebuild_hflags_a32(CPUARMState *env, int fp_el, - ARMMMUIdx mmu_idx) +static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int 
fp_el, + ARMMMUIdx mmu_idx) { - uint32_t flags = rebuild_hflags_aprofile(env); + CPUARMTBFlags flags = rebuild_hflags_aprofile(env); if (arm_el_is_aa64(env, 1)) { DP_TBFLAG_A32(flags, VFPEN, 1); @@ -13060,10 +13062,10 @@ static uint32_t rebuild_hflags_a32(CPUARMState *env, int fp_el, return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); } -static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, - ARMMMUIdx mmu_idx) +static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, + ARMMMUIdx mmu_idx) { - uint32_t flags = rebuild_hflags_aprofile(env); + CPUARMTBFlags flags = rebuild_hflags_aprofile(env); ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx); uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; uint64_t sctlr; @@ -13179,7 +13181,7 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, return rebuild_hflags_common(env, fp_el, mmu_idx, flags); } -static uint32_t rebuild_hflags_internal(CPUARMState *env) +static CPUARMTBFlags rebuild_hflags_internal(CPUARMState *env) { int el = arm_current_el(env); int fp_el = fp_exception_el(env, el); @@ -13208,6 +13210,7 @@ void HELPER(rebuild_hflags_m32_newel)(CPUARMState *env) int el = arm_current_el(env); int fp_el = fp_exception_el(env, el); ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); + env->hflags = rebuild_hflags_m32(env, fp_el, mmu_idx); } @@ -13250,12 +13253,12 @@ void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el) static inline void assert_hflags_rebuild_correctly(CPUARMState *env) { #ifdef CONFIG_DEBUG_TCG - uint32_t env_flags_current = env->hflags; - uint32_t env_flags_rebuilt = rebuild_hflags_internal(env); + CPUARMTBFlags c = env->hflags; + CPUARMTBFlags r = rebuild_hflags_internal(env); - if (unlikely(env_flags_current != env_flags_rebuilt)) { + if (unlikely(c.flags != r.flags)) { fprintf(stderr, "TCG hflags mismatch (current:0x%08x rebuilt:0x%08x)\n", - env_flags_current, env_flags_rebuilt); + c.flags, r.flags); abort(); } #endif @@ -13264,10 +13267,11 @@ static inline void assert_hflags_rebuild_correctly(CPUARMState *env) void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *pflags) { - uint32_t flags = env->hflags; + CPUARMTBFlags flags; *cs_base = 0; assert_hflags_rebuild_correctly(env); + flags = env->hflags; if (EX_TBFLAG_ANY(flags, AARCH64_STATE)) { *pc = env->pc; @@ -13333,7 +13337,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, DP_TBFLAG_ANY(flags, PSTATE__SS, 1); } - *pflags = flags; + *pflags = flags.flags; } #ifdef TARGET_AARCH64 diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 05d83a5f7a..b32ff56666 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -14670,7 +14670,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, DisasContext *dc = container_of(dcbase, DisasContext, base); CPUARMState *env = cpu->env_ptr; ARMCPU *arm_cpu = env_archcpu(env); - uint32_t tb_flags = dc->base.tb->flags; + CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); int bound, core_mmu_idx; dc->isar = &arm_cpu->isar; diff --git a/target/arm/translate.c b/target/arm/translate.c index 5c21e98d24..6a15e5d16c 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -8852,7 +8852,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) DisasContext *dc = container_of(dcbase, DisasContext, base); CPUARMState *env = cs->env_ptr; ARMCPU *cpu = env_archcpu(env); - uint32_t tb_flags = dc->base.tb->flags; + CPUARMTBFlags tb_flags = 
arm_tbflags_from_tb(dc->base.tb); uint32_t condexec, core_mmu_idx; dc->isar = &cpu->isar; @@ -9359,12 +9359,13 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) { DisasContext dc = { }; const TranslatorOps *ops = &arm_translator_ops; + CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb); - if (EX_TBFLAG_AM32(tb->flags, THUMB)) { + if (EX_TBFLAG_AM32(tb_flags, THUMB)) { ops = &thumb_translator_ops; } #ifdef TARGET_AARCH64 - if (EX_TBFLAG_ANY(tb->flags, AARCH64_STATE)) { + if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) { ops = &aarch64_translator_ops; } #endif diff --git a/target/arm/translate.h b/target/arm/translate.h index 423b0e08df..f30287e554 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -394,6 +394,17 @@ typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); +/** + * arm_tbflags_from_tb: + * @tb: the TranslationBlock + * + * Extract the flag values from @tb. + */ +static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) +{ + return (CPUARMTBFlags){ tb->flags }; +} + /* * Enum for argument to fpstatus_ptr(). */ From a378206a205a65c182854a961d99acbce00cda86 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:32 -0700 Subject: [PATCH 17/43] target/arm: Move mode specific TB flags to tb->cs_base Now that we have all of the proper macros defined, expanding the CPUARMTBFlags structure and populating the two TB fields is relatively simple. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-7-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 49 ++++++++++++++++++++++++------------------ target/arm/helper.c | 10 +++++---- target/arm/translate.h | 2 +- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 79af9a7c62..a8da7c55a6 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -228,6 +228,7 @@ typedef struct ARMPACKey { /* See the commentary above the TBFLAG field definitions. */ typedef struct CPUARMTBFlags { uint32_t flags; + target_ulong flags2; } CPUARMTBFlags; typedef struct CPUARMState { @@ -3381,20 +3382,26 @@ typedef ARMCPU ArchCPU; #include "exec/cpu-all.h" /* - * Bit usage in the TB flags field: bit 31 indicates whether we are - * in 32 or 64 bit mode. The meaning of the other bits depends on that. - * We put flags which are shared between 32 and 64 bit mode at the top - * of the word, and flags which apply to only one mode at the bottom. + * We have more than 32 bits' worth of state per TB, so we split the data + * between tb->flags and tb->cs_base, which is otherwise unused for ARM. + * We collect these two parts in CPUARMTBFlags where they are named + * flags and flags2 respectively. * - * 31 20 18 14 9 0 - * +--------------+-----+-----+----------+--------------+ - * | | | TBFLAG_A32 | | - * | | +-----+----------+ TBFLAG_AM32 | - * | TBFLAG_ANY | |TBFLAG_M32| | - * | +-----------+----------+--------------| - * | | TBFLAG_A64 | - * +--------------+-------------------------------------+ - * 31 20 0 + * The flags that are shared between all execution modes, TBFLAG_ANY, + * are stored in flags. The flags that are specific to a given mode + * are stored in flags2. Since cs_base is sized on the configured + * address size, flags2 always has 64 bits for A64, and a minimum of + * 32 bits for A32 and M32.
+ * + * The bits for 32-bit A-profile and M-profile partially overlap: + * + * 18 9 0 + * +----------------+--------------+ + * | TBFLAG_A32 | | + * +-----+----------+ TBFLAG_AM32 | + * | |TBFLAG_M32| | + * +-----+----------+--------------+ + * 14 9 0 * * Unless otherwise noted, these bits are cached in env->hflags. */ FIELD(TBFLAG_ANY, AARCH64_STATE, 31, 1) FIELD(TBFLAG_ANY, SS_ACTIVE, 30, 1) @@ -3472,19 +3479,19 @@ FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1) #define DP_TBFLAG_ANY(DST, WHICH, VAL) \ (DST.flags = FIELD_DP32(DST.flags, TBFLAG_ANY, WHICH, VAL)) #define DP_TBFLAG_A64(DST, WHICH, VAL) \ - (DST.flags = FIELD_DP32(DST.flags, TBFLAG_A64, WHICH, VAL)) + (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_A64, WHICH, VAL)) #define DP_TBFLAG_A32(DST, WHICH, VAL) \ - (DST.flags = FIELD_DP32(DST.flags, TBFLAG_A32, WHICH, VAL)) + (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_A32, WHICH, VAL)) #define DP_TBFLAG_M32(DST, WHICH, VAL) \ - (DST.flags = FIELD_DP32(DST.flags, TBFLAG_M32, WHICH, VAL)) + (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_M32, WHICH, VAL)) #define DP_TBFLAG_AM32(DST, WHICH, VAL) \ - (DST.flags = FIELD_DP32(DST.flags, TBFLAG_AM32, WHICH, VAL)) + (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_AM32, WHICH, VAL)) #define EX_TBFLAG_ANY(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_ANY, WHICH) -#define EX_TBFLAG_A64(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_A64, WHICH) -#define EX_TBFLAG_A32(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_A32, WHICH) -#define EX_TBFLAG_M32(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_M32, WHICH) -#define EX_TBFLAG_AM32(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_AM32, WHICH) +#define EX_TBFLAG_A64(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_A64, WHICH) +#define EX_TBFLAG_A32(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_A32, WHICH) +#define EX_TBFLAG_M32(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_M32, WHICH) +#define EX_TBFLAG_AM32(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_AM32, WHICH) /** * cpu_mmu_index: diff --git a/target/arm/helper.c b/target/arm/helper.c index f564e59084..4aa7650d3a 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -13256,9 +13256,11 @@ static inline void assert_hflags_rebuild_correctly(CPUARMState *env) CPUARMTBFlags c = env->hflags; CPUARMTBFlags r = rebuild_hflags_internal(env); - if (unlikely(c.flags != r.flags)) { - fprintf(stderr, "TCG hflags mismatch (current:0x%08x rebuilt:0x%08x)\n", - c.flags, r.flags); + if (unlikely(c.flags != r.flags || c.flags2 != r.flags2)) { + fprintf(stderr, "TCG hflags mismatch " + "(current:(0x%08x,0x" TARGET_FMT_lx ")" + " rebuilt:(0x%08x,0x" TARGET_FMT_lx "))\n", + c.flags, c.flags2, r.flags, r.flags2); abort(); } #endif @@ -13269,7 +13271,6 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, { CPUARMTBFlags flags; - *cs_base = 0; assert_hflags_rebuild_correctly(env); flags = env->hflags; @@ -13338,6 +13339,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, } *pflags = flags.flags; + *cs_base = flags.flags2; } #ifdef TARGET_AARCH64 diff --git a/target/arm/translate.h b/target/arm/translate.h index f30287e554..50c2aba066 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -402,7 +402,7 @@ typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); */ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) { - return (CPUARMTBFlags){ tb->flags }; + return (CPUARMTBFlags){ tb->flags, tb->cs_base }; } /* From 5896f39253ead37f65a2c13a9b0066f56c282d4c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:34 -0700 Subject: [PATCH 18/43] target/arm: Move TBFLAG_AM32 bits to the top Now that these bits have been
moved out of tb->flags, where TBFLAG_ANY was filling from the top, move AM32 to fill from the top, and A32 and M32 to fill from the bottom. This means fewer changes when adding new bits. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-9-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index a8da7c55a6..15104e1440 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3395,13 +3395,13 @@ typedef ARMCPU ArchCPU; * * The bits for 32-bit A-profile and M-profile partially overlap: * - * 18 9 0 - * +----------------+--------------+ - * | TBFLAG_A32 | | - * +-----+----------+ TBFLAG_AM32 | - * | |TBFLAG_M32| | - * +-----+----------+--------------+ - * 14 9 0 + * 31 23 11 10 0 + * +-------------+----------+----------------+ + * | | | TBFLAG_A32 | + * | TBFLAG_AM32 | +-----+----------+ + * | | |TBFLAG_M32| + * +-------------+----------------+----------+ + * 31 23 5 4 0 * * Unless otherwise noted, these bits are cached in env->hflags. */ @@ -3418,44 +3418,44 @@ FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 20, 2) /* * Bit usage when in AArch32 state, both A- and M-profile. */ -FIELD(TBFLAG_AM32, CONDEXEC, 0, 8) /* Not cached. */ -FIELD(TBFLAG_AM32, THUMB, 8, 1) /* Not cached. */ +FIELD(TBFLAG_AM32, CONDEXEC, 24, 8) /* Not cached. */ +FIELD(TBFLAG_AM32, THUMB, 23, 1) /* Not cached. */ /* * Bit usage when in AArch32 state, for A-profile only. */ -FIELD(TBFLAG_A32, VECLEN, 9, 3) /* Not cached. */ -FIELD(TBFLAG_A32, VECSTRIDE, 12, 2) /* Not cached. */ +FIELD(TBFLAG_A32, VECLEN, 0, 3) /* Not cached. */ +FIELD(TBFLAG_A32, VECSTRIDE, 3, 2) /* Not cached. */ /* * We store the bottom two bits of the CPAR as TB flags and handle * checks on the other bits at runtime. This shares the same bits as * VECSTRIDE, which is OK as no XScale CPU has VFP. * Not cached, because VECLEN+VECSTRIDE are not cached. */ -FIELD(TBFLAG_A32, XSCALE_CPAR, 12, 2) -FIELD(TBFLAG_A32, VFPEN, 14, 1) /* Partially cached, minus FPEXC. */ -FIELD(TBFLAG_A32, SCTLR__B, 15, 1) /* Cannot overlap with SCTLR_B */ -FIELD(TBFLAG_A32, HSTR_ACTIVE, 16, 1) +FIELD(TBFLAG_A32, XSCALE_CPAR, 5, 2) +FIELD(TBFLAG_A32, VFPEN, 7, 1) /* Partially cached, minus FPEXC. */ +FIELD(TBFLAG_A32, SCTLR__B, 8, 1) /* Cannot overlap with SCTLR_B */ +FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1) /* * Indicates whether cp register reads and writes by guest code should access * the secure or nonsecure bank of banked registers; note that this is not * the same thing as the current security state of the processor! */ -FIELD(TBFLAG_A32, NS, 17, 1) +FIELD(TBFLAG_A32, NS, 10, 1) /* * Bit usage when in AArch32 state, for M-profile only. */ /* Handler (ie not Thread) mode */ -FIELD(TBFLAG_M32, HANDLER, 9, 1) +FIELD(TBFLAG_M32, HANDLER, 0, 1) /* Whether we should generate stack-limit checks */ -FIELD(TBFLAG_M32, STACKCHECK, 10, 1) +FIELD(TBFLAG_M32, STACKCHECK, 1, 1) /* Set if FPCCR.LSPACT is set */ -FIELD(TBFLAG_M32, LSPACT, 11, 1) /* Not cached. */ +FIELD(TBFLAG_M32, LSPACT, 2, 1) /* Not cached. */ /* Set if we must create a new FP context */ -FIELD(TBFLAG_M32, NEW_FP_CTXT_NEEDED, 12, 1) /* Not cached. */ +FIELD(TBFLAG_M32, NEW_FP_CTXT_NEEDED, 3, 1) /* Not cached. */ /* Set if FPCCR.S does not match current security state */ -FIELD(TBFLAG_M32, FPCCR_S_WRONG, 13, 1) /* Not cached. */ +FIELD(TBFLAG_M32, FPCCR_S_WRONG, 4, 1) /* Not cached. 
*/ /* * Bit usage when in AArch64 state From eee81d41ec4c5bf9bbde4e4d35648e29e2244f3f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:35 -0700 Subject: [PATCH 19/43] target/arm: Move TBFLAG_ANY bits to the bottom Now that other bits have been moved out of tb->flags, there's no point in filling from the top. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-10-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 15104e1440..5e0131be1a 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3405,15 +3405,15 @@ typedef ARMCPU ArchCPU; * * Unless otherwise noted, these bits are cached in env->hflags. */ -FIELD(TBFLAG_ANY, AARCH64_STATE, 31, 1) -FIELD(TBFLAG_ANY, SS_ACTIVE, 30, 1) -FIELD(TBFLAG_ANY, PSTATE__SS, 29, 1) /* Not cached. */ -FIELD(TBFLAG_ANY, BE_DATA, 28, 1) -FIELD(TBFLAG_ANY, MMUIDX, 24, 4) +FIELD(TBFLAG_ANY, AARCH64_STATE, 0, 1) +FIELD(TBFLAG_ANY, SS_ACTIVE, 1, 1) +FIELD(TBFLAG_ANY, PSTATE__SS, 2, 1) /* Not cached. */ +FIELD(TBFLAG_ANY, BE_DATA, 3, 1) +FIELD(TBFLAG_ANY, MMUIDX, 4, 4) /* Target EL if we take a floating-point-disabled exception */ -FIELD(TBFLAG_ANY, FPEXC_EL, 22, 2) +FIELD(TBFLAG_ANY, FPEXC_EL, 8, 2) /* For A-profile only, target EL for debug exceptions. */ -FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 20, 2) +FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 10, 2) /* * Bit usage when in AArch32 state, both A- and M-profile. From 4479ec30c9c4d2399b6e5bf4e77d136cfd27aebd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:36 -0700 Subject: [PATCH 20/43] target/arm: Add ALIGN_MEM to TBFLAG_ANY Use this to signal when memory access alignment is required. This value comes from the CCR register for M-profile, and from the SCTLR register for A-profile. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-11-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 2 ++ target/arm/helper.c | 19 +++++++++++++++++-- target/arm/translate-a64.c | 1 + target/arm/translate.c | 7 +++---- target/arm/translate.h | 2 ++ 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 5e0131be1a..616b393253 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3414,6 +3414,8 @@ FIELD(TBFLAG_ANY, MMUIDX, 4, 4) FIELD(TBFLAG_ANY, FPEXC_EL, 8, 2) /* For A-profile only, target EL for debug exceptions. */ FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 10, 2) +/* Memory operations require alignment: SCTLR_ELx.A or CCR.UNALIGN_TRP */ +FIELD(TBFLAG_ANY, ALIGN_MEM, 12, 1) /* * Bit usage when in AArch32 state, both A- and M-profile. diff --git a/target/arm/helper.c b/target/arm/helper.c index 4aa7650d3a..9b1b98705f 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -13018,6 +13018,12 @@ static CPUARMTBFlags rebuild_hflags_m32(CPUARMState *env, int fp_el, ARMMMUIdx mmu_idx) { CPUARMTBFlags flags = {}; + uint32_t ccr = env->v7m.ccr[env->v7m.secure]; + + /* Without HaveMainExt, CCR.UNALIGN_TRP is RES1. 
*/ + if (ccr & R_V7M_CCR_UNALIGN_TRP_MASK) { + DP_TBFLAG_ANY(flags, ALIGN_MEM, 1); + } if (arm_v7m_is_handler_mode(env)) { DP_TBFLAG_M32(flags, HANDLER, 1); @@ -13030,7 +13036,7 @@ static CPUARMTBFlags rebuild_hflags_m32(CPUARMState *env, int fp_el, */ if (arm_feature(env, ARM_FEATURE_V8) && !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) && - (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) { + (ccr & R_V7M_CCR_STKOFHFNMIGN_MASK))) { DP_TBFLAG_M32(flags, STACKCHECK, 1); } @@ -13049,12 +13055,17 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el, ARMMMUIdx mmu_idx) { CPUARMTBFlags flags = rebuild_hflags_aprofile(env); + int el = arm_current_el(env); + + if (arm_sctlr(env, el) & SCTLR_A) { + DP_TBFLAG_ANY(flags, ALIGN_MEM, 1); + } if (arm_el_is_aa64(env, 1)) { DP_TBFLAG_A32(flags, VFPEN, 1); } - if (arm_current_el(env) < 2 && env->cp15.hstr_el2 && + if (el < 2 && env->cp15.hstr_el2 && (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { DP_TBFLAG_A32(flags, HSTR_ACTIVE, 1); } @@ -13099,6 +13110,10 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, sctlr = regime_sctlr(env, stage1); + if (sctlr & SCTLR_A) { + DP_TBFLAG_ANY(flags, ALIGN_MEM, 1); + } + if (arm_cpu_data_is_big_endian_a64(el, sctlr)) { DP_TBFLAG_ANY(flags, BE_DATA, 1); } diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index b32ff56666..92a62b1a75 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -14697,6 +14697,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->user = (dc->current_el == 0); #endif dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); + dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); dc->sve_len = (EX_TBFLAG_A64(tb_flags, ZCR_LEN) + 1) * 16; dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); diff --git a/target/arm/translate.c b/target/arm/translate.c index 6a15e5d16c..970e537eae 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -933,8 +933,7 @@ static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, { TCGv addr; - if (arm_dc_feature(s, ARM_FEATURE_M) && - !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) { + if (s->align_mem) { opc |= MO_ALIGN; } @@ -948,8 +947,7 @@ static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, { TCGv addr; - if (arm_dc_feature(s, ARM_FEATURE_M) && - !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) { + if (s->align_mem) { opc |= MO_ALIGN; } @@ -8877,6 +8875,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->user = (dc->current_el == 0); #endif dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); + dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); if (arm_feature(env, ARM_FEATURE_M)) { dc->vfp_enabled = 1; diff --git a/target/arm/translate.h b/target/arm/translate.h index 50c2aba066..b185c14a03 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -87,6 +87,8 @@ typedef struct DisasContext { bool bt; /* True if any CP15 access is trapped by HSTR_EL2 */ bool hstr_active; + /* True if memory operations require alignment */ + bool align_mem; /* * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. * < 0, set by the current instruction. 
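[Editorial aside, not part of the series: the new ALIGN_MEM bit is derived once per hflags rebuild and then consumed once per memory access at translate time. A minimal sketch of both halves, using only identifiers that appear in the patch above; require_alignment is a hypothetical helper name introduced for illustration:

    /* Derivation, per hflags rebuild: one bit serves both profiles. */
    static bool require_alignment(CPUARMState *env, bool m_profile, int el)
    {
        if (m_profile) {
            /* CCR.UNALIGN_TRP set: unaligned accesses must trap. */
            return env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_UNALIGN_TRP_MASK;
        }
        /* SCTLR_ELx.A set: alignment checking is enabled. */
        return (arm_sctlr(env, el) & SCTLR_A) != 0;
    }

    /* Consumption, per access at translate time: */
    if (s->align_mem && !(opc & MO_AMASK)) {
        opc |= MO_ALIGN;
    }

The next patch centralizes the consumption half in a finalize_memop() helper so that individual loads and stores stop open-coding this check.]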
From 9d486b40e895b0e4cfaf47a0bdbd9144547b66d5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:37 -0700 Subject: [PATCH 21/43] target/arm: Adjust gen_aa32_{ld, st}_i32 for align+endianness Create a finalize_memop function that computes alignment and endianness and returns the final MemOp for the operation. Split out gen_aa32_{ld,st}_internal_i32 which bypasses any special handling of endianness or alignment. Adjust gen_aa32_{ld,st}_i32 so that s->be_data is not added by the callers. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-12-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-neon.c.inc | 9 +-- target/arm/translate.c | 100 +++++++++++++++++--------------- target/arm/translate.h | 24 ++++++++ 3 files changed, 79 insertions(+), 54 deletions(-) diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index 0e5828744b..c82aa1412e 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -559,8 +559,7 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) addr = tcg_temp_new_i32(); load_reg_var(s, addr, a->rn); for (reg = 0; reg < nregs; reg++) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), size); if ((vd & 1) && vec_size == 16) { /* * We cannot write 16 bytes at once because the @@ -650,13 +649,11 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) */ for (reg = 0; reg < nregs; reg++) { if (a->l) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | a->size); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), a->size); neon_store_element(vd, a->reg_idx, a->size, tmp); } else { /* Store */ neon_load_element(tmp, vd, a->reg_idx, a->size); - gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), - s->be_data | a->size); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), a->size); } vd += a->stride; tcg_gen_addi_i32(addr, addr, 1 << a->size); diff --git a/target/arm/translate.c b/target/arm/translate.c index 970e537eae..5bf68b782a 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -908,7 +908,8 @@ static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) #define IS_USER_ONLY 0 #endif -/* Abstractions of "generate code to do a guest load/store for +/* + * Abstractions of "generate code to do a guest load/store for * AArch32", where a vaddr is always 32 bits (and is zero * extended if we're a 64 bit core) and data is also * 32 bits unless specifically doing a 64 bit access. @@ -916,7 +917,7 @@ static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) * that the address argument is TCGv_i32 rather than TCGv. */ -static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op) +static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op) { TCGv addr = tcg_temp_new(); tcg_gen_extu_i32_tl(addr, a32); @@ -928,47 +929,51 @@ static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op) return addr; } +/* + * Internal routines are used for NEON cases where the endianness + * and/or alignment has already been taken into account and manipulated. 
+ */ +static void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val, + TCGv_i32 a32, int index, MemOp opc) +{ + TCGv addr = gen_aa32_addr(s, a32, opc); + tcg_gen_qemu_ld_i32(val, addr, index, opc); + tcg_temp_free(addr); +} + +static void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val, + TCGv_i32 a32, int index, MemOp opc) +{ + TCGv addr = gen_aa32_addr(s, a32, opc); + tcg_gen_qemu_st_i32(val, addr, index, opc); + tcg_temp_free(addr); +} + static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, int index, MemOp opc) { - TCGv addr; - - if (s->align_mem) { - opc |= MO_ALIGN; - } - - addr = gen_aa32_addr(s, a32, opc); - tcg_gen_qemu_ld_i32(val, addr, index, opc); - tcg_temp_free(addr); + gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc)); } static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, int index, MemOp opc) { - TCGv addr; + gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc)); +} - if (s->align_mem) { - opc |= MO_ALIGN; +#define DO_GEN_LD(SUFF, OPC) \ + static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \ + TCGv_i32 a32, int index) \ + { \ + gen_aa32_ld_i32(s, val, a32, index, OPC); \ } - addr = gen_aa32_addr(s, a32, opc); - tcg_gen_qemu_st_i32(val, addr, index, opc); - tcg_temp_free(addr); -} - -#define DO_GEN_LD(SUFF, OPC) \ -static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 a32, int index) \ -{ \ - gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \ -} - -#define DO_GEN_ST(SUFF, OPC) \ -static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 a32, int index) \ -{ \ - gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \ -} +#define DO_GEN_ST(SUFF, OPC) \ + static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \ + TCGv_i32 a32, int index) \ + { \ + gen_aa32_st_i32(s, val, a32, index, OPC); \ + } static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val) { @@ -6456,7 +6461,7 @@ static bool op_load_rr(DisasContext *s, arg_ldst_rr *a, addr = op_addr_rr_pre(s, a); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); /* @@ -6485,7 +6490,7 @@ static bool op_store_rr(DisasContext *s, arg_ldst_rr *a, addr = op_addr_rr_pre(s, a); tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); tcg_temp_free_i32(tmp); @@ -6508,13 +6513,13 @@ static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a) addr = op_addr_rr_pre(s, a); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL); store_reg(s, a->rt, tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL); store_reg(s, a->rt + 1, tmp); /* LDRD w/ base writeback is undefined if the registers overlap. 
*/ @@ -6537,13 +6542,13 @@ static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a) addr = op_addr_rr_pre(s, a); tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL); tcg_temp_free_i32(tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = load_reg(s, a->rt + 1); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL); tcg_temp_free_i32(tmp); op_addr_rr_post(s, a, addr, -4); @@ -6608,7 +6613,7 @@ static bool op_load_ri(DisasContext *s, arg_ldst_ri *a, addr = op_addr_ri_pre(s, a); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); /* @@ -6637,7 +6642,7 @@ static bool op_store_ri(DisasContext *s, arg_ldst_ri *a, addr = op_addr_ri_pre(s, a); tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); tcg_temp_free_i32(tmp); @@ -6653,13 +6658,13 @@ static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2) addr = op_addr_ri_pre(s, a); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL); store_reg(s, a->rt, tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL); store_reg(s, rt2, tmp); /* LDRD w/ base writeback is undefined if the registers overlap. */ @@ -6692,13 +6697,13 @@ static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2) addr = op_addr_ri_pre(s, a); tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL); tcg_temp_free_i32(tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = load_reg(s, rt2); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL); tcg_temp_free_i32(tmp); op_addr_ri_post(s, a, addr, -4); @@ -6924,7 +6929,7 @@ static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop) addr = load_reg(s, a->rn); tmp = load_reg(s, a->rt); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop); disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite); tcg_temp_free_i32(tmp); @@ -7080,7 +7085,7 @@ static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop) addr = load_reg(s, a->rn); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop); disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel); tcg_temp_free_i32(addr); @@ -8264,8 +8269,7 @@ static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half) addr = load_reg(s, a->rn); tcg_gen_add_i32(addr, addr, tmp); - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - half ? MO_UW | s->be_data : MO_UB); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? 
MO_UW : MO_UB); tcg_temp_free_i32(addr); tcg_gen_add_i32(tmp, tmp, tmp); diff --git a/target/arm/translate.h b/target/arm/translate.h index b185c14a03..0c60b83b3d 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -459,4 +459,28 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) return statusptr; } +/** + * finalize_memop: + * @s: DisasContext + * @opc: size+sign+align of the memory operation + * + * Build the complete MemOp for a memory operation, including alignment + * and endianness. + * + * If (op & MO_AMASK) then the operation already contains the required + * alignment, e.g. for AccType_ATOMIC. Otherwise, this is an optionally + * unaligned operation, e.g. for AccType_NORMAL. + * + * In the latter case, there are configuration bits that require alignment, + * and this is applied here. Note that there is no way to indicate that + * no alignment should ever be enforced; this must be handled manually. + */ +static inline MemOp finalize_memop(DisasContext *s, MemOp opc) +{ + if (s->align_mem && !(opc & MO_AMASK)) { + opc |= MO_ALIGN; + } + return opc | s->be_data; +} + #endif /* TARGET_ARM_TRANSLATE_H */ From 37bf7a055f6b26a398dd0e953bf73d44e2312b33 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:38 -0700 Subject: [PATCH 22/43] target/arm: Merge gen_aa32_frob64 into gen_aa32_ld_i64 This is the only caller. Adjust some commentary to talk about SCTLR_B instead of the vanishing function. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-13-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index 5bf68b782a..2f2a6d76b4 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -975,20 +975,17 @@ static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, gen_aa32_st_i32(s, val, a32, index, OPC); \ } -static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val) -{ - /* Not needed for user-mode BE32, where we use MO_BE instead. */ - if (!IS_USER_ONLY && s->sctlr_b) { - tcg_gen_rotri_i64(val, val, 32); - } -} - static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, int index, MemOp opc) { TCGv addr = gen_aa32_addr(s, a32, opc); tcg_gen_qemu_ld_i64(val, addr, index, opc); - gen_aa32_frob64(s, val); + + /* Not needed for user-mode BE32, where we use MO_BE instead. */ + if (!IS_USER_ONLY && s->sctlr_b) { + tcg_gen_rotri_i64(val, val, 32); + } + tcg_temp_free(addr); } @@ -4987,16 +4984,13 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i32 tmp2 = tcg_temp_new_i32(); TCGv_i64 t64 = tcg_temp_new_i64(); - /* For AArch32, architecturally the 32-bit word at the lowest + /* + * For AArch32, architecturally the 32-bit word at the lowest * address is always Rt and the one at addr+4 is Rt2, even if * the CPU is big-endian. That means we don't want to do a - * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if - * for an architecturally 64-bit access, but instead do a - * 64-bit access using MO_BE if appropriate and then split - * the two halves. - * This only makes a difference for BE32 user-mode, where - * frob64() must not flip the two halves of the 64-bit data - * but this code must treat BE32 user-mode like BE32 system.
+ * gen_aa32_ld_i64(), which checks SCTLR_B as if for an + * architecturally 64-bit access, but instead do a 64-bit access + * using MO_BE if appropriate and then split the two halves. */ TCGv taddr = gen_aa32_addr(s, addr, opc); @@ -5056,14 +5050,15 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, TCGv_i64 n64 = tcg_temp_new_i64(); t2 = load_reg(s, rt2); - /* For AArch32, architecturally the 32-bit word at the lowest + + /* + * For AArch32, architecturally the 32-bit word at the lowest * address is always Rt and the one at addr+4 is Rt2, even if * the CPU is big-endian. Since we're going to treat this as a * single 64-bit BE store, we need to put the two halves in the * opposite order for BE to LE, so that they end up in the right - * places. - * We don't want gen_aa32_frob64() because that does the wrong - * thing for BE32 usermode. + * places. We don't want gen_aa32_st_i64, because that checks + * SCTLR_B as if for an architectural 64-bit access. */ if (s->be_data == MO_BE) { tcg_gen_concat_i32_i64(n64, t2, t1); From 9565ac4cc7e1d1aaccf3d8c6aed423b776e7995f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:39 -0700 Subject: [PATCH 23/43] target/arm: Fix SCTLR_B test for TCGv_i64 load/store Operating on a TCGv_i64 temporary does not mean that we're performing a 64-bit operation. Restrict the frobbing to actual 64-bit operations. This bug is not currently visible because all current users of these two functions always pass MO_64. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-14-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index 2f2a6d76b4..e99c0ab5cb 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -982,7 +982,7 @@ static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, tcg_gen_qemu_ld_i64(val, addr, index, opc); /* Not needed for user-mode BE32, where we use MO_BE instead. */ - if (!IS_USER_ONLY && s->sctlr_b) { + if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) { tcg_gen_rotri_i64(val, val, 32); } @@ -1001,7 +1001,7 @@ static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, TCGv addr = gen_aa32_addr(s, a32, opc); /* Not needed for user-mode BE32, where we use MO_BE instead. */ - if (!IS_USER_ONLY && s->sctlr_b) { + if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) { TCGv_i64 tmp = tcg_temp_new_i64(); tcg_gen_rotri_i64(tmp, val, 32); tcg_gen_qemu_st_i64(tmp, addr, index, opc); From abe66294e1d4899b312c296e93abcd3b88f2492e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:40 -0700 Subject: [PATCH 24/43] target/arm: Adjust gen_aa32_{ld, st}_i64 for align+endianness Adjust the interface to match what has been done to the TCGv_i32 load/store functions. This is less obvious, because at present the only user of these functions, trans_VLDST_multiple, also wants to manipulate the endianness to speed up loading multiple bytes. Thus we retain an "internal" interface which is identical to the current gen_aa32_{ld,st}_i64 interface. The "new" interface will gain users as we remove the legacy interfaces, gen_aa32_ld64 and gen_aa32_st64.
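[Editorial aside, not part of the patch: the two resulting call styles, both of which appear in the diff below. Ordinary callers use the public interface and let finalize_memop() fold in s->be_data and any required MO_ALIGN:

    gen_aa32_ld_i64(s, val, a32, index, MO_Q);

while trans_VLDST_multiple keeps control of the MemOp and hands a fully-formed value, endianness already included, to the internal interface:

    gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, endian | size);
]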
Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-15-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-neon.c.inc | 6 ++- target/arm/translate.c | 78 +++++++++++++++++++-------------- 2 files changed, 49 insertions(+), 35 deletions(-) diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index c82aa1412e..18d9042130 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -494,11 +494,13 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) int tt = a->vd + reg + spacing * xs; if (a->l) { - gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); + gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, + endian | size); neon_store_element64(tt, n, size, tmp64); } else { neon_load_element64(tmp64, tt, n, size); - gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); + gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, + endian | size); } tcg_gen_add_i32(addr, addr, tmp); } diff --git a/target/arm/translate.c b/target/arm/translate.c index e99c0ab5cb..21b241b1ce 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -949,6 +949,37 @@ static void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val, tcg_temp_free(addr); } +static void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val, + TCGv_i32 a32, int index, MemOp opc) +{ + TCGv addr = gen_aa32_addr(s, a32, opc); + + tcg_gen_qemu_ld_i64(val, addr, index, opc); + + /* Not needed for user-mode BE32, where we use MO_BE instead. */ + if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) { + tcg_gen_rotri_i64(val, val, 32); + } + tcg_temp_free(addr); +} + +static void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val, + TCGv_i32 a32, int index, MemOp opc) +{ + TCGv addr = gen_aa32_addr(s, a32, opc); + + /* Not needed for user-mode BE32, where we use MO_BE instead. */ + if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) { + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_rotri_i64(tmp, val, 32); + tcg_gen_qemu_st_i64(tmp, addr, index, opc); + tcg_temp_free_i64(tmp); + } else { + tcg_gen_qemu_st_i64(val, addr, index, opc); + } + tcg_temp_free(addr); +} + static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, int index, MemOp opc) { @@ -961,6 +992,18 @@ static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc)); } +static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, + int index, MemOp opc) +{ + gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc)); +} + +static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, + int index, MemOp opc) +{ + gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc)); +} + #define DO_GEN_LD(SUFF, OPC) \ static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \ TCGv_i32 a32, int index) \ @@ -975,47 +1018,16 @@ static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, gen_aa32_st_i32(s, val, a32, index, OPC); \ } -static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, - int index, MemOp opc) -{ - TCGv addr = gen_aa32_addr(s, a32, opc); - tcg_gen_qemu_ld_i64(val, addr, index, opc); - - /* Not needed for user-mode BE32, where we use MO_BE instead. 
*/ - if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) { - tcg_gen_rotri_i64(val, val, 32); - } - - tcg_temp_free(addr); -} - static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, int index) { - gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data); -} - -static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, - int index, MemOp opc) -{ - TCGv addr = gen_aa32_addr(s, a32, opc); - - /* Not needed for user-mode BE32, where we use MO_BE instead. */ - if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_rotri_i64(tmp, val, 32); - tcg_gen_qemu_st_i64(tmp, addr, index, opc); - tcg_temp_free_i64(tmp); - } else { - tcg_gen_qemu_st_i64(val, addr, index, opc); - } - tcg_temp_free(addr); + gen_aa32_ld_i64(s, val, a32, index, MO_Q); } static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, int index) { - gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data); + gen_aa32_st_i64(s, val, a32, index, MO_Q); } DO_GEN_LD(8u, MO_UB) From 4d753eb5fb03ee7bc71ecd453a650b7546be81da Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:41 -0700 Subject: [PATCH 25/43] target/arm: Enforce word alignment for LDRD/STRD Buglink: https://bugs.launchpad.net/qemu/+bug/1905356 Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-16-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index 21b241b1ce..4b0dba9e77 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -6520,13 +6520,13 @@ static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a) addr = op_addr_rr_pre(s, a); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); store_reg(s, a->rt, tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); store_reg(s, a->rt + 1, tmp); /* LDRD w/ base writeback is undefined if the registers overlap. */ @@ -6549,13 +6549,13 @@ static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a) addr = op_addr_rr_pre(s, a); tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = load_reg(s, a->rt + 1); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); op_addr_rr_post(s, a, addr, -4); @@ -6665,13 +6665,13 @@ static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2) addr = op_addr_ri_pre(s, a); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); store_reg(s, a->rt, tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); store_reg(s, rt2, tmp); /* LDRD w/ base writeback is undefined if the registers overlap. 
*/ @@ -6704,13 +6704,13 @@ static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2) addr = op_addr_ri_pre(s, a); tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = load_reg(s, rt2); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); op_addr_ri_post(s, a, addr, -4); From 824efdf5256399c9830941ddd78c28e3aa4618d8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:42 -0700 Subject: [PATCH 26/43] target/arm: Enforce alignment for LDA/LDAH/STL/STLH Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-17-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index 4b0dba9e77..f5a214e35e 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -6936,7 +6936,7 @@ static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop) addr = load_reg(s, a->rn); tmp = load_reg(s, a->rt); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN); disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite); tcg_temp_free_i32(tmp); @@ -7092,7 +7092,7 @@ static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop) addr = load_reg(s, a->rn); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN); disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel); tcg_temp_free_i32(addr); From 2e1f39e29bf9a6b28eaee9fc0949aab50dbad94a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:43 -0700 Subject: [PATCH 27/43] target/arm: Enforce alignment for LDM/STM Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-18-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index f5a214e35e..9095c4a86f 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -7884,7 +7884,7 @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) } else { tmp = load_reg(s, i); } - gen_aa32_st32(s, tmp, addr, mem_idx); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); /* No need to add after the last transfer. 
*/ @@ -7959,7 +7959,7 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) } tmp = tcg_temp_new_i32(); - gen_aa32_ld32u(s, tmp, addr, mem_idx); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); if (user) { tmp2 = tcg_const_i32(i); gen_helper_set_user_reg(cpu_env, tmp2, tmp); From c0c7f66087b193303bf9afe6e5e675fd02a17e12 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:44 -0700 Subject: [PATCH 28/43] target/arm: Enforce alignment for RFE Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-19-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index 9095c4a86f..b8704d2504 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -8357,10 +8357,10 @@ static bool trans_RFE(DisasContext *s, arg_RFE *a) /* Load PC into tmp and CPSR into tmp2. */ t1 = tcg_temp_new_i32(); - gen_aa32_ld32u(s, t1, addr, get_mem_index(s)); + gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN); tcg_gen_addi_i32(addr, addr, 4); t2 = tcg_temp_new_i32(); - gen_aa32_ld32u(s, t2, addr, get_mem_index(s)); + gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN); if (a->w) { /* Base writeback. */ From 2fd0800c68b48c5402eea0f88bd68aadfdc15004 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:45 -0700 Subject: [PATCH 29/43] target/arm: Enforce alignment for SRS Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-20-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index b8704d2504..3b071012ca 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5200,11 +5200,11 @@ static void gen_srs(DisasContext *s, } tcg_gen_addi_i32(addr, addr, offset); tmp = load_reg(s, 14); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); tmp = load_cpu_field(spsr); tcg_gen_addi_i32(addr, addr, 4); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); if (writeback) { switch (amode) { From ad9aeae1a9bbb3498bb8acfc13799f9e1cd86c97 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:46 -0700 Subject: [PATCH 30/43] target/arm: Enforce alignment for VLDM/VSTM Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-21-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-vfp.c.inc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index 10766f210c..f50afb23e7 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -1503,12 +1503,12 @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) for (i = 0; i < n; i++) { if (a->l) { /* load */ - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); vfp_store_reg32(tmp, a->vd + i); } else { /* store */ vfp_load_reg32(tmp, a->vd + i); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); } 
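/* Each register in the list is transferred as its own 32-bit access, so the MO_ALIGN added above is checked for every element. */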
tcg_gen_addi_i32(addr, addr, offset); } @@ -1586,12 +1586,12 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) for (i = 0; i < n; i++) { if (a->l) { /* load */ - gen_aa32_ld64(s, tmp, addr, get_mem_index(s)); + gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); vfp_store_reg64(tmp, a->vd + i); } else { /* store */ vfp_load_reg64(tmp, a->vd + i); - gen_aa32_st64(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); } tcg_gen_addi_i32(addr, addr, offset); } From 6cd623d166025c8299f76ba0389fd7e879f82779 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:47 -0700 Subject: [PATCH 31/43] target/arm: Enforce alignment for VLDR/VSTR Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-22-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-vfp.c.inc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index f50afb23e7..e20d9c7ba6 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -1364,11 +1364,11 @@ static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a) addr = add_reg_for_lit(s, a->rn, offset); tmp = tcg_temp_new_i32(); if (a->l) { - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN); vfp_store_reg32(tmp, a->vd); } else { vfp_load_reg32(tmp, a->vd); - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN); } tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); @@ -1398,11 +1398,11 @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a) addr = add_reg_for_lit(s, a->rn, offset); tmp = tcg_temp_new_i32(); if (a->l) { - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); vfp_store_reg32(tmp, a->vd); } else { vfp_load_reg32(tmp, a->vd); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); } tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); @@ -1439,11 +1439,11 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a) addr = add_reg_for_lit(s, a->rn, offset); tmp = tcg_temp_new_i64(); if (a->l) { - gen_aa32_ld64(s, tmp, addr, get_mem_index(s)); + gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); vfp_store_reg64(tmp, a->vd); } else { vfp_load_reg64(tmp, a->vd); - gen_aa32_st64(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); } tcg_temp_free_i64(tmp); tcg_temp_free_i32(addr); From a8502b37f69f256456ee6599a7850db38e5cc90a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:48 -0700 Subject: [PATCH 32/43] target/arm: Enforce alignment for VLDn (all lanes) Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-23-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-neon.c.inc | 37 +++++++++++++++++++++++++-------- target/arm/translate.c | 15 +++++++++++++ target/arm/translate.h | 1 + 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index 18d9042130..9c2b076027 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -522,6 +522,7 @@ static bool 
trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) int size = a->size; int nregs = a->n + 1; TCGv_i32 addr, tmp; + MemOp mop, align; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -532,18 +533,33 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) return false; } + align = 0; if (size == 3) { if (nregs != 4 || a->a == 0) { return false; } /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ - size = 2; - } - if (nregs == 1 && a->a == 1 && size == 0) { - return false; - } - if (nregs == 3 && a->a == 1) { - return false; + size = MO_32; + align = MO_ALIGN_16; + } else if (a->a) { + switch (nregs) { + case 1: + if (size == 0) { + return false; + } + align = MO_ALIGN; + break; + case 2: + align = pow2_align(size + 1); + break; + case 3: + return false; + case 4: + align = pow2_align(size + 2); + break; + default: + g_assert_not_reached(); + } } if (!vfp_access_check(s)) { @@ -556,12 +572,12 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) */ stride = a->t ? 2 : 1; vec_size = nregs == 1 ? stride * 8 : 8; - + mop = size | align; tmp = tcg_temp_new_i32(); addr = tcg_temp_new_i32(); load_reg_var(s, addr, a->rn); for (reg = 0; reg < nregs; reg++) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), size); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop); if ((vd & 1) && vec_size == 16) { /* * We cannot write 16 bytes at once because the @@ -577,6 +593,9 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) } tcg_gen_addi_i32(addr, addr, 1 << size); vd += stride; + + /* Subsequent memory operations inherit alignment */ + mop &= ~MO_AMASK; } tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); diff --git a/target/arm/translate.c b/target/arm/translate.c index 3b071012ca..43ff0d4b8a 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -908,6 +908,21 @@ static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) #define IS_USER_ONLY 0 #endif +MemOp pow2_align(unsigned i) +{ + static const MemOp mop_align[] = { + 0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16, + /* + * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such + * that 256-bit alignment (MO_ALIGN_32) cannot be supported: + * see get_alignment_bits(). Enforce only 128-bit alignment for now. 
+ */ + MO_ALIGN_16 + }; + g_assert(i < ARRAY_SIZE(mop_align)); + return mop_align[i]; +} + /* * Abstractions of "generate code to do a guest load/store for * AArch32", where a vaddr is always 32 bits (and is zero diff --git a/target/arm/translate.h b/target/arm/translate.h index 0c60b83b3d..ccf60c96d8 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -204,6 +204,7 @@ void arm_test_cc(DisasCompare *cmp, int cc); void arm_free_cc(DisasCompare *cmp); void arm_jump_cc(DisasCompare *cmp, TCGLabel *label); void arm_gen_test_cc(int cc, TCGLabel *label); +MemOp pow2_align(unsigned i); /* Return state of Alternate Half-precision flag, caller frees result */ static inline TCGv_i32 get_ahp_flag(void) From 7c68c196cf693e6098a04bd24985004db5983914 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:49 -0700 Subject: [PATCH 33/43] target/arm: Enforce alignment for VLDn/VSTn (multiple) Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-24-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-neon.c.inc | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index 9c2b076027..e706c37c80 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -429,7 +429,7 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) { /* Neon load/store multiple structures */ int nregs, interleave, spacing, reg, n; - MemOp endian = s->be_data; + MemOp mop, align, endian; int mmu_idx = get_mem_index(s); int size = a->size; TCGv_i64 tmp64; @@ -473,20 +473,36 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) } /* For our purposes, bytes are always little-endian. */ + endian = s->be_data; if (size == 0) { endian = MO_LE; } + + /* Enforce alignment requested by the instruction */ + if (a->align) { + align = pow2_align(a->align + 2); /* 4 ** a->align */ + } else { + align = s->align_mem ? MO_ALIGN : 0; + } + /* * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ if (interleave == 1 && endian == MO_LE) { + /* Retain any natural alignment. 
*/ + if (align == MO_ALIGN) { + align = pow2_align(size); + } size = 3; } + tmp64 = tcg_temp_new_i64(); addr = tcg_temp_new_i32(); tmp = tcg_const_i32(1 << size); load_reg_var(s, addr, a->rn); + + mop = endian | size | align; for (reg = 0; reg < nregs; reg++) { for (n = 0; n < 8 >> size; n++) { int xs; @@ -494,15 +510,16 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) int tt = a->vd + reg + spacing * xs; if (a->l) { - gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, - endian | size); + gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop); neon_store_element64(tt, n, size, tmp64); } else { neon_load_element64(tmp64, tt, n, size); - gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, - endian | size); + gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop); } tcg_gen_add_i32(addr, addr, tmp); + + /* Subsequent memory operations inherit alignment */ + mop &= ~MO_AMASK; } } } From 88976ff0a4a8b27046df6fd05fb7be70a2f987da Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:50 -0700 Subject: [PATCH 34/43] target/arm: Enforce alignment for VLDn/VSTn (single) Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-25-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-neon.c.inc | 48 ++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index e706c37c80..a02b8369a1 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -629,6 +629,7 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) int nregs = a->n + 1; int vd = a->vd; TCGv_i32 addr, tmp; + MemOp mop; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -678,23 +679,58 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) return true; } + /* Pick up SCTLR settings */ + mop = finalize_memop(s, a->size); + + if (a->align) { + MemOp align_op; + + switch (nregs) { + case 1: + /* For VLD1, use natural alignment. */ + align_op = MO_ALIGN; + break; + case 2: + /* For VLD2, use double alignment. */ + align_op = pow2_align(a->size + 1); + break; + case 4: + if (a->size == MO_32) { + /* + * For VLD4.32, align = 1 is double alignment, align = 2 is + * quad alignment; align = 3 is rejected above. + */ + align_op = pow2_align(a->size + a->align); + } else { + /* For VLD4.8 and VLD.16, we want quad alignment. */ + align_op = pow2_align(a->size + 2); + } + break; + default: + /* For VLD3, the alignment field is zero and rejected above. */ + g_assert_not_reached(); + } + + mop = (mop & ~MO_AMASK) | align_op; + } + tmp = tcg_temp_new_i32(); addr = tcg_temp_new_i32(); load_reg_var(s, addr, a->rn); - /* - * TODO: if we implemented alignment exceptions, we should check - * addr against the alignment encoded in a->align here. 
- */ + for (reg = 0; reg < nregs; reg++) { if (a->l) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), a->size); + gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop); neon_store_element(vd, a->reg_idx, a->size, tmp); } else { /* Store */ neon_load_element(tmp, vd, a->reg_idx, a->size); - gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), a->size); + gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop); } vd += a->stride; tcg_gen_addi_i32(addr, addr, 1 << a->size); + + /* Subsequent memory operations inherit alignment */ + mop &= ~MO_AMASK; } tcg_temp_free_i32(addr); tcg_temp_free_i32(tmp); From dc821642296ad0307ae6c0220e4b9ba1ae165d9e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:51 -0700 Subject: [PATCH 35/43] target/arm: Use finalize_memop for aa64 gpr load/store In the case of gpr load, merge the size and is_signed arguments; otherwise, simply convert size to memop. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-26-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 78 ++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 45 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 92a62b1a75..f2995d2b74 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -886,19 +886,19 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) * Store from GPR register to memory. */ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, - TCGv_i64 tcg_addr, int size, int memidx, + TCGv_i64 tcg_addr, MemOp memop, int memidx, bool iss_valid, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - g_assert(size <= 3); - tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size); + memop = finalize_memop(s, memop); + tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); if (iss_valid) { uint32_t syn; syn = syn_data_abort_with_iss(0, - size, + (memop & MO_SIZE), false, iss_srt, iss_sf, @@ -909,37 +909,28 @@ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, } static void do_gpr_st(DisasContext *s, TCGv_i64 source, - TCGv_i64 tcg_addr, int size, + TCGv_i64 tcg_addr, MemOp memop, bool iss_valid, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s), + do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s), iss_valid, iss_srt, iss_sf, iss_ar); } /* * Load from memory to GPR register */ -static void do_gpr_ld_memidx(DisasContext *s, - TCGv_i64 dest, TCGv_i64 tcg_addr, - int size, bool is_signed, - bool extend, int memidx, +static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + MemOp memop, bool extend, int memidx, bool iss_valid, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - MemOp memop = s->be_data + size; - - g_assert(size <= 3); - - if (is_signed) { - memop += MO_SIGN; - } - + memop = finalize_memop(s, memop); tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); - if (extend && is_signed) { - g_assert(size < 3); + if (extend && (memop & MO_SIGN)) { + g_assert((memop & MO_SIZE) <= MO_32); tcg_gen_ext32u_i64(dest, dest); } @@ -947,8 +938,8 @@ static void do_gpr_ld_memidx(DisasContext *s, uint32_t syn; syn = syn_data_abort_with_iss(0, - size, - is_signed, + (memop & MO_SIZE), + (memop & MO_SIGN) != 0, iss_srt, iss_sf, iss_ar, @@ -957,14 +948,12 @@ static void do_gpr_ld_memidx(DisasContext *s, } } -static void do_gpr_ld(DisasContext *s, - TCGv_i64 dest, TCGv_i64 tcg_addr, 
- int size, bool is_signed, bool extend, +static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + MemOp memop, bool extend, bool iss_valid, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend, - get_mem_index(s), + do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s), iss_valid, iss_srt, iss_sf, iss_ar); } @@ -2717,7 +2706,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt, + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); return; @@ -2830,8 +2819,8 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) /* Only unsigned 32bit loads target 32bit registers. */ bool iss_sf = opc != 0; - do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + false, true, rt, iss_sf, false); } tcg_temp_free_i64(clean_addr); } @@ -2989,11 +2978,11 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) /* Do not modify tcg_rt before recognizing any exception * from the second load. */ - do_gpr_ld(s, tmp, clean_addr, size, is_signed, false, - false, 0, false, false); + do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN, + false, false, 0, false, false); tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); - do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false, - false, 0, false, false); + do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN, + false, false, 0, false, false); tcg_gen_mov_i64(tcg_rt, tmp); tcg_temp_free_i64(tmp); @@ -3124,8 +3113,8 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, iss_valid, rt, iss_sf, false); } else { - do_gpr_ld_memidx(s, tcg_rt, clean_addr, size, - is_signed, is_extended, memidx, + do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + is_extended, memidx, iss_valid, rt, iss_sf, false); } } @@ -3229,9 +3218,8 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, do_gpr_st(s, tcg_rt, clean_addr, size, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size, - is_signed, is_extended, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + is_extended, true, rt, iss_sf, false); } } } @@ -3314,8 +3302,8 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, do_gpr_st(s, tcg_rt, clean_addr, size, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + is_extended, true, rt, iss_sf, false); } } } @@ -3402,7 +3390,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, * full load-acquire (we only need "load-acquire processor consistent"), * but we choose to implement them as full LDAQ. 
*/ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); return; @@ -3475,7 +3463,7 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, is_wback || rn != 31, size); tcg_rt = cpu_reg(s, rt); - do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false, + do_gpr_ld(s, tcg_rt, clean_addr, size, /* extend */ false, /* iss_valid */ !is_wback, /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false); @@ -3560,8 +3548,8 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) * Load-AcquirePC semantics; we implement as the slightly more * restrictive Load-Acquire. */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, is_signed, extend, - true, rt, iss_sf, true); + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size + is_signed * MO_SIGN, + extend, true, rt, iss_sf, true); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); } } From 4044a3cd1cdf07503b1fe896ca145328dceba435 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:52 -0700 Subject: [PATCH 36/43] target/arm: Use finalize_memop for aa64 fpr load/store For 128-bit load/store, use 16-byte alignment. This requires that we perform the two operations in the correct order so that we generate the alignment fault before modifying memory. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-27-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 42 +++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index f2995d2b74..b90d6880e7 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -963,25 +963,33 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) { /* This writes the bottom N bits of a 128 bit wide vector to memory */ - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64)); + TCGv_i64 tmplo = tcg_temp_new_i64(); + MemOp mop; + + tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64)); + if (size < 4) { - tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), - s->be_data + size); + mop = finalize_memop(s, size); + tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); } else { bool be = s->be_data == MO_BE; TCGv_i64 tcg_hiaddr = tcg_temp_new_i64(); + TCGv_i64 tmphi = tcg_temp_new_i64(); + tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx)); + + mop = s->be_data | MO_Q; + tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s), + mop | (s->align_mem ? MO_ALIGN_16 : 0)); tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s), - s->be_data | MO_Q); - tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx)); - tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s), - s->be_data | MO_Q); + tcg_gen_qemu_st_i64(be ? 
tmplo : tmphi, tcg_hiaddr, + get_mem_index(s), mop); + tcg_temp_free_i64(tcg_hiaddr); + tcg_temp_free_i64(tmphi); } - tcg_temp_free_i64(tmp); + tcg_temp_free_i64(tmplo); } /* @@ -992,10 +1000,11 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(); TCGv_i64 tmphi = NULL; + MemOp mop; if (size < 4) { - MemOp memop = s->be_data + size; - tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop); + mop = finalize_memop(s, size); + tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); } else { bool be = s->be_data == MO_BE; TCGv_i64 tcg_hiaddr; @@ -1003,11 +1012,12 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) tmphi = tcg_temp_new_i64(); tcg_hiaddr = tcg_temp_new_i64(); + mop = s->be_data | MO_Q; + tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s), + mop | (s->align_mem ? MO_ALIGN_16 : 0)); tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s), - s->be_data | MO_Q); - tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s), - s->be_data | MO_Q); + tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr, + get_mem_index(s), mop); tcg_temp_free_i64(tcg_hiaddr); } From acb07e08d634bc36a18936dd5e2ebc318bcaf3db Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:53 -0700 Subject: [PATCH 37/43] target/arm: Enforce alignment for aa64 load-acq/store-rel Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-28-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index b90d6880e7..ac60dcf760 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -2699,7 +2699,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); - do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, + /* TODO: ARMv8.4-LSE SCTLR.nAA */ + do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); return; @@ -2716,8 +2717,9 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, true, rt, - disas_ldst_compute_iss_sf(size, false, 0), is_lasr); + /* TODO: ARMv8.4-LSE SCTLR.nAA */ + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true, + rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); return; @@ -3505,15 +3507,18 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) int size = extract32(insn, 30, 2); TCGv_i64 clean_addr, dirty_addr; bool is_store = false; - bool is_signed = false; bool extend = false; bool iss_sf; + MemOp mop; if (!dc_isar_feature(aa64_rcpc_8_4, s)) { unallocated_encoding(s); return; } + /* TODO: ARMv8.4-LSE SCTLR.nAA */ + mop = size | MO_ALIGN; + switch (opc) { case 0: /* STLURB */ is_store = true; @@ -3525,21 +3530,21 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - is_signed = true; + mop |= MO_SIGN; break; case 3: /* LDAPURS* 
32-bit variant */ if (size > 1) { unallocated_encoding(s); return; } - is_signed = true; + mop |= MO_SIGN; extend = true; /* zero-extend 32->64 after signed load */ break; default: g_assert_not_reached(); } - iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); + iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc); if (rn == 31) { gen_check_sp_alignment(s); @@ -3552,13 +3557,13 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) if (is_store) { /* Store-Release semantics */ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, iss_sf, true); + do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true); } else { /* * Load-AcquirePC semantics; we implement as the slightly more * restrictive Load-Acquire. */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size + is_signed * MO_SIGN, + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, extend, true, rt, iss_sf, true); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); } From a9e89e539ec3a67f5257ce055a8ed38bd58fc89f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:54 -0700 Subject: [PATCH 38/43] target/arm: Use MemOp for size + endian in aa64 vector ld/st Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-29-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index ac60dcf760..d3bda16ecd 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -1146,24 +1146,24 @@ static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, /* Store from vector register to memory */ static void do_vec_st(DisasContext *s, int srcidx, int element, - TCGv_i64 tcg_addr, int size, MemOp endian) + TCGv_i64 tcg_addr, MemOp mop) { TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - read_vec_element(s, tcg_tmp, srcidx, element, size); - tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size); + read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE); + tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); tcg_temp_free_i64(tcg_tmp); } /* Load from memory to vector register */ static void do_vec_ld(DisasContext *s, int destidx, int element, - TCGv_i64 tcg_addr, int size, MemOp endian) + TCGv_i64 tcg_addr, MemOp mop) { TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size); - write_vec_element(s, tcg_tmp, destidx, element, size); + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); + write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); tcg_temp_free_i64(tcg_tmp); } @@ -3734,9 +3734,9 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) for (xs = 0; xs < selem; xs++) { int tt = (rt + r + xs) % 32; if (is_store) { - do_vec_st(s, tt, e, clean_addr, size, endian); + do_vec_st(s, tt, e, clean_addr, size | endian); } else { - do_vec_ld(s, tt, e, clean_addr, size, endian); + do_vec_ld(s, tt, e, clean_addr, size | endian); } tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); } @@ -3885,9 +3885,9 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) } else { /* Load/store one element per register */ if (is_load) { - do_vec_ld(s, rt, index, clean_addr, scale, s->be_data); + do_vec_ld(s, rt, index, clean_addr, scale | s->be_data); } else { - do_vec_st(s, rt, index, clean_addr, scale, s->be_data); + 
do_vec_st(s, rt, index, clean_addr, scale | s->be_data); } } tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); From c8f638d99aaf8284b9ba81fb49ad6985d109794f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:55 -0700 Subject: [PATCH 39/43] target/arm: Enforce alignment for aa64 vector LDn/STn (multiple) Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-30-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index d3bda16ecd..2a82dbbd6d 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -3635,7 +3635,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) bool is_postidx = extract32(insn, 23, 1); bool is_q = extract32(insn, 30, 1); TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; - MemOp endian = s->be_data; + MemOp endian, align, mop; int total; /* total bytes */ int elements; /* elements per vector */ @@ -3703,6 +3703,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) } /* For our purposes, bytes are always little-endian. */ + endian = s->be_data; if (size == 0) { endian = MO_LE; } @@ -3721,11 +3722,17 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ + align = MO_ALIGN; if (selem == 1 && endian == MO_LE) { + align = pow2_align(size); size = 3; } - elements = (is_q ? 16 : 8) >> size; + if (!s->align_mem) { + align = 0; + } + mop = endian | size | align; + elements = (is_q ? 16 : 8) >> size; tcg_ebytes = tcg_const_i64(1 << size); for (r = 0; r < rpt; r++) { int e; @@ -3734,9 +3741,9 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) for (xs = 0; xs < selem; xs++) { int tt = (rt + r + xs) % 32; if (is_store) { - do_vec_st(s, tt, e, clean_addr, size | endian); + do_vec_st(s, tt, e, clean_addr, mop); } else { - do_vec_ld(s, tt, e, clean_addr, size | endian); + do_vec_ld(s, tt, e, clean_addr, mop); } tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); } From 37abe399df6a8b7006a19f3378715b650599e8fa Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:56 -0700 Subject: [PATCH 40/43] target/arm: Enforce alignment for aa64 vector LDn/STn (single) Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-31-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 2a82dbbd6d..95897e63af 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -3815,6 +3815,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) int index = is_q << 3 | S << 2 | size; int xs, total; TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; + MemOp mop; if (extract32(insn, 31, 1)) { unallocated_encoding(s); @@ -3876,6 +3877,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, total); + mop = finalize_memop(s, scale); tcg_ebytes = tcg_const_i64(1 << scale); for (xs = 0; xs < selem; xs++) { @@ -3883,8 +3885,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) /* Load and replicate to all 
elements */ TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, - get_mem_index(s), s->be_data + scale); + tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt), (is_q + 1) * 8, vec_full_reg_size(s), tcg_tmp); @@ -3892,9 +3893,9 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) } else { /* Load/store one element per register */ if (is_load) { - do_vec_ld(s, rt, index, clean_addr, scale | s->be_data); + do_vec_ld(s, rt, index, clean_addr, mop); } else { - do_vec_st(s, rt, index, clean_addr, scale | s->be_data); + do_vec_st(s, rt, index, clean_addr, mop); } } tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); From 0ca0f8720a424a643d33cce802a4b769fbb62836 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Apr 2021 13:22:57 -0700 Subject: [PATCH 41/43] target/arm: Enforce alignment for sve LD1R Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210419202257.161730-32-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-sve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 584c4d047c..864ed669c4 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -5001,7 +5001,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) clean_addr = gen_mte_check1(s, temp, false, true, msz); tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), - s->be_data | dtype_mop[a->dtype]); + finalize_memop(s, dtype_mop[a->dtype])); /* Broadcast to *all* elements. */ tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), From da7e13c00b5962016b9c72079bef5e0a5398db0d Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 31 Mar 2021 13:19:00 +0200 Subject: [PATCH 42/43] hw: add compat machines for 6.1 Add 6.1 machine types for arm/i440fx/q35/s390x/spapr. 
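Each board follows the same shape: the new 6.1 options function is empty, inheriting the latest defaults, while the old 6.0 function now chains to it and pins pre-6.1 behaviour through the new compat arrays. Roughly, taking the spapr hunk below as the model:

    static void spapr_machine_6_1_class_options(MachineClass *mc)
    {
        /* Latest behaviour: nothing to override. */
    }

    static void spapr_machine_6_0_class_options(MachineClass *mc)
    {
        /* Inherit 6.1 behaviour, then restore 6.0 semantics. */
        spapr_machine_6_1_class_options(mc);
        compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len);
    }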
Signed-off-by: Cornelia Huck Acked-by: Greg Kurz Message-id: 20210331111900.118274-1-cohuck@redhat.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/arm/virt.c | 7 ++++++- hw/core/machine.c | 3 +++ hw/i386/pc.c | 3 +++ hw/i386/pc_piix.c | 14 +++++++++++++- hw/i386/pc_q35.c | 13 ++++++++++++- hw/ppc/spapr.c | 17 ++++++++++++++--- hw/s390x/s390-virtio-ccw.c | 14 +++++++++++++- include/hw/boards.h | 3 +++ include/hw/i386/pc.h | 3 +++ 9 files changed, 70 insertions(+), 7 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 9f01d9041b..fee696fb0e 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2757,10 +2757,15 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); +static void virt_machine_6_1_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE_AS_LATEST(6, 1) + static void virt_machine_6_0_options(MachineClass *mc) { } -DEFINE_VIRT_MACHINE_AS_LATEST(6, 0) +DEFINE_VIRT_MACHINE(6, 0) static void virt_machine_5_2_options(MachineClass *mc) { diff --git a/hw/core/machine.c b/hw/core/machine.c index 40def78183..cebcdcc351 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -36,6 +36,9 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" +GlobalProperty hw_compat_6_0[] = {}; +const size_t hw_compat_6_0_len = G_N_ELEMENTS(hw_compat_6_0); + GlobalProperty hw_compat_5_2[] = { { "ICH9-LPC", "smm-compat", "on"}, { "PIIX4_PM", "smm-compat", "on"}, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8a84b25a03..364816efc9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -96,6 +96,9 @@ #include "trace.h" #include CONFIG_DEVICES +GlobalProperty pc_compat_6_0[] = {}; +const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0); + GlobalProperty pc_compat_5_2[] = { { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 46cc951073..4e8edffeaf 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -415,7 +415,7 @@ static void pc_i440fx_machine_options(MachineClass *m) machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE); } -static void pc_i440fx_6_0_machine_options(MachineClass *m) +static void pc_i440fx_6_1_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_machine_options(m); @@ -424,6 +424,18 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m) pcmc->default_cpu_version = 1; } +DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL, + pc_i440fx_6_1_machine_options); + +static void pc_i440fx_6_0_machine_options(MachineClass *m) +{ + pc_i440fx_6_1_machine_options(m); + m->alias = NULL; + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len); + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); +} + DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL, pc_i440fx_6_0_machine_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 53450190f5..458ed41c65 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -345,7 +345,7 @@ static void pc_q35_machine_options(MachineClass *m) m->max_cpus = 288; } -static void pc_q35_6_0_machine_options(MachineClass *m) +static void pc_q35_6_1_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_machine_options(m); @@ -353,6 +353,17 @@ static void pc_q35_6_0_machine_options(MachineClass *m) pcmc->default_cpu_version = 1; } +DEFINE_Q35_MACHINE(v6_1, "pc-q35-6.1", NULL, + pc_q35_6_1_machine_options); + +static void pc_q35_6_0_machine_options(MachineClass *m) +{ + pc_q35_6_1_machine_options(m); + m->alias = NULL; + 
compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len); + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); +} + DEFINE_Q35_MACHINE(v6_0, "pc-q35-6.0", NULL, pc_q35_6_0_machine_options); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e4be00b732..529ff056dd 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4605,14 +4605,25 @@ static void spapr_machine_latest_class_options(MachineClass *mc) type_init(spapr_machine_register_##suffix) /* - * pseries-6.0 + * pseries-6.1 */ -static void spapr_machine_6_0_class_options(MachineClass *mc) +static void spapr_machine_6_1_class_options(MachineClass *mc) { /* Defaults for the latest behaviour inherited from the base class */ } -DEFINE_SPAPR_MACHINE(6_0, "6.0", true); +DEFINE_SPAPR_MACHINE(6_1, "6.1", true); + +/* + * pseries-6.0 + */ +static void spapr_machine_6_0_class_options(MachineClass *mc) +{ + spapr_machine_6_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); +} + +DEFINE_SPAPR_MACHINE(6_0, "6.0", false); /* * pseries-5.2 diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 2972b607f3..56b52d2d30 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -795,14 +795,26 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +static void ccw_machine_6_1_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_6_1_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(6_1, "6.1", true); + static void ccw_machine_6_0_instance_options(MachineState *machine) { + ccw_machine_6_1_instance_options(machine); } static void ccw_machine_6_0_class_options(MachineClass *mc) { + ccw_machine_6_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); } -DEFINE_CCW_MACHINE(6_0, "6.0", true); +DEFINE_CCW_MACHINE(6_0, "6.0", false); static void ccw_machine_5_2_instance_options(MachineState *machine) { diff --git a/include/hw/boards.h b/include/hw/boards.h index ad6c8fd537..3d55d2bd62 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -353,6 +353,9 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_6_0[]; +extern const size_t hw_compat_6_0_len; + extern GlobalProperty hw_compat_5_2[]; extern const size_t hw_compat_5_2_len; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index dcf060b791..1522a3359a 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -197,6 +197,9 @@ bool pc_system_ovmf_table_find(const char *entry, uint8_t **data, void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, const CPUArchIdList *apic_ids, GArray *entry); +extern GlobalProperty pc_compat_6_0[]; +extern const size_t pc_compat_6_0_len; + extern GlobalProperty pc_compat_5_2[]; extern const size_t pc_compat_5_2_len; From a6091108aa44e9017af4ca13c43f55a629e3744c Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 25 Mar 2021 16:33:15 +0000 Subject: [PATCH 43/43] hw/pci-host/gpex: Don't fault for unmapped parts of MMIO and PIO windows Currently the gpex PCI controller implements no special behaviour for guest accesses to areas of the PIO and MMIO where it has not mapped any PCI devices, which means that for Arm you end up with a CPU exception due to a data abort. Most host OSes expect "like an x86 PC" behaviour, where bad accesses like this return -1 for reads and ignore writes. 
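For reference, that behaviour is what QEMU's stock unassigned_io_ops provides, and it is what the change below reuses; a sketch of its shape, reduced to the essential fields (the real definition lives elsewhere in the tree):

    static uint64_t unassigned_io_read(void *opaque, hwaddr addr, unsigned size)
    {
        return -1ULL; /* reads of unmapped addresses return all-ones */
    }

    static void unassigned_io_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
    {
        /* writes to unmapped addresses are silently discarded */
    }

    const MemoryRegionOps unassigned_io_ops = {
        .read = unassigned_io_read,
        .write = unassigned_io_write,
        .endianness = DEVICE_NATIVE_ENDIAN,
    };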
In the interests of not being surprising, make host CPU accesses to these windows behave as -1/discard where there's no mapped PCI device. The old behaviour generally didn't cause any problems, because almost always the guest OS will map the PCI devices and then only access where it has mapped them. One corner case where you will see this kind of access is if Linux attempts to probe legacy ISA devices via a PIO window access. So far the only case where we've seen this has been via the syzkaller fuzzer. Reported-by: Dmitry Vyukov Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Acked-by: Michael S. Tsirkin Message-id: 20210325163315.27724-1-peter.maydell@linaro.org Fixes: https://bugs.launchpad.net/qemu/+bug/1918917 Signed-off-by: Peter Maydell --- hw/core/machine.c | 4 ++- hw/pci-host/gpex.c | 56 ++++++++++++++++++++++++++++++++++++-- include/hw/pci-host/gpex.h | 4 +++ 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index cebcdcc351..0f5ce43d0c 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -36,7 +36,9 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" -GlobalProperty hw_compat_6_0[] = {}; +GlobalProperty hw_compat_6_0[] = { + { "gpex-pcihost", "allow-unmapped-accesses", "false" }, +}; const size_t hw_compat_6_0_len = G_N_ELEMENTS(hw_compat_6_0); GlobalProperty hw_compat_5_2[] = { diff --git a/hw/pci-host/gpex.c b/hw/pci-host/gpex.c index 2bdbe7b456..a6752fac5e 100644 --- a/hw/pci-host/gpex.c +++ b/hw/pci-host/gpex.c @@ -83,12 +83,51 @@ static void gpex_host_realize(DeviceState *dev, Error **errp) int i; pcie_host_mmcfg_init(pex, PCIE_MMCFG_SIZE_MAX); + sysbus_init_mmio(sbd, &pex->mmio); + + /* + * Note that the MemoryRegions io_mmio and io_ioport that we pass + * to pci_register_root_bus() are not the same as the + * MemoryRegions io_mmio_window and io_ioport_window that we + * expose as SysBus MRs. The difference is in the behaviour of + * accesses to addresses where no PCI device has been mapped. + * + * io_mmio and io_ioport are the underlying PCI view of the PCI + * address space, and when a PCI device does a bus master access + * to a bad address this is reported back to it as a transaction + * failure. + * + * io_mmio_window and io_ioport_window implement "unmapped + * addresses read as -1 and ignore writes"; this is traditional + * x86 PC behaviour, which is not mandated by the PCI spec proper + * but expected by much PCI-using guest software, including Linux. + * + * In the interests of not being unnecessarily surprising, we + * implement it in the gpex PCI host controller, by providing the + * _window MRs, which are containers with io ops that implement + * the 'background' behaviour and which hold the real PCI MRs as + * subregions. 
+ */ memory_region_init(&s->io_mmio, OBJECT(s), "gpex_mmio", UINT64_MAX); memory_region_init(&s->io_ioport, OBJECT(s), "gpex_ioport", 64 * 1024); - sysbus_init_mmio(sbd, &pex->mmio); - sysbus_init_mmio(sbd, &s->io_mmio); - sysbus_init_mmio(sbd, &s->io_ioport); + if (s->allow_unmapped_accesses) { + memory_region_init_io(&s->io_mmio_window, OBJECT(s), + &unassigned_io_ops, OBJECT(s), + "gpex_mmio_window", UINT64_MAX); + memory_region_init_io(&s->io_ioport_window, OBJECT(s), + &unassigned_io_ops, OBJECT(s), + "gpex_ioport_window", 64 * 1024); + + memory_region_add_subregion(&s->io_mmio_window, 0, &s->io_mmio); + memory_region_add_subregion(&s->io_ioport_window, 0, &s->io_ioport); + sysbus_init_mmio(sbd, &s->io_mmio_window); + sysbus_init_mmio(sbd, &s->io_ioport_window); + } else { + sysbus_init_mmio(sbd, &s->io_mmio); + sysbus_init_mmio(sbd, &s->io_ioport); + } + for (i = 0; i < GPEX_NUM_IRQS; i++) { sysbus_init_irq(sbd, &s->irq[i]); s->irq_num[i] = -1; @@ -108,6 +147,16 @@ static const char *gpex_host_root_bus_path(PCIHostState *host_bridge, return "0000:00"; } +static Property gpex_host_properties[] = { + /* + * Permit CPU accesses to unmapped areas of the PIO and MMIO windows + * (discarding writes and returning -1 for reads) rather than aborting. + */ + DEFINE_PROP_BOOL("allow-unmapped-accesses", GPEXHost, + allow_unmapped_accesses, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void gpex_host_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -117,6 +166,7 @@ static void gpex_host_class_init(ObjectClass *klass, void *data) dc->realize = gpex_host_realize; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->fw_name = "pci"; + device_class_set_props(dc, gpex_host_properties); } static void gpex_host_initfn(Object *obj) diff --git a/include/hw/pci-host/gpex.h b/include/hw/pci-host/gpex.h index d48a020a95..fcf8b63820 100644 --- a/include/hw/pci-host/gpex.h +++ b/include/hw/pci-host/gpex.h @@ -49,8 +49,12 @@ struct GPEXHost { MemoryRegion io_ioport; MemoryRegion io_mmio; + MemoryRegion io_ioport_window; + MemoryRegion io_mmio_window; qemu_irq irq[GPEX_NUM_IRQS]; int irq_num[GPEX_NUM_IRQS]; + + bool allow_unmapped_accesses; }; struct GPEXConfig {