Merge tag 'pull-tcg-20230530' of https://gitlab.com/rth7680/qemu into staging

Improvements to 128-bit atomics:
 - Separate __int128_t type and arithmetic detection
 - Support 128-bit load/store in backend for i386, aarch64, ppc64, s390x
 - Accelerate atomics via host/include/
Decodetree:
 - Add named field syntax
 - Move tests to meson

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmR2R10dHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/bsgf/XLi8q+ITyoEAKwG4
# 6ML7DktLAdIs9Euah9twqe16U0BM0YzpKfymBfVVBKKaIa0524N4ZKIT3h6EeJo+
# f+ultqrpsnH+aQh4wc3ZCkEvRdhzhFT8VcoRTunJuJrbL3Y8n2ZSgODUL2a0tahT
# Nn+zEPm8rzQanSKQHq5kyNBLpgTUKjc5wKfvy/WwttnFmkTnqzcuEA6nPVOVwOHC
# lZBQCByIQWsHfFHUVJFvsFzBQbm0mAiW6FNKzPBkoXon0h/UZUI1lV+xXzgutFs+
# zR2O8IZwLYRu2wOWiTF8Nn2qQafkB3Dhwoq3JTEXhOqosOPExbIiWlsZDlPiKRJk
# bwmQlg==
# =XQMb
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 30 May 2023 11:58:37 AM PDT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-tcg-20230530' of https://gitlab.com/rth7680/qemu: (27 commits)
  tests/decode: Add tests for various named-field cases
  scripts/decodetree: Implement named field support
  scripts/decodetree: Implement a topological sort
  scripts/decodetree: Pass lvalue-formatter function to str_extract()
  docs: Document decodetree named field syntax
  tests/decode: Convert tests to meson
  decodetree: Do not remove output_file from /dev
  decodetree: Diagnose empty pattern group
  decodetree: Fix recursion in prop_format and build_tree
  decodetree: Add --test-for-error
  tcg: Remove TCG_TARGET_TLB_DISPLACEMENT_BITS
  accel/tcg: Add aarch64 store_atom_insert_al16
  accel/tcg: Add aarch64 lse2 load_atom_extract_al16_or_al8
  accel/tcg: Add x86_64 load_atom_extract_al16_or_al8
  accel/tcg: Extract store_atom_insert_al16 to host header
  accel/tcg: Extract load_atom_extract_al16_or_al8 to host header
  tcg/s390x: Support 128-bit load/store
  tcg/ppc: Support 128-bit load/store
  tcg/aarch64: Support 128-bit load/store
  tcg/aarch64: Simplify constraints on qemu_ld/st
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
commit 51bdb0b57a
@@ -9,6 +9,9 @@
  * See the COPYING file in the top-level directory.
  */
 
+#include "host/load-extract-al16-al8.h"
+#include "host/store-insert-al16.h"
+
 #ifdef CONFIG_ATOMIC64
 # define HAVE_al8 true
 #else
@@ -156,7 +159,7 @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
      * another process, because the fallback start_exclusive solution
      * provides no protection across processes.
      */
-    if (!page_check_range(h2g(pv), 8, PAGE_WRITE)) {
+    if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
         uint64_t *p = __builtin_assume_aligned(pv, 8);
         return *p;
     }
@@ -191,7 +194,7 @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
      * another process, because the fallback start_exclusive solution
      * provides no protection across processes.
      */
-    if (!page_check_range(h2g(p), 16, PAGE_WRITE)) {
+    if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
         return *p;
     }
 #endif
@@ -311,40 +314,6 @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
     return int128_getlo(r);
 }
 
-/**
- * load_atom_extract_al16_or_al8:
- * @p: host address
- * @s: object size in bytes, @s <= 8.
- *
- * Load @s bytes from @p, when p % s != 0. If [p, p+s-1] does not
- * cross an 16-byte boundary then the access must be 16-byte atomic,
- * otherwise the access must be 8-byte atomic.
- */
-static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
-load_atom_extract_al16_or_al8(void *pv, int s)
-{
-    uintptr_t pi = (uintptr_t)pv;
-    int o = pi & 7;
-    int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
-    Int128 r;
-
-    pv = (void *)(pi & ~7);
-    if (pi & 8) {
-        uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
-        uint64_t a = qatomic_read__nocheck(p8);
-        uint64_t b = qatomic_read__nocheck(p8 + 1);
-
-        if (HOST_BIG_ENDIAN) {
-            r = int128_make128(b, a);
-        } else {
-            r = int128_make128(a, b);
-        }
-    } else {
-        r = atomic16_read_ro(pv);
-    }
-    return int128_getlo(int128_urshift(r, shr));
-}
-
 /**
  * load_atom_4_by_2:
  * @pv: host address
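This helper does not disappear: it moves into per-host headers under host/include/ (the generic and aarch64/x86_64 variants appear later in this diff). As a quick sanity check of the shift arithmetic, here is a minimal, non-atomic model in plain C — little-endian only, with memcpy() standing in for the atomic reads; the buffer and offsets below are made up for the example:

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Non-atomic model of the extraction math (little-endian host assumed):
 * read the two aligned 8-byte halves around the object and shift the
 * value at the requested byte offset down into the low bits. */
static uint64_t extract_al16_or_al8_model(const void *pv, int s)
{
    uintptr_t pi = (uintptr_t)pv;
    const unsigned char *base = (const unsigned char *)(pi & ~(uintptr_t)7);
    int shr = (int)(pi & 7) * 8;
    uint64_t l, h;

    memcpy(&l, base, 8);        /* stands in for the single atomic read */
    memcpy(&h, base + 8, 8);
    (void)s;                    /* only the low s bytes of the result are used */
    return shr ? (l >> shr) | (h << (64 - shr)) : l;
}

int main(void)
{
    _Alignas(16) unsigned char buf[16] = { 0 };
    uint32_t v = 0x11223344, out;

    memcpy(&buf[6], &v, sizeof(v));     /* a 4-byte object at offset 6 */
    out = (uint32_t)extract_al16_or_al8_model(&buf[6], 4);
    assert(out == v);
    return 0;
}
```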
@@ -713,45 +682,6 @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
                                           __ATOMIC_RELAXED, __ATOMIC_RELAXED));
 }
 
-/**
- * store_atom_insert_al16:
- * @p: host address
- * @val: shifted value to store
- * @msk: mask for value to store
- *
- * Atomically store @val to @p masked by @msk.
- */
-static void ATTRIBUTE_ATOMIC128_OPT
-store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
-{
-#if defined(CONFIG_ATOMIC128)
-    __uint128_t *pu, old, new;
-
-    /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
-    pu = __builtin_assume_aligned(ps, 16);
-    old = *pu;
-    do {
-        new = (old & ~msk.u) | val.u;
-    } while (!__atomic_compare_exchange_n(pu, &old, new, true,
-                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
-#elif defined(CONFIG_CMPXCHG128)
-    __uint128_t *pu, old, new;
-
-    /*
-     * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
-     * defer to libatomic, so we must use __sync_*_compare_and_swap_16
-     * and accept the sequential consistency that comes with it.
-     */
-    pu = __builtin_assume_aligned(ps, 16);
-    do {
-        old = *pu;
-        new = (old & ~msk.u) | val.u;
-    } while (!__sync_bool_compare_and_swap_16(pu, old, new));
-#else
-    qemu_build_not_reached();
-#endif
-}
-
 /**
  * store_bytes_leN:
  * @pv: host address
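store_atom_insert_al16() moves the same way. Its calling convention is that @val is already shifted into position and @msk covers exactly the bits being replaced, so every variant reduces to new = (old & ~msk) | val around some 16-byte compare-and-swap. A hedged, self-contained sketch of that update rule (the function and test values are illustrative; building it may need -mcx16 or -latomic depending on the host):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Model of the update rule: replace only the bits covered by msk,
 * retrying until the 16-byte compare-and-swap succeeds. */
static void insert_al16_model(__uint128_t *p, __uint128_t val, __uint128_t msk)
{
    __uint128_t old = *p, new;

    do {
        new = (old & ~msk) | val;
    } while (!__atomic_compare_exchange_n(p, &old, new, true,
                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}

int main(void)
{
    __uint128_t mem = 0;
    unsigned o = 5, s = 2;                 /* patch 2 bytes at byte offset 5 */
    __uint128_t msk = (((__uint128_t)1 << (s * 8)) - 1) << (o * 8);
    __uint128_t val = (__uint128_t)0xabcd << (o * 8);

    insert_al16_model(&mem, val, msk);
    assert((uint16_t)(mem >> (o * 8)) == 0xabcd);
    return 0;
}
```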
@@ -23,22 +23,42 @@ Fields
 
 Syntax::
 
-  field_def     := '%' identifier ( unnamed_field )* ( !function=identifier )?
+  field_def     := '%' identifier ( field )* ( !function=identifier )?
+  field         := unnamed_field | named_field
   unnamed_field := number ':' ( 's' ) number
+  named_field   := identifier ':' ( 's' ) number
 
 For *unnamed_field*, the first number is the least-significant bit position
 of the field and the second number is the length of the field. If the 's' is
-present, the field is considered signed. If multiple ``unnamed_fields`` are
-present, they are concatenated. In this way one can define disjoint fields.
+present, the field is considered signed.
+
+A *named_field* refers to some other field in the instruction pattern
+or format. Regardless of the length of the other field where it is
+defined, it will be inserted into this field with the specified
+signedness and bit width.
+
+Field definitions that involve loops (i.e. where a field is defined
+directly or indirectly in terms of itself) are errors.
+
+A format can include fields that refer to named fields that are
+defined in the instruction pattern(s) that use the format.
+Conversely, an instruction pattern can include fields that refer to
+named fields that are defined in the format it uses. However you
+cannot currently do both at once (i.e. pattern P uses format F; F has
+a field A that refers to a named field B that is defined in P, and P
+has a field C that refers to a named field D that is defined in F).
+
+If multiple ``fields`` are present, they are concatenated.
+In this way one can define disjoint fields.
 
 If ``!function`` is specified, the concatenated result is passed through the
 named function, taking and returning an integral value.
 
-One may use ``!function`` with zero ``unnamed_fields``. This case is called
+One may use ``!function`` with zero ``fields``. This case is called
 a *parameter*, and the named function is only passed the ``DisasContext``
 and returns an integral value extracted from there.
 
-A field with no ``unnamed_fields`` and no ``!function`` is in error.
+A field with no ``fields`` and no ``!function`` is in error.
 
 Field examples:
 
@@ -56,6 +76,9 @@ Field examples:
 | %shimm8 5:s8 13:1         | expand_shimm8(sextract(i, 5, 8) << 1 |      |
 |   !function=expand_shimm8 |               extract(i, 13, 1))            |
 +---------------------------+---------------------------------------------+
+| %sz_imm 10:2 sz:3         | expand_sz_imm(extract(i, 10, 2) << 3 |      |
+|   !function=expand_sz_imm |               extract(a->sz, 0, 3))         |
++---------------------------+---------------------------------------------+
 
 Argument Sets
 =============
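As a sketch of what the new syntax turns into, the %sz_imm example above makes the generated extractor read the already-assigned sz member of the argument set. The fragment below illustrates that shape only; it is not literal decodetree output, and the bit position of sz, the struct name and the expand_sz_imm body are placeholders:

```c
#include <stdbool.h>
#include <stdint.h>

/* Minimal stand-ins so the sketch compiles on its own; in QEMU these
 * come from bitops.h and the translator. */
static inline uint32_t extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0u >> (32 - length));
}

typedef struct DisasContext DisasContext;       /* opaque here */

static int expand_sz_imm(DisasContext *ctx, int x)
{
    (void)ctx;
    return x;                                   /* placeholder transformation */
}

typedef struct {
    int sz;
    int sz_imm;
} arg_example;                                  /* hypothetical argument set */

/*
 * Shape of the code generated for "%sz_imm 10:2 sz:3 !function=expand_sz_imm"
 * in a pattern that also defines an unnamed field 'sz': the named field reads
 * the already-assigned a->sz, which is why scripts/decodetree.py now emits
 * field assignments in topologically sorted order.
 */
static bool decode_example(DisasContext *ctx, uint32_t insn, arg_example *a)
{
    a->sz = extract32(insn, 22, 3);             /* bit position chosen for the example */
    a->sz_imm = expand_sz_imm(ctx, extract32(insn, 10, 2) << 3 |
                                   extract32(a->sz, 0, 3));
    return true;
}
```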
@@ -0,0 +1,40 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, AArch64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef AARCH64_LOAD_EXTRACT_AL16_AL8_H
+#define AARCH64_LOAD_EXTRACT_AL16_AL8_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    __int128_t *ptr_align = (__int128_t *)(pi & ~7);
+    int shr = (pi & 7) * 8;
+    uint64_t l, h;
+
+    /*
+     * With FEAT_LSE2, LDP is single-copy atomic if 16-byte aligned
+     * and single-copy atomic on the parts if 8-byte aligned.
+     * All we need do is align the pointer mod 8.
+     */
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*ptr_align));
+    return (l >> shr) | (h << (-shr & 63));
+}
+
+#endif /* AARCH64_LOAD_EXTRACT_AL16_AL8_H */
@@ -0,0 +1,47 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic store insert into 128-bit, AArch64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef AARCH64_STORE_INSERT_AL16_H
+#define AARCH64_STORE_INSERT_AL16_H
+
+/**
+ * store_atom_insert_al16:
+ * @p: host address
+ * @val: shifted value to store
+ * @msk: mask for value to store
+ *
+ * Atomically store @val to @p masked by @msk.
+ */
+static inline void ATTRIBUTE_ATOMIC128_OPT
+store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
+{
+    /*
+     * GCC only implements __sync* primitives for int128 on aarch64.
+     * We can do better without the barriers, and integrating the
+     * arithmetic into the load-exclusive/store-conditional pair.
+     */
+    uint64_t tl, th, vl, vh, ml, mh;
+    uint32_t fail;
+
+    qemu_build_assert(!HOST_BIG_ENDIAN);
+    vl = int128_getlo(val);
+    vh = int128_gethi(val);
+    ml = int128_getlo(msk);
+    mh = int128_gethi(msk);
+
+    asm("0: ldxp %[l], %[h], %[mem]\n\t"
+        "bic %[l], %[l], %[ml]\n\t"
+        "bic %[h], %[h], %[mh]\n\t"
+        "orr %[l], %[l], %[vl]\n\t"
+        "orr %[h], %[h], %[vh]\n\t"
+        "stxp %w[f], %[l], %[h], %[mem]\n\t"
+        "cbnz %w[f], 0b\n"
+        : [mem] "+Q"(*ps), [f] "=&r"(fail), [l] "=&r"(tl), [h] "=&r"(th)
+        : [vl] "r"(vl), [vh] "r"(vh), [ml] "r"(ml), [mh] "r"(mh));
+}
+
+#endif /* AARCH64_STORE_INSERT_AL16_H */
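HAVE_ATOMIC128_RO in these aarch64 headers comes from the aarch64 atomic128-ldst header (not part of this hunk), which is effectively a FEAT_LSE2 test via host/cpuinfo.h. For reference, a hedged sketch of how that capability is typically probed on Linux/aarch64 — this is not QEMU's cpuinfo code, just the usual getauxval() pattern:

```c
/*
 * Hedged sketch of FEAT_LSE2 detection on Linux/aarch64 only.
 * HWCAP_USCAT advertises unaligned single-copy atomicity, i.e. FEAT_LSE2.
 */
#include <stdbool.h>
#include <sys/auxv.h>

#ifndef HWCAP_USCAT
#define HWCAP_USCAT (1 << 25)   /* value from Linux asm/hwcap.h */
#endif

static bool host_has_lse2(void)
{
    return (getauxval(AT_HWCAP) & HWCAP_USCAT) != 0;
}
```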
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
* Atomic extract 64 from 128-bit, generic version.
|
||||
*
|
||||
* Copyright (C) 2023 Linaro, Ltd.
|
||||
*/
|
||||
|
||||
#ifndef HOST_LOAD_EXTRACT_AL16_AL8_H
|
||||
#define HOST_LOAD_EXTRACT_AL16_AL8_H
|
||||
|
||||
/**
|
||||
* load_atom_extract_al16_or_al8:
|
||||
* @pv: host address
|
||||
* @s: object size in bytes, @s <= 8.
|
||||
*
|
||||
* Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
|
||||
* cross an 16-byte boundary then the access must be 16-byte atomic,
|
||||
* otherwise the access must be 8-byte atomic.
|
||||
*/
|
||||
static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
|
||||
load_atom_extract_al16_or_al8(void *pv, int s)
|
||||
{
|
||||
uintptr_t pi = (uintptr_t)pv;
|
||||
int o = pi & 7;
|
||||
int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
|
||||
Int128 r;
|
||||
|
||||
pv = (void *)(pi & ~7);
|
||||
if (pi & 8) {
|
||||
uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
|
||||
uint64_t a = qatomic_read__nocheck(p8);
|
||||
uint64_t b = qatomic_read__nocheck(p8 + 1);
|
||||
|
||||
if (HOST_BIG_ENDIAN) {
|
||||
r = int128_make128(b, a);
|
||||
} else {
|
||||
r = int128_make128(a, b);
|
||||
}
|
||||
} else {
|
||||
r = atomic16_read_ro(pv);
|
||||
}
|
||||
return int128_getlo(int128_urshift(r, shr));
|
||||
}
|
||||
|
||||
#endif /* HOST_LOAD_EXTRACT_AL16_AL8_H */
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
* Atomic store insert into 128-bit, generic version.
|
||||
*
|
||||
* Copyright (C) 2023 Linaro, Ltd.
|
||||
*/
|
||||
|
||||
#ifndef HOST_STORE_INSERT_AL16_H
|
||||
#define HOST_STORE_INSERT_AL16_H
|
||||
|
||||
/**
|
||||
* store_atom_insert_al16:
|
||||
* @p: host address
|
||||
* @val: shifted value to store
|
||||
* @msk: mask for value to store
|
||||
*
|
||||
* Atomically store @val to @p masked by @msk.
|
||||
*/
|
||||
static inline void ATTRIBUTE_ATOMIC128_OPT
|
||||
store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
|
||||
{
|
||||
#if defined(CONFIG_ATOMIC128)
|
||||
__uint128_t *pu;
|
||||
Int128Alias old, new;
|
||||
|
||||
/* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
|
||||
pu = __builtin_assume_aligned(ps, 16);
|
||||
old.u = *pu;
|
||||
msk = int128_not(msk);
|
||||
do {
|
||||
new.s = int128_and(old.s, msk);
|
||||
new.s = int128_or(new.s, val);
|
||||
} while (!__atomic_compare_exchange_n(pu, &old.u, new.u, true,
|
||||
__ATOMIC_RELAXED, __ATOMIC_RELAXED));
|
||||
#else
|
||||
Int128 old, new, cmp;
|
||||
|
||||
ps = __builtin_assume_aligned(ps, 16);
|
||||
old = *ps;
|
||||
msk = int128_not(msk);
|
||||
do {
|
||||
cmp = old;
|
||||
new = int128_and(old, msk);
|
||||
new = int128_or(new, val);
|
||||
old = atomic16_cmpxchg(ps, cmp, new);
|
||||
} while (int128_ne(cmp, old));
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* HOST_STORE_INSERT_AL16_H */
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
* Load/store for 128-bit atomic operations, x86_64 version.
|
||||
*
|
||||
* Copyright (C) 2023 Linaro, Ltd.
|
||||
*
|
||||
* See docs/devel/atomics.rst for discussion about the guarantees each
|
||||
* atomic primitive is meant to provide.
|
||||
*/
|
||||
|
||||
#ifndef AARCH64_ATOMIC128_LDST_H
|
||||
#define AARCH64_ATOMIC128_LDST_H
|
||||
|
||||
#ifdef CONFIG_INT128_TYPE
|
||||
#include "host/cpuinfo.h"
|
||||
#include "tcg/debug-assert.h"
|
||||
|
||||
/*
|
||||
* Through clang 16, with -mcx16, __atomic_load_n is incorrectly
|
||||
* expanded to a read-write operation: lock cmpxchg16b.
|
||||
*/
|
||||
|
||||
#define HAVE_ATOMIC128_RO likely(cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
|
||||
#define HAVE_ATOMIC128_RW 1
|
||||
|
||||
static inline Int128 atomic16_read_ro(const Int128 *ptr)
|
||||
{
|
||||
Int128Alias r;
|
||||
|
||||
tcg_debug_assert(HAVE_ATOMIC128_RO);
|
||||
asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr));
|
||||
|
||||
return r.s;
|
||||
}
|
||||
|
||||
static inline Int128 atomic16_read_rw(Int128 *ptr)
|
||||
{
|
||||
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
|
||||
Int128Alias r;
|
||||
|
||||
if (HAVE_ATOMIC128_RO) {
|
||||
asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align));
|
||||
} else {
|
||||
r.i = __sync_val_compare_and_swap_16(ptr_align, 0, 0);
|
||||
}
|
||||
return r.s;
|
||||
}
|
||||
|
||||
static inline void atomic16_set(Int128 *ptr, Int128 val)
|
||||
{
|
||||
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
|
||||
Int128Alias new = { .s = val };
|
||||
|
||||
if (HAVE_ATOMIC128_RO) {
|
||||
asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.i));
|
||||
} else {
|
||||
__int128_t old;
|
||||
do {
|
||||
old = *ptr_align;
|
||||
} while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* Provide QEMU_ERROR stubs. */
|
||||
#include "host/include/generic/host/atomic128-ldst.h"
|
||||
#endif
|
||||
|
||||
#endif /* AARCH64_ATOMIC128_LDST_H */
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
* Atomic extract 64 from 128-bit, x86_64 version.
|
||||
*
|
||||
* Copyright (C) 2023 Linaro, Ltd.
|
||||
*/
|
||||
|
||||
#ifndef X86_64_LOAD_EXTRACT_AL16_AL8_H
|
||||
#define X86_64_LOAD_EXTRACT_AL16_AL8_H
|
||||
|
||||
#ifdef CONFIG_INT128_TYPE
|
||||
#include "host/cpuinfo.h"
|
||||
|
||||
/**
|
||||
* load_atom_extract_al16_or_al8:
|
||||
* @pv: host address
|
||||
* @s: object size in bytes, @s <= 8.
|
||||
*
|
||||
* Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
|
||||
* cross an 16-byte boundary then the access must be 16-byte atomic,
|
||||
* otherwise the access must be 8-byte atomic.
|
||||
*/
|
||||
static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
|
||||
load_atom_extract_al16_or_al8(void *pv, int s)
|
||||
{
|
||||
uintptr_t pi = (uintptr_t)pv;
|
||||
__int128_t *ptr_align = (__int128_t *)(pi & ~7);
|
||||
int shr = (pi & 7) * 8;
|
||||
Int128Alias r;
|
||||
|
||||
/*
|
||||
* ptr_align % 16 is now only 0 or 8.
|
||||
* If the host supports atomic loads with VMOVDQU, then always use that,
|
||||
* making the branch highly predictable. Otherwise we must use VMOVDQA
|
||||
* when ptr_align % 16 == 0 for 16-byte atomicity.
|
||||
*/
|
||||
if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) {
|
||||
asm("vmovdqu %1, %0" : "=x" (r.i) : "m" (*ptr_align));
|
||||
} else {
|
||||
asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align));
|
||||
}
|
||||
return int128_getlo(int128_urshift(r.s, shr));
|
||||
}
|
||||
#else
|
||||
/* Fallback definition that must be optimized away, or error. */
|
||||
uint64_t QEMU_ERROR("unsupported atomic")
|
||||
load_atom_extract_al16_or_al8(void *pv, int s);
|
||||
#endif
|
||||
|
||||
#endif /* X86_64_LOAD_EXTRACT_AL16_AL8_H */
|
|
@@ -481,7 +481,7 @@ static inline void bswap128s(Int128 *s)
  * a possible structure and the native types. Ease parameter passing
  * via use of the transparent union extension.
  */
-#ifdef CONFIG_INT128
+#ifdef CONFIG_INT128_TYPE
 typedef union {
     __uint128_t u;
     __int128_t i;
@@ -489,6 +489,6 @@ typedef union {
 } Int128Alias __attribute__((transparent_union));
 #else
 typedef Int128 Int128Alias;
-#endif /* CONFIG_INT128 */
+#endif /* CONFIG_INT128_TYPE */
 
 #endif /* INT128_H */
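Keying the union on CONFIG_INT128_TYPE rather than CONFIG_INT128 means Int128Alias only requires the compiler to accept __int128_t as a type, not to support full 128-bit arithmetic. A small hedged sketch of what the transparent union buys callers (the union and function names below are stand-ins, not QEMU code):

```c
#include <stdint.h>

/* Stand-in for the union above so the fragment is self-contained;
 * sketch only, assuming a host where the 128-bit type exists. */
typedef union {
    __uint128_t u;
    __int128_t  i;
} U128Alias __attribute__((transparent_union));

/* Because of transparent_union, a caller can pass a bare __int128_t
 * (i.e. an Int128 on such hosts) and it arrives as the matching member,
 * with no explicit union construction at the call site. */
static inline uint64_t low_half(U128Alias x)
{
    return (uint64_t)x.u;
}

static inline uint64_t demo(__int128_t v)
{
    return low_half(v);
}
```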
meson.build
@@ -2543,7 +2543,13 @@ config_host_data.set('CONFIG_ATOMIC64', cc.links('''
     return 0;
   }'''))
 
-has_int128 = cc.links('''
+has_int128_type = cc.compiles('''
   __int128_t a;
   __uint128_t b;
+  int main(void) { b = a; }''')
+config_host_data.set('CONFIG_INT128_TYPE', has_int128_type)
+
+has_int128 = has_int128_type and cc.links('''
+  __int128_t a;
+  __uint128_t b;
   int main (void) {
@@ -2552,10 +2558,9 @@ has_int128 = cc.links('''
     a = a * a;
     return 0;
   }''')
-
 config_host_data.set('CONFIG_INT128', has_int128)
 
-if has_int128
+if has_int128_type
   # "do we have 128-bit atomics which are handled inline and specifically not
   # via libatomic". The reason we can't use libatomic is documented in the
   # comment starting "GCC is a house divided" in include/qemu/atomic128.h.
@@ -2564,7 +2569,7 @@ if has_int128
   # __alignof(unsigned __int128) for the host.
   atomic_test_128 = '''
     int main(int ac, char **av) {
-      unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
+      __uint128_t *p = __builtin_assume_aligned(av[ac - 1], 16);
       p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
       __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
       __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
@@ -2586,7 +2591,7 @@ if has_int128
   config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
     int main(void)
     {
-      unsigned __int128 x = 0, y = 0;
+      __uint128_t x = 0, y = 0;
       __sync_val_compare_and_swap_16(&x, y, x);
       return 0;
     }
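The probes above form a small ladder: CONFIG_INT128_TYPE (the type exists), CONFIG_INT128 (128-bit arithmetic links), then CONFIG_ATOMIC128 or CONFIG_CMPXCHG128 for inline 16-byte atomics. A hedged sketch of how code in this series selects between them — compare the store_atom_insert_al16() variants earlier in the diff; the function below is illustrative only:

```c
/* Sketch of the capability ladder the meson probes establish; the macro
 * names are QEMU's, the helper itself is made up for illustration. */
static inline void cas16_or_fallback(__uint128_t *p, __uint128_t oldv, __uint128_t newv)
{
#if defined(CONFIG_ATOMIC128)
    /* __atomic_* expands inline; relaxed ordering is available. */
    __atomic_compare_exchange_n(p, &oldv, newv, false,
                                __ATOMIC_RELAXED, __ATOMIC_RELAXED);
#elif defined(CONFIG_CMPXCHG128)
    /* Only __sync_*_compare_and_swap_16 is inline; it is seq-cst. */
    __sync_val_compare_and_swap_16(p, oldv, newv);
#else
    /* No inline 16-byte atomics on this host: callers must take a
     * slow path instead, so this code must be compiled out. */
    qemu_build_not_reached();
#endif
}
```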
|
@ -35,6 +35,7 @@ arguments = {}
|
|||
formats = {}
|
||||
allpatterns = []
|
||||
anyextern = False
|
||||
testforerror = False
|
||||
|
||||
translate_prefix = 'trans'
|
||||
translate_scope = 'static '
|
||||
|
@ -53,6 +54,80 @@ re_fld_ident = '%[a-zA-Z0-9_]*'
|
|||
re_fmt_ident = '@[a-zA-Z0-9_]*'
|
||||
re_pat_ident = '[a-zA-Z0-9_]*'
|
||||
|
||||
# Local implementation of a topological sort. We use the same API that
|
||||
# the Python graphlib does, so that when QEMU moves forward to a
|
||||
# baseline of Python 3.9 or newer this code can all be dropped and
|
||||
# replaced with:
|
||||
# from graphlib import TopologicalSorter, CycleError
|
||||
#
|
||||
# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter
|
||||
#
|
||||
# We only implement the parts of TopologicalSorter we care about:
|
||||
# ts = TopologicalSorter(graph=None)
|
||||
# create the sorter. graph is a dictionary whose keys are
|
||||
# nodes and whose values are lists of the predecessors of that node.
|
||||
# (That is, if graph contains "A" -> ["B", "C"] then we must output
|
||||
# B and C before A.)
|
||||
# ts.static_order()
|
||||
# returns a list of all the nodes in sorted order, or raises CycleError
|
||||
# CycleError
|
||||
# exception raised if there are cycles in the graph. The second
|
||||
# element in the args attribute is a list of nodes which form a
|
||||
# cycle; the first and last element are the same, eg [a, b, c, a]
|
||||
# (Our implementation doesn't give the order correctly.)
|
||||
#
|
||||
# For our purposes we can assume that the data set is always small
|
||||
# (typically 10 nodes or less, actual links in the graph very rare),
|
||||
# so we don't need to worry about efficiency of implementation.
|
||||
#
|
||||
# The core of this implementation is from
|
||||
# https://code.activestate.com/recipes/578272-topological-sort/
|
||||
# (but updated to Python 3), and is under the MIT license.
|
||||
|
||||
class CycleError(ValueError):
|
||||
"""Subclass of ValueError raised if cycles exist in the graph"""
|
||||
pass
|
||||
|
||||
class TopologicalSorter:
|
||||
"""Topologically sort a graph"""
|
||||
def __init__(self, graph=None):
|
||||
self.graph = graph
|
||||
|
||||
def static_order(self):
|
||||
# We do the sort right here, unlike the stdlib version
|
||||
from functools import reduce
|
||||
data = {}
|
||||
r = []
|
||||
|
||||
if not self.graph:
|
||||
return []
|
||||
|
||||
# This code wants the values in the dict to be specifically sets
|
||||
for k, v in self.graph.items():
|
||||
data[k] = set(v)
|
||||
|
||||
# Find all items that don't depend on anything.
|
||||
extra_items_in_deps = (reduce(set.union, data.values())
|
||||
- set(data.keys()))
|
||||
# Add empty dependencies where needed
|
||||
data.update({item:{} for item in extra_items_in_deps})
|
||||
while True:
|
||||
ordered = set(item for item, dep in data.items() if not dep)
|
||||
if not ordered:
|
||||
break
|
||||
r.extend(ordered)
|
||||
data = {item: (dep - ordered)
|
||||
for item, dep in data.items()
|
||||
if item not in ordered}
|
||||
if data:
|
||||
# This doesn't give as nice results as the stdlib, which
|
||||
# gives you the cycle by listing the nodes in order. Here
|
||||
# we only know the nodes in the cycle but not their order.
|
||||
raise CycleError(f'nodes are in a cycle', list(data.keys()))
|
||||
|
||||
return r
|
||||
# end TopologicalSorter
|
||||
|
||||
def error_with_file(file, lineno, *args):
|
||||
"""Print an error message from file:line and args and exit."""
|
||||
global output_file
|
||||
|
@ -70,8 +145,13 @@ def error_with_file(file, lineno, *args):
|
|||
|
||||
if output_file and output_fd:
|
||||
output_fd.close()
|
||||
os.remove(output_file)
|
||||
exit(1)
|
||||
# Do not try to remove e.g. -o /dev/null
|
||||
if not output_file.startswith("/dev"):
|
||||
try:
|
||||
os.remove(output_file)
|
||||
except PermissionError:
|
||||
pass
|
||||
exit(0 if testforerror else 1)
|
||||
# end error_with_file
|
||||
|
||||
|
||||
|
@ -205,11 +285,14 @@ class Field:
|
|||
s = ''
|
||||
return str(self.pos) + ':' + s + str(self.len)
|
||||
|
||||
def str_extract(self):
|
||||
def str_extract(self, lvalue_formatter):
|
||||
global bitop_width
|
||||
s = 's' if self.sign else ''
|
||||
return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
|
||||
|
||||
def referenced_fields(self):
|
||||
return []
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.sign == other.sign and self.mask == other.mask
|
||||
|
||||
|
@ -228,12 +311,12 @@ class MultiField:
|
|||
def __str__(self):
|
||||
return str(self.subs)
|
||||
|
||||
def str_extract(self):
|
||||
def str_extract(self, lvalue_formatter):
|
||||
global bitop_width
|
||||
ret = '0'
|
||||
pos = 0
|
||||
for f in reversed(self.subs):
|
||||
ext = f.str_extract()
|
||||
ext = f.str_extract(lvalue_formatter)
|
||||
if pos == 0:
|
||||
ret = ext
|
||||
else:
|
||||
|
@ -241,6 +324,12 @@ class MultiField:
|
|||
pos += f.len
|
||||
return ret
|
||||
|
||||
def referenced_fields(self):
|
||||
l = []
|
||||
for f in self.subs:
|
||||
l.extend(f.referenced_fields())
|
||||
return l
|
||||
|
||||
def __ne__(self, other):
|
||||
if len(self.subs) != len(other.subs):
|
||||
return True
|
||||
|
@ -264,9 +353,12 @@ class ConstField:
|
|||
def __str__(self):
|
||||
return str(self.value)
|
||||
|
||||
def str_extract(self):
|
||||
def str_extract(self, lvalue_formatter):
|
||||
return str(self.value)
|
||||
|
||||
def referenced_fields(self):
|
||||
return []
|
||||
|
||||
def __cmp__(self, other):
|
||||
return self.value - other.value
|
||||
# end ConstField
|
||||
|
@ -283,8 +375,12 @@ class FunctionField:
|
|||
def __str__(self):
|
||||
return self.func + '(' + str(self.base) + ')'
|
||||
|
||||
def str_extract(self):
|
||||
return self.func + '(ctx, ' + self.base.str_extract() + ')'
|
||||
def str_extract(self, lvalue_formatter):
|
||||
return (self.func + '(ctx, '
|
||||
+ self.base.str_extract(lvalue_formatter) + ')')
|
||||
|
||||
def referenced_fields(self):
|
||||
return self.base.referenced_fields()
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.func == other.func and self.base == other.base
|
||||
|
@ -304,9 +400,12 @@ class ParameterField:
|
|||
def __str__(self):
|
||||
return self.func
|
||||
|
||||
def str_extract(self):
|
||||
def str_extract(self, lvalue_formatter):
|
||||
return self.func + '(ctx)'
|
||||
|
||||
def referenced_fields(self):
|
||||
return []
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.func == other.func
|
||||
|
||||
|
@ -314,6 +413,32 @@ class ParameterField:
|
|||
return not self.__eq__(other)
|
||||
# end ParameterField
|
||||
|
||||
class NamedField:
|
||||
"""Class representing a field already named in the pattern"""
|
||||
def __init__(self, name, sign, len):
|
||||
self.mask = 0
|
||||
self.sign = sign
|
||||
self.len = len
|
||||
self.name = name
|
||||
|
||||
def __str__(self):
|
||||
return self.name
|
||||
|
||||
def str_extract(self, lvalue_formatter):
|
||||
global bitop_width
|
||||
s = 's' if self.sign else ''
|
||||
lvalue = lvalue_formatter(self.name)
|
||||
return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})'
|
||||
|
||||
def referenced_fields(self):
|
||||
return [self.name]
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.name == other.name
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
# end NamedField
|
||||
|
||||
class Arguments:
|
||||
"""Class representing the extracted fields of a format"""
|
||||
|
@ -337,7 +462,6 @@ class Arguments:
|
|||
output('} ', self.struct_name(), ';\n\n')
|
||||
# end Arguments
|
||||
|
||||
|
||||
class General:
|
||||
"""Common code between instruction formats and instruction patterns"""
|
||||
def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
|
||||
|
@ -351,12 +475,59 @@ class General:
|
|||
self.fieldmask = fldm
|
||||
self.fields = flds
|
||||
self.width = w
|
||||
self.dangling = None
|
||||
|
||||
def __str__(self):
|
||||
return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
|
||||
|
||||
def str1(self, i):
|
||||
return str_indent(i) + self.__str__()
|
||||
|
||||
def dangling_references(self):
|
||||
# Return a list of all named references which aren't satisfied
|
||||
# directly by this format/pattern. This will be either:
|
||||
# * a format referring to a field which is specified by the
|
||||
# pattern(s) using it
|
||||
# * a pattern referring to a field which is specified by the
|
||||
# format it uses
|
||||
# * a user error (referring to a field that doesn't exist at all)
|
||||
if self.dangling is None:
|
||||
# Compute this once and cache the answer
|
||||
dangling = []
|
||||
for n, f in self.fields.items():
|
||||
for r in f.referenced_fields():
|
||||
if r not in self.fields:
|
||||
dangling.append(r)
|
||||
self.dangling = dangling
|
||||
return self.dangling
|
||||
|
||||
def output_fields(self, indent, lvalue_formatter):
|
||||
# We use a topological sort to ensure that any use of NamedField
|
||||
# comes after the initialization of the field it is referencing.
|
||||
graph = {}
|
||||
for n, f in self.fields.items():
|
||||
refs = f.referenced_fields()
|
||||
graph[n] = refs
|
||||
|
||||
try:
|
||||
ts = TopologicalSorter(graph)
|
||||
for n in ts.static_order():
|
||||
# We only want to emit assignments for the keys
|
||||
# in our fields list, not for anything that ends up
|
||||
# in the tsort graph only because it was referenced as
|
||||
# a NamedField.
|
||||
try:
|
||||
f = self.fields[n]
|
||||
output(indent, lvalue_formatter(n), ' = ',
|
||||
f.str_extract(lvalue_formatter), ';\n')
|
||||
except KeyError:
|
||||
pass
|
||||
except CycleError as e:
|
||||
# The second element of args is a list of nodes which form
|
||||
# a cycle (there might be others too, but only one is reported).
|
||||
# Pretty-print it to tell the user.
|
||||
cycle = ' => '.join(e.args[1])
|
||||
error(self.lineno, 'field definitions form a cycle: ' + cycle)
|
||||
# end General
|
||||
|
||||
|
||||
|
@ -370,8 +541,7 @@ class Format(General):
|
|||
def output_extract(self):
|
||||
output('static void ', self.extract_name(), '(DisasContext *ctx, ',
|
||||
self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
|
||||
for n, f in self.fields.items():
|
||||
output(' a->', n, ' = ', f.str_extract(), ';\n')
|
||||
self.output_fields(str_indent(4), lambda n: 'a->' + n)
|
||||
output('}\n\n')
|
||||
# end Format
|
||||
|
||||
|
@ -392,11 +562,36 @@ class Pattern(General):
|
|||
ind = str_indent(i)
|
||||
arg = self.base.base.name
|
||||
output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
|
||||
# We might have named references in the format that refer to fields
|
||||
# in the pattern, or named references in the pattern that refer
|
||||
# to fields in the format. This affects whether we extract the fields
|
||||
# for the format before or after the ones for the pattern.
|
||||
# For simplicity we don't allow cross references in both directions.
|
||||
# This is also where we catch the syntax error of referring to
|
||||
# a nonexistent field.
|
||||
fmt_refs = self.base.dangling_references()
|
||||
for r in fmt_refs:
|
||||
if r not in self.fields:
|
||||
error(self.lineno, f'format refers to undefined field {r}')
|
||||
pat_refs = self.dangling_references()
|
||||
for r in pat_refs:
|
||||
if r not in self.base.fields:
|
||||
error(self.lineno, f'pattern refers to undefined field {r}')
|
||||
if pat_refs and fmt_refs:
|
||||
error(self.lineno, ('pattern that uses fields defined in format '
|
||||
'cannot use format that uses fields defined '
|
||||
'in pattern'))
|
||||
if fmt_refs:
|
||||
# pattern fields first
|
||||
self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
|
||||
assert not extracted, "dangling fmt refs but it was already extracted"
|
||||
if not extracted:
|
||||
output(ind, self.base.extract_name(),
|
||||
'(ctx, &u.f_', arg, ', insn);\n')
|
||||
for n, f in self.fields.items():
|
||||
output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
|
||||
if not fmt_refs:
|
||||
# pattern fields last
|
||||
self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
|
||||
|
||||
output(ind, 'if (', translate_prefix, '_', self.name,
|
||||
'(ctx, &u.f_', arg, ')) return true;\n')
|
||||
|
||||
|
@ -473,7 +668,7 @@ class MultiPattern(General):
|
|||
|
||||
def prop_format(self):
|
||||
for p in self.pats:
|
||||
p.build_tree()
|
||||
p.prop_format()
|
||||
|
||||
def prop_width(self):
|
||||
width = None
|
||||
|
@ -505,6 +700,12 @@ class IncMultiPattern(MultiPattern):
|
|||
output(ind, '}\n')
|
||||
else:
|
||||
p.output_code(i, extracted, p.fixedbits, p.fixedmask)
|
||||
|
||||
def build_tree(self):
|
||||
if not self.pats:
|
||||
error_with_file(self.file, self.lineno, 'empty pattern group')
|
||||
super().build_tree()
|
||||
|
||||
#end IncMultiPattern
|
||||
|
||||
|
||||
|
@ -536,8 +737,10 @@ class Tree:
|
|||
ind = str_indent(i)
|
||||
|
||||
# If we identified all nodes below have the same format,
|
||||
# extract the fields now.
|
||||
if not extracted and self.base:
|
||||
# extract the fields now. But don't do it if the format relies
|
||||
# on named fields from the insn pattern, as those won't have
|
||||
# been initialised at this point.
|
||||
if not extracted and self.base and not self.base.dangling_references():
|
||||
output(ind, self.base.extract_name(),
|
||||
'(ctx, &u.f_', self.base.base.name, ', insn);\n')
|
||||
extracted = True
|
||||
|
@ -623,7 +826,7 @@ class ExcMultiPattern(MultiPattern):
|
|||
return t
|
||||
|
||||
def build_tree(self):
|
||||
super().prop_format()
|
||||
super().build_tree()
|
||||
self.tree = self.__build_tree(self.pats, self.fixedbits,
|
||||
self.fixedmask)
|
||||
|
||||
|
@ -659,6 +862,7 @@ def parse_field(lineno, name, toks):
|
|||
"""Parse one instruction field from TOKS at LINENO"""
|
||||
global fields
|
||||
global insnwidth
|
||||
global re_C_ident
|
||||
|
||||
# A "simple" field will have only one entry;
|
||||
# a "multifield" will have several.
|
||||
|
@ -673,6 +877,25 @@ def parse_field(lineno, name, toks):
|
|||
func = func[1]
|
||||
continue
|
||||
|
||||
if re.fullmatch(re_C_ident + ':s[0-9]+', t):
|
||||
# Signed named field
|
||||
subtoks = t.split(':')
|
||||
n = subtoks[0]
|
||||
le = int(subtoks[1])
|
||||
f = NamedField(n, True, le)
|
||||
subs.append(f)
|
||||
width += le
|
||||
continue
|
||||
if re.fullmatch(re_C_ident + ':[0-9]+', t):
|
||||
# Unsigned named field
|
||||
subtoks = t.split(':')
|
||||
n = subtoks[0]
|
||||
le = int(subtoks[1])
|
||||
f = NamedField(n, False, le)
|
||||
subs.append(f)
|
||||
width += le
|
||||
continue
|
||||
|
||||
if re.fullmatch('[0-9]+:s[0-9]+', t):
|
||||
# Signed field extract
|
||||
subtoks = t.split(':s')
|
||||
|
@ -1286,11 +1509,12 @@ def main():
|
|||
global bitop_width
|
||||
global variablewidth
|
||||
global anyextern
|
||||
global testforerror
|
||||
|
||||
decode_scope = 'static '
|
||||
|
||||
long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
|
||||
'static-decode=', 'varinsnwidth=']
|
||||
'static-decode=', 'varinsnwidth=', 'test-for-error']
|
||||
try:
|
||||
(opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
|
||||
except getopt.GetoptError as err:
|
||||
|
@ -1319,6 +1543,8 @@ def main():
|
|||
bitop_width = 64
|
||||
elif insnwidth != 32:
|
||||
error(0, 'cannot handle insns of width', insnwidth)
|
||||
elif o == '--test-for-error':
|
||||
testforerror = True
|
||||
else:
|
||||
assert False, 'unhandled option'
|
||||
|
||||
|
@ -1417,6 +1643,7 @@ def main():
|
|||
|
||||
if output_file:
|
||||
output_fd.close()
|
||||
exit(1 if testforerror else 0)
|
||||
# end main
|
||||
|
||||
|
||||
|
|
|
@ -10,11 +10,10 @@
|
|||
* tcg-target-con-str.h; the constraint combination is inclusive or.
|
||||
*/
|
||||
C_O0_I1(r)
|
||||
C_O0_I2(lZ, l)
|
||||
C_O0_I2(r, rA)
|
||||
C_O0_I2(rZ, r)
|
||||
C_O0_I2(w, r)
|
||||
C_O1_I1(r, l)
|
||||
C_O0_I3(rZ, rZ, r)
|
||||
C_O1_I1(r, r)
|
||||
C_O1_I1(w, r)
|
||||
C_O1_I1(w, w)
|
||||
|
@ -33,4 +32,5 @@ C_O1_I2(w, w, wO)
|
|||
C_O1_I2(w, w, wZ)
|
||||
C_O1_I3(w, w, w, w)
|
||||
C_O1_I4(r, r, rA, rZ, rZ)
|
||||
C_O2_I1(r, r, r)
|
||||
C_O2_I4(r, r, rZ, rZ, rA, rMZ)
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
* REGS(letter, register_mask)
|
||||
*/
|
||||
REGS('r', ALL_GENERAL_REGS)
|
||||
REGS('l', ALL_QLDST_REGS)
|
||||
REGS('w', ALL_VECTOR_REGS)
|
||||
|
||||
/*
|
||||
|
|
|
@ -40,11 +40,12 @@ static const int tcg_target_reg_alloc_order[] = {
|
|||
|
||||
TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
|
||||
TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
|
||||
TCG_REG_X16, TCG_REG_X17,
|
||||
|
||||
TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
|
||||
TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
|
||||
|
||||
/* X16 reserved as temporary */
|
||||
/* X17 reserved as temporary */
|
||||
/* X18 reserved by system */
|
||||
/* X19 reserved for AREG0 */
|
||||
/* X29 reserved as fp */
|
||||
|
@ -71,8 +72,10 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
|
|||
return TCG_REG_X0 + slot;
|
||||
}
|
||||
|
||||
#define TCG_REG_TMP TCG_REG_X30
|
||||
#define TCG_VEC_TMP TCG_REG_V31
|
||||
#define TCG_REG_TMP0 TCG_REG_X16
|
||||
#define TCG_REG_TMP1 TCG_REG_X17
|
||||
#define TCG_REG_TMP2 TCG_REG_X30
|
||||
#define TCG_VEC_TMP0 TCG_REG_V31
|
||||
|
||||
#ifndef CONFIG_SOFTMMU
|
||||
#define TCG_REG_GUEST_BASE TCG_REG_X28
|
||||
|
@ -129,14 +132,6 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
|
|||
#define ALL_GENERAL_REGS 0xffffffffu
|
||||
#define ALL_VECTOR_REGS 0xffffffff00000000ull
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
#define ALL_QLDST_REGS \
|
||||
(ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
|
||||
(1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
|
||||
#else
|
||||
#define ALL_QLDST_REGS ALL_GENERAL_REGS
|
||||
#endif
|
||||
|
||||
/* Match a constant valid for addition (12-bit, optionally shifted). */
|
||||
static inline bool is_aimm(uint64_t val)
|
||||
{
|
||||
|
@ -390,6 +385,10 @@ typedef enum {
|
|||
I3305_LDR_v64 = 0x5c000000,
|
||||
I3305_LDR_v128 = 0x9c000000,
|
||||
|
||||
/* Load/store exclusive. */
|
||||
I3306_LDXP = 0xc8600000,
|
||||
I3306_STXP = 0xc8200000,
|
||||
|
||||
/* Load/store register. Described here as 3.3.12, but the helper
|
||||
that emits them can transform to 3.3.10 or 3.3.13. */
|
||||
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
|
||||
|
@ -454,6 +453,9 @@ typedef enum {
|
|||
I3406_ADR = 0x10000000,
|
||||
I3406_ADRP = 0x90000000,
|
||||
|
||||
/* Add/subtract extended register instructions. */
|
||||
I3501_ADD = 0x0b200000,
|
||||
|
||||
/* Add/subtract shifted register instructions (without a shift). */
|
||||
I3502_ADD = 0x0b000000,
|
||||
I3502_ADDS = 0x2b000000,
|
||||
|
@ -624,6 +626,12 @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
|
|||
tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
|
||||
}
|
||||
|
||||
static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
|
||||
TCGReg rt, TCGReg rt2, TCGReg rn)
|
||||
{
|
||||
tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
|
||||
}
|
||||
|
||||
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
|
||||
TCGReg rt, int imm19)
|
||||
{
|
||||
|
@ -706,6 +714,14 @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
|
|||
tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
|
||||
}
|
||||
|
||||
static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
|
||||
TCGType sf, TCGReg rd, TCGReg rn,
|
||||
TCGReg rm, int opt, int imm3)
|
||||
{
|
||||
tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
|
||||
imm3 << 10 | rn << 5 | rd);
|
||||
}
|
||||
|
||||
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
|
||||
the rare occasion when we actually want to supply a shift amount. */
|
||||
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
|
||||
|
@ -984,7 +1000,7 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
|
|||
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
|
||||
TCGReg r, TCGReg base, intptr_t offset)
|
||||
{
|
||||
TCGReg temp = TCG_REG_TMP;
|
||||
TCGReg temp = TCG_REG_TMP0;
|
||||
|
||||
if (offset < -0xffffff || offset > 0xffffff) {
|
||||
tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
|
||||
|
@ -1136,8 +1152,8 @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
|
|||
}
|
||||
|
||||
/* Worst-case scenario, move offset to temp register, use reg offset. */
|
||||
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
|
||||
tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
|
||||
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
|
||||
tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
|
||||
}
|
||||
|
||||
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
|
||||
|
@ -1353,8 +1369,8 @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
|
|||
if (offset == sextract64(offset, 0, 26)) {
|
||||
tcg_out_insn(s, 3206, BL, offset);
|
||||
} else {
|
||||
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
|
||||
tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
|
||||
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
|
||||
tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1491,7 +1507,7 @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
|
|||
AArch64Insn insn;
|
||||
|
||||
if (rl == ah || (!const_bh && rl == bh)) {
|
||||
rl = TCG_REG_TMP;
|
||||
rl = TCG_REG_TMP0;
|
||||
}
|
||||
|
||||
if (const_bl) {
|
||||
|
@ -1508,7 +1524,7 @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
|
|||
possibility of adding 0+const in the low part, and the
|
||||
immediate add instructions encode XSP not XZR. Don't try
|
||||
anything more elaborate here than loading another zero. */
|
||||
al = TCG_REG_TMP;
|
||||
al = TCG_REG_TMP0;
|
||||
tcg_out_movi(s, ext, al, 0);
|
||||
}
|
||||
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
|
||||
|
@ -1549,7 +1565,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
|
|||
{
|
||||
TCGReg a1 = a0;
|
||||
if (is_ctz) {
|
||||
a1 = TCG_REG_TMP;
|
||||
a1 = TCG_REG_TMP0;
|
||||
tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
|
||||
}
|
||||
if (const_b && b == (ext ? 64 : 32)) {
|
||||
|
@ -1558,7 +1574,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
|
|||
AArch64Insn sel = I3506_CSEL;
|
||||
|
||||
tcg_out_cmp(s, ext, a0, 0, 1);
|
||||
tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
|
||||
tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
|
||||
|
||||
if (const_b) {
|
||||
if (b == -1) {
|
||||
|
@ -1571,7 +1587,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
|
|||
b = d;
|
||||
}
|
||||
}
|
||||
tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
|
||||
tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1588,7 +1604,7 @@ bool tcg_target_has_memory_bswap(MemOp memop)
|
|||
}
|
||||
|
||||
static const TCGLdstHelperParam ldst_helper_param = {
|
||||
.ntmp = 1, .tmp = { TCG_REG_TMP }
|
||||
.ntmp = 1, .tmp = { TCG_REG_TMP0 }
|
||||
};
|
||||
|
||||
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
|
||||
|
@ -1633,19 +1649,19 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
|
|||
TCGType addr_type = s->addr_type;
|
||||
TCGLabelQemuLdst *ldst = NULL;
|
||||
MemOp opc = get_memop(oi);
|
||||
MemOp s_bits = opc & MO_SIZE;
|
||||
unsigned a_mask;
|
||||
|
||||
h->aa = atom_and_align_for_opc(s, opc,
|
||||
have_lse2 ? MO_ATOM_WITHIN16
|
||||
: MO_ATOM_IFALIGN,
|
||||
false);
|
||||
s_bits == MO_128);
|
||||
a_mask = (1 << h->aa.align) - 1;
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
unsigned s_bits = opc & MO_SIZE;
|
||||
unsigned s_mask = (1u << s_bits) - 1;
|
||||
unsigned mem_index = get_mmuidx(oi);
|
||||
TCGReg x3;
|
||||
TCGReg addr_adj;
|
||||
TCGType mask_type;
|
||||
uint64_t compare_mask;
|
||||
|
||||
|
@ -1657,27 +1673,27 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
|
|||
mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
|
||||
? TCG_TYPE_I64 : TCG_TYPE_I32);
|
||||
|
||||
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
|
||||
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
|
||||
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
|
||||
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
|
||||
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
|
||||
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
|
||||
tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
|
||||
tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
|
||||
TLB_MASK_TABLE_OFS(mem_index), 1, 0);
|
||||
|
||||
/* Extract the TLB index from the address into X0. */
|
||||
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
|
||||
TCG_REG_X0, TCG_REG_X0, addr_reg,
|
||||
TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
|
||||
s->page_bits - CPU_TLB_ENTRY_BITS);
|
||||
|
||||
/* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
|
||||
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
|
||||
/* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
|
||||
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
|
||||
|
||||
/* Load the tlb comparator into X0, and the fast path addend into X1. */
|
||||
tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
|
||||
/* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
|
||||
tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
|
||||
is_ld ? offsetof(CPUTLBEntry, addr_read)
|
||||
: offsetof(CPUTLBEntry, addr_write));
|
||||
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
|
||||
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
|
||||
offsetof(CPUTLBEntry, addend));
|
||||
|
||||
/*
|
||||
|
@ -1686,25 +1702,26 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
|
|||
* cross pages using the address of the last byte of the access.
|
||||
*/
|
||||
if (a_mask >= s_mask) {
|
||||
x3 = addr_reg;
|
||||
addr_adj = addr_reg;
|
||||
} else {
|
||||
addr_adj = TCG_REG_TMP2;
|
||||
tcg_out_insn(s, 3401, ADDI, addr_type,
|
||||
TCG_REG_X3, addr_reg, s_mask - a_mask);
|
||||
x3 = TCG_REG_X3;
|
||||
addr_adj, addr_reg, s_mask - a_mask);
|
||||
}
|
||||
compare_mask = (uint64_t)s->page_mask | a_mask;
|
||||
|
||||
/* Store the page mask part of the address into X3. */
|
||||
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
|
||||
/* Store the page mask part of the address into TMP2. */
|
||||
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
|
||||
addr_adj, compare_mask);
|
||||
|
||||
/* Perform the address comparison. */
|
||||
tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
|
||||
tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
|
||||
|
||||
/* If not equal, we jump to the slow path. */
|
||||
ldst->label_ptr[0] = s->code_ptr;
|
||||
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
|
||||
|
||||
h->base = TCG_REG_X1,
|
||||
h->base = TCG_REG_TMP1;
|
||||
h->index = addr_reg;
|
||||
h->index_ext = addr_type;
|
||||
#else
|
||||
|
@ -1822,6 +1839,108 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
|
|||
}
|
||||
}
|
||||
|
||||
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
|
||||
TCGReg addr_reg, MemOpIdx oi, bool is_ld)
|
||||
{
|
||||
TCGLabelQemuLdst *ldst;
|
||||
HostAddress h;
|
||||
TCGReg base;
|
||||
bool use_pair;
|
||||
|
||||
ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
|
||||
|
||||
/* Compose the final address, as LDP/STP have no indexing. */
|
||||
if (h.index == TCG_REG_XZR) {
|
||||
base = h.base;
|
||||
} else {
|
||||
base = TCG_REG_TMP2;
|
||||
if (h.index_ext == TCG_TYPE_I32) {
|
||||
/* add base, base, index, uxtw */
|
||||
tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
|
||||
h.base, h.index, MO_32, 0);
|
||||
} else {
|
||||
/* add base, base, index */
|
||||
tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
|
||||
}
|
||||
}
|
||||
|
||||
use_pair = h.aa.atom < MO_128 || have_lse2;
|
||||
|
||||
if (!use_pair) {
|
||||
tcg_insn_unit *branch = NULL;
|
||||
TCGReg ll, lh, sl, sh;
|
||||
|
||||
/*
|
||||
* If we have already checked for 16-byte alignment, that's all
|
||||
* we need. Otherwise we have determined that misaligned atomicity
|
||||
* may be handled with two 8-byte loads.
|
||||
*/
|
||||
if (h.aa.align < MO_128) {
|
||||
/*
|
||||
* TODO: align should be MO_64, so we only need test bit 3,
|
||||
* which means we could use TBNZ instead of ANDS+B_C.
|
||||
*/
|
||||
tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
|
||||
branch = s->code_ptr;
|
||||
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
|
||||
use_pair = true;
|
||||
}
|
||||
|
||||
if (is_ld) {
|
||||
/*
|
||||
* 16-byte atomicity without LSE2 requires LDXP+STXP loop:
|
||||
* ldxp lo, hi, [base]
|
||||
* stxp t0, lo, hi, [base]
|
||||
* cbnz t0, .-8
|
||||
* Require no overlap between data{lo,hi} and base.
|
||||
*/
|
||||
if (datalo == base || datahi == base) {
|
||||
tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
|
||||
base = TCG_REG_TMP2;
|
||||
}
|
||||
ll = sl = datalo;
|
||||
lh = sh = datahi;
|
||||
} else {
|
||||
/*
|
||||
* 16-byte atomicity without LSE2 requires LDXP+STXP loop:
|
||||
* 1: ldxp t0, t1, [base]
|
||||
* stxp t0, lo, hi, [base]
|
||||
* cbnz t0, 1b
|
||||
*/
|
||||
tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
|
||||
ll = TCG_REG_TMP0;
|
||||
lh = TCG_REG_TMP1;
|
||||
sl = datalo;
|
||||
sh = datahi;
|
||||
}
|
||||
|
||||
tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
|
||||
tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
|
||||
tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
|
||||
|
||||
if (use_pair) {
|
||||
/* "b .+8", branching across the one insn of use_pair. */
|
||||
tcg_out_insn(s, 3206, B, 2);
|
||||
reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
|
||||
}
|
||||
}
|
||||
|
||||
if (use_pair) {
|
||||
if (is_ld) {
|
||||
tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
|
||||
} else {
|
||||
tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (ldst) {
|
||||
ldst->type = TCG_TYPE_I128;
|
||||
ldst->datalo_reg = datalo;
|
||||
ldst->datahi_reg = datahi;
|
||||
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
static const tcg_insn_unit *tb_ret_addr;
|
||||
|
||||
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
|
||||
|
@ -1847,7 +1966,7 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
|
|||
|
||||
set_jmp_insn_offset(s, which);
|
||||
tcg_out32(s, I3206_B);
|
||||
tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
|
||||
tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
|
||||
set_jmp_reset_offset(s, which);
|
||||
}
|
||||
|
||||
|
@ -1866,7 +1985,7 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
|
|||
ptrdiff_t i_offset = i_addr - jmp_rx;
|
/* Note that we asserted this in range in tcg_out_goto_tb. */
insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
}
qatomic_set((uint32_t *)jmp_rw, insn);
flush_idcache_range(jmp_rx, jmp_rw, 4);

@@ -2060,13 +2179,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_rem_i64:
case INDEX_op_rem_i32:
tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
break;
case INDEX_op_remu_i64:
case INDEX_op_remu_i32:
tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
break;

case INDEX_op_shl_i64:

@@ -2110,8 +2229,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_rotl(s, ext, a0, a1, a2);
} else {
tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
}
break;

@@ -2161,6 +2280,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
break;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
break;
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
break;

case INDEX_op_bswap64_i64:
tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);

@@ -2517,8 +2644,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
break;
}
}
tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
a2 = TCG_VEC_TMP;
tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
a2 = TCG_VEC_TMP0;
}
if (is_scalar) {
insn = cmp_scalar_insn[cond];

@@ -2799,12 +2926,18 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
return C_O1_I1(r, l);
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
return C_O2_I1(r, r, r);
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(lZ, l);
return C_O0_I2(rZ, r);
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
return C_O0_I3(rZ, rZ, r);

case INDEX_op_deposit_i32:
case INDEX_op_deposit_i64:

@@ -2900,9 +3033,11 @@ static void tcg_target_init(TCGContext *s)
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
@@ -16,7 +16,6 @@
#include "host/cpuinfo.h"

#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

typedef enum {

@@ -131,7 +130,16 @@ typedef enum {
#define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1

#define TCG_TARGET_HAS_qemu_ldst_i128 0
/*
* Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
* which requires writable pages. We must defer to the helper for user-only,
* but in system mode all ram is writable for the host.
*/
#ifdef CONFIG_USER_ONLY
#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
#else
#define TCG_TARGET_HAS_qemu_ldst_i128 1
#endif

#define TCG_TARGET_HAS_v64 1
#define TCG_TARGET_HAS_v128 1
@@ -31,7 +31,6 @@ extern int arm_arch;
#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)

#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
#define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX

typedef enum {
@@ -91,6 +91,8 @@ static const int tcg_target_reg_alloc_order[] = {
#endif
};

#define TCG_TMP_VEC TCG_REG_XMM5

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)

@@ -319,6 +321,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16)
#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16)
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)

@@ -1753,7 +1757,21 @@ typedef struct {
bool tcg_target_has_memory_bswap(MemOp memop)
{
return have_movbe;
TCGAtomAlign aa;

if (!have_movbe) {
return false;
}
if ((memop & MO_SIZE) < MO_128) {
return true;
}

/*
* Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
* but do allow a pair of 64-bit operations, i.e. MOVBEQ.
*/
aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
return aa.atom < MO_128;
}

/*
@@ -1781,6 +1799,30 @@ static const TCGLdstHelperParam ldst_helper_param = {
static const TCGLdstHelperParam ldst_helper_param = { };
#endif

static void tcg_out_vec_to_pair(TCGContext *s, TCGType type,
TCGReg l, TCGReg h, TCGReg v)
{
int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;

/* vpmov{d,q} %v, %l */
tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l);
/* vpextr{d,q} $1, %v, %h */
tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h);
tcg_out8(s, 1);
}

static void tcg_out_pair_to_vec(TCGContext *s, TCGType type,
TCGReg v, TCGReg l, TCGReg h)
{
int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;

/* vmov{d,q} %l, %v */
tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l);
/* vpinsr{d,q} $1, %h, %v, %v */
tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h);
tcg_out8(s, 1);
}

/*
* Generate code for the slow path for a load at the end of block
*/

@@ -1870,6 +1912,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
unsigned a_mask;

#ifdef CONFIG_SOFTMMU

@@ -1880,7 +1923,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
*h = x86_guest_base;
#endif
h->base = addrlo;
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
a_mask = (1 << h->aa.align) - 1;

#ifdef CONFIG_SOFTMMU

@@ -1890,7 +1933,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGType tlbtype = TCG_TYPE_I32;
int trexw = 0, hrexw = 0, tlbrexw = 0;
unsigned mem_index = get_mmuidx(oi);
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1 << s_bits) - 1;
int tlb_mask;
@@ -2070,6 +2112,72 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
h.base, h.index, 0, h.ofs + 4);
}
break;

case MO_128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);

/*
* Without 16-byte atomicity, use integer regs.
* That is where we want the data, and it allows bswaps.
*/
if (h.aa.atom < MO_128) {
if (use_movbe) {
TCGReg t = datalo;
datalo = datahi;
datahi = t;
}
if (h.base == datalo || h.index == datalo) {
tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi,
h.base, h.index, 0, h.ofs);
tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
datalo, datahi, 0);
tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
datahi, datahi, 8);
} else {
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
h.base, h.index, 0, h.ofs);
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
h.base, h.index, 0, h.ofs + 8);
}
break;
}

/*
* With 16-byte atomicity, a vector load is required.
* If we already have 16-byte alignment, then VMOVDQA always works.
* Else if VMOVDQU has atomicity with dynamic alignment, use that.
* Else use we require a runtime test for alignment for VMOVDQA;
* use VMOVDQU on the unaligned nonatomic path for simplicity.
*/
if (h.aa.align >= MO_128) {
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
} else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
} else {
TCGLabel *l1 = gen_new_label();
TCGLabel *l2 = gen_new_label();

tcg_out_testi(s, h.base, 15);
tcg_out_jxx(s, JCC_JNE, l1, true);

tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
tcg_out_jxx(s, JCC_JMP, l2, true);

tcg_out_label(s, l1);
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
tcg_out_label(s, l2);
}
tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo, datahi, TCG_TMP_VEC);
break;

default:
g_assert_not_reached();
}

@@ -2140,6 +2248,63 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
h.base, h.index, 0, h.ofs + 4);
}
break;

case MO_128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);

/*
* Without 16-byte atomicity, use integer regs.
* That is where we have the data, and it allows bswaps.
*/
if (h.aa.atom < MO_128) {
if (use_movbe) {
TCGReg t = datalo;
datalo = datahi;
datahi = t;
}
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
h.base, h.index, 0, h.ofs);
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
h.base, h.index, 0, h.ofs + 8);
break;
}

/*
* With 16-byte atomicity, a vector store is required.
* If we already have 16-byte alignment, then VMOVDQA always works.
* Else if VMOVDQU has atomicity with dynamic alignment, use that.
* Else use we require a runtime test for alignment for VMOVDQA;
* use VMOVDQU on the unaligned nonatomic path for simplicity.
*/
tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC, datalo, datahi);
if (h.aa.align >= MO_128) {
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
} else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
} else {
TCGLabel *l1 = gen_new_label();
TCGLabel *l2 = gen_new_label();

tcg_out_testi(s, h.base, 15);
tcg_out_jxx(s, JCC_JNE, l1, true);

tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
tcg_out_jxx(s, JCC_JMP, l2, true);

tcg_out_label(s, l1);
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
tcg_out_label(s, l2);
}
break;

default:
g_assert_not_reached();
}
@@ -2470,6 +2635,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
break;

case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st8_a64_i32:

@@ -2496,6 +2666,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
break;

OP_32_64(mulu2):
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);

@@ -3193,6 +3368,15 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);

case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
return C_O2_I1(r, r, L);
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
return C_O0_I3(L, L, L);

case INDEX_op_brcond2_i32:
return C_O0_I4(r, r, ri, ri);

@@ -3962,6 +4146,7 @@ static void tcg_target_init(TCGContext *s)
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
#ifdef _WIN64
/* These are call saved, and we don't save them, so don't use them. */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);
@@ -28,7 +28,6 @@
#include "host/cpuinfo.h"

#define TCG_TARGET_INSN_UNIT_SIZE 1
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31

#ifdef __x86_64__
# define TCG_TARGET_REG_BITS 64

@@ -118,7 +117,6 @@ typedef enum {
#define have_avx1 (cpuinfo & CPUINFO_AVX1)
#define have_avx2 (cpuinfo & CPUINFO_AVX2)
#define have_movbe (cpuinfo & CPUINFO_MOVBE)
#define have_atomic16 (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)

/*
* There are interesting instructions in AVX512, so long as we have AVX512VL,

@@ -202,7 +200,8 @@ typedef enum {
#define TCG_TARGET_HAS_qemu_st8_i32 1
#endif

#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_qemu_ldst_i128 \
(TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA))

/* We do not support older SSE systems, only beginning with AVX1. */
#define TCG_TARGET_HAS_v64 have_avx1
@@ -36,7 +36,6 @@
#endif

#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
#define TCG_TARGET_NB_REGS 32

#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
@@ -14,6 +14,7 @@ C_O0_I2(r, r)
C_O0_I2(r, ri)
C_O0_I2(v, r)
C_O0_I3(r, r, r)
C_O0_I3(o, m, r)
C_O0_I4(r, r, ri, ri)
C_O0_I4(r, r, r, r)
C_O1_I1(r, r)

@@ -34,6 +35,7 @@ C_O1_I3(v, v, v, v)
C_O1_I4(r, r, ri, rZ, rZ)
C_O1_I4(r, r, r, ri, ri)
C_O2_I1(r, r, r)
C_O2_I1(o, m, r)
C_O2_I2(r, r, r, r)
C_O2_I4(r, r, rI, rZM, r, r)
C_O2_I4(r, r, r, r, rI, rZM)
@@ -9,6 +9,7 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */
REGS('v', ALL_VECTOR_REGS)

/*
@@ -295,25 +295,27 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define B OPCD( 18)
#define BC OPCD( 16)

#define LBZ OPCD( 34)
#define LHZ OPCD( 40)
#define LHA OPCD( 42)
#define LWZ OPCD( 32)
#define LWZUX XO31( 55)
#define STB OPCD( 38)
#define STH OPCD( 44)
#define STW OPCD( 36)

#define STD XO62( 0)
#define STDU XO62( 1)
#define STDX XO31(149)

#define LD XO58( 0)
#define LDX XO31( 21)
#define LDU XO58( 1)
#define LDUX XO31( 53)
#define LWA XO58( 2)
#define LWAX XO31(341)
#define LQ OPCD( 56)

#define STB OPCD( 38)
#define STH OPCD( 44)
#define STW OPCD( 36)
#define STD XO62( 0)
#define STDU XO62( 1)
#define STDX XO31(149)
#define STQ XO62( 2)

#define ADDIC OPCD( 12)
#define ADDI OPCD( 14)

@@ -2020,7 +2022,18 @@ typedef struct {
bool tcg_target_has_memory_bswap(MemOp memop)
{
return true;
TCGAtomAlign aa;

if ((memop & MO_SIZE) <= MO_64) {
return true;
}

/*
* Reject 16-byte memop with 16-byte atomicity,
* but do allow a pair of 64-bit operations.
*/
aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
return aa.atom <= MO_64;
}

/*

@@ -2035,7 +2048,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp a_bits;
MemOp a_bits, s_bits;

/*
* Book II, Section 1.4, Single-Copy Atomicity, specifies:

@@ -2047,10 +2060,11 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* As of 3.0, "the non-atomic access is performed as described in
* the corresponding list", which matches MO_ATOM_SUBALIGN.
*/
s_bits = opc & MO_SIZE;
h->aa = atom_and_align_for_opc(s, opc,
have_isa_3_00 ? MO_ATOM_SUBALIGN
: MO_ATOM_IFALIGN,
false);
s_bits == MO_128);
a_bits = h->aa.align;

#ifdef CONFIG_SOFTMMU

@@ -2060,7 +2074,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
unsigned s_bits = opc & MO_SIZE;

ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
@@ -2303,6 +2316,60 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
}
}

static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
TCGLabelQemuLdst *ldst;
HostAddress h;
bool need_bswap;
uint32_t insn;
TCGReg index;

ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);

/* Compose the final address, as LQ/STQ have no indexing. */
index = h.index;
if (h.base != 0) {
index = TCG_REG_TMP1;
tcg_out32(s, ADD | TAB(index, h.base, h.index));
}
need_bswap = get_memop(oi) & MO_BSWAP;

if (h.aa.atom == MO_128) {
tcg_debug_assert(!need_bswap);
tcg_debug_assert(datalo & 1);
tcg_debug_assert(datahi == datalo - 1);
insn = is_ld ? LQ : STQ;
tcg_out32(s, insn | TAI(datahi, index, 0));
} else {
TCGReg d1, d2;

if (HOST_BIG_ENDIAN ^ need_bswap) {
d1 = datahi, d2 = datalo;
} else {
d1 = datalo, d2 = datahi;
}

if (need_bswap) {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
insn = is_ld ? LDBRX : STDBRX;
tcg_out32(s, insn | TAB(d1, 0, index));
tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
} else {
insn = is_ld ? LD : STD;
tcg_out32(s, insn | TAI(d1, index, 0));
tcg_out32(s, insn | TAI(d2, index, 8));
}
}

if (ldst) {
ldst->type = TCG_TYPE_I128;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
int i;

@@ -2860,6 +2927,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
break;

case INDEX_op_qemu_st_a64_i32:
if (TCG_TARGET_REG_BITS == 32) {

@@ -2889,6 +2961,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
break;

case INDEX_op_setcond_i32:
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],

@@ -3722,6 +3799,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);

case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
return C_O2_I1(o, m, r);
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
return C_O0_I3(o, m, r);

case INDEX_op_add_vec:
case INDEX_op_sub_vec:
case INDEX_op_mul_vec:
@@ -34,7 +34,6 @@
#define TCG_TARGET_NB_REGS 64
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16

typedef enum {
TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,

@@ -149,7 +148,8 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_mulsh_i64 1
#endif

#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_qemu_ldst_i128 \
(TCG_TARGET_REG_BITS == 64 && have_isa_2_07)

/*
* While technically Altivec could support V64, it has no 64-bit store
@@ -35,7 +35,6 @@
#define TCG_TARGET_REG_BITS 64

#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20
#define TCG_TARGET_NB_REGS 32
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
@@ -14,6 +14,7 @@ C_O0_I2(r, r)
C_O0_I2(r, ri)
C_O0_I2(r, rA)
C_O0_I2(v, r)
C_O0_I3(o, m, r)
C_O1_I1(r, r)
C_O1_I1(v, r)
C_O1_I1(v, v)

@@ -36,6 +37,7 @@ C_O1_I2(v, v, v)
C_O1_I3(v, v, v, v)
C_O1_I4(r, r, ri, rI, r)
C_O1_I4(r, r, rA, rI, r)
C_O2_I1(o, m, r)
C_O2_I2(o, m, 0, r)
C_O2_I2(o, m, r, r)
C_O2_I3(o, m, 0, 1, r)
@@ -243,6 +243,7 @@ typedef enum S390Opcode {
RXY_LLGF = 0xe316,
RXY_LLGH = 0xe391,
RXY_LMG = 0xeb04,
RXY_LPQ = 0xe38f,
RXY_LRV = 0xe31e,
RXY_LRVG = 0xe30f,
RXY_LRVH = 0xe31f,

@@ -253,6 +254,7 @@ typedef enum S390Opcode {
RXY_STG = 0xe324,
RXY_STHY = 0xe370,
RXY_STMG = 0xeb24,
RXY_STPQ = 0xe38e,
RXY_STRV = 0xe33e,
RXY_STRVG = 0xe32f,
RXY_STRVH = 0xe33f,

@@ -1577,7 +1579,18 @@ typedef struct {
bool tcg_target_has_memory_bswap(MemOp memop)
{
return true;
TCGAtomAlign aa;

if ((memop & MO_SIZE) <= MO_64) {
return true;
}

/*
* Reject 16-byte memop with 16-byte atomicity,
* but do allow a pair of 64-bit operations.
*/
aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
return aa.atom <= MO_64;
}

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,

@@ -1734,13 +1747,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
unsigned a_mask;

h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
a_mask = (1 << h->aa.align) - 1;

#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1 << s_bits) - 1;
int mem_index = get_mmuidx(oi);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
@@ -1865,6 +1878,80 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
}
}

static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
TCGLabel *l1 = NULL, *l2 = NULL;
TCGLabelQemuLdst *ldst;
HostAddress h;
bool need_bswap;
bool use_pair;
S390Opcode insn;

ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

use_pair = h.aa.atom < MO_128;
need_bswap = get_memop(oi) & MO_BSWAP;

if (!use_pair) {
/*
* Atomicity requires we use LPQ. If we've already checked for
* 16-byte alignment, that's all we need. If we arrive with
* lesser alignment, we have determined that less than 16-byte
* alignment can be satisfied with two 8-byte loads.
*/
if (h.aa.align < MO_128) {
use_pair = true;
l1 = gen_new_label();
l2 = gen_new_label();

tcg_out_insn(s, RI, TMLL, addr_reg, 15);
tgen_branch(s, 7, l1); /* CC in {1,2,3} */
}

tcg_debug_assert(!need_bswap);
tcg_debug_assert(datalo & 1);
tcg_debug_assert(datahi == datalo - 1);
insn = is_ld ? RXY_LPQ : RXY_STPQ;
tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);

if (use_pair) {
tgen_branch(s, S390_CC_ALWAYS, l2);
tcg_out_label(s, l1);
}
}
if (use_pair) {
TCGReg d1, d2;

if (need_bswap) {
d1 = datalo, d2 = datahi;
insn = is_ld ? RXY_LRVG : RXY_STRVG;
} else {
d1 = datahi, d2 = datalo;
insn = is_ld ? RXY_LG : RXY_STG;
}

if (h.base == d1 || h.index == d1) {
tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
h.base = TCG_TMP0;
h.index = TCG_REG_NONE;
h.disp = 0;
}
tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
}
if (l2) {
tcg_out_label(s, l2);
}

if (ldst) {
ldst->type = TCG_TYPE_I128;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
}

static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
/* Reuse the zeroing that exists for goto_ptr. */

@@ -2226,6 +2313,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
break;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
break;
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
break;

case INDEX_op_ld16s_i64:
tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);

@@ -3107,6 +3202,12 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
return C_O0_I2(r, r);
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
return C_O2_I1(o, m, r);
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
return C_O0_I3(o, m, r);

case INDEX_op_deposit_i32:
case INDEX_op_deposit_i64:
@@ -26,7 +26,6 @@
#define S390_TCG_TARGET_H

#define TCG_TARGET_INSN_UNIT_SIZE 2
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19

/* We have a +- 4GB range on the branches; leave some slop. */
#define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)

@@ -140,7 +139,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0

#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_qemu_ldst_i128 1

#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
@@ -26,7 +26,6 @@
#define SPARC_TCG_TARGET_H

#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
#define TCG_TARGET_NB_REGS 32
#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
@@ -5736,8 +5736,8 @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
mov[0].dst = ldst->datalo_reg;
mov[0].src =
tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
mov[0].dst_type = TCG_TYPE_I32;
mov[0].src_type = TCG_TYPE_I32;
mov[0].dst_type = TCG_TYPE_REG;
mov[0].src_type = TCG_TYPE_REG;
mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

mov[1].dst = ldst->datahi_reg;
@@ -42,7 +42,6 @@
#define TCG_TARGET_INTERPRETER 1
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

#if UINTPTR_MAX == UINT32_MAX
@@ -1,24 +0,0 @@
#!/bin/sh
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.

PYTHON=$1
DECODETREE=$2
E=0

# All of these tests should produce errors
for i in err_*.decode; do
if $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then
# Pass, aka failed to fail.
echo FAIL: $i 1>&2
E=1
fi
done

for i in succ_*.decode; do
if ! $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then
echo FAIL:$i 1>&2
fi
done

exit $E
@@ -0,0 +1,7 @@
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.

# Diagnose formats which refer to undefined fields
%field1 field2:3
@fmt ........ ........ ........ ........ %field1
insn 00000000 00000000 00000000 00000000 @fmt

@@ -0,0 +1,7 @@
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.

# Diagnose fields whose definitions form a loop
%field1 field2:3
%field2 field1:4
insn 00000000 00000000 00000000 00000000 %field1 %field2

@@ -0,0 +1,8 @@
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.

# Diagnose patterns which refer to undefined fields
&f1 f1 a
%field1 field2:3
@fmt ........ ........ ........ .... a:4 &f1
insn 00000000 00000000 00000000 0000 .... @fmt f1=%field1

@@ -0,0 +1,14 @@
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.

# Diagnose fields where the format refers to a field defined in the
# pattern and the pattern refers to a field defined in the format.
# This is theoretically not impossible to implement, but is not
# supported by the script at this time.
&abcd a b c d
%refa a:3
%refc c:4
# Format defines 'c' and sets 'b' to an indirect ref to 'a'
@fmt ........ ........ ........ c:8 &abcd b=%refa
# Pattern defines 'a' and sets 'd' to an indirect ref to 'c'
insn 00000000 00000000 00000000 ........ @fmt d=%refc a=6
@@ -0,0 +1,64 @@
err_tests = [
'err_argset1.decode',
'err_argset2.decode',
'err_field1.decode',
'err_field2.decode',
'err_field3.decode',
'err_field4.decode',
'err_field5.decode',
'err_field6.decode',
'err_field7.decode',
'err_field8.decode',
'err_field9.decode',
'err_field10.decode',
'err_init1.decode',
'err_init2.decode',
'err_init3.decode',
'err_init4.decode',
'err_overlap1.decode',
'err_overlap2.decode',
'err_overlap3.decode',
'err_overlap4.decode',
'err_overlap5.decode',
'err_overlap6.decode',
'err_overlap7.decode',
'err_overlap8.decode',
'err_overlap9.decode',
'err_pattern_group_empty.decode',
'err_pattern_group_ident1.decode',
'err_pattern_group_ident2.decode',
'err_pattern_group_nest1.decode',
'err_pattern_group_nest2.decode',
'err_pattern_group_nest3.decode',
'err_pattern_group_overlap1.decode',
'err_width1.decode',
'err_width2.decode',
'err_width3.decode',
'err_width4.decode',
]

succ_tests = [
'succ_argset_type1.decode',
'succ_function.decode',
'succ_ident1.decode',
'succ_named_field.decode',
'succ_pattern_group_nest1.decode',
'succ_pattern_group_nest2.decode',
'succ_pattern_group_nest3.decode',
'succ_pattern_group_nest4.decode',
]

suite = 'decodetree'
decodetree = find_program(meson.project_source_root() / 'scripts/decodetree.py')

foreach t: err_tests
test(fs.replace_suffix(t, ''),
decodetree, args: ['-o', '/dev/null', '--test-for-error', files(t)],
suite: suite)
endforeach

foreach t: succ_tests
test(fs.replace_suffix(t, ''),
decodetree, args: ['-o', '/dev/null', files(t)],
suite: suite)
endforeach
@@ -0,0 +1,19 @@
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.

# field using a named_field
%imm_sz 8:8 sz:3
insn 00000000 00000000 ........ 00000000 imm_sz=%imm_sz sz=1

# Ditto, via a format. Here a field in the format
# references a named field defined in the insn pattern:
&imm_a imm alpha
%foo 0:16 alpha:4
@foo 00000001 ........ ........ ........ &imm_a imm=%foo
i1 ........ 00000000 ........ ........ @foo alpha=1
i2 ........ 00000001 ........ ........ @foo alpha=2

# Here the named field is defined in the format and referenced
# from the insn pattern:
@bar 00000010 ........ ........ ........ &imm_a alpha=4
i3 ........ 00000000 ........ ........ @bar imm=%foo
@@ -74,10 +74,7 @@ if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host
dependencies: [qemuutil, vhost_user])
endif

test('decodetree', sh,
args: [ files('decode/check.sh'), config_host['PYTHON'], files('../scripts/decodetree.py') ],
workdir: meson.current_source_dir() / 'decode',
suite: 'decodetree')
subdir('decode')

if 'CONFIG_TCG' in config_all
subdir('fp')