tcg/i386: Fix tcg_out_addi_ptr for win64

tcg: Implement atomicity for TCGv_i128
 tcg: First quarter of cleanups for building tcg once
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmRkWC8dHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/I+wf8CUF+J/E9u0EuurrB
 1asDicANUJIACnqlcEpSPKuSMtbzo1RDTQUR+d3GWJjyLASvSJZFZTQqWBdACRpc
 sNuDz3/1a6FbiM14CwIVmPpcjQXa+18Ck670Chmw51KyEt2xyDJTySFIGEqjiuTf
 YVDBbOs8neFZdcDvAs1qNUTjhRj4nNtkpQoBpv0tGH7E0CzPp6OcvxwfieVyLOIa
 Cy1ELM3aMyVN5MTjnORYLK70Pa9emdjB88SlypZx363ARKC7B50lzYPQ4E5zrOZq
 FKrOq5nFWLCtn4BID0R+jUmuUP6znR/hTlToDmf/9B4j9TUivERWlc54lz3YU6Gn
 su3FKg==
 =LVOb
 -----END PGP SIGNATURE-----

Merge tag 'pull-tcg-20230516-3' of https://gitlab.com/rth7680/qemu into staging

tcg/i386: Fix tcg_out_addi_ptr for win64
tcg: Implement atomicity for TCGv_i128
tcg: First quarter of cleanups for building tcg once

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmRkWC8dHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/I+wf8CUF+J/E9u0EuurrB
# 1asDicANUJIACnqlcEpSPKuSMtbzo1RDTQUR+d3GWJjyLASvSJZFZTQqWBdACRpc
# sNuDz3/1a6FbiM14CwIVmPpcjQXa+18Ck670Chmw51KyEt2xyDJTySFIGEqjiuTf
# YVDBbOs8neFZdcDvAs1qNUTjhRj4nNtkpQoBpv0tGH7E0CzPp6OcvxwfieVyLOIa
# Cy1ELM3aMyVN5MTjnORYLK70Pa9emdjB88SlypZx363ARKC7B50lzYPQ4E5zrOZq
# FKrOq5nFWLCtn4BID0R+jUmuUP6znR/hTlToDmf/9B4j9TUivERWlc54lz3YU6Gn
# su3FKg==
# =LVOb
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 16 May 2023 09:29:35 PM PDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-tcg-20230516-3' of https://gitlab.com/rth7680/qemu: (74 commits)
  tcg: Split out exec/user/guest-base.h
  tcg: Add tlb_dyn_max_bits to TCGContext
  tcg: Add page_bits and page_mask to TCGContext
  tcg: Remove TARGET_LONG_BITS, TCG_TYPE_TL
  tcg/mips: Remove TARGET_LONG_BITS, TCG_TYPE_TL
  tcg/loongarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
  tcg/aarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
  tcg/aarch64: Remove USE_GUEST_BASE
  tcg/arm: Remove TARGET_LONG_BITS
  tcg/i386: Remove TARGET_LONG_BITS, TCG_TYPE_TL
  tcg/i386: Adjust type of tlb_mask
  tcg/i386: Conditionalize tcg_out_extu_i32_i64
  tcg/i386: Always enable TCG_TARGET_HAS_extr[lh]_i64_i32
  tcg/tci: Elimnate TARGET_LONG_BITS, target_ulong
  tcg: Split INDEX_op_qemu_{ld,st}* for guest address size
  tcg: Remove TCGv from tcg_gen_atomic_*
  tcg: Remove TCGv from tcg_gen_qemu_{ld,st}_*
  tcg: Add addr_type to TCGContext
  accel/tcg: Widen plugin_gen_empty_mem_callback to i64
  tcg: Reduce copies for plugin_gen_mem_callbacks
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2023-05-16 21:30:27 -07:00
commit 6972ef1440
50 changed files with 5343 additions and 3350 deletions

View File

@ -13,20 +13,20 @@
* See the COPYING file in the top-level directory.
*/
static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
MemOpIdx oi)
{
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
}
#if HAVE_ATOMIC128
static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
static void atomic_trace_ld_post(CPUArchState *env, uint64_t addr,
MemOpIdx oi)
{
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
}
static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
static void atomic_trace_st_post(CPUArchState *env, uint64_t addr,
MemOpIdx oi)
{
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
@ -40,7 +40,7 @@ static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
*/
#define CMPXCHG_HELPER(OP, TYPE) \
TYPE HELPER(atomic_##OP)(CPUArchState *env, target_ulong addr, \
TYPE HELPER(atomic_##OP)(CPUArchState *env, uint64_t addr, \
TYPE oldv, TYPE newv, uint32_t oi) \
{ return cpu_atomic_##OP##_mmu(env, addr, oldv, newv, oi, GETPC()); }
@ -62,7 +62,7 @@ CMPXCHG_HELPER(cmpxchgo_le, Int128)
#undef CMPXCHG_HELPER
Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, uint64_t addr,
Int128 cmpv, Int128 newv, uint32_t oi)
{
#if TCG_TARGET_REG_BITS == 32
@ -82,7 +82,7 @@ Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
#endif
}
Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, uint64_t addr,
Int128 cmpv, Int128 newv, uint32_t oi)
{
#if TCG_TARGET_REG_BITS == 32
@ -103,7 +103,7 @@ Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
}
#define ATOMIC_HELPER(OP, TYPE) \
TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \
TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, uint64_t addr, \
TYPE val, uint32_t oi) \
{ return glue(glue(cpu_atomic_,OP),_mmu)(env, addr, val, oi, GETPC()); }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -92,27 +92,6 @@ void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
void *userdata)
{ }
static void do_gen_mem_cb(TCGv vaddr, uint32_t info)
{
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
TCGv_i64 vaddr64 = tcg_temp_ebb_new_i64();
TCGv_ptr udata = tcg_temp_ebb_new_ptr();
tcg_gen_movi_i32(meminfo, info);
tcg_gen_movi_ptr(udata, 0);
tcg_gen_ld_i32(cpu_index, cpu_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
tcg_gen_extu_tl_i64(vaddr64, vaddr);
gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, vaddr64, udata);
tcg_temp_free_ptr(udata);
tcg_temp_free_i64(vaddr64);
tcg_temp_free_i32(meminfo);
tcg_temp_free_i32(cpu_index);
}
static void gen_empty_udata_cb(void)
{
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
@ -145,9 +124,22 @@ static void gen_empty_inline_cb(void)
tcg_temp_free_i64(val);
}
static void gen_empty_mem_cb(TCGv addr, uint32_t info)
static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info)
{
do_gen_mem_cb(addr, info);
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
TCGv_ptr udata = tcg_temp_ebb_new_ptr();
tcg_gen_movi_i32(meminfo, info);
tcg_gen_movi_ptr(udata, 0);
tcg_gen_ld_i32(cpu_index, cpu_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr, udata);
tcg_temp_free_ptr(udata);
tcg_temp_free_i32(meminfo);
tcg_temp_free_i32(cpu_index);
}
/*
@ -202,35 +194,17 @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
}
}
union mem_gen_fn {
void (*mem_fn)(TCGv, uint32_t);
void (*inline_fn)(void);
};
static void gen_mem_wrapped(enum plugin_gen_cb type,
const union mem_gen_fn *f, TCGv addr,
uint32_t info, bool is_mem)
void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
{
enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, rw);
if (is_mem) {
f->mem_fn(addr, info);
} else {
f->inline_fn();
}
gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_MEM, rw);
gen_empty_mem_cb(addr, info);
tcg_gen_plugin_cb_end();
}
void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
{
union mem_gen_fn fn;
fn.mem_fn = gen_empty_mem_cb;
gen_mem_wrapped(PLUGIN_GEN_CB_MEM, &fn, addr, info, true);
fn.inline_fn = gen_empty_inline_cb;
gen_mem_wrapped(PLUGIN_GEN_CB_INLINE, &fn, 0, info, false);
gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_INLINE, rw);
gen_empty_inline_cb();
tcg_gen_plugin_cb_end();
}
static TCGOp *find_op(TCGOp *op, TCGOpcode opc)

View File

@ -39,62 +39,65 @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
#endif /* IN_HELPER_PROTO */
DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
i32, env, i64, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
i32, env, i64, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
i32, env, i64, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
i32, env, i64, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
i32, env, i64, i32, i32, i32)
#ifdef CONFIG_ATOMIC64
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
i64, env, tl, i64, i64, i32)
i64, env, i64, i64, i64, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
i64, env, tl, i64, i64, i32)
i64, env, i64, i64, i64, i32)
#endif
#ifdef CONFIG_CMPXCHG128
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
i128, env, tl, i128, i128, i32)
i128, env, i64, i128, i128, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
i128, env, tl, i128, i128, i32)
i128, env, i64, i128, i128, i32)
#endif
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG,
i128, env, tl, i128, i128, i32)
i128, env, i64, i128, i128, i32)
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG,
i128, env, tl, i128, i128, i32)
i128, env, i64, i128, i128, i32)
#ifdef CONFIG_ATOMIC64
#define GEN_ATOMIC_HELPERS(NAME) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le), \
TCG_CALL_NO_WG, i64, env, tl, i64, i32) \
TCG_CALL_NO_WG, i64, env, i64, i64, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be), \
TCG_CALL_NO_WG, i64, env, tl, i64, i32)
TCG_CALL_NO_WG, i64, env, i64, i64, i32)
#else
#define GEN_ATOMIC_HELPERS(NAME) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
TCG_CALL_NO_WG, i32, env, tl, i32, i32)
TCG_CALL_NO_WG, i32, env, i64, i32, i32)
#endif /* CONFIG_ATOMIC64 */
GEN_ATOMIC_HELPERS(fetch_add)

View File

@ -72,9 +72,11 @@ QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
TBContext tb_ctx;
/* Encode VAL as a signed leb128 sequence at P.
Return P incremented past the encoded value. */
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
/*
* Encode VAL as a signed leb128 sequence at P.
* Return P incremented past the encoded value.
*/
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
int more, byte;
@ -92,21 +94,23 @@ static uint8_t *encode_sleb128(uint8_t *p, target_long val)
return p;
}
/* Decode a signed leb128 sequence at *PP; increment *PP past the
decoded value. Return the decoded value. */
static target_long decode_sleb128(const uint8_t **pp)
/*
* Decode a signed leb128 sequence at *PP; increment *PP past the
* decoded value. Return the decoded value.
*/
static int64_t decode_sleb128(const uint8_t **pp)
{
const uint8_t *p = *pp;
target_long val = 0;
int64_t val = 0;
int byte, shift = 0;
do {
byte = *p++;
val |= (target_ulong)(byte & 0x7f) << shift;
val |= (int64_t)(byte & 0x7f) << shift;
shift += 7;
} while (byte & 0x80);
if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
val |= -(target_ulong)1 << shift;
val |= -(int64_t)1 << shift;
}
*pp = p;
@ -132,7 +136,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
int i, j, n;
for (i = 0, n = tb->icount; i < n; ++i) {
target_ulong prev;
uint64_t prev;
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
if (i == 0) {
@ -352,6 +356,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb_set_page_addr0(tb, phys_pc);
tb_set_page_addr1(tb, -1);
tcg_ctx->gen_tb = tb;
tcg_ctx->addr_type = TCG_TYPE_TL;
#ifdef CONFIG_SOFTMMU
tcg_ctx->page_bits = TARGET_PAGE_BITS;
tcg_ctx->page_mask = TARGET_PAGE_MASK;
tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
#endif
tb_overflow:
#ifdef CONFIG_PROFILER
@ -444,7 +455,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
/* Dump header and the first instruction */
fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
fprintf(logfile,
" -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
" -- guest addr 0x%016" PRIx64 " + tb prologue\n",
tcg_ctx->gen_insn_data[insn][0]);
chunk_start = tcg_ctx->gen_insn_end_off[insn];
disas(logfile, tb->tc.ptr, chunk_start);
@ -457,7 +468,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
while (insn < tb->icount) {
size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
if (chunk_end > chunk_start) {
fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
tcg_ctx->gen_insn_data[insn][0]);
disas(logfile, tb->tc.ptr + chunk_start,
chunk_end - chunk_start);

View File

@ -889,35 +889,9 @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
/* The softmmu versions of these helpers are in cputlb.c. */
/*
* Verify that we have passed the correct MemOp to the correct function.
*
* We could present one function to target code, and dispatch based on
* the MemOp, but so far we have worked hard to avoid an indirect function
* call along the memory path.
*/
static void validate_memop(MemOpIdx oi, MemOp expected)
static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra, MMUAccessType type)
{
#ifdef CONFIG_DEBUG_TCG
MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
assert(have == expected);
#endif
}
void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
{
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
}
void helper_unaligned_st(CPUArchState *env, target_ulong addr)
{
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
}
static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t ra, MMUAccessType type)
{
MemOp mop = get_memop(oi);
int a_bits = get_alignment_bits(mop);
void *ret;
@ -931,116 +905,251 @@ static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
return ret;
}
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
#include "ldst_atomicity.c.inc"
static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
{
void *haddr;
uint8_t ret;
validate_memop(oi, MO_UB);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
tcg_debug_assert((mop & MO_SIZE) == MO_8);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = ldub_p(haddr);
clear_helper_retaddr();
return ret;
}
tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
return do_ld1_mmu(env, addr, get_memop(oi), ra);
}
tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
return (int8_t)do_ld1_mmu(env, addr, get_memop(oi), ra);
}
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
uint8_t ret = do_ld1_mmu(env, addr, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
{
void *haddr;
uint16_t ret;
tcg_debug_assert((mop & MO_SIZE) == MO_16);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_2(env, ra, haddr, mop);
clear_helper_retaddr();
return ret;
}
tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
uint16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
if (mop & MO_BSWAP) {
ret = bswap16(ret);
}
return ret;
}
tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
int16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
if (mop & MO_BSWAP) {
ret = bswap16(ret);
}
return ret;
}
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
MemOp mop = get_memop(oi);
uint16_t ret;
validate_memop(oi, MO_BEUW);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = lduw_be_p(haddr);
clear_helper_retaddr();
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
ret = do_ld2_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_be16(ret);
}
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
uint16_t ret;
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
ret = do_ld2_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_le16(ret);
}
static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
{
void *haddr;
uint32_t ret;
tcg_debug_assert((mop & MO_SIZE) == MO_32);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_4(env, ra, haddr, mop);
clear_helper_retaddr();
return ret;
}
tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
uint32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
if (mop & MO_BSWAP) {
ret = bswap32(ret);
}
return ret;
}
tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
int32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
if (mop & MO_BSWAP) {
ret = bswap32(ret);
}
return ret;
}
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
MemOp mop = get_memop(oi);
uint32_t ret;
validate_memop(oi, MO_BEUL);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldl_be_p(haddr);
clear_helper_retaddr();
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
ret = do_ld4_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_be32(ret);
}
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
uint32_t ret;
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
ret = do_ld4_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_le32(ret);
}
static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
{
void *haddr;
uint64_t ret;
tcg_debug_assert((mop & MO_SIZE) == MO_64);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_8(env, ra, haddr, mop);
clear_helper_retaddr();
return ret;
}
uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
uint64_t ret = do_ld8_he_mmu(env, addr, mop, ra);
if (mop & MO_BSWAP) {
ret = bswap64(ret);
}
return ret;
}
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
MemOp mop = get_memop(oi);
uint64_t ret;
validate_memop(oi, MO_BEUQ);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldq_be_p(haddr);
clear_helper_retaddr();
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
ret = do_ld8_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint16_t ret;
validate_memop(oi, MO_LEUW);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = lduw_le_p(haddr);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint32_t ret;
validate_memop(oi, MO_LEUL);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldl_le_p(haddr);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
return cpu_to_be64(ret);
}
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
MemOp mop = get_memop(oi);
uint64_t ret;
validate_memop(oi, MO_LEUQ);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldq_le_p(haddr);
clear_helper_retaddr();
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
ret = do_ld8_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return cpu_to_le64(ret);
}
static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
{
void *haddr;
Int128 ret;
tcg_debug_assert((mop & MO_SIZE) == MO_128);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_16(env, ra, haddr, mop);
clear_helper_retaddr();
return ret;
}
Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
Int128 ret = do_ld16_he_mmu(env, addr, mop, ra);
if (mop & MO_BSWAP) {
ret = bswap128(ret);
}
return ret;
}
Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, MemOpIdx oi)
{
return helper_ld16_mmu(env, addr, oi, GETPC());
}
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
MemOp mop = get_memop(oi);
Int128 ret;
validate_memop(oi, MO_128 | MO_BE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
memcpy(&ret, haddr, 16);
clear_helper_retaddr();
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
ret = do_ld16_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
if (!HOST_BIG_ENDIAN) {
ret = bswap128(ret);
}
@ -1050,132 +1159,218 @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
MemOp mop = get_memop(oi);
Int128 ret;
validate_memop(oi, MO_128 | MO_LE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
memcpy(&ret, haddr, 16);
clear_helper_retaddr();
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
ret = do_ld16_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
if (HOST_BIG_ENDIAN) {
ret = bswap128(ret);
}
return ret;
}
void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
MemOpIdx oi, uintptr_t ra)
static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
MemOp mop, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_UB);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
tcg_debug_assert((mop & MO_SIZE) == MO_8);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
stb_p(haddr, val);
clear_helper_retaddr();
}
void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
do_st1_mmu(env, addr, val, get_memop(oi), ra);
}
void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
MemOpIdx oi, uintptr_t ra)
{
do_st1_mmu(env, addr, val, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
MemOp mop, uintptr_t ra)
{
void *haddr;
tcg_debug_assert((mop & MO_SIZE) == MO_16);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
store_atom_2(env, ra, haddr, mop, val);
clear_helper_retaddr();
}
void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
if (mop & MO_BSWAP) {
val = bswap16(val);
}
do_st2_he_mmu(env, addr, val, mop, ra);
}
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
MemOp mop = get_memop(oi);
validate_memop(oi, MO_BEUW);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stw_be_p(haddr, val);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_BEUL);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stl_be_p(haddr, val);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_BEUQ);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stq_be_p(haddr, val);
clear_helper_retaddr();
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
do_st2_he_mmu(env, addr, be16_to_cpu(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
do_st2_he_mmu(env, addr, le16_to_cpu(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOp mop, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_LEUW);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stw_le_p(haddr, val);
tcg_debug_assert((mop & MO_SIZE) == MO_32);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
store_atom_4(env, ra, haddr, mop, val);
clear_helper_retaddr();
}
void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
if (mop & MO_BSWAP) {
val = bswap32(val);
}
do_st4_he_mmu(env, addr, val, mop, ra);
}
void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
do_st4_he_mmu(env, addr, be32_to_cpu(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
do_st4_he_mmu(env, addr, le32_to_cpu(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOp mop, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_LEUL);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stl_le_p(haddr, val);
tcg_debug_assert((mop & MO_SIZE) == MO_64);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
store_atom_8(env, ra, haddr, mop, val);
clear_helper_retaddr();
}
void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
if (mop & MO_BSWAP) {
val = bswap64(val);
}
do_st8_he_mmu(env, addr, val, mop, ra);
}
void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
do_st8_he_mmu(env, addr, cpu_to_be64(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
do_st8_he_mmu(env, addr, cpu_to_le64(val), mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOp mop, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_LEUQ);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stq_le_p(haddr, val);
tcg_debug_assert((mop & MO_SIZE) == MO_128);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
store_atom_16(env, ra, haddr, mop, val);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
MemOpIdx oi, uintptr_t ra)
{
MemOp mop = get_memop(oi);
if (mop & MO_BSWAP) {
val = bswap128(val);
}
do_st16_he_mmu(env, addr, val, mop, ra);
}
void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
{
helper_st16_mmu(env, addr, val, oi, GETPC());
}
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra)
{
void *haddr;
MemOp mop = get_memop(oi);
validate_memop(oi, MO_128 | MO_BE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
if (!HOST_BIG_ENDIAN) {
val = bswap128(val);
}
memcpy(haddr, &val, 16);
clear_helper_retaddr();
do_st16_he_mmu(env, addr, val, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra)
{
void *haddr;
MemOp mop = get_memop(oi);
validate_memop(oi, MO_128 | MO_LE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
if (HOST_BIG_ENDIAN) {
val = bswap128(val);
}
memcpy(haddr, &val, 16);
clear_helper_retaddr();
do_st16_he_mmu(env, addr, val, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
@ -1267,7 +1462,6 @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
void *haddr;
uint64_t ret;
validate_memop(oi, MO_BEUQ);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldq_p(haddr);
clear_helper_retaddr();

View File

@ -297,31 +297,20 @@ swap: ``translator_ld{sign}{size}_swap(env, ptr, swap)``
Regexes for git grep
- ``\<translator_ld[us]\?[bwlq]\(_swap\)\?\>``
``helper_*_{ld,st}*_mmu``
``helper_{ld,st}*_mmu``
~~~~~~~~~~~~~~~~~~~~~~~~~
These functions are intended primarily to be called by the code
generated by the TCG backend. They may also be called by target
CPU helper function code. Like the ``cpu_{ld,st}_mmuidx_ra`` functions
they perform accesses by guest virtual address, with a given ``mmuidx``.
generated by the TCG backend. Like the ``cpu_{ld,st}_mmu`` functions
they perform accesses by guest virtual address, with a given ``MemOpIdx``.
These functions specify an ``opindex`` parameter which encodes
(among other things) the mmu index to use for the access. This parameter
should be created by calling ``make_memop_idx()``.
They differ from ``cpu_{ld,st}_mmu`` in that they take the endianness
of the operation only from the MemOpIdx, and loads extend the return
value to the size of a host general register (``tcg_target_ulong``).
The ``retaddr`` parameter should be the result of GETPC() called directly
from the top level HELPER(foo) function (or 0 if no guest CPU state
unwinding is required).
load: ``helper_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
**TODO** The names of these functions are a bit odd for historical
reasons because they were originally expected to be called only from
within generated code. We should rename them to bring them more in
line with the other memory access functions. The explicit endianness
is the only feature they have beyond ``*_mmuidx_ra``.
load: ``helper_{endian}_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
store: ``helper_{size}_mmu(env, addr, val, opindex, retaddr)``
``sign``
- (empty) : for 32 or 64 bit sizes
@ -334,14 +323,9 @@ store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
- ``l`` : 32 bits
- ``q`` : 64 bits
``endian``
- ``le`` : little endian
- ``be`` : big endian
- ``ret`` : target endianness
Regexes for git grep
- ``\<helper_\(le\|be\|ret\)_ld[us]\?[bwlq]_mmu\>``
- ``\<helper_\(le\|be\|ret\)_st[bwlq]_mmu\>``
- ``\<helper_ld[us]\?[bwlq]_mmu\>``
- ``\<helper_st[bwlq]_mmu\>``
``address_space_*``
~~~~~~~~~~~~~~~~~~~

View File

@ -672,19 +672,20 @@ QEMU specific operations
| This operation is optional. If the TCG backend does not implement the
goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
* - qemu_ld_i32/i64 *t0*, *t1*, *flags*, *memidx*
* - qemu_ld_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
qemu_st_i32/i64 *t0*, *t1*, *flags*, *memidx*
qemu_st_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
qemu_st8_i32 *t0*, *t1*, *flags*, *memidx*
- | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest
address *t1*. The _i32/_i64 size applies to the size of the input/output
address *t1*. The _i32/_i64/_i128 size applies to the size of the input/output
register *t0* only. The address *t1* is always sized according to the guest,
and the width of the memory operation is controlled by *flags*.
|
| Both *t0* and *t1* may be split into little-endian ordered pairs of registers
if dealing with 64-bit quantities on a 32-bit host.
if dealing with 64-bit quantities on a 32-bit host, or 128-bit quantities on
a 64-bit host.
|
| The *memidx* selects the qemu tlb index to use (e.g. user or kernel access).
The flags are the MemOp bits, selecting the sign, width, and endianness
@ -693,6 +694,8 @@ QEMU specific operations
| For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
64-bit memory access specified in *flags*.
|
| For qemu_ld/st_i128, these are only supported for a 64-bit host.
|
| For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
the memory operation is known to be 8-bit. This allows the backend to
provide a different set of register constraints.

View File

@ -84,11 +84,8 @@
#if defined(CONFIG_USER_ONLY)
#include "exec/user/abitypes.h"
#include "exec/user/guest-base.h"
/* On some host systems the guest address space is reserved on the host.
* This allows the guest address space to be offset to a convenient location.
*/
extern uintptr_t guest_base;
extern bool have_guest_base;
/*

View File

@ -72,6 +72,43 @@ typedef enum MemOp {
MO_ALIGN_64 = 6 << MO_ASHIFT,
MO_ALIGN = MO_AMASK,
/*
* MO_ATOM_* describes the atomicity requirements of the operation:
* MO_ATOM_IFALIGN: the operation must be single-copy atomic if it
* is aligned; if unaligned there is no atomicity.
* MO_ATOM_IFALIGN_PAIR: the entire operation may be considered to
* be a pair of half-sized operations which are packed together
* for convenience, with single-copy atomicity on each half if
* the half is aligned.
* This is the atomicity e.g. of Arm pre-FEAT_LSE2 LDP.
* MO_ATOM_WITHIN16: the operation is single-copy atomic, even if it
* is unaligned, so long as it does not cross a 16-byte boundary;
* if it crosses a 16-byte boundary there is no atomicity.
* This is the atomicity e.g. of Arm FEAT_LSE2 LDR.
* MO_ATOM_WITHIN16_PAIR: the entire operation is single-copy atomic,
* if it happens to be within a 16-byte boundary, otherwise it
* devolves to a pair of half-sized MO_ATOM_WITHIN16 operations.
* Depending on alignment, one or both will be single-copy atomic.
* This is the atomicity e.g. of Arm FEAT_LSE2 LDP.
* MO_ATOM_SUBALIGN: the operation is single-copy atomic by parts
* by the alignment. E.g. if the address is 0 mod 4, then each
* 4-byte subobject is single-copy atomic.
* This is the atomicity e.g. of IBM Power.
* MO_ATOM_NONE: the operation has no atomicity requirements.
*
* Note the default (i.e. 0) value is single-copy atomic to the
* size of the operation, if aligned. This retains the behaviour
* from before this field was introduced.
*/
MO_ATOM_SHIFT = 8,
MO_ATOM_IFALIGN = 0 << MO_ATOM_SHIFT,
MO_ATOM_IFALIGN_PAIR = 1 << MO_ATOM_SHIFT,
MO_ATOM_WITHIN16 = 2 << MO_ATOM_SHIFT,
MO_ATOM_WITHIN16_PAIR = 3 << MO_ATOM_SHIFT,
MO_ATOM_SUBALIGN = 4 << MO_ATOM_SHIFT,
MO_ATOM_NONE = 5 << MO_ATOM_SHIFT,
MO_ATOM_MASK = 7 << MO_ATOM_SHIFT,
/* Combinations of the above, for ease of use. */
MO_UB = MO_8,
MO_UW = MO_16,

View File

@ -27,7 +27,7 @@ void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
void plugin_gen_insn_end(void);
void plugin_gen_disable_mem_helpers(void);
void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info);
void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info);
static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
{
@ -69,7 +69,7 @@ static inline void plugin_gen_tb_end(CPUState *cpu)
static inline void plugin_gen_disable_mem_helpers(void)
{ }
static inline void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
static inline void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
{ }
static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)

View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Declaration of guest_base.
* Copyright (c) 2003 Fabrice Bellard
*/
#ifndef EXEC_USER_GUEST_BASE_H
#define EXEC_USER_GUEST_BASE_H
extern uintptr_t guest_base;
#endif

View File

@ -71,6 +71,24 @@
#define bit_LZCNT (1 << 5)
#endif
/*
* Signatures for different CPU implementations as returned from Leaf 0.
*/
#ifndef signature_INTEL_ecx
/* "Genu" "ineI" "ntel" */
#define signature_INTEL_ebx 0x756e6547
#define signature_INTEL_edx 0x49656e69
#define signature_INTEL_ecx 0x6c65746e
#endif
#ifndef signature_AMD_ecx
/* "Auth" "enti" "cAMD" */
#define signature_AMD_ebx 0x68747541
#define signature_AMD_edx 0x69746e65
#define signature_AMD_ecx 0x444d4163
#endif
static inline unsigned xgetbv_low(unsigned c)
{
unsigned a, d;

View File

@ -25,59 +25,39 @@
#ifndef TCG_LDST_H
#define TCG_LDST_H
#ifdef CONFIG_SOFTMMU
/* Value zero-extended to tcg register size. */
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t retaddr);
tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t retaddr);
tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t retaddr);
uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t retaddr);
tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr);
tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr);
uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t retaddr);
/* Value sign-extended to tcg register size. */
tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t retaddr);
tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t retaddr);
tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr);
tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr);
tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t retaddr);
/*
* Value extended to at least uint32_t, so that some ABIs do not require
* zero-extension from uint8_t or uint16_t.
*/
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr);
void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr);
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr);
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr);
void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr);
void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr);
void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
MemOpIdx oi, uintptr_t retaddr);
#else
G_NORETURN void helper_unaligned_ld(CPUArchState *env, target_ulong addr);
G_NORETURN void helper_unaligned_st(CPUArchState *env, target_ulong addr);
#endif /* CONFIG_SOFTMMU */
#endif /* TCG_LDST_H */

View File

@ -723,49 +723,28 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
#endif
#if TARGET_INSN_START_WORDS == 1
# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
static inline void tcg_gen_insn_start(target_ulong pc)
{
tcg_gen_op1(INDEX_op_insn_start, pc);
TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 64 / TCG_TARGET_REG_BITS);
tcg_set_insn_start_param(op, 0, pc);
}
# else
static inline void tcg_gen_insn_start(target_ulong pc)
{
tcg_gen_op2(INDEX_op_insn_start, (uint32_t)pc, (uint32_t)(pc >> 32));
}
# endif
#elif TARGET_INSN_START_WORDS == 2
# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
{
tcg_gen_op2(INDEX_op_insn_start, pc, a1);
TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 2 * 64 / TCG_TARGET_REG_BITS);
tcg_set_insn_start_param(op, 0, pc);
tcg_set_insn_start_param(op, 1, a1);
}
# else
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
{
tcg_gen_op4(INDEX_op_insn_start,
(uint32_t)pc, (uint32_t)(pc >> 32),
(uint32_t)a1, (uint32_t)(a1 >> 32));
}
# endif
#elif TARGET_INSN_START_WORDS == 3
# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
target_ulong a2)
{
tcg_gen_op3(INDEX_op_insn_start, pc, a1, a2);
TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 3 * 64 / TCG_TARGET_REG_BITS);
tcg_set_insn_start_param(op, 0, pc);
tcg_set_insn_start_param(op, 1, a1);
tcg_set_insn_start_param(op, 2, a2);
}
#else
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
target_ulong a2)
{
tcg_gen_op6(INDEX_op_insn_start,
(uint32_t)pc, (uint32_t)(pc >> 32),
(uint32_t)a1, (uint32_t)(a1 >> 32),
(uint32_t)a2, (uint32_t)(a2 >> 32));
}
# endif
#else
# error "Unhandled number of operands to insn_start"
#endif
@ -824,73 +803,203 @@ static inline void tcg_gen_plugin_cb_end(void)
#define tcg_temp_new() tcg_temp_new_i32()
#define tcg_global_mem_new tcg_global_mem_new_i32
#define tcg_temp_free tcg_temp_free_i32
#define tcgv_tl_temp tcgv_i32_temp
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
#else
#define tcg_temp_new() tcg_temp_new_i64()
#define tcg_global_mem_new tcg_global_mem_new_i64
#define tcg_temp_free tcg_temp_free_i64
#define tcgv_tl_temp tcgv_i64_temp
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
#endif
void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_ld_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
void tcg_gen_qemu_st_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
void tcg_gen_qemu_ld_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
void tcg_gen_qemu_st_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
void tcg_gen_qemu_ld_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
void tcg_gen_qemu_st_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
TCGArg, MemOp);
void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
TCGArg, MemOp);
void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
TCGArg, MemOp);
static inline void
tcg_gen_qemu_ld_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
{
tcg_gen_qemu_ld_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
}
void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
TCGArg, MemOp);
void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
TCGArg, MemOp);
void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
TCGArg, MemOp);
static inline void
tcg_gen_qemu_st_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
{
tcg_gen_qemu_st_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
}
void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
static inline void
tcg_gen_qemu_ld_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
{
tcg_gen_qemu_ld_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
}
void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_fetch_and_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
static inline void
tcg_gen_qemu_st_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
{
tcg_gen_qemu_st_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
}
void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_and_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
static inline void
tcg_gen_qemu_ld_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
{
tcg_gen_qemu_ld_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
}
static inline void
tcg_gen_qemu_st_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
{
tcg_gen_qemu_st_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
}
void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
TCGv_i128, TCGArg, MemOp, TCGType);
void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
TCGv_i128, TCGArg, MemOp, TCGType);
void tcg_gen_atomic_xchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_xchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_add_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_add_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_and_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_and_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_or_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_or_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_xor_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_xor_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_smin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_smin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_umin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_umin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_smax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_smax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_umax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_fetch_umax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_add_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_add_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_and_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_and_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_or_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_or_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_xor_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_xor_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_smin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_smin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_umin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_umin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_smax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_smax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_umax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
TCGArg, MemOp, TCGType);
void tcg_gen_atomic_umax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
TCGArg, MemOp, TCGType);
#define DEF_ATOMIC2(N, S) \
static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S v, \
TCGArg i, MemOp m) \
{ N##_##S##_chk(r, tcgv_tl_temp(a), v, i, m, TCG_TYPE_TL); }
#define DEF_ATOMIC3(N, S) \
static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S o, \
TCGv_##S n, TCGArg i, MemOp m) \
{ N##_##S##_chk(r, tcgv_tl_temp(a), o, n, i, m, TCG_TYPE_TL); }
DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i32)
DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i64)
DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i128)
DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i32)
DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i64)
DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i128)
DEF_ATOMIC2(tcg_gen_atomic_xchg, i32)
DEF_ATOMIC2(tcg_gen_atomic_xchg, i64)
DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i32)
DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i64)
DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i32)
DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i64)
DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i32)
DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i64)
DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i32)
DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i64)
DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i32)
DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i64)
DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i32)
DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i64)
DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i32)
DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i64)
DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i32)
DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i64)
DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i32)
DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i64)
DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i32)
DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i64)
DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i32)
DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i64)
DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i32)
DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i64)
DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i32)
DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i64)
DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i32)
DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i64)
DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i32)
DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i64)
DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i32)
DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
#undef DEF_ATOMIC2
#undef DEF_ATOMIC3
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);

View File

@ -186,11 +186,10 @@ DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
DEF(muluh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muluh_i64))
DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
/* QEMU specific */
DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
DEF(insn_start, 0, 0, DATA64_ARGS * TARGET_INSN_START_WORDS,
TCG_OPF_NOT_PRESENT)
DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
@ -199,19 +198,46 @@ DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT)
DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT)
DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
/* Replicate ld/st ops for 32 and 64-bit guest addresses. */
DEF(qemu_ld_a32_i32, 1, 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1,
DEF(qemu_st_a32_i32, 0, 1 + 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
DEF(qemu_ld_a32_i64, DATA64_ARGS, 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
DEF(qemu_st_a32_i64, 0, DATA64_ARGS + 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
DEF(qemu_ld_a64_i32, 1, DATA64_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
DEF(qemu_st_a64_i32, 0, 1 + DATA64_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
DEF(qemu_ld_a64_i64, DATA64_ARGS, DATA64_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
/* Only used by i386 to cope with stupid register constraints. */
DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
/* Only for 64-bit hosts at the moment. */
DEF(qemu_ld_a32_i128, 2, 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
DEF(qemu_ld_a64_i128, 2, 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
DEF(qemu_st_a32_i128, 0, 3, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
DEF(qemu_st_a64_i128, 0, 3, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
/* Host vector support. */
@ -283,7 +309,6 @@ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
#endif
#undef TLADDR_ARGS
#undef DATA64_ARGS
#undef IMPL
#undef IMPL64

View File

@ -558,6 +558,13 @@ struct TCGContext {
int nb_temps;
int nb_indirects;
int nb_ops;
TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
#ifdef CONFIG_SOFTMMU
int page_mask;
uint8_t page_bits;
uint8_t tlb_dyn_max_bits;
#endif
TCGRegSet reserved_regs;
intptr_t current_frame_offset;
@ -629,7 +636,7 @@ struct TCGContext {
TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS];
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
uint64_t gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
/* Exit to translator on overflow. */
sigjmp_buf jmp_trans;
@ -771,24 +778,24 @@ static inline void tcg_set_insn_param(TCGOp *op, int arg, TCGArg v)
op->args[arg] = v;
}
static inline target_ulong tcg_get_insn_start_param(TCGOp *op, int arg)
static inline uint64_t tcg_get_insn_start_param(TCGOp *op, int arg)
{
#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
if (TCG_TARGET_REG_BITS == 64) {
return tcg_get_insn_param(op, arg);
#else
return tcg_get_insn_param(op, arg * 2) |
((uint64_t)tcg_get_insn_param(op, arg * 2 + 1) << 32);
#endif
} else {
return deposit64(tcg_get_insn_param(op, arg * 2), 32, 32,
tcg_get_insn_param(op, arg * 2 + 1));
}
}
static inline void tcg_set_insn_start_param(TCGOp *op, int arg, target_ulong v)
static inline void tcg_set_insn_start_param(TCGOp *op, int arg, uint64_t v)
{
#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
if (TCG_TARGET_REG_BITS == 64) {
tcg_set_insn_param(op, arg, v);
#else
} else {
tcg_set_insn_param(op, arg * 2, v);
tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
#endif
}
}
/* The last op that was emitted. */
@ -852,7 +859,7 @@ void tcg_register_thread(void);
void tcg_prologue_init(TCGContext *s);
void tcg_func_start(TCGContext *s);
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start);
void tb_target_set_jmp_target(const TranslationBlock *, int,
uintptr_t, uintptr_t);

View File

@ -2259,23 +2259,21 @@ config_host_data.set('HAVE_BROKEN_SIZE_MAX', not cc.compiles('''
return printf("%zu", SIZE_MAX);
}''', args: ['-Werror']))
atomic_test = '''
# See if 64-bit atomic operations are supported.
# Note that without __atomic builtins, we can only
# assume atomic loads/stores max at pointer size.
config_host_data.set('CONFIG_ATOMIC64', cc.links('''
#include <stdint.h>
int main(void)
{
@0@ x = 0, y = 0;
uint64_t x = 0, y = 0;
y = __atomic_load_n(&x, __ATOMIC_RELAXED);
__atomic_store_n(&x, y, __ATOMIC_RELAXED);
__atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
__atomic_exchange_n(&x, y, __ATOMIC_RELAXED);
__atomic_fetch_add(&x, y, __ATOMIC_RELAXED);
return 0;
}'''
# See if 64-bit atomic operations are supported.
# Note that without __atomic builtins, we can only
# assume atomic loads/stores max at pointer size.
config_host_data.set('CONFIG_ATOMIC64', cc.links(atomic_test.format('uint64_t')))
}'''))
has_int128 = cc.links('''
__int128_t a;
@ -2293,21 +2291,39 @@ if has_int128
# "do we have 128-bit atomics which are handled inline and specifically not
# via libatomic". The reason we can't use libatomic is documented in the
# comment starting "GCC is a house divided" in include/qemu/atomic128.h.
has_atomic128 = cc.links(atomic_test.format('unsigned __int128'))
# We only care about these operations on 16-byte aligned pointers, so
# force 16-byte alignment of the pointer, which may be greater than
# __alignof(unsigned __int128) for the host.
atomic_test_128 = '''
int main(int ac, char **av) {
unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], sizeof(16));
p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
__atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
__atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
return 0;
}'''
has_atomic128 = cc.links(atomic_test_128)
config_host_data.set('CONFIG_ATOMIC128', has_atomic128)
if not has_atomic128
has_cmpxchg128 = cc.links('''
# Even with __builtin_assume_aligned, the above test may have failed
# without optimization enabled. Try again with optimizations locally
# enabled for the function. See
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
has_atomic128_opt = cc.links('__attribute__((optimize("O1")))' + atomic_test_128)
config_host_data.set('CONFIG_ATOMIC128_OPT', has_atomic128_opt)
if not has_atomic128_opt
config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
int main(void)
{
unsigned __int128 x = 0, y = 0;
__sync_val_compare_and_swap_16(&x, y, x);
return 0;
}
''')
config_host_data.set('CONFIG_CMPXCHG128', has_cmpxchg128)
'''))
endif
endif
endif

View File

@ -13,6 +13,12 @@
#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"
#ifdef __linux__
#include <asm/hwcap.h>
#endif
#ifdef CONFIG_DARWIN
#include <sys/sysctl.h>
#endif
/* We're going to re-use TCGType in setting of the SF bit, which controls
the size of the operation performed. If we know the values match, it
@ -71,15 +77,13 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
return TCG_REG_X0 + slot;
}
bool have_lse;
bool have_lse2;
#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
as that actaully encodes SP. So if we need to zero-extend the guest
address, via the address index register slot, we need to load even
a zero guest base into a register. */
#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif
@ -1584,41 +1588,13 @@ typedef struct {
TCGReg base;
TCGReg index;
TCGType index_ext;
TCGAtomAlign aa;
} HostAddress;
#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
* MemOpIdx oi, uintptr_t ra)
*/
static void * const qemu_ld_helpers[MO_SIZE + 1] = {
[MO_8] = helper_ret_ldub_mmu,
#if HOST_BIG_ENDIAN
[MO_16] = helper_be_lduw_mmu,
[MO_32] = helper_be_ldul_mmu,
[MO_64] = helper_be_ldq_mmu,
#else
[MO_16] = helper_le_lduw_mmu,
[MO_32] = helper_le_ldul_mmu,
[MO_64] = helper_le_ldq_mmu,
#endif
};
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
* uintxx_t val, MemOpIdx oi,
* uintptr_t ra)
*/
static void * const qemu_st_helpers[MO_SIZE + 1] = {
[MO_8] = helper_ret_stb_mmu,
#if HOST_BIG_ENDIAN
[MO_16] = helper_be_stw_mmu,
[MO_32] = helper_be_stl_mmu,
[MO_64] = helper_be_stq_mmu,
#else
[MO_16] = helper_le_stw_mmu,
[MO_32] = helper_le_stl_mmu,
[MO_64] = helper_le_stq_mmu,
#endif
};
bool tcg_target_has_memory_bswap(MemOp memop)
{
return false;
}
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 1, .tmp = { TCG_REG_TMP }
@ -1652,40 +1628,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
tcg_out_goto(s, lb->raddr);
return true;
}
#else
static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
{
ptrdiff_t offset = tcg_pcrel_diff(s, target);
tcg_debug_assert(offset == sextract64(offset, 0, 21));
tcg_out_insn(s, 3406, ADR, rd, offset);
}
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
/* "Tail call" to the helper, with the return address back inline. */
tcg_out_adr(s, TCG_REG_LR, l->raddr);
tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
: helper_unaligned_st));
return true;
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* CONFIG_SOFTMMU */
/*
* For softmmu, perform the TLB load and compare.
@ -1697,11 +1639,16 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addr_reg, MemOpIdx oi,
bool is_ld)
{
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
TCGType addr_type = s->addr_type;
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
unsigned a_mask = (1u << a_bits) - 1;
unsigned a_mask;
h->aa = atom_and_align_for_opc(s, opc,
have_lse2 ? MO_ATOM_WITHIN16
: MO_ATOM_IFALIGN,
false);
a_mask = (1 << h->aa.align) - 1;
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
@ -1716,7 +1663,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
? TCG_TYPE_I64 : TCG_TYPE_I32);
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
@ -1730,13 +1677,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Extract the TLB index from the address into X0. */
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
TCG_REG_X0, TCG_REG_X0, addr_reg,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
s->page_bits - CPU_TLB_ENTRY_BITS);
/* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
/* Load the tlb comparator into X0, and the fast path addend into X1. */
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
@ -1747,31 +1694,28 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* bits within the address. For unaligned access, we check that we don't
* cross pages using the address of the last byte of the access.
*/
if (a_bits >= s_bits) {
if (a_mask >= s_mask) {
x3 = addr_reg;
} else {
tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
tcg_out_insn(s, 3401, ADDI, addr_type,
TCG_REG_X3, addr_reg, s_mask - a_mask);
x3 = TCG_REG_X3;
}
compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
compare_mask = (uint64_t)s->page_mask | a_mask;
/* Store the page mask part of the address into X3. */
tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
TCG_REG_X3, x3, compare_mask);
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
/* Perform the address comparison. */
tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
/* If not equal, we jump to the slow path. */
ldst->label_ptr[0] = s->code_ptr;
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
*h = (HostAddress){
.base = TCG_REG_X1,
.index = addr_reg,
.index_ext = addr_type
};
h->base = TCG_REG_X1,
h->index = addr_reg;
h->index_ext = addr_type;
#else
if (a_mask) {
ldst = new_ldst_label(s);
@ -1788,18 +1732,14 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
if (USE_GUEST_BASE) {
*h = (HostAddress){
.base = TCG_REG_GUEST_BASE,
.index = addr_reg,
.index_ext = addr_type
};
if (guest_base || addr_type == TCG_TYPE_I32) {
h->base = TCG_REG_GUEST_BASE;
h->index = addr_reg;
h->index_ext = addr_type;
} else {
*h = (HostAddress){
.base = addr_reg,
.index = TCG_REG_XZR,
.index_ext = TCG_TYPE_I64
};
h->base = addr_reg;
h->index = TCG_REG_XZR;
h->index_ext = TCG_TYPE_I64;
}
#endif
@ -2218,12 +2158,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
break;
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
tcg_out_qemu_ld(s, a0, a1, a2, ext);
break;
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
break;
@ -2860,11 +2804,15 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_movcond_i64:
return C_O1_I4(r, r, rA, rZ, rZ);
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
return C_O1_I1(r, l);
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(lZ, l);
case INDEX_op_deposit_i32:
@ -2930,8 +2878,39 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
}
}
#ifdef CONFIG_DARWIN
static bool sysctl_for_bool(const char *name)
{
int val = 0;
size_t len = sizeof(val);
if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
return val != 0;
}
/*
* We might in the future ask for properties not present in older kernels,
* but we're only asking about static properties, all of which should be
* 'int'. So we shouln't see ENOMEM (val too small), or any of the other
* more exotic errors.
*/
assert(errno == ENOENT);
return false;
}
#endif
static void tcg_target_init(TCGContext *s)
{
#ifdef __linux__
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
have_lse = hwcap & HWCAP_ATOMICS;
have_lse2 = hwcap & HWCAP_USCAT;
#endif
#ifdef CONFIG_DARWIN
have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
#endif
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
@ -3008,10 +2987,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
CPU_TEMP_BUF_NLONGS * sizeof(long));
#if !defined(CONFIG_SOFTMMU)
if (USE_GUEST_BASE) {
/*
* Note that XZR cannot be encoded in the address base register slot,
* as that actaully encodes SP. Depending on the guest, we may need
* to zero-extend the guest address via the address index register slot,
* therefore we need to load even a zero guest base into a register.
*/
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
}
#endif
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);

View File

@ -57,6 +57,9 @@ typedef enum {
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
extern bool have_lse;
extern bool have_lse2;
/* optional instructions */
#define TCG_TARGET_HAS_div_i32 1
#define TCG_TARGET_HAS_rem_i32 1
@ -126,6 +129,8 @@ typedef enum {
#define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_v64 1
#define TCG_TARGET_HAS_v128 1
#define TCG_TARGET_HAS_v256 0
@ -151,7 +156,6 @@ typedef enum {
#define TCG_TARGET_HAS_cmpsel_vec 0
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 0
#define TCG_TARGET_NEED_LDST_LABELS
#define TCG_TARGET_NEED_POOL_LABELS

View File

@ -12,19 +12,19 @@
C_O0_I1(r)
C_O0_I2(r, r)
C_O0_I2(r, rIN)
C_O0_I2(s, s)
C_O0_I2(q, q)
C_O0_I2(w, r)
C_O0_I3(s, s, s)
C_O0_I3(S, p, s)
C_O0_I3(q, q, q)
C_O0_I3(Q, p, q)
C_O0_I4(r, r, rI, rI)
C_O0_I4(S, p, s, s)
C_O1_I1(r, l)
C_O0_I4(Q, p, q, q)
C_O1_I1(r, q)
C_O1_I1(r, r)
C_O1_I1(w, r)
C_O1_I1(w, w)
C_O1_I1(w, wr)
C_O1_I2(r, 0, rZ)
C_O1_I2(r, l, l)
C_O1_I2(r, q, q)
C_O1_I2(r, r, r)
C_O1_I2(r, r, rI)
C_O1_I2(r, r, rIK)
@ -39,8 +39,8 @@ C_O1_I2(w, w, wZ)
C_O1_I3(w, w, w, w)
C_O1_I4(r, r, r, rI, rI)
C_O1_I4(r, r, rIN, rIK, 0)
C_O2_I1(e, p, l)
C_O2_I2(e, p, l, l)
C_O2_I1(e, p, q)
C_O2_I2(e, p, q, q)
C_O2_I2(r, r, r, r)
C_O2_I4(r, r, r, r, rIN, rIK)
C_O2_I4(r, r, rI, rI, rIN, rIK)

View File

@ -10,9 +10,8 @@
*/
REGS('e', ALL_GENERAL_REGS & 0x5555) /* even regs */
REGS('r', ALL_GENERAL_REGS)
REGS('l', ALL_QLOAD_REGS)
REGS('s', ALL_QSTORE_REGS)
REGS('S', ALL_QSTORE_REGS & 0x5555) /* even qstore */
REGS('q', ALL_QLDST_REGS)
REGS('Q', ALL_QLDST_REGS & 0x5555) /* even qldst */
REGS('w', ALL_VECTOR_REGS)
/*

View File

@ -353,23 +353,16 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
#define ALL_VECTOR_REGS 0xffff0000u
/*
* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
* and r0-r1 doing the byte swapping, so don't use these.
* r3 is removed for softmmu to avoid clashes with helper arguments.
* r0-r3 will be overwritten when reading the tlb entry (softmmu only);
* r14 will be overwritten by the BLNE branching to the slow path.
*/
#ifdef CONFIG_SOFTMMU
#define ALL_QLOAD_REGS \
#define ALL_QLDST_REGS \
(ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
(1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
(1 << TCG_REG_R14)))
#define ALL_QSTORE_REGS \
(ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
(1 << TCG_REG_R2) | (1 << TCG_REG_R14) | \
((TARGET_LONG_BITS == 64) << TCG_REG_R3)))
#else
#define ALL_QLOAD_REGS ALL_GENERAL_REGS
#define ALL_QSTORE_REGS \
(ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R14))
#endif
/*
@ -1330,45 +1323,13 @@ typedef struct {
TCGReg base;
int index;
bool index_scratch;
TCGAtomAlign aa;
} HostAddress;
#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
* int mmu_idx, uintptr_t ra)
*/
static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
[MO_UB] = helper_ret_ldub_mmu,
[MO_SB] = helper_ret_ldsb_mmu,
#if HOST_BIG_ENDIAN
[MO_UW] = helper_be_lduw_mmu,
[MO_UL] = helper_be_ldul_mmu,
[MO_UQ] = helper_be_ldq_mmu,
[MO_SW] = helper_be_ldsw_mmu,
[MO_SL] = helper_be_ldul_mmu,
#else
[MO_UW] = helper_le_lduw_mmu,
[MO_UL] = helper_le_ldul_mmu,
[MO_UQ] = helper_le_ldq_mmu,
[MO_SW] = helper_le_ldsw_mmu,
[MO_SL] = helper_le_ldul_mmu,
#endif
};
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
* uintxx_t val, int mmu_idx, uintptr_t ra)
*/
static void * const qemu_st_helpers[MO_SIZE + 1] = {
[MO_8] = helper_ret_stb_mmu,
#if HOST_BIG_ENDIAN
[MO_16] = helper_be_stw_mmu,
[MO_32] = helper_be_stl_mmu,
[MO_64] = helper_be_stq_mmu,
#else
[MO_16] = helper_le_stw_mmu,
[MO_32] = helper_le_stl_mmu,
[MO_64] = helper_le_stq_mmu,
#endif
};
bool tcg_target_has_memory_bswap(MemOp memop)
{
return false;
}
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
@ -1412,50 +1373,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
return true;
}
#else
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
if (TARGET_LONG_BITS == 64) {
/* 64-bit target address is aligned into R2:R3. */
TCGMovExtend ext[2] = {
{ .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
.src = l->addrlo_reg,
.src_type = TCG_TYPE_I32, .src_ext = MO_UL },
{ .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
.src = l->addrhi_reg,
.src_type = TCG_TYPE_I32, .src_ext = MO_UL },
};
tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
} else {
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
}
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
/*
* Tail call to the helper, with the return address back inline,
* just for the clarity of the debugging traceback -- the helper
* cannot return. We have used BLNE to arrive here, so LR is
* already set.
*/
tcg_out_goto(s, COND_AL, (const void *)
(l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
return true;
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* SOFTMMU */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addrlo, TCGReg addrhi,
@ -1463,8 +1380,26 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp a_bits = get_alignment_bits(opc);
unsigned a_mask = (1 << a_bits) - 1;
unsigned a_mask;
#ifdef CONFIG_SOFTMMU
*h = (HostAddress){
.cond = COND_AL,
.base = addrlo,
.index = TCG_REG_R1,
.index_scratch = true,
};
#else
*h = (HostAddress){
.cond = COND_AL,
.base = addrlo,
.index = guest_base ? TCG_REG_GUEST_BASE : -1,
.index_scratch = false,
};
#endif
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
a_mask = (1 << h->aa.align) - 1;
#ifdef CONFIG_SOFTMMU
int mem_index = get_mmuidx(oi);
@ -1489,25 +1424,25 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Extract the tlb index from the address into R0. */
tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
/*
* Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
* Load the tlb comparator into R2/R3 and the fast path addend into R1.
*/
if (cmp_off == 0) {
if (TARGET_LONG_BITS == 64) {
tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
} else {
if (s->addr_type == TCG_TYPE_I32) {
tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
} else {
tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
}
} else {
tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
if (TARGET_LONG_BITS == 64) {
tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
} else {
if (s->addr_type == TCG_TYPE_I32) {
tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
} else {
tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
}
}
@ -1533,8 +1468,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
addrlo, s_mask - a_mask);
}
if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
if (use_armv7_instructions && s->page_bits <= 16) {
tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
t_addr, TCG_REG_TMP, 0);
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
@ -1544,22 +1479,15 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
}
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
SHIFT_IMM_LSR(TARGET_PAGE_BITS));
SHIFT_IMM_LSR(s->page_bits));
tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
0, TCG_REG_R2, TCG_REG_TMP,
SHIFT_IMM_LSL(TARGET_PAGE_BITS));
SHIFT_IMM_LSL(s->page_bits));
}
if (TARGET_LONG_BITS == 64) {
if (s->addr_type != TCG_TYPE_I32) {
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
}
*h = (HostAddress){
.cond = COND_AL,
.base = addrlo,
.index = TCG_REG_R1,
.index_scratch = true,
};
#else
if (a_mask) {
ldst = new_ldst_label(s);
@ -1568,18 +1496,11 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->addrlo_reg = addrlo;
ldst->addrhi_reg = addrhi;
/* We are expecting a_bits to max out at 7 */
/* We are expecting alignment to max out at 7 */
tcg_debug_assert(a_mask <= 0xff);
/* tst addr, #mask */
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
}
*h = (HostAddress){
.cond = COND_AL,
.base = addrlo,
.index = guest_base ? TCG_REG_GUEST_BASE : -1,
.index_scratch = false,
};
#endif
return ldst;
@ -2064,41 +1985,36 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
ARITH_MOV, args[0], 0, 0);
break;
case INDEX_op_qemu_ld_i32:
if (TARGET_LONG_BITS == 32) {
tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
args[2], TCG_TYPE_I32);
} else {
case INDEX_op_qemu_ld_a32_i32:
tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
break;
case INDEX_op_qemu_ld_a64_i32:
tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
args[3], TCG_TYPE_I32);
}
break;
case INDEX_op_qemu_ld_i64:
if (TARGET_LONG_BITS == 32) {
case INDEX_op_qemu_ld_a32_i64:
tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
args[3], TCG_TYPE_I64);
} else {
break;
case INDEX_op_qemu_ld_a64_i64:
tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_i32:
if (TARGET_LONG_BITS == 32) {
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
args[2], TCG_TYPE_I32);
} else {
case INDEX_op_qemu_st_a32_i32:
tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
break;
case INDEX_op_qemu_st_a64_i32:
tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
args[3], TCG_TYPE_I32);
}
break;
case INDEX_op_qemu_st_i64:
if (TARGET_LONG_BITS == 32) {
case INDEX_op_qemu_st_a32_i64:
tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
args[3], TCG_TYPE_I64);
} else {
break;
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_bswap16_i32:
@ -2239,14 +2155,22 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_setcond2_i32:
return C_O1_I4(r, r, r, rI, rI);
case INDEX_op_qemu_ld_i32:
return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
case INDEX_op_qemu_ld_i64:
return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l);
case INDEX_op_qemu_st_i32:
return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
case INDEX_op_qemu_st_i64:
return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s);
case INDEX_op_qemu_ld_a32_i32:
return C_O1_I1(r, q);
case INDEX_op_qemu_ld_a64_i32:
return C_O1_I2(r, q, q);
case INDEX_op_qemu_ld_a32_i64:
return C_O2_I1(e, p, q);
case INDEX_op_qemu_ld_a64_i64:
return C_O2_I2(e, p, q, q);
case INDEX_op_qemu_st_a32_i32:
return C_O0_I2(q, q);
case INDEX_op_qemu_st_a64_i32:
return C_O0_I3(q, q, q);
case INDEX_op_qemu_st_a32_i64:
return C_O0_I3(Q, p, q);
case INDEX_op_qemu_st_a64_i64:
return C_O0_I4(Q, p, q, q);
case INDEX_op_st_vec:
return C_O0_I2(w, r);

View File

@ -125,6 +125,8 @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_rem_i32 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_v64 use_neon_instructions
#define TCG_TARGET_HAS_v128 use_neon_instructions
#define TCG_TARGET_HAS_v256 0
@ -150,7 +152,6 @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_cmpsel_vec 0
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 0
#define TCG_TARGET_NEED_LDST_LABELS
#define TCG_TARGET_NEED_POOL_LABELS

View File

@ -185,6 +185,7 @@ bool have_avx512dq;
bool have_avx512vbmi2;
bool have_avx512vl;
bool have_movbe;
bool have_atomic16;
#ifdef CONFIG_CPUID_H
static bool have_bmi2;
@ -1091,7 +1092,7 @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
{
/* This function is only used for passing structs by reference. */
tcg_debug_assert(imm == (int32_t)imm);
tcg_out_modrm_offset(s, OPC_LEA, rd, rs, imm);
tcg_out_modrm_offset(s, OPC_LEA | P_REXW, rd, rs, imm);
}
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
@ -1314,8 +1315,10 @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
{
if (dest != src) {
tcg_out_ext32u(s, dest, src);
}
}
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
{
@ -1773,34 +1776,13 @@ typedef struct {
int index;
int ofs;
int seg;
TCGAtomAlign aa;
} HostAddress;
#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
* int mmu_idx, uintptr_t ra)
*/
static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_UB] = helper_ret_ldub_mmu,
[MO_LEUW] = helper_le_lduw_mmu,
[MO_LEUL] = helper_le_ldul_mmu,
[MO_LEUQ] = helper_le_ldq_mmu,
[MO_BEUW] = helper_be_lduw_mmu,
[MO_BEUL] = helper_be_ldul_mmu,
[MO_BEUQ] = helper_be_ldq_mmu,
};
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
* uintxx_t val, int mmu_idx, uintptr_t ra)
*/
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_UB] = helper_ret_stb_mmu,
[MO_LEUW] = helper_le_stw_mmu,
[MO_LEUL] = helper_le_stl_mmu,
[MO_LEUQ] = helper_le_stq_mmu,
[MO_BEUW] = helper_be_stw_mmu,
[MO_BEUL] = helper_be_stl_mmu,
[MO_BEUQ] = helper_be_stq_mmu,
};
bool tcg_target_has_memory_bswap(MemOp memop)
{
return have_movbe;
}
/*
* Because i686 has no register parameters and because x86_64 has xchg
@ -1837,12 +1819,12 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
/* resolve label address */
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
if (label_ptr[1]) {
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
}
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_branch(s, 1, qemu_ld_helpers[opc & MO_SIZE]);
tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
tcg_out_jmp(s, l->raddr);
@ -1859,61 +1841,18 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
/* resolve label address */
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
if (label_ptr[1]) {
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
}
tcg_out_st_helper_args(s, l, &ldst_helper_param);
tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_branch(s, 1, qemu_st_helpers[opc & MO_SIZE]);
tcg_out_jmp(s, l->raddr);
return true;
}
#else
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
/* resolve label address */
tcg_patch32(l->label_ptr[0], s->code_ptr - l->label_ptr[0] - 4);
if (TCG_TARGET_REG_BITS == 32) {
int ofs = 0;
tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
ofs += 4;
tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
ofs += 4;
if (TARGET_LONG_BITS == 64) {
tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
ofs += 4;
}
tcg_out_pushi(s, (uintptr_t)l->raddr);
} else {
tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
l->addrlo_reg);
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, (uintptr_t)l->raddr);
tcg_out_push(s, TCG_REG_RAX);
}
/* "Tail call" to the helper, with the return address back inline. */
tcg_out_jmp(s, (const void *)(l->is_ld ? helper_unaligned_ld
: helper_unaligned_st));
return true;
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#ifndef CONFIG_SOFTMMU
static HostAddress x86_guest_base = {
.index = -1
};
@ -1945,7 +1884,7 @@ static inline int setup_guest_base_seg(void)
return 0;
}
#endif /* setup_guest_base_seg */
#endif /* SOFTMMU */
#endif /* !SOFTMMU */
/*
* For softmmu, perform the TLB load and compare.
@ -1959,8 +1898,18 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
unsigned a_mask = (1 << a_bits) - 1;
unsigned a_mask;
#ifdef CONFIG_SOFTMMU
h->index = TCG_REG_L0;
h->ofs = 0;
h->seg = 0;
#else
*h = x86_guest_base;
#endif
h->base = addrlo;
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
a_mask = (1 << h->aa.align) - 1;
#ifdef CONFIG_SOFTMMU
int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
@ -1971,7 +1920,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
unsigned mem_index = get_mmuidx(oi);
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1 << s_bits) - 1;
target_ulong tlb_mask;
int tlb_mask;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
@ -1980,13 +1929,11 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->addrhi_reg = addrhi;
if (TCG_TARGET_REG_BITS == 64) {
if (TARGET_LONG_BITS == 64) {
ttype = TCG_TYPE_I64;
trexw = P_REXW;
}
ttype = s->addr_type;
trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
hrexw = P_REXW;
if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
if (s->page_bits + s->tlb_dyn_max_bits > 32) {
tlbtype = TCG_TYPE_I64;
tlbrexw = P_REXW;
}
@ -1995,7 +1942,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
s->page_bits - CPU_TLB_ENTRY_BITS);
tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index) +
@ -2010,13 +1957,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* copy the address and mask. For lesser alignments, check that we don't
* cross pages for the complete access.
*/
if (a_bits >= s_bits) {
if (a_mask >= s_mask) {
tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
} else {
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
addrlo, s_mask - a_mask);
}
tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
tlb_mask = s->page_mask | a_mask;
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
@ -2028,7 +1975,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->label_ptr[0] = s->code_ptr;
s->code_ptr += 4;
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
/* cmp 4(TCG_REG_L0), addrhi */
tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
@ -2041,13 +1988,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* TLB Hit. */
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
offsetof(CPUTLBEntry, addend));
*h = (HostAddress) {
.base = addrlo,
.index = TCG_REG_L0,
};
#else
if (a_bits) {
if (a_mask) {
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
@ -2061,9 +2003,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->label_ptr[0] = s->code_ptr;
s->code_ptr += 4;
}
*h = x86_guest_base;
h->base = addrlo;
#endif
return ldst;
@ -2536,35 +2475,51 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
break;
case INDEX_op_qemu_ld_i32:
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
} else {
case INDEX_op_qemu_ld_a64_i32:
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
}
break;
case INDEX_op_qemu_ld_i64:
}
/* fall through */
case INDEX_op_qemu_ld_a32_i32:
tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_ld_a32_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
} else if (TARGET_LONG_BITS == 32) {
} else {
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_ld_a64_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
} else {
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st8_i32:
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
} else {
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st8_a64_i32:
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
}
break;
case INDEX_op_qemu_st_i64:
}
/* fall through */
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st8_a32_i32:
tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_st_a32_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
} else if (TARGET_LONG_BITS == 32) {
} else {
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_a64_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
} else {
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
}
@ -3242,26 +3197,29 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_clz_i64:
return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
case INDEX_op_qemu_ld_i32:
return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
? C_O1_I1(r, L) : C_O1_I2(r, L, L));
case INDEX_op_qemu_ld_a32_i32:
return C_O1_I1(r, L);
case INDEX_op_qemu_ld_a64_i32:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O1_I2(r, L, L);
case INDEX_op_qemu_st_i32:
return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
? C_O0_I2(L, L) : C_O0_I3(L, L, L));
case INDEX_op_qemu_st8_i32:
return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
? C_O0_I2(s, L) : C_O0_I3(s, L, L));
case INDEX_op_qemu_st_a32_i32:
return C_O0_I2(L, L);
case INDEX_op_qemu_st_a64_i32:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
case INDEX_op_qemu_st8_a32_i32:
return C_O0_I2(s, L);
case INDEX_op_qemu_st8_a64_i32:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(s, L) : C_O0_I3(s, L, L);
case INDEX_op_qemu_ld_i64:
return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
: TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, L)
: C_O2_I2(r, r, L, L));
case INDEX_op_qemu_ld_a32_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L);
case INDEX_op_qemu_ld_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I2(r, r, L, L);
case INDEX_op_qemu_st_i64:
return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L)
: TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
: C_O0_I4(L, L, L, L));
case INDEX_op_qemu_st_a32_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
case INDEX_op_qemu_st_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);
case INDEX_op_brcond2_i32:
return C_O0_I4(r, r, ri, ri);
@ -4052,6 +4010,32 @@ static void tcg_target_init(TCGContext *s)
have_avx512dq = (b7 & bit_AVX512DQ) != 0;
have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
}
/*
* The Intel SDM has added:
* Processors that enumerate support for Intel® AVX
* (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
* guarantee that the 16-byte memory operations performed
* by the following instructions will always be carried
* out atomically:
* - MOVAPD, MOVAPS, and MOVDQA.
* - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
* - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
* with EVEX.128 and k0 (masking disabled).
* Note that these instructions require the linear addresses
* of their memory operands to be 16-byte aligned.
*
* AMD has provided an even stronger guarantee that processors
* with AVX provide 16-byte atomicity for all cachable,
* naturally aligned single loads and stores, e.g. MOVDQU.
*
* See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
*/
if (have_avx1) {
__cpuid(0, a, b, c, d);
have_atomic16 = (c == signature_INTEL_ecx ||
c == signature_AMD_ecx);
}
}
}
}

View File

@ -120,6 +120,7 @@ extern bool have_avx512dq;
extern bool have_avx512vbmi2;
extern bool have_avx512vl;
extern bool have_movbe;
extern bool have_atomic16;
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
@ -153,9 +154,9 @@ extern bool have_movbe;
#define TCG_TARGET_HAS_mulsh_i32 0
#if TCG_TARGET_REG_BITS == 64
/* Keep target addresses zero-extended in a register. */
#define TCG_TARGET_HAS_extrl_i64_i32 (TARGET_LONG_BITS == 32)
#define TCG_TARGET_HAS_extrh_i64_i32 (TARGET_LONG_BITS == 32)
/* Keep 32-bit values zero-extended in a register. */
#define TCG_TARGET_HAS_extrl_i64_i32 1
#define TCG_TARGET_HAS_extrh_i64_i32 1
#define TCG_TARGET_HAS_div2_i64 1
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_ext8s_i64 1
@ -193,6 +194,8 @@ extern bool have_movbe;
#define TCG_TARGET_HAS_qemu_st8_i32 1
#endif
#define TCG_TARGET_HAS_qemu_ldst_i128 0
/* We do not support older SSE systems, only beginning with AVX1. */
#define TCG_TARGET_HAS_v64 have_avx1
#define TCG_TARGET_HAS_v128 have_avx1
@ -239,9 +242,6 @@ extern bool have_movbe;
#include "tcg/tcg-mo.h"
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
#define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe
#define TCG_TARGET_NEED_LDST_LABELS
#define TCG_TARGET_NEED_POOL_LABELS

View File

@ -30,6 +30,7 @@
*/
#include "../tcg-ldst.c.inc"
#include <asm/hwcap.h>
#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
@ -783,30 +784,6 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
* Load/store helpers for SoftMMU, and qemu_ld/st implementations
*/
#if defined(CONFIG_SOFTMMU)
/*
* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
* MemOpIdx oi, uintptr_t ra)
*/
static void * const qemu_ld_helpers[4] = {
[MO_8] = helper_ret_ldub_mmu,
[MO_16] = helper_le_lduw_mmu,
[MO_32] = helper_le_ldul_mmu,
[MO_64] = helper_le_ldq_mmu,
};
/*
* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
* uintxx_t val, MemOpIdx oi,
* uintptr_t ra)
*/
static void * const qemu_st_helpers[4] = {
[MO_8] = helper_ret_stb_mmu,
[MO_16] = helper_le_stw_mmu,
[MO_32] = helper_le_stl_mmu,
[MO_64] = helper_le_stq_mmu,
};
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
tcg_out_opc_b(s, 0);
@ -845,41 +822,18 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
return tcg_out_goto(s, l->raddr);
}
#else
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
/* resolve label address */
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
/* tail call, with the return address back inline. */
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr);
tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld
: helper_unaligned_st), true);
return true;
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* CONFIG_SOFTMMU */
typedef struct {
TCGReg base;
TCGReg index;
TCGAtomAlign aa;
} HostAddress;
bool tcg_target_has_memory_bswap(MemOp memop)
{
return false;
}
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@ -890,9 +844,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addr_reg, MemOpIdx oi,
bool is_ld)
{
TCGType addr_type = s->addr_type;
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
MemOp a_bits;
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
a_bits = h->aa.align;
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
@ -900,7 +858,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
tcg_target_long compare_mask;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
@ -913,25 +870,31 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
s->page_bits - CPU_TLB_ENTRY_BITS);
tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
/* Load the tlb comparator and the addend. */
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
offsetof(CPUTLBEntry, addend));
/* We don't support unaligned accesses. */
/*
* For aligned accesses, we check the first byte and include the alignment
* bits within the address. For unaligned access, we check that we don't
* cross pages using the address of the last byte of the access.
*/
if (a_bits < s_bits) {
a_bits = s_bits;
unsigned a_mask = (1u << a_bits) - 1;
unsigned s_mask = (1u << s_bits) - 1;
tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
} else {
tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
}
/* Clear the non-page, non-alignment bits from the address. */
compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
a_bits, s->page_bits - 1);
/* Compare masked address with the TLB entry. */
ldst->label_ptr[0] = s->code_ptr;
@ -961,7 +924,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
#endif
if (TARGET_LONG_BITS == 32) {
if (addr_type == TCG_TYPE_I32) {
h->base = TCG_REG_TMP0;
tcg_out_ext32u(s, h->base, addr_reg);
} else {
@ -1481,16 +1444,20 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
break;
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
break;
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
break;
@ -1530,8 +1497,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_st32_i64:
case INDEX_op_st_i32:
case INDEX_op_st_i64:
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(rZ, r);
case INDEX_op_brcond_i32:
@ -1573,8 +1542,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_ld32u_i64:
case INDEX_op_ld_i32:
case INDEX_op_ld_i64:
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
return C_O1_I1(r, r);
case INDEX_op_andc_i32:
@ -1727,6 +1698,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
static void tcg_target_init(TCGContext *s)
{
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
/* Server and desktop class cpus have UAL; embedded cpus do not. */
if (!(hwcap & HWCAP_LOONGARCH_UAL)) {
error_report("TCG: unaligned access support required; exiting");
exit(EXIT_FAILURE);
}
tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;

View File

@ -168,11 +168,10 @@ typedef enum {
#define TCG_TARGET_HAS_muls2_i64 0
#define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_NEED_LDST_LABELS
#define TCG_TARGET_HAS_MEMORY_BSWAP 0
#endif /* LOONGARCH_TCG_TARGET_H */

View File

@ -6,6 +6,7 @@ tcg_ss.add(files(
'tcg.c',
'tcg-common.c',
'tcg-op.c',
'tcg-op-ldst.c',
'tcg-op-gvec.c',
'tcg-op-vec.c',
))

View File

@ -354,10 +354,6 @@ typedef enum {
/* Aliases for convenience. */
ALIAS_PADD = sizeof(void *) == 4 ? OPC_ADDU : OPC_DADDU,
ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
? OPC_SRL : OPC_DSRL,
ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
? OPC_ADDIU : OPC_DADDIU,
} MIPSInsn;
/*
@ -1075,38 +1071,6 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
tcg_out_nop(s);
}
#if defined(CONFIG_SOFTMMU)
static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
[MO_UB] = helper_ret_ldub_mmu,
[MO_SB] = helper_ret_ldsb_mmu,
#if HOST_BIG_ENDIAN
[MO_UW] = helper_be_lduw_mmu,
[MO_SW] = helper_be_ldsw_mmu,
[MO_UL] = helper_be_ldul_mmu,
[MO_SL] = helper_be_ldsl_mmu,
[MO_UQ] = helper_be_ldq_mmu,
#else
[MO_UW] = helper_le_lduw_mmu,
[MO_SW] = helper_le_ldsw_mmu,
[MO_UL] = helper_le_ldul_mmu,
[MO_UQ] = helper_le_ldq_mmu,
[MO_SL] = helper_le_ldsl_mmu,
#endif
};
static void * const qemu_st_helpers[MO_SIZE + 1] = {
[MO_UB] = helper_ret_stb_mmu,
#if HOST_BIG_ENDIAN
[MO_UW] = helper_be_stw_mmu,
[MO_UL] = helper_be_stl_mmu,
[MO_UQ] = helper_be_stq_mmu,
#else
[MO_UW] = helper_le_stw_mmu,
[MO_UL] = helper_le_stl_mmu,
[MO_UQ] = helper_le_stq_mmu,
#endif
};
/* We have four temps, we might as well expose three of them. */
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
@ -1119,8 +1083,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
/* resolve label address */
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
|| (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
&& !reloc_pc16(l->label_ptr[1], tgt_rx))) {
|| (l->label_ptr[1] && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
return false;
}
@ -1149,8 +1112,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
/* resolve label address */
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
|| (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
&& !reloc_pc16(l->label_ptr[1], tgt_rx))) {
|| (l->label_ptr[1] && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
return false;
}
@ -1170,61 +1132,16 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
return true;
}
#else
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
void *target;
if (!reloc_pc16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
/* A0 is env, A1 is skipped, A2:A3 is the uint64_t address. */
TCGReg a2 = MIPS_BE ? l->addrhi_reg : l->addrlo_reg;
TCGReg a3 = MIPS_BE ? l->addrlo_reg : l->addrhi_reg;
if (a3 != TCG_REG_A2) {
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
} else if (a2 != TCG_REG_A3) {
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
} else {
tcg_out_mov(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A2);
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_A3);
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, TCG_TMP0);
}
} else {
tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
}
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
/*
* Tail call to the helper, with the return address back inline.
* We have arrived here via BNEL, so $31 is already set.
*/
target = (l->is_ld ? helper_unaligned_ld : helper_unaligned_st);
tcg_out_call_int(s, target, true);
return true;
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* SOFTMMU */
typedef struct {
TCGReg base;
MemOp align;
TCGAtomAlign aa;
} HostAddress;
bool tcg_target_has_memory_bswap(MemOp memop)
{
return false;
}
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@ -1235,13 +1152,18 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, bool is_ld)
{
TCGType addr_type = s->addr_type;
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
MemOp a_bits;
unsigned s_bits = opc & MO_SIZE;
unsigned a_mask = (1 << a_bits) - 1;
unsigned a_mask;
TCGReg base;
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
a_bits = h->aa.align;
a_mask = (1 << a_bits) - 1;
#ifdef CONFIG_SOFTMMU
unsigned s_mask = (1 << s_bits) - 1;
int mem_index = get_mmuidx(oi);
@ -1265,23 +1187,26 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
/* Extract the TLB index from the address into TMP3. */
tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
s->page_bits - CPU_TLB_ENTRY_BITS);
} else {
tcg_out_dsrl(s, TCG_TMP3, addrlo,
s->page_bits - CPU_TLB_ENTRY_BITS);
}
tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
/* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
/* Load the (low-half) tlb comparator. */
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
} else {
tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off);
}
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
/* Load the tlb comparator. */
tcg_out_ld(s, addr_type, TCG_TMP0, TCG_TMP3, cmp_off);
/* Load the tlb addend for the fast path. */
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
} else {
/* Load the low half of the tlb comparator. */
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
}
/*
@ -1289,16 +1214,20 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* For unaligned accesses, compare against the end of the access to
* verify that it does not cross a page boundary.
*/
tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask);
if (a_mask < s_mask) {
tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask);
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
} else {
tcg_out_opc_imm(s, OPC_DADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
}
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
} else {
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
}
/* Zero extend a 32-bit guest address for a 64-bit host. */
if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
tcg_out_ext32u(s, TCG_TMP2, addrlo);
addrlo = TCG_TMP2;
}
@ -1307,7 +1236,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
/* Load and test the high half tlb comparator. */
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
/* delay slot */
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
@ -1344,7 +1273,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
}
base = addrlo;
if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
tcg_out_ext32u(s, TCG_REG_A0, base);
base = TCG_REG_A0;
}
@ -1360,7 +1289,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
#endif
h->base = base;
h->align = a_bits;
return ldst;
}
@ -1473,7 +1401,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
} else {
tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type);
@ -1560,7 +1488,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
} else {
tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc);
@ -2030,34 +1958,49 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
break;
case INDEX_op_qemu_ld_i32:
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
} else {
case INDEX_op_qemu_ld_a64_i32:
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_qemu_ld(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
}
break;
case INDEX_op_qemu_ld_i64:
}
/* fall through */
case INDEX_op_qemu_ld_a32_i32:
tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_ld_a32_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
} else if (TARGET_LONG_BITS == 32) {
} else {
tcg_out_qemu_ld(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_ld_a64_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
} else {
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_i32:
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
} else {
case INDEX_op_qemu_st_a64_i32:
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_qemu_st(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
}
break;
case INDEX_op_qemu_st_i64:
}
/* fall through */
case INDEX_op_qemu_st_a32_i32:
tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_st_a32_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
} else if (TARGET_LONG_BITS == 32) {
} else {
tcg_out_qemu_st(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_a64_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
} else {
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
}
@ -2216,19 +2159,22 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_brcond2_i32:
return C_O0_I4(rZ, rZ, rZ, rZ);
case INDEX_op_qemu_ld_i32:
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
? C_O1_I1(r, r) : C_O1_I2(r, r, r));
case INDEX_op_qemu_st_i32:
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r));
case INDEX_op_qemu_ld_i64:
return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
: TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
: C_O2_I2(r, r, r, r));
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_ld_a32_i32:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_a64_i32:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
case INDEX_op_qemu_st_a32_i32:
return C_O0_I2(rZ, r);
case INDEX_op_qemu_st_a64_i32:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r);
case INDEX_op_qemu_ld_a32_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
case INDEX_op_qemu_ld_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
case INDEX_op_qemu_st_a32_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, rZ, r);
case INDEX_op_qemu_st_a64_i64:
return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
: TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r)
: C_O0_I4(rZ, rZ, r, r));
default:

View File

@ -204,9 +204,9 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
#endif
#define TCG_TARGET_DEFAULT_MO 0
#define TCG_TARGET_HAS_MEMORY_BSWAP 0
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_DEFAULT_MO 0
#define TCG_TARGET_NEED_LDST_LABELS
#endif

View File

@ -2184,13 +2184,22 @@ void tcg_optimize(TCGContext *s)
CASE_OP_32_64_VEC(orc):
done = fold_orc(&ctx, op);
break;
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
done = fold_qemu_ld(&ctx, op);
break;
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st8_i32:
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st8_a32_i32:
case INDEX_op_qemu_st8_a64_i32:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
done = fold_qemu_st(&ctx, op);
break;
CASE_OP_32_64(rem):

View File

@ -1962,33 +1962,6 @@ static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
[MO_BSWAP | MO_UQ] = STDBRX,
};
#if defined (CONFIG_SOFTMMU)
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
* int mmu_idx, uintptr_t ra)
*/
static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_UB] = helper_ret_ldub_mmu,
[MO_LEUW] = helper_le_lduw_mmu,
[MO_LEUL] = helper_le_ldul_mmu,
[MO_LEUQ] = helper_le_ldq_mmu,
[MO_BEUW] = helper_be_lduw_mmu,
[MO_BEUL] = helper_be_ldul_mmu,
[MO_BEUQ] = helper_be_ldq_mmu,
};
/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
* uintxx_t val, int mmu_idx, uintptr_t ra)
*/
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_UB] = helper_ret_stb_mmu,
[MO_LEUW] = helper_le_stw_mmu,
[MO_LEUL] = helper_le_stl_mmu,
[MO_LEUQ] = helper_le_stq_mmu,
[MO_BEUW] = helper_be_stw_mmu,
[MO_BEUL] = helper_be_stl_mmu,
[MO_BEUQ] = helper_be_stq_mmu,
};
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
if (arg < 0) {
@ -2017,7 +1990,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
}
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
tcg_out_b(s, 0, lb->raddr);
@ -2033,60 +2006,23 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
}
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
tcg_out_b(s, 0, lb->raddr);
return true;
}
#else
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
TCGReg arg = TCG_REG_R4;
arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
if (l->addrlo_reg != arg) {
tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
} else if (l->addrhi_reg != arg + 1) {
tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
} else {
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg);
tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1);
tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0);
}
} else {
tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg);
}
tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0);
/* "Tail call" to the helper, with the return address back inline. */
tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld
: helper_unaligned_st));
return true;
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* SOFTMMU */
typedef struct {
TCGReg base;
TCGReg index;
TCGAtomAlign aa;
} HostAddress;
bool tcg_target_has_memory_bswap(MemOp memop)
{
return true;
}
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
@ -2099,7 +2035,23 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
MemOp a_bits;
/*
* Book II, Section 1.4, Single-Copy Atomicity, specifies:
*
* Before 3.0, "An access that is not atomic is performed as a set of
* smaller disjoint atomic accesses. In general, the number and alignment
* of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN.
*
* As of 3.0, "the non-atomic access is performed as described in
* the corresponding list", which matches MO_ATOM_SUBALIGN.
*/
h->aa = atom_and_align_for_opc(s, opc,
have_isa_3_00 ? MO_ATOM_SUBALIGN
: MO_ATOM_IFALIGN,
false);
a_bits = h->aa.align;
#ifdef CONFIG_SOFTMMU
int mem_index = get_mmuidx(oi);
@ -2125,10 +2077,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Extract the page index, shifted into place for tlb index. */
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_shri32(s, TCG_REG_R0, addrlo,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
s->page_bits - CPU_TLB_ENTRY_BITS);
} else {
tcg_out_shri64(s, TCG_REG_R0, addrlo,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
s->page_bits - CPU_TLB_ENTRY_BITS);
}
tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
@ -2167,7 +2119,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
a_bits = s_bits;
}
tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
(32 - a_bits) & 31, 31 - s->page_bits);
} else {
TCGReg t = addrlo;
@ -2188,13 +2140,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Mask the address for the requested alignment. */
if (TARGET_LONG_BITS == 32) {
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
(32 - a_bits) & 31, 31 - s->page_bits);
} else if (a_bits == 0) {
tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
} else {
tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
64 - s->page_bits, s->page_bits - a_bits);
tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
}
}
@ -2880,43 +2832,58 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
break;
case INDEX_op_qemu_ld_i32:
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
args[2], TCG_TYPE_I32);
} else {
case INDEX_op_qemu_ld_a64_i32:
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
args[3], TCG_TYPE_I32);
}
break;
case INDEX_op_qemu_ld_i64:
}
/* fall through */
case INDEX_op_qemu_ld_a32_i32:
tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
break;
case INDEX_op_qemu_ld_a32_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
args[2], TCG_TYPE_I64);
} else if (TARGET_LONG_BITS == 32) {
} else {
tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
args[3], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_ld_a64_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
args[2], TCG_TYPE_I64);
} else {
tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_i32:
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
args[2], TCG_TYPE_I32);
} else {
case INDEX_op_qemu_st_a64_i32:
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
args[3], TCG_TYPE_I32);
}
break;
case INDEX_op_qemu_st_i64:
}
/* fall through */
case INDEX_op_qemu_st_a32_i32:
tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
break;
case INDEX_op_qemu_st_a32_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
args[2], TCG_TYPE_I64);
} else if (TARGET_LONG_BITS == 32) {
} else {
tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
args[3], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_a64_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
args[2], TCG_TYPE_I64);
} else {
tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
args[4], TCG_TYPE_I64);
@ -3737,25 +3704,23 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_sub2_i32:
return C_O2_I4(r, r, rI, rZM, r, r);
case INDEX_op_qemu_ld_i32:
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
? C_O1_I1(r, r)
: C_O1_I2(r, r, r));
case INDEX_op_qemu_ld_a32_i32:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_a64_i32:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
case INDEX_op_qemu_ld_a32_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
case INDEX_op_qemu_ld_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
case INDEX_op_qemu_st_i32:
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
? C_O0_I2(r, r)
: C_O0_I3(r, r, r));
case INDEX_op_qemu_ld_i64:
return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
: TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
: C_O2_I2(r, r, r, r));
case INDEX_op_qemu_st_i64:
return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
: TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
: C_O0_I4(r, r, r, r));
case INDEX_op_qemu_st_a32_i32:
return C_O0_I2(r, r);
case INDEX_op_qemu_st_a64_i32:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
case INDEX_op_qemu_st_a32_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
case INDEX_op_qemu_st_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
case INDEX_op_add_vec:
case INDEX_op_sub_vec:

View File

@ -149,6 +149,8 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_mulsh_i64 1
#endif
#define TCG_TARGET_HAS_qemu_ldst_i128 0
/*
* While technically Altivec could support V64, it has no 64-bit store
* instruction and substituting two 32-bit stores makes the generated
@ -179,7 +181,6 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_cmpsel_vec 0
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
#define TCG_TARGET_NEED_LDST_LABELS
#define TCG_TARGET_NEED_POOL_LABELS

View File

@ -846,49 +846,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
* Load/store and TLB
*/
#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
* MemOpIdx oi, uintptr_t ra)
*/
static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
[MO_UB] = helper_ret_ldub_mmu,
[MO_SB] = helper_ret_ldsb_mmu,
#if HOST_BIG_ENDIAN
[MO_UW] = helper_be_lduw_mmu,
[MO_SW] = helper_be_ldsw_mmu,
[MO_UL] = helper_be_ldul_mmu,
#if TCG_TARGET_REG_BITS == 64
[MO_SL] = helper_be_ldsl_mmu,
#endif
[MO_UQ] = helper_be_ldq_mmu,
#else
[MO_UW] = helper_le_lduw_mmu,
[MO_SW] = helper_le_ldsw_mmu,
[MO_UL] = helper_le_ldul_mmu,
#if TCG_TARGET_REG_BITS == 64
[MO_SL] = helper_le_ldsl_mmu,
#endif
[MO_UQ] = helper_le_ldq_mmu,
#endif
};
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
* uintxx_t val, MemOpIdx oi,
* uintptr_t ra)
*/
static void * const qemu_st_helpers[MO_SIZE + 1] = {
[MO_8] = helper_ret_stb_mmu,
#if HOST_BIG_ENDIAN
[MO_16] = helper_be_stw_mmu,
[MO_32] = helper_be_stl_mmu,
[MO_64] = helper_be_stq_mmu,
#else
[MO_16] = helper_le_stw_mmu,
[MO_32] = helper_le_stl_mmu,
[MO_64] = helper_le_stq_mmu,
#endif
};
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
@ -896,6 +853,11 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
tcg_debug_assert(ok);
}
bool tcg_target_has_memory_bswap(MemOp memop)
{
return false;
}
/* We have three temps, we might as well expose them. */
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
@ -935,34 +897,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_goto(s, l->raddr);
return true;
}
#else
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
/* resolve label address */
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
/* tail call, with the return address back inline. */
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr);
tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld
: helper_unaligned_st), true);
return true;
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* CONFIG_SOFTMMU */
/*
* For softmmu, perform the TLB load and compare.
@ -976,17 +910,21 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
unsigned a_mask = (1u << a_bits) - 1;
TCGAtomAlign aa;
unsigned a_mask;
aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
a_mask = (1u << aa.align) - 1;
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1u << s_bits) - 1;
int mem_index = get_mmuidx(oi);
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
tcg_target_long compare_mask;
int compare_mask;
TCGReg addr_adj;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
@ -995,14 +933,33 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
s->page_bits - CPU_TLB_ENTRY_BITS);
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
/*
* For aligned accesses, we check the first byte and include the alignment
* bits within the address. For unaligned access, we check that we don't
* cross pages using the address of the last byte of the access.
*/
addr_adj = addr_reg;
if (a_mask < s_mask) {
addr_adj = TCG_REG_TMP0;
tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
addr_adj, addr_reg, s_mask - a_mask);
}
compare_mask = s->page_mask | a_mask;
if (compare_mask == sextreg(compare_mask, 0, 12)) {
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
} else {
tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
}
/* Load the tlb comparator and the addend. */
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
is_ld ? offsetof(CPUTLBEntry, addr_read)
@ -1010,29 +967,17 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
offsetof(CPUTLBEntry, addend));
/* We don't support unaligned accesses. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
/* Clear the non-page, non-alignment bits from the address. */
compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask;
if (compare_mask == sextreg(compare_mask, 0, 12)) {
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask);
} else {
tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
}
/* Compare masked address with the TLB entry. */
ldst->label_ptr[0] = s->code_ptr;
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
/* TLB Hit - translate address using addend. */
addr_adj = addr_reg;
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
addr_reg = TCG_REG_TMP0;
addr_adj = TCG_REG_TMP0;
tcg_out_ext32u(s, addr_adj, addr_reg);
}
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg);
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_adj);
*pbase = TCG_REG_TMP0;
#else
if (a_mask) {
@ -1041,8 +986,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
/* We are expecting a_bits max 7, so we can always use andi. */
tcg_debug_assert(a_bits < 12);
/* We are expecting alignment max 7, so we can always use andi. */
tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
ldst->label_ptr[0] = s->code_ptr;
@ -1437,16 +1382,20 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_setcond(s, args[3], a0, a1, a2);
break;
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
break;
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
break;
@ -1588,11 +1537,15 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_sub2_i64:
return C_O2_I4(r, r, rZ, rZ, rM, rM);
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
return C_O1_I1(r, r);
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(rZ, r);
default:

View File

@ -163,11 +163,11 @@ typedef enum {
#define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_NEED_LDST_LABELS
#define TCG_TARGET_NEED_POOL_LABELS
#define TCG_TARGET_HAS_MEMORY_BSWAP 0
#endif

View File

@ -438,33 +438,6 @@ static const uint8_t tcg_cond_to_ltr_cond[] = {
[TCG_COND_GEU] = S390_CC_ALWAYS,
};
#ifdef CONFIG_SOFTMMU
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
[MO_UB] = helper_ret_ldub_mmu,
[MO_SB] = helper_ret_ldsb_mmu,
[MO_LEUW] = helper_le_lduw_mmu,
[MO_LESW] = helper_le_ldsw_mmu,
[MO_LEUL] = helper_le_ldul_mmu,
[MO_LESL] = helper_le_ldsl_mmu,
[MO_LEUQ] = helper_le_ldq_mmu,
[MO_BEUW] = helper_be_lduw_mmu,
[MO_BESW] = helper_be_ldsw_mmu,
[MO_BEUL] = helper_be_ldul_mmu,
[MO_BESL] = helper_be_ldsl_mmu,
[MO_BEUQ] = helper_be_ldq_mmu,
};
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_UB] = helper_ret_stb_mmu,
[MO_LEUW] = helper_le_stw_mmu,
[MO_LEUL] = helper_le_stl_mmu,
[MO_LEUQ] = helper_le_stq_mmu,
[MO_BEUW] = helper_be_stw_mmu,
[MO_BEUL] = helper_be_stl_mmu,
[MO_BEUQ] = helper_be_stq_mmu,
};
#endif
static const tcg_insn_unit *tb_ret_addr;
uint64_t s390_facilities[3];
@ -1599,8 +1572,14 @@ typedef struct {
TCGReg base;
TCGReg index;
int disp;
TCGAtomAlign aa;
} HostAddress;
bool tcg_target_has_memory_bswap(MemOp memop)
{
return true;
}
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
HostAddress h)
{
@ -1706,7 +1685,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
}
}
#if defined(CONFIG_SOFTMMU)
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 1, .tmp = { TCG_TMP0 }
};
@ -1721,7 +1699,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
}
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
@ -1738,39 +1716,11 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
}
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
return true;
}
#else
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
return false;
}
tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
/* "Tail call" to the helper, with the return address back inline. */
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
: helper_unaligned_st));
return true;
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* CONFIG_SOFTMMU */
/*
* For softmmu, perform the TLB load and compare.
@ -1784,8 +1734,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
unsigned a_mask = (1u << a_bits) - 1;
unsigned a_mask;
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
a_mask = (1 << h->aa.align) - 1;
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
@ -1803,7 +1755,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->addrlo_reg = addr_reg;
tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
s->page_bits - CPU_TLB_ENTRY_BITS);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
@ -1815,8 +1767,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* bits within the address. For unaligned access, we check that we don't
* cross pages using the address of the last byte of the access.
*/
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
tlb_mask = (uint64_t)s->page_mask | a_mask;
if (a_off == 0) {
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
} else {
@ -1857,7 +1809,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->addrlo_reg = addr_reg;
/* We are expecting a_bits to max out at 7, much lower than TMLL. */
tcg_debug_assert(a_bits < 16);
tcg_debug_assert(a_mask <= 0xffff);
tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
@ -2258,16 +2210,20 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
args[2], const_args[2], args[3], const_args[3], args[4]);
break;
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
break;
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
break;
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
break;
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
break;
@ -3141,11 +3097,15 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_ctpop_i64:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
return C_O1_I1(r, r);
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
return C_O0_I2(r, r);
case INDEX_op_deposit_i32:

View File

@ -140,6 +140,8 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_v256 0
@ -172,8 +174,6 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
#define TCG_TARGET_NEED_LDST_LABELS
#define TCG_TARGET_NEED_POOL_LABELS

View File

@ -12,8 +12,6 @@
C_O0_I1(r)
C_O0_I2(rZ, r)
C_O0_I2(rZ, rJ)
C_O0_I2(sZ, s)
C_O1_I1(r, s)
C_O1_I1(r, r)
C_O1_I2(r, r, r)
C_O1_I2(r, rZ, rJ)

View File

@ -9,7 +9,6 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
REGS('s', ALL_QLDST_REGS)
/*
* Define constraint letters for constants:

View File

@ -27,6 +27,7 @@
#error "unsupported code generation mode"
#endif
#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#ifdef CONFIG_DEBUG_TCG
@ -70,22 +71,12 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#define TCG_CT_CONST_S13 0x200
#define TCG_CT_CONST_ZERO 0x400
/*
* For softmmu, we need to avoid conflicts with the first 3
* argument registers to perform the tlb lookup, and to call
* the helper function.
*/
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_O0, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
/* Define some temporary registers. T2 is used for constant generation. */
/* Define some temporary registers. T3 is used for constant generation. */
#define TCG_REG_T1 TCG_REG_G1
#define TCG_REG_T2 TCG_REG_O7
#define TCG_REG_T2 TCG_REG_G2
#define TCG_REG_T3 TCG_REG_O7
#ifndef CONFIG_SOFTMMU
# define TCG_GUEST_BASE_REG TCG_REG_I5
@ -110,7 +101,6 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_I4,
TCG_REG_I5,
TCG_REG_G2,
TCG_REG_G3,
TCG_REG_G4,
TCG_REG_G5,
@ -399,24 +389,27 @@ static void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
}
static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
/* A 13-bit constant sign-extended to 64 bits. */
static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
{
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
}
static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
/* A 32-bit constant sign-extended to 64 bits. */
static void tcg_out_movi_s32(TCGContext *s, TCGReg ret, int32_t arg)
{
if (check_fit_i32(arg, 13)) {
/* A 13-bit constant sign-extended to 64-bits. */
tcg_out_movi_imm13(s, ret, arg);
} else {
tcg_out_sethi(s, ret, ~arg);
tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
}
/* A 32-bit constant zero-extended to 64 bits. */
static void tcg_out_movi_u32(TCGContext *s, TCGReg ret, uint32_t arg)
{
tcg_out_sethi(s, ret, arg);
if (arg & 0x3ff) {
tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
}
}
}
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
tcg_target_long arg, bool in_prologue,
@ -425,15 +418,15 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
tcg_target_long hi, lo = (int32_t)arg;
tcg_target_long test, lsb;
/* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
tcg_out_movi_imm32(s, ret, arg);
/* A 13-bit constant sign-extended to 64-bits. */
if (check_fit_tl(arg, 13)) {
tcg_out_movi_s13(s, ret, arg);
return;
}
/* A 13-bit constant sign-extended to 64-bits. */
if (check_fit_tl(arg, 13)) {
tcg_out_movi_imm13(s, ret, arg);
/* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
tcg_out_movi_u32(s, ret, arg);
return;
}
@ -448,8 +441,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
/* A 32-bit constant sign-extended to 64-bits. */
if (arg == lo) {
tcg_out_sethi(s, ret, ~arg);
tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
tcg_out_movi_s32(s, ret, arg);
return;
}
@ -477,13 +469,13 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
/* A 64-bit constant decomposed into 2 32-bit pieces. */
if (check_fit_i32(lo, 13)) {
hi = (arg - lo) >> 32;
tcg_out_movi_imm32(s, ret, hi);
tcg_out_movi_u32(s, ret, hi);
tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
} else {
hi = arg >> 32;
tcg_out_movi_imm32(s, ret, hi);
tcg_out_movi_imm32(s, scratch, lo);
tcg_out_movi_u32(s, ret, hi);
tcg_out_movi_u32(s, scratch, lo);
tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
tcg_out_arith(s, ret, ret, scratch, ARITH_OR);
}
@ -492,8 +484,8 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
static void tcg_out_movi(TCGContext *s, TCGType type,
TCGReg ret, tcg_target_long arg)
{
tcg_debug_assert(ret != TCG_REG_T2);
tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2);
tcg_debug_assert(ret != TCG_REG_T3);
tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T3);
}
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs)
@ -767,7 +759,7 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
default:
tcg_out_cmp(s, c1, c2, c2const);
tcg_out_movi_imm13(s, ret, 0);
tcg_out_movi_s13(s, ret, 0);
tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
return;
}
@ -803,11 +795,11 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
/* For 64-bit signed comparisons vs zero, we can avoid the compare
if the input does not overlap the output. */
if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
tcg_out_movi_imm13(s, ret, 0);
tcg_out_movi_s13(s, ret, 0);
tcg_out_movr(s, cond, ret, c1, 1, 1);
} else {
tcg_out_cmp(s, c1, c2, c2const);
tcg_out_movi_imm13(s, ret, 0);
tcg_out_movi_s13(s, ret, 0);
tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
}
}
@ -844,7 +836,7 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
if (use_vis3_instructions && !is_sub) {
/* Note that ADDXC doesn't accept immediates. */
if (bhconst && bh != 0) {
tcg_out_movi_imm13(s, TCG_REG_T2, bh);
tcg_out_movi_s13(s, TCG_REG_T2, bh);
bh = TCG_REG_T2;
}
tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
@ -866,7 +858,7 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
* so the adjustment fits 12 bits.
*/
if (bhconst) {
tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
tcg_out_movi_s13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
} else {
tcg_out_arithi(s, TCG_REG_T2, bh, 1,
is_sub ? ARITH_SUB : ARITH_ADD);
@ -885,10 +877,8 @@ static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest,
{
uintptr_t desti = (uintptr_t)dest;
/* Be careful not to clobber %o7 for a tail call. */
tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1,
desti & ~0xfff, in_prologue,
tail_call ? TCG_REG_G2 : TCG_REG_O7);
desti & ~0xfff, in_prologue, TCG_REG_T2);
tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7,
TCG_REG_T1, desti & 0xfff, JMPL);
}
@ -918,104 +908,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
}
#ifdef CONFIG_SOFTMMU
static const tcg_insn_unit *qemu_ld_trampoline[(MO_SSIZE | MO_BSWAP) + 1];
static const tcg_insn_unit *qemu_st_trampoline[(MO_SIZE | MO_BSWAP) + 1];
static void build_trampolines(TCGContext *s)
{
static void * const qemu_ld_helpers[] = {
[MO_UB] = helper_ret_ldub_mmu,
[MO_SB] = helper_ret_ldsb_mmu,
[MO_LEUW] = helper_le_lduw_mmu,
[MO_LESW] = helper_le_ldsw_mmu,
[MO_LEUL] = helper_le_ldul_mmu,
[MO_LEUQ] = helper_le_ldq_mmu,
[MO_BEUW] = helper_be_lduw_mmu,
[MO_BESW] = helper_be_ldsw_mmu,
[MO_BEUL] = helper_be_ldul_mmu,
[MO_BEUQ] = helper_be_ldq_mmu,
};
static void * const qemu_st_helpers[] = {
[MO_UB] = helper_ret_stb_mmu,
[MO_LEUW] = helper_le_stw_mmu,
[MO_LEUL] = helper_le_stl_mmu,
[MO_LEUQ] = helper_le_stq_mmu,
[MO_BEUW] = helper_be_stw_mmu,
[MO_BEUL] = helper_be_stl_mmu,
[MO_BEUQ] = helper_be_stq_mmu,
};
int i;
for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
if (qemu_ld_helpers[i] == NULL) {
continue;
}
/* May as well align the trampoline. */
while ((uintptr_t)s->code_ptr & 15) {
tcg_out_nop(s);
}
qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
/* Set the retaddr operand. */
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7);
/* Tail call. */
tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true);
/* delay slot -- set the env argument */
tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
}
for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) {
if (qemu_st_helpers[i] == NULL) {
continue;
}
/* May as well align the trampoline. */
while ((uintptr_t)s->code_ptr & 15) {
tcg_out_nop(s);
}
qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
/* Set the retaddr operand. */
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7);
/* Tail call. */
tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true);
/* delay slot -- set the env argument */
tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
}
}
#else
static const tcg_insn_unit *qemu_unalign_ld_trampoline;
static const tcg_insn_unit *qemu_unalign_st_trampoline;
static void build_trampolines(TCGContext *s)
{
for (int ld = 0; ld < 2; ++ld) {
void *helper;
while ((uintptr_t)s->code_ptr & 15) {
tcg_out_nop(s);
}
if (ld) {
helper = helper_unaligned_ld;
qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr);
} else {
helper = helper_unaligned_st;
qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr);
}
/* Tail call. */
tcg_out_jmpl_const(s, helper, true, true);
/* delay slot -- set the env argument */
tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
}
}
#endif
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
@ -1060,9 +952,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
/* delay slot */
tcg_out_movi_imm13(s, TCG_REG_O0, 0);
build_trampolines(s);
tcg_out_movi_s13(s, TCG_REG_O0, 0);
}
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
@ -1073,78 +963,169 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
}
}
#if defined(CONFIG_SOFTMMU)
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 1, .tmp = { TCG_REG_T1 }
};
/* We expect to use a 13-bit negative offset from ENV. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
/* Perform the TLB load and compare.
Inputs:
ADDRLO and ADDRHI contain the possible two parts of the address.
MEM_INDEX and S_BITS are the memory context and log2 size of the load.
WHICH is the offset into the CPUTLBEntry structure of the slot to read.
This should be offsetof addr_read or addr_write.
The result of the TLB comparison is in %[ix]cc. The sanitized address
is in the returned register, maybe %o0. The TLB addend is in %o1. */
static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
MemOp opc, int which)
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
MemOp opc = get_memop(lb->oi);
MemOp sgn;
if (!patch_reloc(lb->label_ptr[0], R_SPARC_WDISP19,
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
return false;
}
/* Use inline tcg_out_ext32s; otherwise let the helper sign-extend. */
sgn = (opc & MO_SIZE) < MO_32 ? MO_SIGN : 0;
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
tcg_out_call(s, qemu_ld_helpers[opc & (MO_SIZE | sgn)], NULL);
tcg_out_ld_helper_ret(s, lb, sgn, &ldst_helper_param);
tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
return patch_reloc(s->code_ptr - 1, R_SPARC_WDISP19,
(intptr_t)lb->raddr, 0);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
MemOp opc = get_memop(lb->oi);
if (!patch_reloc(lb->label_ptr[0], R_SPARC_WDISP19,
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
return false;
}
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE], NULL);
tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
return patch_reloc(s->code_ptr - 1, R_SPARC_WDISP19,
(intptr_t)lb->raddr, 0);
}
typedef struct {
TCGReg base;
TCGReg index;
TCGAtomAlign aa;
} HostAddress;
bool tcg_target_has_memory_bswap(MemOp memop)
{
return true;
}
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
* In both cases, return a TCGLabelQemuLdst structure if the slow path
* is required and fill in @h with the host address for the fast path.
*/
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addr_reg, MemOpIdx oi,
bool is_ld)
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
unsigned a_mask;
/* We don't support unaligned accesses. */
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
h->aa.align = MAX(h->aa.align, s_bits);
a_mask = (1u << h->aa.align) - 1;
#ifdef CONFIG_SOFTMMU
int mem_index = get_mmuidx(oi);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
const TCGReg r0 = TCG_REG_O0;
const TCGReg r1 = TCG_REG_O1;
const TCGReg r2 = TCG_REG_O2;
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
tcg_target_long compare_mask;
int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write);
int add_off = offsetof(CPUTLBEntry, addend);
int compare_mask;
int cc;
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
tcg_out_ld(s, TCG_TYPE_PTR, r0, TCG_AREG0, mask_off);
tcg_out_ld(s, TCG_TYPE_PTR, r1, TCG_AREG0, table_off);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T2, TCG_AREG0, mask_off);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T3, TCG_AREG0, table_off);
/* Extract the page index, shifted into place for tlb index. */
tcg_out_arithi(s, r2, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
SHIFT_SRL);
tcg_out_arith(s, r2, r2, r0, ARITH_AND);
tcg_out_arithi(s, TCG_REG_T1, addr_reg,
s->page_bits - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND);
/* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
tcg_out_arith(s, r2, r2, r1, ARITH_ADD);
tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T3, ARITH_ADD);
/* Load the tlb comparator and the addend. */
tcg_out_ld(s, TCG_TYPE_TL, r0, r2, which);
tcg_out_ld(s, TCG_TYPE_PTR, r1, r2, offsetof(CPUTLBEntry, addend));
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_T2, TCG_REG_T1, cmp_off);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T1, TCG_REG_T1, add_off);
h->base = TCG_REG_T1;
/* Mask out the page offset, except for the required alignment.
We don't support unaligned accesses. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
compare_mask = (tcg_target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
/* Mask out the page offset, except for the required alignment. */
compare_mask = s->page_mask | a_mask;
if (check_fit_tl(compare_mask, 13)) {
tcg_out_arithi(s, r2, addr, compare_mask, ARITH_AND);
tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND);
} else {
tcg_out_movi(s, TCG_TYPE_TL, r2, compare_mask);
tcg_out_arith(s, r2, addr, r2, ARITH_AND);
tcg_out_movi_s32(s, TCG_REG_T3, compare_mask);
tcg_out_arith(s, TCG_REG_T3, addr_reg, TCG_REG_T3, ARITH_AND);
}
tcg_out_cmp(s, r0, r2, 0);
tcg_out_cmp(s, TCG_REG_T2, TCG_REG_T3, 0);
/* If the guest address must be zero-extended, do so now. */
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
ldst->label_ptr[0] = s->code_ptr;
/* bne,pn %[xi]cc, label0 */
cc = TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC;
tcg_out_bpcc0(s, COND_NE, BPCC_PN | cc, 0);
#else
/*
* If the size equals the required alignment, we can skip the test
* and allow host SIGBUS to deliver SIGBUS to the guest.
* Otherwise, test for at least natural alignment and defer
* everything else to the helper functions.
*/
if (s_bits != get_alignment_bits(opc)) {
tcg_debug_assert(check_fit_tl(a_mask, 13));
tcg_out_arithi(s, TCG_REG_G0, addr_reg, a_mask, ARITH_ANDCC);
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
ldst->label_ptr[0] = s->code_ptr;
/* bne,pn %icc, label0 */
tcg_out_bpcc0(s, COND_NE, BPCC_PN | BPCC_ICC, 0);
}
h->base = guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0;
#endif
/* If the guest address must be zero-extended, do in the delay slot. */
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, r0, addr);
return r0;
tcg_out_ext32u(s, TCG_REG_T2, addr_reg);
h->index = TCG_REG_T2;
} else {
if (ldst) {
tcg_out_nop(s);
}
return addr;
h->index = addr_reg;
}
return ldst;
}
#endif /* CONFIG_SOFTMMU */
static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
MemOpIdx oi, TCGType data_type)
{
static const int ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
[MO_UB] = LDUB,
[MO_SB] = LDSB,
[MO_UB | MO_LE] = LDUB,
@ -1165,7 +1146,25 @@ static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
[MO_LESQ] = LDX_LE,
};
static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
TCGLabelQemuLdst *ldst;
HostAddress h;
ldst = prepare_host_addr(s, &h, addr, oi, true);
tcg_out_ldst_rr(s, data, h.base, h.index,
ld_opc[get_memop(oi) & (MO_BSWAP | MO_SSIZE)]);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = data;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
}
static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
MemOpIdx oi, TCGType data_type)
{
static const int st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_UB] = STB,
[MO_BEUW] = STH,
@ -1177,284 +1176,26 @@ static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_LEUQ] = STX_LE,
};
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
MemOpIdx oi, TCGType data_type)
{
MemOp memop = get_memop(oi);
tcg_insn_unit *label_ptr;
TCGLabelQemuLdst *ldst;
HostAddress h;
#ifdef CONFIG_SOFTMMU
unsigned memi = get_mmuidx(oi);
TCGReg addrz;
const tcg_insn_unit *func;
ldst = prepare_host_addr(s, &h, addr, oi, false);
addrz = tcg_out_tlb_load(s, addr, memi, memop,
offsetof(CPUTLBEntry, addr_read));
tcg_out_ldst_rr(s, data, h.base, h.index,
st_opc[get_memop(oi) & (MO_BSWAP | MO_SIZE)]);
/* The fast path is exactly one insn. Thus we can perform the
entire TLB Hit in the (annulled) delay slot of the branch
over the TLB Miss case. */
/* beq,a,pt %[xi]cc, label0 */
label_ptr = s->code_ptr;
tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
| (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
/* delay slot */
tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
/* TLB Miss. */
tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
/* We use the helpers to extend SB and SW data, leaving the case
of SL needing explicit extending below. */
if ((memop & MO_SSIZE) == MO_SL) {
func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
} else {
func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = data;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
tcg_debug_assert(func != NULL);
tcg_out_call_nodelay(s, func, false);
/* delay slot */
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi);
/* We let the helper sign-extend SB and SW, but leave SL for here. */
if ((memop & MO_SSIZE) == MO_SL) {
tcg_out_ext32s(s, data, TCG_REG_O0);
} else {
tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
}
*label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
#else
TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
unsigned a_bits = get_alignment_bits(memop);
unsigned s_bits = memop & MO_SIZE;
unsigned t_bits;
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_REG_T1, addr);
addr = TCG_REG_T1;
}
/*
* Normal case: alignment equal to access size.
*/
if (a_bits == s_bits) {
tcg_out_ldst_rr(s, data, addr, index,
qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
return;
}
/*
* Test for at least natural alignment, and assume most accesses
* will be aligned -- perform a straight load in the delay slot.
* This is required to preserve atomicity for aligned accesses.
*/
t_bits = MAX(a_bits, s_bits);
tcg_debug_assert(t_bits < 13);
tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
/* beq,a,pt %icc, label */
label_ptr = s->code_ptr;
tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
/* delay slot */
tcg_out_ldst_rr(s, data, addr, index,
qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
if (a_bits >= s_bits) {
/*
* Overalignment: A successful alignment test will perform the memory
* operation in the delay slot, and failure need only invoke the
* handler for SIGBUS.
*/
tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false);
/* delay slot -- move to low part of argument reg */
tcg_out_mov_delay(s, TCG_REG_O1, addr);
} else {
/* Underalignment: load by pieces of minimum alignment. */
int ld_opc, a_size, s_size, i;
/*
* Force full address into T1 early; avoids problems with
* overlap between @addr and @data.
*/
tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
a_size = 1 << a_bits;
s_size = 1 << s_bits;
if ((memop & MO_BSWAP) == MO_BE) {
ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)];
tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
ld_opc = qemu_ld_opc[a_bits | MO_BE];
for (i = a_size; i < s_size; i += a_size) {
tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
tcg_out_arithi(s, data, data, a_size, SHIFT_SLLX);
tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
}
} else if (a_bits == 0) {
ld_opc = LDUB;
tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
for (i = a_size; i < s_size; i += a_size) {
if ((memop & MO_SIGN) && i == s_size - a_size) {
ld_opc = LDSB;
}
tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
}
} else {
ld_opc = qemu_ld_opc[a_bits | MO_LE];
tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc);
for (i = a_size; i < s_size; i += a_size) {
tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
if ((memop & MO_SIGN) && i == s_size - a_size) {
ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN];
}
tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc);
tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
}
}
}
*label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
#endif /* CONFIG_SOFTMMU */
}
static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
MemOpIdx oi, TCGType data_type)
{
MemOp memop = get_memop(oi);
tcg_insn_unit *label_ptr;
#ifdef CONFIG_SOFTMMU
unsigned memi = get_mmuidx(oi);
TCGReg addrz;
const tcg_insn_unit *func;
addrz = tcg_out_tlb_load(s, addr, memi, memop,
offsetof(CPUTLBEntry, addr_write));
/* The fast path is exactly one insn. Thus we can perform the entire
TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
/* beq,a,pt %[xi]cc, label0 */
label_ptr = s->code_ptr;
tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
| (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
/* delay slot */
tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
/* TLB Miss. */
tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
tcg_out_movext(s, (memop & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
TCG_REG_O2, data_type, memop & MO_SIZE, data);
func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
tcg_debug_assert(func != NULL);
tcg_out_call_nodelay(s, func, false);
/* delay slot */
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi);
*label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
#else
TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
unsigned a_bits = get_alignment_bits(memop);
unsigned s_bits = memop & MO_SIZE;
unsigned t_bits;
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_REG_T1, addr);
addr = TCG_REG_T1;
}
/*
* Normal case: alignment equal to access size.
*/
if (a_bits == s_bits) {
tcg_out_ldst_rr(s, data, addr, index,
qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
return;
}
/*
* Test for at least natural alignment, and assume most accesses
* will be aligned -- perform a straight store in the delay slot.
* This is required to preserve atomicity for aligned accesses.
*/
t_bits = MAX(a_bits, s_bits);
tcg_debug_assert(t_bits < 13);
tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
/* beq,a,pt %icc, label */
label_ptr = s->code_ptr;
tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
/* delay slot */
tcg_out_ldst_rr(s, data, addr, index,
qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
if (a_bits >= s_bits) {
/*
* Overalignment: A successful alignment test will perform the memory
* operation in the delay slot, and failure need only invoke the
* handler for SIGBUS.
*/
tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false);
/* delay slot -- move to low part of argument reg */
tcg_out_mov_delay(s, TCG_REG_O1, addr);
} else {
/* Underalignment: store by pieces of minimum alignment. */
int st_opc, a_size, s_size, i;
/*
* Force full address into T1 early; avoids problems with
* overlap between @addr and @data.
*/
tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
a_size = 1 << a_bits;
s_size = 1 << s_bits;
if ((memop & MO_BSWAP) == MO_BE) {
st_opc = qemu_st_opc[a_bits | MO_BE];
for (i = 0; i < s_size; i += a_size) {
TCGReg d = data;
int shift = (s_size - a_size - i) * 8;
if (shift) {
d = TCG_REG_T2;
tcg_out_arithi(s, d, data, shift, SHIFT_SRLX);
}
tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc);
}
} else if (a_bits == 0) {
tcg_out_ldst(s, data, TCG_REG_T1, 0, STB);
for (i = 1; i < s_size; i++) {
tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB);
}
} else {
/* Note that ST*A with immediate asi must use indexed address. */
st_opc = qemu_st_opc[a_bits + MO_LE];
tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc);
for (i = a_size; i < s_size; i += a_size) {
tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc);
}
}
}
*label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
#endif /* CONFIG_SOFTMMU */
}
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
if (check_fit_ptr(a0, 13)) {
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
tcg_out_movi_imm13(s, TCG_REG_O0, a0);
tcg_out_movi_s13(s, TCG_REG_O0, a0);
return;
} else {
intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0);
@ -1635,16 +1376,20 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
break;
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
break;
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
break;
@ -1766,6 +1511,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_extu_i32_i64:
case INDEX_op_extrl_i64_i32:
case INDEX_op_extrh_i64_i32:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
return C_O1_I1(r, r);
case INDEX_op_st8_i32:
@ -1775,6 +1524,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_st_i32:
case INDEX_op_st32_i64:
case INDEX_op_st_i64:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(rZ, r);
case INDEX_op_add_i32:
@ -1824,13 +1577,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_muluh_i64:
return C_O1_I2(r, r, r);
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
return C_O1_I1(r, s);
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_i64:
return C_O0_I2(sZ, s);
default:
g_assert_not_reached();
}
@ -1878,6 +1624,7 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T3); /* for internal use */
}
#define ELF_HOST_MACHINE EM_SPARCV9

View File

@ -151,10 +151,12 @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
#define TCG_TARGET_HAS_mulsh_i64 0
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_AREG0 TCG_REG_I0
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
#define TCG_TARGET_NEED_LDST_LABELS
#define TCG_TARGET_NEED_POOL_LABELS
#endif

View File

@ -126,4 +126,6 @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t)
return temp_tcgv_i64(tcgv_i128_temp(t) + o);
}
bool tcg_target_has_memory_bswap(MemOp memop);
#endif /* TCG_INTERNAL_H */

1232
tcg/tcg-op-ldst.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -27,7 +27,6 @@
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"
@ -2841,866 +2840,3 @@ void tcg_gen_lookup_and_goto_ptr(void)
tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
tcg_temp_free_ptr(ptr);
}
static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
{
/* Trigger the asserts within as early as possible. */
unsigned a_bits = get_alignment_bits(op);
/* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
if (a_bits == (op & MO_SIZE)) {
op = (op & ~MO_AMASK) | MO_ALIGN;
}
switch (op & MO_SIZE) {
case MO_8:
op &= ~MO_BSWAP;
break;
case MO_16:
break;
case MO_32:
if (!is64) {
op &= ~MO_SIGN;
}
break;
case MO_64:
if (is64) {
op &= ~MO_SIGN;
break;
}
/* fall through */
default:
g_assert_not_reached();
}
if (st) {
op &= ~MO_SIGN;
}
return op;
}
static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
MemOp memop, TCGArg idx)
{
MemOpIdx oi = make_memop_idx(memop, idx);
#if TARGET_LONG_BITS == 32
tcg_gen_op3i_i32(opc, val, addr, oi);
#else
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
} else {
tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
}
#endif
}
static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
MemOp memop, TCGArg idx)
{
MemOpIdx oi = make_memop_idx(memop, idx);
#if TARGET_LONG_BITS == 32
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
} else {
tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
}
#else
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
TCGV_LOW(addr), TCGV_HIGH(addr), oi);
} else {
tcg_gen_op3i_i64(opc, val, addr, oi);
}
#endif
}
static void tcg_gen_req_mo(TCGBar type)
{
#ifdef TCG_GUEST_DEFAULT_MO
type &= TCG_GUEST_DEFAULT_MO;
#endif
type &= ~TCG_TARGET_DEFAULT_MO;
if (type) {
tcg_gen_mb(type | TCG_BAR_SC);
}
}
static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
{
#ifdef CONFIG_PLUGIN
if (tcg_ctx->plugin_insn != NULL) {
/* Save a copy of the vaddr for use after a load. */
TCGv temp = tcg_temp_new();
tcg_gen_mov_tl(temp, vaddr);
return temp;
}
#endif
return vaddr;
}
static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
if (tcg_ctx->plugin_insn != NULL) {
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
plugin_gen_empty_mem_callback(vaddr, info);
tcg_temp_free(vaddr);
}
#endif
}
void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
{
MemOp orig_memop;
MemOpIdx oi;
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
memop = tcg_canonicalize_memop(memop, 0, 0);
oi = make_memop_idx(memop, idx);
orig_memop = memop;
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
memop &= ~MO_BSWAP;
/* The bswap primitive benefits from zero-extended input. */
if ((memop & MO_SSIZE) == MO_SW) {
memop &= ~MO_SIGN;
}
}
addr = plugin_prep_mem_callbacks(addr);
gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
if ((orig_memop ^ memop) & MO_BSWAP) {
switch (orig_memop & MO_SIZE) {
case MO_16:
tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
? TCG_BSWAP_IZ | TCG_BSWAP_OS
: TCG_BSWAP_IZ | TCG_BSWAP_OZ));
break;
case MO_32:
tcg_gen_bswap32_i32(val, val);
break;
default:
g_assert_not_reached();
}
}
}
void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
{
TCGv_i32 swap = NULL;
MemOpIdx oi;
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
memop = tcg_canonicalize_memop(memop, 0, 1);
oi = make_memop_idx(memop, idx);
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
swap = tcg_temp_ebb_new_i32();
switch (memop & MO_SIZE) {
case MO_16:
tcg_gen_bswap16_i32(swap, val, 0);
break;
case MO_32:
tcg_gen_bswap32_i32(swap, val);
break;
default:
g_assert_not_reached();
}
val = swap;
memop &= ~MO_BSWAP;
}
addr = plugin_prep_mem_callbacks(addr);
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
} else {
gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
}
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
if (swap) {
tcg_temp_free_i32(swap);
}
}
void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
{
MemOp orig_memop;
MemOpIdx oi;
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
if (memop & MO_SIGN) {
tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
} else {
tcg_gen_movi_i32(TCGV_HIGH(val), 0);
}
return;
}
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
memop = tcg_canonicalize_memop(memop, 1, 0);
oi = make_memop_idx(memop, idx);
orig_memop = memop;
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
memop &= ~MO_BSWAP;
/* The bswap primitive benefits from zero-extended input. */
if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
memop &= ~MO_SIGN;
}
}
addr = plugin_prep_mem_callbacks(addr);
gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
if ((orig_memop ^ memop) & MO_BSWAP) {
int flags = (orig_memop & MO_SIGN
? TCG_BSWAP_IZ | TCG_BSWAP_OS
: TCG_BSWAP_IZ | TCG_BSWAP_OZ);
switch (orig_memop & MO_SIZE) {
case MO_16:
tcg_gen_bswap16_i64(val, val, flags);
break;
case MO_32:
tcg_gen_bswap32_i64(val, val, flags);
break;
case MO_64:
tcg_gen_bswap64_i64(val, val);
break;
default:
g_assert_not_reached();
}
}
}
void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
{
TCGv_i64 swap = NULL;
MemOpIdx oi;
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
return;
}
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
memop = tcg_canonicalize_memop(memop, 1, 1);
oi = make_memop_idx(memop, idx);
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
swap = tcg_temp_ebb_new_i64();
switch (memop & MO_SIZE) {
case MO_16:
tcg_gen_bswap16_i64(swap, val, 0);
break;
case MO_32:
tcg_gen_bswap32_i64(swap, val, 0);
break;
case MO_64:
tcg_gen_bswap64_i64(swap, val);
break;
default:
g_assert_not_reached();
}
val = swap;
memop &= ~MO_BSWAP;
}
addr = plugin_prep_mem_callbacks(addr);
gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
if (swap) {
tcg_temp_free_i64(swap);
}
}
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
{
MemOp mop_1 = orig, mop_2;
tcg_debug_assert((orig & MO_SIZE) == MO_128);
tcg_debug_assert((orig & MO_SIGN) == 0);
/* Use a memory ordering implemented by the host. */
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
mop_1 &= ~MO_BSWAP;
}
/* Reduce the size to 64-bit. */
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
/* Retain the alignment constraints of the original. */
switch (orig & MO_AMASK) {
case MO_UNALN:
case MO_ALIGN_2:
case MO_ALIGN_4:
mop_2 = mop_1;
break;
case MO_ALIGN_8:
/* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
mop_2 = mop_1;
break;
case MO_ALIGN:
/* Second has 8-byte alignment; first has 16-byte alignment. */
mop_2 = mop_1;
mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
break;
case MO_ALIGN_16:
case MO_ALIGN_32:
case MO_ALIGN_64:
/* Second has 8-byte alignment; first retains original. */
mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
break;
default:
g_assert_not_reached();
}
ret[0] = mop_1;
ret[1] = mop_2;
}
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
MemOp mop[2];
TCGv addr_p8;
TCGv_i64 x, y;
canonicalize_memop_i128_as_i64(mop, memop);
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
addr = plugin_prep_mem_callbacks(addr);
/* TODO: respect atomicity of the operation. */
/* TODO: allow the tcg backend to see the whole operation. */
/*
* Since there are no global TCGv_i128, there is no visible state
* changed if the second load faults. Load directly into the two
* subwords.
*/
if ((memop & MO_BSWAP) == MO_LE) {
x = TCGV128_LOW(val);
y = TCGV128_HIGH(val);
} else {
x = TCGV128_HIGH(val);
y = TCGV128_LOW(val);
}
gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
if ((mop[0] ^ memop) & MO_BSWAP) {
tcg_gen_bswap64_i64(x, x);
}
addr_p8 = tcg_temp_new();
tcg_gen_addi_tl(addr_p8, addr, 8);
gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
tcg_temp_free(addr_p8);
if ((mop[0] ^ memop) & MO_BSWAP) {
tcg_gen_bswap64_i64(y, y);
}
plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
QEMU_PLUGIN_MEM_R);
}
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
MemOp mop[2];
TCGv addr_p8;
TCGv_i64 x, y;
canonicalize_memop_i128_as_i64(mop, memop);
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
addr = plugin_prep_mem_callbacks(addr);
/* TODO: respect atomicity of the operation. */
/* TODO: allow the tcg backend to see the whole operation. */
if ((memop & MO_BSWAP) == MO_LE) {
x = TCGV128_LOW(val);
y = TCGV128_HIGH(val);
} else {
x = TCGV128_HIGH(val);
y = TCGV128_LOW(val);
}
addr_p8 = tcg_temp_new();
if ((mop[0] ^ memop) & MO_BSWAP) {
TCGv_i64 t = tcg_temp_ebb_new_i64();
tcg_gen_bswap64_i64(t, x);
gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
tcg_gen_bswap64_i64(t, y);
tcg_gen_addi_tl(addr_p8, addr, 8);
gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
tcg_temp_free_i64(t);
} else {
gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
tcg_gen_addi_tl(addr_p8, addr, 8);
gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
}
tcg_temp_free(addr_p8);
plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
QEMU_PLUGIN_MEM_W);
}
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
switch (opc & MO_SSIZE) {
case MO_SB:
tcg_gen_ext8s_i32(ret, val);
break;
case MO_UB:
tcg_gen_ext8u_i32(ret, val);
break;
case MO_SW:
tcg_gen_ext16s_i32(ret, val);
break;
case MO_UW:
tcg_gen_ext16u_i32(ret, val);
break;
default:
tcg_gen_mov_i32(ret, val);
break;
}
}
static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
switch (opc & MO_SSIZE) {
case MO_SB:
tcg_gen_ext8s_i64(ret, val);
break;
case MO_UB:
tcg_gen_ext8u_i64(ret, val);
break;
case MO_SW:
tcg_gen_ext16s_i64(ret, val);
break;
case MO_UW:
tcg_gen_ext16u_i64(ret, val);
break;
case MO_SL:
tcg_gen_ext32s_i64(ret, val);
break;
case MO_UL:
tcg_gen_ext32u_i64(ret, val);
break;
default:
tcg_gen_mov_i64(ret, val);
break;
}
}
typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
TCGv_i64, TCGv_i32);
#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
#ifdef CONFIG_CMPXCHG128
# define WITH_ATOMIC128(X) X,
#else
# define WITH_ATOMIC128(X)
#endif
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_8] = gen_helper_atomic_cmpxchgb,
[MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
[MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
[MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
[MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};
void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
TCGv_i32 newv, TCGArg idx, MemOp memop)
{
TCGv_i32 t1 = tcg_temp_ebb_new_i32();
TCGv_i32 t2 = tcg_temp_ebb_new_i32();
tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
tcg_gen_qemu_st_i32(t2, addr, idx, memop);
tcg_temp_free_i32(t2);
if (memop & MO_SIGN) {
tcg_gen_ext_i32(retv, t1, memop);
} else {
tcg_gen_mov_i32(retv, t1);
}
tcg_temp_free_i32(t1);
}
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
TCGv_i32 newv, TCGArg idx, MemOp memop)
{
gen_atomic_cx_i32 gen;
MemOpIdx oi;
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
return;
}
memop = tcg_canonicalize_memop(memop, 0, 0);
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
tcg_debug_assert(gen != NULL);
oi = make_memop_idx(memop & ~MO_SIGN, idx);
gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
if (memop & MO_SIGN) {
tcg_gen_ext_i32(retv, retv, memop);
}
}
void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
TCGv_i64 newv, TCGArg idx, MemOp memop)
{
TCGv_i64 t1, t2;
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
TCGV_LOW(newv), idx, memop);
if (memop & MO_SIGN) {
tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
} else {
tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
}
return;
}
t1 = tcg_temp_ebb_new_i64();
t2 = tcg_temp_ebb_new_i64();
tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
tcg_gen_qemu_st_i64(t2, addr, idx, memop);
tcg_temp_free_i64(t2);
if (memop & MO_SIGN) {
tcg_gen_ext_i64(retv, t1, memop);
} else {
tcg_gen_mov_i64(retv, t1);
}
tcg_temp_free_i64(t1);
}
void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
TCGv_i64 newv, TCGArg idx, MemOp memop)
{
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
return;
}
if ((memop & MO_SIZE) == MO_64) {
gen_atomic_cx_i64 gen;
memop = tcg_canonicalize_memop(memop, 1, 0);
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
if (gen) {
MemOpIdx oi = make_memop_idx(memop, idx);
gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
return;
}
gen_helper_exit_atomic(cpu_env);
/*
* Produce a result for a well-formed opcode stream. This satisfies
* liveness for set before used, which happens before this dead code
* is removed.
*/
tcg_gen_movi_i64(retv, 0);
return;
}
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
TCGV_LOW(newv), idx, memop);
if (memop & MO_SIGN) {
tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
} else {
tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
}
} else {
TCGv_i32 c32 = tcg_temp_ebb_new_i32();
TCGv_i32 n32 = tcg_temp_ebb_new_i32();
TCGv_i32 r32 = tcg_temp_ebb_new_i32();
tcg_gen_extrl_i64_i32(c32, cmpv);
tcg_gen_extrl_i64_i32(n32, newv);
tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
tcg_temp_free_i32(c32);
tcg_temp_free_i32(n32);
tcg_gen_extu_i32_i64(retv, r32);
tcg_temp_free_i32(r32);
if (memop & MO_SIGN) {
tcg_gen_ext_i64(retv, retv, memop);
}
}
}
void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
TCGv_i128 newv, TCGArg idx, MemOp memop)
{
if (TCG_TARGET_REG_BITS == 32) {
/* Inline expansion below is simply too large for 32-bit hosts. */
gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
? gen_helper_nonatomic_cmpxchgo_le
: gen_helper_nonatomic_cmpxchgo_be);
MemOpIdx oi = make_memop_idx(memop, idx);
tcg_debug_assert((memop & MO_SIZE) == MO_128);
tcg_debug_assert((memop & MO_SIGN) == 0);
gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
} else {
TCGv_i128 oldv = tcg_temp_ebb_new_i128();
TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
TCGv_i64 t0 = tcg_temp_ebb_new_i64();
TCGv_i64 t1 = tcg_temp_ebb_new_i64();
TCGv_i64 z = tcg_constant_i64(0);
tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
/* Compare i128 */
tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
tcg_gen_or_i64(t0, t0, t1);
/* tmpv = equal ? newv : oldv */
tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
TCGV128_LOW(newv), TCGV128_LOW(oldv));
tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
/* Unconditional writeback. */
tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
tcg_gen_mov_i128(retv, oldv);
tcg_temp_free_i64(t0);
tcg_temp_free_i64(t1);
tcg_temp_free_i128(tmpv);
tcg_temp_free_i128(oldv);
}
}
void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
TCGv_i128 newv, TCGArg idx, MemOp memop)
{
gen_atomic_cx_i128 gen;
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
return;
}
tcg_debug_assert((memop & MO_SIZE) == MO_128);
tcg_debug_assert((memop & MO_SIGN) == 0);
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
if (gen) {
MemOpIdx oi = make_memop_idx(memop, idx);
gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
return;
}
gen_helper_exit_atomic(cpu_env);
/*
* Produce a result for a well-formed opcode stream. This satisfies
* liveness for set before used, which happens before this dead code
* is removed.
*/
tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
}
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
TCGArg idx, MemOp memop, bool new_val,
void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
{
TCGv_i32 t1 = tcg_temp_ebb_new_i32();
TCGv_i32 t2 = tcg_temp_ebb_new_i32();
memop = tcg_canonicalize_memop(memop, 0, 0);
tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
tcg_gen_ext_i32(t2, val, memop);
gen(t2, t1, t2);
tcg_gen_qemu_st_i32(t2, addr, idx, memop);
tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
tcg_temp_free_i32(t1);
tcg_temp_free_i32(t2);
}
static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
TCGArg idx, MemOp memop, void * const table[])
{
gen_atomic_op_i32 gen;
MemOpIdx oi;
memop = tcg_canonicalize_memop(memop, 0, 0);
gen = table[memop & (MO_SIZE | MO_BSWAP)];
tcg_debug_assert(gen != NULL);
oi = make_memop_idx(memop & ~MO_SIGN, idx);
gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
if (memop & MO_SIGN) {
tcg_gen_ext_i32(ret, ret, memop);
}
}
static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
TCGArg idx, MemOp memop, bool new_val,
void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
{
TCGv_i64 t1 = tcg_temp_ebb_new_i64();
TCGv_i64 t2 = tcg_temp_ebb_new_i64();
memop = tcg_canonicalize_memop(memop, 1, 0);
tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
tcg_gen_ext_i64(t2, val, memop);
gen(t2, t1, t2);
tcg_gen_qemu_st_i64(t2, addr, idx, memop);
tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
tcg_temp_free_i64(t1);
tcg_temp_free_i64(t2);
}
static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
TCGArg idx, MemOp memop, void * const table[])
{
memop = tcg_canonicalize_memop(memop, 1, 0);
if ((memop & MO_SIZE) == MO_64) {
#ifdef CONFIG_ATOMIC64
gen_atomic_op_i64 gen;
MemOpIdx oi;
gen = table[memop & (MO_SIZE | MO_BSWAP)];
tcg_debug_assert(gen != NULL);
oi = make_memop_idx(memop & ~MO_SIGN, idx);
gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
#else
gen_helper_exit_atomic(cpu_env);
/* Produce a result, so that we have a well-formed opcode stream
with respect to uses of the result in the (dead) code following. */
tcg_gen_movi_i64(ret, 0);
#endif /* CONFIG_ATOMIC64 */
} else {
TCGv_i32 v32 = tcg_temp_ebb_new_i32();
TCGv_i32 r32 = tcg_temp_ebb_new_i32();
tcg_gen_extrl_i64_i32(v32, val);
do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
tcg_temp_free_i32(v32);
tcg_gen_extu_i32_i64(ret, r32);
tcg_temp_free_i32(r32);
if (memop & MO_SIGN) {
tcg_gen_ext_i64(ret, ret, memop);
}
}
}
#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
[MO_8] = gen_helper_atomic_##NAME##b, \
[MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
[MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
[MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
[MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
}; \
void tcg_gen_atomic_##NAME##_i32 \
(TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
{ \
if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
} else { \
do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
tcg_gen_##OP##_i32); \
} \
} \
void tcg_gen_atomic_##NAME##_i64 \
(TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
{ \
if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
} else { \
do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
tcg_gen_##OP##_i64); \
} \
}
GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
tcg_gen_mov_i32(r, b);
}
static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
tcg_gen_mov_i64(r, b);
}
GEN_ATOMIC_HELPER(xchg, mov2, 0)
#undef GEN_ATOMIC_HELPER

591
tcg/tcg.c
View File

@ -63,6 +63,9 @@
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif
/* Forward declarations for functions declared in tcg-target.c.inc and
used here. */
@ -197,6 +200,38 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
const TCGLdstHelperParam *p)
__attribute__((unused));
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
[MO_UB] = helper_ldub_mmu,
[MO_SB] = helper_ldsb_mmu,
[MO_UW] = helper_lduw_mmu,
[MO_SW] = helper_ldsw_mmu,
[MO_UL] = helper_ldul_mmu,
[MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
[MO_SL] = helper_ldsl_mmu,
[MO_128] = helper_ld16_mmu,
#endif
};
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
[MO_8] = helper_stb_mmu,
[MO_16] = helper_stw_mmu,
[MO_32] = helper_stl_mmu,
[MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
[MO_128] = helper_st16_mmu,
#endif
};
typedef struct {
MemOp atom; /* lg2 bits of atomicity required */
MemOp align; /* lg2 bits of alignment to use */
} TCGAtomAlign;
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
MemOp host_atom, bool allow_two_ops)
__attribute__((unused));
TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;
@ -513,6 +548,82 @@ static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
tcg_out_movext1_new_src(s, i1, src1);
}
/**
* tcg_out_movext3 -- move and extend three pair
* @s: tcg context
* @i1: first move description
* @i2: second move description
* @i3: third move description
* @scratch: temporary register, or -1 for none
*
* As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
* between the sources and destinations.
*/
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
const TCGMovExtend *i2, const TCGMovExtend *i3,
int scratch)
{
TCGReg src1 = i1->src;
TCGReg src2 = i2->src;
TCGReg src3 = i3->src;
if (i1->dst != src2 && i1->dst != src3) {
tcg_out_movext1(s, i1);
tcg_out_movext2(s, i2, i3, scratch);
return;
}
if (i2->dst != src1 && i2->dst != src3) {
tcg_out_movext1(s, i2);
tcg_out_movext2(s, i1, i3, scratch);
return;
}
if (i3->dst != src1 && i3->dst != src2) {
tcg_out_movext1(s, i3);
tcg_out_movext2(s, i1, i2, scratch);
return;
}
/*
* There is a cycle. Since there are only 3 nodes, the cycle is
* either "clockwise" or "anti-clockwise", and can be solved with
* a single scratch or two xchg.
*/
if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
/* "Clockwise" */
if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
/* The data is now in the correct registers, now extend. */
tcg_out_movext1_new_src(s, i1, i1->dst);
tcg_out_movext1_new_src(s, i2, i2->dst);
tcg_out_movext1_new_src(s, i3, i3->dst);
} else {
tcg_debug_assert(scratch >= 0);
tcg_out_mov(s, i1->src_type, scratch, src1);
tcg_out_movext1(s, i3);
tcg_out_movext1(s, i2);
tcg_out_movext1_new_src(s, i1, scratch);
}
} else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
/* "Anti-clockwise" */
if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
/* The data is now in the correct registers, now extend. */
tcg_out_movext1_new_src(s, i1, i1->dst);
tcg_out_movext1_new_src(s, i2, i2->dst);
tcg_out_movext1_new_src(s, i3, i3->dst);
} else {
tcg_debug_assert(scratch >= 0);
tcg_out_mov(s, i1->src_type, scratch, src1);
tcg_out_movext1(s, i2);
tcg_out_movext1(s, i3);
tcg_out_movext1_new_src(s, i1, scratch);
}
} else {
g_assert_not_reached();
}
}
#define C_PFX1(P, A) P##A
#define C_PFX2(P, A, B) P##A##_##B
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
@ -757,7 +868,7 @@ static TCGHelperInfo info_helper_ld32_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
| dh_typemask(env, 1)
| dh_typemask(tl, 2) /* target_ulong addr */
| dh_typemask(i64, 2) /* uint64_t addr */
| dh_typemask(i32, 3) /* unsigned oi */
| dh_typemask(ptr, 4) /* uintptr_t ra */
};
@ -766,7 +877,16 @@ static TCGHelperInfo info_helper_ld64_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(i64, 0) /* return uint64_t */
| dh_typemask(env, 1)
| dh_typemask(tl, 2) /* target_ulong addr */
| dh_typemask(i64, 2) /* uint64_t addr */
| dh_typemask(i32, 3) /* unsigned oi */
| dh_typemask(ptr, 4) /* uintptr_t ra */
};
static TCGHelperInfo info_helper_ld128_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(i128, 0) /* return Int128 */
| dh_typemask(env, 1)
| dh_typemask(i64, 2) /* uint64_t addr */
| dh_typemask(i32, 3) /* unsigned oi */
| dh_typemask(ptr, 4) /* uintptr_t ra */
};
@ -775,7 +895,7 @@ static TCGHelperInfo info_helper_st32_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(void, 0)
| dh_typemask(env, 1)
| dh_typemask(tl, 2) /* target_ulong addr */
| dh_typemask(i64, 2) /* uint64_t addr */
| dh_typemask(i32, 3) /* uint32_t data */
| dh_typemask(i32, 4) /* unsigned oi */
| dh_typemask(ptr, 5) /* uintptr_t ra */
@ -785,12 +905,22 @@ static TCGHelperInfo info_helper_st64_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(void, 0)
| dh_typemask(env, 1)
| dh_typemask(tl, 2) /* target_ulong addr */
| dh_typemask(i64, 2) /* uint64_t addr */
| dh_typemask(i64, 3) /* uint64_t data */
| dh_typemask(i32, 4) /* unsigned oi */
| dh_typemask(ptr, 5) /* uintptr_t ra */
};
static TCGHelperInfo info_helper_st128_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(void, 0)
| dh_typemask(env, 1)
| dh_typemask(i64, 2) /* uint64_t addr */
| dh_typemask(i128, 3) /* Int128 data */
| dh_typemask(i32, 4) /* unsigned oi */
| dh_typemask(ptr, 5) /* uintptr_t ra */
};
#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
@ -1204,8 +1334,10 @@ static void tcg_context_init(unsigned max_cpus)
init_call_layout(&info_helper_ld32_mmu);
init_call_layout(&info_helper_ld64_mmu);
init_call_layout(&info_helper_ld128_mmu);
init_call_layout(&info_helper_st32_mmu);
init_call_layout(&info_helper_st64_mmu);
init_call_layout(&info_helper_st128_mmu);
#ifdef CONFIG_TCG_INTERPRETER
init_ffi_layouts();
@ -1391,6 +1523,9 @@ void tcg_func_start(TCGContext *s)
QTAILQ_INIT(&s->ops);
QTAILQ_INIT(&s->free_ops);
QSIMPLEQ_INIT(&s->labels);
tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
s->addr_type == TCG_TYPE_I64);
}
static TCGTemp *tcg_temp_alloc(TCGContext *s)
@ -1707,15 +1842,26 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_exit_tb:
case INDEX_op_goto_tb:
case INDEX_op_goto_ptr:
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
return true;
case INDEX_op_qemu_st8_i32:
case INDEX_op_qemu_st8_a32_i32:
case INDEX_op_qemu_st8_a64_i32:
return TCG_TARGET_HAS_qemu_st8_i32;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
return TCG_TARGET_HAS_qemu_ldst_i128;
case INDEX_op_mov_i32:
case INDEX_op_setcond_i32:
case INDEX_op_brcond_i32:
@ -2168,7 +2314,7 @@ static const char * const cond_name[] =
[TCG_COND_GTU] = "gtu"
};
static const char * const ldst_name[] =
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
[MO_UB] = "ub",
[MO_SB] = "sb",
@ -2182,6 +2328,8 @@ static const char * const ldst_name[] =
[MO_BEUL] = "beul",
[MO_BESL] = "besl",
[MO_BEUQ] = "beq",
[MO_128 + MO_BE] = "beo",
[MO_128 + MO_LE] = "leo",
};
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
@ -2195,6 +2343,15 @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
[MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
[MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
[MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
[MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
[MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
[MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
[MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
static const char bswap_flag_name[][6] = {
[TCG_BSWAP_IZ] = "iz",
[TCG_BSWAP_OZ] = "oz",
@ -2240,13 +2397,8 @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
col += ne_fprintf(f, "\n ----");
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
a = op->args[i];
#endif
col += ne_fprintf(f, " " TARGET_FMT_lx, a);
col += ne_fprintf(f, " %016" PRIx64,
tcg_get_insn_start_param(op, i));
}
} else if (c == INDEX_op_call) {
const TCGHelperInfo *info = tcg_call_info(op);
@ -2324,23 +2476,38 @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
}
i = 1;
break;
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st8_i32:
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st8_a32_i32:
case INDEX_op_qemu_st8_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
{
const char *s_al, *s_op, *s_at;
MemOpIdx oi = op->args[k++];
MemOp op = get_memop(oi);
unsigned ix = get_mmuidx(oi);
if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
col += ne_fprintf(f, ",$0x%x,%u", op, ix);
} else {
const char *s_al, *s_op;
s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
/* If all fields are accounted for, print symbolically. */
if (!op && s_al && s_op && s_at) {
col += ne_fprintf(f, ",%s%s%s,%u",
s_at, s_al, s_op, ix);
} else {
op = get_memop(oi);
col += ne_fprintf(f, ",$0x%x,%u", op, ix);
}
i = 1;
}
@ -5087,6 +5254,92 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
}
}
/**
* atom_and_align_for_opc:
* @s: tcg context
* @opc: memory operation code
* @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
* @allow_two_ops: true if we are prepared to issue two operations
*
* Return the alignment and atomicity to use for the inline fast path
* for the given memory operation. The alignment may be larger than
* that specified in @opc, and the correct alignment will be diagnosed
* by the slow path helper.
*
* If @allow_two_ops, the host is prepared to test for 2x alignment,
* and issue two loads or stores for subalignment.
*/
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
MemOp host_atom, bool allow_two_ops)
{
MemOp align = get_alignment_bits(opc);
MemOp size = opc & MO_SIZE;
MemOp half = size ? size - 1 : 0;
MemOp atmax;
MemOp atom;
/* When serialized, no further atomicity required. */
if (s->gen_tb->cflags & CF_PARALLEL) {
atom = opc & MO_ATOM_MASK;
} else {
atom = MO_ATOM_NONE;
}
switch (atom) {
case MO_ATOM_NONE:
/* The operation requires no specific atomicity. */
atmax = MO_8;
break;
case MO_ATOM_IFALIGN:
atmax = size;
break;
case MO_ATOM_IFALIGN_PAIR:
atmax = half;
break;
case MO_ATOM_WITHIN16:
atmax = size;
if (size == MO_128) {
/* Misalignment implies !within16, and therefore no atomicity. */
} else if (host_atom != MO_ATOM_WITHIN16) {
/* The host does not implement within16, so require alignment. */
align = MAX(align, size);
}
break;
case MO_ATOM_WITHIN16_PAIR:
atmax = size;
/*
* Misalignment implies !within16, and therefore half atomicity.
* Any host prepared for two operations can implement this with
* half alignment.
*/
if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
align = MAX(align, half);
}
break;
case MO_ATOM_SUBALIGN:
atmax = size;
if (host_atom != MO_ATOM_SUBALIGN) {
/* If unaligned but not odd, there are subobjects up to half. */
if (allow_two_ops) {
align = MAX(align, half);
} else {
align = MAX(align, size);
}
}
break;
default:
g_assert_not_reached();
}
return (TCGAtomAlign){ .atom = atmax, .align = align };
}
/*
* Similarly for qemu_ld/st slow path helpers.
* We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
@ -5109,57 +5362,12 @@ static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
return ofs;
}
static void tcg_out_helper_load_regs(TCGContext *s,
unsigned nmov, TCGMovExtend *mov,
unsigned ntmp, const int *tmp)
{
switch (nmov) {
default:
/* The backend must have provided enough temps for the worst case. */
tcg_debug_assert(ntmp + 1 >= nmov);
for (unsigned i = nmov - 1; i >= 2; --i) {
TCGReg dst = mov[i].dst;
for (unsigned j = 0; j < i; ++j) {
if (dst == mov[j].src) {
/*
* Conflict.
* Copy the source to a temporary, recurse for the
* remaining moves, perform the extension from our
* scratch on the way out.
*/
TCGReg scratch = tmp[--ntmp];
tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
mov[i].src = scratch;
tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
tcg_out_movext1(s, &mov[i]);
return;
}
}
/* No conflicts: perform this move and continue. */
tcg_out_movext1(s, &mov[i]);
}
/* fall through for the final two moves */
case 2:
tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
return;
case 1:
tcg_out_movext1(s, mov);
return;
case 0:
g_assert_not_reached();
}
}
static void tcg_out_helper_load_slots(TCGContext *s,
unsigned nmov, TCGMovExtend *mov,
const TCGLdstHelperParam *parm)
{
unsigned i;
TCGReg dst3;
/*
* Start from the end, storing to the stack first.
@ -5197,7 +5405,47 @@ static void tcg_out_helper_load_slots(TCGContext *s,
for (i = 0; i < nmov; ++i) {
mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
}
tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
switch (nmov) {
case 4:
/* The backend must have provided enough temps for the worst case. */
tcg_debug_assert(parm->ntmp >= 2);
dst3 = mov[3].dst;
for (unsigned j = 0; j < 3; ++j) {
if (dst3 == mov[j].src) {
/*
* Conflict. Copy the source to a temporary, perform the
* remaining moves, then the extension from our scratch
* on the way out.
*/
TCGReg scratch = parm->tmp[1];
tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
tcg_out_movext1_new_src(s, &mov[3], scratch);
break;
}
}
/* No conflicts: perform this move and continue. */
tcg_out_movext1(s, &mov[3]);
/* fall through */
case 3:
tcg_out_movext3(s, mov, mov + 1, mov + 2,
parm->ntmp ? parm->tmp[0] : -1);
break;
case 2:
tcg_out_movext2(s, mov, mov + 1,
parm->ntmp ? parm->tmp[0] : -1);
break;
case 1:
tcg_out_movext1(s, mov);
break;
default:
g_assert_not_reached();
}
}
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
@ -5288,6 +5536,8 @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
TCGType dst_type, TCGType src_type,
TCGReg lo, TCGReg hi)
{
MemOp reg_mo;
if (dst_type <= TCG_TYPE_REG) {
MemOp src_ext;
@ -5315,19 +5565,25 @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
return 1;
}
assert(TCG_TARGET_REG_BITS == 32);
if (TCG_TARGET_REG_BITS == 32) {
assert(dst_type == TCG_TYPE_I64);
reg_mo = MO_32;
} else {
assert(dst_type == TCG_TYPE_I128);
reg_mo = MO_64;
}
mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
mov[0].src = lo;
mov[0].dst_type = TCG_TYPE_I32;
mov[0].src_type = TCG_TYPE_I32;
mov[0].src_ext = MO_32;
mov[0].dst_type = TCG_TYPE_REG;
mov[0].src_type = TCG_TYPE_REG;
mov[0].src_ext = reg_mo;
mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
mov[1].src = hi;
mov[1].dst_type = TCG_TYPE_I32;
mov[1].src_type = TCG_TYPE_I32;
mov[1].src_ext = MO_32;
mov[1].dst_type = TCG_TYPE_REG;
mov[1].src_type = TCG_TYPE_REG;
mov[1].src_ext = reg_mo;
return 2;
}
@ -5350,6 +5606,9 @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
case MO_64:
info = &info_helper_ld64_mmu;
break;
case MO_128:
info = &info_helper_ld128_mmu;
break;
default:
g_assert_not_reached();
}
@ -5358,14 +5617,54 @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
next_arg = 1;
loc = &info->in[next_arg];
nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
/*
* 32-bit host with 32-bit guest: zero-extend the guest address
* to 64-bits for the helper by storing the low part, then
* load a zero for the high part.
*/
tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
TCG_TYPE_I32, TCG_TYPE_I32,
ldst->addrlo_reg, -1);
tcg_out_helper_load_slots(s, 1, mov, parm);
tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
TCG_TYPE_I32, 0, parm);
next_arg += 2;
} else {
nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
ldst->addrlo_reg, ldst->addrhi_reg);
next_arg += nmov;
tcg_out_helper_load_slots(s, nmov, mov, parm);
next_arg += nmov;
}
/* No special attention for 32 and 64-bit return values. */
tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
switch (info->out_kind) {
case TCG_CALL_RET_NORMAL:
case TCG_CALL_RET_BY_VEC:
break;
case TCG_CALL_RET_BY_REF:
/*
* The return reference is in the first argument slot.
* We need memory in which to return: re-use the top of stack.
*/
{
int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
if (arg_slot_reg_p(0)) {
tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
TCG_REG_CALL_STACK, ofs_slot0);
} else {
tcg_debug_assert(parm->ntmp != 0);
tcg_out_addi_ptr(s, parm->tmp[0],
TCG_REG_CALL_STACK, ofs_slot0);
tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
TCG_REG_CALL_STACK, ofs_slot0);
}
}
break;
default:
g_assert_not_reached();
}
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
@ -5374,11 +5673,18 @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
bool load_sign,
const TCGLdstHelperParam *parm)
{
TCGMovExtend mov[2];
if (ldst->type <= TCG_TYPE_REG) {
MemOp mop = get_memop(ldst->oi);
TCGMovExtend mov[2];
int ofs_slot0;
switch (ldst->type) {
case TCG_TYPE_I64:
if (TCG_TARGET_REG_BITS == 32) {
break;
}
/* fall through */
case TCG_TYPE_I32:
mov[0].dst = ldst->datalo_reg;
mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
mov[0].dst_type = ldst->type;
@ -5404,26 +5710,50 @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
mov[0].src_ext = mop & MO_SSIZE;
}
tcg_out_movext1(s, mov);
} else {
assert(TCG_TARGET_REG_BITS == 32);
return;
case TCG_TYPE_I128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
switch (TCG_TARGET_CALL_RET_I128) {
case TCG_CALL_RET_NORMAL:
break;
case TCG_CALL_RET_BY_VEC:
tcg_out_st(s, TCG_TYPE_V128,
tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
TCG_REG_CALL_STACK, ofs_slot0);
/* fall through */
case TCG_CALL_RET_BY_REF:
tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
return;
default:
g_assert_not_reached();
}
break;
default:
g_assert_not_reached();
}
mov[0].dst = ldst->datalo_reg;
mov[0].src =
tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
mov[0].dst_type = TCG_TYPE_I32;
mov[0].src_type = TCG_TYPE_I32;
mov[0].src_ext = MO_32;
mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
mov[1].dst = ldst->datahi_reg;
mov[1].src =
tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
mov[1].dst_type = TCG_TYPE_REG;
mov[1].src_type = TCG_TYPE_REG;
mov[1].src_ext = MO_32;
mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
}
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
const TCGLdstHelperParam *parm)
@ -5446,6 +5776,10 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
info = &info_helper_st64_mmu;
data_type = TCG_TYPE_I64;
break;
case MO_128:
info = &info_helper_st128_mmu;
data_type = TCG_TYPE_I128;
break;
default:
g_assert_not_reached();
}
@ -5456,20 +5790,74 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
/* Handle addr argument. */
loc = &info->in[next_arg];
n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
/*
* 32-bit host with 32-bit guest: zero-extend the guest address
* to 64-bits for the helper by storing the low part. Later,
* after we have processed the register inputs, we will load a
* zero for the high part.
*/
tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
TCG_TYPE_I32, TCG_TYPE_I32,
ldst->addrlo_reg, -1);
next_arg += 2;
nmov += 1;
} else {
n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
ldst->addrlo_reg, ldst->addrhi_reg);
next_arg += n;
nmov += n;
}
/* Handle data argument. */
loc = &info->in[next_arg];
switch (loc->kind) {
case TCG_CALL_ARG_NORMAL:
case TCG_CALL_ARG_EXTEND_U:
case TCG_CALL_ARG_EXTEND_S:
n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
ldst->datalo_reg, ldst->datahi_reg);
next_arg += n;
nmov += n;
tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
tcg_out_helper_load_slots(s, nmov, mov, parm);
break;
case TCG_CALL_ARG_BY_REF:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
tcg_debug_assert(data_type == TCG_TYPE_I128);
tcg_out_st(s, TCG_TYPE_I64,
HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
tcg_out_st(s, TCG_TYPE_I64,
HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
tcg_out_helper_load_slots(s, nmov, mov, parm);
if (arg_slot_reg_p(loc->arg_slot)) {
tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
TCG_REG_CALL_STACK,
arg_slot_stk_ofs(loc->ref_slot));
} else {
tcg_debug_assert(parm->ntmp != 0);
tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
arg_slot_stk_ofs(loc->ref_slot));
tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
}
next_arg += 2;
break;
default:
g_assert_not_reached();
}
if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
/* Zero extend the address by loading a zero for the high part. */
loc = &info->in[1 + !HOST_BIG_ENDIAN];
tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
}
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
@ -5582,7 +5970,7 @@ int64_t tcg_cpu_exec_time(void)
#endif
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
#ifdef CONFIG_PROFILER
TCGProfile *prof = &s->prof;
@ -5743,13 +6131,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
}
num_insns++;
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
a = op->args[i];
#endif
s->gen_insn_data[num_insns][i] = a;
s->gen_insn_data[num_insns][i] =
tcg_get_insn_start_param(op, i);
}
break;
case INDEX_op_discard:

235
tcg/tci.c
View File

@ -286,162 +286,54 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
return result;
}
static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
static uint64_t tci_qemu_ld(CPUArchState *env, uint64_t taddr,
MemOpIdx oi, const void *tb_ptr)
{
MemOp mop = get_memop(oi);
uintptr_t ra = (uintptr_t)tb_ptr;
#ifdef CONFIG_SOFTMMU
switch (mop & (MO_BSWAP | MO_SSIZE)) {
switch (mop & MO_SSIZE) {
case MO_UB:
return helper_ret_ldub_mmu(env, taddr, oi, ra);
return helper_ldub_mmu(env, taddr, oi, ra);
case MO_SB:
return helper_ret_ldsb_mmu(env, taddr, oi, ra);
case MO_LEUW:
return helper_le_lduw_mmu(env, taddr, oi, ra);
case MO_LESW:
return helper_le_ldsw_mmu(env, taddr, oi, ra);
case MO_LEUL:
return helper_le_ldul_mmu(env, taddr, oi, ra);
case MO_LESL:
return helper_le_ldsl_mmu(env, taddr, oi, ra);
case MO_LEUQ:
return helper_le_ldq_mmu(env, taddr, oi, ra);
case MO_BEUW:
return helper_be_lduw_mmu(env, taddr, oi, ra);
case MO_BESW:
return helper_be_ldsw_mmu(env, taddr, oi, ra);
case MO_BEUL:
return helper_be_ldul_mmu(env, taddr, oi, ra);
case MO_BESL:
return helper_be_ldsl_mmu(env, taddr, oi, ra);
case MO_BEUQ:
return helper_be_ldq_mmu(env, taddr, oi, ra);
return helper_ldsb_mmu(env, taddr, oi, ra);
case MO_UW:
return helper_lduw_mmu(env, taddr, oi, ra);
case MO_SW:
return helper_ldsw_mmu(env, taddr, oi, ra);
case MO_UL:
return helper_ldul_mmu(env, taddr, oi, ra);
case MO_SL:
return helper_ldsl_mmu(env, taddr, oi, ra);
case MO_UQ:
return helper_ldq_mmu(env, taddr, oi, ra);
default:
g_assert_not_reached();
}
#else
void *haddr = g2h(env_cpu(env), taddr);
unsigned a_mask = (1u << get_alignment_bits(mop)) - 1;
uint64_t ret;
set_helper_retaddr(ra);
if (taddr & a_mask) {
helper_unaligned_ld(env, taddr);
}
switch (mop & (MO_BSWAP | MO_SSIZE)) {
case MO_UB:
ret = ldub_p(haddr);
break;
case MO_SB:
ret = ldsb_p(haddr);
break;
case MO_LEUW:
ret = lduw_le_p(haddr);
break;
case MO_LESW:
ret = ldsw_le_p(haddr);
break;
case MO_LEUL:
ret = (uint32_t)ldl_le_p(haddr);
break;
case MO_LESL:
ret = (int32_t)ldl_le_p(haddr);
break;
case MO_LEUQ:
ret = ldq_le_p(haddr);
break;
case MO_BEUW:
ret = lduw_be_p(haddr);
break;
case MO_BESW:
ret = ldsw_be_p(haddr);
break;
case MO_BEUL:
ret = (uint32_t)ldl_be_p(haddr);
break;
case MO_BESL:
ret = (int32_t)ldl_be_p(haddr);
break;
case MO_BEUQ:
ret = ldq_be_p(haddr);
break;
default:
g_assert_not_reached();
}
clear_helper_retaddr();
return ret;
#endif
}
static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val,
MemOpIdx oi, const void *tb_ptr)
{
MemOp mop = get_memop(oi);
uintptr_t ra = (uintptr_t)tb_ptr;
#ifdef CONFIG_SOFTMMU
switch (mop & (MO_BSWAP | MO_SIZE)) {
switch (mop & MO_SIZE) {
case MO_UB:
helper_ret_stb_mmu(env, taddr, val, oi, ra);
helper_stb_mmu(env, taddr, val, oi, ra);
break;
case MO_LEUW:
helper_le_stw_mmu(env, taddr, val, oi, ra);
case MO_UW:
helper_stw_mmu(env, taddr, val, oi, ra);
break;
case MO_LEUL:
helper_le_stl_mmu(env, taddr, val, oi, ra);
case MO_UL:
helper_stl_mmu(env, taddr, val, oi, ra);
break;
case MO_LEUQ:
helper_le_stq_mmu(env, taddr, val, oi, ra);
break;
case MO_BEUW:
helper_be_stw_mmu(env, taddr, val, oi, ra);
break;
case MO_BEUL:
helper_be_stl_mmu(env, taddr, val, oi, ra);
break;
case MO_BEUQ:
helper_be_stq_mmu(env, taddr, val, oi, ra);
case MO_UQ:
helper_stq_mmu(env, taddr, val, oi, ra);
break;
default:
g_assert_not_reached();
}
#else
void *haddr = g2h(env_cpu(env), taddr);
unsigned a_mask = (1u << get_alignment_bits(mop)) - 1;
set_helper_retaddr(ra);
if (taddr & a_mask) {
helper_unaligned_st(env, taddr);
}
switch (mop & (MO_BSWAP | MO_SIZE)) {
case MO_UB:
stb_p(haddr, val);
break;
case MO_LEUW:
stw_le_p(haddr, val);
break;
case MO_LEUL:
stl_le_p(haddr, val);
break;
case MO_LEUQ:
stq_le_p(haddr, val);
break;
case MO_BEUW:
stw_be_p(haddr, val);
break;
case MO_BEUL:
stl_be_p(haddr, val);
break;
case MO_BEUQ:
stq_be_p(haddr, val);
break;
default:
g_assert_not_reached();
}
clear_helper_retaddr();
#endif
}
#if TCG_TARGET_REG_BITS == 64
@ -480,10 +372,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
TCGReg r0, r1, r2, r3, r4, r5;
tcg_target_ulong t1;
TCGCond condition;
target_ulong taddr;
uint8_t pos, len;
uint32_t tmp32;
uint64_t tmp64;
uint64_t tmp64, taddr;
uint64_t T1, T2;
MemOpIdx oi;
int32_t ofs;
@ -1030,30 +921,41 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tb_ptr = ptr;
break;
case INDEX_op_qemu_ld_i32:
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
case INDEX_op_qemu_ld_a32_i32:
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = (uint32_t)regs[r1];
goto do_ld_i32;
case INDEX_op_qemu_ld_a64_i32:
if (TCG_TARGET_REG_BITS == 64) {
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = regs[r1];
} else {
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
taddr = tci_uint64(regs[r2], regs[r1]);
}
tmp32 = tci_qemu_ld(env, taddr, oi, tb_ptr);
regs[r0] = tmp32;
do_ld_i32:
regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
break;
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_a32_i64:
if (TCG_TARGET_REG_BITS == 64) {
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = (uint32_t)regs[r1];
} else {
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
taddr = (uint32_t)regs[r2];
}
goto do_ld_i64;
case INDEX_op_qemu_ld_a64_i64:
if (TCG_TARGET_REG_BITS == 64) {
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = regs[r1];
} else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
taddr = regs[r2];
} else {
tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
taddr = tci_uint64(regs[r3], regs[r2]);
oi = regs[r4];
}
do_ld_i64:
tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr);
if (TCG_TARGET_REG_BITS == 32) {
tci_write_reg64(regs, r1, r0, tmp64);
@ -1062,34 +964,45 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
}
break;
case INDEX_op_qemu_st_i32:
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
case INDEX_op_qemu_st_a32_i32:
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = (uint32_t)regs[r1];
goto do_st_i32;
case INDEX_op_qemu_st_a64_i32:
if (TCG_TARGET_REG_BITS == 64) {
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = regs[r1];
} else {
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
taddr = tci_uint64(regs[r2], regs[r1]);
}
tmp32 = regs[r0];
tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
do_st_i32:
tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
break;
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_a32_i64:
if (TCG_TARGET_REG_BITS == 64) {
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = regs[r1];
tmp64 = regs[r0];
taddr = (uint32_t)regs[r1];
} else {
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
taddr = regs[r2];
tmp64 = tci_uint64(regs[r1], regs[r0]);
taddr = (uint32_t)regs[r2];
}
goto do_st_i64;
case INDEX_op_qemu_st_a64_i64:
if (TCG_TARGET_REG_BITS == 64) {
tci_args_rrm(insn, &r0, &r1, &oi);
tmp64 = regs[r0];
taddr = regs[r1];
} else {
tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
tmp64 = tci_uint64(regs[r1], regs[r0]);
taddr = tci_uint64(regs[r3], regs[r2]);
oi = regs[r4];
}
tmp64 = tci_uint64(regs[r1], regs[r0]);
}
do_st_i64:
tci_qemu_st(env, taddr, tmp64, oi, tb_ptr);
break;
@ -1359,15 +1272,21 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
str_r(r3), str_r(r4), str_r(r5));
break;
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64:
len = DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_st_a32_i32:
len = 1 + 1;
goto do_qemu_ldst;
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_st_a64_i32:
len = 1 + DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
goto do_qemu_ldst;
case INDEX_op_qemu_ld_a64_i64:
case INDEX_op_qemu_st_a64_i64:
len = 2 * DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
goto do_qemu_ldst;
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_st_i32:
len = 1;
do_qemu_ldst:
len += DIV_ROUND_UP(TARGET_LONG_BITS, TCG_TARGET_REG_BITS);
switch (len) {
case 2:
tci_args_rrm(insn, &r0, &r1, &oi);

View File

@ -156,22 +156,22 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_setcond2_i32:
return C_O1_I4(r, r, r, r, r);
case INDEX_op_qemu_ld_i32:
return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
? C_O1_I1(r, r)
: C_O1_I2(r, r, r));
case INDEX_op_qemu_ld_i64:
return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
: TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, r)
: C_O2_I2(r, r, r, r));
case INDEX_op_qemu_st_i32:
return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
? C_O0_I2(r, r)
: C_O0_I3(r, r, r));
case INDEX_op_qemu_st_i64:
return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
: TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(r, r, r)
: C_O0_I4(r, r, r, r));
case INDEX_op_qemu_ld_a32_i32:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_a64_i32:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
case INDEX_op_qemu_ld_a32_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
case INDEX_op_qemu_ld_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
case INDEX_op_qemu_st_a32_i32:
return C_O0_I2(r, r);
case INDEX_op_qemu_st_a64_i32:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
case INDEX_op_qemu_st_a32_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
case INDEX_op_qemu_st_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
default:
g_assert_not_reached();
@ -243,7 +243,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
return false;
}
static void stack_bounds_check(TCGReg base, target_long offset)
static void stack_bounds_check(TCGReg base, intptr_t offset)
{
if (base == TCG_REG_CALL_STACK) {
tcg_debug_assert(offset >= 0);
@ -849,21 +849,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
break;
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_st_i32:
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_st_a32_i32:
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
break;
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_st_a32_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
} else {
tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
}
break;
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_ld_a64_i64:
case INDEX_op_qemu_st_a64_i64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
} else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
} else {
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
tcg_out_op_rrrrr(s, opc, args[0], args[1],
@ -963,3 +966,8 @@ static void tcg_target_init(TCGContext *s)
static inline void tcg_target_qemu_prologue(TCGContext *s)
{
}
bool tcg_target_has_memory_bswap(MemOp memop)
{
return true;
}

View File

@ -127,6 +127,8 @@
#define TCG_TARGET_HAS_mulu2_i32 1
#endif /* TCG_TARGET_REG_BITS == 64 */
#define TCG_TARGET_HAS_qemu_ldst_i128 0
/* Number of registers available. */
#define TCG_TARGET_NB_REGS 16
@ -176,6 +178,4 @@ typedef enum {
We prefer consistency across hosts on this. */
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
#endif /* TCG_TARGET_H */