mirror of https://github.com/xemu-project/xemu.git
Misc tcg-related patch queue.
-----BEGIN PGP SIGNATURE----- iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmRRb30dHHJpY2hhcmQu aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV+obAgAmL4F1gdkbUUPKnkv poYwutCX+c3kog22TF29BlKgC8vJa6UbRLMphz5q7v3wbCKQJMeNV/sKa+mhnHBK CB3wP8xXVAahWFARmWTIZEqlB3HQ/RIzhc5saKkiSzcGIrtXUj6fdfrz7mae+w/g kDGCbK8hGyuE580j9QAIPbpfqPoNhIPziECFA1AsNf5Krpxc1nDqIfZEuUzTLtLO 1WoSaUVbiGDQrTe2OVKF2mtrGbr2vWI1vnHJl67Lom6rG0LzOjb3W/8IN+n0+46E 7pMlUCDT1zeTxevRxBvDmwgCYA/QjFosd4enUuhVReTxTNhUc69+QyuOAhHO/IEq T0V3eA== =qZDQ -----END PGP SIGNATURE----- Merge tag 'pull-tcg-20230502-2' of https://gitlab.com/rth7680/qemu into staging Misc tcg-related patch queue. # -----BEGIN PGP SIGNATURE----- # # iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmRRb30dHHJpY2hhcmQu # aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV+obAgAmL4F1gdkbUUPKnkv # poYwutCX+c3kog22TF29BlKgC8vJa6UbRLMphz5q7v3wbCKQJMeNV/sKa+mhnHBK # CB3wP8xXVAahWFARmWTIZEqlB3HQ/RIzhc5saKkiSzcGIrtXUj6fdfrz7mae+w/g # kDGCbK8hGyuE580j9QAIPbpfqPoNhIPziECFA1AsNf5Krpxc1nDqIfZEuUzTLtLO # 1WoSaUVbiGDQrTe2OVKF2mtrGbr2vWI1vnHJl67Lom6rG0LzOjb3W/8IN+n0+46E # 7pMlUCDT1zeTxevRxBvDmwgCYA/QjFosd4enUuhVReTxTNhUc69+QyuOAhHO/IEq # T0V3eA== # =qZDQ # -----END PGP SIGNATURE----- # gpg: Signature made Tue 02 May 2023 09:15:57 PM BST # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate] * tag 'pull-tcg-20230502-2' of https://gitlab.com/rth7680/qemu: tcg: Introduce tcg_out_movext2 tcg/mips: Conditionalize tcg_out_exts_i32_i64 tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64 accel/tcg: Add cpu_ld*_code_mmu migration/xbzrle: Use __attribute__((target)) for avx512 qemu/int128: Re-shuffle Int128Alias members tcg: Add tcg_gen_gvec_rotrs tcg: Add tcg_gen_gvec_andcs qemu/host-utils.h: Add clz and ctz functions for lower-bit integers qemu/bitops.h: Limit rotate amounts accel/tcg: Uncache the host address for instruction fetch when tlb size < 1 softmmu: Tidy dirtylimit_dirty_ring_full_time Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
commit
4ebc33f3f3
|
@ -1696,6 +1696,11 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
|
|||
if (p == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (full->lg_page_size < TARGET_PAGE_BITS) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (hostp) {
|
||||
*hostp = p;
|
||||
}
|
||||
|
@ -2768,3 +2773,51 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
|
|||
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
|
||||
return full_ldq_code(env, addr, oi, 0);
|
||||
}
|
||||
|
||||
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t retaddr)
|
||||
{
|
||||
return full_ldub_code(env, addr, oi, retaddr);
|
||||
}
|
||||
|
||||
uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t retaddr)
|
||||
{
|
||||
MemOp mop = get_memop(oi);
|
||||
int idx = get_mmuidx(oi);
|
||||
uint16_t ret;
|
||||
|
||||
ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
|
||||
if ((mop & MO_BSWAP) != MO_TE) {
|
||||
ret = bswap16(ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t retaddr)
|
||||
{
|
||||
MemOp mop = get_memop(oi);
|
||||
int idx = get_mmuidx(oi);
|
||||
uint32_t ret;
|
||||
|
||||
ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
|
||||
if ((mop & MO_BSWAP) != MO_TE) {
|
||||
ret = bswap32(ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t retaddr)
|
||||
{
|
||||
MemOp mop = get_memop(oi);
|
||||
int idx = get_mmuidx(oi);
|
||||
uint64_t ret;
|
||||
|
||||
ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
|
||||
if ((mop & MO_BSWAP) != MO_TE) {
|
||||
ret = bswap64(ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -550,6 +550,17 @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
|
|||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
||||
*(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
|
|
|
@ -217,6 +217,7 @@ DEF_HELPER_FLAGS_4(gvec_nor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
|||
DEF_HELPER_FLAGS_4(gvec_eqv, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_andcs, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
|
||||
|
|
|
@ -1219,6 +1219,64 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
|
|||
return ret;
|
||||
}
|
||||
|
||||
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t ra)
|
||||
{
|
||||
void *haddr;
|
||||
uint8_t ret;
|
||||
|
||||
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
|
||||
ret = ldub_p(haddr);
|
||||
clear_helper_retaddr();
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t ra)
|
||||
{
|
||||
void *haddr;
|
||||
uint16_t ret;
|
||||
|
||||
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
|
||||
ret = lduw_p(haddr);
|
||||
clear_helper_retaddr();
|
||||
if (get_memop(oi) & MO_BSWAP) {
|
||||
ret = bswap16(ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t ra)
|
||||
{
|
||||
void *haddr;
|
||||
uint32_t ret;
|
||||
|
||||
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
|
||||
ret = ldl_p(haddr);
|
||||
clear_helper_retaddr();
|
||||
if (get_memop(oi) & MO_BSWAP) {
|
||||
ret = bswap32(ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t ra)
|
||||
{
|
||||
void *haddr;
|
||||
uint64_t ret;
|
||||
|
||||
validate_memop(oi, MO_BEUQ);
|
||||
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
|
||||
ret = ldq_p(haddr);
|
||||
clear_helper_retaddr();
|
||||
if (get_memop(oi) & MO_BSWAP) {
|
||||
ret = bswap64(ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#include "ldst_common.c.inc"
|
||||
|
||||
/*
|
||||
|
|
|
@ -445,6 +445,15 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
|
|||
# define cpu_stq_mmu cpu_stq_le_mmu
|
||||
#endif
|
||||
|
||||
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t ra);
|
||||
uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t ra);
|
||||
uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t ra);
|
||||
uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
|
||||
MemOpIdx oi, uintptr_t ra);
|
||||
|
||||
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
|
||||
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
|
||||
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
|
||||
|
|
|
@ -218,7 +218,7 @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr,
|
|||
*/
|
||||
static inline uint8_t rol8(uint8_t word, unsigned int shift)
|
||||
{
|
||||
return (word << shift) | (word >> ((8 - shift) & 7));
|
||||
return (word << (shift & 7)) | (word >> (-shift & 7));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -228,7 +228,7 @@ static inline uint8_t rol8(uint8_t word, unsigned int shift)
|
|||
*/
|
||||
static inline uint8_t ror8(uint8_t word, unsigned int shift)
|
||||
{
|
||||
return (word >> shift) | (word << ((8 - shift) & 7));
|
||||
return (word >> (shift & 7)) | (word << (-shift & 7));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -238,7 +238,7 @@ static inline uint8_t ror8(uint8_t word, unsigned int shift)
|
|||
*/
|
||||
static inline uint16_t rol16(uint16_t word, unsigned int shift)
|
||||
{
|
||||
return (word << shift) | (word >> ((16 - shift) & 15));
|
||||
return (word << (shift & 15)) | (word >> (-shift & 15));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -248,7 +248,7 @@ static inline uint16_t rol16(uint16_t word, unsigned int shift)
|
|||
*/
|
||||
static inline uint16_t ror16(uint16_t word, unsigned int shift)
|
||||
{
|
||||
return (word >> shift) | (word << ((16 - shift) & 15));
|
||||
return (word >> (shift & 15)) | (word << (-shift & 15));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -258,7 +258,7 @@ static inline uint16_t ror16(uint16_t word, unsigned int shift)
|
|||
*/
|
||||
static inline uint32_t rol32(uint32_t word, unsigned int shift)
|
||||
{
|
||||
return (word << shift) | (word >> ((32 - shift) & 31));
|
||||
return (word << (shift & 31)) | (word >> (-shift & 31));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -268,7 +268,7 @@ static inline uint32_t rol32(uint32_t word, unsigned int shift)
|
|||
*/
|
||||
static inline uint32_t ror32(uint32_t word, unsigned int shift)
|
||||
{
|
||||
return (word >> shift) | (word << ((32 - shift) & 31));
|
||||
return (word >> (shift & 31)) | (word << (-shift & 31));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -278,7 +278,7 @@ static inline uint32_t ror32(uint32_t word, unsigned int shift)
|
|||
*/
|
||||
static inline uint64_t rol64(uint64_t word, unsigned int shift)
|
||||
{
|
||||
return (word << shift) | (word >> ((64 - shift) & 63));
|
||||
return (word << (shift & 63)) | (word >> (-shift & 63));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -288,7 +288,7 @@ static inline uint64_t rol64(uint64_t word, unsigned int shift)
|
|||
*/
|
||||
static inline uint64_t ror64(uint64_t word, unsigned int shift)
|
||||
{
|
||||
return (word >> shift) | (word << ((64 - shift) & 63));
|
||||
return (word >> (shift & 63)) | (word << (-shift & 63));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -107,6 +107,36 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
|
|||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* clz8 - count leading zeros in a 8-bit value.
|
||||
* @val: The value to search
|
||||
*
|
||||
* Returns 8 if the value is zero. Note that the GCC builtin is
|
||||
* undefined if the value is zero.
|
||||
*
|
||||
* Note that the GCC builtin will upcast its argument to an `unsigned int`
|
||||
* so this function subtracts off the number of prepended zeroes.
|
||||
*/
|
||||
static inline int clz8(uint8_t val)
|
||||
{
|
||||
return val ? __builtin_clz(val) - 24 : 8;
|
||||
}
|
||||
|
||||
/**
|
||||
* clz16 - count leading zeros in a 16-bit value.
|
||||
* @val: The value to search
|
||||
*
|
||||
* Returns 16 if the value is zero. Note that the GCC builtin is
|
||||
* undefined if the value is zero.
|
||||
*
|
||||
* Note that the GCC builtin will upcast its argument to an `unsigned int`
|
||||
* so this function subtracts off the number of prepended zeroes.
|
||||
*/
|
||||
static inline int clz16(uint16_t val)
|
||||
{
|
||||
return val ? __builtin_clz(val) - 16 : 16;
|
||||
}
|
||||
|
||||
/**
|
||||
* clz32 - count leading zeros in a 32-bit value.
|
||||
* @val: The value to search
|
||||
|
@ -153,6 +183,30 @@ static inline int clo64(uint64_t val)
|
|||
return clz64(~val);
|
||||
}
|
||||
|
||||
/**
|
||||
* ctz8 - count trailing zeros in a 8-bit value.
|
||||
* @val: The value to search
|
||||
*
|
||||
* Returns 8 if the value is zero. Note that the GCC builtin is
|
||||
* undefined if the value is zero.
|
||||
*/
|
||||
static inline int ctz8(uint8_t val)
|
||||
{
|
||||
return val ? __builtin_ctz(val) : 8;
|
||||
}
|
||||
|
||||
/**
|
||||
* ctz16 - count trailing zeros in a 16-bit value.
|
||||
* @val: The value to search
|
||||
*
|
||||
* Returns 16 if the value is zero. Note that the GCC builtin is
|
||||
* undefined if the value is zero.
|
||||
*/
|
||||
static inline int ctz16(uint16_t val)
|
||||
{
|
||||
return val ? __builtin_ctz(val) : 16;
|
||||
}
|
||||
|
||||
/**
|
||||
* ctz32 - count trailing zeros in a 32-bit value.
|
||||
* @val: The value to search
|
||||
|
|
|
@ -483,9 +483,9 @@ static inline void bswap128s(Int128 *s)
|
|||
*/
|
||||
#ifdef CONFIG_INT128
|
||||
typedef union {
|
||||
Int128 s;
|
||||
__int128_t i;
|
||||
__uint128_t u;
|
||||
__int128_t i;
|
||||
Int128 s;
|
||||
} Int128Alias __attribute__((transparent_union));
|
||||
#else
|
||||
typedef Int128 Int128Alias;
|
||||
|
|
|
@ -330,6 +330,8 @@ void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
|
|||
|
||||
void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
|
@ -369,6 +371,8 @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
|
|||
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
/*
|
||||
* Perform vector shift by vector element, modulo the element size.
|
||||
|
|
|
@ -2370,12 +2370,9 @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
|
|||
config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
|
||||
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
|
||||
.require(cc.links('''
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bw")
|
||||
#include <cpuid.h>
|
||||
#include <immintrin.h>
|
||||
static int bar(void *a) {
|
||||
|
||||
static int __attribute__((target("avx512bw"))) bar(void *a) {
|
||||
__m512i *x = a;
|
||||
__m512i res= _mm512_abs_epi8(*x);
|
||||
return res[1];
|
||||
|
|
|
@ -177,11 +177,11 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
|
|||
}
|
||||
|
||||
#if defined(CONFIG_AVX512BW_OPT)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bw")
|
||||
#include <immintrin.h>
|
||||
int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
|
||||
uint8_t *dst, int dlen)
|
||||
|
||||
int __attribute__((target("avx512bw")))
|
||||
xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
|
||||
uint8_t *dst, int dlen)
|
||||
{
|
||||
uint32_t zrun_len = 0, nzrun_len = 0;
|
||||
int d = 0, i = 0, num = 0;
|
||||
|
@ -296,5 +296,4 @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
|
|||
}
|
||||
return d;
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
|
|
@ -232,18 +232,23 @@ bool dirtylimit_vcpu_index_valid(int cpu_index)
|
|||
cpu_index >= ms->smp.max_cpus);
|
||||
}
|
||||
|
||||
static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
|
||||
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
|
||||
{
|
||||
static uint64_t max_dirtyrate;
|
||||
uint32_t dirty_ring_size = kvm_dirty_ring_size();
|
||||
uint64_t dirty_ring_size_meory_MB =
|
||||
dirty_ring_size * qemu_target_page_size() >> 20;
|
||||
unsigned target_page_bits = qemu_target_page_bits();
|
||||
uint64_t dirty_ring_size_MB;
|
||||
|
||||
/* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */
|
||||
assert(target_page_bits < 20);
|
||||
|
||||
/* Convert ring size (pages) to MiB (2**20). */
|
||||
dirty_ring_size_MB = kvm_dirty_ring_size() >> (20 - target_page_bits);
|
||||
|
||||
if (max_dirtyrate < dirtyrate) {
|
||||
max_dirtyrate = dirtyrate;
|
||||
}
|
||||
|
||||
return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate;
|
||||
return dirty_ring_size_MB * 1000000 / max_dirtyrate;
|
||||
}
|
||||
|
||||
static inline bool dirtylimit_done(uint64_t quota,
|
||||
|
|
|
@ -1545,7 +1545,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
|
|||
|
||||
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
|
||||
{
|
||||
TCGReg argreg, datalo, datahi;
|
||||
TCGReg argreg;
|
||||
MemOpIdx oi = lb->oi;
|
||||
MemOp opc = get_memop(oi);
|
||||
|
||||
|
@ -1565,22 +1565,16 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
|
|||
/* Use the canonical unsigned helpers and minimize icache usage. */
|
||||
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
|
||||
|
||||
datalo = lb->datalo_reg;
|
||||
datahi = lb->datahi_reg;
|
||||
if ((opc & MO_SIZE) == MO_64) {
|
||||
if (datalo != TCG_REG_R1) {
|
||||
tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
|
||||
tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
|
||||
} else if (datahi != TCG_REG_R0) {
|
||||
tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
|
||||
tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
|
||||
} else {
|
||||
tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
|
||||
tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
|
||||
tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
|
||||
}
|
||||
TCGMovExtend ext[2] = {
|
||||
{ .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
|
||||
.src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
|
||||
{ .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
|
||||
.src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
|
||||
};
|
||||
tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
|
||||
} else {
|
||||
tcg_out_movext(s, TCG_TYPE_I32, datalo,
|
||||
tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
|
||||
TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
|
||||
}
|
||||
|
||||
|
@ -1663,17 +1657,15 @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
|
|||
|
||||
if (TARGET_LONG_BITS == 64) {
|
||||
/* 64-bit target address is aligned into R2:R3. */
|
||||
if (l->addrhi_reg != TCG_REG_R2) {
|
||||
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
|
||||
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
|
||||
} else if (l->addrlo_reg != TCG_REG_R3) {
|
||||
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
|
||||
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
|
||||
} else {
|
||||
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2);
|
||||
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3);
|
||||
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1);
|
||||
}
|
||||
TCGMovExtend ext[2] = {
|
||||
{ .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
|
||||
.src = l->addrlo_reg,
|
||||
.src_type = TCG_TYPE_I32, .src_ext = MO_UL },
|
||||
{ .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
|
||||
.src = l->addrhi_reg,
|
||||
.src_type = TCG_TYPE_I32, .src_ext = MO_UL },
|
||||
};
|
||||
tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
|
||||
} else {
|
||||
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
|
||||
}
|
||||
|
|
|
@ -1914,7 +1914,6 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
|
|||
{
|
||||
MemOpIdx oi = l->oi;
|
||||
MemOp opc = get_memop(oi);
|
||||
TCGReg data_reg;
|
||||
tcg_insn_unit **label_ptr = &l->label_ptr[0];
|
||||
|
||||
/* resolve label address */
|
||||
|
@ -1951,18 +1950,16 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
|
|||
|
||||
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
|
||||
|
||||
data_reg = l->datalo_reg;
|
||||
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
|
||||
if (data_reg == TCG_REG_EDX) {
|
||||
/* xchg %edx, %eax */
|
||||
tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
|
||||
tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
|
||||
} else {
|
||||
tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
|
||||
tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
|
||||
}
|
||||
TCGMovExtend ext[2] = {
|
||||
{ .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
|
||||
.src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
|
||||
{ .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
|
||||
.src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
|
||||
};
|
||||
tcg_out_movext2(s, &ext[0], &ext[1], -1);
|
||||
} else {
|
||||
tcg_out_movext(s, l->type, data_reg,
|
||||
tcg_out_movext(s, l->type, l->datalo_reg,
|
||||
TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
|
||||
}
|
||||
|
||||
|
|
|
@ -463,7 +463,9 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
|
|||
|
||||
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
|
||||
{
|
||||
tcg_out_ext32s(s, ret, arg);
|
||||
if (ret != arg) {
|
||||
tcg_out_ext32s(s, ret, arg);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
|
||||
|
|
|
@ -582,7 +582,9 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs)
|
|||
|
||||
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
|
||||
{
|
||||
tcg_out_ext32s(s, rd, rs);
|
||||
if (rd != rs) {
|
||||
tcg_out_ext32s(s, rd, rs);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
|
||||
|
|
|
@ -2761,6 +2761,23 @@ void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
|
|||
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
|
||||
}
|
||||
|
||||
void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
|
||||
{
|
||||
static GVecGen2s g = {
|
||||
.fni8 = tcg_gen_andc_i64,
|
||||
.fniv = tcg_gen_andc_vec,
|
||||
.fno = gen_helper_gvec_andcs,
|
||||
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
|
||||
.vece = MO_64
|
||||
};
|
||||
|
||||
TCGv_i64 tmp = tcg_temp_ebb_new_i64();
|
||||
tcg_gen_dup_i64(vece, tmp, c);
|
||||
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g);
|
||||
tcg_temp_free_i64(tmp);
|
||||
}
|
||||
|
||||
static const GVecGen2s gop_xors = {
|
||||
.fni8 = tcg_gen_xor_i64,
|
||||
.fniv = tcg_gen_xor_vec,
|
||||
|
@ -3336,6 +3353,17 @@ void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
|
|||
do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
|
||||
}
|
||||
|
||||
void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
|
||||
{
|
||||
TCGv_i32 tmp = tcg_temp_ebb_new_i32();
|
||||
|
||||
tcg_gen_neg_i32(tmp, shift);
|
||||
tcg_gen_andi_i32(tmp, tmp, (8 << vece) - 1);
|
||||
tcg_gen_gvec_rotls(vece, dofs, aofs, tmp, oprsz, maxsz);
|
||||
tcg_temp_free_i32(tmp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Expand D = A << (B % element bits)
|
||||
*
|
||||
|
|
69
tcg/tcg.c
69
tcg/tcg.c
|
@ -115,8 +115,7 @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
|
|||
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
|
||||
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
|
||||
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
|
||||
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
|
||||
__attribute__((unused));
|
||||
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
|
||||
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
|
||||
static void tcg_out_goto_tb(TCGContext *s, int which);
|
||||
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
|
@ -354,6 +353,14 @@ void tcg_raise_tb_overflow(TCGContext *s)
|
|||
siglongjmp(s->jmp_trans, -2);
|
||||
}
|
||||
|
||||
typedef struct TCGMovExtend {
|
||||
TCGReg dst;
|
||||
TCGReg src;
|
||||
TCGType dst_type;
|
||||
TCGType src_type;
|
||||
MemOp src_ext;
|
||||
} TCGMovExtend;
|
||||
|
||||
/**
|
||||
* tcg_out_movext -- move and extend
|
||||
* @s: tcg context
|
||||
|
@ -365,9 +372,8 @@ void tcg_raise_tb_overflow(TCGContext *s)
|
|||
*
|
||||
* Move or extend @src into @dst, depending on @src_ext and the types.
|
||||
*/
|
||||
static void __attribute__((unused))
|
||||
tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
|
||||
TCGType src_type, MemOp src_ext, TCGReg src)
|
||||
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
|
||||
TCGType src_type, MemOp src_ext, TCGReg src)
|
||||
{
|
||||
switch (src_ext) {
|
||||
case MO_UB:
|
||||
|
@ -417,6 +423,59 @@ tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
|
|||
}
|
||||
}
|
||||
|
||||
/* Minor variations on a theme, using a structure. */
|
||||
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
|
||||
TCGReg src)
|
||||
{
|
||||
tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
|
||||
}
|
||||
|
||||
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
|
||||
{
|
||||
tcg_out_movext1_new_src(s, i, i->src);
|
||||
}
|
||||
|
||||
/**
|
||||
* tcg_out_movext2 -- move and extend two pair
|
||||
* @s: tcg context
|
||||
* @i1: first move description
|
||||
* @i2: second move description
|
||||
* @scratch: temporary register, or -1 for none
|
||||
*
|
||||
* As tcg_out_movext, for both @i1 and @i2, caring for overlap
|
||||
* between the sources and destinations.
|
||||
*/
|
||||
|
||||
static void __attribute__((unused))
|
||||
tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
|
||||
const TCGMovExtend *i2, int scratch)
|
||||
{
|
||||
TCGReg src1 = i1->src;
|
||||
TCGReg src2 = i2->src;
|
||||
|
||||
if (i1->dst != src2) {
|
||||
tcg_out_movext1(s, i1);
|
||||
tcg_out_movext1(s, i2);
|
||||
return;
|
||||
}
|
||||
if (i2->dst == src1) {
|
||||
TCGType src1_type = i1->src_type;
|
||||
TCGType src2_type = i2->src_type;
|
||||
|
||||
if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
|
||||
/* The data is now in the correct registers, now extend. */
|
||||
src1 = i2->src;
|
||||
src2 = i1->src;
|
||||
} else {
|
||||
tcg_debug_assert(scratch >= 0);
|
||||
tcg_out_mov(s, src1_type, scratch, src1);
|
||||
src1 = scratch;
|
||||
}
|
||||
}
|
||||
tcg_out_movext1_new_src(s, i2, src2);
|
||||
tcg_out_movext1_new_src(s, i1, src1);
|
||||
}
|
||||
|
||||
#define C_PFX1(P, A) P##A
|
||||
#define C_PFX2(P, A, B) P##A##_##B
|
||||
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
|
||||
|
|
Loading…
Reference in New Issue