mirror of https://github.com/xemu-project/xemu.git
tcg/s390x: Support 128-bit load/store
Use LPQ/STPQ when 16-byte atomicity is required. Note that these instructions require 16-byte alignment. Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
526cd4ec01
commit
4caad79f8d
|
@ -14,6 +14,7 @@ C_O0_I2(r, r)
|
|||
C_O0_I2(r, ri)
|
||||
C_O0_I2(r, rA)
|
||||
C_O0_I2(v, r)
|
||||
C_O0_I3(o, m, r)
|
||||
C_O1_I1(r, r)
|
||||
C_O1_I1(v, r)
|
||||
C_O1_I1(v, v)
|
||||
|
@ -36,6 +37,7 @@ C_O1_I2(v, v, v)
|
|||
C_O1_I3(v, v, v, v)
|
||||
C_O1_I4(r, r, ri, rI, r)
|
||||
C_O1_I4(r, r, rA, rI, r)
|
||||
C_O2_I1(o, m, r)
|
||||
C_O2_I2(o, m, 0, r)
|
||||
C_O2_I2(o, m, r, r)
|
||||
C_O2_I3(o, m, 0, 1, r)
|
||||
|
|
|
@ -243,6 +243,7 @@ typedef enum S390Opcode {
|
|||
RXY_LLGF = 0xe316,
|
||||
RXY_LLGH = 0xe391,
|
||||
RXY_LMG = 0xeb04,
|
||||
RXY_LPQ = 0xe38f,
|
||||
RXY_LRV = 0xe31e,
|
||||
RXY_LRVG = 0xe30f,
|
||||
RXY_LRVH = 0xe31f,
|
||||
|
@ -253,6 +254,7 @@ typedef enum S390Opcode {
|
|||
RXY_STG = 0xe324,
|
||||
RXY_STHY = 0xe370,
|
||||
RXY_STMG = 0xeb24,
|
||||
RXY_STPQ = 0xe38e,
|
||||
RXY_STRV = 0xe33e,
|
||||
RXY_STRVG = 0xe32f,
|
||||
RXY_STRVH = 0xe33f,
|
||||
|
@ -1577,7 +1579,18 @@ typedef struct {
|
|||
|
||||
bool tcg_target_has_memory_bswap(MemOp memop)
|
||||
{
|
||||
return true;
|
||||
TCGAtomAlign aa;
|
||||
|
||||
if ((memop & MO_SIZE) <= MO_64) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reject 16-byte memop with 16-byte atomicity,
|
||||
* but do allow a pair of 64-bit operations.
|
||||
*/
|
||||
aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
|
||||
return aa.atom <= MO_64;
|
||||
}
|
||||
|
||||
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
|
||||
|
@ -1734,13 +1747,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
|
|||
{
|
||||
TCGLabelQemuLdst *ldst = NULL;
|
||||
MemOp opc = get_memop(oi);
|
||||
MemOp s_bits = opc & MO_SIZE;
|
||||
unsigned a_mask;
|
||||
|
||||
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
|
||||
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
|
||||
a_mask = (1 << h->aa.align) - 1;
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
unsigned s_bits = opc & MO_SIZE;
|
||||
unsigned s_mask = (1 << s_bits) - 1;
|
||||
int mem_index = get_mmuidx(oi);
|
||||
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
|
||||
|
@ -1865,6 +1878,80 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
|
|||
}
|
||||
}
|
||||
|
||||
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
|
||||
TCGReg addr_reg, MemOpIdx oi, bool is_ld)
|
||||
{
|
||||
TCGLabel *l1 = NULL, *l2 = NULL;
|
||||
TCGLabelQemuLdst *ldst;
|
||||
HostAddress h;
|
||||
bool need_bswap;
|
||||
bool use_pair;
|
||||
S390Opcode insn;
|
||||
|
||||
ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
|
||||
|
||||
use_pair = h.aa.atom < MO_128;
|
||||
need_bswap = get_memop(oi) & MO_BSWAP;
|
||||
|
||||
if (!use_pair) {
|
||||
/*
|
||||
* Atomicity requires we use LPQ. If we've already checked for
|
||||
* 16-byte alignment, that's all we need. If we arrive with
|
||||
* lesser alignment, we have determined that less than 16-byte
|
||||
* alignment can be satisfied with two 8-byte loads.
|
||||
*/
|
||||
if (h.aa.align < MO_128) {
|
||||
use_pair = true;
|
||||
l1 = gen_new_label();
|
||||
l2 = gen_new_label();
|
||||
|
||||
tcg_out_insn(s, RI, TMLL, addr_reg, 15);
|
||||
tgen_branch(s, 7, l1); /* CC in {1,2,3} */
|
||||
}
|
||||
|
||||
tcg_debug_assert(!need_bswap);
|
||||
tcg_debug_assert(datalo & 1);
|
||||
tcg_debug_assert(datahi == datalo - 1);
|
||||
insn = is_ld ? RXY_LPQ : RXY_STPQ;
|
||||
tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
|
||||
|
||||
if (use_pair) {
|
||||
tgen_branch(s, S390_CC_ALWAYS, l2);
|
||||
tcg_out_label(s, l1);
|
||||
}
|
||||
}
|
||||
if (use_pair) {
|
||||
TCGReg d1, d2;
|
||||
|
||||
if (need_bswap) {
|
||||
d1 = datalo, d2 = datahi;
|
||||
insn = is_ld ? RXY_LRVG : RXY_STRVG;
|
||||
} else {
|
||||
d1 = datahi, d2 = datalo;
|
||||
insn = is_ld ? RXY_LG : RXY_STG;
|
||||
}
|
||||
|
||||
if (h.base == d1 || h.index == d1) {
|
||||
tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
|
||||
h.base = TCG_TMP0;
|
||||
h.index = TCG_REG_NONE;
|
||||
h.disp = 0;
|
||||
}
|
||||
tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
|
||||
tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
|
||||
}
|
||||
if (l2) {
|
||||
tcg_out_label(s, l2);
|
||||
}
|
||||
|
||||
if (ldst) {
|
||||
ldst->type = TCG_TYPE_I128;
|
||||
ldst->datalo_reg = datalo;
|
||||
ldst->datahi_reg = datahi;
|
||||
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
|
||||
{
|
||||
/* Reuse the zeroing that exists for goto_ptr. */
|
||||
|
@ -2226,6 +2313,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
|||
case INDEX_op_qemu_st_a64_i64:
|
||||
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
|
||||
break;
|
||||
case INDEX_op_qemu_ld_a32_i128:
|
||||
case INDEX_op_qemu_ld_a64_i128:
|
||||
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
|
||||
break;
|
||||
case INDEX_op_qemu_st_a32_i128:
|
||||
case INDEX_op_qemu_st_a64_i128:
|
||||
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
|
||||
break;
|
||||
|
||||
case INDEX_op_ld16s_i64:
|
||||
tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
|
||||
|
@ -3107,6 +3202,12 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
|
|||
case INDEX_op_qemu_st_a32_i32:
|
||||
case INDEX_op_qemu_st_a64_i32:
|
||||
return C_O0_I2(r, r);
|
||||
case INDEX_op_qemu_ld_a32_i128:
|
||||
case INDEX_op_qemu_ld_a64_i128:
|
||||
return C_O2_I1(o, m, r);
|
||||
case INDEX_op_qemu_st_a32_i128:
|
||||
case INDEX_op_qemu_st_a64_i128:
|
||||
return C_O0_I3(o, m, r);
|
||||
|
||||
case INDEX_op_deposit_i32:
|
||||
case INDEX_op_deposit_i64:
|
||||
|
|
|
@ -140,7 +140,7 @@ extern uint64_t s390_facilities[3];
|
|||
#define TCG_TARGET_HAS_muluh_i64 0
|
||||
#define TCG_TARGET_HAS_mulsh_i64 0
|
||||
|
||||
#define TCG_TARGET_HAS_qemu_ldst_i128 0
|
||||
#define TCG_TARGET_HAS_qemu_ldst_i128 1
|
||||
|
||||
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
|
||||
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
|
||||
|
|
Loading…
Reference in New Issue