From 352bcb0a2b816ff9ab9d75d0f2384650d9e9ab19 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 1 Sep 2015 15:58:02 -0400
Subject: [PATCH 1/2] tcg/aarch64: Fix tcg_out_qemu_{ld, st} for guest_base == 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In ffc6372851d8631a9f9fa56ec613b3244dc635b9, we swapped the guest
base to the address base register from the address index register.
Except that 31 in the base slot is SP not XZR, so we need to be
more intelligent about which reg gets placed in which slot.

Cc: qemu-stable@nongnu.org (v2.4.0)
Reviewed-by: Paolo Bonzini
Reported-by: Andreas Färber
Signed-off-by: Richard Henderson
---
 tcg/aarch64/tcg-target.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 01ae610cd7..0ed10a9741 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -56,6 +56,11 @@ static const int tcg_target_call_oarg_regs[1] = {
 #define TCG_REG_TMP TCG_REG_X30
 
 #ifndef CONFIG_SOFTMMU
+/* Note that XZR cannot be encoded in the address base register slot,
+   as that actually encodes SP.  So if we need to zero-extend the guest
+   address, via the address index register slot, we need to load even
+   a zero guest base into a register.  */
+#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
 #define TCG_REG_GUEST_BASE TCG_REG_X28
 #endif
 
@@ -1224,9 +1229,13 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
-    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
-                           guest_base ? TCG_REG_GUEST_BASE : TCG_REG_XZR,
-                           otype, addr_reg);
+    if (USE_GUEST_BASE) {
+        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+                               TCG_REG_GUEST_BASE, otype, addr_reg);
+    } else {
+        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
+    }
 #endif /* CONFIG_SOFTMMU */
 }
 
@@ -1245,9 +1254,13 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
                         data_reg, addr_reg, s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
-    tcg_out_qemu_st_direct(s, memop, data_reg,
-                           guest_base ? TCG_REG_GUEST_BASE : TCG_REG_XZR,
-                           otype, addr_reg);
+    if (USE_GUEST_BASE) {
+        tcg_out_qemu_st_direct(s, memop, data_reg,
+                               TCG_REG_GUEST_BASE, otype, addr_reg);
+    } else {
+        tcg_out_qemu_st_direct(s, memop, data_reg,
+                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
+    }
 #endif /* CONFIG_SOFTMMU */
 }
 
@@ -1806,7 +1819,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
 #if !defined(CONFIG_SOFTMMU)
-    if (guest_base) {
+    if (USE_GUEST_BASE) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
         tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
     }
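To make the encoding constraint concrete, here is a minimal standalone C
sketch, not QEMU code: the helper name and its fixed opcode constant are
illustrative assumptions about the A64 "LDR (register)" form, where
register number 31 means SP in the base slot but XZR in the index slot.

    #include <stdint.h>

    /* Encode LDR Xt, [Xn, Xm] (64-bit load, register offset, LSL #0).
       0xf8606800 carries the fixed opcode bits of this form; Rm goes
       in bits [20:16] (index slot), Rn in bits [9:5] (base slot). */
    static uint32_t encode_ldr_reg(unsigned rt, unsigned rn, unsigned rm)
    {
        return 0xf8606800u | (rm << 16) | (rn << 5) | rt;
    }

    /* With rn == 31 the CPU reads SP, not zero; only rm == 31 reads
       XZR.  Hence the fix above: when guest_base == 0 and the guest
       address needs no zero-extension, the address goes in the *base*
       slot and XZR in the *index* slot, never the reverse; and when a
       UXTW extension of a 32-bit guest address is needed (which only
       the index slot provides), even a zero guest base must first be
       loaded into a real register. */
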
From 08b0b23be639b955e5e3d84dc42fa4e5ce6a8728 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Sun, 19 Jul 2015 13:50:32 +0200
Subject: [PATCH 2/2] tcg/i386: omit a few REXW prefixes in softmmu code

When computing the TLB address we are likely to mask out the high
32-bits by using shr + and. We can use 32-bit instructions in that
case. This saves 2 bytes per TLB access.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <1437306632-20655-1-git-send-email-aurelien@aurel32.net>
Signed-off-by: Richard Henderson
---
 tcg/i386/tcg-target.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index d2adbc4d17..9187d34caf 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1178,8 +1178,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     const TCGReg r0 = TCG_REG_L0;
     const TCGReg r1 = TCG_REG_L1;
     TCGType ttype = TCG_TYPE_I32;
-    TCGType htype = TCG_TYPE_I32;
-    int trexw = 0, hrexw = 0;
+    TCGType tlbtype = TCG_TYPE_I32;
+    int trexw = 0, hrexw = 0, tlbrexw = 0;
     int s_mask = (1 << (opc & MO_SIZE)) - 1;
     bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0;
 
@@ -1189,12 +1189,15 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
             trexw = P_REXW;
         }
         if (TCG_TYPE_PTR == TCG_TYPE_I64) {
-            htype = TCG_TYPE_I64;
             hrexw = P_REXW;
+            if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
+                tlbtype = TCG_TYPE_I64;
+                tlbrexw = P_REXW;
+            }
         }
     }
 
-    tcg_out_mov(s, htype, r0, addrlo);
+    tcg_out_mov(s, tlbtype, r0, addrlo);
     if (aligned) {
         tcg_out_mov(s, ttype, r1, addrlo);
     } else {
@@ -1203,12 +1206,12 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask);
     }
 
-    tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
+    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
     tgen_arithi(s, ARITH_AND + trexw, r1,
                 TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0);
-    tgen_arithi(s, ARITH_AND + hrexw, r0,
+    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
     tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
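To see why the narrower instructions are safe, here is a minimal C sketch,
not QEMU code: the macro values are typical assumptions, not taken from any
particular target, and the function merely mirrors the generated shr+and
sequence that computes the TLB entry offset.

    #include <stdint.h>

    #define TARGET_PAGE_BITS   12  /* assumed: 4 KiB pages */
    #define CPU_TLB_BITS        8  /* assumed: 256 TLB entries */
    #define CPU_TLB_ENTRY_BITS  5  /* assumed: 32-byte TLB entries */
    #define CPU_TLB_SIZE       (1 << CPU_TLB_BITS)

    /* The shift-and-mask done in 32 bits, i.e. without REX.W.  The
       final mask keeps only address bits [TARGET_PAGE_BITS,
       TARGET_PAGE_BITS + CPU_TLB_BITS), so truncating the address to
       32 bits first loses nothing as long as that upper bound is
       <= 32 -- exactly the TARGET_PAGE_BITS + CPU_TLB_BITS > 32 test
       the patch adds before keeping the REX.W-prefixed 64-bit forms. */
    static uint32_t tlb_entry_offset(uint64_t addr)
    {
        uint32_t x = (uint32_t)addr;
        x >>= TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS;
        return x & ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
    }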