tcg-aarch64: Avoid add with zero in tlb load

Some guest env structures are small enough that the TLB can be reached with only a 12-bit addition, so the add of the offset's high bits can be skipped entirely.

Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Richard Henderson 2013-08-10 14:56:12 -04:00 committed by Richard Henderson
parent 38d195aa05
commit 6f4724672c
1 changed file with 19 additions and 9 deletions
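Why a 12-bit addition can be enough: an AArch64 ADD immediate carries 12 bits, optionally shifted left by 12, and the loads that follow carry their own 12-bit unsigned displacement (scaled by the access size). Below is a minimal standalone sketch, in plain C, of the resulting split of the TLB-table offset; all names are invented for illustration, and only the 0xfff000/0xfff masks come from the diff itself.

#include <stdio.h>

/* Standalone illustration of the offset split used in this commit. */
static void show_split(int tlb_offset)
{
    int hi = tlb_offset & 0xfff000;  /* candidate ADDI ..., LSL #12 part */
    int lo = tlb_offset & 0x000fff;  /* folded into the load displacement */

    printf("tlb_offset=0x%06x -> ", tlb_offset);
    if (hi) {
        printf("ADDI #0x%x (LSL #12), load disp 0x%x\n", hi >> 12, lo);
    } else {
        /* Small env: the 12-bit load displacement reaches the TLB entry
           on its own, so no ADDI needs to be emitted at all. */
        printf("no ADDI, load disp 0x%x\n", lo);
    }
}

int main(void)
{
    show_split(0x000340);   /* small env: an add with zero would be wasted */
    show_split(0x012340);   /* large env: the high bits really need the ADDI */
    return 0;
}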

tcg/aarch64/tcg-target.c

@@ -1128,47 +1128,57 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
    slow path for the failure case, which will be patched later when finalizing
    the slow path. Generated code returns the host addend in X1,
    clobbers X0,X2,X3,TMP. */
-static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
-            int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
+static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, int s_bits,
+                             uint8_t **label_ptr, int mem_index, bool is_read)
 {
     TCGReg base = TCG_AREG0;
     int tlb_offset = is_read ?
         offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
+
     /* Extract the TLB index from the address into X0.
        X0<CPU_TLB_BITS:0> =
        addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
-    tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
+    tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
                  TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
+
     /* Store the page mask part of the address and the low s_bits into X3.
        Later this allows checking for equality and alignment at the same time.
        X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
     tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
                      addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
     /* Add any "high bits" from the tlb offset to the env address into X2,
        to take advantage of the LSL12 form of the ADDI instruction.
        X2 = env + (tlb_offset & 0xfff000) */
-    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
-                 tlb_offset & 0xfff000);
+    if (tlb_offset & 0xfff000) {
+        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
+                     tlb_offset & 0xfff000);
+        base = TCG_REG_X2;
+    }
+
     /* Merge the tlb index contribution into X2.
        X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
-    tcg_out_insn(s, 3502S, ADD_LSL, 1, TCG_REG_X2, TCG_REG_X2,
+    tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
                  TCG_REG_X0, CPU_TLB_ENTRY_BITS);
+
     /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
        X0 = load [X2 + (tlb_offset & 0x000fff)] */
     tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
-                 LDST_LD, TCG_REG_X0, TCG_REG_X2,
-                 (tlb_offset & 0xfff));
+                 LDST_LD, TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
+
     /* Load the tlb addend. Do that early to avoid stalling.
        X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
     tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
                  (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
                  (is_read ? offsetof(CPUTLBEntry, addr_read)
                   : offsetof(CPUTLBEntry, addr_write)));
+
     /* Perform the address comparison. */
     tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
-    *label_ptr = s->code_ptr;
+
     /* If not equal, we jump to the slow path. */
+    *label_ptr = s->code_ptr;
     tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
 }