diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c index e7962c9348..9a5fabf625 100644 --- a/accel/tcg/cpu-exec-common.c +++ b/accel/tcg/cpu-exec-common.c @@ -22,6 +22,7 @@ #include "sysemu/tcg.h" #include "exec/exec-all.h" #include "qemu/plugin.h" +#include "internal.h" bool tcg_allowed; @@ -81,6 +82,8 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc) { + /* Prevent looping if already executing in a serial context. */ + g_assert(!cpu_in_serial_context(cpu)); cpu->exception_index = EXCP_ATOMIC; cpu_loop_exit_restore(cpu, pc); } diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 3117886af1..617777055a 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1441,34 +1441,17 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, } } -static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) -{ -#if TCG_OVERSIZED_GUEST - return *(target_ulong *)((uintptr_t)entry + ofs); -#else - /* ofs might correspond to .addr_write, so use qatomic_read */ - return qatomic_read((target_ulong *)((uintptr_t)entry + ofs)); -#endif -} - /* Return true if ADDR is present in the victim tlb, and has been copied back to the main tlb. */ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, - size_t elt_ofs, target_ulong page) + MMUAccessType access_type, target_ulong page) { size_t vidx; assert_cpu_is_self(env_cpu(env)); for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; - target_ulong cmp; - - /* elt_ofs might correspond to .addr_write, so use qatomic_read */ -#if TCG_OVERSIZED_GUEST - cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); -#else - cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); -#endif + target_ulong cmp = tlb_read_idx(vtlb, access_type); if (cmp == page) { /* Found entry in victim tlb, swap tlb and iotlb. */ @@ -1490,11 +1473,6 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, return false; } -/* Macro to call the above, with local variables from the use context. 
*/ -#define VICTIM_TLB_HIT(TY, ADDR) \ - victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ - (ADDR) & TARGET_PAGE_MASK) - static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, CPUTLBEntryFull *full, uintptr_t retaddr) { @@ -1527,29 +1505,12 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, { uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr, page_addr; - size_t elt_ofs; - int flags; + target_ulong tlb_addr = tlb_read_idx(entry, access_type); + target_ulong page_addr = addr & TARGET_PAGE_MASK; + int flags = TLB_FLAGS_MASK; - switch (access_type) { - case MMU_DATA_LOAD: - elt_ofs = offsetof(CPUTLBEntry, addr_read); - break; - case MMU_DATA_STORE: - elt_ofs = offsetof(CPUTLBEntry, addr_write); - break; - case MMU_INST_FETCH: - elt_ofs = offsetof(CPUTLBEntry, addr_code); - break; - default: - g_assert_not_reached(); - } - tlb_addr = tlb_read_ofs(entry, elt_ofs); - - flags = TLB_FLAGS_MASK; - page_addr = addr & TARGET_PAGE_MASK; if (!tlb_hit_page(tlb_addr, page_addr)) { - if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, access_type, page_addr)) { CPUState *cs = env_cpu(env); if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type, @@ -1571,7 +1532,7 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, */ flags &= ~TLB_INVALID_MASK; } - tlb_addr = tlb_read_ofs(entry, elt_ofs); + tlb_addr = tlb_read_idx(entry, access_type); } flags &= tlb_addr; @@ -1755,6 +1716,179 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, #endif +/* + * Probe for a load/store operation. + * Return the host address and into @flags. + */ + +typedef struct MMULookupPageData { + CPUTLBEntryFull *full; + void *haddr; + target_ulong addr; + int flags; + int size; +} MMULookupPageData; + +typedef struct MMULookupLocals { + MMULookupPageData page[2]; + MemOp memop; + int mmu_idx; +} MMULookupLocals; + +/** + * mmu_lookup1: translate one page + * @env: cpu context + * @data: lookup parameters + * @mmu_idx: virtual address context + * @access_type: load/store/code + * @ra: return address into tcg generated code, or 0 + * + * Resolve the translation for the one page at @data.addr, filling in + * the rest of @data with the results. If the translation fails, + * tlb_fill will longjmp out. Return true if the softmmu tlb for + * @mmu_idx may have resized. + */ +static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data, + int mmu_idx, MMUAccessType access_type, uintptr_t ra) +{ + target_ulong addr = data->addr; + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_read_idx(entry, access_type); + bool maybe_resized = false; + + /* If the TLB entry is for a different page, reload and try again. */ + if (!tlb_hit(tlb_addr, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, access_type, + addr & TARGET_PAGE_MASK)) { + tlb_fill(env_cpu(env), addr, data->size, access_type, mmu_idx, ra); + maybe_resized = true; + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK; + } + + data->flags = tlb_addr & TLB_FLAGS_MASK; + data->full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; + /* Compute haddr speculatively; depending on flags it might be invalid. 
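+ * In particular, TLB_MMIO means the addend is not a host-RAM offset,
+ * so haddr must not be dereferenced; the do_ld_*/do_st_* helpers
+ * below check data->flags before touching it.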
*/ + data->haddr = (void *)((uintptr_t)addr + entry->addend); + + return maybe_resized; +} + +/** + * mmu_watch_or_dirty + * @env: cpu context + * @data: lookup parameters + * @access_type: load/store/code + * @ra: return address into tcg generated code, or 0 + * + * Trigger watchpoints for @data.addr:@data.size; + * record writes to protected clean pages. + */ +static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data, + MMUAccessType access_type, uintptr_t ra) +{ + CPUTLBEntryFull *full = data->full; + target_ulong addr = data->addr; + int flags = data->flags; + int size = data->size; + + /* On watchpoint hit, this will longjmp out. */ + if (flags & TLB_WATCHPOINT) { + int wp = access_type == MMU_DATA_STORE ? BP_MEM_WRITE : BP_MEM_READ; + cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs, wp, ra); + flags &= ~TLB_WATCHPOINT; + } + + /* Note that notdirty is only set for writes. */ + if (flags & TLB_NOTDIRTY) { + notdirty_write(env_cpu(env), addr, size, full, ra); + flags &= ~TLB_NOTDIRTY; + } + data->flags = flags; +} + +/** + * mmu_lookup: translate page(s) + * @env: cpu context + * @addr: virtual address + * @oi: combined mmu_idx and MemOp + * @ra: return address into tcg generated code, or 0 + * @access_type: load/store/code + * @l: output result + * + * Resolve the translation for the page(s) beginning at @addr, for MemOp.size + * bytes. Return true if the lookup crosses a page boundary. + */ +static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi, + uintptr_t ra, MMUAccessType type, MMULookupLocals *l) +{ + unsigned a_bits; + bool crosspage; + int flags; + + l->memop = get_memop(oi); + l->mmu_idx = get_mmuidx(oi); + + tcg_debug_assert(l->mmu_idx < NB_MMU_MODES); + + /* Handle CPU specific unaligned behaviour */ + a_bits = get_alignment_bits(l->memop); + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(env_cpu(env), addr, type, l->mmu_idx, ra); + } + + l->page[0].addr = addr; + l->page[0].size = memop_size(l->memop); + l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK; + l->page[1].size = 0; + crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK; + + if (likely(!crosspage)) { + mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra); + + flags = l->page[0].flags; + if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) { + mmu_watch_or_dirty(env, &l->page[0], type, ra); + } + if (unlikely(flags & TLB_BSWAP)) { + l->memop ^= MO_BSWAP; + } + } else { + /* Finish compute of page crossing. */ + int size0 = l->page[1].addr - addr; + l->page[1].size = l->page[0].size - size0; + l->page[0].size = size0; + + /* + * Lookup both pages, recognizing exceptions from either. If the + * second lookup potentially resized, refresh first CPUTLBEntryFull. + */ + mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra); + if (mmu_lookup1(env, &l->page[1], l->mmu_idx, type, ra)) { + uintptr_t index = tlb_index(env, l->mmu_idx, addr); + l->page[0].full = &env_tlb(env)->d[l->mmu_idx].fulltlb[index]; + } + + flags = l->page[0].flags | l->page[1].flags; + if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) { + mmu_watch_or_dirty(env, &l->page[0], type, ra); + mmu_watch_or_dirty(env, &l->page[1], type, ra); + } + + /* + * Since target/sparc is the only user of TLB_BSWAP, and all + * Sparc accesses are aligned, any treatment across two pages + * would be arbitrary. Refuse it until there's a use. + */ + tcg_debug_assert((flags & TLB_BSWAP) == 0); + } + + return crosspage; +} + /* * Probe for an atomic operation. 
Do not allow unaligned operations, * or io operations to proceed. Return the host address. @@ -1802,7 +1936,8 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, if (prot & PAGE_WRITE) { tlb_addr = tlb_addr_write(tlbe); if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE, + addr & TARGET_PAGE_MASK)) { tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, mmu_idx, retaddr); index = tlb_index(env, mmu_idx, addr); @@ -1835,7 +1970,8 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, } else /* if (prot & PAGE_READ) */ { tlb_addr = tlbe->addr_read; if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_LOAD, + addr & TARGET_PAGE_MASK)) { tlb_fill(env_cpu(env), addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); index = tlb_index(env, mmu_idx, addr); @@ -1927,117 +2063,6 @@ load_memop(const void *haddr, MemOp op) } } -static inline uint64_t QEMU_ALWAYS_INLINE -load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi, - uintptr_t retaddr, MemOp op, bool code_read, - FullLoadHelper *full_load) -{ - const size_t tlb_off = code_read ? - offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); - const MMUAccessType access_type = - code_read ? MMU_INST_FETCH : MMU_DATA_LOAD; - const unsigned a_bits = get_alignment_bits(get_memop(oi)); - const size_t size = memop_size(op); - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index; - CPUTLBEntry *entry; - target_ulong tlb_addr; - void *haddr; - uint64_t res; - - tcg_debug_assert(mmu_idx < NB_MMU_MODES); - - /* Handle CPU specific unaligned behaviour */ - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(env_cpu(env), addr, access_type, - mmu_idx, retaddr); - } - - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = code_read ? entry->addr_code : entry->addr_read; - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, - addr & TARGET_PAGE_MASK)) { - tlb_fill(env_cpu(env), addr, size, - access_type, mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = code_read ? entry->addr_code : entry->addr_read; - tlb_addr &= ~TLB_INVALID_MASK; - } - - /* Handle anything that isn't just a straight memory access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUTLBEntryFull *full; - bool need_swap; - - /* For anything that is unaligned, recurse through full_load. */ - if ((addr & (size - 1)) != 0) { - goto do_unaligned_access; - } - - full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; - - /* Handle watchpoints. */ - if (unlikely(tlb_addr & TLB_WATCHPOINT)) { - /* On watchpoint hit, this will longjmp out. */ - cpu_check_watchpoint(env_cpu(env), addr, size, - full->attrs, BP_MEM_READ, retaddr); - } - - need_swap = size > 1 && (tlb_addr & TLB_BSWAP); - - /* Handle I/O access. */ - if (likely(tlb_addr & TLB_MMIO)) { - return io_readx(env, full, mmu_idx, addr, retaddr, - access_type, op ^ (need_swap * MO_BSWAP)); - } - - haddr = (void *)((uintptr_t)addr + entry->addend); - - /* - * Keep these two load_memop separate to ensure that the compiler - * is able to fold the entire function to a single instruction. - * There is a build-time assert inside to remind you of this. 
;-) - */ - if (unlikely(need_swap)) { - return load_memop(haddr, op ^ MO_BSWAP); - } - return load_memop(haddr, op); - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (size > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 - >= TARGET_PAGE_SIZE)) { - target_ulong addr1, addr2; - uint64_t r1, r2; - unsigned shift; - do_unaligned_access: - addr1 = addr & ~((target_ulong)size - 1); - addr2 = addr1 + size; - r1 = full_load(env, addr1, oi, retaddr); - r2 = full_load(env, addr2, oi, retaddr); - shift = (addr & (size - 1)) * 8; - - if (memop_big_endian(op)) { - /* Big-endian combine. */ - res = (r1 << shift) | (r2 >> ((size * 8) - shift)); - } else { - /* Little-endian combine. */ - res = (r1 >> shift) | (r2 << ((size * 8) - shift)); - } - return res & MAKE_64BIT_MASK(0, size * 8); - } - - haddr = (void *)((uintptr_t)addr + entry->addend); - return load_memop(haddr, op); -} - /* * For the benefit of TCG generated code, we want to avoid the * complication of ABI-specific return type promotion and always @@ -2048,89 +2073,250 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi, * We don't bother with this widened value for SOFTMMU_CODE_ACCESS. */ -static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, - MemOpIdx oi, uintptr_t retaddr) +/** + * do_ld_mmio_beN: + * @env: cpu context + * @p: translation parameters + * @ret_be: accumulated data + * @mmu_idx: virtual address context + * @ra: return address into tcg generated code, or 0 + * + * Load @p->size bytes from @p->addr, which is memory-mapped i/o. + * The bytes are concatenated in big-endian order with @ret_be. + */ +static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p, + uint64_t ret_be, int mmu_idx, + MMUAccessType type, uintptr_t ra) { - validate_memop(oi, MO_UB); - return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); + CPUTLBEntryFull *full = p->full; + target_ulong addr = p->addr; + int i, size = p->size; + + QEMU_IOTHREAD_LOCK_GUARD(); + for (i = 0; i < size; i++) { + uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB); + ret_be = (ret_be << 8) | x; + } + return ret_be; +} + +/** + * do_ld_bytes_beN + * @p: translation parameters + * @ret_be: accumulated data + * + * Load @p->size bytes from @p->haddr, which is RAM. + * The bytes to concatenated in big-endian order with @ret_be. + */ +static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be) +{ + uint8_t *haddr = p->haddr; + int i, size = p->size; + + for (i = 0; i < size; i++) { + ret_be = (ret_be << 8) | haddr[i]; + } + return ret_be; +} + +/* + * Wrapper for the above. + */ +static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p, + uint64_t ret_be, int mmu_idx, + MMUAccessType type, uintptr_t ra) +{ + if (unlikely(p->flags & TLB_MMIO)) { + return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra); + } else { + return do_ld_bytes_beN(p, ret_be); + } +} + +static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx, + MMUAccessType type, uintptr_t ra) +{ + if (unlikely(p->flags & TLB_MMIO)) { + return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB); + } else { + return *(uint8_t *)p->haddr; + } +} + +static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx, + MMUAccessType type, MemOp memop, uintptr_t ra) +{ + uint64_t ret; + + if (unlikely(p->flags & TLB_MMIO)) { + return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop); + } + + /* Perform the load host endian, then swap if necessary. 
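+ * For example, a MO_BEUW load on a little-endian host reads the two
+ * bytes host-endian (MO_UW) and then applies bswap16(), since
+ * MO_BSWAP is set whenever the access endianness differs from the
+ * host's.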
*/ + ret = load_memop(p->haddr, MO_UW); + if (memop & MO_BSWAP) { + ret = bswap16(ret); + } + return ret; +} + +static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx, + MMUAccessType type, MemOp memop, uintptr_t ra) +{ + uint32_t ret; + + if (unlikely(p->flags & TLB_MMIO)) { + return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop); + } + + /* Perform the load host endian. */ + ret = load_memop(p->haddr, MO_UL); + if (memop & MO_BSWAP) { + ret = bswap32(ret); + } + return ret; +} + +static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx, + MMUAccessType type, MemOp memop, uintptr_t ra) +{ + uint64_t ret; + + if (unlikely(p->flags & TLB_MMIO)) { + return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop); + } + + /* Perform the load host endian. */ + ret = load_memop(p->haddr, MO_UQ); + if (memop & MO_BSWAP) { + ret = bswap64(ret); + } + return ret; +} + +static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, + uintptr_t ra, MMUAccessType access_type) +{ + MMULookupLocals l; + bool crosspage; + + crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); + tcg_debug_assert(!crosspage); + + return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra); } tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { - return full_ldub_mmu(env, addr, oi, retaddr); + validate_memop(oi, MO_UB); + return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); } -static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, - MemOpIdx oi, uintptr_t retaddr) +static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, + uintptr_t ra, MMUAccessType access_type) { - validate_memop(oi, MO_LEUW); - return load_helper(env, addr, oi, retaddr, MO_LEUW, false, - full_le_lduw_mmu); + MMULookupLocals l; + bool crosspage; + uint16_t ret; + uint8_t a, b; + + crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); + if (likely(!crosspage)) { + return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra); + } + + a = do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra); + b = do_ld_1(env, &l.page[1], l.mmu_idx, access_type, ra); + + if ((l.memop & MO_BSWAP) == MO_LE) { + ret = a | (b << 8); + } else { + ret = b | (a << 8); + } + return ret; } tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { - return full_le_lduw_mmu(env, addr, oi, retaddr); -} - -static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, - MemOpIdx oi, uintptr_t retaddr) -{ - validate_memop(oi, MO_BEUW); - return load_helper(env, addr, oi, retaddr, MO_BEUW, false, - full_be_lduw_mmu); + validate_memop(oi, MO_LEUW); + return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); } tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { - return full_be_lduw_mmu(env, addr, oi, retaddr); + validate_memop(oi, MO_BEUW); + return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); } -static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, - MemOpIdx oi, uintptr_t retaddr) +static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, + uintptr_t ra, MMUAccessType access_type) { - validate_memop(oi, MO_LEUL); - return load_helper(env, addr, oi, retaddr, MO_LEUL, false, - full_le_ldul_mmu); + MMULookupLocals l; + bool crosspage; + uint32_t ret; + + crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); + if (likely(!crosspage)) { + return 
do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra); + } + + ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra); + ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra); + if ((l.memop & MO_BSWAP) == MO_LE) { + ret = bswap32(ret); + } + return ret; } tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { - return full_le_ldul_mmu(env, addr, oi, retaddr); -} - -static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, - MemOpIdx oi, uintptr_t retaddr) -{ - validate_memop(oi, MO_BEUL); - return load_helper(env, addr, oi, retaddr, MO_BEUL, false, - full_be_ldul_mmu); + validate_memop(oi, MO_LEUL); + return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); } tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { - return full_be_ldul_mmu(env, addr, oi, retaddr); + validate_memop(oi, MO_BEUL); + return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); +} + +static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, + uintptr_t ra, MMUAccessType access_type) +{ + MMULookupLocals l; + bool crosspage; + uint64_t ret; + + crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); + if (likely(!crosspage)) { + return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra); + } + + ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra); + ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra); + if ((l.memop & MO_BSWAP) == MO_LE) { + ret = bswap64(ret); + } + return ret; } uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_LEUQ); - return load_helper(env, addr, oi, retaddr, MO_LEUQ, false, - helper_le_ldq_mmu); + return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); } uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_BEUQ); - return load_helper(env, addr, oi, retaddr, MO_BEUQ, false, - helper_be_ldq_mmu); + return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); } /* @@ -2173,56 +2359,85 @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, * Load helpers for cpu_ldst.h. 
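 *
 * These perform the same access as the helper_* entry points above,
 * plus a plugin notification after the access completes.  Typical
 * use from target code (a sketch; mmu_idx and ra are whatever the
 * caller already has in hand):
 *
 *     MemOpIdx oi = make_memop_idx(MO_LEUL, mmu_idx);
 *     uint32_t val = cpu_ldl_le_mmu(env, addr, oi, ra);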
*/ -static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr, - MemOpIdx oi, uintptr_t retaddr, - FullLoadHelper *full_load) +static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi) { - uint64_t ret; - - ret = full_load(env, addr, oi, retaddr); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); - return ret; } uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) { - return cpu_load_helper(env, addr, oi, ra, full_ldub_mmu); + uint8_t ret; + + validate_memop(oi, MO_UB); + ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD); + plugin_load_cb(env, addr, oi); + return ret; } uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) { - return cpu_load_helper(env, addr, oi, ra, full_be_lduw_mmu); + uint16_t ret; + + validate_memop(oi, MO_BEUW); + ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD); + plugin_load_cb(env, addr, oi); + return ret; } uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) { - return cpu_load_helper(env, addr, oi, ra, full_be_ldul_mmu); + uint32_t ret; + + validate_memop(oi, MO_BEUL); + ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD); + plugin_load_cb(env, addr, oi); + return ret; } uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) { - return cpu_load_helper(env, addr, oi, ra, helper_be_ldq_mmu); + uint64_t ret; + + validate_memop(oi, MO_BEUQ); + ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD); + plugin_load_cb(env, addr, oi); + return ret; } uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) { - return cpu_load_helper(env, addr, oi, ra, full_le_lduw_mmu); + uint16_t ret; + + validate_memop(oi, MO_LEUW); + ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD); + plugin_load_cb(env, addr, oi); + return ret; } uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) { - return cpu_load_helper(env, addr, oi, ra, full_le_ldul_mmu); + uint32_t ret; + + validate_memop(oi, MO_LEUL); + ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD); + plugin_load_cb(env, addr, oi); + return ret; } uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) { - return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu); + uint64_t ret; + + validate_memop(oi, MO_LEUQ); + ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD); + plugin_load_cb(env, addr, oi); + return ret; } Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr, @@ -2317,324 +2532,300 @@ store_memop(void *haddr, uint64_t val, MemOp op) } } -static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr); - -static void __attribute__((noinline)) -store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, - uintptr_t retaddr, size_t size, uintptr_t mmu_idx, - bool big_endian) +/** + * do_st_mmio_leN: + * @env: cpu context + * @p: translation parameters + * @val_le: data to store + * @mmu_idx: virtual address context + * @ra: return address into tcg generated code, or 0 + * + * Store @p->size bytes at @p->addr, which is memory-mapped i/o. + * The bytes to store are extracted in little-endian order from @val_le; + * return the bytes of @val_le beyond @p->size that have not been stored. 
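+ * For example, with @p->size == 3 and @val_le == 0x11223344, the
+ * bytes 0x44, 0x33, 0x22 are stored at increasing addresses and
+ * 0x11 is returned for the caller to store on the second page.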
+ */ +static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p, + uint64_t val_le, int mmu_idx, uintptr_t ra) { - const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); - uintptr_t index, index2; - CPUTLBEntry *entry, *entry2; - target_ulong page1, page2, tlb_addr, tlb_addr2; - MemOpIdx oi; - size_t size2; - int i; + CPUTLBEntryFull *full = p->full; + target_ulong addr = p->addr; + int i, size = p->size; - /* - * Ensure the second page is in the TLB. Note that the first page - * is already guaranteed to be filled, and that the second page - * cannot evict the first. An exception to this rule is PAGE_WRITE_INV - * handling: the first page could have evicted itself. - */ - page1 = addr & TARGET_PAGE_MASK; - page2 = (addr + size) & TARGET_PAGE_MASK; - size2 = (addr + size) & ~TARGET_PAGE_MASK; - index2 = tlb_index(env, mmu_idx, page2); - entry2 = tlb_entry(env, mmu_idx, page2); - - tlb_addr2 = tlb_addr_write(entry2); - if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) { - if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) { - tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE, - mmu_idx, retaddr); - index2 = tlb_index(env, mmu_idx, page2); - entry2 = tlb_entry(env, mmu_idx, page2); - } - tlb_addr2 = tlb_addr_write(entry2); + QEMU_IOTHREAD_LOCK_GUARD(); + for (i = 0; i < size; i++, val_le >>= 8) { + io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB); } + return val_le; +} - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = tlb_addr_write(entry); +/** + * do_st_bytes_leN: + * @p: translation parameters + * @val_le: data to store + * + * Store @p->size bytes at @p->haddr, which is RAM. + * The bytes to store are extracted in little-endian order from @val_le; + * return the bytes of @val_le beyond @p->size that have not been stored. + */ +static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le) +{ + uint8_t *haddr = p->haddr; + int i, size = p->size; - /* - * Handle watchpoints. Since this may trap, all checks - * must happen before any store. - */ - if (unlikely(tlb_addr & TLB_WATCHPOINT)) { - cpu_check_watchpoint(env_cpu(env), addr, size - size2, - env_tlb(env)->d[mmu_idx].fulltlb[index].attrs, - BP_MEM_WRITE, retaddr); - } - if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) { - cpu_check_watchpoint(env_cpu(env), page2, size2, - env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs, - BP_MEM_WRITE, retaddr); + for (i = 0; i < size; i++, val_le >>= 8) { + haddr[i] = val_le; } + return val_le; +} - /* - * XXX: not efficient, but simple. - * This loop must go in the forward direction to avoid issues - * with self-modifying code in Windows 64-bit. - */ - oi = make_memop_idx(MO_UB, mmu_idx); - if (big_endian) { - for (i = 0; i < size; ++i) { - /* Big-endian extract. */ - uint8_t val8 = val >> (((size - 1) * 8) - (i * 8)); - full_stb_mmu(env, addr + i, val8, oi, retaddr); - } +/* + * Wrapper for the above. + */ +static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p, + uint64_t val_le, int mmu_idx, uintptr_t ra) +{ + if (unlikely(p->flags & TLB_MMIO)) { + return do_st_mmio_leN(env, p, val_le, mmu_idx, ra); + } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { + return val_le >> (p->size * 8); } else { - for (i = 0; i < size; ++i) { - /* Little-endian extract. 
*/ - uint8_t val8 = val >> (i * 8); - full_stb_mmu(env, addr + i, val8, oi, retaddr); - } + return do_st_bytes_leN(p, val_le); } } -static inline void QEMU_ALWAYS_INLINE -store_helper(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr, MemOp op) +static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val, + int mmu_idx, uintptr_t ra) { - const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); - const unsigned a_bits = get_alignment_bits(get_memop(oi)); - const size_t size = memop_size(op); - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index; - CPUTLBEntry *entry; - target_ulong tlb_addr; - void *haddr; - - tcg_debug_assert(mmu_idx < NB_MMU_MODES); - - /* Handle CPU specific unaligned behaviour */ - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, - mmu_idx, retaddr); + if (unlikely(p->flags & TLB_MMIO)) { + io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB); + } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { + /* nothing */ + } else { + *(uint8_t *)p->haddr = val; } - - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = tlb_addr_write(entry); - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, - addr & TARGET_PAGE_MASK)) { - tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; - } - - /* Handle anything that isn't just a straight memory access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUTLBEntryFull *full; - bool need_swap; - - /* For anything that is unaligned, recurse through byte stores. */ - if ((addr & (size - 1)) != 0) { - goto do_unaligned_access; - } - - full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; - - /* Handle watchpoints. */ - if (unlikely(tlb_addr & TLB_WATCHPOINT)) { - /* On watchpoint hit, this will longjmp out. */ - cpu_check_watchpoint(env_cpu(env), addr, size, - full->attrs, BP_MEM_WRITE, retaddr); - } - - need_swap = size > 1 && (tlb_addr & TLB_BSWAP); - - /* Handle I/O access. */ - if (tlb_addr & TLB_MMIO) { - io_writex(env, full, mmu_idx, val, addr, retaddr, - op ^ (need_swap * MO_BSWAP)); - return; - } - - /* Ignore writes to ROM. */ - if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) { - return; - } - - /* Handle clean RAM pages. */ - if (tlb_addr & TLB_NOTDIRTY) { - notdirty_write(env_cpu(env), addr, size, full, retaddr); - } - - haddr = (void *)((uintptr_t)addr + entry->addend); - - /* - * Keep these two store_memop separate to ensure that the compiler - * is able to fold the entire function to a single instruction. - * There is a build-time assert inside to remind you of this. ;-) - */ - if (unlikely(need_swap)) { - store_memop(haddr, val, op ^ MO_BSWAP); - } else { - store_memop(haddr, val, op); - } - return; - } - - /* Handle slow unaligned access (it spans two pages or IO). 
*/ - if (size > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 - >= TARGET_PAGE_SIZE)) { - do_unaligned_access: - store_helper_unaligned(env, addr, val, retaddr, size, - mmu_idx, memop_big_endian(op)); - return; - } - - haddr = (void *)((uintptr_t)addr + entry->addend); - store_memop(haddr, val, op); } -static void __attribute__((noinline)) -full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr) +static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val, + int mmu_idx, MemOp memop, uintptr_t ra) { - validate_memop(oi, MO_UB); - store_helper(env, addr, val, oi, retaddr, MO_UB); + if (unlikely(p->flags & TLB_MMIO)) { + io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { + /* nothing */ + } else { + /* Swap to host endian if necessary, then store. */ + if (memop & MO_BSWAP) { + val = bswap16(val); + } + store_memop(p->haddr, val, MO_UW); + } +} + +static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val, + int mmu_idx, MemOp memop, uintptr_t ra) +{ + if (unlikely(p->flags & TLB_MMIO)) { + io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { + /* nothing */ + } else { + /* Swap to host endian if necessary, then store. */ + if (memop & MO_BSWAP) { + val = bswap32(val); + } + store_memop(p->haddr, val, MO_UL); + } +} + +static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val, + int mmu_idx, MemOp memop, uintptr_t ra) +{ + if (unlikely(p->flags & TLB_MMIO)) { + io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { + /* nothing */ + } else { + /* Swap to host endian if necessary, then store. 
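+ * (The mirror of the load side: e.g. MO_LEUQ on a big-endian host
+ * needs bswap64 before the host-endian MO_UQ store.)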
*/ + if (memop & MO_BSWAP) { + val = bswap64(val); + } + store_memop(p->haddr, val, MO_UQ); + } } void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - MemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t ra) { - full_stb_mmu(env, addr, val, oi, retaddr); + MMULookupLocals l; + bool crosspage; + + validate_memop(oi, MO_UB); + crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); + tcg_debug_assert(!crosspage); + + do_st_1(env, &l.page[0], val, l.mmu_idx, ra); } -static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr) +static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t ra) { - validate_memop(oi, MO_LEUW); - store_helper(env, addr, val, oi, retaddr, MO_LEUW); + MMULookupLocals l; + bool crosspage; + uint8_t a, b; + + crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); + if (likely(!crosspage)) { + do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra); + return; + } + + if ((l.memop & MO_BSWAP) == MO_LE) { + a = val, b = val >> 8; + } else { + b = val, a = val >> 8; + } + do_st_1(env, &l.page[0], a, l.mmu_idx, ra); + do_st_1(env, &l.page[1], b, l.mmu_idx, ra); } void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - full_le_stw_mmu(env, addr, val, oi, retaddr); -} - -static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr) -{ - validate_memop(oi, MO_BEUW); - store_helper(env, addr, val, oi, retaddr, MO_BEUW); + validate_memop(oi, MO_LEUW); + do_st2_mmu(env, addr, val, oi, retaddr); } void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - full_be_stw_mmu(env, addr, val, oi, retaddr); + validate_memop(oi, MO_BEUW); + do_st2_mmu(env, addr, val, oi, retaddr); } -static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr) +static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t ra) { - validate_memop(oi, MO_LEUL); - store_helper(env, addr, val, oi, retaddr, MO_LEUL); + MMULookupLocals l; + bool crosspage; + + crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); + if (likely(!crosspage)) { + do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra); + return; + } + + /* Swap to little endian for simplicity, then store by bytes. 
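+ * E.g. a MO_BEUL store of 0x11223344 split 1+3 across the page
+ * boundary: bswap32 gives 0x44332211, so do_st_leN emits 0x11 on
+ * the first page and 0x22 0x33 0x44 on the second, matching the
+ * guest's big-endian byte order in memory.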
*/ + if ((l.memop & MO_BSWAP) != MO_LE) { + val = bswap32(val); + } + val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra); + (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra); } void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - full_le_stl_mmu(env, addr, val, oi, retaddr); -} - -static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr) -{ - validate_memop(oi, MO_BEUL); - store_helper(env, addr, val, oi, retaddr, MO_BEUL); + validate_memop(oi, MO_LEUL); + do_st4_mmu(env, addr, val, oi, retaddr); } void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - full_be_stl_mmu(env, addr, val, oi, retaddr); + validate_memop(oi, MO_BEUL); + do_st4_mmu(env, addr, val, oi, retaddr); +} + +static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t ra) +{ + MMULookupLocals l; + bool crosspage; + + crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); + if (likely(!crosspage)) { + do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra); + return; + } + + /* Swap to little endian for simplicity, then store by bytes. */ + if ((l.memop & MO_BSWAP) != MO_LE) { + val = bswap64(val); + } + val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra); + (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra); } void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_LEUQ); - store_helper(env, addr, val, oi, retaddr, MO_LEUQ); + do_st8_mmu(env, addr, val, oi, retaddr); } void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_BEUQ); - store_helper(env, addr, val, oi, retaddr, MO_BEUQ); + do_st8_mmu(env, addr, val, oi, retaddr); } /* * Store Helpers for cpu_ldst.h */ -typedef void FullStoreHelper(CPUArchState *env, target_ulong addr, - uint64_t val, MemOpIdx oi, uintptr_t retaddr); - -static inline void cpu_store_helper(CPUArchState *env, target_ulong addr, - uint64_t val, MemOpIdx oi, uintptr_t ra, - FullStoreHelper *full_store) +static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi) { - full_store(env, addr, val, oi, ra); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu); + helper_ret_stb_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu); + helper_be_stw_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu); + helper_be_stl_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu); + helper_be_stq_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, 
oi, retaddr, full_le_stw_mmu); + helper_le_stw_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu); + helper_le_stl_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu); + helper_le_stq_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val, @@ -2726,98 +2917,50 @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val, /* Code access functions. */ -static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr, - MemOpIdx oi, uintptr_t retaddr) -{ - return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code); -} - uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) { MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true)); - return full_ldub_code(env, addr, oi, 0); -} - -static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr, - MemOpIdx oi, uintptr_t retaddr) -{ - return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code); + return do_ld1_mmu(env, addr, oi, 0, MMU_INST_FETCH); } uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) { MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true)); - return full_lduw_code(env, addr, oi, 0); -} - -static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr, - MemOpIdx oi, uintptr_t retaddr) -{ - return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code); + return do_ld2_mmu(env, addr, oi, 0, MMU_INST_FETCH); } uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) { MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true)); - return full_ldl_code(env, addr, oi, 0); -} - -static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr, - MemOpIdx oi, uintptr_t retaddr) -{ - return load_helper(env, addr, oi, retaddr, MO_TEUQ, true, full_ldq_code); + return do_ld4_mmu(env, addr, oi, 0, MMU_INST_FETCH); } uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) { MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true)); - return full_ldq_code(env, addr, oi, 0); + return do_ld8_mmu(env, addr, oi, 0, MMU_INST_FETCH); } uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t retaddr) { - return full_ldub_code(env, addr, oi, retaddr); + return do_ld1_mmu(env, addr, oi, retaddr, MMU_INST_FETCH); } uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t retaddr) { - MemOp mop = get_memop(oi); - int idx = get_mmuidx(oi); - uint16_t ret; - - ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr); - if ((mop & MO_BSWAP) != MO_TE) { - ret = bswap16(ret); - } - return ret; + return do_ld2_mmu(env, addr, oi, retaddr, MMU_INST_FETCH); } uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t retaddr) { - MemOp mop = get_memop(oi); - int idx = get_mmuidx(oi); - uint32_t ret; - - ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr); - if ((mop & MO_BSWAP) != MO_TE) { - ret = bswap32(ret); - } - return ret; + return do_ld4_mmu(env, addr, oi, retaddr, MMU_INST_FETCH); } uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t retaddr) { - MemOp mop = get_memop(oi); - int idx = 
get_mmuidx(oi); - uint64_t ret; - - ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr); - if ((mop & MO_BSWAP) != MO_TE) { - ret = bswap64(ret); - } - return ret; + return do_ld8_mmu(env, addr, oi, retaddr, MMU_INST_FETCH); } diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index 7bb0fdbe14..24f225cac7 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -64,6 +64,15 @@ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb) } } +/* + * Return true if CS is not running in parallel with other cpus, either + * because there are no other cpus or we are within an exclusive context. + */ +static inline bool cpu_in_serial_context(CPUState *cs) +{ + return !(cs->tcg_cflags & CF_PARALLEL) || cpu_in_exclusive_context(cs); +} + extern int64_t max_delay; extern int64_t max_advance; diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index 0dd173fbf0..991746f80f 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -760,7 +760,7 @@ void tb_flush(CPUState *cpu) if (tcg_enabled()) { unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count); - if (cpu_in_exclusive_context(cpu)) { + if (cpu_in_serial_context(cpu)) { do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count)); } else { async_safe_run_on_cpu(cpu, do_tb_flush, diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c index 84cc7421be..3d2cfbbc97 100644 --- a/accel/tcg/tcg-accel-ops-icount.c +++ b/accel/tcg/tcg-accel-ops-icount.c @@ -89,7 +89,20 @@ void icount_handle_deadline(void) } } -void icount_prepare_for_run(CPUState *cpu) +/* Distribute the budget evenly across all CPUs */ +int64_t icount_percpu_budget(int cpu_count) +{ + int64_t limit = icount_get_limit(); + int64_t timeslice = limit / cpu_count; + + if (timeslice == 0) { + timeslice = limit; + } + + return timeslice; +} + +void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget) { int insns_left; @@ -101,13 +114,13 @@ void icount_prepare_for_run(CPUState *cpu) g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0); g_assert(cpu->icount_extra == 0); - cpu->icount_budget = icount_get_limit(); + replay_mutex_lock(); + + cpu->icount_budget = MIN(icount_get_limit(), cpu_budget); insns_left = MIN(0xffff, cpu->icount_budget); cpu_neg(cpu)->icount_decr.u16.low = insns_left; cpu->icount_extra = cpu->icount_budget - insns_left; - replay_mutex_lock(); - if (cpu->icount_budget == 0) { /* * We're called without the iothread lock, so must take it while diff --git a/accel/tcg/tcg-accel-ops-icount.h b/accel/tcg/tcg-accel-ops-icount.h index 1b6fd9c607..16a301b6dc 100644 --- a/accel/tcg/tcg-accel-ops-icount.h +++ b/accel/tcg/tcg-accel-ops-icount.h @@ -11,7 +11,8 @@ #define TCG_ACCEL_OPS_ICOUNT_H void icount_handle_deadline(void); -void icount_prepare_for_run(CPUState *cpu); +void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget); +int64_t icount_percpu_budget(int cpu_count); void icount_process_data(CPUState *cpu); void icount_handle_interrupt(CPUState *cpu, int mask); diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 290833a37f..5788efa5ff 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -24,6 +24,7 @@ */ #include "qemu/osdep.h" +#include "qemu/lockable.h" #include "sysemu/tcg.h" #include "sysemu/replay.h" #include "sysemu/cpu-timers.h" @@ -139,6 +140,33 @@ static void rr_force_rcu(Notifier *notify, void *data) rr_kick_next_cpu(); } +/* + * Calculate the number of CPUs that we will process in a single iteration of + * the main CPU thread 
loop so that we can fairly distribute the instruction + * count across CPUs. + * + * The CPU count is cached based on the CPU list generation ID to avoid + * iterating the list every time. + */ +static int rr_cpu_count(void) +{ + static unsigned int last_gen_id = ~0; + static int cpu_count; + CPUState *cpu; + + QEMU_LOCK_GUARD(&qemu_cpu_list_lock); + + if (cpu_list_generation_id_get() != last_gen_id) { + cpu_count = 0; + CPU_FOREACH(cpu) { + ++cpu_count; + } + last_gen_id = cpu_list_generation_id_get(); + } + + return cpu_count; +} + /* * In the single-threaded case each vCPU is simulated in turn. If * there is more than a single vCPU we create a simple timer to kick @@ -185,11 +213,16 @@ static void *rr_cpu_thread_fn(void *arg) cpu->exit_request = 1; while (1) { + /* Only used for icount_enabled() */ + int64_t cpu_budget = 0; + qemu_mutex_unlock_iothread(); replay_mutex_lock(); qemu_mutex_lock_iothread(); if (icount_enabled()) { + int cpu_count = rr_cpu_count(); + /* Account partial waits to QEMU_CLOCK_VIRTUAL. */ icount_account_warp_timer(); /* @@ -197,6 +230,8 @@ static void *rr_cpu_thread_fn(void *arg) * waking up the I/O thread and waiting for completion. */ icount_handle_deadline(); + + cpu_budget = icount_percpu_budget(cpu_count); } replay_mutex_unlock(); @@ -218,7 +253,7 @@ static void *rr_cpu_thread_fn(void *arg) qemu_mutex_unlock_iothread(); if (icount_enabled()) { - icount_prepare_for_run(cpu); + icount_prepare_for_run(cpu, cpu_budget); } r = tcg_cpus_exec(cpu); if (icount_enabled()) { diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c index fbcdc94b96..1f650bdde8 100644 --- a/bsd-user/elfload.c +++ b/bsd-user/elfload.c @@ -352,9 +352,10 @@ static abi_ulong load_elf_interp(struct elfhdr *interp_elf_ex, static int symfind(const void *s0, const void *s1) { - target_ulong addr = *(target_ulong *)s0; struct elf_sym *sym = (struct elf_sym *)s1; + __typeof(sym->st_value) addr = *(uint64_t *)s0; int result = 0; + if (addr < sym->st_value) { result = -1; } else if (addr >= sym->st_value + sym->st_size) { @@ -363,7 +364,7 @@ static int symfind(const void *s0, const void *s1) return result; } -static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr) +static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr) { #if ELF_CLASS == ELFCLASS32 struct elf_sym *syms = s->disas_symtab.elf32; diff --git a/configs/targets/mips-linux-user.mak b/configs/targets/mips-linux-user.mak index 71fa77d464..b4569a9893 100644 --- a/configs/targets/mips-linux-user.mak +++ b/configs/targets/mips-linux-user.mak @@ -2,5 +2,4 @@ TARGET_ARCH=mips TARGET_ABI_MIPSO32=y TARGET_SYSTBL_ABI=o32 TARGET_SYSTBL=syscall_o32.tbl -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y diff --git a/configs/targets/mips-softmmu.mak b/configs/targets/mips-softmmu.mak index 7787a4d94c..d34b4083fc 100644 --- a/configs/targets/mips-softmmu.mak +++ b/configs/targets/mips-softmmu.mak @@ -1,4 +1,3 @@ TARGET_ARCH=mips -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y TARGET_SUPPORTS_MTTCG=y diff --git a/configs/targets/mips64-linux-user.mak b/configs/targets/mips64-linux-user.mak index 5a4771f22d..d2ff509a11 100644 --- a/configs/targets/mips64-linux-user.mak +++ b/configs/targets/mips64-linux-user.mak @@ -3,5 +3,4 @@ TARGET_ABI_MIPSN64=y TARGET_BASE_ARCH=mips TARGET_SYSTBL_ABI=n64 TARGET_SYSTBL=syscall_n64.tbl -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y diff --git a/configs/targets/mips64-softmmu.mak b/configs/targets/mips64-softmmu.mak index 568d66650c..12d9483bf0 100644 --- a/configs/targets/mips64-softmmu.mak +++ 
b/configs/targets/mips64-softmmu.mak @@ -1,4 +1,3 @@ TARGET_ARCH=mips64 TARGET_BASE_ARCH=mips -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y diff --git a/configs/targets/mips64el-linux-user.mak b/configs/targets/mips64el-linux-user.mak index f348f35997..f9efeec8ea 100644 --- a/configs/targets/mips64el-linux-user.mak +++ b/configs/targets/mips64el-linux-user.mak @@ -3,4 +3,3 @@ TARGET_ABI_MIPSN64=y TARGET_BASE_ARCH=mips TARGET_SYSTBL_ABI=n64 TARGET_SYSTBL=syscall_n64.tbl -TARGET_ALIGNED_ONLY=y diff --git a/configs/targets/mips64el-softmmu.mak b/configs/targets/mips64el-softmmu.mak index 5a52aa4b64..8d9ab3ddc4 100644 --- a/configs/targets/mips64el-softmmu.mak +++ b/configs/targets/mips64el-softmmu.mak @@ -1,4 +1,3 @@ TARGET_ARCH=mips64 TARGET_BASE_ARCH=mips -TARGET_ALIGNED_ONLY=y TARGET_NEED_FDT=y diff --git a/configs/targets/mipsel-linux-user.mak b/configs/targets/mipsel-linux-user.mak index e23793070c..e8d7241d31 100644 --- a/configs/targets/mipsel-linux-user.mak +++ b/configs/targets/mipsel-linux-user.mak @@ -2,4 +2,3 @@ TARGET_ARCH=mips TARGET_ABI_MIPSO32=y TARGET_SYSTBL_ABI=o32 TARGET_SYSTBL=syscall_o32.tbl -TARGET_ALIGNED_ONLY=y diff --git a/configs/targets/mipsel-softmmu.mak b/configs/targets/mipsel-softmmu.mak index c7c41f4fb7..0829659fc2 100644 --- a/configs/targets/mipsel-softmmu.mak +++ b/configs/targets/mipsel-softmmu.mak @@ -1,3 +1,2 @@ TARGET_ARCH=mips -TARGET_ALIGNED_ONLY=y TARGET_SUPPORTS_MTTCG=y diff --git a/configs/targets/mipsn32-linux-user.mak b/configs/targets/mipsn32-linux-user.mak index 1e80b302fc..206095da64 100644 --- a/configs/targets/mipsn32-linux-user.mak +++ b/configs/targets/mipsn32-linux-user.mak @@ -4,5 +4,4 @@ TARGET_ABI32=y TARGET_BASE_ARCH=mips TARGET_SYSTBL_ABI=n32 TARGET_SYSTBL=syscall_n32.tbl -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y diff --git a/configs/targets/mipsn32el-linux-user.mak b/configs/targets/mipsn32el-linux-user.mak index f31a9c394b..ca2a3ed753 100644 --- a/configs/targets/mipsn32el-linux-user.mak +++ b/configs/targets/mipsn32el-linux-user.mak @@ -4,4 +4,3 @@ TARGET_ABI32=y TARGET_BASE_ARCH=mips TARGET_SYSTBL_ABI=n32 TARGET_SYSTBL=syscall_n32.tbl -TARGET_ALIGNED_ONLY=y diff --git a/configs/targets/nios2-softmmu.mak b/configs/targets/nios2-softmmu.mak index 5823fc02c8..c99ae3777e 100644 --- a/configs/targets/nios2-softmmu.mak +++ b/configs/targets/nios2-softmmu.mak @@ -1,3 +1,2 @@ TARGET_ARCH=nios2 -TARGET_ALIGNED_ONLY=y TARGET_NEED_FDT=y diff --git a/configs/targets/sh4-linux-user.mak b/configs/targets/sh4-linux-user.mak index 0152d6621e..9908887566 100644 --- a/configs/targets/sh4-linux-user.mak +++ b/configs/targets/sh4-linux-user.mak @@ -1,5 +1,4 @@ TARGET_ARCH=sh4 TARGET_SYSTBL_ABI=common TARGET_SYSTBL=syscall.tbl -TARGET_ALIGNED_ONLY=y TARGET_HAS_BFLT=y diff --git a/configs/targets/sh4-softmmu.mak b/configs/targets/sh4-softmmu.mak index 95896376c4..f9d62d91e4 100644 --- a/configs/targets/sh4-softmmu.mak +++ b/configs/targets/sh4-softmmu.mak @@ -1,2 +1 @@ TARGET_ARCH=sh4 -TARGET_ALIGNED_ONLY=y diff --git a/configs/targets/sh4eb-linux-user.mak b/configs/targets/sh4eb-linux-user.mak index 6724165efe..9db6b3609c 100644 --- a/configs/targets/sh4eb-linux-user.mak +++ b/configs/targets/sh4eb-linux-user.mak @@ -1,6 +1,5 @@ TARGET_ARCH=sh4 TARGET_SYSTBL_ABI=common TARGET_SYSTBL=syscall.tbl -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y TARGET_HAS_BFLT=y diff --git a/configs/targets/sh4eb-softmmu.mak b/configs/targets/sh4eb-softmmu.mak index dc8b30bf7a..226b1fc698 100644 --- a/configs/targets/sh4eb-softmmu.mak +++ b/configs/targets/sh4eb-softmmu.mak @@ 
-1,3 +1,2 @@ TARGET_ARCH=sh4 -TARGET_ALIGNED_ONLY=y TARGET_BIG_ENDIAN=y diff --git a/cpus-common.c b/cpus-common.c index a53716deb4..45c745ecf6 100644 --- a/cpus-common.c +++ b/cpus-common.c @@ -25,7 +25,7 @@ #include "qemu/lockable.h" #include "trace/trace-root.h" -static QemuMutex qemu_cpu_list_lock; +QemuMutex qemu_cpu_list_lock; static QemuCond exclusive_cond; static QemuCond exclusive_resume; static QemuCond qemu_work_cond; diff --git a/disas/disas-internal.h b/disas/disas-internal.h new file mode 100644 index 0000000000..84a01f126f --- /dev/null +++ b/disas/disas-internal.h @@ -0,0 +1,21 @@ +/* + * Definitions used internally in the disassembly code + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef DISAS_INTERNAL_H +#define DISAS_INTERNAL_H + +#include "disas/dis-asm.h" + +typedef struct CPUDebug { + struct disassemble_info info; + CPUState *cpu; +} CPUDebug; + +void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu); +int disas_gstring_printf(FILE *stream, const char *fmt, ...) + G_GNUC_PRINTF(2, 3); + +#endif diff --git a/disas/disas-mon.c b/disas/disas-mon.c new file mode 100644 index 0000000000..48ac492c6c --- /dev/null +++ b/disas/disas-mon.c @@ -0,0 +1,65 @@ +/* + * Functions related to disassembly from the monitor + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "disas-internal.h" +#include "disas/disas.h" +#include "exec/memory.h" +#include "hw/core/cpu.h" +#include "monitor/monitor.h" + +static int +physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length, + struct disassemble_info *info) +{ + CPUDebug *s = container_of(info, CPUDebug, info); + MemTxResult res; + + res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED, + myaddr, length); + return res == MEMTX_OK ? 0 : EIO; +} + +/* Disassembler for the monitor. */ +void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc, + int nb_insn, bool is_physical) +{ + int count, i; + CPUDebug s; + g_autoptr(GString) ds = g_string_new(""); + + disas_initialize_debug_target(&s, cpu); + s.info.fprintf_func = disas_gstring_printf; + s.info.stream = (FILE *)ds; /* abuse this slot */ + + if (is_physical) { + s.info.read_memory_func = physical_read_memory; + } + s.info.buffer_vma = pc; + + if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) { + monitor_puts(mon, ds->str); + return; + } + + if (!s.info.print_insn) { + monitor_printf(mon, "0x%08" PRIx64 + ": Asm output not supported on this arch\n", pc); + return; + } + + for (i = 0; i < nb_insn; i++) { + g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc); + count = s.info.print_insn(pc, &s.info); + g_string_append_c(ds, '\n'); + if (count < 0) { + break; + } + pc += count; + } + + monitor_puts(mon, ds->str); +} diff --git a/disas.c b/disas/disas.c similarity index 78% rename from disas.c rename to disas/disas.c index b087c12c47..0d2d06c2ec 100644 --- a/disas.c +++ b/disas/disas.c @@ -1,16 +1,12 @@ /* General "disassemble this chunk" code. Used for debugging. */ #include "qemu/osdep.h" -#include "disas/dis-asm.h" +#include "disas/disas-internal.h" #include "elf.h" #include "qemu/qemu-print.h" - #include "disas/disas.h" #include "disas/capstone.h" - -typedef struct CPUDebug { - struct disassemble_info info; - CPUState *cpu; -} CPUDebug; +#include "hw/core/cpu.h" +#include "exec/memory.h" /* Filled in by elfload.c. Simplistic, but will do for now. 
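 * (One entry is appended per loaded image; lookup_symbol() below
 * walks the list.)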
*/ struct syminfo *syminfos = NULL; @@ -119,18 +115,18 @@ static void initialize_debug(CPUDebug *s) s->info.symbol_at_address_func = symbol_at_address; } -static void initialize_debug_target(CPUDebug *s, CPUState *cpu) +void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu) { initialize_debug(s); s->cpu = cpu; s->info.read_memory_func = target_read_memory; s->info.print_address_func = print_address; -#if TARGET_BIG_ENDIAN - s->info.endian = BFD_ENDIAN_BIG; -#else - s->info.endian = BFD_ENDIAN_LITTLE; -#endif + if (target_words_bigendian()) { + s->info.endian = BFD_ENDIAN_BIG; + } else { + s->info.endian = BFD_ENDIAN_LITTLE; + } CPUClass *cc = CPU_GET_CLASS(cpu); if (cc->disas_set_info) { @@ -168,7 +164,7 @@ static void initialize_debug_host(CPUDebug *s) # ifdef _ARCH_PPC64 s->info.cap_mode = CS_MODE_64; # endif -#elif defined(__riscv) && defined(CONFIG_RISCV_DIS) +#elif defined(__riscv) #if defined(_ILP32) || (__riscv_xlen == 32) s->info.print_insn = print_insn_riscv32; #elif defined(_LP64) @@ -204,14 +200,13 @@ static void initialize_debug_host(CPUDebug *s) } /* Disassemble this for me please... (debugging). */ -void target_disas(FILE *out, CPUState *cpu, target_ulong code, - target_ulong size) +void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size) { - target_ulong pc; + uint64_t pc; int count; CPUDebug s; - initialize_debug_target(&s, cpu); + disas_initialize_debug_target(&s, cpu); s.info.fprintf_func = fprintf; s.info.stream = out; s.info.buffer_vma = code; @@ -226,11 +221,12 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code, } for (pc = code; size > 0; pc += count, size -= count) { - fprintf(out, "0x" TARGET_FMT_lx ": ", pc); - count = s.info.print_insn(pc, &s.info); - fprintf(out, "\n"); - if (count < 0) - break; + fprintf(out, "0x%08" PRIx64 ": ", pc); + count = s.info.print_insn(pc, &s.info); + fprintf(out, "\n"); + if (count < 0) { + break; + } if (size < count) { fprintf(out, "Disassembler disagrees with translator over instruction " @@ -241,8 +237,7 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code, } } -static int G_GNUC_PRINTF(2, 3) -gstring_printf(FILE *stream, const char *fmt, ...) +int disas_gstring_printf(FILE *stream, const char *fmt, ...) { /* We abuse the FILE parameter to pass a GString. */ GString *s = (GString *)stream; @@ -272,8 +267,8 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size) CPUDebug s; GString *ds = g_string_new(NULL); - initialize_debug_target(&s, cpu); - s.info.fprintf_func = gstring_printf; + disas_initialize_debug_target(&s, cpu); + s.info.fprintf_func = disas_gstring_printf; s.info.stream = (FILE *)ds; /* abuse this slot */ s.info.buffer_vma = addr; s.info.buffer_length = size; @@ -292,7 +287,7 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size) } /* Disassemble this for me please... (debugging). */ -void disas(FILE *out, const void *code, unsigned long size) +void disas(FILE *out, const void *code, size_t size) { uintptr_t pc; int count; @@ -324,7 +319,7 @@ void disas(FILE *out, const void *code, unsigned long size) } /* Look up symbol for debugging purpose. Returns "" if unknown. 
*/ -const char *lookup_symbol(target_ulong orig_addr) +const char *lookup_symbol(uint64_t orig_addr) { const char *symbol = ""; struct syminfo *s; @@ -338,61 +333,3 @@ const char *lookup_symbol(target_ulong orig_addr) return symbol; } - -#if !defined(CONFIG_USER_ONLY) - -#include "monitor/monitor.h" - -static int -physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length, - struct disassemble_info *info) -{ - CPUDebug *s = container_of(info, CPUDebug, info); - MemTxResult res; - - res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED, - myaddr, length); - return res == MEMTX_OK ? 0 : EIO; -} - -/* Disassembler for the monitor. */ -void monitor_disas(Monitor *mon, CPUState *cpu, - target_ulong pc, int nb_insn, int is_physical) -{ - int count, i; - CPUDebug s; - g_autoptr(GString) ds = g_string_new(""); - - initialize_debug_target(&s, cpu); - s.info.fprintf_func = gstring_printf; - s.info.stream = (FILE *)ds; /* abuse this slot */ - - if (is_physical) { - s.info.read_memory_func = physical_read_memory; - } - s.info.buffer_vma = pc; - - if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) { - monitor_puts(mon, ds->str); - return; - } - - if (!s.info.print_insn) { - monitor_printf(mon, "0x" TARGET_FMT_lx - ": Asm output not supported on this arch\n", pc); - return; - } - - for (i = 0; i < nb_insn; i++) { - g_string_append_printf(ds, "0x" TARGET_FMT_lx ": ", pc); - count = s.info.print_insn(pc, &s.info); - g_string_append_c(ds, '\n'); - if (count < 0) { - break; - } - pc += count; - } - - monitor_puts(mon, ds->str); -} -#endif diff --git a/disas/meson.build b/disas/meson.build index c865bdd882..832727e4b3 100644 --- a/disas/meson.build +++ b/disas/meson.build @@ -10,4 +10,8 @@ common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files('riscv.c')) common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c')) common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c')) common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c')) -common_ss.add(when: capstone, if_true: files('capstone.c')) +common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone]) +common_ss.add(files('disas.c')) + +softmmu_ss.add(files('disas-mon.c')) +specific_ss.add(capstone) diff --git a/include/disas/disas.h b/include/disas/disas.h index d363e95ede..176775eff7 100644 --- a/include/disas/disas.h +++ b/include/disas/disas.h @@ -1,34 +1,23 @@ #ifndef QEMU_DISAS_H #define QEMU_DISAS_H -#include "exec/hwaddr.h" - -#ifdef NEED_CPU_H -#include "cpu.h" - /* Disassemble this for me please... (debugging). */ -void disas(FILE *out, const void *code, unsigned long size); -void target_disas(FILE *out, CPUState *cpu, target_ulong code, - target_ulong size); +void disas(FILE *out, const void *code, size_t size); +void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size); -void monitor_disas(Monitor *mon, CPUState *cpu, - target_ulong pc, int nb_insn, int is_physical); +void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc, + int nb_insn, bool is_physical); char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size); /* Look up symbol for debugging purpose. Returns "" if unknown. 
*/ -const char *lookup_symbol(target_ulong orig_addr); -#endif +const char *lookup_symbol(uint64_t orig_addr); struct syminfo; struct elf32_sym; struct elf64_sym; -#if defined(CONFIG_USER_ONLY) -typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr); -#else -typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr); -#endif +typedef const char *(*lookup_symbol_t)(struct syminfo *s, uint64_t orig_addr); struct syminfo { lookup_symbol_t lookup_symbol; diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 1be4a3117e..e5a55ede5f 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -32,6 +32,7 @@ extern intptr_t qemu_host_page_mask; #define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size()) /* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */ +extern QemuMutex qemu_cpu_list_lock; void qemu_init_cpu_list(void); void cpu_list_lock(void); void cpu_list_unlock(void); diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index e1c498ef4b..a6e0cf1812 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -111,8 +111,11 @@ typedef struct CPUTLBEntry { use the corresponding iotlb value. */ uintptr_t addend; }; - /* padding to get a power of two size */ - uint8_t dummy[1 << CPU_TLB_ENTRY_BITS]; + /* + * Padding to get a power of two size, as well as index + * access to addr_{read,write,code}. + */ + target_ulong addr_idx[(1 << CPU_TLB_ENTRY_BITS) / TARGET_LONG_SIZE]; }; } CPUTLBEntry; diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index c141f0394f..7c867c94c3 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -360,13 +360,29 @@ static inline void clear_helper_retaddr(void) /* Needed for TCG_OVERSIZED_GUEST */ #include "tcg/tcg.h" +static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry, + MMUAccessType access_type) +{ + /* Do not rearrange the CPUTLBEntry structure members. */ + QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) != + MMU_DATA_LOAD * TARGET_LONG_SIZE); + QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) != + MMU_DATA_STORE * TARGET_LONG_SIZE); + QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) != + MMU_INST_FETCH * TARGET_LONG_SIZE); + + const target_ulong *ptr = &entry->addr_idx[access_type]; +#if TCG_OVERSIZED_GUEST + return *ptr; +#else + /* ofs might correspond to .addr_write, so use qatomic_read */ + return qatomic_read(ptr); +#endif +} + static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) { -#if TCG_OVERSIZED_GUEST - return entry->addr_write; -#else - return qatomic_read(&entry->addr_write); -#endif + return tlb_read_idx(entry, MMU_DATA_STORE); } /* Find the TLB index corresponding to the mmu_idx + address pair. */ diff --git a/include/exec/memop.h b/include/exec/memop.h index 25d027434a..07f5f88188 100644 --- a/include/exec/memop.h +++ b/include/exec/memop.h @@ -47,8 +47,6 @@ typedef enum MemOp { * MO_UNALN accesses are never checked for alignment. * MO_ALIGN accesses will result in a call to the CPU's * do_unaligned_access hook if the guest address is not aligned. - * The default depends on whether the target CPU defines - * TARGET_ALIGNED_ONLY. * * Some architectures (e.g. ARMv8) need the address which is aligned * to a size more than the size of the memory access. 
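With TARGET_ALIGNED_ONLY gone, MO_ALIGN and MO_UNALN become ordinary MemOp flags with fixed values (MO_UNALN is now 0, MO_ALIGN is MO_AMASK), so each front end states its alignment requirement per access instead of inheriting a per-target default. The MIPS hunks below do this by OR-ing in ctx->default_tcg_memop_mask; the sh4 and nios2 hunks pass MO_ALIGN or MO_UNALN directly. A minimal sketch of the two forms, assuming a MIPS-style DisasContext with a mem_idx field (gen_example_load itself is a hypothetical helper, not code from the patch):

static void gen_example_load(DisasContext *ctx, TCGv val, TCGv addr)
{
    /* 32-bit LE load that must be 4-byte aligned: reaches the CPU's
     * do_unaligned_access hook if addr has either low bit set. */
    tcg_gen_qemu_ld_tl(val, addr, ctx->mem_idx, MO_TEUL | MO_ALIGN);

    /* The same load with no alignment check; since MO_UNALN is now 0,
     * this is also what plain MO_TEUL means when no MO_ALIGN* flag
     * is supplied. */
    tcg_gen_qemu_ld_tl(val, addr, ctx->mem_idx, MO_TEUL | MO_UNALN);
}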
@@ -65,21 +63,14 @@ typedef enum MemOp { */ MO_ASHIFT = 5, MO_AMASK = 0x7 << MO_ASHIFT, -#ifdef NEED_CPU_H -#ifdef TARGET_ALIGNED_ONLY - MO_ALIGN = 0, - MO_UNALN = MO_AMASK, -#else - MO_ALIGN = MO_AMASK, - MO_UNALN = 0, -#endif -#endif + MO_UNALN = 0, MO_ALIGN_2 = 1 << MO_ASHIFT, MO_ALIGN_4 = 2 << MO_ASHIFT, MO_ALIGN_8 = 3 << MO_ASHIFT, MO_ALIGN_16 = 4 << MO_ASHIFT, MO_ALIGN_32 = 5 << MO_ASHIFT, MO_ALIGN_64 = 6 << MO_ASHIFT, + MO_ALIGN = MO_AMASK, /* Combinations of the above, for ease of use. */ MO_UB = MO_8, diff --git a/include/exec/poison.h b/include/exec/poison.h index 140daa4a85..256736e11a 100644 --- a/include/exec/poison.h +++ b/include/exec/poison.h @@ -35,7 +35,6 @@ #pragma GCC poison TARGET_TRICORE #pragma GCC poison TARGET_XTENSA -#pragma GCC poison TARGET_ALIGNED_ONLY #pragma GCC poison TARGET_HAS_BFLT #pragma GCC poison TARGET_NAME #pragma GCC poison TARGET_SUPPORTS_MTTCG diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 703f7434a0..418ad92598 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -17,6 +17,7 @@ #include "qemu/guest-random.h" #include "qemu/units.h" #include "qemu/selfmap.h" +#include "qemu/lockable.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "target_signal.h" @@ -3327,9 +3328,10 @@ static void load_elf_interp(const char *filename, struct image_info *info, static int symfind(const void *s0, const void *s1) { - target_ulong addr = *(target_ulong *)s0; struct elf_sym *sym = (struct elf_sym *)s1; + __typeof(sym->st_value) addr = *(uint64_t *)s0; int result = 0; + if (addr < sym->st_value) { result = -1; } else if (addr >= sym->st_value + sym->st_size) { @@ -3338,7 +3340,7 @@ static int symfind(const void *s0, const void *s1) return result; } -static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr) +static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr) { #if ELF_CLASS == ELFCLASS32 struct elf_sym *syms = s->disas_symtab.elf32; @@ -4237,14 +4239,14 @@ static int fill_note_info(struct elf_note_info *info, info->notes_size += note_size(&info->notes[i]); /* read and fill status of all threads */ - cpu_list_lock(); - CPU_FOREACH(cpu) { - if (cpu == thread_cpu) { - continue; + WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) { + CPU_FOREACH(cpu) { + if (cpu == thread_cpu) { + continue; + } + fill_thread_info(info, cpu->env_ptr); } - fill_thread_info(info, cpu->env_ptr); } - cpu_list_unlock(); return (0); } diff --git a/meson.build b/meson.build index 5c7af6f3bc..d3cf48960b 100644 --- a/meson.build +++ b/meson.build @@ -3153,9 +3153,6 @@ specific_ss.add(files('cpu.c')) subdir('softmmu') -common_ss.add(capstone) -specific_ss.add(files('disas.c'), capstone) - # Work around a gcc bug/misfeature wherein constant propagation looks # through an alias: # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99696 diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 180ba38c7a..388337a332 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -150,25 +150,25 @@ int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms, retry: init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - cpu_list_lock(); - gen_id = cpu_list_generation_id_get(); - records = vcpu_dirty_stat_alloc(stat); - vcpu_dirty_stat_collect(stat, records, true); - cpu_list_unlock(); + WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) { + gen_id = cpu_list_generation_id_get(); + records = vcpu_dirty_stat_alloc(stat); + vcpu_dirty_stat_collect(stat, records, true); + } duration = dirty_stat_wait(calc_time_ms, init_time_ms); 
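/*
 * Annotation (not part of the patch): WITH_QEMU_LOCK_GUARD() from
 * qemu/lockable.h holds the lock for the lexical scope and releases
 * it on every exit path, because the guard variable carries a cleanup
 * attribute. That is what lets the conversions above and below drop
 * the explicit cpu_list_lock()/cpu_list_unlock() pairs. A minimal
 * sketch of the pattern, assuming the newly exported
 * qemu_cpu_list_lock (the helper itself is hypothetical):
 */
static int count_cpus_locked(void)
{
    CPUState *cpu;
    int n = 0;

    WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
        CPU_FOREACH(cpu) {
            n++;
        }
        /*
         * No unlock here: leaving the block -- by fallthrough, return
         * or goto -- runs the cleanup and drops the lock exactly once.
         * For the same reason, the explicit cpu_list_unlock() ahead of
         * "goto retry" in the hunk that follows looks redundant, and
         * would appear to release the mutex a second time when the
         * goto leaves the guarded scope.
         */
    }
    return n;
}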
global_dirty_log_sync(flag, one_shot); - cpu_list_lock(); - if (gen_id != cpu_list_generation_id_get()) { - g_free(records); - g_free(stat->rates); - cpu_list_unlock(); - goto retry; + WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) { + if (gen_id != cpu_list_generation_id_get()) { + g_free(records); + g_free(stat->rates); + cpu_list_unlock(); + goto retry; + } + vcpu_dirty_stat_collect(stat, records, false); } - vcpu_dirty_stat_collect(stat, records, false); - cpu_list_unlock(); for (i = 0; i < stat->nvcpu; i++) { dirtyrate = do_calculate_dirtyrate(records[i], duration); diff --git a/replay/replay.c b/replay/replay.c index c39156c522..0f7d766efe 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -74,7 +74,7 @@ uint64_t replay_get_current_icount(void) int replay_get_instructions(void) { int res = 0; - replay_mutex_lock(); + g_assert(replay_mutex_locked()); if (replay_next_event_is(EVENT_INSTRUCTION)) { res = replay_state.instruction_count; if (replay_break_icount != -1LL) { @@ -85,7 +85,6 @@ int replay_get_instructions(void) } } } - replay_mutex_unlock(); return res; } diff --git a/target/loongarch/csr_helper.c b/target/loongarch/csr_helper.c index 7e02787895..6526367946 100644 --- a/target/loongarch/csr_helper.c +++ b/target/loongarch/csr_helper.c @@ -15,7 +15,6 @@ #include "exec/cpu_ldst.h" #include "hw/irq.h" #include "cpu-csr.h" -#include "tcg/tcg-ldst.h" target_ulong helper_csrrd_pgd(CPULoongArchState *env) { diff --git a/target/loongarch/iocsr_helper.c b/target/loongarch/iocsr_helper.c index 505853e17b..dda9845d6c 100644 --- a/target/loongarch/iocsr_helper.c +++ b/target/loongarch/iocsr_helper.c @@ -12,7 +12,6 @@ #include "exec/helper-proto.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" -#include "tcg/tcg-ldst.h" #define GET_MEMTXATTRS(cas) \ ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index}) diff --git a/target/m68k/translate.c b/target/m68k/translate.c index 744eb3748b..44d852b106 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -959,6 +959,7 @@ static void gen_load_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp, switch (opsize) { case OS_BYTE: case OS_WORD: + case OS_LONG: tcg_gen_qemu_ld_tl(tmp, addr, index, opsize | MO_SIGN | MO_TE); gen_helper_exts32(cpu_env, fp, tmp); break; diff --git a/target/mips/tcg/micromips_translate.c.inc b/target/mips/tcg/micromips_translate.c.inc index e8b193aeda..211d102cf6 100644 --- a/target/mips/tcg/micromips_translate.c.inc +++ b/target/mips/tcg/micromips_translate.c.inc @@ -977,20 +977,24 @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd, gen_reserved_instruction(ctx); return; } - tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | + ctx->default_tcg_memop_mask); gen_store_gpr(t1, rd); tcg_gen_movi_tl(t1, 4); gen_op_addr_add(ctx, t0, t0, t1); - tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | + ctx->default_tcg_memop_mask); gen_store_gpr(t1, rd + 1); break; case SWP: gen_load_gpr(t1, rd); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); tcg_gen_movi_tl(t1, 4); gen_op_addr_add(ctx, t0, t0, t1); gen_load_gpr(t1, rd + 1); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); break; #ifdef TARGET_MIPS64 case LDP: @@ -998,20 +1002,24 @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd, 
gen_reserved_instruction(ctx); return; } - tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ | + ctx->default_tcg_memop_mask); gen_store_gpr(t1, rd); tcg_gen_movi_tl(t1, 8); gen_op_addr_add(ctx, t0, t0, t1); - tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ | + ctx->default_tcg_memop_mask); gen_store_gpr(t1, rd + 1); break; case SDP: gen_load_gpr(t1, rd); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ | + ctx->default_tcg_memop_mask); tcg_gen_movi_tl(t1, 8); gen_op_addr_add(ctx, t0, t0, t1); gen_load_gpr(t1, rd + 1); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ | + ctx->default_tcg_memop_mask); break; #endif } diff --git a/target/mips/tcg/mips16e_translate.c.inc b/target/mips/tcg/mips16e_translate.c.inc index 602f5f0c02..5cffe0e412 100644 --- a/target/mips/tcg/mips16e_translate.c.inc +++ b/target/mips/tcg/mips16e_translate.c.inc @@ -172,22 +172,26 @@ static void gen_mips16_save(DisasContext *ctx, case 4: gen_base_offset_addr(ctx, t0, 29, 12); gen_load_gpr(t1, 7); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); /* Fall through */ case 3: gen_base_offset_addr(ctx, t0, 29, 8); gen_load_gpr(t1, 6); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); /* Fall through */ case 2: gen_base_offset_addr(ctx, t0, 29, 4); gen_load_gpr(t1, 5); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); /* Fall through */ case 1: gen_base_offset_addr(ctx, t0, 29, 0); gen_load_gpr(t1, 4); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | + ctx->default_tcg_memop_mask); } gen_load_gpr(t0, 29); @@ -196,7 +200,8 @@ static void gen_mips16_save(DisasContext *ctx, tcg_gen_movi_tl(t2, -4); \ gen_op_addr_add(ctx, t0, t0, t2); \ gen_load_gpr(t1, reg); \ - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); \ + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | \ + ctx->default_tcg_memop_mask); \ } while (0) if (do_ra) { @@ -298,7 +303,8 @@ static void gen_mips16_restore(DisasContext *ctx, #define DECR_AND_LOAD(reg) do { \ tcg_gen_movi_tl(t2, -4); \ gen_op_addr_add(ctx, t0, t0, t2); \ - tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); \ + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | \ + ctx->default_tcg_memop_mask); \ gen_store_gpr(t1, reg); \ } while (0) diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c index bdd20709c0..be038b5f07 100644 --- a/target/mips/tcg/mxu_translate.c +++ b/target/mips/tcg/mxu_translate.c @@ -831,7 +831,8 @@ static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx) tcg_gen_ori_tl(t1, t1, 0xFFFFF000); } tcg_gen_add_tl(t1, t0, t1); - tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_TESL ^ (sel * MO_BSWAP)); + tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) | + ctx->default_tcg_memop_mask); gen_store_mxu_gpr(t1, XRa); } diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc index 97b9572caa..a98dde0d2e 100644 --- a/target/mips/tcg/nanomips_translate.c.inc +++ b/target/mips/tcg/nanomips_translate.c.inc @@ -998,7 +998,7 @@ static void gen_llwp(DisasContext *ctx, uint32_t base, 
int16_t offset, TCGv tmp2 = tcg_temp_new(); gen_base_offset_addr(ctx, taddr, base, offset); - tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ | MO_ALIGN); if (cpu_is_bigendian(ctx)) { tcg_gen_extr_i64_tl(tmp2, tmp1, tval); } else { @@ -1039,7 +1039,8 @@ static void gen_scwp(DisasContext *ctx, uint32_t base, int16_t offset, tcg_gen_ld_i64(llval, cpu_env, offsetof(CPUMIPSState, llval_wp)); tcg_gen_atomic_cmpxchg_i64(val, taddr, llval, tval, - eva ? MIPS_HFLAG_UM : ctx->mem_idx, MO_64); + eva ? MIPS_HFLAG_UM : ctx->mem_idx, + MO_64 | MO_ALIGN); if (reg1 != 0) { tcg_gen_movi_tl(cpu_gpr[reg1], 1); } @@ -2640,52 +2641,49 @@ static void gen_p_lsx(DisasContext *ctx, int rd, int rs, int rt) switch (extract32(ctx->opcode, 7, 4)) { case NM_LBX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, - MO_SB); + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB); gen_store_gpr(t0, rd); break; case NM_LHX: /*case NM_LHXS:*/ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, - MO_TESW); + MO_TESW | ctx->default_tcg_memop_mask); gen_store_gpr(t0, rd); break; case NM_LWX: /*case NM_LWXS:*/ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, - MO_TESL); + MO_TESL | ctx->default_tcg_memop_mask); gen_store_gpr(t0, rd); break; case NM_LBUX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, - MO_UB); + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_UB); gen_store_gpr(t0, rd); break; case NM_LHUX: /*case NM_LHUXS:*/ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, - MO_TEUW); + MO_TEUW | ctx->default_tcg_memop_mask); gen_store_gpr(t0, rd); break; case NM_SBX: check_nms(ctx); gen_load_gpr(t1, rd); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, - MO_8); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_8); break; case NM_SHX: /*case NM_SHXS:*/ check_nms(ctx); gen_load_gpr(t1, rd); tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, - MO_TEUW); + MO_TEUW | ctx->default_tcg_memop_mask); break; case NM_SWX: /*case NM_SWXS:*/ check_nms(ctx); gen_load_gpr(t1, rd); tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, - MO_TEUL); + MO_TEUL | ctx->default_tcg_memop_mask); break; case NM_LWC1X: /*case NM_LWC1XS:*/ @@ -3738,7 +3736,8 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) addr_off); tcg_gen_movi_tl(t0, addr); - tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx, MO_TESL); + tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx, + MO_TESL | ctx->default_tcg_memop_mask); } break; case NM_SWPC48: @@ -3754,7 +3753,8 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) tcg_gen_movi_tl(t0, addr); gen_load_gpr(t1, rt); - tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, + MO_TEUL | ctx->default_tcg_memop_mask); } break; default: @@ -4305,7 +4305,7 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) TCGv va = tcg_temp_new(); TCGv t1 = tcg_temp_new(); MemOp memop = (extract32(ctx->opcode, 8, 3)) == - NM_P_LS_UAWM ? MO_UNALN : 0; + NM_P_LS_UAWM ? MO_UNALN : MO_ALIGN; count = (count == 0) ? 
8 : count; while (counter != count) { diff --git a/target/nios2/translate.c b/target/nios2/translate.c index 6610e22236..a548e16ed5 100644 --- a/target/nios2/translate.c +++ b/target/nios2/translate.c @@ -298,6 +298,11 @@ static void gen_ldx(DisasContext *dc, uint32_t code, uint32_t flags) TCGv data = dest_gpr(dc, instr.b); tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s); +#ifdef CONFIG_USER_ONLY + flags |= MO_UNALN; +#else + flags |= MO_ALIGN; +#endif tcg_gen_qemu_ld_tl(data, addr, dc->mem_idx, flags); } @@ -309,6 +314,11 @@ static void gen_stx(DisasContext *dc, uint32_t code, uint32_t flags) TCGv addr = tcg_temp_new(); tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s); +#ifdef CONFIG_USER_ONLY + flags |= MO_UNALN; +#else + flags |= MO_ALIGN; +#endif tcg_gen_qemu_st_tl(val, addr, dc->mem_idx, flags); } diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 6e40d5dd6a..0dedbb8210 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -527,13 +527,15 @@ static void _decode_opc(DisasContext * ctx) case 0x9000: /* mov.w @(disp,PC),Rn */ { TCGv addr = tcg_constant_i32(ctx->base.pc_next + 4 + B7_0 * 2); - tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW); + tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, + MO_TESW | MO_ALIGN); } return; case 0xd000: /* mov.l @(disp,PC),Rn */ { TCGv addr = tcg_constant_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3); - tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, + MO_TESL | MO_ALIGN); } return; case 0x7000: /* add #imm,Rn */ @@ -801,9 +803,11 @@ static void _decode_opc(DisasContext * ctx) { TCGv arg0, arg1; arg0 = tcg_temp_new(); - tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, + MO_TESL | MO_ALIGN); arg1 = tcg_temp_new(); - tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); gen_helper_macl(cpu_env, arg0, arg1); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); @@ -813,9 +817,11 @@ static void _decode_opc(DisasContext * ctx) { TCGv arg0, arg1; arg0 = tcg_temp_new(); - tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, + MO_TESL | MO_ALIGN); arg1 = tcg_temp_new(); - tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); gen_helper_macw(cpu_env, arg0, arg1); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2); @@ -961,30 +967,36 @@ static void _decode_opc(DisasContext * ctx) if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); gen_load_fpr64(ctx, fp, XHACK(B7_4)); - tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEUQ); + tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, + MO_TEUQ | MO_ALIGN); } else { - tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, + MO_TEUL | MO_ALIGN); } return; case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ); + tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, + MO_TEUQ | MO_ALIGN); gen_store_fpr64(ctx, fp, XHACK(B11_8)); } else { - tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL); + 
tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, + MO_TEUL | MO_ALIGN); } return; case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ); + tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, + MO_TEUQ | MO_ALIGN); gen_store_fpr64(ctx, fp, XHACK(B11_8)); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8); } else { - tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, + MO_TEUL | MO_ALIGN); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4); } return; @@ -996,10 +1008,12 @@ static void _decode_opc(DisasContext * ctx) TCGv_i64 fp = tcg_temp_new_i64(); gen_load_fpr64(ctx, fp, XHACK(B7_4)); tcg_gen_subi_i32(addr, REG(B11_8), 8); - tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ); + tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, + MO_TEUQ | MO_ALIGN); } else { tcg_gen_subi_i32(addr, REG(B11_8), 4); - tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, + MO_TEUL | MO_ALIGN); } tcg_gen_mov_i32(REG(B11_8), addr); } @@ -1011,10 +1025,12 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_add_i32(addr, REG(B7_4), REG(0)); if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEUQ); + tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, + MO_TEUQ | MO_ALIGN); gen_store_fpr64(ctx, fp, XHACK(B11_8)); } else { - tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, + MO_TEUL | MO_ALIGN); } } return; @@ -1026,9 +1042,11 @@ static void _decode_opc(DisasContext * ctx) if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); gen_load_fpr64(ctx, fp, XHACK(B7_4)); - tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ); + tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, + MO_TEUQ | MO_ALIGN); } else { - tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, + MO_TEUL | MO_ALIGN); } } return; @@ -1158,14 +1176,14 @@ static void _decode_opc(DisasContext * ctx) { TCGv addr = tcg_temp_new(); tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2); - tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW); + tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW | MO_ALIGN); } return; case 0xc600: /* mov.l @(disp,GBR),R0 */ { TCGv addr = tcg_temp_new(); tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4); - tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL | MO_ALIGN); } return; case 0xc000: /* mov.b R0,@(disp,GBR) */ @@ -1179,14 +1197,14 @@ static void _decode_opc(DisasContext * ctx) { TCGv addr = tcg_temp_new(); tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2); - tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW); + tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW | MO_ALIGN); } return; case 0xc200: /* mov.l R0,@(disp,GBR) */ { TCGv addr = tcg_temp_new(); tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4); - tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL | MO_ALIGN); } return; case 0x8000: /* mov.b R0,@(disp,Rn) */ @@ -1286,7 +1304,8 @@ static void _decode_opc(DisasContext * ctx) return; case 0x4087: /* ldc.l @Rm+,Rn_BANK */ CHECK_PRIVILEGED - tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, + MO_TESL | 
MO_ALIGN); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); return; case 0x0082: /* stc Rm_BANK,Rn */ @@ -1298,7 +1317,8 @@ static void _decode_opc(DisasContext * ctx) { TCGv addr = tcg_temp_new(); tcg_gen_subi_i32(addr, REG(B11_8), 4); - tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, + MO_TEUL | MO_ALIGN); tcg_gen_mov_i32(REG(B11_8), addr); } return; @@ -1354,7 +1374,8 @@ static void _decode_opc(DisasContext * ctx) CHECK_PRIVILEGED { TCGv val = tcg_temp_new(); - tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); tcg_gen_andi_i32(val, val, 0x700083f3); gen_write_sr(val); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); @@ -1372,7 +1393,7 @@ static void _decode_opc(DisasContext * ctx) TCGv val = tcg_temp_new(); tcg_gen_subi_i32(addr, REG(B11_8), 4); gen_read_sr(val); - tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN); tcg_gen_mov_i32(REG(B11_8), addr); } return; @@ -1383,7 +1404,8 @@ static void _decode_opc(DisasContext * ctx) return; \ case ldpnum: \ prechk \ - tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, MO_TESL); \ + tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, \ + MO_TESL | MO_ALIGN); \ tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); \ return; #define ST(reg,stnum,stpnum,prechk) \ @@ -1396,7 +1418,8 @@ static void _decode_opc(DisasContext * ctx) { \ TCGv addr = tcg_temp_new(); \ tcg_gen_subi_i32(addr, REG(B11_8), 4); \ - tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, MO_TEUL); \ + tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, \ + MO_TEUL | MO_ALIGN); \ tcg_gen_mov_i32(REG(B11_8), addr); \ } \ return; @@ -1423,7 +1446,8 @@ static void _decode_opc(DisasContext * ctx) CHECK_FPU_ENABLED { TCGv addr = tcg_temp_new(); - tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); gen_helper_ld_fpscr(cpu_env, addr); ctx->base.is_jmp = DISAS_STOP; @@ -1441,16 +1465,18 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_andi_i32(val, cpu_fpscr, 0x003fffff); addr = tcg_temp_new(); tcg_gen_subi_i32(addr, REG(B11_8), 4); - tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN); tcg_gen_mov_i32(REG(B11_8), addr); } return; case 0x00c3: /* movca.l R0,@Rm */ { TCGv val = tcg_temp_new(); - tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, + MO_TEUL | MO_ALIGN); gen_helper_movcal(cpu_env, REG(B11_8), val); - tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TEUL | MO_ALIGN); } ctx->has_movcal = 1; return; @@ -1492,11 +1518,13 @@ static void _decode_opc(DisasContext * ctx) cpu_lock_addr, fail); tmp = tcg_temp_new(); tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value, - REG(0), ctx->memidx, MO_TEUL); + REG(0), ctx->memidx, + MO_TEUL | MO_ALIGN); tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value); } else { tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail); - tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TEUL | MO_ALIGN); tcg_gen_movi_i32(cpu_sr_t, 1); } tcg_gen_br(done); @@ -1521,11 +1549,13 @@ static void _decode_opc(DisasContext * ctx) if 
((tb_cflags(ctx->base.tb) & CF_PARALLEL)) { TCGv tmp = tcg_temp_new(); tcg_gen_mov_i32(tmp, REG(B11_8)); - tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); tcg_gen_mov_i32(cpu_lock_value, REG(0)); tcg_gen_mov_i32(cpu_lock_addr, tmp); } else { - tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TESL | MO_ALIGN); tcg_gen_movi_i32(cpu_lock_addr, 0); } return; diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index d8d464e4a0..62dd22d73c 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1580,13 +1580,6 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, } } -static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) -{ - ptrdiff_t offset = tcg_pcrel_diff(s, target); - tcg_debug_assert(offset == sextract64(offset, 0, 21)); - tcg_out_insn(s, 3406, ADR, rd, offset); -} - typedef struct { TCGReg base; TCGReg index; @@ -1627,151 +1620,44 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = { #endif }; +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 1, .tmp = { TCG_REG_TMP } +}; + static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); + MemOp opc = get_memop(lb->oi); if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); - tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); - tcg_out_adr(s, TCG_REG_X3, lb->raddr); + tcg_out_ld_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); - - tcg_out_movext(s, lb->type, lb->datalo_reg, - TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0); + tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); tcg_out_goto(s, lb->raddr); return true; } static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); - MemOp size = opc & MO_SIZE; + MemOp opc = get_memop(lb->oi); if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); - tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); - tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); - tcg_out_adr(s, TCG_REG_X4, lb->raddr); + tcg_out_st_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]); tcg_out_goto(s, lb->raddr); return true; } - -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, - TCGType ext, TCGReg data_reg, TCGReg addr_reg, - tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = ext; - label->datalo_reg = data_reg; - label->addrlo_reg = addr_reg; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr; -} - -/* We expect to use a 7-bit scaled negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); - -/* These offsets are built into the LDP below. 
*/ -QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); -QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); - -/* Load and compare a TLB entry, emitting the conditional jump to the - slow path for the failure case, which will be patched later when finalizing - the slow path. Generated code returns the host addend in X1, - clobbers X0,X2,X3,TMP. */ -static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, - tcg_insn_unit **label_ptr, int mem_index, - bool is_read) -{ - unsigned a_bits = get_alignment_bits(opc); - unsigned s_bits = opc & MO_SIZE; - unsigned a_mask = (1u << a_bits) - 1; - unsigned s_mask = (1u << s_bits) - 1; - TCGReg x3; - TCGType mask_type; - uint64_t compare_mask; - - mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 - ? TCG_TYPE_I64 : TCG_TYPE_I32); - - /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ - tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, - TLB_MASK_TABLE_OFS(mem_index), 1, 0); - - /* Extract the TLB index from the address into X0. */ - tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, - TCG_REG_X0, TCG_REG_X0, addr_reg, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ - tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); - - /* Load the tlb comparator into X0, and the fast path addend into X1. */ - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read - ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, - offsetof(CPUTLBEntry, addend)); - - /* For aligned accesses, we check the first byte and include the alignment - bits within the address. For unaligned access, we check that we don't - cross pages using the address of the last byte of the access. */ - if (a_bits >= s_bits) { - x3 = addr_reg; - } else { - tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, - TCG_REG_X3, addr_reg, s_mask - a_mask); - x3 = TCG_REG_X3; - } - compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; - - /* Store the page mask part of the address into X3. */ - tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, - TCG_REG_X3, x3, compare_mask); - - /* Perform the address comparison. */ - tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); - - /* If not equal, we jump to the slow path. */ - *label_ptr = s->code_ptr; - tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); -} - #else -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, - unsigned a_bits) +static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) { - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->addrlo_reg = addr_reg; - - /* tst addr, #mask */ - tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); - - label->label_ptr[0] = s->code_ptr; - - /* b.ne slow_path */ - tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); - - label->raddr = tcg_splitwx_to_rx(s->code_ptr); + ptrdiff_t offset = tcg_pcrel_diff(s, target); + tcg_debug_assert(offset == sextract64(offset, 0, 21)); + tcg_out_insn(s, 3406, ADR, rd, offset); } static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) @@ -1801,6 +1687,125 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } #endif /* CONFIG_SOFTMMU */ +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. 
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addr_reg, MemOpIdx oi, + bool is_ld) +{ + TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + unsigned a_mask = (1u << a_bits) - 1; + +#ifdef CONFIG_SOFTMMU + unsigned s_bits = opc & MO_SIZE; + unsigned s_mask = (1u << s_bits) - 1; + unsigned mem_index = get_mmuidx(oi); + TCGReg x3; + TCGType mask_type; + uint64_t compare_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32 + ? TCG_TYPE_I64 : TCG_TYPE_I32); + + /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); + QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); + QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); + tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, + TLB_MASK_TABLE_OFS(mem_index), 1, 0); + + /* Extract the TLB index from the address into X0. */ + tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, + TCG_REG_X0, TCG_REG_X0, addr_reg, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ + tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); + + /* Load the tlb comparator into X0, and the fast path addend into X1. */ + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, + is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, + offsetof(CPUTLBEntry, addend)); + + /* + * For aligned accesses, we check the first byte and include the alignment + * bits within the address. For unaligned access, we check that we don't + * cross pages using the address of the last byte of the access. + */ + if (a_bits >= s_bits) { + x3 = addr_reg; + } else { + tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, addr_reg, s_mask - a_mask); + x3 = TCG_REG_X3; + } + compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; + + /* Store the page mask part of the address into X3. */ + tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, x3, compare_mask); + + /* Perform the address comparison. */ + tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0); + + /* If not equal, we jump to the slow path. 
*/ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); + + *h = (HostAddress){ + .base = TCG_REG_X1, + .index = addr_reg, + .index_ext = addr_type + }; +#else + if (a_mask) { + ldst = new_ldst_label(s); + + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + /* tst addr, #mask */ + tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); + + /* b.ne slow_path */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); + } + + if (USE_GUEST_BASE) { + *h = (HostAddress){ + .base = TCG_REG_GUEST_BASE, + .index = addr_reg, + .index_ext = addr_type + }; + } else { + *h = (HostAddress){ + .base = addr_reg, + .index = TCG_REG_XZR, + .index_ext = TCG_TYPE_I64 + }; + } +#endif + + return ldst; +} + static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, TCGReg data_r, HostAddress h) { @@ -1857,93 +1862,33 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp memop = get_memop(oi); - TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; + TCGLabelQemuLdst *ldst; HostAddress h; - /* Byte swapping is left to middle-end expansion. */ - tcg_debug_assert((memop & MO_BSWAP) == 0); + ldst = prepare_host_addr(s, &h, addr_reg, oi, true); + tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h); -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr; - - tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1); - - h = (HostAddress){ - .base = TCG_REG_X1, - .index = addr_reg, - .index_ext = addr_type - }; - tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h); - - add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - unsigned a_bits = get_alignment_bits(memop); - if (a_bits) { - tcg_out_test_alignment(s, true, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - if (USE_GUEST_BASE) { - h = (HostAddress){ - .base = TCG_REG_GUEST_BASE, - .index = addr_reg, - .index_ext = addr_type - }; - } else { - h = (HostAddress){ - .base = addr_reg, - .index = TCG_REG_XZR, - .index_ext = TCG_TYPE_I64 - }; - } - tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h); -#endif /* CONFIG_SOFTMMU */ } static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp memop = get_memop(oi); - TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; + TCGLabelQemuLdst *ldst; HostAddress h; - /* Byte swapping is left to middle-end expansion. 
*/ - tcg_debug_assert((memop & MO_BSWAP) == 0); + ldst = prepare_host_addr(s, &h, addr_reg, oi, false); + tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h); -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr; - - tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0); - - h = (HostAddress){ - .base = TCG_REG_X1, - .index = addr_reg, - .index_ext = addr_type - }; - tcg_out_qemu_st_direct(s, memop, data_reg, h); - - add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - unsigned a_bits = get_alignment_bits(memop); - if (a_bits) { - tcg_out_test_alignment(s, false, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - if (USE_GUEST_BASE) { - h = (HostAddress){ - .base = TCG_REG_GUEST_BASE, - .index = addr_reg, - .index_ext = addr_type - }; - } else { - h = (HostAddress){ - .base = addr_reg, - .index = TCG_REG_XZR, - .index_ext = TCG_TYPE_I64 - }; - } - tcg_out_qemu_st_direct(s, memop, data_reg, h); -#endif /* CONFIG_SOFTMMU */ } static const tcg_insn_unit *tb_ret_addr; diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index b6b4ffc546..df514e56fc 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -690,8 +690,8 @@ tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm) tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1); } -static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, - TCGReg rn, int imm8) +static void __attribute__((unused)) +tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, int imm8) { tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0); } @@ -969,28 +969,16 @@ static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn) tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, 0xff); } -static void __attribute__((unused)) -tcg_out_ext8u_cond(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) -{ - tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff); -} - static void tcg_out_ext16s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn) { /* sxth */ tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | (rd << 12) | rn); } -static void tcg_out_ext16u_cond(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn) -{ - /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); -} - static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn) { - tcg_out_ext16u_cond(s, COND_AL, rd, rn); + /* uxth */ + tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | (rd << 12) | rn); } static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn) @@ -1382,82 +1370,121 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = { #endif }; -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * argreg is where we want to put this argument, arg is the argument itself. - * Return value is the updated argreg ready for the next call. - * Note that argreg 0..3 is real registers, 4+ on stack. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. 
- */ -#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \ -static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \ -{ \ - if (argreg < 4) { \ - MOV_ARG(s, COND_AL, argreg, arg); \ - } else { \ - int ofs = (argreg - 4) * 4; \ - EXT_ARG; \ - tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \ - tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \ - } \ - return argreg + 1; \ +static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) +{ + /* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */ + return TCG_REG_R14; } -DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32, - (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u_cond, - (tcg_out_ext8u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u_cond, - (tcg_out_ext16u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, ) +static const TCGLdstHelperParam ldst_helper_param = { + .ra_gen = ldst_ra_gen, + .ntmp = 1, + .tmp = { TCG_REG_TMP }, +}; -static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, - TCGReg arglo, TCGReg arghi) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - /* 64 bit arguments must go in even/odd register pairs - * and in 8-aligned stack slots. - */ - if (argreg & 1) { - argreg++; + MemOp opc = get_memop(lb->oi); + + if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; } - if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) { - tcg_out_strd_8(s, COND_AL, arglo, - TCG_REG_CALL_STACK, (argreg - 4) * 4); - return argreg + 2; + + tcg_out_ld_helper_args(s, lb, &ldst_helper_param); + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); + tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); + + tcg_out_goto(s, COND_AL, lb->raddr); + return true; +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + MemOp opc = get_memop(lb->oi); + + if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + tcg_out_st_helper_args(s, lb, &ldst_helper_param); + + /* Tail-call to the helper, which will return to the fast path. */ + tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]); + return true; +} +#else +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + if (TARGET_LONG_BITS == 64) { + /* 64-bit target address is aligned into R2:R3. */ + TCGMovExtend ext[2] = { + { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32, + .src = l->addrlo_reg, + .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, + { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32, + .src = l->addrhi_reg, + .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, + }; + tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP); } else { - argreg = tcg_out_arg_reg32(s, argreg, arglo); - argreg = tcg_out_arg_reg32(s, argreg, arghi); - return argreg; + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg); } + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0); + + /* + * Tail call to the helper, with the return address back inline, + * just for the clarity of the debugging traceback -- the helper + * cannot return. We have used BLNE to arrive here, so LR is + * already set. + */ + tcg_out_goto(s, COND_AL, (const void *) + (l->is_ld ? 
helper_unaligned_ld : helper_unaligned_st)); + return true; } -#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) - -/* We expect to use an 9-bit sign-magnitude negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256); - -/* These offsets are built into the LDRD below. */ -QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); -QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4); - -/* Load and compare a TLB entry, leaving the flags set. Returns the register - containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */ - -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - MemOp opc, int mem_index, bool is_load) +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} +#endif /* SOFTMMU */ + +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addrlo, TCGReg addrhi, + MemOpIdx oi, bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + MemOp a_bits = get_alignment_bits(opc); + unsigned a_mask = (1 << a_bits) - 1; + +#ifdef CONFIG_SOFTMMU + int mem_index = get_mmuidx(oi); + int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write); int fast_off = TLB_MASK_TABLE_OFS(mem_index); unsigned s_mask = (1 << (opc & MO_SIZE)) - 1; - unsigned a_mask = (1 << get_alignment_bits(opc)) - 1; TCGReg t_addr; + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */ + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256); + QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); + QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4); tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off); /* Extract the tlb index from the address into R0. */ @@ -1527,182 +1554,37 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0); } - return TCG_REG_R1; -} - -/* Record the context of a call to the out of line helper code for the slow - path for a load or store, so that we can later generate the correct - helper code. 
*/ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, - MemOpIdx oi, TCGType type, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addrhi, - tcg_insn_unit *raddr, - tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = type; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr; -} - -static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGReg argreg; - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); - - if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { - return false; - } - - argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); - } else { - argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); - } - argreg = tcg_out_arg_imm32(s, argreg, oi); - argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); - - /* Use the canonical unsigned helpers and minimize icache usage. */ - tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); - - if ((opc & MO_SIZE) == MO_64) { - TCGMovExtend ext[2] = { - { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32, - .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, - { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32, - .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, - }; - tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP); - } else { - tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg, - TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0); - } - - tcg_out_goto(s, COND_AL, lb->raddr); - return true; -} - -static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGReg argreg, datalo, datahi; - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); - - if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { - return false; - } - - argreg = TCG_REG_R0; - argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); - } else { - argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); - } - - datalo = lb->datalo_reg; - datahi = lb->datahi_reg; - switch (opc & MO_SIZE) { - case MO_8: - argreg = tcg_out_arg_reg8(s, argreg, datalo); - break; - case MO_16: - argreg = tcg_out_arg_reg16(s, argreg, datalo); - break; - case MO_32: - default: - argreg = tcg_out_arg_reg32(s, argreg, datalo); - break; - case MO_64: - argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi); - break; - } - - argreg = tcg_out_arg_imm32(s, argreg, oi); - argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); - - /* Tail-call to the helper, which will return to the fast path. */ - tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]); - return true; -} + *h = (HostAddress){ + .cond = COND_AL, + .base = addrlo, + .index = TCG_REG_R1, + .index_scratch = true, + }; #else + if (a_mask) { + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, - TCGReg addrhi, unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - - /* We are expecting a_bits to max out at 7, and can easily support 8. 
*/ - tcg_debug_assert(a_mask <= 0xff); - /* tst addr, #mask */ - tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); - - /* blne slow_path */ - label->label_ptr[0] = s->code_ptr; - tcg_out_bl_imm(s, COND_NE, 0); - - label->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - -static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) -{ - if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { - return false; + /* We are expecting a_bits to max out at 7 */ + tcg_debug_assert(a_mask <= 0xff); + /* tst addr, #mask */ + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); } - if (TARGET_LONG_BITS == 64) { - /* 64-bit target address is aligned into R2:R3. */ - TCGMovExtend ext[2] = { - { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32, - .src = l->addrlo_reg, - .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, - { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32, - .src = l->addrhi_reg, - .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, - }; - tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP); - } else { - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg); - } - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0); + *h = (HostAddress){ + .cond = COND_AL, + .base = addrlo, + .index = guest_base ? TCG_REG_GUEST_BASE : -1, + .index_scratch = false, + }; +#endif - /* - * Tail call to the helper, with the return address back inline, - * just for the clarity of the debugging traceback -- the helper - * cannot return. We have used BLNE to arrive here, so LR is - * already set. - */ - tcg_out_goto(s, COND_AL, (const void *) - (l->is_ld ? helper_unaligned_ld : helper_unaligned_st)); - return true; + return ldst; } -static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) -{ - return tcg_out_fail_alignment(s, l); -} - -static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) -{ - return tcg_out_fail_alignment(s, l); -} -#endif /* SOFTMMU */ - static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, HostAddress h) { @@ -1799,37 +1681,28 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, MemOpIdx oi, TCGType data_type) { MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - h.cond = COND_AL; - h.base = addrlo; - h.index_scratch = true; - h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1); + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; - /* - * This a conditional BL only to load a pointer within this opcode into - * LR for the slow path. We will not be using the value for a tail call. - */ - tcg_insn_unit *label_ptr = s->code_ptr; - tcg_out_bl_imm(s, COND_NE, 0); + /* + * This is a conditional BL only to load a pointer within this + * opcode into LR for the slow path. We will not be using + * the value for a tail call. + */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_bl_imm(s, COND_NE, 0); - tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h); - - add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); + tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h); + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } else { + tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h); } - - h.cond = COND_AL; - h.base = addrlo; - h.index = guest_base ?
TCG_REG_GUEST_BASE : -1; - h.index_scratch = false; - tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h); -#endif } static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, @@ -1891,35 +1764,25 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, MemOpIdx oi, TCGType data_type) { MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - h.cond = COND_EQ; - h.base = addrlo; - h.index_scratch = true; - h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0); - tcg_out_qemu_st_direct(s, opc, datalo, datahi, h); + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; - /* The conditional call must come last, as we're going to return here. */ - tcg_insn_unit *label_ptr = s->code_ptr; - tcg_out_bl_imm(s, COND_NE, 0); - - add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - - h.cond = COND_AL; - if (a_bits) { - tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); h.cond = COND_EQ; - } + tcg_out_qemu_st_direct(s, opc, datalo, datahi, h); - h.base = addrlo; - h.index = guest_base ? TCG_REG_GUEST_BASE : -1; - h.index_scratch = false; - tcg_out_qemu_st_direct(s, opc, datalo, datahi, h); -#endif + /* The conditional call is last, as we're going to return here. */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_bl_imm(s, COND_NE, 0); + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } else { + tcg_out_qemu_st_direct(s, opc, datalo, datahi, h); + } } static void tcg_out_epilogue(TCGContext *s); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index aae698121a..a01bfad773 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1802,142 +1802,37 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = { [MO_BEUQ] = helper_be_stq_mmu, }; -/* Perform the TLB load and compare. - - Inputs: - ADDRLO and ADDRHI contain the low and high part of the address. - - MEM_INDEX and S_BITS are the memory context and log2 size of the load. - - WHICH is the offset into the CPUTLBEntry structure of the slot to read. - This should be offsetof addr_read or addr_write. - - Outputs: - LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) - positions of the displacements of forward jumps to the TLB miss case. - - Second argument register is loaded with the low part of the address. - In the TLB hit case, it has been adjusted as indicated by the TLB - and so is a host address. In the TLB miss case, it continues to - hold a guest address. - - First argument register is clobbered. 
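For reference, the address computation this routine emits, whether open-coded here or in the new prepare_host_addr below, reduces to a shift, a mask, and an add against the per-mmu-idx fast table. A compilable sketch with assumed entry and page sizes; the struct names are stand-ins, not the real CPUTLB types:

    #include <stdint.h>

    typedef uint64_t vaddr_t;                /* guest virtual address */

    typedef struct {                         /* stand-in for CPUTLBEntry */
        vaddr_t addr_read, addr_write, addr_code;
        uintptr_t addend;
    } TLBEntrySketch;

    typedef struct {                         /* stand-in for CPUTLBDescFast */
        uintptr_t mask;                      /* (n_entries - 1) << entry_bits */
        TLBEntrySketch *table;
    } TLBFastSketch;

    enum { PAGE_BITS = 12, ENTRY_BITS = 5 }; /* assumed sizes; entry is 32 bytes */

    /* The shift/and/add sequence the backend emits, expressed in C. */
    static TLBEntrySketch *tlb_lookup(TLBFastSketch *f, vaddr_t addr)
    {
        uintptr_t ofs = (addr >> (PAGE_BITS - ENTRY_BITS)) & f->mask;
        return (TLBEntrySketch *)((uintptr_t)f->table + ofs);
    }

    int main(void)
    {
        static TLBEntrySketch table[64];
        TLBFastSketch f = { .mask = (64 - 1) << ENTRY_BITS, .table = table };

        /* Consecutive guest pages hash to consecutive slots, mod table size. */
        return tlb_lookup(&f, (vaddr_t)5 << PAGE_BITS) == &table[5] ? 0 : 1;
    }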
*/ - -static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - int mem_index, MemOp opc, - tcg_insn_unit **label_ptr, int which) -{ - TCGType ttype = TCG_TYPE_I32; - TCGType tlbtype = TCG_TYPE_I32; - int trexw = 0, hrexw = 0, tlbrexw = 0; - unsigned a_bits = get_alignment_bits(opc); - unsigned s_bits = opc & MO_SIZE; - unsigned a_mask = (1 << a_bits) - 1; - unsigned s_mask = (1 << s_bits) - 1; - target_ulong tlb_mask; - - if (TCG_TARGET_REG_BITS == 64) { - if (TARGET_LONG_BITS == 64) { - ttype = TCG_TYPE_I64; - trexw = P_REXW; - } - if (TCG_TYPE_PTR == TCG_TYPE_I64) { - hrexw = P_REXW; - if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) { - tlbtype = TCG_TYPE_I64; - tlbrexw = P_REXW; - } - } - } - - tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo); - tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, - TLB_MASK_TABLE_OFS(mem_index) + - offsetof(CPUTLBDescFast, mask)); - - tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0, - TLB_MASK_TABLE_OFS(mem_index) + - offsetof(CPUTLBDescFast, table)); - - /* If the required alignment is at least as large as the access, simply - copy the address and mask. For lesser alignments, check that we don't - cross pages for the complete access. */ - if (a_bits >= s_bits) { - tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); - } else { - tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, - addrlo, s_mask - a_mask); - } - tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; - tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); - - /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, - TCG_REG_L1, TCG_REG_L0, which); - - /* Prepare for both the fast path add of the tlb addend, and the slow - path function argument setup. */ - tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); - - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - label_ptr[0] = s->code_ptr; - s->code_ptr += 4; - - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - /* cmp 4(TCG_REG_L0), addrhi */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, which + 4); - - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - label_ptr[1] = s->code_ptr; - s->code_ptr += 4; - } - - /* TLB Hit. */ - - /* add addend(TCG_REG_L0), TCG_REG_L1 */ - tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L1, TCG_REG_L0, - offsetof(CPUTLBEntry, addend)); -} - /* - * Record the context of a call to the out of line helper code for the slow path - * for a load or store, so that we can later generate the correct helper code + * Because i686 has no register parameters and because x86_64 has xchg + * to handle addr/data register overlap, we have placed all input arguments + * before we might need a scratch reg. + * + * Even then, a scratch is only needed for l->raddr. Rather than expose + * a general-purpose scratch when we don't actually know it's available, + * use the ra_gen hook to load into RAX if needed.
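The hook contract, as this patch appears to use it, is: the generic marshaller passes the argument register it would like the return address in, or a negative value when none is free, and the backend returns whichever register it actually loaded. A toy model of that shape; register numbers and names below are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy model: ra_gen receives the desired argument register (or -1 for
     * "no argument register free") and returns the register actually used. */
    typedef struct {
        int (*ra_gen)(uintptr_t raddr, int arg);
    } HelperParamSketch;

    static int ra_into_rax(uintptr_t raddr, int arg)
    {
        if (arg < 0) {
            arg = 0;                         /* 0 stands in for TCG_REG_RAX */
        }
        printf("movi r%d, %#lx\n", arg, (unsigned long)raddr);
        return arg;
    }

    int main(void)
    {
        HelperParamSketch p = { .ra_gen = ra_into_rax };
        p.ra_gen(0x400123, -1);              /* forced to the scratch register */
        return 0;
    }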
*/ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, - TCGType type, MemOpIdx oi, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addrhi, - tcg_insn_unit *raddr, - tcg_insn_unit **label_ptr) +#if TCG_TARGET_REG_BITS == 64 +static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) { - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = type; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr[0]; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - label->label_ptr[1] = label_ptr[1]; + if (arg < 0) { + arg = TCG_REG_RAX; } + tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr); + return arg; } +static const TCGLdstHelperParam ldst_helper_param = { + .ra_gen = ldst_ra_gen +}; +#else +static const TCGLdstHelperParam ldst_helper_param = { }; +#endif /* * Generate code for the slow path for a load at the end of block */ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); + MemOp opc = get_memop(l->oi); tcg_insn_unit **label_ptr = &l->label_ptr[0]; /* resolve label address */ @@ -1946,48 +1841,10 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); } - if (TCG_TARGET_REG_BITS == 32) { - int ofs = 0; - - tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); - ofs += 4; - - tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (TARGET_LONG_BITS == 64) { - tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs); - ofs += 4; - - tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs); - } else { - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - /* The second argument is already loaded with addrlo. 
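The code being removed here was hand-marshalling the helper argument order that the generic tcg_out_ld_helper_args now produces: env, guest address, MemOpIdx, host return address. A sketch that just prints that assumed order; all values are made up:

    #include <stdint.h>
    #include <stdio.h>

    /* Argument order assumed from the qemu_ld_helpers[] entries above:
     * (env, guest addr, MemOpIdx, host return address). */
    static void show_helper_call(uintptr_t env, uint64_t addr,
                                 unsigned oi, uintptr_t raddr)
    {
        printf("call helper(env=%#lx, addr=%#llx, oi=%#x, ra=%#lx)\n",
               (unsigned long)env, (unsigned long long)addr, oi,
               (unsigned long)raddr);
    }

    int main(void)
    {
        show_helper_call(0x1000, 0x7fff0000, 0x22, 0x4000);
        return 0;
    }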
*/ - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi); - tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3], - (uintptr_t)l->raddr); - } - + tcg_out_ld_helper_args(s, l, &ldst_helper_param); tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param); - if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { - TCGMovExtend ext[2] = { - { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32, - .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, - { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32, - .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, - }; - tcg_out_movext2(s, &ext[0], &ext[1], -1); - } else { - tcg_out_movext(s, l->type, l->datalo_reg, - TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX); - } - - /* Jump to the code corresponding to next IR of qemu_st */ tcg_out_jmp(s, l->raddr); return true; } @@ -1997,11 +1854,8 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) */ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; + MemOp opc = get_memop(l->oi); tcg_insn_unit **label_ptr = &l->label_ptr[0]; - TCGReg retaddr; /* resolve label address */ tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); @@ -2009,79 +1863,13 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); } - if (TCG_TARGET_REG_BITS == 32) { - int ofs = 0; + tcg_out_st_helper_args(s, l, &ldst_helper_param); + tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); - tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); - ofs += 4; - - tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (TARGET_LONG_BITS == 64) { - tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (s_bits == MO_64) { - tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs); - ofs += 4; - - retaddr = TCG_REG_EAX; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs); - } else { - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - /* The second argument is already loaded with addrlo. */ - tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), - tcg_target_call_iarg_regs[2], l->datalo_reg); - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi); - - if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) { - retaddr = tcg_target_call_iarg_regs[4]; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - } else { - retaddr = TCG_REG_RAX; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, - TCG_TARGET_CALL_STACK_OFFSET); - } - } - - /* "Tail call" to the helper, with the return address back inline. 
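The push-then-jump idiom used below deserves a note: pushing a chosen return address before jumping makes the helper's RET resume at an address the caller picked, not at the instruction after the jump. The control flow, modelled in C with a function pointer standing in for the pushed address:

    #include <stdio.h>

    static void fast_path_resume(void)
    {
        puts("back on the fast path");
    }

    /* The function pointer plays the role of the pushed return address:
     * the helper "returns" wherever the caller decided. */
    static void store_helper_model(void (*ret_to)(void))
    {
        puts("slow-path store");
        ret_to();
    }

    int main(void)
    {
        store_helper_model(fast_path_resume);
        return 0;
    }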
*/ - tcg_out_push(s, retaddr); - tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_jmp(s, l->raddr); return true; } #else - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, - TCGReg addrhi, unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *label; - - tcg_out_testi(s, addrlo, a_mask); - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - - label = new_ldst_label(s); - label->is_ld = is_ld; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = tcg_splitwx_to_rx(s->code_ptr + 4); - label->label_ptr[0] = s->code_ptr; - - s->code_ptr += 4; -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { /* resolve label address */ @@ -2159,6 +1947,128 @@ static inline int setup_guest_base_seg(void) #endif /* setup_guest_base_seg */ #endif /* SOFTMMU */ +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addrlo, TCGReg addrhi, + MemOpIdx oi, bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + unsigned a_mask = (1 << a_bits) - 1; + +#ifdef CONFIG_SOFTMMU + int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write); + TCGType ttype = TCG_TYPE_I32; + TCGType tlbtype = TCG_TYPE_I32; + int trexw = 0, hrexw = 0, tlbrexw = 0; + unsigned mem_index = get_mmuidx(oi); + unsigned s_bits = opc & MO_SIZE; + unsigned s_mask = (1 << s_bits) - 1; + target_ulong tlb_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + if (TCG_TARGET_REG_BITS == 64) { + if (TARGET_LONG_BITS == 64) { + ttype = TCG_TYPE_I64; + trexw = P_REXW; + } + if (TCG_TYPE_PTR == TCG_TYPE_I64) { + hrexw = P_REXW; + if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) { + tlbtype = TCG_TYPE_I64; + tlbrexw = P_REXW; + } + } + } + + tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo); + tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, + TLB_MASK_TABLE_OFS(mem_index) + + offsetof(CPUTLBDescFast, mask)); + + tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0, + TLB_MASK_TABLE_OFS(mem_index) + + offsetof(CPUTLBDescFast, table)); + + /* + * If the required alignment is at least as large as the access, simply + * copy the address and mask. For lesser alignments, check that we don't + * cross pages for the complete access. 
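Concretely, biasing the address by (s_mask - a_mask) before masking makes any access that crosses a page disagree with the TLB tag of its first page. A worked, runnable check of that identity, assuming 4 KiB pages; the function is illustrative:

    #include <assert.h>
    #include <stdint.h>

    enum { PAGE_BITS = 12 };                          /* assumed page size */
    #define PAGE_MASK (~(((uint64_t)1 << PAGE_BITS) - 1))

    /* Value compared against the TLB tag: for under-aligned accesses the
     * address is biased to its last byte first, so a page-crossing access
     * cannot match the tag of its first page. */
    static uint64_t tlb_compare_value(uint64_t addr, unsigned a_bits,
                                      unsigned s_bits)
    {
        uint64_t a_mask = ((uint64_t)1 << a_bits) - 1;
        uint64_t s_mask = ((uint64_t)1 << s_bits) - 1;
        uint64_t biased = a_bits >= s_bits ? addr : addr + (s_mask - a_mask);

        return biased & (PAGE_MASK | a_mask);
    }

    int main(void)
    {
        /* 4-byte access starting at the last byte of a page: no match. */
        assert(tlb_compare_value(0xfff, 0, 2) != (0xfff & PAGE_MASK));
        /* Same access fully inside the page: matches its page tag. */
        assert(tlb_compare_value(0xff8, 0, 2) == (0xff8 & PAGE_MASK));
        return 0;
    }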
+ */ + if (a_bits >= s_bits) { + tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); + } else { + tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, + addrlo, s_mask - a_mask); + } + tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; + tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); + + /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, + TCG_REG_L1, TCG_REG_L0, cmp_ofs); + + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + ldst->label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + /* cmp 4(TCG_REG_L0), addrhi */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4); + + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + ldst->label_ptr[1] = s->code_ptr; + s->code_ptr += 4; + } + + /* TLB Hit. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, + offsetof(CPUTLBEntry, addend)); + + *h = (HostAddress) { + .base = addrlo, + .index = TCG_REG_L0, + }; +#else + if (a_bits) { + ldst = new_ldst_label(s); + + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + tcg_out_testi(s, addrlo, a_mask); + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + ldst->label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + } + + *h = x86_guest_base; + h->base = addrlo; +#endif + + return ldst; +} + static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, HostAddress h, TCGType type, MemOp memop) { @@ -2258,35 +2168,18 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); + tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi)); - tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc, - label_ptr, offsetof(CPUTLBEntry, addr_read)); - - /* TLB Hit. */ - h.base = TCG_REG_L1; - h.index = -1; - h.ofs = 0; - h.seg = 0; - tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc); - - /* Record the current context of a load into ldst label */ - add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - - h = x86_guest_base; - h.base = addrlo; - tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc); -#endif } static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, @@ -2345,36 +2238,18 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); + tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi)); - tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc, - label_ptr, offsetof(CPUTLBEntry, addr_write)); - - /* TLB Hit. 
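On a hit, the translation itself is a single add of the per-page addend cached in the TLB entry. In portable terms; guest_to_host() is illustrative, not a QEMU API:

    #include <stdint.h>

    /* The entry's addend caches host_page_address - guest_page_address. */
    static void *guest_to_host(uint64_t guest_addr, uintptr_t addend)
    {
        return (void *)(uintptr_t)(guest_addr + addend);
    }

    int main(void)
    {
        uint64_t page[512];                            /* pretend host page */
        uintptr_t addend = (uintptr_t)page - 0x4000;   /* guest page at 0x4000 */

        return guest_to_host(0x4008, addend) == (void *)&page[1] ? 0 : 1;
    }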
*/ - h.base = TCG_REG_L1; - h.index = -1; - h.ofs = 0; - h.seg = 0; - tcg_out_qemu_st_direct(s, datalo, datahi, h, opc); - - /* Record the current context of a store into ldst label */ - add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - - h = x86_guest_base; - h.base = addrlo; - - tcg_out_qemu_st_direct(s, datalo, datahi, h, opc); -#endif } static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index 172c107289..c2bde44613 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -17,9 +17,7 @@ C_O0_I1(r) C_O0_I2(rZ, r) C_O0_I2(rZ, rZ) -C_O0_I2(LZ, L) C_O1_I1(r, r) -C_O1_I1(r, L) C_O1_I2(r, r, rC) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h index 541ff47fa9..6e9ccca3ad 100644 --- a/tcg/loongarch64/tcg-target-con-str.h +++ b/tcg/loongarch64/tcg-target-con-str.h @@ -14,7 +14,6 @@ * REGS(letter, register_mask) */ REGS('r', ALL_GENERAL_REGS) -REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) /* * Define constraint letters for constants: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 6a87a5e5a3..83fa45c802 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -133,18 +133,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define TCG_CT_CONST_C12 0x1000 #define TCG_CT_CONST_WSZ 0x2000 -#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) -/* - * For softmmu, we need to avoid conflicts with the first 5 - * argument registers to call the helper. Some of these are - * also used for the tlb lookup. - */ -#ifdef CONFIG_SOFTMMU -#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5) -#else -#define SOFTMMU_RESERVE_REGS 0 -#endif - +#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) { @@ -818,156 +807,45 @@ static void * const qemu_st_helpers[4] = { [MO_64] = helper_le_stq_mmu, }; -/* We expect to use a 12-bit negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); - static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) { tcg_out_opc_b(s, 0); return reloc_br_sd10k16(s->code_ptr - 1, target); } -/* - * Emits common code for TLB addend lookup, that eventually loads the - * addend in TCG_REG_TMP2. 
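One detail of the LoongArch lookup below worth calling out: raising the alignment requirement to the access size keeps the fast path strictly aligned, which also guarantees no fast-path access straddles a page, so no end-of-access bias is needed in the compare. The rule, as a one-liner with a couple of checks:

    #include <assert.h>

    /* a_bits = max(a_bits, s_bits): every fast-path access stays in one page. */
    static unsigned effective_a_bits(unsigned a_bits, unsigned s_bits)
    {
        return a_bits < s_bits ? s_bits : a_bits;
    }

    int main(void)
    {
        assert(effective_a_bits(0, 3) == 3);   /* 8-byte load: require 8-align */
        assert(effective_a_bits(3, 2) == 3);   /* stricter request is kept */
        return 0;
    }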
- */ -static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, MemOpIdx oi, - tcg_insn_unit **label_ptr, bool is_load) -{ - MemOp opc = get_memop(oi); - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - tcg_target_long compare_mask; - int mem_index = get_mmuidx(oi); - int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); - int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); - int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); - - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); - - tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); - tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); - - /* Load the tlb comparator and the addend. */ - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, - is_load ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, - offsetof(CPUTLBEntry, addend)); - - /* We don't support unaligned accesses. */ - if (a_bits < s_bits) { - a_bits = s_bits; - } - /* Clear the non-page, non-alignment bits from the address. */ - compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1); - tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask); - tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl); - - /* Compare masked address with the TLB entry. */ - label_ptr[0] = s->code_ptr; - tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); - - /* TLB Hit - addend in TCG_REG_TMP2, ready for use. */ -} - -static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi, - TCGType type, - TCGReg datalo, TCGReg addrlo, - void *raddr, tcg_insn_unit **label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = type; - label->datalo_reg = datalo; - label->datahi_reg = 0; /* unused */ - label->addrlo_reg = addrlo; - label->addrhi_reg = 0; /* unused */ - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr[0]; -} +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 1, .tmp = { TCG_REG_TMP0 } +}; static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp size = opc & MO_SIZE; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - /* call load helper */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr); - - tcg_out_call_int(s, qemu_ld_helpers[size], false); - - tcg_out_movext(s, l->type, l->datalo_reg, - TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_A0); + tcg_out_ld_helper_args(s, l, &ldst_helper_param); + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false); + tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param); return tcg_out_goto(s, l->raddr); } static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp size = opc & MO_SIZE; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - /* call store helper */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, 
TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg); - tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I32 : TCG_TYPE_I32, TCG_REG_A2, - l->type, size, l->datalo_reg); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr); - - tcg_out_call_int(s, qemu_st_helpers[size], false); - + tcg_out_st_helper_args(s, l, &ldst_helper_param); + tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); return tcg_out_goto(s, l->raddr); } #else - -/* - * Alignment helpers for user-mode emulation - */ - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, - unsigned a_bits) -{ - TCGLabelQemuLdst *l = new_ldst_label(s); - - l->is_ld = is_ld; - l->addrlo_reg = addr_reg; - - /* - * Without micro-architecture details, we don't know which of bstrpick or - * andi is faster, so use bstrpick as it's not constrained by imm field - * width. (Not to say alignments >= 2^12 are going to happen any time - * soon, though) - */ - tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); - - l->label_ptr[0] = s->code_ptr; - tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); - - l->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { /* resolve label address */ @@ -997,27 +875,102 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) #endif /* CONFIG_SOFTMMU */ -/* - * `ext32u` the address register into the temp register given, - * if target is 32-bit, no-op otherwise. - * - * Returns the address register ready for use with TLB addend. - */ -static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s, - TCGReg addr, TCGReg tmp) -{ - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, tmp, addr); - return tmp; - } - return addr; -} - typedef struct { TCGReg base; TCGReg index; } HostAddress; +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addr_reg, MemOpIdx oi, + bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + +#ifdef CONFIG_SOFTMMU + unsigned s_bits = opc & MO_SIZE; + int mem_index = get_mmuidx(oi); + int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); + int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); + int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); + tcg_target_long compare_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); + + tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); + tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); + + /* Load the tlb comparator and the addend. */ + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, + is_ld ? 
offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, + offsetof(CPUTLBEntry, addend)); + + /* We don't support unaligned accesses. */ + if (a_bits < s_bits) { + a_bits = s_bits; + } + /* Clear the non-page, non-alignment bits from the address. */ + compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1); + tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask); + tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg); + + /* Compare masked address with the TLB entry. */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0); + + h->index = TCG_REG_TMP2; +#else + if (a_bits) { + ldst = new_ldst_label(s); + + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + /* + * Without micro-architecture details, we don't know which of + * bstrpick or andi is faster, so use bstrpick as it's not + * constrained by imm field width. Not to say alignments >= 2^12 + * are going to happen any time soon. + */ + tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); + + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); + } + + h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; +#endif + + if (TARGET_LONG_BITS == 32) { + h->base = TCG_REG_TMP0; + tcg_out_ext32u(s, h->base, addr_reg); + } else { + h->base = addr_reg; + } + + return ldst; +} + static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type, TCGReg rd, HostAddress h) { @@ -1057,29 +1010,17 @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type, static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr[1]; + ldst = prepare_host_addr(s, &h, addr_reg, oi, true); + tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h); - tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1); - h.index = TCG_REG_TMP2; -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, true, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; -#endif - - h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0); - tcg_out_qemu_ld_indexed(s, opc, data_type, data_reg, h); - -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#endif } static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, @@ -1109,29 +1050,17 @@ static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr[1]; + ldst = prepare_host_addr(s, &h, addr_reg, oi, false); + tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h); - tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0); - h.index = TCG_REG_TMP2; -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, false, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - h.index = USE_GUEST_BASE ? 
TCG_GUEST_BASE_REG : TCG_REG_ZERO; -#endif - - h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0); - tcg_out_qemu_st_indexed(s, opc, data_reg, h); - -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#endif } /* @@ -1601,16 +1530,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_st32_i64: case INDEX_op_st_i32: case INDEX_op_st_i64: + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: return C_O0_I2(rZ, r); case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(rZ, rZ); - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - return C_O0_I2(LZ, L); - case INDEX_op_ext8s_i32: case INDEX_op_ext8s_i64: case INDEX_op_ext8u_i32: @@ -1646,11 +1573,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_ld32u_i64: case INDEX_op_ld_i32: case INDEX_op_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: - return C_O1_I1(r, L); + return C_O1_I1(r, r); case INDEX_op_andc_i32: case INDEX_op_andc_i64: diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h index fe3e868a2f..864034f468 100644 --- a/tcg/mips/tcg-target-con-set.h +++ b/tcg/mips/tcg-target-con-set.h @@ -12,15 +12,13 @@ C_O0_I1(r) C_O0_I2(rZ, r) C_O0_I2(rZ, rZ) -C_O0_I2(SZ, S) -C_O0_I3(SZ, S, S) -C_O0_I3(SZ, SZ, S) +C_O0_I3(rZ, r, r) +C_O0_I3(rZ, rZ, r) C_O0_I4(rZ, rZ, rZ, rZ) -C_O0_I4(SZ, SZ, S, S) -C_O1_I1(r, L) +C_O0_I4(rZ, rZ, r, r) C_O1_I1(r, r) C_O1_I2(r, 0, rZ) -C_O1_I2(r, L, L) +C_O1_I2(r, r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) C_O1_I2(r, r, rIK) @@ -30,7 +28,6 @@ C_O1_I2(r, rZ, rN) C_O1_I2(r, rZ, rZ) C_O1_I4(r, rZ, rZ, rZ, 0) C_O1_I4(r, rZ, rZ, rZ, rZ) -C_O2_I1(r, r, L) -C_O2_I2(r, r, L, L) +C_O2_I1(r, r, r) C_O2_I2(r, r, r, r) C_O2_I4(r, r, rZ, rZ, rN, rN) diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h index e4b2965c72..413c280a7a 100644 --- a/tcg/mips/tcg-target-con-str.h +++ b/tcg/mips/tcg-target-con-str.h @@ -9,8 +9,6 @@ * REGS(letter, register_mask) */ REGS('r', ALL_GENERAL_REGS) -REGS('L', ALL_QLOAD_REGS) -REGS('S', ALL_QSTORE_REGS) /* * Define constraint letters for constants: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index ef8350e9cd..5ad9867882 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -176,20 +176,6 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, #define TCG_CT_CONST_WSZ 0x2000 /* word size */ #define ALL_GENERAL_REGS 0xffffffffu -#define NOA0_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_A0)) - -#ifdef CONFIG_SOFTMMU -#define ALL_QLOAD_REGS \ - (NOA0_REGS & ~((TCG_TARGET_REG_BITS < TARGET_LONG_BITS) << TCG_REG_A2)) -#define ALL_QSTORE_REGS \ - (NOA0_REGS & ~(TCG_TARGET_REG_BITS < TARGET_LONG_BITS \ - ? (1 << TCG_REG_A2) | (1 << TCG_REG_A3) \ - : (1 << TCG_REG_A1))) -#else -#define ALL_QLOAD_REGS NOA0_REGS -#define ALL_QSTORE_REGS NOA0_REGS -#endif - static bool is_p2m1(tcg_target_long val) { @@ -370,6 +356,8 @@ typedef enum { ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU, ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32 ? OPC_SRL : OPC_DSRL, + ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32 + ? 
OPC_ADDIU : OPC_DADDIU, } MIPSInsn; /* @@ -1088,220 +1076,46 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, } #if defined(CONFIG_SOFTMMU) -static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = { +static void * const qemu_ld_helpers[MO_SSIZE + 1] = { [MO_UB] = helper_ret_ldub_mmu, [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEUQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEUQ] = helper_be_ldq_mmu, -#if TCG_TARGET_REG_BITS == 64 - [MO_LESL] = helper_le_ldsl_mmu, - [MO_BESL] = helper_be_ldsl_mmu, +#if HOST_BIG_ENDIAN + [MO_UW] = helper_be_lduw_mmu, + [MO_SW] = helper_be_ldsw_mmu, + [MO_UL] = helper_be_ldul_mmu, + [MO_SL] = helper_be_ldsl_mmu, + [MO_UQ] = helper_be_ldq_mmu, +#else + [MO_UW] = helper_le_lduw_mmu, + [MO_SW] = helper_le_ldsw_mmu, + [MO_UL] = helper_le_ldul_mmu, + [MO_UQ] = helper_le_ldq_mmu, + [MO_SL] = helper_le_ldsl_mmu, #endif }; -static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = { +static void * const qemu_st_helpers[MO_SIZE + 1] = { [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEUQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEUQ] = helper_be_stq_mmu, +#if HOST_BIG_ENDIAN + [MO_UW] = helper_be_stw_mmu, + [MO_UL] = helper_be_stl_mmu, + [MO_UQ] = helper_be_stq_mmu, +#else + [MO_UW] = helper_le_stw_mmu, + [MO_UL] = helper_le_stl_mmu, + [MO_UQ] = helper_le_stq_mmu, +#endif }; -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * I is where we want to put this argument, and is updated and returned - * for the next call. ARG is the argument itself. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ - -static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) -{ - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); - } else { - /* For N32 and N64, the initial offset is different. But there - we also have 8 argument register so we don't run out here. 
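A related O32 detail in the marshalling helpers below: a 64-bit argument must occupy an even/odd register pair, hence the "(i + 1) & ~1" rounding in tcg_out_call_iarg_reg2. Distilled into a runnable check:

    #include <assert.h>

    /* O32 passes a 64-bit value in an even/odd register pair, so the slot
     * index is rounded up to even before assigning the two halves. */
    static int align_pair_slot(int i)
    {
        return (i + 1) & ~1;
    }

    int main(void)
    {
        assert(align_pair_slot(1) == 2);   /* skip a1: use the a2/a3 pair */
        assert(align_pair_slot(2) == 2);   /* already even: no padding slot */
        return 0;
    }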
*/ - tcg_debug_assert(TCG_TARGET_REG_BITS == 32); - tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); - } - return i + 1; -} - -static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) -{ - TCGReg tmp = TCG_TMP0; - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_ext8u(s, tmp, arg); - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) -{ - TCGReg tmp = TCG_TMP0; - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) -{ - TCGReg tmp = TCG_TMP0; - if (arg == 0) { - tmp = TCG_REG_ZERO; - } else { - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); - } - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) -{ - tcg_debug_assert(TCG_TARGET_REG_BITS == 32); - i = (i + 1) & ~1; - i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); - i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); - return i; -} - -/* We expect to use a 16-bit negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768); - -/* - * Perform the tlb comparison operation. - * The complete host address is placed in BASE. - * Clobbers TMP0, TMP1, TMP2, TMP3. - */ -static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, - TCGReg addrh, MemOpIdx oi, - tcg_insn_unit *label_ptr[2], bool is_load) -{ - MemOp opc = get_memop(oi); - unsigned a_bits = get_alignment_bits(opc); - unsigned s_bits = opc & MO_SIZE; - unsigned a_mask = (1 << a_bits) - 1; - unsigned s_mask = (1 << s_bits) - 1; - int mem_index = get_mmuidx(oi); - int fast_off = TLB_MASK_TABLE_OFS(mem_index); - int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); - int table_off = fast_off + offsetof(CPUTLBDescFast, table); - int add_off = offsetof(CPUTLBEntry, addend); - int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - target_ulong tlb_mask; - - /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off); - - /* Extract the TLB index from the address into TMP3. */ - tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrl, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0); - - /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */ - tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1); - - /* Load the (low-half) tlb comparator. */ - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF); - } else { - tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD - : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW), - TCG_TMP0, TCG_TMP3, cmp_off); - } - - /* Zero extend a 32-bit guest address for a 64-bit host. */ - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, base, addrl); - addrl = base; - } - - /* - * Mask the page bits, keeping the alignment bits to compare against. - * For unaligned accesses, compare against the end of the access to - * verify that it does not cross a page boundary. 
- */ - tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; - tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask); - if (a_mask >= s_mask) { - tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); - } else { - tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask); - tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2); - } - - if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { - /* Load the tlb addend for the fast path. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); - } - - label_ptr[0] = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); - - /* Load and test the high half tlb comparator. */ - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - /* delay slot */ - tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF); - - /* Load the tlb addend for the fast path. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); - - label_ptr[1] = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0); - } - - /* delay slot */ - tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl); -} - -static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi, - TCGType ext, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addrhi, - void *raddr, tcg_insn_unit *label_ptr[2]) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = ext; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr[0]; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - label->label_ptr[1] = label_ptr[1]; - } -} +/* We have four temps, we might as well expose three of them. */ +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 } +}; static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr); - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - TCGReg v0; - int i; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_pc16(l->label_ptr[0], tgt_rx) @@ -1310,29 +1124,13 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return false; } - i = 1; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); - } - i = tcg_out_call_iarg_imm(s, i, oi); - i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); - tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); - /* delay slot */ - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_ld_helper_args(s, l, &ldst_helper_param); - v0 = l->datalo_reg; - if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { - /* We eliminated V0 from the possible output registers, so it - cannot be clobbered here. So we must move V1 first. 
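This ordering problem, choosing which half of a register pair to move first so a destination is not clobbered while still live as a source, is what the generic helper-return path now resolves with a scratch register. The decision tree, in plain C with pointers standing in for registers:

    #include <assert.h>
    #include <stdint.h>

    /* Move {lo, hi} from sources to destinations; tmp breaks a full swap. */
    static void move_pair(uint32_t *dlo, uint32_t *dhi,
                          uint32_t *slo, uint32_t *shi, uint32_t *tmp)
    {
        if (dlo != shi) {
            *dlo = *slo;                   /* safe: dlo is not a live source */
            *dhi = *shi;
        } else if (dhi != slo) {
            *dhi = *shi;                   /* consume shi before dlo clobbers it */
            *dlo = *slo;
        } else {
            *tmp = *slo;                   /* full swap: save one side first */
            *dhi = *shi;
            *dlo = *tmp;
        }
    }

    int main(void)
    {
        uint32_t a = 1, b = 2, t;

        move_pair(&b, &a, &a, &b, &t);     /* worst case: a full swap */
        assert(a == 2 && b == 1);
        return 0;
    }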
*/ - if (MIPS_BE) { - tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); - v0 = l->datahi_reg; - } else { - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); - } - } + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false); + /* delay slot */ + tcg_out_nop(s); + + tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param); tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); if (!reloc_pc16(s->code_ptr - 1, l->raddr)) { @@ -1340,22 +1138,14 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* delay slot */ - if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) { - /* we always sign-extend 32-bit loads */ - tcg_out_ext32s(s, v0, TCG_REG_V0); - } else { - tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO); - } + tcg_out_nop(s); return true; } static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr); - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; - int i; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_pc16(l->label_ptr[0], tgt_rx) @@ -1364,71 +1154,23 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return false; } - i = 1; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); - } - switch (s_bits) { - case MO_8: - i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); - break; - case MO_16: - i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); - break; - case MO_32: - i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); - break; - case MO_64: - if (TCG_TARGET_REG_BITS == 32) { - i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); - } - break; - default: - g_assert_not_reached(); - } - i = tcg_out_call_iarg_imm(s, i, oi); + tcg_out_st_helper_args(s, l, &ldst_helper_param); - /* Tail call to the store helper. Thus force the return address - computation to take place in the return address register. */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); - i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); - tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); + tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); /* delay slot */ - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_nop(s); + + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); + if (!reloc_pc16(s->code_ptr - 1, l->raddr)) { + return false; + } + + /* delay slot */ + tcg_out_nop(s); return true; } #else - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, - TCGReg addrhi, unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *l = new_ldst_label(s); - - l->is_ld = is_ld; - l->addrlo_reg = addrlo; - l->addrhi_reg = addrhi; - - /* We are expecting a_bits to max out at 7, much lower than ANDI. 
*/ - tcg_debug_assert(a_bits < 16); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask); - - l->label_ptr[0] = s->code_ptr; - if (use_mips32r6_instructions) { - tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0); - } else { - tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO); - tcg_out_nop(s); - } - - l->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { void *target; @@ -1478,55 +1220,166 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } #endif /* SOFTMMU */ +typedef struct { + TCGReg base; + MemOp align; +} HostAddress; + +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addrlo, TCGReg addrhi, + MemOpIdx oi, bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + unsigned s_bits = opc & MO_SIZE; + unsigned a_mask = (1 << a_bits) - 1; + TCGReg base; + +#ifdef CONFIG_SOFTMMU + unsigned s_mask = (1 << s_bits) - 1; + int mem_index = get_mmuidx(oi); + int fast_off = TLB_MASK_TABLE_OFS(mem_index); + int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); + int table_off = fast_off + offsetof(CPUTLBDescFast, table); + int add_off = offsetof(CPUTLBEntry, addend); + int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write); + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off); + + /* Extract the TLB index from the address into TMP3. */ + tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0); + + /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */ + tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1); + + /* Load the (low-half) tlb comparator. */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF); + } else { + tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off); + } + + if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { + /* Load the tlb addend for the fast path. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off); + } + + /* + * Mask the page bits, keeping the alignment bits to compare against. + * For unaligned accesses, compare against the end of the access to + * verify that it does not cross a page boundary. + */ + tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask); + if (a_mask < s_mask) { + tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask); + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2); + } else { + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo); + } + + /* Zero extend a 32-bit guest address for a 64-bit host. 
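The extension matters because the addend arithmetic is done at host width, so stale high bits in the address register would corrupt the sum. The operation tcg_out_ext32u emits, in one line:

    #include <assert.h>
    #include <stdint.h>

    /* Discard bits 63..32 of the address register. */
    static uint64_t ext32u(uint64_t reg)
    {
        return (uint32_t)reg;
    }

    int main(void)
    {
        assert(ext32u(0xdeadbeefcafe0000ull) == 0xcafe0000u);
        return 0;
    }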
*/ + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_TMP2, addrlo); + addrlo = TCG_TMP2; + } + + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); + + /* Load and test the high half tlb comparator. */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + /* delay slot */ + tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF); + + /* Load the tlb addend for the fast path. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off); + + ldst->label_ptr[1] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0); + } + + /* delay slot */ + base = TCG_TMP3; + tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo); +#else + if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) { + ldst = new_ldst_label(s); + + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7, much lower than ANDI. */ + tcg_debug_assert(a_bits < 16); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask); + + ldst->label_ptr[0] = s->code_ptr; + if (use_mips32r6_instructions) { + tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0); + } else { + tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO); + tcg_out_nop(s); + } + } + + base = addrlo; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_A0, base); + base = TCG_REG_A0; + } + if (guest_base) { + if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base); + } else { + tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base, + TCG_GUEST_BASE_REG); + } + base = TCG_REG_A0; + } +#endif + + h->base = base; + h->align = a_bits; + return ldst; +} + static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc, TCGType type) { - switch (opc & (MO_SSIZE | MO_BSWAP)) { + switch (opc & MO_SSIZE) { case MO_UB: tcg_out_opc_imm(s, OPC_LBU, lo, base, 0); break; case MO_SB: tcg_out_opc_imm(s, OPC_LB, lo, base, 0); break; - case MO_UW | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ); - break; case MO_UW: tcg_out_opc_imm(s, OPC_LHU, lo, base, 0); break; - case MO_SW | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS); - break; case MO_SW: tcg_out_opc_imm(s, OPC_LH, lo, base, 0); break; - case MO_UL | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) { - if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, OPC_LWU, lo, base, 0); - tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ); - } else { - tcg_out_bswap_subr(s, bswap32u_addr); - /* delay slot */ - tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0); - tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3); - } - break; - } - /* FALLTHRU */ - case MO_SL | MO_BSWAP: - if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, OPC_LW, lo, base, 0); - tcg_out_bswap32(s, lo, lo, 0); - } else { - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot */ - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0); - tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_TMP3); - } - break; case MO_UL: if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) { tcg_out_opc_imm(s, OPC_LWU, lo, base, 0); @@ -1536,35 +1389,6 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, case MO_SL: tcg_out_opc_imm(s, OPC_LW, lo, base, 0); break; - case MO_UQ | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 64) { - if (use_mips32r2_instructions) { - 
tcg_out_opc_imm(s, OPC_LD, lo, base, 0); - tcg_out_bswap64(s, lo, lo); - } else { - tcg_out_bswap_subr(s, bswap64_addr); - /* delay slot */ - tcg_out_opc_imm(s, OPC_LD, TCG_TMP0, base, 0); - tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3); - } - } else if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0); - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1); - tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16); - tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16); - } else { - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot */ - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0); - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 4); - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot */ - tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3); - tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3); - } - break; case MO_UQ: /* Prefer to load from offset 0 first, but allow for overlap. */ if (TCG_TARGET_REG_BITS == 64) { @@ -1589,25 +1413,20 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL; const MIPSInsn ld1 = MIPS_BE ? OPC_LDL : OPC_LDR; const MIPSInsn ld2 = MIPS_BE ? OPC_LDR : OPC_LDL; + bool sgn = opc & MO_SIGN; - bool sgn = (opc & MO_SIGN); - - switch (opc & (MO_SSIZE | MO_BSWAP)) { - case MO_SW | MO_BE: - case MO_UW | MO_BE: - tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0); - tcg_out_opc_imm(s, OPC_LBU, lo, base, 1); - if (use_mips32r2_instructions) { - tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8); - } else { - tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); - tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1); - } - break; - - case MO_SW | MO_LE: - case MO_UW | MO_LE: - if (use_mips32r2_instructions && lo != base) { + switch (opc & MO_SIZE) { + case MO_16: + if (HOST_BIG_ENDIAN) { + tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0); + tcg_out_opc_imm(s, OPC_LBU, lo, base, 1); + if (use_mips32r2_instructions) { + tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8); + } else { + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + tcg_out_opc_reg(s, OPC_OR, lo, lo, TCG_TMP0); + } + } else if (use_mips32r2_instructions && lo != base) { tcg_out_opc_imm(s, OPC_LBU, lo, base, 0); tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 1); tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8); @@ -1619,8 +1438,7 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, } break; - case MO_SL: - case MO_UL: + case MO_32: tcg_out_opc_imm(s, lw1, lo, base, 0); tcg_out_opc_imm(s, lw2, lo, base, 3); if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn) { @@ -1628,28 +1446,7 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, } break; - case MO_UL | MO_BSWAP: - case MO_SL | MO_BSWAP: - if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, lw1, lo, base, 0); - tcg_out_opc_imm(s, lw2, lo, base, 3); - tcg_out_bswap32(s, lo, lo, - TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 - ? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0); - } else { - const tcg_insn_unit *subr = - (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn - ? 
bswap32u_addr : bswap32_addr); - - tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0); - tcg_out_bswap_subr(s, subr); - /* delay slot */ - tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3); - tcg_out_mov(s, type, lo, TCG_TMP3); - } - break; - - case MO_UQ: + case MO_64: if (TCG_TARGET_REG_BITS == 64) { tcg_out_opc_imm(s, ld1, lo, base, 0); tcg_out_opc_imm(s, ld2, lo, base, 7); @@ -1661,42 +1458,6 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, } break; - case MO_UQ | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 64) { - if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, ld1, lo, base, 0); - tcg_out_opc_imm(s, ld2, lo, base, 7); - tcg_out_bswap64(s, lo, lo); - } else { - tcg_out_opc_imm(s, ld1, TCG_TMP0, base, 0); - tcg_out_bswap_subr(s, bswap64_addr); - /* delay slot */ - tcg_out_opc_imm(s, ld2, TCG_TMP0, base, 7); - tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3); - } - } else if (use_mips32r2_instructions) { - tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0); - tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3); - tcg_out_opc_imm(s, lw1, TCG_TMP1, base, 4 + 0); - tcg_out_opc_imm(s, lw2, TCG_TMP1, base, 4 + 3); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1); - tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16); - tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16); - } else { - tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0); - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot */ - tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3); - tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 4 + 0); - tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3); - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot */ - tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 4 + 3); - tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3); - } - break; - default: g_assert_not_reached(); } @@ -1707,106 +1468,38 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, MemOpIdx oi, TCGType data_type) { MemOp opc = get_memop(oi); - unsigned a_bits = get_alignment_bits(opc); - unsigned s_bits = opc & MO_SIZE; - TCGReg base; + TCGLabelQemuLdst *ldst; + HostAddress h; - /* - * R6 removes the left/right instructions but requires the - * system to support misaligned memory accesses. 
- */ -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); - base = TCG_REG_A0; - tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 1); - if (use_mips32r6_instructions || a_bits >= s_bits) { - tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type); + if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) { + tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type); } else { - tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type); + tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type); } - add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#else - base = addrlo; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_A0, base); - base = TCG_REG_A0; + + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - if (guest_base) { - if (guest_base == (int16_t)guest_base) { - tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base); - } else { - tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base, - TCG_GUEST_BASE_REG); - } - base = TCG_REG_A0; - } - if (use_mips32r6_instructions) { - if (a_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); - } - tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type); - } else { - if (a_bits && a_bits != s_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); - } - if (a_bits >= s_bits) { - tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type); - } else { - tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type); - } - } -#endif } static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc) { - /* Don't clutter the code below with checks to avoid bswapping ZERO. */ - if ((lo | hi) == 0) { - opc &= ~MO_BSWAP; - } - - switch (opc & (MO_SIZE | MO_BSWAP)) { + switch (opc & MO_SIZE) { case MO_8: tcg_out_opc_imm(s, OPC_SB, lo, base, 0); break; - - case MO_16 | MO_BSWAP: - tcg_out_bswap16(s, TCG_TMP1, lo, 0); - lo = TCG_TMP1; - /* FALLTHRU */ case MO_16: tcg_out_opc_imm(s, OPC_SH, lo, base, 0); break; - - case MO_32 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP3, lo, 0); - lo = TCG_TMP3; - /* FALLTHRU */ case MO_32: tcg_out_opc_imm(s, OPC_SW, lo, base, 0); break; - - case MO_64 | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_bswap64(s, TCG_TMP3, lo); - tcg_out_opc_imm(s, OPC_SD, TCG_TMP3, base, 0); - } else if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo); - tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16); - tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4); - } else { - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0); - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4); - } - break; case MO_64: if (TCG_TARGET_REG_BITS == 64) { tcg_out_opc_imm(s, OPC_SD, lo, base, 0); @@ -1815,7 +1508,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? 
lo : hi, base, 4); } break; - default: g_assert_not_reached(); } @@ -1829,54 +1521,18 @@ static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, const MIPSInsn sd1 = MIPS_BE ? OPC_SDL : OPC_SDR; const MIPSInsn sd2 = MIPS_BE ? OPC_SDR : OPC_SDL; - /* Don't clutter the code below with checks to avoid bswapping ZERO. */ - if ((lo | hi) == 0) { - opc &= ~MO_BSWAP; - } - - switch (opc & (MO_SIZE | MO_BSWAP)) { - case MO_16 | MO_BE: + switch (opc & MO_SIZE) { + case MO_16: tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8); - tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 0); - tcg_out_opc_imm(s, OPC_SB, lo, base, 1); + tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? TCG_TMP0 : lo, base, 0); + tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? lo : TCG_TMP0, base, 1); break; - case MO_16 | MO_LE: - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8); - tcg_out_opc_imm(s, OPC_SB, lo, base, 0); - tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 1); - break; - - case MO_32 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP3, lo, 0); - lo = TCG_TMP3; - /* fall through */ case MO_32: tcg_out_opc_imm(s, sw1, lo, base, 0); tcg_out_opc_imm(s, sw2, lo, base, 3); break; - case MO_64 | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_bswap64(s, TCG_TMP3, lo); - lo = TCG_TMP3; - } else if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? hi : lo); - tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? lo : hi); - tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16); - tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16); - hi = MIPS_BE ? TCG_TMP0 : TCG_TMP1; - lo = MIPS_BE ? TCG_TMP1 : TCG_TMP0; - } else { - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0); - tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 0 + 0); - tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 0 + 3); - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0); - tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 4 + 0); - tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 4 + 3); - break; - } - /* fall through */ case MO_64: if (TCG_TARGET_REG_BITS == 64) { tcg_out_opc_imm(s, sd1, lo, base, 0); @@ -1899,57 +1555,23 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, MemOpIdx oi, TCGType data_type) { MemOp opc = get_memop(oi); - unsigned a_bits = get_alignment_bits(opc); - unsigned s_bits = opc & MO_SIZE; - TCGReg base; + TCGLabelQemuLdst *ldst; + HostAddress h; - /* - * R6 removes the left/right instructions but requires the - * system to support misaligned memory accesses. 
- */ -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); - base = TCG_REG_A0; - tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 0); - if (use_mips32r6_instructions || a_bits >= s_bits) { - tcg_out_qemu_st_direct(s, datalo, datahi, base, opc); + if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) { + tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc); } else { - tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc); + tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc); } - add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#else - base = addrlo; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_A0, base); - base = TCG_REG_A0; + + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - if (guest_base) { - if (guest_base == (int16_t)guest_base) { - tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base); - } else { - tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base, - TCG_GUEST_BASE_REG); - } - base = TCG_REG_A0; - } - if (use_mips32r6_instructions) { - if (a_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); - } - tcg_out_qemu_st_direct(s, datalo, datahi, base, opc); - } else { - if (a_bits && a_bits != s_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); - } - if (a_bits >= s_bits) { - tcg_out_qemu_st_direct(s, datalo, datahi, base, opc); - } else { - tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc); - } - } -#endif } static void tcg_out_mb(TCGContext *s, TCGArg a0) @@ -2596,18 +2218,18 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_qemu_ld_i32: return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32 - ? C_O1_I1(r, L) : C_O1_I2(r, L, L)); + ? C_O1_I1(r, r) : C_O1_I2(r, r, r)); case INDEX_op_qemu_st_i32: return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32 - ? C_O0_I2(SZ, S) : C_O0_I3(SZ, S, S)); + ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r)); case INDEX_op_qemu_ld_i64: - return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) - : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, L) - : C_O2_I2(r, r, L, L)); + return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) + : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r) + : C_O2_I2(r, r, r, r)); case INDEX_op_qemu_st_i64: - return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(SZ, S) - : TARGET_LONG_BITS == 32 ? C_O0_I3(SZ, SZ, S) - : C_O0_I4(SZ, SZ, S, S)); + return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) + : TARGET_LONG_BITS == 32 ? 
C_O0_I3(rZ, rZ, r) + : C_O0_I4(rZ, rZ, r, r)); default: g_assert_not_reached(); diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 2431fc5353..42bd7fff01 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -204,8 +204,8 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */ #endif -#define TCG_TARGET_DEFAULT_MO (0) -#define TCG_TARGET_HAS_MEMORY_BSWAP 1 +#define TCG_TARGET_DEFAULT_MO 0 +#define TCG_TARGET_HAS_MEMORY_BSWAP 0 #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h index a1a345883d..f206b29205 100644 --- a/tcg/ppc/tcg-target-con-set.h +++ b/tcg/ppc/tcg-target-con-set.h @@ -12,18 +12,15 @@ C_O0_I1(r) C_O0_I2(r, r) C_O0_I2(r, ri) -C_O0_I2(S, S) C_O0_I2(v, r) -C_O0_I3(S, S, S) +C_O0_I3(r, r, r) C_O0_I4(r, r, ri, ri) -C_O0_I4(S, S, S, S) -C_O1_I1(r, L) +C_O0_I4(r, r, r, r) C_O1_I1(r, r) C_O1_I1(v, r) C_O1_I1(v, v) C_O1_I1(v, vr) C_O1_I2(r, 0, rZ) -C_O1_I2(r, L, L) C_O1_I2(r, rI, ri) C_O1_I2(r, rI, rT) C_O1_I2(r, r, r) @@ -36,7 +33,7 @@ C_O1_I2(v, v, v) C_O1_I3(v, v, v, v) C_O1_I4(r, r, ri, rZ, rZ) C_O1_I4(r, r, r, ri, ri) -C_O2_I1(L, L, L) -C_O2_I2(L, L, L, L) +C_O2_I1(r, r, r) +C_O2_I2(r, r, r, r) C_O2_I4(r, r, rI, rZM, r, r) C_O2_I4(r, r, r, r, rI, rZM) diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h index 298ca20d5b..094613cbcb 100644 --- a/tcg/ppc/tcg-target-con-str.h +++ b/tcg/ppc/tcg-target-con-str.h @@ -10,19 +10,12 @@ */ REGS('r', ALL_GENERAL_REGS) REGS('v', ALL_VECTOR_REGS) -REGS('A', 1u << TCG_REG_R3) -REGS('B', 1u << TCG_REG_R4) -REGS('C', 1u << TCG_REG_R5) -REGS('D', 1u << TCG_REG_R6) -REGS('L', ALL_QLOAD_REGS) -REGS('S', ALL_QSTORE_REGS) /* * Define constraint letters for constants: * CONST(letter, TCG_CT_CONST_* bit set) */ CONST('I', TCG_CT_CONST_S16) -CONST('J', TCG_CT_CONST_U16) CONST('M', TCG_CT_CONST_MONE) CONST('T', TCG_CT_CONST_S32) CONST('U', TCG_CT_CONST_U32) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index cd473deb36..29bfbfcc61 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -68,6 +68,7 @@ #else # define TCG_REG_TMP1 TCG_REG_R12 #endif +#define TCG_REG_TMP2 TCG_REG_R11 #define TCG_VEC_TMP1 TCG_REG_V0 #define TCG_VEC_TMP2 TCG_REG_V1 @@ -82,7 +83,6 @@ #define SZR (TCG_TARGET_REG_BITS / 8) #define TCG_CT_CONST_S16 0x100 -#define TCG_CT_CONST_U16 0x200 #define TCG_CT_CONST_S32 0x400 #define TCG_CT_CONST_U32 0x800 #define TCG_CT_CONST_ZERO 0x1000 @@ -92,18 +92,6 @@ #define ALL_GENERAL_REGS 0xffffffffu #define ALL_VECTOR_REGS 0xffffffff00000000ull -#ifdef CONFIG_SOFTMMU -#define ALL_QLOAD_REGS \ - (ALL_GENERAL_REGS & \ - ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5))) -#define ALL_QSTORE_REGS \ - (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \ - (1 << TCG_REG_R5) | (1 << TCG_REG_R6))) -#else -#define ALL_QLOAD_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3)) -#define ALL_QSTORE_REGS ALL_QLOAD_REGS -#endif - TCGPowerISA have_isa; static bool have_isel; bool have_altivec; @@ -281,8 +269,6 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { return 1; - } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { - return 1; } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { return 1; } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { @@ -2003,178 +1989,36 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = { [MO_BEUQ] = 
helper_be_stq_mmu, }; -/* We expect to use a 16-bit negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768); - -/* Perform the TLB load and compare. Places the result of the comparison - in CR7, loads the addend of the TLB into R3, and returns the register - containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ - -static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc, - TCGReg addrlo, TCGReg addrhi, - int mem_index, bool is_read) +static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg) { - int cmp_off - = (is_read - ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - int fast_off = TLB_MASK_TABLE_OFS(mem_index); - int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); - int table_off = fast_off + offsetof(CPUTLBDescFast, table); - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - - /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off); - - /* Extract the page index, shifted into place for tlb index. */ - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_shri32(s, TCG_REG_TMP1, addrlo, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - } else { - tcg_out_shri64(s, TCG_REG_TMP1, addrlo, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + if (arg < 0) { + arg = TCG_REG_TMP1; } - tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1)); - - /* Load the TLB comparator. */ - if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { - uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32 - ? LWZUX : LDUX); - tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4)); - } else { - tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4)); - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); - } else { - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); - } - } - - /* Load the TLB addend for use on the fast path. Do this asap - to minimize any load use delay. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, - offsetof(CPUTLBEntry, addend)); - - /* Clear the non-page, non-alignment bits from the address */ - if (TCG_TARGET_REG_BITS == 32) { - /* We don't support unaligned accesses on 32-bits. - * Preserve the bottom bits and thus trigger a comparison - * failure on unaligned accesses. - */ - if (a_bits < s_bits) { - a_bits = s_bits; - } - tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, - (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); - } else { - TCGReg t = addrlo; - - /* If the access is unaligned, we need to make sure we fail if we - * cross a page boundary. The trick is to add the access size-1 - * to the address before masking the low bits. That will make the - * address overflow to the next page if we cross a page boundary, - * which will then force a mismatch of the TLB compare. - */ - if (a_bits < s_bits) { - unsigned a_mask = (1 << a_bits) - 1; - unsigned s_mask = (1 << s_bits) - 1; - tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); - t = TCG_REG_R0; - } - - /* Mask the address for the requested alignment. */ - if (TARGET_LONG_BITS == 32) { - tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, - (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); - /* Zero-extend the address for use in the final address. 
*/ - tcg_out_ext32u(s, TCG_REG_R4, addrlo); - addrlo = TCG_REG_R4; - } else if (a_bits == 0) { - tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS); - } else { - tcg_out_rld(s, RLDICL, TCG_REG_R0, t, - 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits); - tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); - } - } - - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, - 0, 7, TCG_TYPE_I32); - tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); - tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); - } else { - tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, - 0, 7, TCG_TYPE_TL); - } - - return addrlo; + tcg_out32(s, MFSPR | RT(arg) | LR); + return arg; } -/* Record the context of a call to the out of line helper code for the slow - path for a load or store, so that we can later generate the correct - helper code. */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, - TCGType type, MemOpIdx oi, - TCGReg datalo_reg, TCGReg datahi_reg, - TCGReg addrlo_reg, TCGReg addrhi_reg, - tcg_insn_unit *raddr, tcg_insn_unit *lptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->type = type; - label->oi = oi; - label->datalo_reg = datalo_reg; - label->datahi_reg = datahi_reg; - label->addrlo_reg = addrlo_reg; - label->addrhi_reg = addrhi_reg; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = lptr; -} +/* + * For the purposes of ppc32 sorting 4 input registers into 4 argument + * registers, there is an outside chance we would require 3 temps. + */ +static const TCGLdstHelperParam ldst_helper_param = { + .ra_gen = ldst_ra_gen, + .ntmp = 3, + .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 } +}; static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); - TCGReg hi, lo, arg = TCG_REG_R3; + MemOp opc = get_memop(lb->oi); if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); - - lo = lb->addrlo_reg; - hi = lb->addrhi_reg; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - } else { - /* If the address needed to be zero-extended, we'll have already - placed it in R4. The only remaining case is 64-bit guest. 
*/ - tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); - } - - tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); - tcg_out32(s, MFSPR | RT(arg) | LR); - + tcg_out_ld_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); - - lo = lb->datalo_reg; - hi = lb->datahi_reg; - if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { - tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4); - tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3); - } else { - tcg_out_movext(s, lb->type, lo, - TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_R3); - } + tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); tcg_out_b(s, 0, lb->raddr); return true; @@ -2182,70 +2026,19 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; - TCGReg hi, lo, arg = TCG_REG_R3; + MemOp opc = get_memop(lb->oi); if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); - - lo = lb->addrlo_reg; - hi = lb->addrhi_reg; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - } else { - /* If the address needed to be zero-extended, we'll have already - placed it in R4. The only remaining case is 64-bit guest. */ - tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); - } - - lo = lb->datalo_reg; - hi = lb->datahi_reg; - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - } else { - tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, - arg++, lb->type, s_bits, lo); - } - - tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); - tcg_out32(s, MFSPR | RT(arg) | LR); - + tcg_out_st_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_b(s, 0, lb->raddr); return true; } #else - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, - TCGReg addrhi, unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - - /* We are expecting a_bits to max out at 7, much lower than ANDI. */ - tcg_debug_assert(a_bits < 16); - tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask)); - - label->label_ptr[0] = s->code_ptr; - tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); - - label->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { @@ -2294,37 +2087,187 @@ typedef struct { TCGReg index; } HostAddress; +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. 
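+ *
+ * A sketch of the intended call pattern (editorial illustration, not
+ * part of the original patch; it mirrors the rewritten tcg_out_qemu_ld
+ * later in this same patch):
+ *
+ *     ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+ *     ... emit the fast-path access using h ...
+ *     if (ldst) {
+ *         ldst->type = data_type;
+ *         ldst->datalo_reg = datalo;
+ *         ldst->datahi_reg = datahi;
+ *         ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ *     }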
+ */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addrlo, TCGReg addrhi, + MemOpIdx oi, bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + +#ifdef CONFIG_SOFTMMU + int mem_index = get_mmuidx(oi); + int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write); + int fast_off = TLB_MASK_TABLE_OFS(mem_index); + int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); + int table_off = fast_off + offsetof(CPUTLBDescFast, table); + unsigned s_bits = opc & MO_SIZE; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off); + + /* Extract the page index, shifted into place for tlb index. */ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_shri32(s, TCG_REG_R0, addrlo, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + } else { + tcg_out_shri64(s, TCG_REG_R0, addrlo, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + } + tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); + + /* Load the (low part) TLB comparator into TMP2. */ + if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { + uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32 + ? LWZUX : LDUX); + tcg_out32(s, lxu | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2)); + } else { + tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, + TCG_REG_TMP1, cmp_off + 4 * HOST_BIG_ENDIAN); + } else { + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off); + } + } + + /* + * Load the TLB addend for use on the fast path. + * Do this asap to minimize any load use delay. + */ + if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, + offsetof(CPUTLBEntry, addend)); + } + + /* Clear the non-page, non-alignment bits from the address in R0. */ + if (TCG_TARGET_REG_BITS == 32) { + /* + * We don't support unaligned accesses on 32-bits. + * Preserve the bottom bits and thus trigger a comparison + * failure on unaligned accesses. + */ + if (a_bits < s_bits) { + a_bits = s_bits; + } + tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, + (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); + } else { + TCGReg t = addrlo; + + /* + * If the access is unaligned, we need to make sure we fail if we + * cross a page boundary. The trick is to add the access size-1 + * to the address before masking the low bits. That will make the + * address overflow to the next page if we cross a page boundary, + * which will then force a mismatch of the TLB compare. + */ + if (a_bits < s_bits) { + unsigned a_mask = (1 << a_bits) - 1; + unsigned s_mask = (1 << s_bits) - 1; + tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); + t = TCG_REG_R0; + } + + /* Mask the address for the requested alignment. 
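+ *
+ * A worked example of the page-cross trick above (editorial,
+ * assuming 4KiB pages): an 8-byte access requiring only 4-byte
+ * alignment adds s_mask - a_mask = 7 - 3 = 4.  A 4-aligned access
+ * at page offset 0xff8 stays on the same page (0xff8 + 4 = 0xffc),
+ * while one at offset 0xffc carries into the next page
+ * (0xffc + 4 = 0x1000) and therefore fails the TLB compare.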
*/ + if (TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, + (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); + } else if (a_bits == 0) { + tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS); + } else { + tcg_out_rld(s, RLDICL, TCG_REG_R0, t, + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + } + } + + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + /* Low part comparison into cr7. */ + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, + 0, 7, TCG_TYPE_I32); + + /* Load the high part TLB comparator into TMP2. */ + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, + cmp_off + 4 * !HOST_BIG_ENDIAN); + + /* Load addend, deferred for this case. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, + offsetof(CPUTLBEntry, addend)); + + /* High part comparison into cr6. */ + tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32); + + /* Combine comparisons into cr7. */ + tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + } else { + /* Full comparison into cr7. */ + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, + 0, 7, TCG_TYPE_TL); + } + + /* Load a pointer into the current opcode w/conditional branch-link. */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + + h->base = TCG_REG_TMP1; +#else + if (a_bits) { + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addrlo; + ldst->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7, much lower than ANDI. */ + tcg_debug_assert(a_bits < 16); + tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1)); + + ldst->label_ptr[0] = s->code_ptr; + tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); + } + + h->base = guest_base ? TCG_GUEST_BASE_REG : 0; +#endif + + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + /* Zero-extend the guest address for use in the host address. */ + tcg_out_ext32u(s, TCG_REG_R0, addrlo); + h->index = TCG_REG_R0; + } else { + h->index = addrlo; + } + + return ldst; +} + static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, MemOpIdx oi, TCGType data_type) { MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); - h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true); - h.base = TCG_REG_R3; - - /* Load a pointer into the current opcode w/conditional branch-link. */ - label_ptr = s->code_ptr; - tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); -#else /* !CONFIG_SOFTMMU */ - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); - } - h.base = guest_base ? 
TCG_GUEST_BASE_REG : 0; - h.index = addrlo; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); - h.index = TCG_REG_TMP1; - } -#endif - - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { + if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { if (opc & MO_BSWAP) { tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index)); @@ -2357,10 +2300,12 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#endif + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } } static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, @@ -2368,32 +2313,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, MemOpIdx oi, TCGType data_type) { MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - tcg_insn_unit *label_ptr; + ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); - h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false); - h.base = TCG_REG_R3; - - /* Load a pointer into the current opcode w/conditional branch-link. */ - label_ptr = s->code_ptr; - tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); -#else /* !CONFIG_SOFTMMU */ - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); - } - h.base = guest_base ? TCG_GUEST_BASE_REG : 0; - h.index = addrlo; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); - h.index = TCG_REG_TMP1; - } -#endif - - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { + if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { if (opc & MO_BSWAP) { tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4)); tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index)); @@ -2418,10 +2343,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi, - addrlo, addrhi, s->code_ptr, label_ptr); -#endif + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } } static void tcg_out_nop_fill(tcg_insn_unit *p, int count) @@ -3812,23 +3739,23 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_qemu_ld_i32: return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32 - ? C_O1_I1(r, L) - : C_O1_I2(r, L, L)); + ? C_O1_I1(r, r) + : C_O1_I2(r, r, r)); case INDEX_op_qemu_st_i32: return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32 - ? C_O0_I2(S, S) - : C_O0_I3(S, S, S)); + ? C_O0_I2(r, r) + : C_O0_I3(r, r, r)); case INDEX_op_qemu_ld_i64: - return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) - : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L) - : C_O2_I2(L, L, L, L)); + return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) + : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r) + : C_O2_I2(r, r, r, r)); case INDEX_op_qemu_st_i64: - return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S) - : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S) - : C_O0_I4(S, S, S, S)); + return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) + : TARGET_LONG_BITS == 32 ? 
C_O0_I3(r, r, r) + : C_O0_I4(r, r, r, r)); case INDEX_op_add_vec: case INDEX_op_sub_vec: @@ -3978,7 +3905,8 @@ static void tcg_target_init(TCGContext *s) #if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ #endif - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1); tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2); if (USE_REG_TB) { diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index d4cff673b0..d88888d3ac 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -10,10 +10,8 @@ * tcg-target-con-str.h; the constraint combination is inclusive or. */ C_O0_I1(r) -C_O0_I2(LZ, L) C_O0_I2(rZ, r) C_O0_I2(rZ, rZ) -C_O1_I1(r, L) C_O1_I1(r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h index 8d8afaee53..6f1cfb976c 100644 --- a/tcg/riscv/tcg-target-con-str.h +++ b/tcg/riscv/tcg-target-con-str.h @@ -9,7 +9,6 @@ * REGS(letter, register_mask) */ REGS('r', ALL_GENERAL_REGS) -REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) /* * Define constraint letters for constants: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index a4cf60ca75..d12b824d8c 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -125,17 +125,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define TCG_CT_CONST_N12 0x400 #define TCG_CT_CONST_M12 0x800 -#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) -/* - * For softmmu, we need to avoid conflicts with the first 5 - * argument registers to call the helper. Some of these are - * also used for the tlb lookup. - */ -#ifdef CONFIG_SOFTMMU -#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5) -#else -#define SOFTMMU_RESERVE_REGS 0 -#endif +#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) #define sextreg sextract64 @@ -899,10 +889,6 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = { #endif }; -/* We expect to use a 12-bit negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); - static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) { tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0); @@ -910,84 +896,14 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) tcg_debug_assert(ok); } -static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, MemOpIdx oi, - tcg_insn_unit **label_ptr, bool is_load) -{ - MemOp opc = get_memop(oi); - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - tcg_target_long compare_mask; - int mem_index = get_mmuidx(oi); - int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); - int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); - int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); - TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; - - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs); - - tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); - - /* Load the tlb comparator and the addend. 
*/ - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, - is_load ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write)); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, - offsetof(CPUTLBEntry, addend)); - - /* We don't support unaligned accesses. */ - if (a_bits < s_bits) { - a_bits = s_bits; - } - /* Clear the non-page, non-alignment bits from the address. */ - compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1); - if (compare_mask == sextreg(compare_mask, 0, 12)) { - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr, compare_mask); - } else { - tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask); - tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr); - } - - /* Compare masked address with the TLB entry. */ - label_ptr[0] = s->code_ptr; - tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0); - - /* TLB Hit - translate address using addend. */ - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_TMP0, addr); - addr = TCG_REG_TMP0; - } - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr); - return TCG_REG_TMP0; -} - -static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi, - TCGType data_type, TCGReg data_reg, - TCGReg addr_reg, void *raddr, - tcg_insn_unit **label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = data_type; - label->datalo_reg = data_reg; - label->addrlo_reg = addr_reg; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr[0]; -} +/* We have three temps, we might as well expose them. */ +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 } +}; static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - TCGReg a0 = tcg_target_call_iarg_regs[0]; - TCGReg a1 = tcg_target_call_iarg_regs[1]; - TCGReg a2 = tcg_target_call_iarg_regs[2]; - TCGReg a3 = tcg_target_call_iarg_regs[3]; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { @@ -995,13 +911,9 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* call load helper */ - tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg); - tcg_out_movi(s, TCG_TYPE_PTR, a2, oi); - tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr); - + tcg_out_ld_helper_args(s, l, &ldst_helper_param); tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false); - tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); + tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param); tcg_out_goto(s, l->raddr); return true; @@ -1009,14 +921,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; - TCGReg a0 = tcg_target_call_iarg_regs[0]; - TCGReg a1 = tcg_target_call_iarg_regs[1]; - TCGReg a2 = tcg_target_call_iarg_regs[2]; - TCGReg a3 = tcg_target_call_iarg_regs[3]; - TCGReg a4 = tcg_target_call_iarg_regs[4]; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { @@ -1024,39 +929,13 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* call store helper */ - tcg_out_mov(s, TCG_TYPE_PTR, a0, 
TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg); - tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, a2, - l->type, s_bits, l->datalo_reg); - tcg_out_movi(s, TCG_TYPE_PTR, a3, oi); - tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr); - + tcg_out_st_helper_args(s, l, &ldst_helper_param); tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); tcg_out_goto(s, l->raddr); return true; } #else - -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, - unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *l = new_ldst_label(s); - - l->is_ld = is_ld; - l->addrlo_reg = addr_reg; - - /* We are expecting a_bits to max out at 7, so we can always use andi. */ - tcg_debug_assert(a_bits < 12); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask); - - l->label_ptr[0] = s->code_ptr; - tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0); - - l->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { /* resolve label address */ @@ -1083,9 +962,108 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { return tcg_out_fail_alignment(s, l); } - #endif /* CONFIG_SOFTMMU */ +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase, + TCGReg addr_reg, MemOpIdx oi, + bool is_ld) +{ + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + unsigned a_mask = (1u << a_bits) - 1; + +#ifdef CONFIG_SOFTMMU + unsigned s_bits = opc & MO_SIZE; + int mem_index = get_mmuidx(oi); + int fast_ofs = TLB_MASK_TABLE_OFS(mem_index); + int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); + int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); + TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; + tcg_target_long compare_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs); + + tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); + + /* Load the tlb comparator and the addend. */ + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2, + is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2, + offsetof(CPUTLBEntry, addend)); + + /* We don't support unaligned accesses. */ + if (a_bits < s_bits) { + a_bits = s_bits; + } + /* Clear the non-page, non-alignment bits from the address. 
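+ *
+ * Worked example (editorial, assuming 4KiB pages and a_bits = 2):
+ * compare_mask = TARGET_PAGE_MASK | 3 = 0x...fffff003 = -4093,
+ * which is outside the 12-bit signed immediate range, so the
+ * movi + and path below is taken; only a target page size of
+ * 2KiB or less would permit the single ANDI.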
*/ + compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask; + if (compare_mask == sextreg(compare_mask, 0, 12)) { + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask); + } else { + tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask); + tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg); + } + + /* Compare masked address with the TLB entry. */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0); + + /* TLB Hit - translate address using addend. */ + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg); + addr_reg = TCG_REG_TMP0; + } + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg); + *pbase = TCG_REG_TMP0; +#else + if (a_mask) { + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + /* We are expecting a_bits max 7, so we can always use andi. */ + tcg_debug_assert(a_bits < 12); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask); + + ldst->label_ptr[0] = s->code_ptr; + tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0); + } + + TCGReg base = addr_reg; + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_TMP0, base); + base = TCG_REG_TMP0; + } + if (guest_base != 0) { + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base); + base = TCG_REG_TMP0; + } + *pbase = base; +#endif + + return ldst; +} + static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val, TCGReg base, MemOp opc, TCGType type) { @@ -1125,32 +1103,17 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val, static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; TCGReg base; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[1]; + ldst = prepare_host_addr(s, &base, addr_reg, oi, true); + tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type); - base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1); - tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type); - add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - tcg_out_test_alignment(s, true, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - base = addr_reg; - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_TMP0, base); - base = TCG_REG_TMP0; - } - if (guest_base != 0) { - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base); - base = TCG_REG_TMP0; - } - tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type); -#endif } static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val, @@ -1180,32 +1143,17 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val, static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; TCGReg base; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[1]; + ldst = prepare_host_addr(s, &base, addr_reg, oi, false); + tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi)); - base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0); - tcg_out_qemu_st_direct(s, data_reg, base, opc); - add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - if (a_bits) { - 
tcg_out_test_alignment(s, false, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - base = addr_reg; - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_TMP0, base); - base = TCG_REG_TMP0; - } - if (guest_base != 0) { - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base); - base = TCG_REG_TMP0; - } - tcg_out_qemu_st_direct(s, data_reg, base, opc); -#endif } static const tcg_insn_unit *tb_ret_addr; @@ -1642,10 +1590,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: - return C_O1_I1(r, L); + return C_O1_I1(r, r); case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i64: - return C_O0_I2(LZ, L); + return C_O0_I2(rZ, r); default: g_assert_not_reached(); diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 15f1c55103..ecc079bb6d 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -10,12 +10,10 @@ * tcg-target-con-str.h; the constraint combination is inclusive or. */ C_O0_I1(r) -C_O0_I2(L, L) C_O0_I2(r, r) C_O0_I2(r, ri) C_O0_I2(r, rA) C_O0_I2(v, r) -C_O1_I1(r, L) C_O1_I1(r, r) C_O1_I1(v, r) C_O1_I1(v, v) diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h index 6fa64a1ed6..25675b449e 100644 --- a/tcg/s390x/tcg-target-con-str.h +++ b/tcg/s390x/tcg-target-con-str.h @@ -9,7 +9,6 @@ * REGS(letter, register_mask) */ REGS('r', ALL_GENERAL_REGS) -REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) REGS('v', ALL_VECTOR_REGS) REGS('o', 0xaaaa) /* odd numbered general regs */ diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index da7ee5b085..aacbaf21d5 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -44,18 +44,6 @@ #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) -/* - * For softmmu, we need to avoid conflicts with the first 3 - * argument registers to perform the tlb lookup, and to call - * the helper function. - */ -#ifdef CONFIG_SOFTMMU -#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3) -#else -#define SOFTMMU_RESERVE_REGS 0 -#endif - - /* Several places within the instruction set 0 means "no register" rather than TCG_REG_R0. */ #define TCG_REG_NONE 0 @@ -149,6 +137,7 @@ typedef enum S390Opcode { RRE_ALGR = 0xb90a, RRE_ALCR = 0xb998, RRE_ALCGR = 0xb988, + RRE_ALGFR = 0xb91a, RRE_CGR = 0xb920, RRE_CLGR = 0xb921, RRE_DLGR = 0xb987, @@ -1718,98 +1707,22 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data, } #if defined(CONFIG_SOFTMMU) -/* We're expecting to use a 20-bit negative offset on the tlb memory ops. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19)); - -/* Load and compare a TLB entry, leaving the flags set. Loads the TLB - addend into R2. Returns a register with the santitized guest address. 
*/ -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, - int mem_index, bool is_ld) -{ - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - unsigned s_mask = (1 << s_bits) - 1; - unsigned a_mask = (1 << a_bits) - 1; - int fast_off = TLB_MASK_TABLE_OFS(mem_index); - int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); - int table_off = fast_off + offsetof(CPUTLBDescFast, table); - int ofs, a_off; - uint64_t tlb_mask; - - tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off); - tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off); - - /* For aligned accesses, we check the first byte and include the alignment - bits within the address. For unaligned access, we check that we don't - cross pages using the address of the last byte of the access. */ - a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask); - tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; - if (a_off == 0) { - tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); - } else { - tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); - tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); - } - - if (is_ld) { - ofs = offsetof(CPUTLBEntry, addr_read); - } else { - ofs = offsetof(CPUTLBEntry, addr_write); - } - if (TARGET_LONG_BITS == 32) { - tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs); - } else { - tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs); - } - - tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE, - offsetof(CPUTLBEntry, addend)); - - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_R3, addr_reg); - return TCG_REG_R3; - } - return addr_reg; -} - -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, - TCGType type, TCGReg data, TCGReg addr, - tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = type; - label->datalo_reg = data; - label->addrlo_reg = addr; - label->raddr = tcg_splitwx_to_rx(raddr); - label->label_ptr[0] = label_ptr; -} +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 1, .tmp = { TCG_TMP0 } +}; static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - TCGReg addr_reg = lb->addrlo_reg; - TCGReg data_reg = lb->datalo_reg; - MemOpIdx oi = lb->oi; - MemOp opc = get_memop(oi); + MemOp opc = get_memop(lb->oi); if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); - } - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); - tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); + tcg_out_ld_helper_args(s, lb, &ldst_helper_param); + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); return true; @@ -1817,51 +1730,20 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - TCGReg addr_reg = lb->addrlo_reg; - TCGReg data_reg = lb->datalo_reg; - MemOpIdx oi = lb->oi; - MemOp 
opc = get_memop(oi); - MemOp size = opc & MO_SIZE; + MemOp opc = get_memop(lb->oi); if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) { return false; } - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); - } - tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, - TCG_REG_R4, lb->type, size, data_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); + tcg_out_st_helper_args(s, lb, &ldst_helper_param); tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); return true; } #else -static void tcg_out_test_alignment(TCGContext *s, bool is_ld, - TCGReg addrlo, unsigned a_bits) -{ - unsigned a_mask = (1 << a_bits) - 1; - TCGLabelQemuLdst *l = new_ldst_label(s); - - l->is_ld = is_ld; - l->addrlo_reg = addrlo; - - /* We are expecting a_bits to max out at 7, much lower than TMLL. */ - tcg_debug_assert(a_bits < 16); - tcg_out_insn(s, RI, TMLL, addrlo, a_mask); - - tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */ - l->label_ptr[0] = s->code_ptr; - s->code_ptr += 1; - - l->raddr = tcg_splitwx_to_rx(s->code_ptr); -} - static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) { if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL, @@ -1888,91 +1770,147 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { return tcg_out_fail_alignment(s, l); } +#endif /* CONFIG_SOFTMMU */ -static HostAddress tcg_prepare_user_ldst(TCGContext *s, TCGReg addr_reg) +/* + * For softmmu, perform the TLB load and compare. + * For useronly, perform any required alignment tests. + * In both cases, return a TCGLabelQemuLdst structure if the slow path + * is required and fill in @h with the host address for the fast path. + */ +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, + TCGReg addr_reg, MemOpIdx oi, + bool is_ld) { - TCGReg index; - int disp; + TCGLabelQemuLdst *ldst = NULL; + MemOp opc = get_memop(oi); + unsigned a_bits = get_alignment_bits(opc); + unsigned a_mask = (1u << a_bits) - 1; + +#ifdef CONFIG_SOFTMMU + unsigned s_bits = opc & MO_SIZE; + unsigned s_mask = (1 << s_bits) - 1; + int mem_index = get_mmuidx(oi); + int fast_off = TLB_MASK_TABLE_OFS(mem_index); + int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); + int table_off = fast_off + offsetof(CPUTLBDescFast, table); + int ofs, a_off; + uint64_t tlb_mask; + + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); + QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19)); + tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off); + tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off); + + /* + * For aligned accesses, we check the first byte and include the alignment + * bits within the address. For unaligned access, we check that we don't + * cross pages using the address of the last byte of the access. + */ + a_off = (a_bits >= s_bits ? 
0 : s_mask - a_mask); + tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; + if (a_off == 0) { + tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask); + } else { + tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R0, tlb_mask); + } + + if (is_ld) { + ofs = offsetof(CPUTLBEntry, addr_read); + } else { + ofs = offsetof(CPUTLBEntry, addr_write); + } + if (TARGET_LONG_BITS == 32) { + tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs); + } else { + tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs); + } + + tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); + ldst->label_ptr[0] = s->code_ptr++; + + h->index = TCG_TMP0; + tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE, + offsetof(CPUTLBEntry, addend)); + if (TARGET_LONG_BITS == 32) { + tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg); + h->base = TCG_REG_NONE; + } else { + h->base = addr_reg; + } + h->disp = 0; +#else + if (a_mask) { + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addrlo_reg = addr_reg; + + /* We are expecting a_bits to max out at 7, much lower than TMLL. */ + tcg_debug_assert(a_bits < 16); + tcg_out_insn(s, RI, TMLL, addr_reg, a_mask); + + tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */ + ldst->label_ptr[0] = s->code_ptr++; + } + + h->base = addr_reg; if (TARGET_LONG_BITS == 32) { tcg_out_ext32u(s, TCG_TMP0, addr_reg); - addr_reg = TCG_TMP0; + h->base = TCG_TMP0; } if (guest_base < 0x80000) { - index = TCG_REG_NONE; - disp = guest_base; + h->index = TCG_REG_NONE; + h->disp = guest_base; } else { - index = TCG_GUEST_BASE_REG; - disp = 0; + h->index = TCG_GUEST_BASE_REG; + h->disp = 0; } - return (HostAddress){ .base = addr_reg, .index = index, .disp = disp }; +#endif + + return ldst; } -#endif /* CONFIG_SOFTMMU */ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; + ldst = prepare_host_addr(s, &h, addr_reg, oi, true); + tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h); - h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); - h.index = TCG_REG_R2; - h.disp = 0; - - tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); - label_ptr = s->code_ptr; - s->code_ptr += 1; - - tcg_out_qemu_ld_direct(s, opc, data_reg, h); - - add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else - unsigned a_bits = get_alignment_bits(opc); - - if (a_bits) { - tcg_out_test_alignment(s, true, addr_reg, a_bits); + if (ldst) { + ldst->type = data_type; + ldst->datalo_reg = data_reg; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } - h = tcg_prepare_user_ldst(s, addr_reg); - tcg_out_qemu_ld_direct(s, opc, data_reg, h); -#endif } static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { - MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; HostAddress h; -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; + ldst = prepare_host_addr(s, &h, addr_reg, oi, false); + tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h); - h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); - h.index = TCG_REG_R2; - h.disp = 0; - - tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); - label_ptr = s->code_ptr; - s->code_ptr += 1; - - tcg_out_qemu_st_direct(s, opc, data_reg, h); - - add_qemu_ldst_label(s, false, oi, data_type, 
 
@@ -366,8 +382,17 @@ void tcg_raise_tb_overflow(TCGContext *s)
     siglongjmp(s->jmp_trans, -2);
 }
 
+/*
+ * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
+ * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
+ *
+ * However, tcg_out_helper_load_slots reuses this field to hold an
+ * argument slot number (which may designate an argument register or an
+ * argument stack slot), converting to TCGReg once all arguments that
+ * are destined for the stack are processed.
+ */
 typedef struct TCGMovExtend {
-    TCGReg dst;
+    unsigned dst;
     TCGReg src;
     TCGType dst_type;
     TCGType src_type;
@@ -459,9 +484,8 @@ static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
  * between the sources and destinations.
  */
 
-static void __attribute__((unused))
-tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
-                const TCGMovExtend *i2, int scratch)
+static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
+                            const TCGMovExtend *i2, int scratch)
 {
     TCGReg src1 = i1->src;
     TCGReg src2 = i2->src;
@@ -715,6 +739,58 @@ static TCGHelperInfo all_helpers[] = {
 };
 static GHashTable *helper_table;
 
+/*
+ * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
+ * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
+ * We only use these for layout in tcg_out_ld_helper_ret and
+ * tcg_out_st_helper_args, and share them between several of
+ * the helpers, with the end result that it's easier to build manually.
+ */
+
+#if TCG_TARGET_REG_BITS == 32
+# define dh_typecode_ttl  dh_typecode_i32
+#else
+# define dh_typecode_ttl  dh_typecode_i64
+#endif
+
+static TCGHelperInfo info_helper_ld32_mmu = {
+    .flags = TCG_CALL_NO_WG,
+    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
+              | dh_typemask(env, 1)
+              | dh_typemask(tl, 2)   /* target_ulong addr */
+              | dh_typemask(i32, 3)  /* unsigned oi */
+              | dh_typemask(ptr, 4)  /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_ld64_mmu = {
+    .flags = TCG_CALL_NO_WG,
+    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
+              | dh_typemask(env, 1)
+              | dh_typemask(tl, 2)   /* target_ulong addr */
+              | dh_typemask(i32, 3)  /* unsigned oi */
+              | dh_typemask(ptr, 4)  /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_st32_mmu = {
+    .flags = TCG_CALL_NO_WG,
+    .typemask = dh_typemask(void, 0)
+              | dh_typemask(env, 1)
+              | dh_typemask(tl, 2)   /* target_ulong addr */
+              | dh_typemask(i32, 3)  /* uint32_t data */
+              | dh_typemask(i32, 4)  /* unsigned oi */
+              | dh_typemask(ptr, 5)  /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_st64_mmu = {
+    .flags = TCG_CALL_NO_WG,
+    .typemask = dh_typemask(void, 0)
+              | dh_typemask(env, 1)
+              | dh_typemask(tl, 2)   /* target_ulong addr */
+              | dh_typemask(i64, 3)  /* uint64_t data */
+              | dh_typemask(i32, 4)  /* unsigned oi */
+              | dh_typemask(ptr, 5)  /* uintptr_t ra */
+};
+
 #ifdef CONFIG_TCG_INTERPRETER
 static ffi_type *typecode_to_ffi(int argmask)
 {
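For readers unfamiliar with the dh_* machinery: dh_typemask(t, n) packs a small per-type code into a 3-bit group selected by argument slot n, with slot 0 being the return value. A self-contained illustration with stand-in type codes (QEMU's real codes live in "exec/helper-head.h"; the values below are placeholders, not authoritative):

    #include <stdint.h>

    /* Illustrative stand-ins for the dh_typecode_* values. */
    enum { TC_VOID, TC_FLAG, TC_I32, TC_S32, TC_I64, TC_S64, TC_PTR };
    #define TYPEMASK(tc, slot)  ((uint32_t)(tc) << ((slot) * 3))

    /* Hand-rolled mirror of info_helper_ld64_mmu.typemask above: */
    static const uint32_t ld64_typemask =
          TYPEMASK(TC_I64, 0)    /* return uint64_t */
        | TYPEMASK(TC_PTR, 1)    /* env */
        | TYPEMASK(TC_I64, 2)    /* addr, for a 64-bit target_ulong */
        | TYPEMASK(TC_I32, 3)    /* oi */
        | TYPEMASK(TC_PTR, 4);   /* ra */

init_call_layout() then turns the mask into the in[]/out argument locations consumed by the marshalling code below.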
@@ -1126,6 +1202,11 @@ static void tcg_context_init(unsigned max_cpus)
                             (gpointer)&all_helpers[i]);
     }
 
+    init_call_layout(&info_helper_ld32_mmu);
+    init_call_layout(&info_helper_ld64_mmu);
+    init_call_layout(&info_helper_st32_mmu);
+    init_call_layout(&info_helper_st64_mmu);
+
 #ifdef CONFIG_TCG_INTERPRETER
     init_ffi_layouts();
 #endif
@@ -2104,13 +2185,8 @@ static const char * const ldst_name[] =
 };
 
 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
-#ifdef TARGET_ALIGNED_ONLY
     [MO_UNALN >> MO_ASHIFT]    = "un+",
-    [MO_ALIGN >> MO_ASHIFT]    = "",
-#else
-    [MO_UNALN >> MO_ASHIFT]    = "",
     [MO_ALIGN >> MO_ASHIFT]    = "al+",
-#endif
     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
@@ -5011,6 +5087,392 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
     }
 }
 
+/*
+ * Similarly for qemu_ld/st slow path helpers.
+ * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
+ * using only the provided backend tcg_out_* functions.
+ */
+
+static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
+{
+    int ofs = arg_slot_stk_ofs(slot);
+
+    /*
+     * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
+     * require extension to uint64_t, adjust the address for uint32_t.
+     */
+    if (HOST_BIG_ENDIAN &&
+        TCG_TARGET_REG_BITS == 64 &&
+        type == TCG_TYPE_I32) {
+        ofs += 4;
+    }
+    return ofs;
+}
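A worked example of the big-endian adjustment, purely illustrative:

    /*
     * 64-bit big-endian host, 8-byte stack slots.  A 32-bit value that
     * was stored as a full register occupies the high-addressed half:
     *
     *   byte:          +0 +1 +2 +3 +4 +5 +6 +7
     *   uint64_t arg:  [......... value ......]
     *   uint32_t arg:  [..(unused)..][ value  ]   <-- hence ofs += 4
     *
     * A little-endian host finds the 32-bit value at +0, so no
     * adjustment is needed there.
     */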
+
+static void tcg_out_helper_load_regs(TCGContext *s,
+                                     unsigned nmov, TCGMovExtend *mov,
+                                     unsigned ntmp, const int *tmp)
+{
+    switch (nmov) {
+    default:
+        /* The backend must have provided enough temps for the worst case. */
+        tcg_debug_assert(ntmp + 1 >= nmov);
+
+        for (unsigned i = nmov - 1; i >= 2; --i) {
+            TCGReg dst = mov[i].dst;
+
+            for (unsigned j = 0; j < i; ++j) {
+                if (dst == mov[j].src) {
+                    /*
+                     * Conflict.
+                     * Copy the source to a temporary, recurse for the
+                     * remaining moves, perform the extension from our
+                     * scratch on the way out.
+                     */
+                    TCGReg scratch = tmp[--ntmp];
+                    tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
+                    mov[i].src = scratch;
+
+                    tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
+                    tcg_out_movext1(s, &mov[i]);
+                    return;
+                }
+            }
+
+            /* No conflicts: perform this move and continue. */
+            tcg_out_movext1(s, &mov[i]);
+        }
+        /* fall through for the final two moves */
+
+    case 2:
+        tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
+        return;
+    case 1:
+        tcg_out_movext1(s, mov);
+        return;
+    case 0:
+        g_assert_not_reached();
+    }
+}
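To make the conflict handling concrete, an illustrative walk-through (not part of the patch) for the register cycle mov[0]: R1 <- R2, mov[1]: R2 <- R3, mov[2]: R3 <- R1:

    /*
     * Emitting mov[2] first would clobber R3, which mov[1] still needs
     * as its source.  So mov[2]'s own source R1 is parked in a backend
     * temp (the recursion below may overwrite R1), mov[0..1] fall into
     * the tcg_out_movext2 case, which resolves their remaining overlap,
     * and finally the parked value is extended from the temp into R3 on
     * the way back out.
     */

This is, roughly, why the assertion requires ntmp + 1 >= nmov: each recursion step can park at most one source in a temp, leaving one temp available for a swap inside tcg_out_movext2 at the end.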
+
+static void tcg_out_helper_load_slots(TCGContext *s,
+                                      unsigned nmov, TCGMovExtend *mov,
+                                      const TCGLdstHelperParam *parm)
+{
+    unsigned i;
+
+    /*
+     * Start from the end, storing to the stack first.
+     * This frees those registers, so we need not consider overlap.
+     */
+    for (i = nmov; i-- > 0; ) {
+        unsigned slot = mov[i].dst;
+
+        if (arg_slot_reg_p(slot)) {
+            goto found_reg;
+        }
+
+        TCGReg src = mov[i].src;
+        TCGType dst_type = mov[i].dst_type;
+        MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
+
+        /* The argument is going onto the stack; extend into scratch. */
+        if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
+            tcg_debug_assert(parm->ntmp != 0);
+            mov[i].dst = src = parm->tmp[0];
+            tcg_out_movext1(s, &mov[i]);
+        }
+
+        tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
+                   tcg_out_helper_stk_ofs(dst_type, slot));
+    }
+    return;
+
+ found_reg:
+    /*
+     * The remaining arguments are in registers.
+     * Convert slot numbers to argument registers.
+     */
+    nmov = i + 1;
+    for (i = 0; i < nmov; ++i) {
+        mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
+    }
+    tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
+}
+
+static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
+                                    TCGType type, tcg_target_long imm,
+                                    const TCGLdstHelperParam *parm)
+{
+    if (arg_slot_reg_p(slot)) {
+        tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
+    } else {
+        int ofs = tcg_out_helper_stk_ofs(type, slot);
+        if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
+            tcg_debug_assert(parm->ntmp != 0);
+            tcg_out_movi(s, type, parm->tmp[0], imm);
+            tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
+        }
+    }
+}
+
+static void tcg_out_helper_load_common_args(TCGContext *s,
+                                            const TCGLabelQemuLdst *ldst,
+                                            const TCGLdstHelperParam *parm,
+                                            const TCGHelperInfo *info,
+                                            unsigned next_arg)
+{
+    TCGMovExtend ptr_mov = {
+        .dst_type = TCG_TYPE_PTR,
+        .src_type = TCG_TYPE_PTR,
+        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
+    };
+    const TCGCallArgumentLoc *loc = &info->in[0];
+    TCGType type;
+    unsigned slot;
+    tcg_target_ulong imm;
+
+    /*
+     * Handle env, which is always first.
+     */
+    ptr_mov.dst = loc->arg_slot;
+    ptr_mov.src = TCG_AREG0;
+    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
+
+    /*
+     * Handle oi.
+     */
+    imm = ldst->oi;
+    loc = &info->in[next_arg];
+    type = TCG_TYPE_I32;
+    switch (loc->kind) {
+    case TCG_CALL_ARG_NORMAL:
+        break;
+    case TCG_CALL_ARG_EXTEND_U:
+    case TCG_CALL_ARG_EXTEND_S:
+        /* No extension required for MemOpIdx. */
+        tcg_debug_assert(imm <= INT32_MAX);
+        type = TCG_TYPE_REG;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
+    next_arg++;
+
+    /*
+     * Handle ra.
+     */
+    loc = &info->in[next_arg];
+    slot = loc->arg_slot;
+    if (parm->ra_gen) {
+        int arg_reg = -1;
+        TCGReg ra_reg;
+
+        if (arg_slot_reg_p(slot)) {
+            arg_reg = tcg_target_call_iarg_regs[slot];
+        }
+        ra_reg = parm->ra_gen(s, ldst, arg_reg);
+
+        ptr_mov.dst = slot;
+        ptr_mov.src = ra_reg;
+        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
+    } else {
+        imm = (uintptr_t)ldst->raddr;
+        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
+    }
+}
+
+static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
+                                       const TCGCallArgumentLoc *loc,
+                                       TCGType dst_type, TCGType src_type,
+                                       TCGReg lo, TCGReg hi)
+{
+    if (dst_type <= TCG_TYPE_REG) {
+        MemOp src_ext;
+
+        switch (loc->kind) {
+        case TCG_CALL_ARG_NORMAL:
+            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
+            break;
+        case TCG_CALL_ARG_EXTEND_U:
+            dst_type = TCG_TYPE_REG;
+            src_ext = MO_UL;
+            break;
+        case TCG_CALL_ARG_EXTEND_S:
+            dst_type = TCG_TYPE_REG;
+            src_ext = MO_SL;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        mov[0].dst = loc->arg_slot;
+        mov[0].dst_type = dst_type;
+        mov[0].src = lo;
+        mov[0].src_type = src_type;
+        mov[0].src_ext = src_ext;
+        return 1;
+    }
+
+    assert(TCG_TARGET_REG_BITS == 32);
+
+    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
+    mov[0].src = lo;
+    mov[0].dst_type = TCG_TYPE_I32;
+    mov[0].src_type = TCG_TYPE_I32;
+    mov[0].src_ext = MO_32;
+
+    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
+    mov[1].src = hi;
+    mov[1].dst_type = TCG_TYPE_I32;
+    mov[1].src_type = TCG_TYPE_I32;
+    mov[1].src_ext = MO_32;
+
+    return 2;
+}
+
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
+                                   const TCGLdstHelperParam *parm)
+{
+    const TCGHelperInfo *info;
+    const TCGCallArgumentLoc *loc;
+    TCGMovExtend mov[2];
+    unsigned next_arg, nmov;
+    MemOp mop = get_memop(ldst->oi);
+
+    switch (mop & MO_SIZE) {
+    case MO_8:
+    case MO_16:
+    case MO_32:
+        info = &info_helper_ld32_mmu;
+        break;
+    case MO_64:
+        info = &info_helper_ld64_mmu;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Defer env argument. */
+    next_arg = 1;
+
+    loc = &info->in[next_arg];
+    nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
+                                  ldst->addrlo_reg, ldst->addrhi_reg);
+    next_arg += nmov;
+
+    tcg_out_helper_load_slots(s, nmov, mov, parm);
+
+    /* No special attention for 32 and 64-bit return values. */
+    tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
+
+    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
+}
+
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
+                                  bool load_sign,
+                                  const TCGLdstHelperParam *parm)
+{
+    TCGMovExtend mov[2];
+
+    if (ldst->type <= TCG_TYPE_REG) {
+        MemOp mop = get_memop(ldst->oi);
+
+        mov[0].dst = ldst->datalo_reg;
+        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
+        mov[0].dst_type = ldst->type;
+        mov[0].src_type = TCG_TYPE_REG;
+
+        /*
+         * If load_sign, then we allowed the helper to perform the
+         * appropriate sign extension to tcg_target_ulong, and all
+         * we need now is a plain move.
+         *
+         * If not, then we expect the relevant extension instruction
+         * to be no more expensive than a move, and we thus save the
+         * icache etc by only using one of two helper functions.
+         */
+        if (load_sign || !(mop & MO_SIGN)) {
+            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
+                mov[0].src_ext = MO_32;
+            } else {
+                mov[0].src_ext = MO_64;
+            }
+        } else {
+            mov[0].src_ext = mop & MO_SSIZE;
+        }
+        tcg_out_movext1(s, mov);
+    } else {
+        assert(TCG_TARGET_REG_BITS == 32);
+
+        mov[0].dst = ldst->datalo_reg;
+        mov[0].src =
+            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
+        mov[0].dst_type = TCG_TYPE_I32;
+        mov[0].src_type = TCG_TYPE_I32;
+        mov[0].src_ext = MO_32;
+
+        mov[1].dst = ldst->datahi_reg;
+        mov[1].src =
+            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
+        mov[1].dst_type = TCG_TYPE_REG;
+        mov[1].src_type = TCG_TYPE_REG;
+        mov[1].src_ext = MO_32;
+
+        tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
+    }
+}
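A concrete case of the load_sign distinction, for illustration only:

    /*
     * MO_SB load into a 64-bit destination.
     *   load_sign == false: the helper returned the byte zero-extended;
     *       src_ext = MO_SB, so tcg_out_movext1 emits the backend's
     *       8-bit sign-extend rather than a plain move.
     *   load_sign == true:  the helper result was already sign-extended
     *       to tcg_target_ulong; src_ext = MO_64, and the fixup is a
     *       plain register move.
     */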
+
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
+                                   const TCGLdstHelperParam *parm)
+{
+    const TCGHelperInfo *info;
+    const TCGCallArgumentLoc *loc;
+    TCGMovExtend mov[4];
+    TCGType data_type;
+    unsigned next_arg, nmov, n;
+    MemOp mop = get_memop(ldst->oi);
+
+    switch (mop & MO_SIZE) {
+    case MO_8:
+    case MO_16:
+    case MO_32:
+        info = &info_helper_st32_mmu;
+        data_type = TCG_TYPE_I32;
+        break;
+    case MO_64:
+        info = &info_helper_st64_mmu;
+        data_type = TCG_TYPE_I64;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Defer env argument. */
+    next_arg = 1;
+    nmov = 0;
+
+    /* Handle addr argument. */
+    loc = &info->in[next_arg];
+    n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
+                               ldst->addrlo_reg, ldst->addrhi_reg);
+    next_arg += n;
+    nmov += n;
+
+    /* Handle data argument. */
+    loc = &info->in[next_arg];
+    n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
+                               ldst->datalo_reg, ldst->datahi_reg);
+    next_arg += n;
+    nmov += n;
+    tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
+
+    tcg_out_helper_load_slots(s, nmov, mov, parm);
+    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
+}
+
 #ifdef CONFIG_PROFILER
 
 /* avoid copy/paste errors */
diff --git a/trace/control-target.c b/trace/control-target.c
index 232c97a4a1..c0c1e2310a 100644
--- a/trace/control-target.c
+++ b/trace/control-target.c
@@ -8,6 +8,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/lockable.h"
 #include "cpu.h"
 #include "trace/trace-root.h"
 #include "trace/control.h"
@@ -116,11 +117,9 @@ static bool adding_first_cpu1(void)
 
 static bool adding_first_cpu(void)
 {
-    bool res;
-
-    cpu_list_lock();
-    res = adding_first_cpu1();
-    cpu_list_unlock();
-    return res;
+    QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
+
+    return adding_first_cpu1();
 }
 
 void trace_init_vcpu(CPUState *vcpu)
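The trace/control-target.c hunk is the usual lock-guard conversion: QEMU_LOCK_GUARD (from "qemu/lockable.h") releases the lock automatically when the enclosing scope is left, which removes the temporary previously needed to carry the result past the unlock. The shape of the transformation, sketched with hypothetical names:

    static QemuMutex example_lock;
    static int example_value;

    static int read_example(void)
    {
        QEMU_LOCK_GUARD(&example_lock);
        /* The lock is dropped after the return expression is evaluated. */
        return example_value;
    }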