diff --git a/CMakeLists.txt b/CMakeLists.txt index f50635f79..baf97f7f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -645,7 +645,6 @@ target_sources(${PROJECT_NAME} PRIVATE if(NOT WIN32) target_sources(${PROJECT_NAME} PRIVATE core/linux/context.cpp - core/linux/context.h core/linux/posix_vmem.cpp) endif() @@ -699,6 +698,8 @@ target_sources(${PROJECT_NAME} PRIVATE core/oslib/audiobackend_sdl2.cpp core/oslib/audiostream.cpp core/oslib/audiostream.h + core/oslib/directory.h + core/oslib/host_context.h core/oslib/oslib.h) target_sources(${PROJECT_NAME} PRIVATE diff --git a/core/hw/mem/vmem32.cpp b/core/hw/mem/vmem32.cpp index 69f91ed03..c43396157 100644 --- a/core/hw/mem/vmem32.cpp +++ b/core/hw/mem/vmem32.cpp @@ -147,7 +147,7 @@ void vmem32_protect_vram(u32 addr, u32 size) { if (!vmem32_inited) return; - for (int page = (addr & VRAM_MASK) / VRAM_PROT_SEGMENT; page <= ((addr & VRAM_MASK) + size - 1) / VRAM_PROT_SEGMENT; page++) + for (u32 page = (addr & VRAM_MASK) / VRAM_PROT_SEGMENT; page <= ((addr & VRAM_MASK) + size - 1) / VRAM_PROT_SEGMENT; page++) { vram_blocks[page].push_back({ addr, addr + size - 1 }); } @@ -156,7 +156,7 @@ void vmem32_unprotect_vram(u32 addr, u32 size) { if (!vmem32_inited) return; - for (int page = (addr & VRAM_MASK) / VRAM_PROT_SEGMENT; page <= ((addr & VRAM_MASK) + size - 1) / VRAM_PROT_SEGMENT; page++) + for (u32 page = (addr & VRAM_MASK) / VRAM_PROT_SEGMENT; page <= ((addr & VRAM_MASK) + size - 1) / VRAM_PROT_SEGMENT; page++) { std::vector& block_list = vram_blocks[page]; for (auto it = block_list.begin(); it != block_list.end(); ) @@ -241,7 +241,7 @@ static u32 vmem32_map_mmu(u32 address, bool write) u32 vpn = (entry->Address.VPN << 10) & ~(page_size - 1); u32 ppn = (entry->Data.PPN << 10) & ~(page_size - 1); u32 offset = vmem32_paddr_to_offset(ppn); - if (offset == -1) + if (offset == (u32)-1) return VMEM32_ERROR_NOT_MAPPED; bool allow_write = (entry->Data.PR & 1) != 0; @@ -334,27 +334,30 @@ static u32 vmem32_map_address(u32 address, bool write) } #if !defined(NO_MMU) && defined(HOST_64BIT_CPU) -bool vmem32_handle_signal(void *fault_addr, bool write, u32 exception_pc) +// returns: +// 0 if the fault address isn't handled by the mmu +// 1 if the fault was handled and the access should be reattempted +// -1 if an sh4 exception has been thrown +int vmem32_handle_signal(void *fault_addr, bool write, u32 exception_pc) { if (!vmem32_inited || (u8*)fault_addr < virt_ram_base || (u8*)fault_addr >= virt_ram_base + VMEM32_SIZE) - return false; + return 0; //vmem32_page_faults++; u32 guest_addr = (u8*)fault_addr - virt_ram_base; u32 rv = vmem32_map_address(guest_addr, write); DEBUG_LOG(VMEM, "vmem32_handle_signal handled signal %s @ %p -> %08x rv=%d", write ? "W" : "R", fault_addr, guest_addr, rv); if (rv == MMU_ERROR_NONE) - return true; + return 1; if (rv == VMEM32_ERROR_NOT_MAPPED) - return false; + return 0; #if HOST_CPU == CPU_ARM64 p_sh4rcb->cntx.pc = exception_pc; #else p_sh4rcb->cntx.pc = p_sh4rcb->cntx.exception_pc; #endif DoMMUException(guest_addr, rv, write ? MMU_TT_DWRITE : MMU_TT_DREAD); - ngen_HandleException(); - // not reached - return true; + + return -1; } #endif diff --git a/core/hw/mem/vmem32.h b/core/hw/mem/vmem32.h index b233ee933..6bcc796a4 100644 --- a/core/hw/mem/vmem32.h +++ b/core/hw/mem/vmem32.h @@ -24,7 +24,7 @@ bool vmem32_init(); void vmem32_term(); -bool vmem32_handle_signal(void *fault_addr, bool write, u32 exception_pc); +int vmem32_handle_signal(void *fault_addr, bool write, u32 exception_pc); void vmem32_flush_mmu(); void vmem32_protect_vram(u32 addr, u32 size); void vmem32_unprotect_vram(u32 addr, u32 size); diff --git a/core/hw/sh4/dyna/blockmanager.cpp b/core/hw/sh4/dyna/blockmanager.cpp index 26df047ed..6d751b2a6 100644 --- a/core/hw/sh4/dyna/blockmanager.cpp +++ b/core/hw/sh4/dyna/blockmanager.cpp @@ -256,8 +256,8 @@ void bm_Reset() } if (_nvmem_4gb_space()) { - mem_region_unlock(virt_ram_base + 0x8C000000, 0x90000000 - 0x8C000000); - mem_region_unlock(virt_ram_base + 0xAC000000, 0xB0000000 - 0xAC000000); + mem_region_unlock(virt_ram_base + 0x8C000000u, 0x90000000u - 0x8C000000u); + mem_region_unlock(virt_ram_base + 0xAC000000u, 0xB0000000u - 0xAC000000u); } } diff --git a/core/hw/sh4/dyna/blockmanager.h b/core/hw/sh4/dyna/blockmanager.h index 8dda7968a..c6d2823c3 100644 --- a/core/hw/sh4/dyna/blockmanager.h +++ b/core/hw/sh4/dyna/blockmanager.h @@ -35,7 +35,7 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core fpscr_t fpu_cfg; u32 guest_cycles; u32 guest_opcodes; - u32 host_opcodes; + u32 host_opcodes; // set by host code generator, optional bool has_fpu_op; u32 blockcheck_failures; bool temp_block; @@ -49,7 +49,6 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core u32 relink_offset; u32 relink_data; - u32 csc_RetCache; //only for stats for now BlockEndType BlockType; bool has_jcond; @@ -73,12 +72,8 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core void RemRef(RuntimeBlockInfoPtr other); void Discard(); - void UpdateRefs(); void SetProtectedFlags(); - u32 memops; - u32 linkedmemops; - std::map memory_accesses; // key is host pc when access is made, value is opcode id bool read_only; }; diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp index 5286a8a68..31f0cdcbd 100644 --- a/core/hw/sh4/dyna/driver.cpp +++ b/core/hw/sh4/dyna/driver.cpp @@ -149,8 +149,9 @@ bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg) pBranchBlock=pNextBlock=0; code=0; has_jcond=false; - BranchBlock=NextBlock=csc_RetCache=0xFFFFFFFF; - BlockType=BET_SCL_Intr; + BranchBlock = 0xFFFFFFFF; + NextBlock = 0xFFFFFFFF; + BlockType = BET_SCL_Intr; has_fpu_op = false; temp_block = false; diff --git a/core/hw/sh4/dyna/ngen.h b/core/hw/sh4/dyna/ngen.h index a45994f4c..212eee0f6 100644 --- a/core/hw/sh4/dyna/ngen.h +++ b/core/hw/sh4/dyna/ngen.h @@ -43,6 +43,7 @@ #pragma once #include "decoder.h" #include "blockmanager.h" +#include "oslib/host_context.h" #define CODE_SIZE (10*1024*1024) #ifdef NO_MMU @@ -111,7 +112,8 @@ extern void (*ngen_FailedToFindBlock)(); void ngen_mainloop(void* cntx); void ngen_GetFeatures(ngen_features* dst); -void ngen_HandleException(); +void ngen_HandleException(host_context_t &context); +bool ngen_Rewrite(host_context_t &context, void *faultAddress); //Canonical callback interface enum CanonicalParamType diff --git a/core/hw/sh4/modules/fastmmu.cpp b/core/hw/sh4/modules/fastmmu.cpp index afebe609e..c12c989ba 100644 --- a/core/hw/sh4/modules/fastmmu.cpp +++ b/core/hw/sh4/modules/fastmmu.cpp @@ -82,7 +82,6 @@ bool find_entry_by_page_size(u32 address, const TLB_Entry **ret_entry) u32 vpn = (address >> (10 + shift)) << shift; u16 bucket = bucket_index(vpn << 10, size); TLB_LinkedEntry *pEntry = entry_buckets[bucket]; - u32 length = 0; while (pEntry != NULL) { if (pEntry->entry.Address.VPN == vpn && (size >> 1) == pEntry->entry.Data.SZ1 && (size & 1) == pEntry->entry.Data.SZ0) diff --git a/core/linux/common.cpp b/core/linux/common.cpp index 7cd1d7060..bf4c3768b 100644 --- a/core/linux/common.cpp +++ b/core/linux/common.cpp @@ -16,20 +16,21 @@ #include "hw/sh4/dyna/blockmanager.h" #include "hw/mem/vmem32.h" -#include "linux/context.h" +#include "oslib/host_context.h" #include "hw/sh4/dyna/ngen.h" #if !defined(TARGET_NO_EXCEPTIONS) -bool ngen_Rewrite(unat& addr,unat retadr,unat acc); -u32* ngen_readm_fail_v2(u32* ptr,u32* regs,u32 saddr); bool VramLockedWrite(u8* address); bool BM_LockedWrite(u8* address); +void context_from_segfault(host_context_t* hctx, void* segfault_ctx); +void context_to_segfault(host_context_t* hctx, void* segfault_ctx); + #if defined(__APPLE__) void sigill_handler(int sn, siginfo_t * si, void *segfault_ctx) { - rei_host_context_t ctx; + host_context_t ctx; context_from_segfault(&ctx, segfault_ctx); @@ -45,13 +46,12 @@ void sigill_handler(int sn, siginfo_t * si, void *segfault_ctx) { void fault_handler (int sn, siginfo_t * si, void *segfault_ctx) { - rei_host_context_t ctx; - context_from_segfault(&ctx, segfault_ctx); - - bool dyna_cde = ((unat)CC_RX2RW(ctx.pc) > (unat)CodeCache) && ((unat)CC_RX2RW(ctx.pc) < (unat)(CodeCache + CODE_SIZE + TEMP_CODE_SIZE)); - #if !defined(NO_MMU) && defined(HOST_64BIT_CPU) + // WinCE virtual memory #if HOST_CPU == CPU_ARM64 +#define HOST_CTX_READY + host_context_t ctx; + context_from_segfault(&ctx, segfault_ctx); u32 op = *(u32*)ctx.pc; bool write = (op & 0x00400000) == 0; u32 exception_pc = ctx.x2; @@ -59,52 +59,49 @@ void fault_handler (int sn, siginfo_t * si, void *segfault_ctx) bool write = false; // TODO? u32 exception_pc = 0; #endif - if (vmem32_handle_signal(si->si_addr, write, exception_pc)) + int rv = vmem32_handle_signal(si->si_addr, write, exception_pc); + if (rv == 1) return; + if (rv == -1) + { +#ifndef HOST_CTX_READY + host_context_t ctx; + context_from_segfault(&ctx, segfault_ctx); #endif + ngen_HandleException(ctx); + context_to_segfault(&ctx, segfault_ctx); + return; + } +#endif + // code protection in RAM if (bm_RamWriteAccess(si->si_addr)) return; - if (VramLockedWrite((u8*)si->si_addr) || BM_LockedWrite((u8*)si->si_addr)) + // texture protection in VRAM + if (VramLockedWrite((u8*)si->si_addr)) + return; + // FPCB jump table protection + if (BM_LockedWrite((u8*)si->si_addr)) return; - #if FEAT_SHREC == DYNAREC_JIT - #if HOST_CPU==CPU_ARM - else if (dyna_cde) - { - ctx.pc = (u32)ngen_readm_fail_v2((u32*)ctx.pc, ctx.r, (unat)si->si_addr); - context_to_segfault(&ctx, segfault_ctx); - } - #elif HOST_CPU==CPU_X86 - else if (ngen_Rewrite((unat&)ctx.pc, *(unat*)ctx.esp, ctx.eax)) - { - //remove the call from call stack - ctx.esp += 4; - //restore the addr from eax to ecx so it's valid again - ctx.ecx = ctx.eax; +#if FEAT_SHREC == DYNAREC_JIT + // fast mem access rewriting +#ifndef HOST_CTX_READY + host_context_t ctx; + context_from_segfault(&ctx, segfault_ctx); +#endif + bool dyna_cde = ((unat)CC_RX2RW(ctx.pc) >= (unat)CodeCache) && ((unat)CC_RX2RW(ctx.pc) < (unat)(CodeCache + CODE_SIZE + TEMP_CODE_SIZE)); - context_to_segfault(&ctx, segfault_ctx); - } - #elif HOST_CPU == CPU_X64 - else if (dyna_cde && ngen_Rewrite((unat&)ctx.pc, 0, 0)) - { - context_to_segfault(&ctx, segfault_ctx); - } - #elif HOST_CPU == CPU_ARM64 - else if (dyna_cde && ngen_Rewrite(ctx.pc, 0, 0)) - { - context_to_segfault(&ctx, segfault_ctx); - } - #else - #error JIT: Not supported arch - #endif - #endif - else + if (dyna_cde && ngen_Rewrite(ctx, si->si_addr)) { - ERROR_LOG(COMMON, "SIGSEGV @ %zx -> %p was not in vram, dynacode:%d", ctx.pc, si->si_addr, dyna_cde); - die("segfault"); - signal(SIGSEGV, SIG_DFL); + context_to_segfault(&ctx, segfault_ctx); + return; } +#endif + ERROR_LOG(COMMON, "SIGSEGV @ %p -> %p was not in vram, dynacode:%d", (void *)ctx.pc, si->si_addr, dyna_cde); + die("segfault"); + signal(SIGSEGV, SIG_DFL); } +#undef HOST_CTX_READY void install_fault_handler(void) { diff --git a/core/linux/context.cpp b/core/linux/context.cpp index b61c3b495..385d6769a 100644 --- a/core/linux/context.cpp +++ b/core/linux/context.cpp @@ -1,4 +1,4 @@ -#include "context.h" +#include "oslib/host_context.h" #if defined(__ANDROID__) #include @@ -17,76 +17,86 @@ ////// #define MCTX(p) (((ucontext_t *)(segfault_ctx))->uc_mcontext p) -template -void bicopy(Ta& rei, Tb& seg, bool to_segfault) { - if (to_segfault) { - seg = rei; - } - else { - rei = seg; - } +template +static void bicopy(Tctx& ctx, Tseg& seg) +{ + static_assert(sizeof(Tctx) == sizeof(Tseg), "Invalid assignment"); + if (ToSegfault) + seg = (Tseg)ctx; + else + ctx = (Tctx)seg; } -void context_segfault(rei_host_context_t* reictx, void* segfault_ctx, bool to_segfault) { - +template +static void context_segfault(host_context_t* hostctx, void* segfault_ctx) +{ #if !defined(TARGET_NO_EXCEPTIONS) #if HOST_CPU == CPU_ARM #if defined(__FreeBSD__) - bicopy(reictx->pc, MCTX(.__gregs[_REG_PC]), to_segfault); + bicopy(hostctx->pc, MCTX(.__gregs[_REG_PC])); for (int i = 0; i < 15; i++) - bicopy(reictx->r[i], MCTX(.__gregs[i]), to_segfault); + bicopy(hostctx->reg[i], MCTX(.__gregs[i])); #elif HOST_OS == OS_LINUX - bicopy(reictx->pc, MCTX(.arm_pc), to_segfault); - u32* r =(u32*) &MCTX(.arm_r0); + bicopy(hostctx->pc, MCTX(.arm_pc)); + u32* reg =(u32*) &MCTX(.arm_r0); for (int i = 0; i < 15; i++) - bicopy(reictx->r[i], r[i], to_segfault); + bicopy(hostctx->reg[i], reg[i]); #elif defined(__APPLE__) - bicopy(reictx->pc, MCTX(->__ss.__pc), to_segfault); + bicopy(hostctx->pc, MCTX(->__ss.__pc)); for (int i = 0; i < 15; i++) - bicopy(reictx->r[i], MCTX(->__ss.__r[i]), to_segfault); + bicopy(hostctx->reg[i], MCTX(->__ss.__r[i])); #else #error HOST_OS #endif #elif HOST_CPU == CPU_ARM64 - bicopy(reictx->pc, MCTX(.pc), to_segfault); - bicopy(reictx->x2, MCTX(.regs[2]), to_segfault); + bicopy(hostctx->pc, MCTX(.pc)); + bicopy(hostctx->x2, MCTX(.regs[2])); #elif HOST_CPU == CPU_X86 #if defined(__FreeBSD__) - bicopy(reictx->pc, MCTX(.mc_eip), to_segfault); - bicopy(reictx->esp, MCTX(.mc_esp), to_segfault); - bicopy(reictx->eax, MCTX(.mc_eax), to_segfault); - bicopy(reictx->ecx, MCTX(.mc_ecx), to_segfault); + bicopy(hostctx->pc, MCTX(.mc_eip)); + bicopy(hostctx->esp, MCTX(.mc_esp)); + bicopy(hostctx->eax, MCTX(.mc_eax)); + bicopy(hostctx->ecx, MCTX(.mc_ecx)); #elif HOST_OS == OS_LINUX - bicopy(reictx->pc, MCTX(.gregs[REG_EIP]), to_segfault); - bicopy(reictx->esp, MCTX(.gregs[REG_ESP]), to_segfault); - bicopy(reictx->eax, MCTX(.gregs[REG_EAX]), to_segfault); - bicopy(reictx->ecx, MCTX(.gregs[REG_ECX]), to_segfault); + bicopy(hostctx->pc, MCTX(.gregs[REG_EIP])); + bicopy(hostctx->esp, MCTX(.gregs[REG_ESP])); + bicopy(hostctx->eax, MCTX(.gregs[REG_EAX])); + bicopy(hostctx->ecx, MCTX(.gregs[REG_ECX])); #elif defined(__APPLE__) - bicopy(reictx->pc, MCTX(->__ss.__eip), to_segfault); - bicopy(reictx->esp, MCTX(->__ss.__esp), to_segfault); - bicopy(reictx->eax, MCTX(->__ss.__eax), to_segfault); - bicopy(reictx->ecx, MCTX(->__ss.__ecx), to_segfault); + bicopy(hostctx->pc, MCTX(->__ss.__eip)); + bicopy(hostctx->esp, MCTX(->__ss.__esp)); + bicopy(hostctx->eax, MCTX(->__ss.__eax)); + bicopy(hostctx->ecx, MCTX(->__ss.__ecx)); #else #error HOST_OS #endif #elif HOST_CPU == CPU_X64 #if defined(__FreeBSD__) || defined(__DragonFly__) - bicopy(reictx->pc, MCTX(.mc_rip), to_segfault); + bicopy(hostctx->pc, MCTX(.mc_rip)); #elif defined(__NetBSD__) - bicopy(reictx->pc, MCTX(.__gregs[_REG_RIP]), to_segfault); + bicopy(hostctx->pc, MCTX(.__gregs[_REG_RIP])); + bicopy(hostctx->rsp, MCTX(.__gregs[REG_RSP])); + bicopy(hostctx->r9, MCTX(.__gregs[REG_R9])); + bicopy(hostctx->rdi, MCTX(.__gregs[REG_RDI])); #elif HOST_OS == OS_LINUX - bicopy(reictx->pc, MCTX(.gregs[REG_RIP]), to_segfault); + bicopy(hostctx->pc, MCTX(.gregs[REG_RIP])); + bicopy(hostctx->rsp, MCTX(.gregs[REG_RSP])); + bicopy(hostctx->r9, MCTX(.gregs[REG_R9])); + bicopy(hostctx->rdi, MCTX(.gregs[REG_RDI])); #elif defined(__APPLE__) - bicopy(reictx->pc, MCTX(->__ss.__rip), to_segfault); + bicopy(hostctx->pc, MCTX(->__ss.__rip)); + bicopy(hostctx->rsp, MCTX(->__ss.__rsp)); + bicopy(hostctx->r9, MCTX(->__ss.__r9)); + bicopy(hostctx->rdi, MCTX(->__ss.__rdi)); #else #error HOST_OS #endif #elif HOST_CPU == CPU_MIPS - bicopy(reictx->pc, MCTX(.pc), to_segfault); + bicopy(hostctx->pc, MCTX(.pc)); #elif HOST_CPU == CPU_GENERIC //nothing! #else @@ -96,10 +106,10 @@ void context_segfault(rei_host_context_t* reictx, void* segfault_ctx, bool to_se } -void context_from_segfault(rei_host_context_t* reictx, void* segfault_ctx) { - context_segfault(reictx, segfault_ctx, false); +void context_from_segfault(host_context_t* hostctx, void* segfault_ctx) { + context_segfault(hostctx, segfault_ctx); } -void context_to_segfault(rei_host_context_t* reictx, void* segfault_ctx) { - context_segfault(reictx, segfault_ctx, true); +void context_to_segfault(host_context_t* hostctx, void* segfault_ctx) { + context_segfault(hostctx, segfault_ctx); } diff --git a/core/linux/context.h b/core/oslib/host_context.h similarity index 51% rename from core/linux/context.h rename to core/oslib/host_context.h index bcfceaa90..5a869309a 100644 --- a/core/linux/context.h +++ b/core/oslib/host_context.h @@ -1,9 +1,7 @@ #pragma once - #include "types.h" - -struct rei_host_context_t { +struct host_context_t { #if HOST_CPU != CPU_GENERIC unat pc; #endif @@ -12,12 +10,17 @@ struct rei_host_context_t { u32 eax; u32 ecx; u32 esp; +#elif HOST_CPU == CPU_X64 + u64 rsp; + u64 r9; +#ifdef _WIN32 + u64 rcx; +#else + u64 rdi; +#endif #elif HOST_CPU == CPU_ARM - u32 r[15]; + u32 reg[15]; #elif HOST_CPU == CPU_ARM64 u64 x2; #endif }; - -void context_from_segfault(rei_host_context_t* reictx, void* segfault_ctx); -void context_to_segfault(rei_host_context_t* reictx, void* segfault_ctx); diff --git a/core/rec-ARM/rec_arm.cpp b/core/rec-ARM/rec_arm.cpp index 335e13576..8d442e1f9 100644 --- a/core/rec-ARM/rec_arm.cpp +++ b/core/rec-ARM/rec_arm.cpp @@ -747,9 +747,10 @@ void vmem_slowpath(eReg raddr, eReg rt, eFSReg ft, eFDReg fd, mem_op_type optp, } } -u32* ngen_readm_fail_v2(u32* ptrv,u32* regs,u32 fault_addr) +bool ngen_Rewrite(host_context_t &context, void *faultAddress) { - arm_mem_op* ptr=(arm_mem_op*)ptrv; + u32 *regs = context.reg; + arm_mem_op *ptr = (arm_mem_op *)context.pc; static_assert(sizeof(*ptr) == 4, "sizeof(arm_mem_op) == 4"); @@ -804,7 +805,7 @@ u32* ngen_readm_fail_v2(u32* ptrv,u32* regs,u32 fault_addr) //get some other relevant data u32 sh4_addr=regs[raddr]; - u32 fault_offs=fault_addr-regs[8]; + u32 fault_offs = (uintptr_t)faultAddress - regs[8]; u8* sh4_ctr=(u8*)regs[8]; bool is_sq=(sh4_addr>>26)==0x38; @@ -900,9 +901,10 @@ u32* ngen_readm_fail_v2(u32* ptrv,u32* regs,u32 fault_addr) vmem_platform_flush_cache((void*)ptr, (u8*)emit_ptr - 1, (void*)ptr, (u8*)emit_ptr - 1); - emit_ptr=0; + emit_ptr = 0; + context.pc = (size_t)ptr; - return (u32*)ptr; + return true; } EAPI NEG(eReg Rd, eReg Rs) diff --git a/core/rec-ARM64/rec_arm64.cpp b/core/rec-ARM64/rec_arm64.cpp index e40204d59..fa9298ffc 100644 --- a/core/rec-ARM64/rec_arm64.cpp +++ b/core/rec-ARM64/rec_arm64.cpp @@ -2228,10 +2228,10 @@ static const u32 op_sizes[] = { 4, 8, }; -bool ngen_Rewrite(unat& host_pc, unat, unat) +bool ngen_Rewrite(host_context_t &context, void *faultAddress) { - //LOGI("ngen_Rewrite pc %zx\n", host_pc); - u32 *code_ptr = (u32 *)CC_RX2RW(host_pc); + //LOGI("ngen_Rewrite pc %zx\n", context.pc); + u32 *code_ptr = (u32 *)CC_RX2RW(context.pc); u32 armv8_op = *code_ptr; bool is_read; u32 size; @@ -2264,7 +2264,7 @@ bool ngen_Rewrite(unat& host_pc, unat, unat) } assembler->Finalize(true); delete assembler; - host_pc = (unat)CC_RW2RX(code_rewrite); + context.pc = (unat)CC_RW2RX(code_rewrite); return true; } @@ -2287,8 +2287,9 @@ RuntimeBlockInfo* ngen_AllocateBlock() return new DynaRBI(); } -void ngen_HandleException() +void ngen_HandleException(host_context_t &context) { + // TODO longjmp(jmp_env, 1); } diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index 975e8cb9f..4b638eb75 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -1956,7 +1956,7 @@ void ngen_ResetBlocks() */ } -void ngen_HandleException() +void ngen_HandleException(host_context_t &context) { die("rec-cpp exceptions not supported"); } diff --git a/core/rec-x64/rec_x64.cpp b/core/rec-x64/rec_x64.cpp index 7fbab8828..22f823d42 100644 --- a/core/rec-x64/rec_x64.cpp +++ b/core/rec-x64/rec_x64.cpp @@ -1,7 +1,6 @@ #include "build.h" #if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X64 -#include //#define CANONICAL_TEST @@ -37,12 +36,13 @@ struct DynaRBI : RuntimeBlockInfo static int cycle_counter; static void (*mainloop)(); +static void (*handleException)(); u32 mem_writes, mem_reads; u32 mem_rewrites_w, mem_rewrites_r; -static jmp_buf jmp_env; static u32 exception_raised; +static u64 jmp_rsp; namespace MemSize { enum { @@ -73,6 +73,7 @@ static const u8 *MemHandlerStart, *MemHandlerEnd; void ngen_mainloop(void *) { + verify(mainloop != nullptr); try { mainloop(); } catch (const SH4ThrownException&) { @@ -106,13 +107,9 @@ static void handle_mem_exception(u32 exception_raised, u32 pc) { if (exception_raised) { - if (pc & 1) - // Delay slot - spc = pc - 1; - else - spc = pc; + spc = pc; cycle_counter += 2; // probably more is needed but no easy way to find out - longjmp(jmp_env, 1); + handleException(); } } @@ -150,7 +147,7 @@ static void handle_sh4_exception(SH4ThrownException& ex, u32 pc) } Do_Exception(pc, ex.expEvn, ex.callVect); cycle_counter += 4; // probably more is needed - longjmp(jmp_env, 1); + handleException(); } static void interpreter_fallback(u16 op, OpCallFP *oph, u32 pc) @@ -291,7 +288,7 @@ public: int size = op.flags & 0x7f; if (mmu_enabled()) - mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc + mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc size = size == 1 ? MemSize::S8 : size == 2 ? MemSize::S16 : size == 4 ? MemSize::S32 : MemSize::S64; GenCall((void (*)())MemHandlers[optimise ? MemType::Fast : MemType::Slow][size][MemOp::R], mmu_enabled()); @@ -331,7 +328,7 @@ public: } if (mmu_enabled()) - mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc + mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc size = size == 1 ? MemSize::S8 : size == 2 ? MemSize::S16 : size == 4 ? MemSize::S32 : MemSize::S64; GenCall((void (*)())MemHandlers[optimise ? MemType::Fast : MemType::Slow][size][MemOp::W], mmu_enabled()); } @@ -670,16 +667,7 @@ public: #endif mov(dword[rip + &cycle_counter], SH4_TIMESLICE); - - lea(call_regs64[0], qword[rip + &jmp_env]); -#ifdef _WIN32 - xor_(call_regs64[1], call_regs64[1]); // no frame pointer -#endif -#ifdef _MSC_VER - // FIXME call((const void *)_setjmp); -#else - call((const void *)_setjmp); -#endif + mov(qword[rip + &jmp_rsp], rsp); //run_loop: Xbyak::Label run_loop; @@ -726,29 +714,37 @@ public: pop(rbx); ret(); + //handleException: + Xbyak::Label handleExceptionLabel; + L(handleExceptionLabel); + mov(rsp, qword[rip + &jmp_rsp]); + jmp(run_loop); + genMemHandlers(); ready(); mainloop = (void (*)())getCode(); + handleException = (void(*)())handleExceptionLabel.getAddress(); emit_Skip(getSize()); } - bool rewriteMemAccess(size_t& host_pc, size_t retadr, size_t accessedAddress) + bool rewriteMemAccess(host_context_t &context) { if (!_nvmem_enabled() || (mmu_enabled() && !vmem32_enabled())) return false; - //printf("ngen_Rewrite pc %p\n", host_pc); - if (host_pc < (size_t)MemHandlerStart || host_pc >= (size_t)MemHandlerEnd) + //printf("ngen_Rewrite pc %p\n", context.pc); + if (context.pc < (size_t)MemHandlerStart || context.pc >= (size_t)MemHandlerEnd) return false; - size_t ca = *(s32 *)(retadr - 4) + retadr; + u8 *retAddr = *(u8 **)context.rsp; + void *ca = *(s32 *)(retAddr - 4) + retAddr; for (int size = 0; size < MemSize::Count; size++) { for (int op = 0; op < MemOp::Count; op++) { - if ((size_t)MemHandlers[MemType::Fast][size][op] != ca) + if ((void *)MemHandlers[MemType::Fast][size][op] != ca) continue; //found ! @@ -758,12 +754,21 @@ public: ready(); - host_pc = retadr - 5; + context.pc = (uintptr_t)(retAddr - 5); + // remove the call from the stack + context.rsp += 8; + if (!_nvmem_4gb_space()) + //restore the addr from r9 to arg0 (rcx or rdi) so it's valid again +#ifdef _WIN32 + context.rcx = context.r9; +#else + context.rdi = context.r9; +#endif return true; } } - ERROR_LOG(DYNAREC, "rewriteMemAccess code not found: hpc %08x retadr %08x acc %08x", host_pc, retadr, accessedAddress); + ERROR_LOG(DYNAREC, "rewriteMemAccess code not found: host pc %p", (void *)context.pc); die("Failed to match the code"); return false; @@ -1090,9 +1095,11 @@ private: mov(dword[rax], call_regs[2]); } mov(rax, (uintptr_t)virt_ram_base); - mov(r9, call_regs64[0]); if (!_nvmem_4gb_space()) + { + mov(r9, call_regs64[0]); and_(call_regs[0], 0x1FFFFFFF); + } switch (size) { case MemSize::S8: @@ -1133,18 +1140,22 @@ private: mov(call_regs[1], call_regs[2]); switch (size) { case MemSize::S8: + sub(rsp, 8); if (mmu_enabled()) call((const void *)ReadMemNoEx); else call((const void *)ReadMem8); movsx(eax, al); + add(rsp, 8); break; case MemSize::S16: + sub(rsp, 8); if (mmu_enabled()) call((const void *)ReadMemNoEx); else call((const void *)ReadMem16); movsx(eax, ax); + add(rsp, 8); break; case MemSize::S32: if (mmu_enabled()) @@ -1253,7 +1264,7 @@ private: if (xmm8_mapped || xmm9_mapped || xmm10_mapped || xmm11_mapped) { u32 stack_size = 4 * (xmm8_mapped + xmm9_mapped + xmm10_mapped + xmm11_mapped); - int offset = stack_size; + int offset = stack_size - 4; stack_size = (((stack_size + 15) >> 4) << 4); // Stack needs to be 16-byte aligned before the call if (xmm11_mapped) { @@ -1321,55 +1332,57 @@ void X64RegAlloc::Writeback_FPU(u32 reg, s8 nreg) compiler->RegWriteback_FPU(reg, nreg); } -static BlockCompiler* compiler; +static BlockCompiler* ccCompiler; void ngen_Compile(RuntimeBlockInfo* block, bool smc_checks, bool reset, bool staging, bool optimise) { verify(emit_FreeSpace() >= 16 * 1024); - compiler = new BlockCompiler(); + BlockCompiler compiler; + ::ccCompiler = &compiler; try { - compiler->compile(block, smc_checks, reset, staging, optimise); + compiler.compile(block, smc_checks, reset, staging, optimise); } catch (const Xbyak::Error& e) { ERROR_LOG(DYNAREC, "Fatal xbyak error: %s", e.what()); } - - delete compiler; + ::ccCompiler = nullptr; } void ngen_CC_Start(shil_opcode* op) { - compiler->ngen_CC_Start(*op); + ccCompiler->ngen_CC_Start(*op); } void ngen_CC_Param(shil_opcode* op, shil_param* par, CanonicalParamType tp) { - compiler->ngen_CC_param(*op, *par, tp); + ccCompiler->ngen_CC_param(*op, *par, tp); } void ngen_CC_Call(shil_opcode* op, void* function) { - compiler->ngen_CC_Call(*op, function); + ccCompiler->ngen_CC_Call(*op, function); } void ngen_CC_Finish(shil_opcode* op) { } -bool ngen_Rewrite(size_t& host_pc, size_t retadr, size_t acc) +bool ngen_Rewrite(host_context_t &context, void *faultAddress) { - std::unique_ptr compiler(new BlockCompiler((u8*)(retadr - 5))); + u8 *retAddr = *(u8 **)context.rsp - 5; + BlockCompiler compiler(retAddr); try { - return compiler->rewriteMemAccess(host_pc, retadr, acc); + return compiler.rewriteMemAccess(context); } catch (const Xbyak::Error& e) { ERROR_LOG(DYNAREC, "Fatal xbyak error: %s", e.what()); return false; } } -void ngen_HandleException() +void ngen_HandleException(host_context_t &context) { - longjmp(jmp_env, 1); + context.pc = (uintptr_t)handleException; + context.rsp = jmp_rsp; } void ngen_ResetBlocks() @@ -1378,9 +1391,9 @@ void ngen_ResetBlocks() if (mainloop != nullptr && mainloop != emit_GetCCPtr()) return; - std::unique_ptr compiler(new BlockCompiler()); + BlockCompiler compiler; try { - compiler->genMainloop(); + compiler.genMainloop(); } catch (const Xbyak::Error& e) { ERROR_LOG(DYNAREC, "Fatal xbyak error: %s", e.what()); } diff --git a/core/rec-x86/rec_x86.cpp b/core/rec-x86/rec_x86.cpp index 85f8137bc..9405607c2 100644 --- a/core/rec-x86/rec_x86.cpp +++ b/core/rec-x86/rec_x86.cpp @@ -752,10 +752,11 @@ void ngen_Compile(RuntimeBlockInfo* block, bool smc_checks, bool, bool, bool opt delete compiler; } -bool ngen_Rewrite(size_t& host_pc, size_t addr, size_t acc) +bool ngen_Rewrite(host_context_t &context, void *faultAddress) { - X86Compiler *compiler = new X86Compiler((u8*)(addr - 5)); - bool rv = compiler->rewriteMemAccess(host_pc, addr, acc); + u8 *rewriteAddr = *(u8 **)context.esp - 5; + X86Compiler *compiler = new X86Compiler(rewriteAddr); + bool rv = compiler->rewriteMemAccess(context); delete compiler; return rv; diff --git a/core/rec-x86/rec_x86.h b/core/rec-x86/rec_x86.h index 2a6fc982c..42f47ee29 100644 --- a/core/rec-x86/rec_x86.h +++ b/core/rec-x86/rec_x86.h @@ -75,7 +75,7 @@ public: void genMainloop(); u32 relinkBlock(RuntimeBlockInfo *block); - bool rewriteMemAccess(size_t& host_pc, size_t retadr, size_t acc); + bool rewriteMemAccess(host_context_t &context); private: void genOpcode(RuntimeBlockInfo *block, bool optimise, shil_opcode& op); diff --git a/core/rec-x86/x86_ops.cpp b/core/rec-x86/x86_ops.cpp index d76885a8a..2c5d20e53 100644 --- a/core/rec-x86/x86_ops.cpp +++ b/core/rec-x86/x86_ops.cpp @@ -422,19 +422,20 @@ void X86Compiler::genOpcode(RuntimeBlockInfo* block, bool optimise, shil_opcode& } } -bool X86Compiler::rewriteMemAccess(size_t& host_pc, size_t retadr, size_t acc) +bool X86Compiler::rewriteMemAccess(host_context_t &context) { - //DEBUG_LOG(DYNAREC, "rewriteMemAccess hpc %08x retadr %08x", host_pc, retadr); - if (host_pc < (size_t)MemHandlerStart || host_pc >= (size_t)MemHandlerEnd) + u8 *retAddr = *(u8 **)context.esp; + //DEBUG_LOG(DYNAREC, "rewriteMemAccess hpc %08x retadr %08x", context.pc, (size_t)retAddr); + if (context.pc < (size_t)MemHandlerStart || context.pc >= (size_t)MemHandlerEnd) return false; - u32 ca = *(u32 *)(retadr - 4) + retadr; + void *ca = *(u32 *)(retAddr - 4) + retAddr; for (int size = 0; size < MemOp::SizeCount; size++) { for (int op = 0; op < MemOp::OpCount; op++) { - if ((u32)MemHandlers[MemOp::Fast][size][op] != ca) + if ((void *)MemHandlers[MemOp::Fast][size][op] != ca) continue; //found ! @@ -444,12 +445,16 @@ bool X86Compiler::rewriteMemAccess(size_t& host_pc, size_t retadr, size_t acc) ready(); - host_pc = retadr - 5; + context.pc = (size_t)(retAddr - 5); + //remove the call from call stack + context.esp += 4; + //restore the addr from eax to ecx so it's valid again + context.ecx = context.eax; return true; } } - ERROR_LOG(DYNAREC, "rewriteMemAccess code not found: hpc %08x retadr %08x acc %08x", host_pc, retadr, acc); + ERROR_LOG(DYNAREC, "rewriteMemAccess code not found: hpc %08x retadr %p acc %08x", context.pc, retAddr, context.eax); die("Failed to match the code"); return false; diff --git a/core/windows/winmain.cpp b/core/windows/winmain.cpp index c7943d28a..0b0cd6421 100644 --- a/core/windows/winmain.cpp +++ b/core/windows/winmain.cpp @@ -15,6 +15,8 @@ #include "hw/maple/maple_devs.h" #include "emulator.h" #include "rend/mainui.h" +#include "hw/sh4/dyna/ngen.h" +#include "oslib/host_context.h" #include #include @@ -114,7 +116,6 @@ PCHAR* } bool VramLockedWrite(u8* address); -bool ngen_Rewrite(unat& addr,unat retadr,unat acc); bool BM_LockedWrite(u8* address); static std::shared_ptr kb_gamepad; @@ -133,59 +134,75 @@ void os_SetupInput() #endif } -LONG ExeptionHandler(EXCEPTION_POINTERS *ExceptionInfo) +static void readContext(const EXCEPTION_POINTERS *ep, host_context_t &context) { - EXCEPTION_POINTERS* ep = ExceptionInfo; +#if HOST_CPU == CPU_X86 + context.pc = ep->ContextRecord->Eip; + context.esp = ep->ContextRecord->Esp; + context.eax = ep->ContextRecord->Eax; + context.ecx = ep->ContextRecord->Ecx; +#elif HOST_CPU == CPU_X64 + context.pc = ep->ContextRecord->Rip; + context.rsp = ep->ContextRecord->Rsp; + context.r9 = ep->ContextRecord->R9; + context.rcx = ep->ContextRecord->Rcx; +#endif +} +static void writeContext(EXCEPTION_POINTERS *ep, const host_context_t &context) +{ +#if HOST_CPU == CPU_X86 + ep->ContextRecord->Eip = context.pc; + ep->ContextRecord->Esp = context.esp; + ep->ContextRecord->Eax = context.eax; + ep->ContextRecord->Ecx = context.ecx; +#elif HOST_CPU == CPU_X64 + ep->ContextRecord->Rip = context.pc; + ep->ContextRecord->Rsp = context.rsp; + ep->ContextRecord->R9 = context.r9; + ep->ContextRecord->Rcx = context.rcx; +#endif +} +LONG ExeptionHandler(EXCEPTION_POINTERS *ep) +{ u32 dwCode = ep->ExceptionRecord->ExceptionCode; - EXCEPTION_RECORD* pExceptionRecord=ep->ExceptionRecord; - if (dwCode != EXCEPTION_ACCESS_VIOLATION) return EXCEPTION_CONTINUE_SEARCH; - u8* address=(u8*)pExceptionRecord->ExceptionInformation[1]; + EXCEPTION_RECORD* pExceptionRecord = ep->ExceptionRecord; + u8* address = (u8 *)pExceptionRecord->ExceptionInformation[1]; //printf("[EXC] During access to : 0x%X\n", address); #if 0 - bool write = false; // TODO? + // WinCE virtual memory + bool write = false; if (vmem32_handle_signal(address, write, 0)) return EXCEPTION_CONTINUE_EXECUTION; #endif + // code protection in RAM if (bm_RamWriteAccess(address)) - { return EXCEPTION_CONTINUE_EXECUTION; - } - else if (VramLockedWrite(address)) - { + // texture protection in VRAM + if (VramLockedWrite(address)) return EXCEPTION_CONTINUE_EXECUTION; - } - else if (BM_LockedWrite(address)) - { + // FPCB jump table protection + if (BM_LockedWrite(address)) return EXCEPTION_CONTINUE_EXECUTION; - } + + host_context_t context; + readContext(ep, context); #if FEAT_SHREC == DYNAREC_JIT -#if HOST_CPU == CPU_X86 - else if ( ngen_Rewrite((unat&)ep->ContextRecord->Eip,*(unat*)ep->ContextRecord->Esp,ep->ContextRecord->Eax) ) - { - //remove the call from call stack - ep->ContextRecord->Esp+=4; - //restore the addr from eax to ecx so its valid again - ep->ContextRecord->Ecx=ep->ContextRecord->Eax; - return EXCEPTION_CONTINUE_EXECUTION; - } -#elif HOST_CPU == CPU_X64 - else if (ngen_Rewrite((unat&)ep->ContextRecord->Rip, 0, 0)) - { - return EXCEPTION_CONTINUE_EXECUTION; - } -#endif -#endif - else + // fast mem access rewriting + if (ngen_Rewrite(context, address)) { - ERROR_LOG(COMMON, "[GPF]Unhandled access to : %p", address); - os_DebugBreak(); + writeContext(ep, context); + return EXCEPTION_CONTINUE_EXECUTION; } +#endif + + ERROR_LOG(COMMON, "[GPF] PC %p unhandled access to %p", (void *)context.pc, address); + os_DebugBreak(); return EXCEPTION_CONTINUE_SEARCH; } @@ -592,7 +609,6 @@ void ReserveBottomMemory() } #ifdef _WIN64 -#include "hw/sh4/dyna/ngen.h" typedef union _UNWIND_CODE { struct { diff --git a/shell/apple/emulator-osx/reicast-osx.xcodeproj/project.pbxproj b/shell/apple/emulator-osx/reicast-osx.xcodeproj/project.pbxproj index e7eb04a2c..9fd5122f9 100644 --- a/shell/apple/emulator-osx/reicast-osx.xcodeproj/project.pbxproj +++ b/shell/apple/emulator-osx/reicast-osx.xcodeproj/project.pbxproj @@ -568,7 +568,6 @@ 84B7BE631B72720100F9733F /* khrplatform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = khrplatform.h; sourceTree = ""; }; 84B7BE651B72720100F9733F /* common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = common.cpp; sourceTree = ""; }; 84B7BE661B72720100F9733F /* context.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = context.cpp; sourceTree = ""; }; - 84B7BE671B72720100F9733F /* context.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = context.h; sourceTree = ""; }; 84B7BE6E1B72720200F9733F /* nullDC.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = nullDC.cpp; path = ../../../core/nullDC.cpp; sourceTree = ""; }; 84B7BE701B72720200F9733F /* audiobackend_alsa.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = audiobackend_alsa.cpp; sourceTree = ""; }; 84B7BE711B72720200F9733F /* audiobackend_alsa.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = audiobackend_alsa.h; sourceTree = ""; }; @@ -1805,7 +1804,6 @@ children = ( 84B7BE651B72720100F9733F /* common.cpp */, 84B7BE661B72720100F9733F /* context.cpp */, - 84B7BE671B72720100F9733F /* context.h */, AEF2564722886A2E00348550 /* posix_vmem.cpp */, ); name = linux;