diff --git a/core/linux/unwind_info.cpp b/core/linux/unwind_info.cpp index 1796c4941..20caac489 100644 --- a/core/linux/unwind_info.cpp +++ b/core/linux/unwind_info.cpp @@ -55,6 +55,14 @@ constexpr int dwarfRegRAId = 16; constexpr int dwarfRegXmmId = 17; constexpr int dwarfRegSP = dwarfRegId[4]; // RSP +#elif HOST_CPU == CPU_X86 + +inline static int registerId(int x86Id) { + return x86Id; +} +constexpr int dwarfRegRAId = 8; +constexpr int dwarfRegSP = 4; // ESP + #elif HOST_CPU == CPU_ARM64 // https://developer.arm.com/documentation/ihi0057/latest // @@ -71,7 +79,7 @@ constexpr int dwarfRegSP = 31; #endif -#if HOST_CPU == CPU_X64 || HOST_CPU == CPU_ARM64 +#if HOST_CPU == CPU_X64 || HOST_CPU == CPU_ARM64 || HOST_CPU == CPU_X86 using ByteStream = std::vector; @@ -120,10 +128,10 @@ static void writeSLEB128(ByteStream &stream, int32_t v) static void writePadding(ByteStream &stream) { - int padding = stream.size() % 8; + int padding = stream.size() % sizeof(uintptr_t); if (padding != 0) { - padding = 8 - padding; + padding = sizeof(uintptr_t) - padding; for (int i = 0; i < padding; i++) write(stream, 0); } @@ -160,8 +168,8 @@ static void writeFDE(ByteStream &stream, const ByteStream &fdeInstructions, u32 write(stream, offsetToCIE); functionStart = stream.size(); - write(stream, 0); // func start - write(stream, 0); // func size + write(stream, 0); // func start + write(stream, 0); // func size writeULEB128(stream, 0); // LEB128 augmentation size @@ -171,9 +179,9 @@ static void writeFDE(ByteStream &stream, const ByteStream &fdeInstructions, u32 writeLength(stream, lengthPos, stream.size() - lengthPos - 4); } -static void writeAdvanceLoc(ByteStream &fdeInstructions, u64 offset, u64 &lastOffset) +static void writeAdvanceLoc(ByteStream &fdeInstructions, uintptr_t offset, uintptr_t &lastOffset) { - u64 delta = offset - lastOffset; + uintptr_t delta = offset - lastOffset; if (delta == 0) return; if (delta < (1 << 6)) @@ -227,8 +235,8 @@ static void writeRegisterStackLocationExtended(ByteStream &instructions, int dwa void UnwindInfo::start(void *address) { startAddr = (u8 *)address; -#if HOST_CPU == CPU_X64 - stackOffset = 8; +#if HOST_CPU == CPU_X64 || HOST_CPU == CPU_X86 + stackOffset = sizeof(uintptr_t); #else stackOffset = 0; #endif @@ -243,7 +251,7 @@ void UnwindInfo::start(void *address) void UnwindInfo::pushReg(u32 offset, int reg) { - stackOffset += 8; + stackOffset += sizeof(uintptr_t); writeAdvanceLoc(fdeInstructions, offset, lastOffset); writeDefineStackOffset(fdeInstructions, stackOffset); writeRegisterStackLocation(fdeInstructions, registerId(reg), stackOffset); @@ -289,7 +297,7 @@ size_t UnwindInfo::end(u32 offset, ptrdiff_t rwRxOffset) if (!unwindInfo.empty()) { - u64 *unwindfuncaddr = (u64 *)(unwindInfoDest + functionStart); + uintptr_t *unwindfuncaddr = (uintptr_t *)(unwindInfoDest + functionStart); unwindfuncaddr[0] = (uintptr_t)startAddr + rwRxOffset; unwindfuncaddr[1] = (ptrdiff_t)(endAddr - startAddr); diff --git a/core/oslib/oslib.h b/core/oslib/oslib.h index 8f0fe7ad7..ea48bbfc0 100644 --- a/core/oslib/oslib.h +++ b/core/oslib/oslib.h @@ -76,14 +76,14 @@ private: #endif #if defined(__unix__) || defined(__APPLE__) || defined(__SWITCH__) int stackOffset = 0; - u64 lastOffset = 0; + uintptr_t lastOffset = 0; std::vector cieInstructions; std::vector fdeInstructions; std::vector registeredFrames; #endif }; -#if HOST_CPU != CPU_X64 && HOST_CPU != CPU_ARM64 +#if HOST_CPU != CPU_X64 && HOST_CPU != CPU_ARM64 && (HOST_CPU != CPU_X86 || defined(_WIN32)) inline void UnwindInfo::start(void *address) { } inline void UnwindInfo::pushReg(u32 offset, int reg) { diff --git a/core/rec-x86/rec_x86.cpp b/core/rec-x86/rec_x86.cpp index bfa8a9c04..111bb5cc8 100644 --- a/core/rec-x86/rec_x86.cpp +++ b/core/rec-x86/rec_x86.cpp @@ -26,6 +26,7 @@ #include "hw/sh4/sh4_interrupts.h" #include "hw/sh4/sh4_mem.h" #include "hw/mem/_vmem.h" +#include "oslib/oslib.h" static int cycle_counter; static void (*mainloop)(); @@ -43,6 +44,7 @@ static X86Compiler* compiler; static Xbyak::Operand::Code alloc_regs[] { Xbyak::Operand::EBX, Xbyak::Operand::EBP, Xbyak::Operand::ESI, Xbyak::Operand::EDI, (Xbyak::Operand::Code)-1 }; static s8 alloc_fregs[] = { 7, 6, 5, 4, -1 }; alignas(16) static f32 thaw_regs[4]; +static UnwindInfo unwinder; void X86RegAlloc::doAlloc(RuntimeBlockInfo* block) { @@ -359,14 +361,21 @@ void X86Compiler::thawXMM() void X86Compiler::genMainloop() { + unwinder.start((void *)getCurr()); push(esi); + unwinder.pushReg(getSize(), Xbyak::Operand::ESI); push(edi); + unwinder.pushReg(getSize(), Xbyak::Operand::EDI); push(ebp); + unwinder.pushReg(getSize(), Xbyak::Operand::EBP); push(ebx); + unwinder.pushReg(getSize(), Xbyak::Operand::EBX); #ifndef _WIN32 // 16-byte alignment sub(esp, 12); + unwinder.allocStack(getSize(), 12); #endif + unwinder.endProlog(getSize()); mov(ecx, dword[&Sh4cntx.pc]); @@ -374,7 +383,6 @@ void X86Compiler::genMainloop() mov(eax, 0); //next_pc _MUST_ be on ecx - Xbyak::Label do_iter; Xbyak::Label cleanup; //no_update: Xbyak::Label no_updateLabel; @@ -384,25 +392,6 @@ void X86Compiler::genMainloop() and_(ecx, RAM_SIZE_MAX - 2); jmp(dword[eax + ecx * 2]); -//intc_sched: - Xbyak::Label intc_schedLabel; - L(intc_schedLabel); - add(dword[&cycle_counter], SH4_TIMESLICE); - call((void *)UpdateSystem); - cmp(eax, 0); - jnz(do_iter); - ret(); - -//do_iter: - L(do_iter); - pop(ecx); - call((void *)rdv_DoInterrupts); - mov(ecx, eax); - mov(edx, dword[&Sh4cntx.CpuRunning]); - cmp(edx, 0); - jz(cleanup); - jmp(no_updateLabel); - //cleanup: L(cleanup); #ifndef _WIN32 @@ -416,6 +405,17 @@ void X86Compiler::genMainloop() ret(); +//do_iter: + Xbyak::Label do_iter; + L(do_iter); + pop(ecx); + call((void *)rdv_DoInterrupts); + mov(ecx, eax); + mov(edx, dword[&Sh4cntx.CpuRunning]); + cmp(edx, 0); + jz(cleanup); + jmp(no_updateLabel); + //ngen_LinkBlock_Shared_stub: Xbyak::Label ngen_LinkBlock_Shared_stub; L(ngen_LinkBlock_Shared_stub); @@ -424,6 +424,23 @@ void X86Compiler::genMainloop() call((void *)rdv_LinkBlock); jmp(eax); + size_t unwindSize = unwinder.end(getSize()); + setSize(getSize() + unwindSize); + + // Functions called by blocks + +//intc_sched: + unwinder.start((void *)getCurr()); + size_t startOffset = getSize(); + unwinder.endProlog(0); + Xbyak::Label intc_schedLabel; + L(intc_schedLabel); + add(dword[&cycle_counter], SH4_TIMESLICE); + call((void *)UpdateSystem); + cmp(eax, 0); + jnz(do_iter); + ret(); + //ngen_LinkBlock_cond_Next_stub: Xbyak::Label ngen_LinkBlock_cond_Next_label; L(ngen_LinkBlock_cond_Next_label); @@ -442,6 +459,25 @@ void X86Compiler::genMainloop() mov(edx, dword[&Sh4cntx.jdyn]); jmp(ngen_LinkBlock_Shared_stub); + genMemHandlers(); + + unwindSize = unwinder.end(getSize() - startOffset); + setSize(getSize() + unwindSize); + + // The following code and all code blocks use the same stack frame as mainloop() + // (direct jump from there or from a block) + unwinder.start((void *)getCurr()); + startOffset = getSize(); + unwinder.pushReg(0, Xbyak::Operand::ESI); + unwinder.pushReg(0, Xbyak::Operand::EDI); + unwinder.pushReg(0, Xbyak::Operand::EBP); + unwinder.pushReg(0, Xbyak::Operand::EBX); +#ifndef _WIN32 + // 16-byte alignment + unwinder.allocStack(0, 12); +#endif + unwinder.endProlog(0); + //ngen_FailedToFindBlock_: Xbyak::Label failedToFindBlock; L(failedToFindBlock); @@ -455,7 +491,8 @@ void X86Compiler::genMainloop() call((void *)rdv_BlockCheckFail); jmp(eax); - genMemHandlers(); + unwindSize = unwinder.end(CODE_SIZE - 128 - startOffset); + verify(unwindSize <= 128); ready(); @@ -711,6 +748,7 @@ void ngen_ResetBlocks() if (mainloop != nullptr) return; + unwinder.clear(); compiler = new X86Compiler(); try { @@ -732,8 +770,6 @@ void ngen_mainloop(void* v_cntx) mainloop(); } catch (const SH4ThrownException&) { ERROR_LOG(DYNAREC, "SH4ThrownException in mainloop"); - } catch (...) { - ERROR_LOG(DYNAREC, "Uncaught unknown exception in mainloop"); } }