From 8a9fbb43e645b7b3aab9c74c89c7ee7b19210531 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Tue, 20 Aug 2024 20:31:36 -0500 Subject: [PATCH 1/7] EE:Rec: Avoid rbx Will be used for holding a pointer to the PCSX2 text section --- pcsx2/x86/R5900_Profiler.h | 9 ++++----- pcsx2/x86/iCore.cpp | 4 ++++ pcsx2/x86/iR3000A.cpp | 16 ++++++++-------- pcsx2/x86/ix86-32/iR5900.cpp | 20 ++++++++++---------- pcsx2/x86/ix86-32/recVTLB.cpp | 11 ++++------- pcsx2/x86/microVU_Misc.h | 8 ++++---- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/pcsx2/x86/R5900_Profiler.h b/pcsx2/x86/R5900_Profiler.h index a42e862fc0..e0c2124f16 100644 --- a/pcsx2/x86/R5900_Profiler.h +++ b/pcsx2/x86/R5900_Profiler.h @@ -364,14 +364,13 @@ struct eeProfiler } } - // Warning dirty ebx - void EmitMem() + void EmitMem(int addr_reg) { // Compact the 4GB virtual address to a 512KB virtual address if (x86caps.hasBMI2) { - xPEXT(ebx, ecx, ptr[&memMask]); - xADD(ptr32[(rbx * 4) + memStats], 1); + xPEXT(arg1regd, xRegister32(addr_reg), ptr[&memMask]); + xADD(ptr32[(arg1reg * 4) + memStats], 1); } } @@ -403,7 +402,7 @@ struct eeProfiler __fi void Reset() {} __fi void EmitOp(eeOpcode op) {} __fi void Print() {} - __fi void EmitMem() {} + __fi void EmitMem(int addrReg) {} __fi void EmitConstMem(u32 add) {} __fi void EmitSlowMem() {} __fi void EmitFastMem() {} diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index a0fc64ab10..4d91c1a181 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -52,6 +52,10 @@ bool _isAllocatableX86reg(int x86reg) if (CHECK_FASTMEM && x86reg == 5) return false; + // rbx is used to reference PCSX2 program text + if (xGetTextPtr() && x86reg == RTEXTPTR.GetId()) + return false; + #ifdef ENABLE_VTUNE // vtune needs ebp... 
if (!CHECK_FASTMEM && x86reg == 5) diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index ac0139a8e4..0a5856390b 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -175,10 +175,10 @@ static const void* _DynGen_JITCompile() xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc]); xMOV(eax, ptr[&psxRegs.pc]); - xMOV(ebx, eax); + xMOV(edx, eax); xSHR(eax, 16); xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]); - xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); + xJMP(ptrNative[rdx * (wordsize / 4) + rcx]); return retval; } @@ -196,10 +196,10 @@ static const void* _DynGen_DispatcherReg() u8* retval = xGetPtr(); xMOV(eax, ptr[&psxRegs.pc]); - xMOV(ebx, eax); + xMOV(edx, eax); xSHR(eax, 16); xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]); - xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); + xJMP(ptrNative[rdx * (wordsize / 4) + rcx]); return retval; } @@ -1181,16 +1181,16 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) } else { - xMOV(ebx, ptr32[&psxRegs.cycle]); - xADD(ebx, blockCycles); - xMOV(ptr32[&psxRegs.cycle], ebx); // update cycles + xMOV(r12d, ptr32[&psxRegs.cycle]); + xADD(r12d, blockCycles); + xMOV(ptr32[&psxRegs.cycle], r12d); // update cycles // jump if iopCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE) iPsxAddEECycles(blockCycles); xJLE(iopExitRecompiledCode); // check if an event is pending - xSUB(ebx, ptr32[&psxRegs.iopNextEventCycle]); + xSUB(r12d, ptr32[&psxRegs.iopNextEventCycle]); xForwardJS nointerruptpending; xFastCall((void*)iopEventTest); diff --git a/pcsx2/x86/ix86-32/iR5900.cpp b/pcsx2/x86/ix86-32/iR5900.cpp index c08bb247de..a716d1f07f 100644 --- a/pcsx2/x86/ix86-32/iR5900.cpp +++ b/pcsx2/x86/ix86-32/iR5900.cpp @@ -381,10 +381,10 @@ static const void* _DynGen_JITCompile() // void(**base)() = (void(**)())recLUT[addr >> 16]; // base[addr >> 2](); xMOV(eax, ptr[&cpuRegs.pc]); - xMOV(ebx, eax); + xMOV(edx, eax); xSHR(eax, 16); xMOV(rcx, 
ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]); - xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); + xJMP(ptrNative[rdx * (wordsize / 4) + rcx]); return retval; } @@ -406,10 +406,10 @@ static const void* _DynGen_DispatcherReg() // void(**base)() = (void(**)())recLUT[addr >> 16]; // base[addr >> 2](); xMOV(eax, ptr[&cpuRegs.pc]); - xMOV(ebx, eax); + xMOV(edx, eax); xSHR(eax, 16); xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]); - xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); + xJMP(ptrNative[rdx * (wordsize / 4) + rcx]); return retval; } @@ -2138,26 +2138,26 @@ static bool recSkipTimeoutLoop(s32 reg, bool is_timeout_loop) // if new_v0 > 0 { jump to dispatcher because loop exited early } // else new_v0 is 0, so exit loop - xMOV(ebx, ptr32[&cpuRegs.cycle]); // ebx = cycle + xMOV(r12d, ptr32[&cpuRegs.cycle]); // r12d = cycle xMOV(ecx, ptr32[&cpuRegs.nextEventCycle]); // ecx = nextEventCycle - xCMP(ebx, ecx); + xCMP(r12d, ecx); //xJAE((void*)DispatcherEvent); // jump to dispatcher if event immediately // TODO: In the case where nextEventCycle < cycle because it's overflowed, tack 8 // cycles onto the event count, so hopefully it'll wrap around. This is pretty // gross, but until we switch to 64-bit counters, not many better options. 
xForwardJB8 not_dispatcher; - xADD(ebx, 8); - xMOV(ptr32[&cpuRegs.cycle], ebx); + xADD(r12d, 8); + xMOV(ptr32[&cpuRegs.cycle], r12d); xJMP((void*)DispatcherEvent); not_dispatcher.SetTarget(); xMOV(edx, ptr32[&cpuRegs.GPR.r[reg].UL[0]]); // eax = v0 - xLEA(rax, ptrNative[rdx * 8 + rbx]); // edx = v0 * 8 + cycle + xLEA(rax, ptrNative[rdx * 8 + r12]); // edx = v0 * 8 + cycle xCMP(rcx, rax); xCMOVB(rax, rcx); // eax = new_cycles = min(v8 * 8, nextEventCycle) xMOV(ptr32[&cpuRegs.cycle], eax); // writeback new_cycles - xSUB(eax, ebx); // new_cycles -= cycle + xSUB(eax, r12d); // new_cycles -= cycle xSHR(eax, 3); // compute new v0 value xSUB(edx, eax); // v0 -= cycle_diff xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[0]], edx); // write back new value of v0 diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index d6e9a099ac..1a95793de0 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -119,14 +119,12 @@ static void __vectorcall LogWriteQuad(u32 addr, __m128i val) namespace vtlb_private { // ------------------------------------------------------------------------ - // Prepares eax, ecx, and, ebx for Direct or Indirect operations. - // Returns the writeback pointer for ebx (return address from indirect handling) + // Prepares eax and ecx for Direct or Indirect operations. 
// static void DynGen_PrepRegs(int addr_reg, int value_reg, u32 sz, bool xmm) { - EE::Profiler.EmitMem(); - _freeX86reg(arg1regd); + EE::Profiler.EmitMem(addr_reg); xMOV(arg1regd, xRegister32(addr_reg)); if (value_reg >= 0) @@ -269,7 +267,7 @@ static void DynGen_HandlerTest(const GenDirectFn& gen_direct, int mode, int bits // ------------------------------------------------------------------------ // Generates the various instances of the indirect dispatchers -// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64), rbx: function return ptr +// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64) // Out: eax: result (if mode < 64) static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign) { @@ -939,14 +937,13 @@ void vtlb_DynBackpatchLoadStore(uptr code_address, u32 code_size, u32 guest_pc, u32 num_gprs = 0; u32 num_fprs = 0; - const u32 rbxid = static_cast<u32>(rbx.GetId()); const u32 arg1id = static_cast<u32>(arg1reg.GetId()); const u32 arg2id = static_cast<u32>(arg2reg.GetId()); const u32 arg3id = static_cast<u32>(arg3reg.GetId()); for (u32 i = 0; i < iREGCNT_GPR; i++) { - if ((gpr_bitmask & (1u << i)) && (i == rbxid || i == arg1id || i == arg2id || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i)) + if ((gpr_bitmask & (1u << i)) && (i == arg1id || i == arg2id || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i)) num_gprs++; } for (u32 i = 0; i < iREGCNT_XMM; i++) diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 9c587820a2..6981a3be3e 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -147,10 +147,10 @@ static const char branchSTR[16][8] = { #define gprT1b ax // Low 16-bit of gprT1 (eax) #define gprT2b cx // Low 16-bit of gprT2 (ecx) -#define gprF0 ebx // Status Flag 0 -#define gprF1 r12d // Status Flag 1 -#define gprF2 r13d // Status Flag 2 -#define gprF3 r14d // Status Flag 3 +#define gprF0 r12d // Status Flag 0 +#define gprF1 r13d // Status Flag 1 
+#define gprF2 r14d // Status Flag 2 +#define gprF3 r15d // Status Flag 3 // Function Params #define mP microVU& mVU, int recPass From fe2f97eeb5079b909577bba806372362290d162c Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Wed, 21 Aug 2024 00:54:07 -0500 Subject: [PATCH 2/7] EE:Rec: Allow rec memory anywhere --- common/emitter/x86emitter.cpp | 41 ++++++++++++++++++++++++++++++---- common/emitter/x86types.h | 4 ++++ pcsx2/x86/iR3000A.cpp | 4 ++++ pcsx2/x86/iR5900.h | 5 +++++ pcsx2/x86/ix86-32/iR5900.cpp | 5 +++++ pcsx2/x86/ix86-32/recVTLB.cpp | 1 + pcsx2/x86/microVU.cpp | 1 + pcsx2/x86/microVU.h | 1 + pcsx2/x86/microVU_Execute.inl | 21 +++++++++-------- pcsx2/x86/microVU_IR.h | 1 + pcsx2/x86/microVU_Lower.inl | 20 ++++++++--------- pcsx2/x86/microVU_Misc.inl | 13 +++++++++-- pcsx2/x86/newVif_Dynarec.cpp | 16 ++++++------- pcsx2/x86/newVif_UnpackSSE.cpp | 9 +++++--- pcsx2/x86/newVif_UnpackSSE.h | 1 + 15 files changed, 107 insertions(+), 36 deletions(-) diff --git a/common/emitter/x86emitter.cpp b/common/emitter/x86emitter.cpp index 157d25e4b4..f34a375e76 100644 --- a/common/emitter/x86emitter.cpp +++ b/common/emitter/x86emitter.cpp @@ -49,6 +49,7 @@ thread_local u8* x86Ptr; +thread_local u8* xTextPtr; thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM] = {XMMT_INT}; namespace x86Emitter @@ -295,13 +296,27 @@ const xRegister32 void EmitSibMagic(uint regfield, const void* address, int extraRIPOffset) { sptr displacement = (sptr)address; + sptr textRelative = (sptr)address - (sptr)xTextPtr; sptr ripRelative = (sptr)address - ((sptr)x86Ptr + sizeof(s8) + sizeof(s32) + extraRIPOffset); + // Can we use an 8-bit offset from the text pointer? + if (textRelative == (s8)textRelative && xTextPtr) + { + ModRM(1, regfield, RTEXTPTR.GetId()); + xWrite((s8)textRelative); + return; + } // Can we use a rip-relative address? 
(Prefer this over eiz because it's a byte shorter) - if (ripRelative == (s32)ripRelative) + else if (ripRelative == (s32)ripRelative) { ModRM(0, regfield, ModRm_UseDisp32); displacement = ripRelative; } + // How about from the text pointer? + else if (textRelative == (s32)textRelative && xTextPtr) + { + ModRM(2, regfield, RTEXTPTR.GetId()); + displacement = textRelative; + } else { pxAssertMsg(displacement == (s32)displacement, "SIB target is too far away, needs an indirect register"); @@ -539,6 +554,12 @@ const xRegister32 x86Ptr = (u8*)ptr; } + // Assigns the current emitter text base address. + __emitinline void xSetTextPtr(void* ptr) + { + xTextPtr = (u8*)ptr; + } + // Retrieves the current emitter buffer target address. // This is provided instead of using x86Ptr directly, since we may in the future find // a need to change the storage class system for the x86Ptr 'under the hood.' @@ -547,6 +568,12 @@ const xRegister32 return x86Ptr; } + // Retrieves the current emitter text base address. 
+ __emitinline u8* xGetTextPtr() + { + return xTextPtr; + } + __emitinline void xAlignPtr(uint bytes) { // forward align @@ -1229,6 +1256,9 @@ const xRegister32 #endif stackAlign(m_offset, true); + + if (u8* ptr = xGetTextPtr()) + xMOV64(RTEXTPTR, (sptr)ptr); } xScopedStackFrame::~xScopedStackFrame() @@ -1285,11 +1315,14 @@ const xRegister32 { return offset + base; } - else + if (u8* ptr = xGetTextPtr()) { - xLEA(tmpRegister, ptr[base]); - return offset + tmpRegister; + sptr tbase = (sptr)base - (sptr)ptr; + if (tbase == (s32)tbase) + return offset + RTEXTPTR + tbase; } + xLEA(tmpRegister, ptr[base]); + return offset + tmpRegister; } void xLoadFarAddr(const xAddressReg& dst, void* addr) diff --git a/common/emitter/x86types.h b/common/emitter/x86types.h index e898b9400a..98788421c7 100644 --- a/common/emitter/x86types.h +++ b/common/emitter/x86types.h @@ -149,11 +149,13 @@ namespace x86Emitter static const int Sib_UseDisp32 = 5; // same index value as EBP (used in Base field) extern void xSetPtr(void* ptr); + extern void xSetTextPtr(void* ptr); extern void xAlignPtr(uint bytes); extern void xAdvancePtr(uint bytes); extern void xAlignCallTarget(); extern u8* xGetPtr(); + extern u8* xGetTextPtr(); extern u8* xGetAlignedCallTarget(); extern JccComparisonType xInvertCond(JccComparisonType src); @@ -646,6 +648,8 @@ extern const xRegister32 calleeSavedReg1d, calleeSavedReg2d; +/// Holds a pointer to program text at all times so we don't need to be within 2GB of text +static constexpr const xAddressReg& RTEXTPTR = rbx; // clang-format on diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 0a5856390b..1bdb3956e0 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -890,10 +890,13 @@ static void recReserve() pxFailRel("Failed to allocate R3000 InstCache array."); } +#define R3000A_TEXTPTR (&psxRegs.GPR.r[33]) + void recResetIOP() { DevCon.WriteLn("iR3000A Recompiler reset."); + xSetTextPtr(R3000A_TEXTPTR); xSetPtr(SysMemory::GetIOPRec()); 
_DynGen_Dispatchers(); recPtr = xGetPtr(); @@ -1565,6 +1568,7 @@ static void iopRecRecompile(const u32 startpc) recResetIOP(); } + xSetTextPtr(R3000A_TEXTPTR); xSetPtr(recPtr); recPtr = xGetAlignedCallTarget(); diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 1c0bfe4a4b..f5fc4e3226 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -21,6 +21,11 @@ extern u32 target; // branch target extern u32 s_nBlockCycles; // cycles of current block recompiling extern bool s_nBlockInterlocked; // Current block has VU0 interlocking +// x86 can use shorter displacement if it fits in an s8, so offset 144 bytes into the cpuRegs +// This will allow us to reach r1-r16 with a shorter encoding +// TODO: Actually figure out what things are used most often, maybe rearrange the cpuRegs struct, and point at that +#define R5900_TEXTPTR (&cpuRegs.GPR.r[9]) + ////////////////////////////////////////////////////////////////////////////////////////// // diff --git a/pcsx2/x86/ix86-32/iR5900.cpp b/pcsx2/x86/ix86-32/iR5900.cpp index a716d1f07f..26c71edc3d 100644 --- a/pcsx2/x86/ix86-32/iR5900.cpp +++ b/pcsx2/x86/ix86-32/iR5900.cpp @@ -445,6 +445,8 @@ static const void* _DynGen_EnterRecompiledCode() xSUB(rsp, stack_size); #endif + if (u8* ptr = xGetTextPtr()) + xMOV64(RTEXTPTR, (sptr)ptr); if (CHECK_FASTMEM) xMOV(RFASTMEMBASE, ptrNative[&vtlb_private::vtlbdata.fastmem_base]); @@ -585,6 +587,7 @@ static void recResetRaw() EE::Profiler.Reset(); + xSetTextPtr(R5900_TEXTPTR); xSetPtr(SysMemory::GetEERec()); _DynGen_Dispatchers(); vtlb_DynGenDispatchers(); @@ -897,6 +900,7 @@ u8* recBeginThunk() if (recPtr >= recPtrEnd) eeRecNeedsReset = true; + xSetTextPtr(R5900_TEXTPTR); xSetPtr(recPtr); recPtr = xGetAlignedCallTarget(); @@ -2191,6 +2195,7 @@ static void recRecompile(const u32 startpc) recResetRaw(); } + xSetTextPtr(R5900_TEXTPTR); xSetPtr(recPtr); recPtr = xGetAlignedCallTarget(); diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 
1a95793de0..4884d31442 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -345,6 +345,7 @@ void vtlb_DynGenDispatchers() for (int sign = 0; sign < (!mode && bits < 3 ? 2 : 1); sign++) { xSetPtr(GetIndirectDispatcherPtr(mode, bits, !!sign)); + xSetTextPtr(R5900_TEXTPTR); DynGen_IndirectTlbDispatcher(mode, bits, !!sign); } diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 6c2c4b2a2e..a2b7b39310 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -42,6 +42,7 @@ void mVUreset(microVU& mVU, bool resetReserve) VU0.VI[REG_VPU_STAT].UL &= ~0x100; } + xSetTextPtr(mVU.textPtr()); xSetPtr(mVU.cache); mVUdispatcherAB(mVU); mVUdispatcherCD(mVU); diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 2fd22f678d..db42203e51 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -123,6 +123,7 @@ struct microVU s32 cycles; // Cycles Counter VURegs& regs() const { return ::vuRegs[index]; } + void* textPtr() const { return (index && THREAD_VU1) ? 
(void*)&regs().VF[9] : (void*)R5900_TEXTPTR; } __fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; } __fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; } diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index f59910988a..cf15fd019d 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -207,15 +207,17 @@ static void mVUGenerateCopyPipelineState(mV) { mVU.copyPLState = xGetAlignedCallTarget(); + xLoadFarAddr(rdx, reinterpret_cast(&mVU.prog.lpState)); + if (cpuinfo_has_x86_avx()) { xVMOVAPS(ymm0, ptr[rax]); xVMOVAPS(ymm1, ptr[rax + 32u]); xVMOVAPS(ymm2, ptr[rax + 64u]); - xVMOVUPS(ptr[reinterpret_cast(&mVU.prog.lpState)], ymm0); - xVMOVUPS(ptr[reinterpret_cast(&mVU.prog.lpState) + 32u], ymm1); - xVMOVUPS(ptr[reinterpret_cast(&mVU.prog.lpState) + 64u], ymm2); + xVMOVUPS(ptr[rdx], ymm0); + xVMOVUPS(ptr[rdx + 32u], ymm1); + xVMOVUPS(ptr[rdx + 64u], ymm2); xVZEROUPPER(); } @@ -228,12 +230,12 @@ static void mVUGenerateCopyPipelineState(mV) xMOVAPS(xmm4, ptr[rax + 64u]); xMOVAPS(xmm5, ptr[rax + 80u]); - xMOVUPS(ptr[reinterpret_cast(&mVU.prog.lpState)], xmm0); - xMOVUPS(ptr[reinterpret_cast(&mVU.prog.lpState) + 16u], xmm1); - xMOVUPS(ptr[reinterpret_cast(&mVU.prog.lpState) + 32u], xmm2); - xMOVUPS(ptr[reinterpret_cast(&mVU.prog.lpState) + 48u], xmm3); - xMOVUPS(ptr[reinterpret_cast(&mVU.prog.lpState) + 64u], xmm4); - xMOVUPS(ptr[reinterpret_cast(&mVU.prog.lpState) + 80u], xmm5); + xMOVUPS(ptr[rdx], xmm0); + xMOVUPS(ptr[rdx + 16u], xmm1); + xMOVUPS(ptr[rdx + 32u], xmm2); + xMOVUPS(ptr[rdx + 48u], xmm3); + xMOVUPS(ptr[rdx + 64u], xmm4); + xMOVUPS(ptr[rdx + 80u], xmm5); } xRET(); @@ -326,6 +328,7 @@ _mVUt void* mVUexecute(u32 startPC, u32 cycles) mVU.cycles = cycles; mVU.totalCycles = cycles; + xSetTextPtr(mVU.textPtr()); xSetPtr(mVU.prog.x86ptr); // Set x86ptr to where last program left off return mVUsearchProg(startPC & vuLimit, (uptr)&mVU.prog.lpState); // Find and set correct program } diff --git 
a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index 855e064570..0091639f64 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -411,6 +411,7 @@ public: } } + gprMap[RTEXTPTR.GetId()].usable = !xGetTextPtr(); gprMap[RFASTMEMBASE.GetId()].usable = !cop2mode || !CHECK_FASTMEM; } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 916f6cb43c..ce2b179fef 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -1106,7 +1106,7 @@ mVUop(mVU_ILW) mVU.regAlloc->moveVIToGPR(gprT1, _Is_); if (_Imm11_ != 0) xADD(gprT1, _Imm11_); - mVUaddrFix(mVU, gprT1q); + mVUaddrFix(mVU, gprT1q, gprT2q); } const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI); @@ -1133,7 +1133,7 @@ mVUop(mVU_ILWR) if (_Is_) { mVU.regAlloc->moveVIToGPR(gprT1, _Is_); - mVUaddrFix (mVU, gprT1q); + mVUaddrFix (mVU, gprT1q, gprT2q); const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI); xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]); @@ -1170,7 +1170,7 @@ mVUop(mVU_ISW) mVU.regAlloc->moveVIToGPR(gprT1, _Is_); if (_Imm11_ != 0) xADD(gprT1, _Imm11_); - mVUaddrFix(mVU, gprT1q); + mVUaddrFix(mVU, gprT1q, gprT2q); } // If regT is dirty, the high bits might not be zero. 
@@ -1201,7 +1201,7 @@ mVUop(mVU_ISWR) if (_Is_) { mVU.regAlloc->moveVIToGPR(gprT1, _Is_); - mVUaddrFix(mVU, gprT1q); + mVUaddrFix(mVU, gprT1q, gprT2q); is = gprT1q; } const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true); @@ -1257,7 +1257,7 @@ mVUop(mVU_LQ) mVU.regAlloc->moveVIToGPR(gprT1, _Is_); if (_Imm11_ != 0) xADD(gprT1, _Imm11_); - mVUaddrFix(mVU, gprT1q); + mVUaddrFix(mVU, gprT1q, gprT2q); } const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); @@ -1281,7 +1281,7 @@ mVUop(mVU_LQD) xDEC(regS); xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm mVU.regAlloc->clearNeeded(regS); - mVUaddrFix(mVU, gprT1q); + mVUaddrFix(mVU, gprT1q, gprT2q); is = gprT1q; } else @@ -1319,7 +1319,7 @@ mVUop(mVU_LQI) xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm xINC(regS); mVU.regAlloc->clearNeeded(regS); - mVUaddrFix(mVU, gprT1q); + mVUaddrFix(mVU, gprT1q, gprT2q); is = gprT1q; } if (!mVUlow.noWriteVF) @@ -1351,7 +1351,7 @@ mVUop(mVU_SQ) mVU.regAlloc->moveVIToGPR(gprT1, _It_); if (_Imm11_ != 0) xADD(gprT1, _Imm11_); - mVUaddrFix(mVU, gprT1q); + mVUaddrFix(mVU, gprT1q, gprT2q); } const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W); @@ -1375,7 +1375,7 @@ mVUop(mVU_SQD) xDEC(regT); xMOVZX(gprT1, xRegister16(regT)); mVU.regAlloc->clearNeeded(regT); - mVUaddrFix(mVU, gprT1q); + mVUaddrFix(mVU, gprT1q, gprT2q); it = gprT1q; } else @@ -1405,7 +1405,7 @@ mVUop(mVU_SQI) xMOVZX(gprT1, xRegister16(regT)); xINC(regT); mVU.regAlloc->clearNeeded(regT); - mVUaddrFix(mVU, gprT1q); + mVUaddrFix(mVU, gprT1q, gprT2q); } const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? 
-1 : 0, _X_Y_Z_W); if (_It_) diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index f9e9663a56..021bb90834 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -295,7 +295,7 @@ static void mVUwaitMTVU() } // Transforms the Address in gprReg to valid VU0/VU1 Address -__fi void mVUaddrFix(mV, const xAddressReg& gprReg) +__fi void mVUaddrFix(mV, const xAddressReg& gprReg, const xAddressReg& tmpReg) { if (isVU1) { @@ -324,7 +324,16 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg) xFastCall((void*)mVU.waitMTVU); } xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs! - xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem); + sptr offset = (u128*)VU1.VF - (u128*)VU0.Mem; + if (offset == (s32)offset) + { + xADD(gprReg, offset); + } + else + { + xMOV64(tmpReg, offset); + xADD(gprReg, tmpReg); + } jmpB.SetTarget(); xSHL(gprReg, 4); // multiply by 16 (shift left by 4) } diff --git a/pcsx2/x86/newVif_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp index ed8e548538..ec157c79cd 100644 --- a/pcsx2/x86/newVif_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -23,7 +23,8 @@ void dVifRelease(int idx) } VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_) - : v(vif_) + : vifPtr(rax) + , v(vif_) , vB(vifBlock_) { const int wl = vB.wl ? 
vB.wl : 256; //0 is taken as 256 (KH2) @@ -42,9 +43,6 @@ __fi void makeMergeMask(u32& x) __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const { - const int idx = v.idx; - const vifStruct& vif = MTVU_VifX; - //This could have ended up copying the row when there was no row to write.1810080 u32 m0 = vB.mask; //The actual mask example 0x03020100 u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge) @@ -52,14 +50,14 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const if ((doMask && m2) || doMode) { - xMOVAPS(xmmRow, ptr128[&vif.MaskRow]); + xMOVAPS(xmmRow, ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)]); MSKPATH3_LOG("Moving row"); } if (doMask && m3) { VIF_LOG("Merging Cols"); - xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]); + xMOVAPS(xmmCol0, ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskCol)]); if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1); if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2); if ((cS >= 4) && (m3 & 0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3); @@ -137,8 +135,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const void VifUnpackSSE_Dynarec::writeBackRow() const { - const int idx = v.idx; - xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow); + xMOVAPS(ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)], xmmRow); VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode); } @@ -239,6 +236,7 @@ void VifUnpackSSE_Dynarec::ProcessMasks() void VifUnpackSSE_Dynarec::CompileRoutine() { + const int idx = v.idx; const int wl = vB.wl ? 
vB.wl : 256; // 0 is taken as 256 (KH2) const int upkNum = vB.upkType & 0xf; const u8& vift = nVifT[upkNum]; @@ -252,6 +250,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine() VIF_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum); pxAssume(vCL == 0); + xLoadFarAddr(vifPtr, &MTVU_VifX); // Value passed determines # of col regs we need to load SetMasks(isFill ? blockSize : cycleSize); @@ -336,6 +335,7 @@ _vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill) } // Compile the block now + xSetTextPtr(nullptr); xSetPtr(v.recWritePtr); block.startPtr = (uptr)xGetAlignedCallTarget(); diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index 68b52ce997..5a124cf2b3 100644 --- a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -329,9 +329,11 @@ void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const { xMOVAPS(xmm7, ptr[dstIndirect]); int offX = std::min(curCycle, 3); - xPAND(regX, ptr32[nVifMask[0][offX]]); - xPAND(xmm7, ptr32[nVifMask[1][offX]]); - xPOR (regX, ptr32[nVifMask[2][offX]]); + sptr base = reinterpret_cast<sptr>(nVifMask[2]); + xLoadFarAddr(rax, nVifMask); + xPAND(regX, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[0][offX]) - base)]); + xPAND(xmm7, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[1][offX]) - base)]); + xPOR (regX, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[2][offX]) - base)]); xPOR (regX, xmm7); xMOVAPS(ptr[dstIndirect], regX); } @@ -362,6 +364,7 @@ void VifUnpackSSE_Init() { DevCon.WriteLn("Generating SSE-optimized unpacking functions for VIF interpreters..."); + xSetTextPtr(nullptr); xSetPtr(SysMemory::GetVIFUnpackRec()); for (int a = 0; a < 2; a++) diff --git a/pcsx2/x86/newVif_UnpackSSE.h b/pcsx2/x86/newVif_UnpackSSE.h index 866a5ce1a7..381cb61bac 100644 --- a/pcsx2/x86/newVif_UnpackSSE.h +++ b/pcsx2/x86/newVif_UnpackSSE.h @@ -98,6 +98,7 @@ public: bool inputMasked; protected: + xAddressReg vifPtr; const nVifStruct& v; // vif0 or vif1 const 
nVifBlock& vB; // some pre-collected data from VifStruct int vCL; // internal copy of vif->cl From 5a6b3cba6e93b3b171debdcb87e483ea500de885 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Wed, 21 Aug 2024 01:06:09 -0500 Subject: [PATCH 3/7] Core: Map sys memory anywhere --- pcsx2/Memory.cpp | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index 853c689e0e..fa775c05da 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -122,39 +122,7 @@ u8* SysMemory::TryAllocateVirtualMemory(const char* name, void* file_handle, upt u8* SysMemory::AllocateVirtualMemory(const char* name, void* file_handle, size_t size, size_t offset_from_base) { - // ARM64 does not need the rec areas to be in +/- 2GB. -#ifdef _M_X86 - pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Virtual memory size is page aligned"); - - // Everything looks nicer when the start of all the sections is a nice round looking number. - // Also reduces the variation in the address due to small changes in code. - // Breaks ASLR but so does anything else that tries to make addresses constant for our debugging pleasure - uptr codeBase = (uptr)(void*)AllocateVirtualMemory / (1 << 28) * (1 << 28); - - // The allocation is ~640mb in size, slighly under 3*2^28. - // We'll hope that the code generated for the PCSX2 executable stays under 512mb (which is likely) - // On x86-64, code can reach 8*2^28 from its address [-6*2^28, 4*2^28] is the region that allows for code in the 640mb allocation to reach 512mb of code that either starts at codeBase or 256mb before it. - // We start high and count down because on macOS code starts at the beginning of useable address space, so starting as far ahead as possible reduces address variations due to code size. Not sure about other platforms. Obviously this only actually affects what shows up in a debugger and won't affect performance or correctness of anything. 
- for (int offset = 4; offset >= -6; offset--) - { - uptr base = codeBase + (offset << 28) + offset_from_base; - if ((sptr)base < 0 || (sptr)(base + size - 1) < 0) - { - // VTLB will throw a fit if we try to put EE main memory here - continue; - } - - if (u8* ret = TryAllocateVirtualMemory(name, file_handle, base, size)) - return ret; - - DevCon.Warning("%s: host memory @ 0x%016" PRIXPTR " -> 0x%016" PRIXPTR " is unavailable; attempting to map elsewhere...", name, - base, base + size); - } -#else return TryAllocateVirtualMemory(name, file_handle, 0, size); -#endif - - return nullptr; } bool SysMemory::AllocateMemoryMap() From d8b8af44a0bc31221e4b82bcbe07a1dff3da61e8 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Wed, 21 Aug 2024 02:11:24 -0500 Subject: [PATCH 4/7] Common: Switch back to Linux allocation routines on macOS We don't need MAP_FIXED anymore --- common/CMakeLists.txt | 1 + common/Darwin/DarwinMisc.cpp | 200 ----------------------------------- common/Linux/LnxHostSys.cpp | 27 +++-- 3 files changed, 17 insertions(+), 211 deletions(-) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 8f3c90071e..ced5a64d07 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -149,6 +149,7 @@ elseif(APPLE) Darwin/DarwinThreads.cpp Darwin/DarwinMisc.cpp Darwin/DarwinMisc.h + Linux/LnxHostSys.cpp ) target_compile_options(common PRIVATE -fobjc-arc) target_link_options(common PRIVATE -fobjc-link-runtime) diff --git a/common/Darwin/DarwinMisc.cpp b/common/Darwin/DarwinMisc.cpp index a4f01ac13d..c620be02a1 100644 --- a/common/Darwin/DarwinMisc.cpp +++ b/common/Darwin/DarwinMisc.cpp @@ -16,16 +16,10 @@ #include #include #include -#include -#include #include #include -#include -#include #include -#include #include -#include #include #include @@ -201,200 +195,6 @@ size_t HostSys::GetRuntimeCacheLineSize() return static_cast(std::max(sysctlbyname_T("hw.cachelinesize").value_or(0), 0)); } -static __ri vm_prot_t MachProt(const PageProtectionMode& mode) -{ 
- vm_prot_t machmode = (mode.CanWrite()) ? VM_PROT_WRITE : 0; - machmode |= (mode.CanRead()) ? VM_PROT_READ : 0; - machmode |= (mode.CanExecute()) ? (VM_PROT_EXECUTE | VM_PROT_READ) : 0; - return machmode; -} - -void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode) -{ - pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned"); - if (mode.IsNone()) - return nullptr; - -#ifdef __aarch64__ - // We can't allocate executable memory with mach_vm_allocate() on Apple Silicon. - // Instead, we need to use MAP_JIT with mmap(), which does not support fixed mappings. - if (mode.CanExecute()) - { - if (base) - return nullptr; - - const u32 mmap_prot = mode.CanWrite() ? (PROT_READ | PROT_WRITE | PROT_EXEC) : (PROT_READ | PROT_EXEC); - const u32 flags = MAP_PRIVATE | MAP_ANON | MAP_JIT; - void* const res = mmap(nullptr, size, mmap_prot, flags, -1, 0); - return (res == MAP_FAILED) ? nullptr : res; - } -#endif - - kern_return_t ret = mach_vm_allocate(mach_task_self(), reinterpret_cast(&base), size, - base ? 
VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE); - if (ret != KERN_SUCCESS) - { - DEV_LOG("mach_vm_allocate() returned {}", ret); - return nullptr; - } - - ret = mach_vm_protect(mach_task_self(), reinterpret_cast(base), size, false, MachProt(mode)); - if (ret != KERN_SUCCESS) - { - DEV_LOG("mach_vm_protect() returned {}", ret); - mach_vm_deallocate(mach_task_self(), reinterpret_cast(base), size); - return nullptr; - } - - return base; -} - -void HostSys::Munmap(void* base, size_t size) -{ - if (!base) - return; - - mach_vm_deallocate(mach_task_self(), reinterpret_cast(base), size); -} - -void HostSys::MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode) -{ - pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned"); - - kern_return_t res = mach_vm_protect(mach_task_self(), reinterpret_cast(baseaddr), size, false, - MachProt(mode)); - if (res != KERN_SUCCESS) [[unlikely]] - { - ERROR_LOG("mach_vm_protect() failed: {}", res); - pxFailRel("mach_vm_protect() failed"); - } -} - -std::string HostSys::GetFileMappingName(const char* prefix) -{ - // name actually is not used. - return {}; -} - -void* HostSys::CreateSharedMemory(const char* name, size_t size) -{ - mach_vm_size_t vm_size = size; - mach_port_t port; - const kern_return_t res = mach_make_memory_entry_64( - mach_task_self(), &vm_size, 0, MAP_MEM_NAMED_CREATE | VM_PROT_READ | VM_PROT_WRITE, &port, MACH_PORT_NULL); - if (res != KERN_SUCCESS) - { - ERROR_LOG("mach_make_memory_entry_64() failed: {}", res); - return nullptr; - } - - return reinterpret_cast(static_cast(port)); -} - -void HostSys::DestroySharedMemory(void* ptr) -{ - mach_port_deallocate(mach_task_self(), static_cast(reinterpret_cast(ptr))); -} - -void* HostSys::MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode) -{ - mach_vm_address_t ptr = reinterpret_cast(baseaddr); - const kern_return_t res = mach_vm_map(mach_task_self(), &ptr, size, 0, baseaddr ? 
VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE, - static_cast(reinterpret_cast(handle)), offset, FALSE, - MachProt(mode), VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_NONE); - if (res != KERN_SUCCESS) - { - ERROR_LOG("mach_vm_map() failed: {}", res); - return nullptr; - } - - return reinterpret_cast(ptr); -} - -void HostSys::UnmapSharedMemory(void* baseaddr, size_t size) -{ - const kern_return_t res = mach_vm_deallocate(mach_task_self(), reinterpret_cast(baseaddr), size); - if (res != KERN_SUCCESS) - pxFailRel("Failed to unmap shared memory"); -} - -#ifdef _M_ARM64 - -void HostSys::FlushInstructionCache(void* address, u32 size) -{ - __builtin___clear_cache(reinterpret_cast(address), reinterpret_cast(address) + size); -} - -#endif - -SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages) - : m_base_ptr(base_ptr) - , m_size(size) - , m_num_pages(num_pages) -{ -} - -SharedMemoryMappingArea::~SharedMemoryMappingArea() -{ - pxAssertRel(m_num_mappings == 0, "No mappings left"); - - if (mach_vm_deallocate(mach_task_self(), reinterpret_cast(m_base_ptr), m_size) != KERN_SUCCESS) - pxFailRel("Failed to release shared memory area"); -} - - -std::unique_ptr SharedMemoryMappingArea::Create(size_t size) -{ - pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned"); - - mach_vm_address_t alloc; - const kern_return_t res = - mach_vm_map(mach_task_self(), &alloc, size, 0, VM_FLAGS_ANYWHERE, - MEMORY_OBJECT_NULL, 0, false, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_NONE); - if (res != KERN_SUCCESS) - { - ERROR_LOG("mach_vm_map() failed: {}", res); - return {}; - } - - return std::unique_ptr(new SharedMemoryMappingArea(reinterpret_cast(alloc), size, size / __pagesize)); -} - -u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* map_base, size_t map_size, const PageProtectionMode& mode) -{ - pxAssert(static_cast(map_base) >= m_base_ptr && static_cast(map_base) < (m_base_ptr + m_size)); - - const kern_return_t res = - 
mach_vm_map(mach_task_self(), reinterpret_cast(&map_base), map_size, 0, VM_FLAGS_OVERWRITE, - static_cast(reinterpret_cast(file_handle)), file_offset, false, - MachProt(mode), VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_NONE); - if (res != KERN_SUCCESS) [[unlikely]] - { - ERROR_LOG("mach_vm_map() failed: {}", res); - return nullptr; - } - - m_num_mappings++; - return static_cast(map_base); -} - -bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size) -{ - pxAssert(static_cast(map_base) >= m_base_ptr && static_cast(map_base) < (m_base_ptr + m_size)); - - const kern_return_t res = - mach_vm_map(mach_task_self(), reinterpret_cast(&map_base), map_size, 0, VM_FLAGS_OVERWRITE, - MEMORY_OBJECT_NULL, 0, false, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_NONE); - if (res != KERN_SUCCESS) [[unlikely]] - { - ERROR_LOG("mach_vm_map() failed: {}", res); - return false; - } - - m_num_mappings--; - return true; -} - #ifdef _M_ARM64 static thread_local int s_code_write_depth = 0; diff --git a/common/Linux/LnxHostSys.cpp b/common/Linux/LnxHostSys.cpp index 54a8e2f24a..7b3939810d 100644 --- a/common/Linux/LnxHostSys.cpp +++ b/common/Linux/LnxHostSys.cpp @@ -14,8 +14,10 @@ #include #include #include -#include #include +#ifndef __APPLE__ +#include +#endif #include "fmt/core.h" @@ -23,12 +25,6 @@ #include "cpuinfo.h" #endif -// FreeBSD does not have MAP_FIXED_NOREPLACE, but does have MAP_EXCL. -// MAP_FIXED combined with MAP_EXCL behaves like MAP_FIXED_NOREPLACE. 
-#if defined(__FreeBSD__) && !defined(MAP_FIXED_NOREPLACE) -#define MAP_FIXED_NOREPLACE (MAP_FIXED | MAP_EXCL) -#endif - static __ri uint LinuxProt(const PageProtectionMode& mode) { u32 lnxmode = 0; @@ -53,8 +49,6 @@ void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode) const u32 prot = LinuxProt(mode); u32 flags = MAP_PRIVATE | MAP_ANONYMOUS; - if (base) - flags |= MAP_FIXED_NOREPLACE; void* res = mmap(base, size, prot, flags, -1, 0); if (res == MAP_FAILED) @@ -124,8 +118,12 @@ void* HostSys::MapSharedMemory(void* handle, size_t offset, void* baseaddr, size { const uint lnxmode = LinuxProt(mode); - const int flags = (baseaddr != nullptr) ? (MAP_SHARED | MAP_FIXED_NOREPLACE) : MAP_SHARED; - void* ptr = mmap(baseaddr, size, lnxmode, flags, static_cast(reinterpret_cast(handle)), static_cast(offset)); + int flags = MAP_SHARED; +#ifdef __APPLE__ + if (mode.CanExecute()) + flags |= MAP_JIT; +#endif + void* ptr = mmap(0, size, lnxmode, flags, static_cast(reinterpret_cast(handle)), static_cast(offset)); if (ptr == MAP_FAILED) return nullptr; @@ -138,6 +136,8 @@ void HostSys::UnmapSharedMemory(void* baseaddr, size_t size) pxFailRel("Failed to unmap shared memory"); } +#ifndef __APPLE__ + size_t HostSys::GetRuntimePageSize() { int res = sysconf(_SC_PAGESIZE); @@ -183,6 +183,8 @@ size_t HostSys::GetRuntimeCacheLineSize() #endif } +#endif + SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages) : m_base_ptr(base_ptr) , m_size(size) @@ -236,6 +238,8 @@ bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size) return true; } +#ifndef __APPLE__ // These are done in DarwinMisc + namespace PageFaultHandler { static std::recursive_mutex s_exception_handler_mutex; @@ -370,3 +374,4 @@ bool PageFaultHandler::Install(Error* error) s_installed = true; return true; } +#endif // __APPLE__ From 51c7a723dbd845b55dfb3a76895f341d20591303 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Wed, 21 Aug 2024 03:02:09 -0500 
Subject: [PATCH 5/7] Common: Allow non-file mappings in SharedMemoryMappingArea --- common/HostSys.h | 2 +- common/Linux/LnxHostSys.cpp | 32 ++++++++++++++++++++++++-------- common/Windows/WinHostSys.cpp | 21 ++++++++++++++++----- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/common/HostSys.h b/common/HostSys.h index 23646284d9..7847a84bce 100644 --- a/common/HostSys.h +++ b/common/HostSys.h @@ -146,7 +146,7 @@ namespace PageFaultHandler class SharedMemoryMappingArea { public: - static std::unique_ptr Create(size_t size); + static std::unique_ptr Create(size_t size, bool jit = false); ~SharedMemoryMappingArea(); diff --git a/common/Linux/LnxHostSys.cpp b/common/Linux/LnxHostSys.cpp index 7b3939810d..b5128c64e2 100644 --- a/common/Linux/LnxHostSys.cpp +++ b/common/Linux/LnxHostSys.cpp @@ -201,11 +201,16 @@ SharedMemoryMappingArea::~SharedMemoryMappingArea() } -std::unique_ptr SharedMemoryMappingArea::Create(size_t size) +std::unique_ptr SharedMemoryMappingArea::Create(size_t size, bool jit) { pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned"); - void* alloc = mmap(nullptr, size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + uint flags = MAP_ANONYMOUS | MAP_PRIVATE; +#ifdef __APPLE__ + if (jit) + flags |= MAP_JIT; +#endif + void* alloc = mmap(nullptr, size, PROT_NONE, flags, -1, 0); if (alloc == MAP_FAILED) return nullptr; @@ -216,15 +221,26 @@ u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* ma { pxAssert(static_cast(map_base) >= m_base_ptr && static_cast(map_base) < (m_base_ptr + m_size)); - // MAP_FIXED is okay here, since we've reserved the entire region, and *want* to overwrite the mapping. 
const uint lnxmode = LinuxProt(mode); - void* const ptr = mmap(map_base, map_size, lnxmode, MAP_SHARED | MAP_FIXED, - static_cast(reinterpret_cast(file_handle)), static_cast(file_offset)); - if (ptr == MAP_FAILED) - return nullptr; + if (file_handle) + { + const int fd = static_cast(reinterpret_cast(file_handle)); + // MAP_FIXED is okay here, since we've reserved the entire region, and *want* to overwrite the mapping. + void* const ptr = mmap(map_base, map_size, lnxmode, MAP_SHARED | MAP_FIXED, fd, static_cast(file_offset)); + if (ptr == MAP_FAILED) + return nullptr; + } + else + { + // macOS doesn't seem to allow MAP_JIT with MAP_FIXED + // So we do the MAP_JIT in the allocation, and just mprotect here + // Note that this will only work the first time for a given region + if (mprotect(map_base, map_size, lnxmode) < 0) + return nullptr; + } m_num_mappings++; - return static_cast(ptr); + return static_cast(map_base); } bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size) diff --git a/common/Windows/WinHostSys.cpp b/common/Windows/WinHostSys.cpp index 54e76fa6cc..7f05a8460e 100644 --- a/common/Windows/WinHostSys.cpp +++ b/common/Windows/WinHostSys.cpp @@ -183,7 +183,7 @@ SharedMemoryMappingArea::PlaceholderMap::iterator SharedMemoryMappingArea::FindP return m_placeholder_ranges.end(); } -std::unique_ptr SharedMemoryMappingArea::Create(size_t size) +std::unique_ptr SharedMemoryMappingArea::Create(size_t size, bool jit) { pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned"); @@ -241,11 +241,22 @@ u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* ma } // actually do the mapping, replacing the placeholder on the range - if (!MapViewOfFile3(static_cast(file_handle), GetCurrentProcess(), - map_base, file_offset, map_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) + if (file_handle) { - Console.Error("(SharedMemoryMappingArea) MapViewOfFile3() failed: %u", GetLastError()); - return nullptr; + if 
(!MapViewOfFile3(static_cast(file_handle), GetCurrentProcess(), + map_base, file_offset, map_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) + { + Console.Error("(SharedMemoryMappingArea) MapViewOfFile3() failed: %u", GetLastError()); + return nullptr; + } + } + else + { + if (!VirtualAlloc2(GetCurrentProcess(), map_base, map_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) + { + Console.Error("(SharedMemoryMappingArea) VirtualAlloc2() failed: %u", GetLastError()); + return nullptr; + } } const DWORD prot = ConvertToWinApi(mode); From 2eabebc82ae6a99311dfdc508e36ef570f9c9db3 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Wed, 21 Aug 2024 02:50:48 -0500 Subject: [PATCH 6/7] Core: Reserve data and code areas together They need to stay near each other for the x86 JIT to work --- pcsx2/Memory.cpp | 64 ++++++++++++------------------------------------ 1 file changed, 16 insertions(+), 48 deletions(-) diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index fa775c05da..5c3569d48e 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -49,9 +49,6 @@ namespace Ps2MemSize namespace SysMemory { - static u8* TryAllocateVirtualMemory(const char* name, void* file_handle, uptr base, size_t size); - static u8* AllocateVirtualMemory(const char* name, void* file_handle, size_t size, size_t offset_from_base); - static bool AllocateMemoryMap(); static void DumpMemoryMap(); static void ReleaseMemoryMap(); @@ -59,6 +56,7 @@ namespace SysMemory static u8* s_data_memory; static void* s_data_memory_file_handle; static u8* s_code_memory; + static std::unique_ptr s_memory_mapping_area; } // namespace SysMemory static void memAllocate(); @@ -86,45 +84,6 @@ namespace HostMemoryMap } } // namespace HostMemoryMap -u8* SysMemory::TryAllocateVirtualMemory(const char* name, void* file_handle, uptr base, size_t size) -{ - u8* baseptr; - - if (file_handle) - baseptr = static_cast(HostSys::MapSharedMemory(file_handle, 0, (void*)base, size, PageAccess_ReadWrite())); - else - 
baseptr = static_cast(HostSys::Mmap((void*)base, size, PageAccess_Any())); - - if (!baseptr) - return nullptr; - - if (base != 0 && (uptr)baseptr != base) - { - if (file_handle) - { - if (baseptr) - HostSys::UnmapSharedMemory(baseptr, size); - } - else - { - if (baseptr) - HostSys::Munmap(baseptr, size); - } - - return nullptr; - } - - DevCon.WriteLn(Color_Gray, "%-32s @ 0x%016" PRIXPTR " -> 0x%016" PRIXPTR " %s", name, - baseptr, (uptr)baseptr + size, fmt::format("[{}mb]", size / _1mb).c_str()); - - return baseptr; -} - -u8* SysMemory::AllocateVirtualMemory(const char* name, void* file_handle, size_t size, size_t offset_from_base) -{ - return TryAllocateVirtualMemory(name, file_handle, 0, size); -} - bool SysMemory::AllocateMemoryMap() { s_data_memory_file_handle = HostSys::CreateSharedMemory(HostSys::GetFileMappingName("pcsx2").c_str(), HostMemoryMap::MainSize); @@ -135,16 +94,23 @@ bool SysMemory::AllocateMemoryMap() return false; } - if ((s_data_memory = AllocateVirtualMemory("Data Memory", s_data_memory_file_handle, HostMemoryMap::MainSize, 0)) == nullptr) + if (!(s_memory_mapping_area = SharedMemoryMappingArea::Create(HostMemoryMap::MainSize + HostMemoryMap::CodeSize, true))) { - Host::ReportErrorAsync("Error", "Failed to map data memory at an acceptable location."); + Host::ReportErrorAsync("Error", "Failed to map main memory."); ReleaseMemoryMap(); return false; } - if ((s_code_memory = AllocateVirtualMemory("Code Memory", nullptr, HostMemoryMap::CodeSize, HostMemoryMap::MainSize)) == nullptr) + if ((s_data_memory = s_memory_mapping_area->Map(s_data_memory_file_handle, 0, s_memory_mapping_area->BasePointer(), HostMemoryMap::MainSize, PageAccess_ReadWrite())) == nullptr) { - Host::ReportErrorAsync("Error", "Failed to allocate code memory at an acceptable location."); + Host::ReportErrorAsync("Error", "Failed to map data memory."); + ReleaseMemoryMap(); + return false; + } + + if ((s_code_memory = s_memory_mapping_area->Map(nullptr, 0, 
s_memory_mapping_area->OffsetPointer(HostMemoryMap::MainSize), HostMemoryMap::CodeSize, PageAccess_Any())) == nullptr) + { + Host::ReportErrorAsync("Error", "Failed to allocate code memory."); ReleaseMemoryMap(); return false; } @@ -186,16 +152,18 @@ void SysMemory::ReleaseMemoryMap() { if (s_code_memory) { - HostSys::Munmap(s_code_memory, HostMemoryMap::CodeSize); + s_memory_mapping_area->Unmap(s_code_memory, HostMemoryMap::CodeSize); s_code_memory = nullptr; } if (s_data_memory) { - HostSys::UnmapSharedMemory(s_data_memory, HostMemoryMap::MainSize); + s_memory_mapping_area->Unmap(s_data_memory, HostMemoryMap::MainSize); s_data_memory = nullptr; } + s_memory_mapping_area.reset(); + if (s_data_memory_file_handle) { HostSys::DestroySharedMemory(s_data_memory_file_handle); From 87b795e1c6e75dc9279124ac41be21a80bb5e014 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Wed, 21 Aug 2024 03:03:21 -0500 Subject: [PATCH 7/7] Common: Remove unused functions --- common/HostSys.h | 9 ------- common/Linux/LnxHostSys.cpp | 48 ----------------------------------- common/Windows/WinHostSys.cpp | 39 ---------------------------- 3 files changed, 96 deletions(-) diff --git a/common/HostSys.h b/common/HostSys.h index 7847a84bce..5984a958c3 100644 --- a/common/HostSys.h +++ b/common/HostSys.h @@ -90,20 +90,11 @@ static __fi PageProtectionMode PageAccess_Any() // -------------------------------------------------------------------------------------- namespace HostSys { - // Maps a block of memory for use as a recompiled code buffer. - // Returns NULL on allocation failure. 
- extern void* Mmap(void* base, size_t size, const PageProtectionMode& mode); - - // Unmaps a block allocated by SysMmap - extern void Munmap(void* base, size_t size); - extern void MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode); extern std::string GetFileMappingName(const char* prefix); extern void* CreateSharedMemory(const char* name, size_t size); extern void DestroySharedMemory(void* ptr); - extern void* MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode); - extern void UnmapSharedMemory(void* baseaddr, size_t size); /// JIT write protect for Apple Silicon. Needs to be called prior to writing to any RWX pages. #if !defined(__APPLE__) || !defined(_M_ARM64) diff --git a/common/Linux/LnxHostSys.cpp b/common/Linux/LnxHostSys.cpp index b5128c64e2..b0074b758f 100644 --- a/common/Linux/LnxHostSys.cpp +++ b/common/Linux/LnxHostSys.cpp @@ -39,32 +39,6 @@ static __ri uint LinuxProt(const PageProtectionMode& mode) return lnxmode; } -void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode) -{ - pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned"); - - if (mode.IsNone()) - return nullptr; - - const u32 prot = LinuxProt(mode); - - u32 flags = MAP_PRIVATE | MAP_ANONYMOUS; - - void* res = mmap(base, size, prot, flags, -1, 0); - if (res == MAP_FAILED) - return nullptr; - - return res; -} - -void HostSys::Munmap(void* base, size_t size) -{ - if (!base) - return; - - munmap((void*)base, size); -} - void HostSys::MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode) { pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned"); @@ -114,28 +88,6 @@ void HostSys::DestroySharedMemory(void* ptr) close(static_cast(reinterpret_cast(ptr))); } -void* HostSys::MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode) -{ - const uint lnxmode = LinuxProt(mode); - - int flags = MAP_SHARED; -#ifdef __APPLE__ 
- if (mode.CanExecute()) - flags |= MAP_JIT; -#endif - void* ptr = mmap(0, size, lnxmode, flags, static_cast(reinterpret_cast(handle)), static_cast(offset)); - if (ptr == MAP_FAILED) - return nullptr; - - return ptr; -} - -void HostSys::UnmapSharedMemory(void* baseaddr, size_t size) -{ - if (munmap(baseaddr, size) != 0) - pxFailRel("Failed to unmap shared memory"); -} - #ifndef __APPLE__ size_t HostSys::GetRuntimePageSize() diff --git a/common/Windows/WinHostSys.cpp b/common/Windows/WinHostSys.cpp index 7f05a8460e..d5e1ddf857 100644 --- a/common/Windows/WinHostSys.cpp +++ b/common/Windows/WinHostSys.cpp @@ -35,22 +35,6 @@ static DWORD ConvertToWinApi(const PageProtectionMode& mode) return winmode; } -void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode) -{ - if (mode.IsNone()) - return nullptr; - - return VirtualAlloc(base, size, MEM_RESERVE | MEM_COMMIT, ConvertToWinApi(mode)); -} - -void HostSys::Munmap(void* base, size_t size) -{ - if (!base) - return; - - VirtualFree((void*)base, 0, MEM_RELEASE); -} - void HostSys::MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode) { pxAssert((size & (__pagesize - 1)) == 0); @@ -77,29 +61,6 @@ void HostSys::DestroySharedMemory(void* ptr) CloseHandle(static_cast(ptr)); } -void* HostSys::MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode) -{ - void* ret = MapViewOfFileEx(static_cast(handle), FILE_MAP_READ | FILE_MAP_WRITE, - static_cast(offset >> 32), static_cast(offset), size, baseaddr); - if (!ret) - return nullptr; - - const DWORD prot = ConvertToWinApi(mode); - if (prot != PAGE_READWRITE) - { - DWORD old_prot; - if (!VirtualProtect(ret, size, prot, &old_prot)) - pxFail("Failed to protect memory mapping"); - } - return ret; -} - -void HostSys::UnmapSharedMemory(void* baseaddr, size_t size) -{ - if (!UnmapViewOfFile(baseaddr)) - pxFail("Failed to unmap shared memory"); -} - size_t HostSys::GetRuntimePageSize() { SYSTEM_INFO si = {};