diff --git a/pcsx2/x86/R5900_Profiler.h b/pcsx2/x86/R5900_Profiler.h index a42e862fc0..e0c2124f16 100644 --- a/pcsx2/x86/R5900_Profiler.h +++ b/pcsx2/x86/R5900_Profiler.h @@ -364,14 +364,13 @@ struct eeProfiler } } - // Warning dirty ebx - void EmitMem() + void EmitMem(int addr_reg) { // Compact the 4GB virtual address to a 512KB virtual address if (x86caps.hasBMI2) { - xPEXT(ebx, ecx, ptr[&memMask]); - xADD(ptr32[(rbx * 4) + memStats], 1); + xPEXT(arg1regd, xRegister32(addr_reg), ptr[&memMask]); + xADD(ptr32[(arg1reg * 4) + memStats], 1); } } @@ -403,7 +402,7 @@ struct eeProfiler __fi void Reset() {} __fi void EmitOp(eeOpcode op) {} __fi void Print() {} - __fi void EmitMem() {} + __fi void EmitMem(int addrReg) {} __fi void EmitConstMem(u32 add) {} __fi void EmitSlowMem() {} __fi void EmitFastMem() {} diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index a0fc64ab10..4d91c1a181 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -52,6 +52,10 @@ bool _isAllocatableX86reg(int x86reg) if (CHECK_FASTMEM && x86reg == 5) return false; + // rbx is used to reference PCSX2 program text + if (xGetTextPtr() && x86reg == RTEXTPTR.GetId()) + return false; + #ifdef ENABLE_VTUNE // vtune needs ebp... if (!CHECK_FASTMEM && x86reg == 5) diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index ac0139a8e4..0a5856390b 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -175,10 +175,10 @@ static const void* _DynGen_JITCompile() xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc]); xMOV(eax, ptr[&psxRegs.pc]); - xMOV(ebx, eax); + xMOV(edx, eax); xSHR(eax, 16); xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]); - xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); + xJMP(ptrNative[rdx * (wordsize / 4) + rcx]); return retval; } @@ -196,10 +196,10 @@ static const void* _DynGen_DispatcherReg() u8* retval = xGetPtr(); xMOV(eax, ptr[&psxRegs.pc]); - xMOV(ebx, eax); + xMOV(edx, eax); xSHR(eax, 16); xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]); - xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); + xJMP(ptrNative[rdx * (wordsize / 4) + rcx]); return retval; } @@ -1181,16 +1181,16 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) } else { - xMOV(ebx, ptr32[&psxRegs.cycle]); - xADD(ebx, blockCycles); - xMOV(ptr32[&psxRegs.cycle], ebx); // update cycles + xMOV(r12d, ptr32[&psxRegs.cycle]); + xADD(r12d, blockCycles); + xMOV(ptr32[&psxRegs.cycle], r12d); // update cycles // jump if iopCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE) iPsxAddEECycles(blockCycles); xJLE(iopExitRecompiledCode); // check if an event is pending - xSUB(ebx, ptr32[&psxRegs.iopNextEventCycle]); + xSUB(r12d, ptr32[&psxRegs.iopNextEventCycle]); xForwardJS nointerruptpending; xFastCall((void*)iopEventTest); diff --git a/pcsx2/x86/ix86-32/iR5900.cpp b/pcsx2/x86/ix86-32/iR5900.cpp index c08bb247de..a716d1f07f 100644 --- a/pcsx2/x86/ix86-32/iR5900.cpp +++ b/pcsx2/x86/ix86-32/iR5900.cpp @@ -381,10 +381,10 @@ static const void* _DynGen_JITCompile() // void(**base)() = (void(**)())recLUT[addr >> 16]; // base[addr >> 2](); xMOV(eax, ptr[&cpuRegs.pc]); - xMOV(ebx, eax); + xMOV(edx, eax); xSHR(eax, 16); xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]); - xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); + xJMP(ptrNative[rdx * (wordsize / 4) + rcx]); return retval; } @@ -406,10 +406,10 @@ static const void* _DynGen_DispatcherReg() // void(**base)() = (void(**)())recLUT[addr >> 16]; // base[addr >> 2](); xMOV(eax, ptr[&cpuRegs.pc]); - xMOV(ebx, eax); + xMOV(edx, eax); xSHR(eax, 16); xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]); - xJMP(ptrNative[rbx * (wordsize / 4) + rcx]); + xJMP(ptrNative[rdx * (wordsize / 4) + rcx]); return retval; } @@ -2138,26 +2138,26 @@ static bool recSkipTimeoutLoop(s32 reg, bool is_timeout_loop) // if new_v0 > 0 { jump to dispatcher because loop exited early } // else new_v0 is 0, so exit loop - xMOV(ebx, ptr32[&cpuRegs.cycle]); // ebx = cycle + xMOV(r12d, ptr32[&cpuRegs.cycle]); // r12d = cycle xMOV(ecx, ptr32[&cpuRegs.nextEventCycle]); // ecx = nextEventCycle - xCMP(ebx, ecx); + xCMP(r12d, ecx); //xJAE((void*)DispatcherEvent); // jump to dispatcher if event immediately // TODO: In the case where nextEventCycle < cycle because it's overflowed, tack 8 // cycles onto the event count, so hopefully it'll wrap around. This is pretty // gross, but until we switch to 64-bit counters, not many better options. xForwardJB8 not_dispatcher; - xADD(ebx, 8); - xMOV(ptr32[&cpuRegs.cycle], ebx); + xADD(r12d, 8); + xMOV(ptr32[&cpuRegs.cycle], r12d); xJMP((void*)DispatcherEvent); not_dispatcher.SetTarget(); xMOV(edx, ptr32[&cpuRegs.GPR.r[reg].UL[0]]); // eax = v0 - xLEA(rax, ptrNative[rdx * 8 + rbx]); // edx = v0 * 8 + cycle + xLEA(rax, ptrNative[rdx * 8 + r12]); // edx = v0 * 8 + cycle xCMP(rcx, rax); xCMOVB(rax, rcx); // eax = new_cycles = min(v8 * 8, nextEventCycle) xMOV(ptr32[&cpuRegs.cycle], eax); // writeback new_cycles - xSUB(eax, ebx); // new_cycles -= cycle + xSUB(eax, r12d); // new_cycles -= cycle xSHR(eax, 3); // compute new v0 value xSUB(edx, eax); // v0 -= cycle_diff xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[0]], edx); // write back new value of v0 diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index d6e9a099ac..1a95793de0 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -119,14 +119,12 @@ static void __vectorcall LogWriteQuad(u32 addr, __m128i val) namespace vtlb_private { // ------------------------------------------------------------------------ - // Prepares eax, ecx, and, ebx for Direct or Indirect operations. - // Returns the writeback pointer for ebx (return address from indirect handling) + // Prepares eax and ecx for Direct or Indirect operations. // static void DynGen_PrepRegs(int addr_reg, int value_reg, u32 sz, bool xmm) { - EE::Profiler.EmitMem(); - _freeX86reg(arg1regd); + EE::Profiler.EmitMem(addr_reg); xMOV(arg1regd, xRegister32(addr_reg)); if (value_reg >= 0) @@ -269,7 +267,7 @@ static void DynGen_HandlerTest(const GenDirectFn& gen_direct, int mode, int bits // ------------------------------------------------------------------------ // Generates the various instances of the indirect dispatchers -// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64), rbx: function return ptr +// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64) // Out: eax: result (if mode < 64) static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign) { @@ -939,14 +937,13 @@ void vtlb_DynBackpatchLoadStore(uptr code_address, u32 code_size, u32 guest_pc, u32 num_gprs = 0; u32 num_fprs = 0; - const u32 rbxid = static_cast(rbx.GetId()); const u32 arg1id = static_cast(arg1reg.GetId()); const u32 arg2id = static_cast(arg2reg.GetId()); const u32 arg3id = static_cast(arg3reg.GetId()); for (u32 i = 0; i < iREGCNT_GPR; i++) { - if ((gpr_bitmask & (1u << i)) && (i == rbxid || i == arg1id || i == arg2id || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i)) + if ((gpr_bitmask & (1u << i)) && (i == arg1id || i == arg2id || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i)) num_gprs++; } for (u32 i = 0; i < iREGCNT_XMM; i++) diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 9c587820a2..6981a3be3e 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -147,10 +147,10 @@ static const char branchSTR[16][8] = { #define gprT1b ax // Low 16-bit of gprT1 (eax) #define gprT2b cx // Low 16-bit of gprT2 (ecx) -#define gprF0 ebx // Status Flag 0 -#define gprF1 r12d // Status Flag 1 -#define gprF2 r13d // Status Flag 2 -#define gprF3 r14d // Status Flag 3 +#define gprF0 r12d // Status Flag 0 +#define gprF1 r13d // Status Flag 1 +#define gprF2 r14d // Status Flag 2 +#define gprF3 r15d // Status Flag 3 // Function Params #define mP microVU& mVU, int recPass