From c06cb7b12129553ae2257ecdfffdd3fd431390e6 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Fri, 2 Jul 2010 22:14:35 +0000 Subject: [PATCH] (speedup!) Add missing flushes to COP0 and COP2 (VUmacro execution calls), and subsequently disable *all* XMM freezes. They aren't needed anymore. Rationale: Pseudonym did the necessary upgrades to the recompilers a couple months ago prepping us for a day when we would no longer need MMX/XMM register freezes. All regs are already being flushed on all memory operations, so I added proper flushing to COP0 and COP2 here, and removed XMM freeze/thaw code entirely. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3375 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/src/x86emitter/tools.cpp | 20 +++++++++++++++----- pcsx2/x86/iCOP0.cpp | 12 ++++++------ pcsx2/x86/iCore.h | 30 +++++++++++++++--------------- pcsx2/x86/ix86-32/iR5900-32.cpp | 14 ++++++++++---- pcsx2/x86/microVU_Macro.inl | 17 ++++++++++------- 5 files changed, 56 insertions(+), 37 deletions(-) diff --git a/common/src/x86emitter/tools.cpp b/common/src/x86emitter/tools.cpp index c8c692cea6..9f14f6b0cc 100644 --- a/common/src/x86emitter/tools.cpp +++ b/common/src/x86emitter/tools.cpp @@ -35,11 +35,14 @@ namespace MMXRegisters __forceinline bool Saved() { - return ( stack_depth > 0); + return false; + //return (stack_depth > 0); } __forceinline void Freeze() { + return; + if (!g_EEFreezeRegs) return; //DevCon.Warning("MMXRegisters::Freeze: depth[%d]\n", stack_depth); @@ -83,6 +86,8 @@ namespace MMXRegisters __forceinline void Thaw() { + return; + if (!g_EEFreezeRegs) return; //DevCon.Warning("MMXRegisters::Thaw: depth[%d]\n", stack_depth); @@ -138,11 +143,14 @@ namespace XMMRegisters __forceinline bool Saved() { - return ( stack_depth > 0); + return false; + //return ( stack_depth > 0); } __forceinline void Freeze() { + return; + if (!g_EEFreezeRegs) return; //DevCon.Warning("XMMRegisters::Freeze: depth[%d]\n", Depth()); @@ -185,6 +193,8 @@ namespace XMMRegisters 
__forceinline void Thaw() { + return; + if (!g_EEFreezeRegs) return; //DevCon.Warning("XMMRegisters::Thaw: depth[%d]\n", Depth()); @@ -238,18 +248,18 @@ namespace Registers // MMX registers should not be needing freezes anymore (speedup!) __forceinline bool Saved() { - return (XMMRegisters::Saved() /*|| MMXRegisters::Saved()*/ ); + return false; //(XMMRegisters::Saved() /*|| MMXRegisters::Saved()*/ ); } __forceinline void Freeze() { - XMMRegisters::Freeze(); + //XMMRegisters::Freeze(); //MMXRegisters::Freeze(); } __forceinline void Thaw() { - XMMRegisters::Thaw(); + //XMMRegisters::Thaw(); //MMXRegisters::Thaw(); } } diff --git a/pcsx2/x86/iCOP0.cpp b/pcsx2/x86/iCOP0.cpp index 81a9fda37a..26aa2bc835 100644 --- a/pcsx2/x86/iCOP0.cpp +++ b/pcsx2/x86/iCOP0.cpp @@ -169,12 +169,12 @@ void recMFC0( void ) break; case 1: - iFlushCall(FLUSH_NODESTROY); + iFlushCall(FLUSH_INTERPRETER); xCALL( COP0_UpdatePCCR ); xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]); break; case 3: - iFlushCall(FLUSH_NODESTROY); + iFlushCall(FLUSH_INTERPRETER); xCALL( COP0_UpdatePCCR ); xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]); break; @@ -206,7 +206,7 @@ void recMTC0() switch (_Rd_) { case 12: - iFlushCall(FLUSH_NODESTROY); + iFlushCall(FLUSH_INTERPRETER); xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] ); xCALL( WriteCP0Status ); break; @@ -221,7 +221,7 @@ void recMTC0() switch(_Imm_ & 0x3F) { case 0: - iFlushCall(FLUSH_NODESTROY); + iFlushCall(FLUSH_INTERPRETER); xCALL( COP0_UpdatePCCR ); xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] ); xCALL( COP0_DiagnosticPCCR ); @@ -255,7 +255,7 @@ void recMTC0() switch (_Rd_) { case 12: - iFlushCall(FLUSH_NODESTROY); + iFlushCall(FLUSH_INTERPRETER); _eeMoveGPRtoR(ECX, _Rt_); xCALL( WriteCP0Status ); break; @@ -270,7 +270,7 @@ void recMTC0() switch(_Imm_ & 0x3F) { case 0: - iFlushCall(FLUSH_NODESTROY); + iFlushCall(FLUSH_INTERPRETER); xCALL( COP0_UpdatePCCR ); _eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_); xCALL( COP0_DiagnosticPCCR ); diff --git a/pcsx2/x86/iCore.h 
b/pcsx2/x86/iCore.h index abef3789b6..0f693fa7ff 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -346,22 +346,22 @@ extern u16 x86FpuState; // the code being called is going to modify register allocations -- ie, be doing // some kind of recompiling of its own. -#define FLUSH_CACHED_REGS 1 -#define FLUSH_FLUSH_XMM 2 -#define FLUSH_FREE_XMM 4 // both flushes and frees -#define FLUSH_FLUSH_MMX 8 -#define FLUSH_FREE_MMX 16 // both flushes and frees -#define FLUSH_FLUSH_ALLX86 32 // flush x86 -#define FLUSH_FREE_TEMPX86 64 // flush and free temporary x86 regs -#define FLUSH_FREE_ALLX86 128 // free all x86 regs -#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs -#define FLUSH_PC 0x200 // program counter -#define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit -#define FLUSH_CODE 0x800 // opcode for interpreter +#define FLUSH_CACHED_REGS 0x001 +#define FLUSH_FLUSH_XMM 0x002 +#define FLUSH_FREE_XMM 0x004 // both flushes and frees +#define FLUSH_FLUSH_MMX 0x008 +#define FLUSH_FREE_MMX 0x010 // both flushes and frees +#define FLUSH_FLUSH_ALLX86 0x020 // flush x86 +#define FLUSH_FREE_TEMPX86 0x040 // flush and free temporary x86 regs +#define FLUSH_FREE_ALLX86 0x080 // free all x86 regs +#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs +#define FLUSH_PC 0x200 // program counter +#define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit +#define FLUSH_CODE 0x800 // opcode for interpreter -#define FLUSH_EVERYTHING 0x1ff -#define FLUSH_EXCEPTION 0x7ff -#define FLUSH_INTERPRETER 0xfff +#define FLUSH_EVERYTHING 0x1ff +#define FLUSH_EXCEPTION 0x7ff +#define FLUSH_INTERPRETER 0xfff // no freeing, used when callee won't destroy mmx/xmm regs #define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_MMX|FLUSH_FLUSH_ALLX86) diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 71d38b30ff..c1b6308f7a 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -55,7 
+55,7 @@ int branch; // set for branch __aligned16 GPR_reg64 g_cpuConstRegs[32] = {0}; u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0; -bool g_cpuFlushedPC, g_recompilingDelaySlot, g_maySignalException; +bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException; //////////////////////////////////////////////////////////////// // Static Private Variables - R5900 Dynarec @@ -976,13 +976,17 @@ void iFlushCall(int flushtype) _freeX86reg(ECX); _freeX86reg(EDX); - if (flushtype & FLUSH_PC && !g_cpuFlushedPC) { + if ((flushtype & FLUSH_PC) && !g_cpuFlushedPC) { xMOV(ptr32[&cpuRegs.pc], pc); g_cpuFlushedPC = true; } - if (flushtype & FLUSH_CODE) + + if ((flushtype & FLUSH_CODE) && !g_cpuFlushedCode) { xMOV(ptr32[&cpuRegs.code], cpuRegs.code); - if (flushtype & FLUSH_CAUSE) { + g_cpuFlushedCode = true; + } + + if ((flushtype & FLUSH_CAUSE) && !g_maySignalException) { if (g_recompilingDelaySlot) xOR(ptr32[&cpuRegs.CP0.n.Cause], 1 << 31); // BD g_maySignalException = true; @@ -1135,6 +1139,7 @@ void recompileNextInstruction(int delayslot) if (!delayslot) { pc += 4; g_cpuFlushedPC = false; + g_cpuFlushedCode = false; } else { // increment after recompiling so that pc points to the branch during recompilation g_recompilingDelaySlot = true; @@ -1223,6 +1228,7 @@ void recompileNextInstruction(int delayslot) if (delayslot) { pc += 4; g_cpuFlushedPC = false; + g_cpuFlushedCode = false; if (g_maySignalException) xAND(ptr32[&cpuRegs.CP0.n.Cause], ~(1 << 31)); // BD g_recompilingDelaySlot = false; diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl index f235e053ab..004d3896d1 100644 --- a/pcsx2/x86/microVU_Macro.inl +++ b/pcsx2/x86/microVU_Macro.inl @@ -18,6 +18,8 @@ extern void _vu0WaitMicro(); extern void _vu0FinishMicro(); +typedef void FnType_Void(); + //------------------------------------------------------------------ // Macro VU - Helper Macros / Functions //------------------------------------------------------------------ @@ 
-244,16 +246,16 @@ void recBC2TL() { _setupBranchTest(JZ32, true); } void COP2_Interlock(bool mBitSync) { if (cpuRegs.code & 1) { - iFlushCall(FLUSH_NOCONST); - if (mBitSync) CALLFunc((uptr)_vu0WaitMicro); - else CALLFunc((uptr)_vu0FinishMicro); + iFlushCall(FLUSH_EVERYTHING | FLUSH_PC); + if (mBitSync) xCALL(_vu0WaitMicro); + else xCALL(_vu0FinishMicro); } } -void TEST_FBRST_RESET(uptr resetFunct, int vuIndex) { +void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex) { TEST32ItoR(EAX, (vuIndex) ? 0x200 : 0x002); j8Ptr[0] = JZ8(0); - CALLFunc(resetFunct); + xCALL(resetFunct); MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]); x86SetJ8(j8Ptr[0]); } @@ -261,6 +263,7 @@ void TEST_FBRST_RESET(uptr resetFunct, int vuIndex) { static void recCFC2() { printCOP2("CFC2"); + COP2_Interlock(0); if (!_Rt_) return; iFlushCall(FLUSH_EVERYTHING); @@ -320,8 +323,8 @@ static void recCTC2() { } else MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]); - TEST_FBRST_RESET((uptr)vu0ResetRegs, 0); - TEST_FBRST_RESET((uptr)vu1ResetRegs, 1); + TEST_FBRST_RESET(vu0ResetRegs, 0); + TEST_FBRST_RESET(vu1ResetRegs, 1); AND32ItoR(EAX, 0x0C0C); MOV32RtoM((uptr)&microVU0.regs->VI[REG_FBRST].UL, EAX);