(speedup!) Add missing flushes to COP0 and COP2 (VUmacro execution calls), and subsequently disable *all* XMM freezes. They aren't needed anymore.

Rationale: Pseudonym made the necessary upgrades to the recompilers a couple of months ago, preparing us for the day when we would no longer need MMX/XMM register freezes. All regs are already being flushed on all memory operations, so I added proper flushing to COP0 and COP2 here, and disabled the XMM freeze/thaw code entirely (Freeze/Thaw now return immediately and Saved() always returns false; the old bodies are left in place).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3375 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-07-02 22:14:35 +00:00
parent 1cc2b210c6
commit c06cb7b121
5 changed files with 56 additions and 37 deletions

View File

@ -35,11 +35,14 @@ namespace MMXRegisters
__forceinline bool Saved() __forceinline bool Saved()
{ {
return ( stack_depth > 0); return false;
//return (stack_depth > 0);
} }
__forceinline void Freeze() __forceinline void Freeze()
{ {
return;
if (!g_EEFreezeRegs) return; if (!g_EEFreezeRegs) return;
//DevCon.Warning("MMXRegisters::Freeze: depth[%d]\n", stack_depth); //DevCon.Warning("MMXRegisters::Freeze: depth[%d]\n", stack_depth);
@ -83,6 +86,8 @@ namespace MMXRegisters
__forceinline void Thaw() __forceinline void Thaw()
{ {
return;
if (!g_EEFreezeRegs) return; if (!g_EEFreezeRegs) return;
//DevCon.Warning("MMXRegisters::Thaw: depth[%d]\n", stack_depth); //DevCon.Warning("MMXRegisters::Thaw: depth[%d]\n", stack_depth);
@ -138,11 +143,14 @@ namespace XMMRegisters
__forceinline bool Saved() __forceinline bool Saved()
{ {
return ( stack_depth > 0); return false;
//return ( stack_depth > 0);
} }
__forceinline void Freeze() __forceinline void Freeze()
{ {
return;
if (!g_EEFreezeRegs) return; if (!g_EEFreezeRegs) return;
//DevCon.Warning("XMMRegisters::Freeze: depth[%d]\n", Depth()); //DevCon.Warning("XMMRegisters::Freeze: depth[%d]\n", Depth());
@ -185,6 +193,8 @@ namespace XMMRegisters
__forceinline void Thaw() __forceinline void Thaw()
{ {
return;
if (!g_EEFreezeRegs) return; if (!g_EEFreezeRegs) return;
//DevCon.Warning("XMMRegisters::Thaw: depth[%d]\n", Depth()); //DevCon.Warning("XMMRegisters::Thaw: depth[%d]\n", Depth());
@ -238,18 +248,18 @@ namespace Registers
// MMX registers should not be needing freezes anymore (speedup!) // MMX registers should not be needing freezes anymore (speedup!)
__forceinline bool Saved() __forceinline bool Saved()
{ {
return (XMMRegisters::Saved() /*|| MMXRegisters::Saved()*/ ); return false; //(XMMRegisters::Saved() /*|| MMXRegisters::Saved()*/ );
} }
__forceinline void Freeze() __forceinline void Freeze()
{ {
XMMRegisters::Freeze(); //XMMRegisters::Freeze();
//MMXRegisters::Freeze(); //MMXRegisters::Freeze();
} }
__forceinline void Thaw() __forceinline void Thaw()
{ {
XMMRegisters::Thaw(); //XMMRegisters::Thaw();
//MMXRegisters::Thaw(); //MMXRegisters::Thaw();
} }
} }

View File

@ -169,12 +169,12 @@ void recMFC0( void )
break; break;
case 1: case 1:
iFlushCall(FLUSH_NODESTROY); iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR ); xCALL( COP0_UpdatePCCR );
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]); xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]);
break; break;
case 3: case 3:
iFlushCall(FLUSH_NODESTROY); iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR ); xCALL( COP0_UpdatePCCR );
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]); xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
break; break;
@ -206,7 +206,7 @@ void recMTC0()
switch (_Rd_) switch (_Rd_)
{ {
case 12: case 12:
iFlushCall(FLUSH_NODESTROY); iFlushCall(FLUSH_INTERPRETER);
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] ); xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] );
xCALL( WriteCP0Status ); xCALL( WriteCP0Status );
break; break;
@ -221,7 +221,7 @@ void recMTC0()
switch(_Imm_ & 0x3F) switch(_Imm_ & 0x3F)
{ {
case 0: case 0:
iFlushCall(FLUSH_NODESTROY); iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR ); xCALL( COP0_UpdatePCCR );
xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] ); xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] );
xCALL( COP0_DiagnosticPCCR ); xCALL( COP0_DiagnosticPCCR );
@ -255,7 +255,7 @@ void recMTC0()
switch (_Rd_) switch (_Rd_)
{ {
case 12: case 12:
iFlushCall(FLUSH_NODESTROY); iFlushCall(FLUSH_INTERPRETER);
_eeMoveGPRtoR(ECX, _Rt_); _eeMoveGPRtoR(ECX, _Rt_);
xCALL( WriteCP0Status ); xCALL( WriteCP0Status );
break; break;
@ -270,7 +270,7 @@ void recMTC0()
switch(_Imm_ & 0x3F) switch(_Imm_ & 0x3F)
{ {
case 0: case 0:
iFlushCall(FLUSH_NODESTROY); iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR ); xCALL( COP0_UpdatePCCR );
_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_); _eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_);
xCALL( COP0_DiagnosticPCCR ); xCALL( COP0_DiagnosticPCCR );

View File

@ -346,22 +346,22 @@ extern u16 x86FpuState;
// the code being called is going to modify register allocations -- ie, be doing // the code being called is going to modify register allocations -- ie, be doing
// some kind of recompiling of its own. // some kind of recompiling of its own.
#define FLUSH_CACHED_REGS 1 #define FLUSH_CACHED_REGS 0x001
#define FLUSH_FLUSH_XMM 2 #define FLUSH_FLUSH_XMM 0x002
#define FLUSH_FREE_XMM 4 // both flushes and frees #define FLUSH_FREE_XMM 0x004 // both flushes and frees
#define FLUSH_FLUSH_MMX 8 #define FLUSH_FLUSH_MMX 0x008
#define FLUSH_FREE_MMX 16 // both flushes and frees #define FLUSH_FREE_MMX 0x010 // both flushes and frees
#define FLUSH_FLUSH_ALLX86 32 // flush x86 #define FLUSH_FLUSH_ALLX86 0x020 // flush x86
#define FLUSH_FREE_TEMPX86 64 // flush and free temporary x86 regs #define FLUSH_FREE_TEMPX86 0x040 // flush and free temporary x86 regs
#define FLUSH_FREE_ALLX86 128 // free all x86 regs #define FLUSH_FREE_ALLX86 0x080 // free all x86 regs
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs #define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
#define FLUSH_PC 0x200 // program counter #define FLUSH_PC 0x200 // program counter
#define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit #define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit
#define FLUSH_CODE 0x800 // opcode for interpreter #define FLUSH_CODE 0x800 // opcode for interpreter
#define FLUSH_EVERYTHING 0x1ff #define FLUSH_EVERYTHING 0x1ff
#define FLUSH_EXCEPTION 0x7ff #define FLUSH_EXCEPTION 0x7ff
#define FLUSH_INTERPRETER 0xfff #define FLUSH_INTERPRETER 0xfff
// no freeing, used when callee won't destroy mmx/xmm regs // no freeing, used when callee won't destroy mmx/xmm regs
#define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_MMX|FLUSH_FLUSH_ALLX86) #define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_MMX|FLUSH_FLUSH_ALLX86)

View File

@ -55,7 +55,7 @@ int branch; // set for branch
__aligned16 GPR_reg64 g_cpuConstRegs[32] = {0}; __aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0; u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
bool g_cpuFlushedPC, g_recompilingDelaySlot, g_maySignalException; bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException;
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
// Static Private Variables - R5900 Dynarec // Static Private Variables - R5900 Dynarec
@ -976,13 +976,17 @@ void iFlushCall(int flushtype)
_freeX86reg(ECX); _freeX86reg(ECX);
_freeX86reg(EDX); _freeX86reg(EDX);
if (flushtype & FLUSH_PC && !g_cpuFlushedPC) { if ((flushtype & FLUSH_PC) && !g_cpuFlushedPC) {
xMOV(ptr32[&cpuRegs.pc], pc); xMOV(ptr32[&cpuRegs.pc], pc);
g_cpuFlushedPC = true; g_cpuFlushedPC = true;
} }
if (flushtype & FLUSH_CODE)
if ((flushtype & FLUSH_CODE) && !g_cpuFlushedCode) {
xMOV(ptr32[&cpuRegs.code], cpuRegs.code); xMOV(ptr32[&cpuRegs.code], cpuRegs.code);
if (flushtype & FLUSH_CAUSE) { g_cpuFlushedCode = true;
}
if ((flushtype & FLUSH_CAUSE) && !g_maySignalException) {
if (g_recompilingDelaySlot) if (g_recompilingDelaySlot)
xOR(ptr32[&cpuRegs.CP0.n.Cause], 1 << 31); // BD xOR(ptr32[&cpuRegs.CP0.n.Cause], 1 << 31); // BD
g_maySignalException = true; g_maySignalException = true;
@ -1135,6 +1139,7 @@ void recompileNextInstruction(int delayslot)
if (!delayslot) { if (!delayslot) {
pc += 4; pc += 4;
g_cpuFlushedPC = false; g_cpuFlushedPC = false;
g_cpuFlushedCode = false;
} else { } else {
// increment after recompiling so that pc points to the branch during recompilation // increment after recompiling so that pc points to the branch during recompilation
g_recompilingDelaySlot = true; g_recompilingDelaySlot = true;
@ -1223,6 +1228,7 @@ void recompileNextInstruction(int delayslot)
if (delayslot) { if (delayslot) {
pc += 4; pc += 4;
g_cpuFlushedPC = false; g_cpuFlushedPC = false;
g_cpuFlushedCode = false;
if (g_maySignalException) if (g_maySignalException)
xAND(ptr32[&cpuRegs.CP0.n.Cause], ~(1 << 31)); // BD xAND(ptr32[&cpuRegs.CP0.n.Cause], ~(1 << 31)); // BD
g_recompilingDelaySlot = false; g_recompilingDelaySlot = false;

View File

@ -18,6 +18,8 @@
extern void _vu0WaitMicro(); extern void _vu0WaitMicro();
extern void _vu0FinishMicro(); extern void _vu0FinishMicro();
typedef void FnType_Void();
//------------------------------------------------------------------ //------------------------------------------------------------------
// Macro VU - Helper Macros / Functions // Macro VU - Helper Macros / Functions
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -244,16 +246,16 @@ void recBC2TL() { _setupBranchTest(JZ32, true); }
void COP2_Interlock(bool mBitSync) { void COP2_Interlock(bool mBitSync) {
if (cpuRegs.code & 1) { if (cpuRegs.code & 1) {
iFlushCall(FLUSH_NOCONST); iFlushCall(FLUSH_EVERYTHING | FLUSH_PC);
if (mBitSync) CALLFunc((uptr)_vu0WaitMicro); if (mBitSync) xCALL(_vu0WaitMicro);
else CALLFunc((uptr)_vu0FinishMicro); else xCALL(_vu0FinishMicro);
} }
} }
void TEST_FBRST_RESET(uptr resetFunct, int vuIndex) { void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex) {
TEST32ItoR(EAX, (vuIndex) ? 0x200 : 0x002); TEST32ItoR(EAX, (vuIndex) ? 0x200 : 0x002);
j8Ptr[0] = JZ8(0); j8Ptr[0] = JZ8(0);
CALLFunc(resetFunct); xCALL(resetFunct);
MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]); MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
x86SetJ8(j8Ptr[0]); x86SetJ8(j8Ptr[0]);
} }
@ -261,6 +263,7 @@ void TEST_FBRST_RESET(uptr resetFunct, int vuIndex) {
static void recCFC2() { static void recCFC2() {
printCOP2("CFC2"); printCOP2("CFC2");
COP2_Interlock(0); COP2_Interlock(0);
if (!_Rt_) return; if (!_Rt_) return;
iFlushCall(FLUSH_EVERYTHING); iFlushCall(FLUSH_EVERYTHING);
@ -320,8 +323,8 @@ static void recCTC2() {
} }
else MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]); else MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
TEST_FBRST_RESET((uptr)vu0ResetRegs, 0); TEST_FBRST_RESET(vu0ResetRegs, 0);
TEST_FBRST_RESET((uptr)vu1ResetRegs, 1); TEST_FBRST_RESET(vu1ResetRegs, 1);
AND32ItoR(EAX, 0x0C0C); AND32ItoR(EAX, 0x0C0C);
MOV32RtoM((uptr)&microVU0.regs->VI[REG_FBRST].UL, EAX); MOV32RtoM((uptr)&microVU0.regs->VI[REG_FBRST].UL, EAX);