(speedup!) Add missing flushes to COP0 and COP2 (VUmacro execution calls), and subsequently disable *all* XMM freezes. They aren't needed anymore.

Rationale: Pseudonym did the necessary upgrades to the recompilers a couple of months ago, prepping us for the day when we would no longer need MMX/XMM register freezes. All regs are already being flushed on all memory operations, so I added proper flushing to COP0 and COP2 here, and disabled the XMM freeze/thaw code entirely.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3375 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-07-02 22:14:35 +00:00
parent 1cc2b210c6
commit c06cb7b121
5 changed files with 56 additions and 37 deletions

View File

@ -35,11 +35,14 @@ namespace MMXRegisters
__forceinline bool Saved()
{
return ( stack_depth > 0);
return false;
//return (stack_depth > 0);
}
__forceinline void Freeze()
{
return;
if (!g_EEFreezeRegs) return;
//DevCon.Warning("MMXRegisters::Freeze: depth[%d]\n", stack_depth);
@ -83,6 +86,8 @@ namespace MMXRegisters
__forceinline void Thaw()
{
return;
if (!g_EEFreezeRegs) return;
//DevCon.Warning("MMXRegisters::Thaw: depth[%d]\n", stack_depth);
@ -138,11 +143,14 @@ namespace XMMRegisters
__forceinline bool Saved()
{
return ( stack_depth > 0);
return false;
//return ( stack_depth > 0);
}
__forceinline void Freeze()
{
return;
if (!g_EEFreezeRegs) return;
//DevCon.Warning("XMMRegisters::Freeze: depth[%d]\n", Depth());
@ -185,6 +193,8 @@ namespace XMMRegisters
__forceinline void Thaw()
{
return;
if (!g_EEFreezeRegs) return;
//DevCon.Warning("XMMRegisters::Thaw: depth[%d]\n", Depth());
@ -238,18 +248,18 @@ namespace Registers
// MMX registers should not be needing freezes anymore (speedup!)
__forceinline bool Saved()
{
return (XMMRegisters::Saved() /*|| MMXRegisters::Saved()*/ );
return false; //(XMMRegisters::Saved() /*|| MMXRegisters::Saved()*/ );
}
__forceinline void Freeze()
{
XMMRegisters::Freeze();
//XMMRegisters::Freeze();
//MMXRegisters::Freeze();
}
__forceinline void Thaw()
{
XMMRegisters::Thaw();
//XMMRegisters::Thaw();
//MMXRegisters::Thaw();
}
}

View File

@ -169,12 +169,12 @@ void recMFC0( void )
break;
case 1:
iFlushCall(FLUSH_NODESTROY);
iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR );
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]);
break;
case 3:
iFlushCall(FLUSH_NODESTROY);
iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR );
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
break;
@ -206,7 +206,7 @@ void recMTC0()
switch (_Rd_)
{
case 12:
iFlushCall(FLUSH_NODESTROY);
iFlushCall(FLUSH_INTERPRETER);
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] );
xCALL( WriteCP0Status );
break;
@ -221,7 +221,7 @@ void recMTC0()
switch(_Imm_ & 0x3F)
{
case 0:
iFlushCall(FLUSH_NODESTROY);
iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR );
xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] );
xCALL( COP0_DiagnosticPCCR );
@ -255,7 +255,7 @@ void recMTC0()
switch (_Rd_)
{
case 12:
iFlushCall(FLUSH_NODESTROY);
iFlushCall(FLUSH_INTERPRETER);
_eeMoveGPRtoR(ECX, _Rt_);
xCALL( WriteCP0Status );
break;
@ -270,7 +270,7 @@ void recMTC0()
switch(_Imm_ & 0x3F)
{
case 0:
iFlushCall(FLUSH_NODESTROY);
iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR );
_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_);
xCALL( COP0_DiagnosticPCCR );

View File

@ -346,22 +346,22 @@ extern u16 x86FpuState;
// the code being called is going to modify register allocations -- ie, be doing
// some kind of recompiling of its own.
#define FLUSH_CACHED_REGS 1
#define FLUSH_FLUSH_XMM 2
#define FLUSH_FREE_XMM 4 // both flushes and frees
#define FLUSH_FLUSH_MMX 8
#define FLUSH_FREE_MMX 16 // both flushes and frees
#define FLUSH_FLUSH_ALLX86 32 // flush x86
#define FLUSH_FREE_TEMPX86 64 // flush and free temporary x86 regs
#define FLUSH_FREE_ALLX86 128 // free all x86 regs
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
#define FLUSH_PC 0x200 // program counter
#define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit
#define FLUSH_CODE 0x800 // opcode for interpreter
#define FLUSH_CACHED_REGS 0x001
#define FLUSH_FLUSH_XMM 0x002
#define FLUSH_FREE_XMM 0x004 // both flushes and frees
#define FLUSH_FLUSH_MMX 0x008
#define FLUSH_FREE_MMX 0x010 // both flushes and frees
#define FLUSH_FLUSH_ALLX86 0x020 // flush x86
#define FLUSH_FREE_TEMPX86 0x040 // flush and free temporary x86 regs
#define FLUSH_FREE_ALLX86 0x080 // free all x86 regs
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
#define FLUSH_PC 0x200 // program counter
#define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit
#define FLUSH_CODE 0x800 // opcode for interpreter
#define FLUSH_EVERYTHING 0x1ff
#define FLUSH_EXCEPTION 0x7ff
#define FLUSH_INTERPRETER 0xfff
#define FLUSH_EVERYTHING 0x1ff
#define FLUSH_EXCEPTION 0x7ff
#define FLUSH_INTERPRETER 0xfff
// no freeing, used when callee won't destroy mmx/xmm regs
#define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_MMX|FLUSH_FLUSH_ALLX86)

View File

@ -55,7 +55,7 @@ int branch; // set for branch
__aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
bool g_cpuFlushedPC, g_recompilingDelaySlot, g_maySignalException;
bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException;
////////////////////////////////////////////////////////////////
// Static Private Variables - R5900 Dynarec
@ -976,13 +976,17 @@ void iFlushCall(int flushtype)
_freeX86reg(ECX);
_freeX86reg(EDX);
if (flushtype & FLUSH_PC && !g_cpuFlushedPC) {
if ((flushtype & FLUSH_PC) && !g_cpuFlushedPC) {
xMOV(ptr32[&cpuRegs.pc], pc);
g_cpuFlushedPC = true;
}
if (flushtype & FLUSH_CODE)
if ((flushtype & FLUSH_CODE) && !g_cpuFlushedCode) {
xMOV(ptr32[&cpuRegs.code], cpuRegs.code);
if (flushtype & FLUSH_CAUSE) {
g_cpuFlushedCode = true;
}
if ((flushtype & FLUSH_CAUSE) && !g_maySignalException) {
if (g_recompilingDelaySlot)
xOR(ptr32[&cpuRegs.CP0.n.Cause], 1 << 31); // BD
g_maySignalException = true;
@ -1135,6 +1139,7 @@ void recompileNextInstruction(int delayslot)
if (!delayslot) {
pc += 4;
g_cpuFlushedPC = false;
g_cpuFlushedCode = false;
} else {
// increment after recompiling so that pc points to the branch during recompilation
g_recompilingDelaySlot = true;
@ -1223,6 +1228,7 @@ void recompileNextInstruction(int delayslot)
if (delayslot) {
pc += 4;
g_cpuFlushedPC = false;
g_cpuFlushedCode = false;
if (g_maySignalException)
xAND(ptr32[&cpuRegs.CP0.n.Cause], ~(1 << 31)); // BD
g_recompilingDelaySlot = false;

View File

@ -18,6 +18,8 @@
extern void _vu0WaitMicro();
extern void _vu0FinishMicro();
typedef void FnType_Void();
//------------------------------------------------------------------
// Macro VU - Helper Macros / Functions
//------------------------------------------------------------------
@ -244,16 +246,16 @@ void recBC2TL() { _setupBranchTest(JZ32, true); }
void COP2_Interlock(bool mBitSync) {
if (cpuRegs.code & 1) {
iFlushCall(FLUSH_NOCONST);
if (mBitSync) CALLFunc((uptr)_vu0WaitMicro);
else CALLFunc((uptr)_vu0FinishMicro);
iFlushCall(FLUSH_EVERYTHING | FLUSH_PC);
if (mBitSync) xCALL(_vu0WaitMicro);
else xCALL(_vu0FinishMicro);
}
}
void TEST_FBRST_RESET(uptr resetFunct, int vuIndex) {
void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex) {
TEST32ItoR(EAX, (vuIndex) ? 0x200 : 0x002);
j8Ptr[0] = JZ8(0);
CALLFunc(resetFunct);
xCALL(resetFunct);
MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
x86SetJ8(j8Ptr[0]);
}
@ -261,6 +263,7 @@ void TEST_FBRST_RESET(uptr resetFunct, int vuIndex) {
static void recCFC2() {
printCOP2("CFC2");
COP2_Interlock(0);
if (!_Rt_) return;
iFlushCall(FLUSH_EVERYTHING);
@ -320,8 +323,8 @@ static void recCTC2() {
}
else MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
TEST_FBRST_RESET((uptr)vu0ResetRegs, 0);
TEST_FBRST_RESET((uptr)vu1ResetRegs, 1);
TEST_FBRST_RESET(vu0ResetRegs, 0);
TEST_FBRST_RESET(vu1ResetRegs, 1);
AND32ItoR(EAX, 0x0C0C);
MOV32RtoM((uptr)&microVU0.regs->VI[REG_FBRST].UL, EAX);