mirror of https://github.com/PCSX2/pcsx2.git
(speedup!) Add missing flushes to COP0 and COP2 (VUmacro execution calls), and subsequently disable *all* XMM freezes. They aren't needed anymore.
Rationale: Pseudonym did the necessary upgrades to the recompilers a couple months ago prepping us for a day when we would no longer need MMX/XMM register freezes. All regs are already being flushed on all memory operations, so I added proper flushing to COP0 and COP2 here, and removed XMM freeze/thaw code entirely. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3375 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
1cc2b210c6
commit
c06cb7b121
|
@ -35,11 +35,14 @@ namespace MMXRegisters
|
||||||
|
|
||||||
__forceinline bool Saved()
|
__forceinline bool Saved()
|
||||||
{
|
{
|
||||||
return ( stack_depth > 0);
|
return false;
|
||||||
|
//return (stack_depth > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void Freeze()
|
__forceinline void Freeze()
|
||||||
{
|
{
|
||||||
|
return;
|
||||||
|
|
||||||
if (!g_EEFreezeRegs) return;
|
if (!g_EEFreezeRegs) return;
|
||||||
|
|
||||||
//DevCon.Warning("MMXRegisters::Freeze: depth[%d]\n", stack_depth);
|
//DevCon.Warning("MMXRegisters::Freeze: depth[%d]\n", stack_depth);
|
||||||
|
@ -83,6 +86,8 @@ namespace MMXRegisters
|
||||||
|
|
||||||
__forceinline void Thaw()
|
__forceinline void Thaw()
|
||||||
{
|
{
|
||||||
|
return;
|
||||||
|
|
||||||
if (!g_EEFreezeRegs) return;
|
if (!g_EEFreezeRegs) return;
|
||||||
|
|
||||||
//DevCon.Warning("MMXRegisters::Thaw: depth[%d]\n", stack_depth);
|
//DevCon.Warning("MMXRegisters::Thaw: depth[%d]\n", stack_depth);
|
||||||
|
@ -138,11 +143,14 @@ namespace XMMRegisters
|
||||||
|
|
||||||
__forceinline bool Saved()
|
__forceinline bool Saved()
|
||||||
{
|
{
|
||||||
return ( stack_depth > 0);
|
return false;
|
||||||
|
//return ( stack_depth > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void Freeze()
|
__forceinline void Freeze()
|
||||||
{
|
{
|
||||||
|
return;
|
||||||
|
|
||||||
if (!g_EEFreezeRegs) return;
|
if (!g_EEFreezeRegs) return;
|
||||||
|
|
||||||
//DevCon.Warning("XMMRegisters::Freeze: depth[%d]\n", Depth());
|
//DevCon.Warning("XMMRegisters::Freeze: depth[%d]\n", Depth());
|
||||||
|
@ -185,6 +193,8 @@ namespace XMMRegisters
|
||||||
|
|
||||||
__forceinline void Thaw()
|
__forceinline void Thaw()
|
||||||
{
|
{
|
||||||
|
return;
|
||||||
|
|
||||||
if (!g_EEFreezeRegs) return;
|
if (!g_EEFreezeRegs) return;
|
||||||
|
|
||||||
//DevCon.Warning("XMMRegisters::Thaw: depth[%d]\n", Depth());
|
//DevCon.Warning("XMMRegisters::Thaw: depth[%d]\n", Depth());
|
||||||
|
@ -238,18 +248,18 @@ namespace Registers
|
||||||
// MMX registers should not be needing freezes anymore (speedup!)
|
// MMX registers should not be needing freezes anymore (speedup!)
|
||||||
__forceinline bool Saved()
|
__forceinline bool Saved()
|
||||||
{
|
{
|
||||||
return (XMMRegisters::Saved() /*|| MMXRegisters::Saved()*/ );
|
return false; //(XMMRegisters::Saved() /*|| MMXRegisters::Saved()*/ );
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void Freeze()
|
__forceinline void Freeze()
|
||||||
{
|
{
|
||||||
XMMRegisters::Freeze();
|
//XMMRegisters::Freeze();
|
||||||
//MMXRegisters::Freeze();
|
//MMXRegisters::Freeze();
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void Thaw()
|
__forceinline void Thaw()
|
||||||
{
|
{
|
||||||
XMMRegisters::Thaw();
|
//XMMRegisters::Thaw();
|
||||||
//MMXRegisters::Thaw();
|
//MMXRegisters::Thaw();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -169,12 +169,12 @@ void recMFC0( void )
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
iFlushCall(FLUSH_NODESTROY);
|
iFlushCall(FLUSH_INTERPRETER);
|
||||||
xCALL( COP0_UpdatePCCR );
|
xCALL( COP0_UpdatePCCR );
|
||||||
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]);
|
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
iFlushCall(FLUSH_NODESTROY);
|
iFlushCall(FLUSH_INTERPRETER);
|
||||||
xCALL( COP0_UpdatePCCR );
|
xCALL( COP0_UpdatePCCR );
|
||||||
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
|
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
|
||||||
break;
|
break;
|
||||||
|
@ -206,7 +206,7 @@ void recMTC0()
|
||||||
switch (_Rd_)
|
switch (_Rd_)
|
||||||
{
|
{
|
||||||
case 12:
|
case 12:
|
||||||
iFlushCall(FLUSH_NODESTROY);
|
iFlushCall(FLUSH_INTERPRETER);
|
||||||
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] );
|
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] );
|
||||||
xCALL( WriteCP0Status );
|
xCALL( WriteCP0Status );
|
||||||
break;
|
break;
|
||||||
|
@ -221,7 +221,7 @@ void recMTC0()
|
||||||
switch(_Imm_ & 0x3F)
|
switch(_Imm_ & 0x3F)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
iFlushCall(FLUSH_NODESTROY);
|
iFlushCall(FLUSH_INTERPRETER);
|
||||||
xCALL( COP0_UpdatePCCR );
|
xCALL( COP0_UpdatePCCR );
|
||||||
xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] );
|
xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] );
|
||||||
xCALL( COP0_DiagnosticPCCR );
|
xCALL( COP0_DiagnosticPCCR );
|
||||||
|
@ -255,7 +255,7 @@ void recMTC0()
|
||||||
switch (_Rd_)
|
switch (_Rd_)
|
||||||
{
|
{
|
||||||
case 12:
|
case 12:
|
||||||
iFlushCall(FLUSH_NODESTROY);
|
iFlushCall(FLUSH_INTERPRETER);
|
||||||
_eeMoveGPRtoR(ECX, _Rt_);
|
_eeMoveGPRtoR(ECX, _Rt_);
|
||||||
xCALL( WriteCP0Status );
|
xCALL( WriteCP0Status );
|
||||||
break;
|
break;
|
||||||
|
@ -270,7 +270,7 @@ void recMTC0()
|
||||||
switch(_Imm_ & 0x3F)
|
switch(_Imm_ & 0x3F)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
iFlushCall(FLUSH_NODESTROY);
|
iFlushCall(FLUSH_INTERPRETER);
|
||||||
xCALL( COP0_UpdatePCCR );
|
xCALL( COP0_UpdatePCCR );
|
||||||
_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_);
|
_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_);
|
||||||
xCALL( COP0_DiagnosticPCCR );
|
xCALL( COP0_DiagnosticPCCR );
|
||||||
|
|
|
@ -346,22 +346,22 @@ extern u16 x86FpuState;
|
||||||
// the code being called is going to modify register allocations -- ie, be doing
|
// the code being called is going to modify register allocations -- ie, be doing
|
||||||
// some kind of recompiling of its own.
|
// some kind of recompiling of its own.
|
||||||
|
|
||||||
#define FLUSH_CACHED_REGS 1
|
#define FLUSH_CACHED_REGS 0x001
|
||||||
#define FLUSH_FLUSH_XMM 2
|
#define FLUSH_FLUSH_XMM 0x002
|
||||||
#define FLUSH_FREE_XMM 4 // both flushes and frees
|
#define FLUSH_FREE_XMM 0x004 // both flushes and frees
|
||||||
#define FLUSH_FLUSH_MMX 8
|
#define FLUSH_FLUSH_MMX 0x008
|
||||||
#define FLUSH_FREE_MMX 16 // both flushes and frees
|
#define FLUSH_FREE_MMX 0x010 // both flushes and frees
|
||||||
#define FLUSH_FLUSH_ALLX86 32 // flush x86
|
#define FLUSH_FLUSH_ALLX86 0x020 // flush x86
|
||||||
#define FLUSH_FREE_TEMPX86 64 // flush and free temporary x86 regs
|
#define FLUSH_FREE_TEMPX86 0x040 // flush and free temporary x86 regs
|
||||||
#define FLUSH_FREE_ALLX86 128 // free all x86 regs
|
#define FLUSH_FREE_ALLX86 0x080 // free all x86 regs
|
||||||
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
|
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
|
||||||
#define FLUSH_PC 0x200 // program counter
|
#define FLUSH_PC 0x200 // program counter
|
||||||
#define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit
|
#define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit
|
||||||
#define FLUSH_CODE 0x800 // opcode for interpreter
|
#define FLUSH_CODE 0x800 // opcode for interpreter
|
||||||
|
|
||||||
#define FLUSH_EVERYTHING 0x1ff
|
#define FLUSH_EVERYTHING 0x1ff
|
||||||
#define FLUSH_EXCEPTION 0x7ff
|
#define FLUSH_EXCEPTION 0x7ff
|
||||||
#define FLUSH_INTERPRETER 0xfff
|
#define FLUSH_INTERPRETER 0xfff
|
||||||
|
|
||||||
// no freeing, used when callee won't destroy mmx/xmm regs
|
// no freeing, used when callee won't destroy mmx/xmm regs
|
||||||
#define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_MMX|FLUSH_FLUSH_ALLX86)
|
#define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_MMX|FLUSH_FLUSH_ALLX86)
|
||||||
|
|
|
@ -55,7 +55,7 @@ int branch; // set for branch
|
||||||
|
|
||||||
__aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
|
__aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
|
||||||
u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
|
u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
|
||||||
bool g_cpuFlushedPC, g_recompilingDelaySlot, g_maySignalException;
|
bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
// Static Private Variables - R5900 Dynarec
|
// Static Private Variables - R5900 Dynarec
|
||||||
|
@ -976,13 +976,17 @@ void iFlushCall(int flushtype)
|
||||||
_freeX86reg(ECX);
|
_freeX86reg(ECX);
|
||||||
_freeX86reg(EDX);
|
_freeX86reg(EDX);
|
||||||
|
|
||||||
if (flushtype & FLUSH_PC && !g_cpuFlushedPC) {
|
if ((flushtype & FLUSH_PC) && !g_cpuFlushedPC) {
|
||||||
xMOV(ptr32[&cpuRegs.pc], pc);
|
xMOV(ptr32[&cpuRegs.pc], pc);
|
||||||
g_cpuFlushedPC = true;
|
g_cpuFlushedPC = true;
|
||||||
}
|
}
|
||||||
if (flushtype & FLUSH_CODE)
|
|
||||||
|
if ((flushtype & FLUSH_CODE) && !g_cpuFlushedCode) {
|
||||||
xMOV(ptr32[&cpuRegs.code], cpuRegs.code);
|
xMOV(ptr32[&cpuRegs.code], cpuRegs.code);
|
||||||
if (flushtype & FLUSH_CAUSE) {
|
g_cpuFlushedCode = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((flushtype & FLUSH_CAUSE) && !g_maySignalException) {
|
||||||
if (g_recompilingDelaySlot)
|
if (g_recompilingDelaySlot)
|
||||||
xOR(ptr32[&cpuRegs.CP0.n.Cause], 1 << 31); // BD
|
xOR(ptr32[&cpuRegs.CP0.n.Cause], 1 << 31); // BD
|
||||||
g_maySignalException = true;
|
g_maySignalException = true;
|
||||||
|
@ -1135,6 +1139,7 @@ void recompileNextInstruction(int delayslot)
|
||||||
if (!delayslot) {
|
if (!delayslot) {
|
||||||
pc += 4;
|
pc += 4;
|
||||||
g_cpuFlushedPC = false;
|
g_cpuFlushedPC = false;
|
||||||
|
g_cpuFlushedCode = false;
|
||||||
} else {
|
} else {
|
||||||
// increment after recompiling so that pc points to the branch during recompilation
|
// increment after recompiling so that pc points to the branch during recompilation
|
||||||
g_recompilingDelaySlot = true;
|
g_recompilingDelaySlot = true;
|
||||||
|
@ -1223,6 +1228,7 @@ void recompileNextInstruction(int delayslot)
|
||||||
if (delayslot) {
|
if (delayslot) {
|
||||||
pc += 4;
|
pc += 4;
|
||||||
g_cpuFlushedPC = false;
|
g_cpuFlushedPC = false;
|
||||||
|
g_cpuFlushedCode = false;
|
||||||
if (g_maySignalException)
|
if (g_maySignalException)
|
||||||
xAND(ptr32[&cpuRegs.CP0.n.Cause], ~(1 << 31)); // BD
|
xAND(ptr32[&cpuRegs.CP0.n.Cause], ~(1 << 31)); // BD
|
||||||
g_recompilingDelaySlot = false;
|
g_recompilingDelaySlot = false;
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
extern void _vu0WaitMicro();
|
extern void _vu0WaitMicro();
|
||||||
extern void _vu0FinishMicro();
|
extern void _vu0FinishMicro();
|
||||||
|
|
||||||
|
typedef void FnType_Void();
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Macro VU - Helper Macros / Functions
|
// Macro VU - Helper Macros / Functions
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
@ -244,16 +246,16 @@ void recBC2TL() { _setupBranchTest(JZ32, true); }
|
||||||
|
|
||||||
void COP2_Interlock(bool mBitSync) {
|
void COP2_Interlock(bool mBitSync) {
|
||||||
if (cpuRegs.code & 1) {
|
if (cpuRegs.code & 1) {
|
||||||
iFlushCall(FLUSH_NOCONST);
|
iFlushCall(FLUSH_EVERYTHING | FLUSH_PC);
|
||||||
if (mBitSync) CALLFunc((uptr)_vu0WaitMicro);
|
if (mBitSync) xCALL(_vu0WaitMicro);
|
||||||
else CALLFunc((uptr)_vu0FinishMicro);
|
else xCALL(_vu0FinishMicro);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void TEST_FBRST_RESET(uptr resetFunct, int vuIndex) {
|
void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex) {
|
||||||
TEST32ItoR(EAX, (vuIndex) ? 0x200 : 0x002);
|
TEST32ItoR(EAX, (vuIndex) ? 0x200 : 0x002);
|
||||||
j8Ptr[0] = JZ8(0);
|
j8Ptr[0] = JZ8(0);
|
||||||
CALLFunc(resetFunct);
|
xCALL(resetFunct);
|
||||||
MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
|
MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
|
||||||
x86SetJ8(j8Ptr[0]);
|
x86SetJ8(j8Ptr[0]);
|
||||||
}
|
}
|
||||||
|
@ -261,6 +263,7 @@ void TEST_FBRST_RESET(uptr resetFunct, int vuIndex) {
|
||||||
static void recCFC2() {
|
static void recCFC2() {
|
||||||
|
|
||||||
printCOP2("CFC2");
|
printCOP2("CFC2");
|
||||||
|
|
||||||
COP2_Interlock(0);
|
COP2_Interlock(0);
|
||||||
if (!_Rt_) return;
|
if (!_Rt_) return;
|
||||||
iFlushCall(FLUSH_EVERYTHING);
|
iFlushCall(FLUSH_EVERYTHING);
|
||||||
|
@ -320,8 +323,8 @@ static void recCTC2() {
|
||||||
}
|
}
|
||||||
else MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
|
else MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
|
||||||
|
|
||||||
TEST_FBRST_RESET((uptr)vu0ResetRegs, 0);
|
TEST_FBRST_RESET(vu0ResetRegs, 0);
|
||||||
TEST_FBRST_RESET((uptr)vu1ResetRegs, 1);
|
TEST_FBRST_RESET(vu1ResetRegs, 1);
|
||||||
|
|
||||||
AND32ItoR(EAX, 0x0C0C);
|
AND32ItoR(EAX, 0x0C0C);
|
||||||
MOV32RtoM((uptr)&microVU0.regs->VI[REG_FBRST].UL, EAX);
|
MOV32RtoM((uptr)&microVU0.regs->VI[REG_FBRST].UL, EAX);
|
||||||
|
|
Loading…
Reference in New Issue