mirror of https://github.com/PCSX2/pcsx2.git
(speedup!) Add missing flushes to COP0 and COP2 (VUmacro execution calls), and subsequently disable *all* XMM freezes. They aren't needed anymore.
Rationale: Pseudonym did the necessary upgrades to the recompilers a couple of months ago, prepping us for a day when we would no longer need MMX/XMM register freezes. All regs are already being flushed on all memory operations, so I added proper flushing to COP0 and COP2 here, and removed XMM freeze/thaw code entirely.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3375 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
1cc2b210c6
commit
c06cb7b121
|
@ -35,11 +35,14 @@ namespace MMXRegisters
|
|||
|
||||
__forceinline bool Saved()
|
||||
{
|
||||
return ( stack_depth > 0);
|
||||
return false;
|
||||
//return (stack_depth > 0);
|
||||
}
|
||||
|
||||
__forceinline void Freeze()
|
||||
{
|
||||
return;
|
||||
|
||||
if (!g_EEFreezeRegs) return;
|
||||
|
||||
//DevCon.Warning("MMXRegisters::Freeze: depth[%d]\n", stack_depth);
|
||||
|
@ -83,6 +86,8 @@ namespace MMXRegisters
|
|||
|
||||
__forceinline void Thaw()
|
||||
{
|
||||
return;
|
||||
|
||||
if (!g_EEFreezeRegs) return;
|
||||
|
||||
//DevCon.Warning("MMXRegisters::Thaw: depth[%d]\n", stack_depth);
|
||||
|
@ -138,11 +143,14 @@ namespace XMMRegisters
|
|||
|
||||
__forceinline bool Saved()
|
||||
{
|
||||
return ( stack_depth > 0);
|
||||
return false;
|
||||
//return ( stack_depth > 0);
|
||||
}
|
||||
|
||||
__forceinline void Freeze()
|
||||
{
|
||||
return;
|
||||
|
||||
if (!g_EEFreezeRegs) return;
|
||||
|
||||
//DevCon.Warning("XMMRegisters::Freeze: depth[%d]\n", Depth());
|
||||
|
@ -185,6 +193,8 @@ namespace XMMRegisters
|
|||
|
||||
__forceinline void Thaw()
|
||||
{
|
||||
return;
|
||||
|
||||
if (!g_EEFreezeRegs) return;
|
||||
|
||||
//DevCon.Warning("XMMRegisters::Thaw: depth[%d]\n", Depth());
|
||||
|
@ -238,18 +248,18 @@ namespace Registers
|
|||
// MMX registers should not be needing freezes anymore (speedup!)
|
||||
__forceinline bool Saved()
|
||||
{
|
||||
return (XMMRegisters::Saved() /*|| MMXRegisters::Saved()*/ );
|
||||
return false; //(XMMRegisters::Saved() /*|| MMXRegisters::Saved()*/ );
|
||||
}
|
||||
|
||||
__forceinline void Freeze()
|
||||
{
|
||||
XMMRegisters::Freeze();
|
||||
//XMMRegisters::Freeze();
|
||||
//MMXRegisters::Freeze();
|
||||
}
|
||||
|
||||
__forceinline void Thaw()
|
||||
{
|
||||
XMMRegisters::Thaw();
|
||||
//XMMRegisters::Thaw();
|
||||
//MMXRegisters::Thaw();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -169,12 +169,12 @@ void recMFC0( void )
|
|||
break;
|
||||
|
||||
case 1:
|
||||
iFlushCall(FLUSH_NODESTROY);
|
||||
iFlushCall(FLUSH_INTERPRETER);
|
||||
xCALL( COP0_UpdatePCCR );
|
||||
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]);
|
||||
break;
|
||||
case 3:
|
||||
iFlushCall(FLUSH_NODESTROY);
|
||||
iFlushCall(FLUSH_INTERPRETER);
|
||||
xCALL( COP0_UpdatePCCR );
|
||||
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
|
||||
break;
|
||||
|
@ -206,7 +206,7 @@ void recMTC0()
|
|||
switch (_Rd_)
|
||||
{
|
||||
case 12:
|
||||
iFlushCall(FLUSH_NODESTROY);
|
||||
iFlushCall(FLUSH_INTERPRETER);
|
||||
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] );
|
||||
xCALL( WriteCP0Status );
|
||||
break;
|
||||
|
@ -221,7 +221,7 @@ void recMTC0()
|
|||
switch(_Imm_ & 0x3F)
|
||||
{
|
||||
case 0:
|
||||
iFlushCall(FLUSH_NODESTROY);
|
||||
iFlushCall(FLUSH_INTERPRETER);
|
||||
xCALL( COP0_UpdatePCCR );
|
||||
xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] );
|
||||
xCALL( COP0_DiagnosticPCCR );
|
||||
|
@ -255,7 +255,7 @@ void recMTC0()
|
|||
switch (_Rd_)
|
||||
{
|
||||
case 12:
|
||||
iFlushCall(FLUSH_NODESTROY);
|
||||
iFlushCall(FLUSH_INTERPRETER);
|
||||
_eeMoveGPRtoR(ECX, _Rt_);
|
||||
xCALL( WriteCP0Status );
|
||||
break;
|
||||
|
@ -270,7 +270,7 @@ void recMTC0()
|
|||
switch(_Imm_ & 0x3F)
|
||||
{
|
||||
case 0:
|
||||
iFlushCall(FLUSH_NODESTROY);
|
||||
iFlushCall(FLUSH_INTERPRETER);
|
||||
xCALL( COP0_UpdatePCCR );
|
||||
_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_);
|
||||
xCALL( COP0_DiagnosticPCCR );
|
||||
|
|
|
@ -346,22 +346,22 @@ extern u16 x86FpuState;
|
|||
// the code being called is going to modify register allocations -- ie, be doing
|
||||
// some kind of recompiling of its own.
|
||||
|
||||
#define FLUSH_CACHED_REGS 1
|
||||
#define FLUSH_FLUSH_XMM 2
|
||||
#define FLUSH_FREE_XMM 4 // both flushes and frees
|
||||
#define FLUSH_FLUSH_MMX 8
|
||||
#define FLUSH_FREE_MMX 16 // both flushes and frees
|
||||
#define FLUSH_FLUSH_ALLX86 32 // flush x86
|
||||
#define FLUSH_FREE_TEMPX86 64 // flush and free temporary x86 regs
|
||||
#define FLUSH_FREE_ALLX86 128 // free all x86 regs
|
||||
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
|
||||
#define FLUSH_PC 0x200 // program counter
|
||||
#define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit
|
||||
#define FLUSH_CODE 0x800 // opcode for interpreter
|
||||
#define FLUSH_CACHED_REGS 0x001
|
||||
#define FLUSH_FLUSH_XMM 0x002
|
||||
#define FLUSH_FREE_XMM 0x004 // both flushes and frees
|
||||
#define FLUSH_FLUSH_MMX 0x008
|
||||
#define FLUSH_FREE_MMX 0x010 // both flushes and frees
|
||||
#define FLUSH_FLUSH_ALLX86 0x020 // flush x86
|
||||
#define FLUSH_FREE_TEMPX86 0x040 // flush and free temporary x86 regs
|
||||
#define FLUSH_FREE_ALLX86 0x080 // free all x86 regs
|
||||
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
|
||||
#define FLUSH_PC 0x200 // program counter
|
||||
#define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit
|
||||
#define FLUSH_CODE 0x800 // opcode for interpreter
|
||||
|
||||
#define FLUSH_EVERYTHING 0x1ff
|
||||
#define FLUSH_EXCEPTION 0x7ff
|
||||
#define FLUSH_INTERPRETER 0xfff
|
||||
#define FLUSH_EVERYTHING 0x1ff
|
||||
#define FLUSH_EXCEPTION 0x7ff
|
||||
#define FLUSH_INTERPRETER 0xfff
|
||||
|
||||
// no freeing, used when callee won't destroy mmx/xmm regs
|
||||
#define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_MMX|FLUSH_FLUSH_ALLX86)
|
||||
|
|
|
@ -55,7 +55,7 @@ int branch; // set for branch
|
|||
|
||||
__aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
|
||||
u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
|
||||
bool g_cpuFlushedPC, g_recompilingDelaySlot, g_maySignalException;
|
||||
bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException;
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Static Private Variables - R5900 Dynarec
|
||||
|
@ -976,13 +976,17 @@ void iFlushCall(int flushtype)
|
|||
_freeX86reg(ECX);
|
||||
_freeX86reg(EDX);
|
||||
|
||||
if (flushtype & FLUSH_PC && !g_cpuFlushedPC) {
|
||||
if ((flushtype & FLUSH_PC) && !g_cpuFlushedPC) {
|
||||
xMOV(ptr32[&cpuRegs.pc], pc);
|
||||
g_cpuFlushedPC = true;
|
||||
}
|
||||
if (flushtype & FLUSH_CODE)
|
||||
|
||||
if ((flushtype & FLUSH_CODE) && !g_cpuFlushedCode) {
|
||||
xMOV(ptr32[&cpuRegs.code], cpuRegs.code);
|
||||
if (flushtype & FLUSH_CAUSE) {
|
||||
g_cpuFlushedCode = true;
|
||||
}
|
||||
|
||||
if ((flushtype & FLUSH_CAUSE) && !g_maySignalException) {
|
||||
if (g_recompilingDelaySlot)
|
||||
xOR(ptr32[&cpuRegs.CP0.n.Cause], 1 << 31); // BD
|
||||
g_maySignalException = true;
|
||||
|
@ -1135,6 +1139,7 @@ void recompileNextInstruction(int delayslot)
|
|||
if (!delayslot) {
|
||||
pc += 4;
|
||||
g_cpuFlushedPC = false;
|
||||
g_cpuFlushedCode = false;
|
||||
} else {
|
||||
// increment after recompiling so that pc points to the branch during recompilation
|
||||
g_recompilingDelaySlot = true;
|
||||
|
@ -1223,6 +1228,7 @@ void recompileNextInstruction(int delayslot)
|
|||
if (delayslot) {
|
||||
pc += 4;
|
||||
g_cpuFlushedPC = false;
|
||||
g_cpuFlushedCode = false;
|
||||
if (g_maySignalException)
|
||||
xAND(ptr32[&cpuRegs.CP0.n.Cause], ~(1 << 31)); // BD
|
||||
g_recompilingDelaySlot = false;
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
extern void _vu0WaitMicro();
|
||||
extern void _vu0FinishMicro();
|
||||
|
||||
typedef void FnType_Void();
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Macro VU - Helper Macros / Functions
|
||||
//------------------------------------------------------------------
|
||||
|
@ -244,16 +246,16 @@ void recBC2TL() { _setupBranchTest(JZ32, true); }
|
|||
|
||||
void COP2_Interlock(bool mBitSync) {
|
||||
if (cpuRegs.code & 1) {
|
||||
iFlushCall(FLUSH_NOCONST);
|
||||
if (mBitSync) CALLFunc((uptr)_vu0WaitMicro);
|
||||
else CALLFunc((uptr)_vu0FinishMicro);
|
||||
iFlushCall(FLUSH_EVERYTHING | FLUSH_PC);
|
||||
if (mBitSync) xCALL(_vu0WaitMicro);
|
||||
else xCALL(_vu0FinishMicro);
|
||||
}
|
||||
}
|
||||
|
||||
void TEST_FBRST_RESET(uptr resetFunct, int vuIndex) {
|
||||
void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex) {
|
||||
TEST32ItoR(EAX, (vuIndex) ? 0x200 : 0x002);
|
||||
j8Ptr[0] = JZ8(0);
|
||||
CALLFunc(resetFunct);
|
||||
xCALL(resetFunct);
|
||||
MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
}
|
||||
|
@ -261,6 +263,7 @@ void TEST_FBRST_RESET(uptr resetFunct, int vuIndex) {
|
|||
static void recCFC2() {
|
||||
|
||||
printCOP2("CFC2");
|
||||
|
||||
COP2_Interlock(0);
|
||||
if (!_Rt_) return;
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
@ -320,8 +323,8 @@ static void recCTC2() {
|
|||
}
|
||||
else MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
|
||||
|
||||
TEST_FBRST_RESET((uptr)vu0ResetRegs, 0);
|
||||
TEST_FBRST_RESET((uptr)vu1ResetRegs, 1);
|
||||
TEST_FBRST_RESET(vu0ResetRegs, 0);
|
||||
TEST_FBRST_RESET(vu1ResetRegs, 1);
|
||||
|
||||
AND32ItoR(EAX, 0x0C0C);
|
||||
MOV32RtoM((uptr)&microVU0.regs->VI[REG_FBRST].UL, EAX);
|
||||
|
|
Loading…
Reference in New Issue