Revamp the FreezeRegs functions.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2056 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2009-10-22 01:20:22 +00:00
parent fb2169f64d
commit 8e493ad2d2
14 changed files with 278 additions and 199 deletions

View File

@ -95,11 +95,28 @@ struct x86CPU_INFO
extern __aligned16 x86CPU_INFO x86caps;
extern u8 g_globalMMXSaved, g_globalXMMSaved;
extern bool g_EEFreezeRegs;
// when using mmx/xmm regs, use; 0 is load
// freezes no matter the state
extern void FreezeXMMRegs(int save);
extern void FreezeMMXRegs(int save);
extern void FreezeRegs(int save);
// When using MMX/XMM registers, wrap the code in Freeze()/Thaw() calls from these namespaces.
// Nesting-counted freeze/thaw interface for the MMX registers.
namespace MMXRegisters
{
// Increments the MMX freeze counter; the register-save sequence only runs on
// the first (outermost) call. Does nothing when g_EEFreezeRegs is false.
void Freeze();
// Returns early if nothing is frozen; otherwise decrements the freeze counter
// and runs the restore sequence only once the counter is back at zero.
// Does nothing when g_EEFreezeRegs is false.
void Thaw();
// True while the MMX freeze counter is above zero.
bool Saved();
};
// Nesting-counted freeze/thaw interface for the XMM registers.
namespace XMMRegisters
{
// Increments the XMM freeze counter; only the first (outermost) call stores
// the XMM registers into g_globalXMMData. Does nothing when g_EEFreezeRegs
// is false.
void Freeze();
// Returns early if nothing is frozen; otherwise decrements the freeze counter
// and reloads the XMM registers from g_globalXMMData only once the counter is
// back at zero. Does nothing when g_EEFreezeRegs is false.
void Thaw();
// True while the XMM freeze counter is above zero.
bool Saved();
};
// Convenience interface that operates on both the XMM and MMX registers.
namespace Registers
{
// Calls XMMRegisters::Freeze() followed by MMXRegisters::Freeze().
void Freeze();
// Calls XMMRegisters::Thaw() followed by MMXRegisters::Thaw().
void Thaw();
// True when either XMMRegisters::Saved() or MMXRegisters::Saved() is true.
bool Saved();
};

View File

@ -17,11 +17,8 @@
#include "internal.h"
#include "tools.h"
// used to make sure regs don't get changed while in recompiler
// use FreezeMMXRegs, FreezeXMMRegs
u8 g_globalMMXSaved = 0;
u8 g_globalXMMSaved = 0;
// To make sure regs don't get changed while in the recompiler,
// use Freeze/Thaw in MMXRegisters, XMMRegisters, & Registers.
__aligned16 u64 g_globalMMXData[8];
__aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
@ -30,22 +27,33 @@ __aligned16 u64 g_globalXMMData[2*iREGCNT_XMM];
// MMX Register Freezing
//
__forceinline void FreezeRegs(int save)
namespace MMXRegisters
{
FreezeXMMRegs(save);
FreezeMMXRegs(save);
}
u8 g_globalMMXSaved = 0;
__forceinline void FreezeMMXRegs(int save)
{
if( !g_EEFreezeRegs ) return;
// Current MMX freeze nesting level, read straight from the counter.
__forceinline u8 Depth()
{
	const u8 nesting = g_globalMMXSaved;
	return nesting;
}
// True while at least one MMX freeze is outstanding.
__forceinline bool Saved()
{
	return Depth() != 0;
}
// True when MMX freezes are nested, i.e. two or more are outstanding.
__forceinline bool SavedRepeatedly()
{
	return Depth() >= 2;
}
__forceinline void Freeze()
{
if (!g_EEFreezeRegs) return;
//DevCon.Notice("FreezeMMXRegs_(%d); [%d]\n", save, g_globalMMXSaved);
if( save )
{
g_globalMMXSaved++;
if( g_globalMMXSaved > 1 )
if (SavedRepeatedly())
{
//DevCon.Notice("MMX Already Saved!\n");
return;
@ -79,17 +87,22 @@ __forceinline void FreezeMMXRegs(int save)
".att_syntax\n" : : [g_globalMMXData]"r"(g_globalMMXData) : "memory"
);
#endif
}
else {
if( g_globalMMXSaved==0 )
__forceinline void Thaw()
{
if (!g_EEFreezeRegs) return;
//DevCon.Notice("FreezeMMXRegs_(%d); [%d]\n", save, g_globalMMXSaved);
if (!Saved())
{
//DevCon.Notice("MMX Not Saved!\n");
return;
}
g_globalMMXSaved--;
if( g_globalMMXSaved > 0 ) return;
if (Saved()) return;
#ifdef _MSC_VER
__asm {
@ -125,21 +138,40 @@ __forceinline void FreezeMMXRegs(int save)
//////////////////////////////////////////////////////////////////////
// XMM Register Freezing
//
__forceinline void FreezeXMMRegs(int save)
namespace XMMRegisters
{
if( !g_EEFreezeRegs ) return;
u8 g_globalXMMSaved = 0;
// Current XMM freeze nesting level, read straight from the counter.
__forceinline u8 Depth()
{
	const u8 nesting = g_globalXMMSaved;
	return nesting;
}
// True while at least one XMM freeze is outstanding.
__forceinline bool Saved()
{
	return Depth() != 0;
}
// True when XMM freezes are nested, i.e. two or more are outstanding.
__forceinline bool SavedRepeatedly()
{
	return Depth() >= 2;
}
__forceinline void Freeze()
{
if (!g_EEFreezeRegs) return;
//DevCon.Notice("FreezeXMMRegs_(%d); [%d]\n", save, g_globalXMMSaved);
if( save )
{
g_globalXMMSaved++;
if( g_globalXMMSaved > 1 ){
if (SavedRepeatedly())
{
//DevCon.Notice("XMM Already saved\n");
return;
}
#ifdef _MSC_VER
__asm {
mov ecx, offset g_globalXMMData
@ -152,8 +184,7 @@ __forceinline void FreezeXMMRegs(int save)
movaps xmmword ptr [ecx+0x60], xmm6
movaps xmmword ptr [ecx+0x70], xmm7
}
#else
#else
__asm__ volatile(
".intel_syntax noprefix\n"
"movaps [%[g_globalXMMData]+0x00], xmm0\n"
@ -166,12 +197,16 @@ __forceinline void FreezeXMMRegs(int save)
"movaps [%[g_globalXMMData]+0x70], xmm7\n"
".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) : "memory"
);
#endif // _MSC_VER
}
else
__forceinline void Thaw()
{
if( g_globalXMMSaved==0 )
if (!g_EEFreezeRegs) return;
//DevCon.Notice("FreezeXMMRegs_(%d); [%d]\n", save, g_globalXMMSaved);
if (!Saved())
{
//DevCon.Notice("XMM Regs not saved!\n");
return;
@ -179,7 +214,7 @@ __forceinline void FreezeXMMRegs(int save)
// TODO: really need to backup all regs?
g_globalXMMSaved--;
if( g_globalXMMSaved > 0 ) return;
if (Saved()) return;
#ifdef _MSC_VER
__asm
@ -194,7 +229,6 @@ __forceinline void FreezeXMMRegs(int save)
movaps xmm6, xmmword ptr [ecx+0x60]
movaps xmm7, xmmword ptr [ecx+0x70]
}
#else
__asm__ volatile(
".intel_syntax noprefix\n"
@ -208,8 +242,35 @@ __forceinline void FreezeXMMRegs(int save)
"movaps xmm7, [%[g_globalXMMData]+0x70]\n"
".att_syntax\n" : : [g_globalXMMData]"r"(g_globalXMMData) : "memory"
);
#endif // _MSC_VER
}
}
};
//////////////////////////////////////////////////////////////////////
// Register Freezing
//
namespace Registers
{
	// Reports whether at least one of the XMM / MMX register sets is frozen.
	__forceinline bool Saved()
	{
		if (XMMRegisters::Saved()) return true;
		return MMXRegisters::Saved();
	}

	// Reports whether at least one register set has nested freezes outstanding.
	__forceinline bool SavedRepeatedly()
	{
		if (XMMRegisters::SavedRepeatedly()) return true;
		return MMXRegisters::SavedRepeatedly();
	}

	// Freezes both register sets: XMM first, then MMX.
	__forceinline void Freeze()
	{
		XMMRegisters::Freeze();
		MMXRegisters::Freeze();
	}

	// Thaws both register sets, in the same XMM-then-MMX order as Freeze().
	__forceinline void Thaw()
	{
		XMMRegisters::Thaw();
		MMXRegisters::Thaw();
	}
}

View File

@ -149,13 +149,13 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
psHu64(GIF_FIFO) = value[0];
psHu64(GIF_FIFO + 8) = value[1];
FreezeRegs(1);
Registers::Freeze();
mtgsThread.PrepDataPacket(GIF_PATH_3, nloop0_packet, 1);
u64* data = (u64*)mtgsThread.GetDataPacketPtr();
data[0] = value[0];
data[1] = value[1];
mtgsThread.SendDataPacket();
FreezeRegs(0);
Registers::Thaw();
}
void __fastcall WriteFIFO_page_7(u32 mem, const mem128_t *value)

View File

@ -133,9 +133,9 @@ int _GIFchain()
static __forceinline void GIFchain()
{
FreezeRegs(1);
Registers::Freeze();
if (gif->qwc) gscycles+= _GIFchain(); /* guessing */
FreezeRegs(0);
Registers::Thaw();
}
static __forceinline bool checkTieBit(u32* &ptag)
@ -508,13 +508,13 @@ void mfifoGIFtransfer(int qwc)
}
}
FreezeRegs(1);
Registers::Freeze();
if (mfifoGIFchain() == -1)
{
Console.WriteLn("GIF dmaChain error size=%d, madr=%lx, tadr=%lx", gif->qwc, gif->madr, gif->tadr);
gifstate = GIF_STATE_STALL;
}
FreezeRegs(0);
Registers::Thaw();
if ((gif->qwc == 0) && (gifstate & GIF_STATE_DONE)) gifstate = GIF_STATE_STALL;
CPU_INT(11,mfifocycles);

View File

@ -77,7 +77,7 @@ static __aligned16 u16 yuv2rgb_temp[3][8];
// This could potentially be improved for SSE4
__releaseinline void yuv2rgb_sse2(void)
{
FreezeXMMRegs(1);
XMMRegisters::Freeze();
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
__asm {
@ -356,7 +356,7 @@ ihatemsvc:
# error Unsupported compiler
#endif
FreezeXMMRegs(0);
XMMRegisters::Thaw();
}
void yuv2rgb_init(void)

View File

@ -184,12 +184,12 @@ void psxDma10(u32 madr, u32 bcr, u32 chcr)
if (eesifbusy[1])
{
FreezeXMMRegs(1);
XMMRegisters::Freeze();
SIF1Dma();
psHu32(SBUS_F240) &= ~0x40;
psHu32(SBUS_F240) &= ~0x100;
psHu32(SBUS_F240) &= ~0x4000;
FreezeXMMRegs(0);
XMMRegisters::Thaw();
}
}

View File

@ -443,12 +443,12 @@ __forceinline void dmaSIF0()
if (iopsifbusy[0])
{
FreezeXMMRegs(1);
XMMRegisters::Freeze();
hwIntcIrq(INTC_SBUS);
SIF0Dma();
psHu32(SBUS_F240) &= ~0x20;
psHu32(SBUS_F240) &= ~0x2000;
FreezeXMMRegs(0);
XMMRegisters::Thaw();
}
}
@ -467,12 +467,12 @@ __forceinline void dmaSIF1()
if (iopsifbusy[1])
{
FreezeXMMRegs(1);
XMMRegisters::Freeze();
SIF1Dma();
psHu32(SBUS_F240) &= ~0x40;
psHu32(SBUS_F240) &= ~0x100;
psHu32(SBUS_F240) &= ~0x4000;
FreezeXMMRegs(0);
XMMRegisters::Thaw();
}
}

View File

@ -224,7 +224,7 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
{
int ret;
FreezeXMMRegs(1);
XMMRegisters::Freeze();
if (vif0.vifpacketsize < vif0.tag.size)
{
if(vif0Regs->offset != 0 || vif0.cl != 0)
@ -238,7 +238,7 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
ProcessMemSkip<0>((vif0.vifpacketsize - ret) << 2, (vif0.cmd & 0xf));
vif0.tag.size -= (vif0.vifpacketsize - ret);
FreezeXMMRegs(0);
XMMRegisters::Thaw();
return vif0.vifpacketsize;
}
@ -271,7 +271,7 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
vif0.cmd = 0;
}
FreezeXMMRegs(0);
XMMRegisters::Thaw();
return ret;
}

View File

@ -261,7 +261,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
}
}
FreezeRegs(1);
Registers::Freeze();
// copy 16 bytes the fast way:
const u64* src = (u64*)splittransfer[0];
mtgsThread.PrepDataPacket(GIF_PATH_2, nloop0_packet, 1);
@ -270,7 +270,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
dst[1] = src[1];
mtgsThread.SendDataPacket();
FreezeRegs(0);
Registers::Thaw();
if (vif1.tag.size == 0) vif1.cmd = 0;
splitptr = 0;
@ -303,21 +303,21 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
//TODO: ret is guaranteed to be qword aligned ?
FreezeRegs(1);
Registers::Freeze();
// Round ret up, just in case it's not 128bit aligned.
const uint count = mtgsThread.PrepDataPacket(GIF_PATH_2, data, (ret + 3) >> 2);
memcpy_fast(mtgsThread.GetDataPacketPtr(), data, count << 4);
mtgsThread.SendDataPacket();
FreezeRegs(0);
Registers::Thaw();
return ret;
}
static int __fastcall Vif1TransUnpack(u32 *data)
{
FreezeXMMRegs(1);
XMMRegisters::Freeze();
if (vif1.vifpacketsize < vif1.tag.size)
{
@ -340,7 +340,7 @@ static int __fastcall Vif1TransUnpack(u32 *data)
vif1.tag.size -= vif1.vifpacketsize;
}
FreezeXMMRegs(0);
XMMRegisters::Thaw();
return vif1.vifpacketsize;
}
else
@ -361,7 +361,7 @@ static int __fastcall Vif1TransUnpack(u32 *data)
vif1.tag.size = 0;
vif1.cmd = 0;
FreezeXMMRegs(0);
XMMRegisters::Thaw();
return ret;
}
@ -720,7 +720,7 @@ void vif1TransferFromMemory()
// stuff from the GS. The *only* way to handle this case safely is to flush the GS
// completely and execute the transfer there-after.
FreezeXMMRegs(1);
XMMRegisters::Freeze();
if (GSreadFIFO2 == NULL)
{
@ -746,7 +746,7 @@ void vif1TransferFromMemory()
psHu64(VIF1_FIFO + 8) = pMem[2*vif1ch->qwc-1];
}
FreezeXMMRegs(0);
XMMRegisters::Thaw();
g_vifCycles += vif1ch->qwc * 2;
vif1ch->madr += vif1ch->qwc * 16; // mgs3 scene changes

View File

@ -121,9 +121,9 @@ static DynGenFunc* iopExitRecompiledCode = NULL;
static void recEventTest()
{
pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
pxAssert(!Registers::Saved());
_cpuBranchTest_Shared();
pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
pxAssert(!Registers::Saved());
}
// parameters:

View File

@ -44,10 +44,10 @@ namespace VU0micro
{
if ((VU0.VI[REG_VPU_STAT].UL & 1) == 0) return;
FreezeXMMRegs(1);
XMMRegisters::Freeze();
if (useMVU0) runVUrec(VU0.VI[REG_TPC].UL, 0x300, 0);
else SuperVUExecuteProgram(VU0.VI[REG_TPC].UL & 0xfff, 0);
FreezeXMMRegs(0);
XMMRegisters::Thaw();
}
}

View File

@ -126,7 +126,7 @@ namespace VU1micro
SysPrintf("(%08d) StartPC = 0x%04x\n", runAmount, VU1.VI[REG_TPC].UL);
#endif
FreezeXMMRegs(1);
XMMRegisters::Freeze();
runCount++;
memcpy_fast((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs));
@ -242,7 +242,7 @@ namespace VU1micro
}
VUtestPause();
FreezeXMMRegs(0);
XMMRegisters::Thaw();
}
}
#else
@ -273,7 +273,7 @@ namespace VU1micro
SysPrintf("(%08d) StartPC = 0x%04x\n", runAmount, VU1.VI[REG_TPC].UL);
#endif
FreezeXMMRegs(1);
XMMRegisters::Freeze();
if (useMVU1) runVUrec(VU1.VI[REG_TPC].UL, 3000000, 1);
else {
if (VU1.VI[REG_TPC].UL >= VU1.maxmicro) {
@ -283,7 +283,7 @@ namespace VU1micro
SuperVUExecuteProgram(VU1.VI[REG_TPC].UL & 0x3fff, 1);
} while( VU0.VI[REG_VPU_STAT].UL&0x100 );
}
FreezeXMMRegs(0);
XMMRegisters::Thaw();
VUtestPause();
}
}

View File

@ -61,7 +61,8 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
{
u32 i;
u32 prev = 0;
FreezeXMMRegs(1);
XMMRegisters::Freeze();
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {
prev |= s_maskwrite[mask&0xff];
@ -103,7 +104,7 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
}
//#endif
}
FreezeXMMRegs(0);
XMMRegisters::Thaw();
}
/*#ifdef __LINUX__

View File

@ -332,9 +332,9 @@ static DynGenFunc* ExitRecompiledCode = NULL;
static void recEventTest()
{
pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
pxAssert(!Registers::Saved());
_cpuBranchTest_Shared();
pxAssert( !g_globalXMMSaved && !g_globalMMXSaved );
pxAssert(!Registers::Saved());
}
// parameters:
@ -1297,7 +1297,7 @@ static void printfn()
static int curcount = 0;
const int skip = 0;
pxAssert( !g_globalMMXSaved && !g_globalXMMSaved );
pxAssert(!Registers::Saved());
//pxAssert( cpuRegs.pc != 0x80001300 );