MTGS now force-freezes all XMM/MMX registers prior to kernel calls (Sleep/SetEvent), because some versions of Windows and drivers like to muck with them.

FreezeMMXRegs / FreezeXMMRegs now track freezes and thaws, so that nested freezes won't end up unfreezing registers too early anymore.  Plus several code cleanups and ugly macro removals to a handful of other modules.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@399 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
Jake.Stine 2008-12-08 02:21:25 +00:00 committed by Gregory Hainaut
parent 8ccd60b450
commit f7fc484e58
12 changed files with 200 additions and 202 deletions

View File

@ -440,17 +440,27 @@ __forceinline void gsWaitGS()
{
// Blocks the calling thread until the GS ring buffer read position
// (g_pGSRingPos) has caught up with the write position (g_pGSWritePos).
// Returns immediately when CHECK_MULTIGS is false.
if( !CHECK_MULTIGS ) return;
// Freeze registers because some kernel code likes to destroy them
FreezeXMMRegs(1);
FreezeMMXRegs(1);
GS_SETEVENT();
// Both positions are re-read through volatile-qualified accesses on every
// iteration; _TIMESLICE() is invoked between polls.
while( *(volatile PU8*)&g_pGSRingPos != *(volatile PU8*)&g_pGSWritePos )
_TIMESLICE();
// Thaw: one call for each freeze issued above.
FreezeXMMRegs(0);
FreezeMMXRegs(0);
}
// Sets the gsEvent flag and releases a timeslice.
// For use in loops that wait on the GS thread to do certain things.
// XMM and MMX registers are frozen before GS_SETEVENT()/_TIMESLICE() and
// thawed afterwards; m_mtgsCopyCommandTally is reset to zero before returning.
static void gsSetEventWait()
{
// Freeze registers because some kernel code likes to destroy them
FreezeXMMRegs(1);
FreezeMMXRegs(1);
GS_SETEVENT();
_TIMESLICE();
// Thaw: one call for each freeze issued above.
FreezeXMMRegs(0);
FreezeMMXRegs(0);
// NOTE(review): presumably the tally counts copy commands issued since the
// last event was signalled -- confirm against its other users.
m_mtgsCopyCommandTally = 0;
}

View File

@ -86,7 +86,6 @@ void gsDynamicSkipEnable();
// mem and size are the ones from GSRingBufCopy
extern void GSRINGBUF_DONECOPY(const u8 *mem, u32 size);
extern void GS_SETEVENT();
extern void gsWaitGS();
// used for resetting GIF fifo

View File

@ -341,16 +341,40 @@ void hwRead128(u32 mem, u64 *out) {
out[1] = psHu64(mem+8);
}
HW_LOG("Unknown Hardware Read 128 at %x\n",mem);
HW_LOG("Unknown Hardware Read 128 at %x\n",mem);
}
// dark cloud2 uses it
#define DmaExec8(name, num) { \
psHu8(mem) = (u8)value; \
if ((psHu8(mem) & 0x1) && (psHu32(DMAC_CTRL) & 0x1)) { \
/*SysPrintf("Running DMA 8 %x\n", psHu32(mem & ~0x1));*/ \
dma##name(); \
} \
// Handles an 8-bit write to a DMA channel register (Dark Cloud 2 uses 8-bit
// DMA register writes).  The byte is stored at 'mem' first; the channel
// handler 'func' is then invoked only when bit 0 of the stored byte and bit 0
// of DMAC_CTRL are both set.
static __forceinline void DmaExec8( void (*func)(), u32 mem, u8 value )
{
	psHu8(mem) = (u8)value;

	// Nothing to start unless the written byte requests it...
	if( !(psHu8(mem) & 0x1) ) return;
	// ...and DMAC_CTRL bit 0 is set.
	if( !(psHu32(DMAC_CTRL) & 0x1) ) return;

	/*SysPrintf("Running DMA 8 %x\n", psHu32(mem & ~0x1));*/
	func();
}
// Handles a 16-bit write to a DMA channel register.  The halfword is stored
// at 'mem' first; the channel handler 'func' is then invoked only when bit
// 0x100 of the stored halfword and bit 0 of DMAC_CTRL are both set.
static __forceinline void DmaExec16( void (*func)(), u32 mem, u16 value )
{
	psHu16(mem) = (u16)value;

	// Nothing to start unless the written halfword requests it...
	if( !(psHu16(mem) & 0x100) ) return;
	// ...and DMAC_CTRL bit 0 is set.
	if( !(psHu32(DMAC_CTRL) & 0x1) ) return;

	//SysPrintf("16bit DMA Start\n");
	func();
}
// Handles a 32-bit write to a DMA channel register, then invokes the channel
// handler 'func' if the stored value has bit 0x100 set and DMAC_CTRL has
// bit 0 set.
static void DmaExec( void (*func)(), u32 mem, u32 value )
{
	// Chain mode ((value & 0xc) == 0x4) with an all-zero upper half: the
	// hardware did not set a tag, so the tag already held in the register
	// is kept and only the low 16 bits are replaced.
	const int keepOldTag = ((value & 0xc) == 0x4) && ((value & 0xffff0000) == 0);

	if( keepOldTag )
		psHu32(mem) = (psHu32(mem) & 0xFFFF0000) | (u16)value;
	else
		psHu32(mem) = (u32)value;	// Normal mode etc: write whatever the hardware sends

	if( !(psHu32(mem) & 0x100) ) return;
	if( !(psHu32(DMAC_CTRL) & 0x1) ) return;

	func();
}
char sio_buffer[1024];
@ -404,55 +428,55 @@ void hwWrite8(u32 mem, u8 value) {
break;
case 0x10008001: // dma0 - vif0
DMA_LOG("VIF0dma %lx\n", value);
DmaExec8(VIF0, 0);
DmaExec8(dmaVIF0, mem, value);
break;
case 0x10009001: // dma1 - vif1
DMA_LOG("VIF1dma %lx\n", value);
DmaExec8(VIF1, 1);
DmaExec8(dmaVIF1, mem, value);
break;
case 0x1000a001: // dma2 - gif
DMA_LOG("0x%8.8x hwWrite8: GSdma %lx 0x%lx\n", cpuRegs.cycle, value);
DmaExec8(GIF, 2);
DmaExec8(dmaGIF, mem, value);
break;
case 0x1000b001: // dma3 - fromIPU
DMA_LOG("IPU0dma %lx\n", value);
DmaExec8(IPU0, 3);
DmaExec8(dmaIPU0, mem, value);
break;
case 0x1000b401: // dma4 - toIPU
#ifdef DMA_LOG
DMA_LOG("IPU1dma %lx\n", value);
#endif
DmaExec8(IPU1, 4);
DmaExec8(dmaIPU1, mem, value);
break;
case 0x1000c001: // dma5 - sif0
DMA_LOG("SIF0dma %lx\n", value);
// if (value == 0) psxSu32(0x30) = 0x40000;
DmaExec8(SIF0, 5);
DmaExec8(dmaSIF0, mem, value);
break;
case 0x1000c401: // dma6 - sif1
DMA_LOG("SIF1dma %lx\n", value);
DmaExec8(SIF1, 6);
DmaExec8(dmaSIF1, mem, value);
break;
case 0x1000c801: // dma7 - sif2
DMA_LOG("SIF2dma %lx\n", value);
DmaExec8(SIF2, 7);
DmaExec8(dmaSIF2, mem, value);
break;
case 0x1000d001: // dma8 - fromSPR
DMA_LOG("fromSPRdma8 %lx\n", value);
DmaExec8(SPR0, 8);
DmaExec8(dmaSPR0, mem, value);
break;
case 0x1000d401: // dma9 - toSPR
DMA_LOG("toSPRdma8 %lx\n", value);
DmaExec8(SPR1, 9);
DmaExec8(dmaSPR1, mem, value);
break;
case 0x1000f592: // DMAC_ENABLEW
@ -489,14 +513,6 @@ void hwWrite8(u32 mem, u8 value) {
}
}
#define DmaExec16(name, num) { \
psHu16(mem) = (u16)value; \
if ((psHu16(mem) & 0x100) && (psHu32(DMAC_CTRL) & 0x1)) { \
SysPrintf("16bit DMA Start\n"); \
dma##name(); \
} \
}
void hwWrite16(u32 mem, u16 value)
{
#ifdef PCSX2_DEVBUILD
@ -506,13 +522,13 @@ void hwWrite16(u32 mem, u16 value)
switch(mem) {
case 0x10008000: // dma0 - vif0
DMA_LOG("VIF0dma %lx\n", value);
DmaExec16(VIF0, 0);
DmaExec16(dmaVIF0, mem, value);
break;
// Latest Fix for Florin by asadr (VIF1)
case 0x10009000: // dma1 - vif1 - chcr
DMA_LOG("VIF1dma CHCR %lx\n", value);
DmaExec16(VIF1, 1);
DmaExec16(dmaVIF1, mem, value);
break;
#ifdef HW_LOG
@ -545,7 +561,7 @@ void hwWrite16(u32 mem, u16 value)
case 0x1000a000: // dma2 - gif
DMA_LOG("0x%8.8x hwWrite32: GSdma %lx\n", cpuRegs.cycle, value);
DmaExec16(GIF, 2);
DmaExec16(dmaGIF, mem, value);
break;
#ifdef HW_LOG
@ -576,7 +592,7 @@ void hwWrite16(u32 mem, u16 value)
#endif
case 0x1000b000: // dma3 - fromIPU
DMA_LOG("IPU0dma %lx\n", value);
DmaExec16(IPU0, 3);
DmaExec16(dmaIPU0, mem, value);
break;
#ifdef HW_LOG
@ -599,7 +615,7 @@ void hwWrite16(u32 mem, u16 value)
#endif
case 0x1000b400: // dma4 - toIPU
DMA_LOG("IPU1dma %lx\n", value);
DmaExec16(IPU1, 4);
DmaExec16(dmaIPU1, mem, value);
break;
#ifdef HW_LOG
case 0x1000b410:
@ -623,7 +639,7 @@ void hwWrite16(u32 mem, u16 value)
case 0x1000c000: // dma5 - sif0
DMA_LOG("SIF0dma %lx\n", value);
// if (value == 0) psxSu32(0x30) = 0x40000;
DmaExec16(SIF0, 5);
DmaExec16(dmaSIF0, mem, value);
break;
case 0x1000c002:
@ -631,7 +647,7 @@ void hwWrite16(u32 mem, u16 value)
break;
case 0x1000c400: // dma6 - sif1
DMA_LOG("SIF1dma %lx\n", value);
DmaExec16(SIF1, 6);
DmaExec16(dmaSIF1, mem, value);
break;
#ifdef HW_LOG
@ -650,19 +666,19 @@ void hwWrite16(u32 mem, u16 value)
case 0x1000c800: // dma7 - sif2
DMA_LOG("SIF2dma %lx\n", value);
DmaExec16(SIF2, 7);
DmaExec16(dmaSIF2, mem, value);
break;
case 0x1000c802:
//?
break;
case 0x1000d000: // dma8 - fromSPR
DMA_LOG("fromSPRdma %lx\n", value);
DmaExec16(SPR0, 8);
DmaExec16(dmaSPR0, mem, value);
break;
case 0x1000d400: // dma9 - toSPR
DMA_LOG("toSPRdma %lx\n", value);
DmaExec16(SPR1, 9);
DmaExec16(dmaSPR1, mem, value);
break;
case 0x1000f592: // DMAC_ENABLEW
psHu16(0xf592) = value;
@ -713,17 +729,6 @@ void hwWrite16(u32 mem, u16 value)
HW_LOG("Unknown Hardware write 16 at %x with value %x\n",mem,value);
}
#define DmaExec(name, num) { \
/* Keep the old tag if in chain mode and hw doesnt set it*/ \
if( (value & 0xc) == 0x4 && (value & 0xffff0000) == 0) \
psHu32(mem) = (psHu32(mem) & 0xFFFF0000) | (u16)value; \
else /* Else (including Normal mode etc) write whatever the hardware sends*/ \
psHu32(mem) = (u32)value; \
\
if ((psHu32(mem) & 0x100) && (psHu32(DMAC_CTRL) & 0x1)) { \
dma##name(); \
} \
}
void hwWrite32(u32 mem, u32 value) {
@ -782,12 +787,12 @@ void hwWrite32(u32 mem, u32 value) {
case 0x10008000: // dma0 - vif0
DMA_LOG("VIF0dma %lx\n", value);
DmaExec(VIF0, 0);
DmaExec(dmaVIF0, mem, value);
break;
//------------------------------------------------------------------
case 0x10009000: // dma1 - vif1 - chcr
DMA_LOG("VIF1dma CHCR %lx\n", value);
DmaExec(VIF1, 1);
DmaExec(dmaVIF1, mem, value);
break;
case 0x10009010: // dma1 - vif1 - madr
HW_LOG("VIF1dma Madr %lx\n", value);
@ -816,7 +821,7 @@ void hwWrite32(u32 mem, u32 value) {
//------------------------------------------------------------------
case 0x1000a000: // dma2 - gif
DMA_LOG("0x%8.8x hwWrite32: GSdma %lx\n", cpuRegs.cycle, value);
DmaExec(GIF, 2);
DmaExec(dmaGIF, mem, value);
break;
case 0x1000a010:
psHu32(mem) = value;//dma2 madr
@ -845,7 +850,7 @@ void hwWrite32(u32 mem, u32 value) {
//------------------------------------------------------------------
case 0x1000b000: // dma3 - fromIPU
DMA_LOG("IPU0dma %lx\n", value);
DmaExec(IPU0, 3);
DmaExec(dmaIPU0, mem, value);
break;
//------------------------------------------------------------------
case 0x1000b010:
@ -867,7 +872,7 @@ void hwWrite32(u32 mem, u32 value) {
//------------------------------------------------------------------
case 0x1000b400: // dma4 - toIPU
DMA_LOG("IPU1dma %lx\n", value);
DmaExec(IPU1, 4);
DmaExec(dmaIPU1, mem, value);
break;
//------------------------------------------------------------------
case 0x1000b410:
@ -890,12 +895,12 @@ void hwWrite32(u32 mem, u32 value) {
case 0x1000c000: // dma5 - sif0
DMA_LOG("SIF0dma %lx\n", value);
//if (value == 0) psxSu32(0x30) = 0x40000;
DmaExec(SIF0, 5);
DmaExec(dmaSIF0, mem, value);
break;
//------------------------------------------------------------------
case 0x1000c400: // dma6 - sif1
DMA_LOG("SIF1dma %lx\n", value);
DmaExec(SIF1, 6);
DmaExec(dmaSIF1, mem, value);
break;
case 0x1000c420: // dma6 - sif1 - qwc
HW_LOG("SIF1dma QWC = %lx\n", value);
@ -908,17 +913,17 @@ void hwWrite32(u32 mem, u32 value) {
//------------------------------------------------------------------
case 0x1000c800: // dma7 - sif2
DMA_LOG("SIF2dma %lx\n", value);
DmaExec(SIF2, 7);
DmaExec(dmaSIF2, mem, value);
break;
//------------------------------------------------------------------
case 0x1000d000: // dma8 - fromSPR
DMA_LOG("fromSPRdma %lx\n", value);
DmaExec(SPR0, 8);
DmaExec(dmaSPR0, mem, value);
break;
//------------------------------------------------------------------
case 0x1000d400: // dma9 - toSPR
DMA_LOG("toSPRdma %lx\n", value);
DmaExec(SPR1, 9);
DmaExec(dmaSPR1, mem, value);
break;
//------------------------------------------------------------------
case 0x1000e000: // DMAC_CTRL
@ -1051,7 +1056,7 @@ void hwWrite64(u32 mem, u64 value) {
case 0x1000a000: // dma2 - gif
DMA_LOG("0x%8.8x hwWrite64: GSdma %lx\n", cpuRegs.cycle, value);
DmaExec(GIF, 2);
DmaExec(dmaGIF, mem, value);
break;
#ifdef HW_LOG

View File

@ -224,7 +224,7 @@ int IsBIOS(char *filename, char *description);
#define FreezeMMXRegs(save)
#define FreezeXMMRegs(save)
#else
void FreezeXMMRegs_(int save);
extern void FreezeXMMRegs_(int save);
extern u32 g_EEFreezeRegs;
#define FreezeXMMRegs(save) if( g_EEFreezeRegs ) { FreezeXMMRegs_(save); }
@ -237,18 +237,6 @@ void FreezeMMXRegs_(int save);
#endif
// define a PCS2 specific memcpy and make sure it is used all in real-time code
#if _MSC_VER >= 1400 // vs2005+ uses xmm/mmx in memcpy
__forceinline void memcpy_pcsx2(void* dest, const void* src, size_t n)
{
//FreezeMMXRegs(1); // mmx not used
FreezeXMMRegs(1);
memcpy(dest, src, n);
// have to be unfrozen by parent call!
}
#else
#define memcpy_pcsx2 memcpy
#endif
#ifdef PCSX2_NORECBUILD
#define memcpy_fast memcpy

View File

@ -513,7 +513,7 @@ static __forceinline void _cpuBranchTest_Shared()
// Apply the hsync counter's nextCycle
cpuSetNextBranch( counters[4].sCycle, counters[4].CycleT );
// Apply other counter nextCycles
// Apply vsync and other counter nextCycles
cpuSetNextBranch( nextsCounter, nextCounter );
}
@ -522,11 +522,12 @@ static __forceinline void _cpuBranchTest_Shared()
extern u8 g_globalXMMSaved;
X86_32CODE(extern u8 g_globalMMXSaved;)
#endif
#endif
void cpuBranchTest()
{
#ifndef PCSX2_NORECBUILD
// cpuBranchTest should be called from the recompiler only.
assert( Cpu == &recCpu );
#ifdef PCSX2_DEVBUILD
// don't remove this check unless doing an official release
if( g_globalXMMSaved X86_32CODE(|| g_globalMMXSaved) )
@ -534,7 +535,6 @@ void cpuBranchTest()
assert( !g_globalXMMSaved X86_32CODE(&& !g_globalMMXSaved) );
#endif
g_EEFreezeRegs = 0;
#endif
// Perform counters, ints, and IOP updates:
_cpuBranchTest_Shared();
@ -543,18 +543,15 @@ void cpuBranchTest()
if (VU0.VI[REG_VPU_STAT].UL & 0x1)
{
FreezeXMMRegs(1);
// We're in a BranchTest. All dynarec registers are flushed
// so there is no need to freeze registers here.
Cpu->ExecuteVU0Block();
FreezeXMMRegs(0);
}
#ifndef PCSX2_NORECBUILD
#ifdef PCSX2_DEVBUILD
assert( !g_globalXMMSaved X86_32CODE(&& !g_globalMMXSaved) );
#endif
g_EEFreezeRegs = 1;
#endif
}
#endif
__forceinline void CPU_INT( u32 n, s32 ecycle)
{

View File

@ -214,14 +214,10 @@ __forceinline void vif1FLUSH() {
_cycles = VU1.cycle;
if( VU0.VI[REG_VPU_STAT].UL & 0x100 ) {
//FreezeXMMRegs(1);
do {
Cpu->ExecuteVU1Block();
} while(VU0.VI[REG_VPU_STAT].UL & 0x100);
// FreezeXMMRegs(0);
// FreezeMMXRegs(0);
//FreezeXMMRegs(0);
g_vifCycles+= (VU1.cycle - _cycles)*BIAS;
}
}

View File

@ -335,7 +335,7 @@ void recERET()
MOV32MtoR( ECX, (uptr)&cpuRegs.cycle );
MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc );
MOV32RtoM( (uptr)&g_nextBranchCycle, ECX );
iFlushCall(FLUSH_EVERYTHING);
iFlushCall(FLUSH_NOCONST);
CALLFunc( (uptr)ERET );
}
@ -349,7 +349,7 @@ void recEI()
MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc );
MOV32RtoM( (uptr)&g_nextBranchCycle, ECX );
iFlushCall(FLUSH_EVERYTHING);
iFlushCall(FLUSH_NOCONST);
CALLFunc( (uptr)EI );
}
@ -363,7 +363,7 @@ void recDI()
//MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc );
MOV32RtoM( (uptr)&g_nextBranchCycle, ECX );
iFlushCall(FLUSH_EVERYTHING);
iFlushCall(0);
CALLFunc( (uptr)DI );
}

View File

@ -839,7 +839,6 @@ void _deleteFPtoXMMreg(int reg, int flush)
void _freeXMMreg(int xmmreg)
{
VURegs *VU = xmmregs[xmmreg].VU ? &VU1 : &VU0;
assert( xmmreg < XMMREGS );
if (!xmmregs[xmmreg].inuse) return;
@ -847,6 +846,8 @@ void _freeXMMreg(int xmmreg)
if (xmmregs[xmmreg].mode & MODE_WRITE) {
switch (xmmregs[xmmreg].type) {
case XMMTYPE_VFREG:
{
const VURegs *VU = xmmregs[xmmreg].VU ? &VU1 : &VU0;
if( xmmregs[xmmreg].mode & MODE_VUXYZ )
{
if( xmmregs[xmmreg].mode & MODE_VUZ )
@ -882,9 +883,12 @@ void _freeXMMreg(int xmmreg)
{
SSE_MOVAPS_XMM_to_M128(VU_VFx_ADDR(xmmregs[xmmreg].reg), xmmreg);
}
}
break;
case XMMTYPE_ACC:
{
const VURegs *VU = xmmregs[xmmreg].VU ? &VU1 : &VU0;
if( xmmregs[xmmreg].mode & MODE_VUXYZ )
{
if( xmmregs[xmmreg].mode & MODE_VUZ )
@ -921,7 +925,8 @@ void _freeXMMreg(int xmmreg)
{
SSE_MOVAPS_XMM_to_M128(VU_ACCx_ADDR, xmmreg);
}
break;
}
break;
case XMMTYPE_GPRREG:
assert( xmmregs[xmmreg].reg != 0 );
@ -1039,17 +1044,17 @@ void _freeXMMregs()
#if !defined(_MSC_VER) || !defined(__x86_64__)
void FreezeXMMRegs_(int save)
__forceinline void FreezeXMMRegs_(int save)
{
assert( g_EEFreezeRegs );
if( save ) {
if( g_globalXMMSaved ){
g_globalXMMSaved++;
if( g_globalXMMSaved > 1 ){
//SysPrintf("XMM Already saved\n");
return;
}
}
g_globalXMMSaved = 1;
#ifdef _MSC_VER
__asm {
@ -1088,13 +1093,14 @@ void FreezeXMMRegs_(int save)
#endif // _MSC_VER
}
else {
if( !g_globalXMMSaved ){
if( g_globalXMMSaved==0 )
{
//SysPrintf("XMM Regs not saved!\n");
return;
}
}
// TODO: really need to backup all regs?
g_globalXMMSaved = 0;
g_globalXMMSaved--;
if( g_globalXMMSaved > 0 ) return;
#ifdef _MSC_VER
__asm {

View File

@ -113,7 +113,7 @@ void _callFunctionArg3(uptr fn, u32 arg1, u32 arg2, u32 arg3, uptr arg1mem, uptr
// when using mmx/xmm regs, use; 0 is load
// freezes no matter the state
void FreezeXMMRegs_(int save);
extern void FreezeXMMRegs_(int save);
void _flushCachedRegs();
void _flushConstRegs();
@ -225,6 +225,18 @@ int _getNumXMMwrite();
#define FLUSH_FREE_ALLX86 128 // free all x86 regs
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
// Flushing vs. Freeing, as understood by Air (I could be wrong still....)
// "Freeing" registers means that the contents of the registers are flushed to memory.
// This is good for any sort of C code function that plans to modify the actual
// registers. When the Recs resume, they'll reload the registers with values saved
// as needed. (similar to a "FreezeXMMRegs")
// "Flushing" means that in addition to the standard free (which is actually a flush)
// the register allocations are additionally wiped. This should only be necessary if
// the code being called is going to modify register allocations -- ie, be doing
// some kind of recompiling of its own.
#define FLUSH_EVERYTHING 0xfff
// no freeing, used when callee won't destroy mmx/xmm regs
#define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_MMX|FLUSH_FLUSH_ALLX86)
@ -404,7 +416,7 @@ void _recMove128MtoM(u32 to, u32 from);
#define FPU_STATE 0
#define MMX_STATE 1
void FreezeMMXRegs_(int save);
extern void FreezeMMXRegs_(int save);
void SetFPUstate();
// max is 0x7f, when 0x80 is set, need to flush reg

View File

@ -468,7 +468,11 @@ static void recDmaExecI8(void (*name)(), u32 mem, int mmreg)
static void recDmaExec8(void (*name)(), u32 mem, int mmreg)
{
iFlushCall(0);
// Flushcall Note : DMA transfers are almost always "involved" operations
// that use memcpys and/or threading. Freeing all XMM and MMX regs is the
// best option.
iFlushCall(FLUSH_NOCONST);
if( IS_EECONSTREG(mmreg) ) {
recDmaExecI8(name, mem, mmreg);
}
@ -501,36 +505,45 @@ static void PrintDebug(u8 value)
}
}
#define CONSTWRITE_CALLTIMER(name, index, bit) { \
if( !IS_EECONSTREG(mmreg) ) { \
if( bit == 8 ) MOVZX32R8toR(mmreg&0xf, mmreg&0xf); \
else if( bit == 16 ) MOVZX32R16toR(mmreg&0xf, mmreg&0xf); \
} \
_recPushReg(mmreg); \
iFlushCall(0); \
PUSH32I(index); \
CALLFunc((uptr)name); \
ADD32ItoR(ESP, 8); \
} \
// Emits the recompiled call sequence for a counter register write: the value
// operand (mmreg) is pushed, then 'index', then 'name' is called and ESP is
// advanced by 8 afterwards.  'bit' is the width of the write (8, 16 or 32).
// fixme: this would be more optimal as a C++ template (with bit as the template parameter)
static __forceinline void ConstWrite_ExecTimer( void (*name)(), u8 index, u8 bit, int mmreg)
{
	// Sub-32-bit writes from a non-constant operand are zero-extended in place
	// before being pushed; constant operands and 32-bit writes are pushed as-is.
	if( bit != 32 && !IS_EECONSTREG(mmreg) )
	{
		switch( bit )
		{
			case 8:
				MOVZX32R8toR(mmreg&0xf, mmreg&0xf);
				break;
			case 16:
				MOVZX32R16toR(mmreg&0xf, mmreg&0xf);
				break;
			default:
				break;
		}
	}

	// FlushCall Note : All counter functions are short and sweet, full flush not needed.
	_recPushReg(mmreg);
	iFlushCall(0);
	PUSH32I(index);
	CALLFunc((uptr)name);
	ADD32ItoR(ESP, 8);
}
#define CONSTWRITE_TIMERS(bit) \
case 0x10000000: CONSTWRITE_CALLTIMER(rcntWcount, 0, bit); break; \
case 0x10000010: CONSTWRITE_CALLTIMER(rcntWmode, 0, bit); break; \
case 0x10000020: CONSTWRITE_CALLTIMER(rcntWtarget, 0, bit); break; \
case 0x10000030: CONSTWRITE_CALLTIMER(rcntWhold, 0, bit); break; \
case 0x10000000: ConstWrite_ExecTimer(rcntWcount, 0, bit, mmreg); break; \
case 0x10000010: ConstWrite_ExecTimer(rcntWmode, 0, bit, mmreg); break; \
case 0x10000020: ConstWrite_ExecTimer(rcntWtarget, 0, bit, mmreg); break; \
case 0x10000030: ConstWrite_ExecTimer(rcntWhold, 0, bit, mmreg); break; \
\
case 0x10000800: CONSTWRITE_CALLTIMER(rcntWcount, 1, bit); break; \
case 0x10000810: CONSTWRITE_CALLTIMER(rcntWmode, 1, bit); break; \
case 0x10000820: CONSTWRITE_CALLTIMER(rcntWtarget, 1, bit); break; \
case 0x10000830: CONSTWRITE_CALLTIMER(rcntWhold, 1, bit); break; \
case 0x10000800: ConstWrite_ExecTimer(rcntWcount, 1, bit, mmreg); break; \
case 0x10000810: ConstWrite_ExecTimer(rcntWmode, 1, bit, mmreg); break; \
case 0x10000820: ConstWrite_ExecTimer(rcntWtarget, 1, bit, mmreg); break; \
case 0x10000830: ConstWrite_ExecTimer(rcntWhold, 1, bit, mmreg); break; \
\
case 0x10001000: CONSTWRITE_CALLTIMER(rcntWcount, 2, bit); break; \
case 0x10001010: CONSTWRITE_CALLTIMER(rcntWmode, 2, bit); break; \
case 0x10001020: CONSTWRITE_CALLTIMER(rcntWtarget, 2, bit); break; \
case 0x10001000: ConstWrite_ExecTimer(rcntWcount, 2, bit, mmreg); break; \
case 0x10001010: ConstWrite_ExecTimer(rcntWmode, 2, bit, mmreg); break; \
case 0x10001020: ConstWrite_ExecTimer(rcntWtarget, 2, bit, mmreg); break; \
\
case 0x10001800: CONSTWRITE_CALLTIMER(rcntWcount, 3, bit); break; \
case 0x10001810: CONSTWRITE_CALLTIMER(rcntWmode, 3, bit); break; \
case 0x10001820: CONSTWRITE_CALLTIMER(rcntWtarget, 3, bit); break; \
case 0x10001800: ConstWrite_ExecTimer(rcntWcount, 3, bit, mmreg); break; \
case 0x10001810: ConstWrite_ExecTimer(rcntWmode, 3, bit, mmreg); break; \
case 0x10001820: ConstWrite_ExecTimer(rcntWtarget, 3, bit, mmreg); break; \
void hwConstWrite8(u32 mem, int mmreg)
{
@ -638,6 +651,11 @@ void hwConstWrite8(u32 mem, int mmreg)
}
}
// Flushcall Note : DMA transfers are almost always "involved" operations
// that use memcpys and/or threading. Freeing all XMM and MMX regs is the
// best option (removes the need for FreezeXMMRegs()). But register
// allocation is such a mess right now that we can't do it (yet).
static void recDmaExecI16( void (*name)(), u32 mem, int mmreg )
{
MOV16ItoM((uptr)&PS2MEM_HW[(mem) & 0xffff], g_cpuConstRegs[(mmreg>>16)&0x1f].UL[0]);
@ -652,6 +670,7 @@ static void recDmaExecI16( void (*name)(), u32 mem, int mmreg )
static void recDmaExec16(void (*name)(), u32 mem, int mmreg)
{
iFlushCall(0);
if( IS_EECONSTREG(mmreg) ) {
recDmaExecI16(name, mem, mmreg);
}
@ -674,7 +693,9 @@ static void recDmaExec16(void (*name)(), u32 mem, int mmreg)
void hwConstWrite16(u32 mem, int mmreg)
{
switch(mem) {
CONSTWRITE_TIMERS(16)
case 0x10008000: // dma0 - vif0
recDmaExec16(dmaVIF0, mem, mmreg);
break;
@ -804,11 +825,19 @@ static void recDmaExecI( void (*name)(), u32 mem, int mmreg )
static void recDmaExec( void (*name)(), u32 mem, int mmreg )
{
iFlushCall(0);
if( IS_EECONSTREG(mmreg) ) {
recDmaExecI(name, mem, mmreg);
}
else {
// fixme: This is a lot of code to be injecting into the recompiler
// for every DMA transfer. It might actually be more efficient to
// set this up as a C function call instead (depends on how often
// the register is written without actually starting a DMA xfer).
_eeMoveMMREGtoR(EAX, mmreg);
TEST32ItoR(EAX, 0xffff0000);
j8Ptr[6] = JNZ8(0);
@ -839,13 +868,6 @@ static void recDmaExec( void (*name)(), u32 mem, int mmreg )
}
}
#define CONSTWRITE_CALLTIMER32(name, index, bit) { \
_recPushReg(mmreg); \
iFlushCall(0); \
PUSH32I(index); \
CALLFunc((uptr)name); \
ADD32ItoR(ESP, 8); \
} \
void hwConstWrite32(u32 mem, int mmreg)
{
@ -874,23 +896,8 @@ void hwConstWrite32(u32 mem, int mmreg)
}
switch (mem) {
case 0x10000000: CONSTWRITE_CALLTIMER32(rcntWcount, 0, bit); break;
case 0x10000010: CONSTWRITE_CALLTIMER32(rcntWmode, 0, bit); break;
case 0x10000020: CONSTWRITE_CALLTIMER32(rcntWtarget, 0, bit); break;
case 0x10000030: CONSTWRITE_CALLTIMER32(rcntWhold, 0, bit); break;
case 0x10000800: CONSTWRITE_CALLTIMER32(rcntWcount, 1, bit); break;
case 0x10000810: CONSTWRITE_CALLTIMER32(rcntWmode, 1, bit); break;
case 0x10000820: CONSTWRITE_CALLTIMER32(rcntWtarget, 1, bit); break;
case 0x10000830: CONSTWRITE_CALLTIMER32(rcntWhold, 1, bit); break;
case 0x10001000: CONSTWRITE_CALLTIMER32(rcntWcount, 2, bit); break;
case 0x10001010: CONSTWRITE_CALLTIMER32(rcntWmode, 2, bit); break;
case 0x10001020: CONSTWRITE_CALLTIMER32(rcntWtarget, 2, bit); break;
case 0x10001800: CONSTWRITE_CALLTIMER32(rcntWcount, 3, bit); break;
case 0x10001810: CONSTWRITE_CALLTIMER32(rcntWmode, 3, bit); break;
case 0x10001820: CONSTWRITE_CALLTIMER32(rcntWtarget, 3, bit); break;
CONSTWRITE_TIMERS(32)
case GIF_CTRL:
@ -980,8 +987,6 @@ void hwConstWrite32(u32 mem, int mmreg)
XOR16RtoM((uptr)&PS2MEM_HW[0xe012], EAX);
CALLFunc((uptr)cpuTestDMACInts);
//x86SetJ8( j8Ptr[5] );
break;
case 0x1000f000: // INTC_STAT
@ -1173,15 +1178,7 @@ void hwConstWrite64(u32 mem, int mmreg)
SHR32ItoR(EAX, 16);
XOR16RtoM((uptr)&PS2MEM_HW[0xe012], EAX);
// cpuRegs.CP0.n.Status.val is checked by cpuTestDMACInts.
//MOV32MtoR(EAX, (uptr)&cpuRegs.CP0.n.Status.val);
//AND32ItoR(EAX, 0x10807);
//CMP32ItoR(EAX, 0x10801);
//j8Ptr[5] = JNE8(0);
CALLFunc((uptr)cpuTestDMACInts);
//x86SetJ8( j8Ptr[5] );
break;
case 0x1000f590: // DMAC_ENABLEW
@ -1191,14 +1188,7 @@ void hwConstWrite64(u32 mem, int mmreg)
case 0x1000f000: // INTC_STAT
_eeWriteConstMem32OP((uptr)&PS2MEM_HW[mem&0xffff], mmreg, 2);
// note: cpuRegs.CP0.n.Status.val conditional is done by cpuTestINTCInts.
//MOV32MtoR(EAX, (uptr)&cpuRegs.CP0.n.Status.val);
//AND32ItoR(EAX, 0x10407);
//CMP32ItoR(EAX, 0x10401);
//j8Ptr[5] = JNE8(0);
CALLFunc((uptr)cpuTestINTCInts);
//x86SetJ8( j8Ptr[5] );
break;
case 0x1000f010: // INTC_MASK
@ -1207,16 +1197,7 @@ void hwConstWrite64(u32 mem, int mmreg)
iFlushCall(0);
XOR16RtoM((uptr)&PS2MEM_HW[0xf010], EAX);
// note: cpuRegs.CP0.n.Status.val conditional is done by cpuTestINTCInts.
//MOV32MtoR(EAX, (uptr)&cpuRegs.CP0.n.Status.val);
//AND32ItoR(EAX, 0x10407);
//CMP32ItoR(EAX, 0x10401);
//j8Ptr[5] = JNE8(0);
CALLFunc((uptr)cpuTestINTCInts);
//x86SetJ8( j8Ptr[5] );
break;
case 0x1000f130:

View File

@ -768,16 +768,17 @@ void _freeMMXregs()
}
}
void FreezeMMXRegs_(int save)
__forceinline void FreezeMMXRegs_(int save)
{
assert( g_EEFreezeRegs );
if( save ) {
if( g_globalMMXSaved ){
g_globalMMXSaved++;
if( g_globalMMXSaved>1 )
{
//SysPrintf("MMX Already Saved!\n");
return;
}
g_globalMMXSaved = 1;
}
#ifdef _MSC_VER
__asm {
@ -807,11 +808,14 @@ void FreezeMMXRegs_(int save)
}
else {
if( !g_globalMMXSaved ){
if( g_globalMMXSaved==0 )
{
//SysPrintf("MMX Not Saved!\n");
return;
}
g_globalMMXSaved = 0;
}
g_globalMMXSaved--;
if( g_globalMMXSaved > 0 ) return;
#ifdef _MSC_VER
__asm {

View File

@ -2283,7 +2283,7 @@ static void iBranchTest(u32 newpc, u32 cpuBranch)
if( newpc != 0xffffffff )
{
CMP32ItoM((uptr)&cpuRegs.pc, newpc);
JNE32((u32)DispatcherReg - ( (u32)x86Ptr + 6 ));
JNE32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 ));
}
x86SetJ8( j8Ptr[0] );
@ -2315,7 +2315,7 @@ void recSYSCALL( void ) {
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT);
JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 ));
JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 ));
x86SetJ8(j8Ptr[0]);
//branch = 2;
}
@ -2329,7 +2329,7 @@ void recBREAK( void ) {
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT);
ADD32ItoM((uptr)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT);
RET();
x86SetJ8(j8Ptr[0]);
//branch = 2;
@ -2365,16 +2365,16 @@ void recMFSA( void )
mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);
if( mmreg >= 0 ) {
SSE_MOVLPS_M64_to_XMM(mmreg, (u32)&cpuRegs.sa);
SSE_MOVLPS_M64_to_XMM(mmreg, (uptr)&cpuRegs.sa);
}
else if( (mmreg = _checkMMXreg(MMX_GPR+_Rd_, MODE_WRITE)) >= 0 ) {
MOVDMtoMMX(mmreg, (u32)&cpuRegs.sa);
MOVDMtoMMX(mmreg, (uptr)&cpuRegs.sa);
SetMMXstate();
}
else {
MOV32MtoR(EAX, (u32)&cpuRegs.sa);
_deleteEEreg(_Rd_, 0);
MOV32RtoM((u32)&cpuRegs.GPR.r[_Rd_].UL[0], EAX);
MOV32RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UL[0], EAX);
MOV32ItoM((uptr)&cpuRegs.GPR.r[_Rd_].UL[1], 0);
}
}
@ -2388,15 +2388,15 @@ void recMTSA( void )
int mmreg;
if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) {
SSE_MOVSS_XMM_to_M32((u32)&cpuRegs.sa, mmreg);
SSE_MOVSS_XMM_to_M32((uptr)&cpuRegs.sa, mmreg);
}
else if( (mmreg = _checkMMXreg(MMX_GPR+_Rs_, MODE_READ)) >= 0 ) {
MOVDMMXtoM((u32)&cpuRegs.sa, mmreg);
MOVDMMXtoM((uptr)&cpuRegs.sa, mmreg);
SetMMXstate();
}
else {
MOV32MtoR(EAX, (u32)&cpuRegs.GPR.r[_Rs_].UL[0]);
MOV32RtoM((u32)&cpuRegs.sa, EAX);
MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UL[0]);
MOV32RtoM((uptr)&cpuRegs.sa, EAX);
}
}
}
@ -2411,7 +2411,7 @@ void recMTSAB( void )
AND32ItoR(EAX, 0xF);
XOR32ItoR(EAX, _Imm_&0xf);
SHL32ItoR(EAX, 3);
MOV32RtoM((u32)&cpuRegs.sa, EAX);
MOV32RtoM((uptr)&cpuRegs.sa, EAX);
}
}
@ -2425,7 +2425,7 @@ void recMTSAH( void )
AND32ItoR(EAX, 0x7);
XOR32ItoR(EAX, _Imm_&0x7);
SHL32ItoR(EAX, 4);
MOV32RtoM((u32)&cpuRegs.sa, EAX);
MOV32RtoM((uptr)&cpuRegs.sa, EAX);
}
}
@ -2496,7 +2496,7 @@ void recompileNextInstruction(int delayslot)
// return;
// }
JMP32((u32)pblock->pFnptr - ((u32)x86Ptr + 5));
JMP32((uptr)pblock->pFnptr - ((uptr)x86Ptr + 5));
branch = 3;
return;
}
@ -2895,7 +2895,7 @@ void recRecompile( u32 startpc )
MOV32ItoM((uptr)&g_lastpc, pc);
CALLFunc((uptr)printfn);
// CMP32MtoR(EBP, (u32)&s_uSaveEBP);
// CMP32MtoR(EBP, (uptr)&s_uSaveEBP);
// j8Ptr[0] = JE8(0);
// CALLFunc((uptr)badespfn);
// x86SetJ8(j8Ptr[0]);
@ -3151,7 +3151,7 @@ StartRecomp:
// s_startcount = 0;
// if( pc+32 < s_nEndBlock ) {
// // only blocks with more than 8 insts
// //PUSH32I((u32)&lbase);
// //PUSH32I((uptr)&lbase);
// //CALLFunc((uptr)QueryPerformanceCounter);
// lbase.QuadPart = GetCPUTick();
// s_startcount = 1;
@ -3217,7 +3217,7 @@ StartRecomp:
iBranchTest(0xffffffff, 1);
if( bExecBIOS ) CheckForBIOSEnd();
JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 ));
JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 ));
}
else {
assert( branch != 3 );
@ -3229,7 +3229,7 @@ StartRecomp:
assert( pc == s_nEndBlock );
iFlushCall(FLUSH_EVERYTHING);
MOV32ItoM((uptr)&cpuRegs.pc, pc);
JMP32((u32)pblock->pFnptr - ((u32)x86Ptr + 5));
JMP32((uptr)pblock->pFnptr - ((uptr)x86Ptr + 5));
branch = 3;
}
else if( !branch ) {