diff --git a/pcsx2/GS.cpp b/pcsx2/GS.cpp index abc65692a1..b4c1bcfa46 100644 --- a/pcsx2/GS.cpp +++ b/pcsx2/GS.cpp @@ -440,17 +440,27 @@ __forceinline void gsWaitGS() { if( !CHECK_MULTIGS ) return; + // Freeze registers because some kernel code likes to destroy them + FreezeXMMRegs(1); + FreezeMMXRegs(1); GS_SETEVENT(); while( *(volatile PU8*)&g_pGSRingPos != *(volatile PU8*)&g_pGSWritePos ) _TIMESLICE(); + FreezeXMMRegs(0); + FreezeMMXRegs(0); } // Sets the gsEvent flag and releases a timeslice. // For use in loops that wait on the GS thread to do certain things. static void gsSetEventWait() { + // Freeze registers because some kernel code likes to destroy them + FreezeXMMRegs(1); + FreezeMMXRegs(1); GS_SETEVENT(); _TIMESLICE(); + FreezeXMMRegs(0); + FreezeMMXRegs(0); m_mtgsCopyCommandTally = 0; } diff --git a/pcsx2/GS.h b/pcsx2/GS.h index e857550898..a5b2348214 100644 --- a/pcsx2/GS.h +++ b/pcsx2/GS.h @@ -86,7 +86,6 @@ void gsDynamicSkipEnable(); // mem and size are the ones from GSRingBufCopy extern void GSRINGBUF_DONECOPY(const u8 *mem, u32 size); -extern void GS_SETEVENT(); extern void gsWaitGS(); // used for resetting GIF fifo diff --git a/pcsx2/Hw.c b/pcsx2/Hw.c index 5a9849ce31..54e161dd4d 100644 --- a/pcsx2/Hw.c +++ b/pcsx2/Hw.c @@ -341,16 +341,40 @@ void hwRead128(u32 mem, u64 *out) { out[1] = psHu64(mem+8); } - HW_LOG("Unknown Hardware Read 128 at %x\n",mem); + HW_LOG("Unknown Hardware Read 128 at %x\n",mem); } -// dark cloud2 uses it -#define DmaExec8(name, num) { \ - psHu8(mem) = (u8)value; \ - if ((psHu8(mem) & 0x1) && (psHu32(DMAC_CTRL) & 0x1)) { \ - /*SysPrintf("Running DMA 8 %x\n", psHu32(mem & ~0x1));*/ \ - dma##name(); \ - } \ +// dark cloud2 uses 8 bit DMAs register writes +static __forceinline void DmaExec8( void (*func)(), u32 mem, u8 value ) +{ + psHu8(mem) = (u8)value; + if ((psHu8(mem) & 0x1) && (psHu32(DMAC_CTRL) & 0x1)) + { + /*SysPrintf("Running DMA 8 %x\n", psHu32(mem & ~0x1));*/ + func(); + } +} + +static __forceinline void DmaExec16( void (*func)(), u32 mem, u16 value ) +{ + psHu16(mem) = (u16)value; + if ((psHu16(mem) & 0x100) && (psHu32(DMAC_CTRL) & 0x1)) + { + //SysPrintf("16bit DMA Start\n"); + func(); + } +} + +static void DmaExec( void (*func)(), u32 mem, u32 value ) +{ + /* Keep the old tag if in chain mode and hw doesnt set it*/ + if( (value & 0xc) == 0x4 && (value & 0xffff0000) == 0) + psHu32(mem) = (psHu32(mem) & 0xFFFF0000) | (u16)value; + else /* Else (including Normal mode etc) write whatever the hardware sends*/ + psHu32(mem) = (u32)value; + + if ((psHu32(mem) & 0x100) && (psHu32(DMAC_CTRL) & 0x1)) + func(); } char sio_buffer[1024]; @@ -404,55 +428,55 @@ void hwWrite8(u32 mem, u8 value) { break; case 0x10008001: // dma0 - vif0 DMA_LOG("VIF0dma %lx\n", value); - DmaExec8(VIF0, 0); + DmaExec8(dmaVIF0, mem, value); break; case 0x10009001: // dma1 - vif1 DMA_LOG("VIF1dma %lx\n", value); - DmaExec8(VIF1, 1); + DmaExec8(dmaVIF1, mem, value); break; case 0x1000a001: // dma2 - gif DMA_LOG("0x%8.8x hwWrite8: GSdma %lx 0x%lx\n", cpuRegs.cycle, value); - DmaExec8(GIF, 2); + DmaExec8(dmaGIF, mem, value); break; case 0x1000b001: // dma3 - fromIPU DMA_LOG("IPU0dma %lx\n", value); - DmaExec8(IPU0, 3); + DmaExec8(dmaIPU0, mem, value); break; case 0x1000b401: // dma4 - toIPU #ifdef DMA_LOG DMA_LOG("IPU1dma %lx\n", value); #endif - DmaExec8(IPU1, 4); + DmaExec8(dmaIPU1, mem, value); break; case 0x1000c001: // dma5 - sif0 DMA_LOG("SIF0dma %lx\n", value); // if (value == 0) psxSu32(0x30) = 0x40000; - DmaExec8(SIF0, 5); + DmaExec8(dmaSIF0, mem, value); break; case 0x1000c401: // dma6 - sif1 DMA_LOG("SIF1dma %lx\n", value); - DmaExec8(SIF1, 6); + DmaExec8(dmaSIF1, mem, value); break; case 0x1000c801: // dma7 - sif2 DMA_LOG("SIF2dma %lx\n", value); - DmaExec8(SIF2, 7); + DmaExec8(dmaSIF2, mem, value); break; case 0x1000d001: // dma8 - fromSPR DMA_LOG("fromSPRdma8 %lx\n", value); - DmaExec8(SPR0, 8); + DmaExec8(dmaSPR0, mem, value); break; case 0x1000d401: // dma9 - toSPR DMA_LOG("toSPRdma8 %lx\n", value); - DmaExec8(SPR1, 9); + DmaExec8(dmaSPR1, mem, value); break; case 0x1000f592: // DMAC_ENABLEW @@ -489,14 +513,6 @@ void hwWrite8(u32 mem, u8 value) { } } -#define DmaExec16(name, num) { \ - psHu16(mem) = (u16)value; \ - if ((psHu16(mem) & 0x100) && (psHu32(DMAC_CTRL) & 0x1)) { \ - SysPrintf("16bit DMA Start\n"); \ - dma##name(); \ - } \ -} - void hwWrite16(u32 mem, u16 value) { #ifdef PCSX2_DEVBUILD @@ -506,13 +522,13 @@ void hwWrite16(u32 mem, u16 value) switch(mem) { case 0x10008000: // dma0 - vif0 DMA_LOG("VIF0dma %lx\n", value); - DmaExec16(VIF0, 0); + DmaExec16(dmaVIF0, mem, value); break; // Latest Fix for Florin by asadr (VIF1) case 0x10009000: // dma1 - vif1 - chcr DMA_LOG("VIF1dma CHCR %lx\n", value); - DmaExec16(VIF1, 1); + DmaExec16(dmaVIF1, mem, value); break; #ifdef HW_LOG @@ -545,7 +561,7 @@ void hwWrite16(u32 mem, u16 value) case 0x1000a000: // dma2 - gif DMA_LOG("0x%8.8x hwWrite32: GSdma %lx\n", cpuRegs.cycle, value); - DmaExec16(GIF, 2); + DmaExec16(dmaGIF, mem, value); break; #ifdef HW_LOG @@ -576,7 +592,7 @@ void hwWrite16(u32 mem, u16 value) #endif case 0x1000b000: // dma3 - fromIPU DMA_LOG("IPU0dma %lx\n", value); - DmaExec16(IPU0, 3); + DmaExec16(dmaIPU0, mem, value); break; #ifdef HW_LOG @@ -599,7 +615,7 @@ void hwWrite16(u32 mem, u16 value) #endif case 0x1000b400: // dma4 - toIPU DMA_LOG("IPU1dma %lx\n", value); - DmaExec16(IPU1, 4); + DmaExec16(dmaIPU1, mem, value); break; #ifdef HW_LOG case 0x1000b410: @@ -623,7 +639,7 @@ void hwWrite16(u32 mem, u16 value) case 0x1000c000: // dma5 - sif0 DMA_LOG("SIF0dma %lx\n", value); // if (value == 0) psxSu32(0x30) = 0x40000; - DmaExec16(SIF0, 5); + DmaExec16(dmaSIF0, mem, value); break; case 0x1000c002: @@ -631,7 +647,7 @@ void hwWrite16(u32 mem, u16 value) break; case 0x1000c400: // dma6 - sif1 DMA_LOG("SIF1dma %lx\n", value); - DmaExec16(SIF1, 6); + DmaExec16(dmaSIF1, mem, value); break; #ifdef HW_LOG @@ -650,19 +666,19 @@ void hwWrite16(u32 mem, u16 value) case 0x1000c800: // dma7 - sif2 DMA_LOG("SIF2dma %lx\n", value); - DmaExec16(SIF2, 7); + DmaExec16(dmaSIF2, mem, value); break; case 0x1000c802: //? break; case 0x1000d000: // dma8 - fromSPR DMA_LOG("fromSPRdma %lx\n", value); - DmaExec16(SPR0, 8); + DmaExec16(dmaSPR0, mem, value); break; case 0x1000d400: // dma9 - toSPR DMA_LOG("toSPRdma %lx\n", value); - DmaExec16(SPR1, 9); + DmaExec16(dmaSPR1, mem, value); break; case 0x1000f592: // DMAC_ENABLEW psHu16(0xf592) = value; @@ -713,17 +729,6 @@ void hwWrite16(u32 mem, u16 value) HW_LOG("Unknown Hardware write 16 at %x with value %x\n",mem,value); } -#define DmaExec(name, num) { \ - /* Keep the old tag if in chain mode and hw doesnt set it*/ \ - if( (value & 0xc) == 0x4 && (value & 0xffff0000) == 0) \ - psHu32(mem) = (psHu32(mem) & 0xFFFF0000) | (u16)value; \ - else /* Else (including Normal mode etc) write whatever the hardware sends*/ \ - psHu32(mem) = (u32)value; \ - \ - if ((psHu32(mem) & 0x100) && (psHu32(DMAC_CTRL) & 0x1)) { \ - dma##name(); \ - } \ -} void hwWrite32(u32 mem, u32 value) { @@ -782,12 +787,12 @@ void hwWrite32(u32 mem, u32 value) { case 0x10008000: // dma0 - vif0 DMA_LOG("VIF0dma %lx\n", value); - DmaExec(VIF0, 0); + DmaExec(dmaVIF0, mem, value); break; //------------------------------------------------------------------ case 0x10009000: // dma1 - vif1 - chcr DMA_LOG("VIF1dma CHCR %lx\n", value); - DmaExec(VIF1, 1); + DmaExec(dmaVIF1, mem, value); break; case 0x10009010: // dma1 - vif1 - madr HW_LOG("VIF1dma Madr %lx\n", value); @@ -816,7 +821,7 @@ void hwWrite32(u32 mem, u32 value) { //------------------------------------------------------------------ case 0x1000a000: // dma2 - gif DMA_LOG("0x%8.8x hwWrite32: GSdma %lx\n", cpuRegs.cycle, value); - DmaExec(GIF, 2); + DmaExec(dmaGIF, mem, value); break; case 0x1000a010: psHu32(mem) = value;//dma2 madr @@ -845,7 +850,7 @@ void hwWrite32(u32 mem, u32 value) { //------------------------------------------------------------------ case 0x1000b000: // dma3 - fromIPU DMA_LOG("IPU0dma %lx\n", value); - DmaExec(IPU0, 3); + DmaExec(dmaIPU0, mem, value); break; //------------------------------------------------------------------ case 0x1000b010: @@ -867,7 +872,7 @@ void hwWrite32(u32 mem, u32 value) { //------------------------------------------------------------------ case 0x1000b400: // dma4 - toIPU DMA_LOG("IPU1dma %lx\n", value); - DmaExec(IPU1, 4); + DmaExec(dmaIPU1, mem, value); break; //------------------------------------------------------------------ case 0x1000b410: @@ -890,12 +895,12 @@ void hwWrite32(u32 mem, u32 value) { case 0x1000c000: // dma5 - sif0 DMA_LOG("SIF0dma %lx\n", value); //if (value == 0) psxSu32(0x30) = 0x40000; - DmaExec(SIF0, 5); + DmaExec(dmaSIF0, mem, value); break; //------------------------------------------------------------------ case 0x1000c400: // dma6 - sif1 DMA_LOG("SIF1dma %lx\n", value); - DmaExec(SIF1, 6); + DmaExec(dmaSIF1, mem, value); break; case 0x1000c420: // dma6 - sif1 - qwc HW_LOG("SIF1dma QWC = %lx\n", value); @@ -908,17 +913,17 @@ void hwWrite32(u32 mem, u32 value) { //------------------------------------------------------------------ case 0x1000c800: // dma7 - sif2 DMA_LOG("SIF2dma %lx\n", value); - DmaExec(SIF2, 7); + DmaExec(dmaSIF2, mem, value); break; //------------------------------------------------------------------ case 0x1000d000: // dma8 - fromSPR DMA_LOG("fromSPRdma %lx\n", value); - DmaExec(SPR0, 8); + DmaExec(dmaSPR0, mem, value); break; //------------------------------------------------------------------ case 0x1000d400: // dma9 - toSPR DMA_LOG("toSPRdma %lx\n", value); - DmaExec(SPR1, 9); + DmaExec(dmaSPR1, mem, value); break; //------------------------------------------------------------------ case 0x1000e000: // DMAC_CTRL @@ -1051,7 +1056,7 @@ void hwWrite64(u32 mem, u64 value) { case 0x1000a000: // dma2 - gif DMA_LOG("0x%8.8x hwWrite64: GSdma %lx\n", cpuRegs.cycle, value); - DmaExec(GIF, 2); + DmaExec(dmaGIF, mem, value); break; #ifdef HW_LOG diff --git a/pcsx2/Misc.h b/pcsx2/Misc.h index 6f49329161..8e0878e434 100644 --- a/pcsx2/Misc.h +++ b/pcsx2/Misc.h @@ -224,7 +224,7 @@ int IsBIOS(char *filename, char *description); #define FreezeMMXRegs(save) #define FreezeXMMRegs(save) #else -void FreezeXMMRegs_(int save); +extern void FreezeXMMRegs_(int save); extern u32 g_EEFreezeRegs; #define FreezeXMMRegs(save) if( g_EEFreezeRegs ) { FreezeXMMRegs_(save); } @@ -237,18 +237,6 @@ void FreezeMMXRegs_(int save); #endif -// define a PCS2 specific memcpy and make sure it is used all in real-time code -#if _MSC_VER >= 1400 // vs2005+ uses xmm/mmx in memcpy -__forceinline void memcpy_pcsx2(void* dest, const void* src, size_t n) -{ - //FreezeMMXRegs(1); // mmx not used - FreezeXMMRegs(1); - memcpy(dest, src, n); - // have to be unfrozen by parent call! -} -#else -#define memcpy_pcsx2 memcpy -#endif #ifdef PCSX2_NORECBUILD #define memcpy_fast memcpy diff --git a/pcsx2/R5900.c b/pcsx2/R5900.c index 43924dd40b..55f7968ca1 100644 --- a/pcsx2/R5900.c +++ b/pcsx2/R5900.c @@ -513,7 +513,7 @@ static __forceinline void _cpuBranchTest_Shared() // Apply the hsync counter's nextCycle cpuSetNextBranch( counters[4].sCycle, counters[4].CycleT ); - // Apply other counter nextCycles + // Apply vsync and other counter nextCycles cpuSetNextBranch( nextsCounter, nextCounter ); } @@ -522,11 +522,12 @@ static __forceinline void _cpuBranchTest_Shared() extern u8 g_globalXMMSaved; X86_32CODE(extern u8 g_globalMMXSaved;) #endif -#endif void cpuBranchTest() { -#ifndef PCSX2_NORECBUILD + // cpuBranchTest should be called from the recompiler only. + assert( Cpu == &recCpu ); + #ifdef PCSX2_DEVBUILD // dont' remove this check unless doing an official release if( g_globalXMMSaved X86_32CODE(|| g_globalMMXSaved) ) @@ -534,7 +535,6 @@ void cpuBranchTest() assert( !g_globalXMMSaved X86_32CODE(&& !g_globalMMXSaved) ); #endif g_EEFreezeRegs = 0; -#endif // Perform counters, ints, and IOP updates: _cpuBranchTest_Shared(); @@ -543,18 +543,15 @@ void cpuBranchTest() if (VU0.VI[REG_VPU_STAT].UL & 0x1) { - FreezeXMMRegs(1); + // We're in a BranchTest. All dynarec registers are flushed + // so there is no need to freeze registers here. Cpu->ExecuteVU0Block(); - FreezeXMMRegs(0); } -#ifndef PCSX2_NORECBUILD -#ifdef PCSX2_DEVBUILD assert( !g_globalXMMSaved X86_32CODE(&& !g_globalMMXSaved) ); -#endif g_EEFreezeRegs = 1; -#endif } +#endif __forceinline void CPU_INT( u32 n, s32 ecycle) { diff --git a/pcsx2/VifDma.c b/pcsx2/VifDma.c index c67b9c661b..a70cd4e99f 100644 --- a/pcsx2/VifDma.c +++ b/pcsx2/VifDma.c @@ -214,14 +214,10 @@ __forceinline void vif1FLUSH() { _cycles = VU1.cycle; if( VU0.VI[REG_VPU_STAT].UL & 0x100 ) { - //FreezeXMMRegs(1); do { Cpu->ExecuteVU1Block(); } while(VU0.VI[REG_VPU_STAT].UL & 0x100); -// FreezeXMMRegs(0); -// FreezeMMXRegs(0); - //FreezeXMMRegs(0); g_vifCycles+= (VU1.cycle - _cycles)*BIAS; } } diff --git a/pcsx2/x86/iCP0.c b/pcsx2/x86/iCP0.c index 3206d819a2..95111d6a5d 100644 --- a/pcsx2/x86/iCP0.c +++ b/pcsx2/x86/iCP0.c @@ -335,7 +335,7 @@ void recERET() MOV32MtoR( ECX, (uptr)&cpuRegs.cycle ); MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); MOV32RtoM( (uptr)&g_nextBranchCycle, ECX ); - iFlushCall(FLUSH_EVERYTHING); + iFlushCall(FLUSH_NOCONST); CALLFunc( (uptr)ERET ); } @@ -349,7 +349,7 @@ void recEI() MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); MOV32RtoM( (uptr)&g_nextBranchCycle, ECX ); - iFlushCall(FLUSH_EVERYTHING); + iFlushCall(FLUSH_NOCONST); CALLFunc( (uptr)EI ); } @@ -363,7 +363,7 @@ void recDI() //MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); MOV32RtoM( (uptr)&g_nextBranchCycle, ECX ); - iFlushCall(FLUSH_EVERYTHING); + iFlushCall(0); CALLFunc( (uptr)DI ); } diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 90fab0d675..7d0653e571 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -839,7 +839,6 @@ void _deleteFPtoXMMreg(int reg, int flush) void _freeXMMreg(int xmmreg) { - VURegs *VU = xmmregs[xmmreg].VU ? &VU1 : &VU0; assert( xmmreg < XMMREGS ); if (!xmmregs[xmmreg].inuse) return; @@ -847,6 +846,8 @@ void _freeXMMreg(int xmmreg) if (xmmregs[xmmreg].mode & MODE_WRITE) { switch (xmmregs[xmmreg].type) { case XMMTYPE_VFREG: + { + const VURegs *VU = xmmregs[xmmreg].VU ? &VU1 : &VU0; if( xmmregs[xmmreg].mode & MODE_VUXYZ ) { if( xmmregs[xmmreg].mode & MODE_VUZ ) @@ -882,9 +883,12 @@ void _freeXMMreg(int xmmreg) { SSE_MOVAPS_XMM_to_M128(VU_VFx_ADDR(xmmregs[xmmreg].reg), xmmreg); } + } break; case XMMTYPE_ACC: + { + const VURegs *VU = xmmregs[xmmreg].VU ? &VU1 : &VU0; if( xmmregs[xmmreg].mode & MODE_VUXYZ ) { if( xmmregs[xmmreg].mode & MODE_VUZ ) @@ -921,7 +925,8 @@ void _freeXMMreg(int xmmreg) { SSE_MOVAPS_XMM_to_M128(VU_ACCx_ADDR, xmmreg); } - break; + } + break; case XMMTYPE_GPRREG: assert( xmmregs[xmmreg].reg != 0 ); @@ -1039,17 +1044,17 @@ void _freeXMMregs() #if !defined(_MSC_VER) || !defined(__x86_64__) -void FreezeXMMRegs_(int save) +__forceinline void FreezeXMMRegs_(int save) { assert( g_EEFreezeRegs ); if( save ) { - if( g_globalXMMSaved ){ + g_globalXMMSaved++; + if( g_globalXMMSaved > 1 ){ //SysPrintf("XMM Already saved\n"); return; - } + } - g_globalXMMSaved = 1; #ifdef _MSC_VER __asm { @@ -1088,13 +1093,14 @@ void FreezeXMMRegs_(int save) #endif // _MSC_VER } else { - if( !g_globalXMMSaved ){ + if( g_globalXMMSaved==0 ) + { //SysPrintf("XMM Regs not saved!\n"); return; - } + } - // TODO: really need to backup all regs? - g_globalXMMSaved = 0; + g_globalXMMSaved--; + if( g_globalXMMSaved > 0 ) return; #ifdef _MSC_VER __asm { diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index dd0d03be67..155d899cb6 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -113,7 +113,7 @@ void _callFunctionArg3(uptr fn, u32 arg1, u32 arg2, u32 arg3, uptr arg1mem, uptr // when using mmx/xmm regs, use; 0 is load // freezes no matter the state -void FreezeXMMRegs_(int save); +extern void FreezeXMMRegs_(int save); void _flushCachedRegs(); void _flushConstRegs(); @@ -225,6 +225,18 @@ int _getNumXMMwrite(); #define FLUSH_FREE_ALLX86 128 // free all x86 regs #define FLUSH_FREE_VU0 0x100 // free all vu0 related regs +// Flushing vs. Freeing, as understood by Air (I could be wrong still....) + +// "Freeing" registers means that the contents of the registers are flushed to memory. +// This is good for any sort of C code function that plans to modify the actual +// registers. When the Recs resume, they'll reload the registers with values saved +// as needed. (similar to a "FreezeXMMRegs") + +// "Flushing" means that in addition to the standard free (which is actually a flush) +// the register allocations are additionally wiped. This should only be necessary if +// the code being called is going to modify register allocations -- ie, be doing +// some kind of recompiling of its own. + #define FLUSH_EVERYTHING 0xfff // no freeing, used when callee won't destroy mmx/xmm regs #define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_MMX|FLUSH_FLUSH_ALLX86) @@ -404,7 +416,7 @@ void _recMove128MtoM(u32 to, u32 from); #define FPU_STATE 0 #define MMX_STATE 1 -void FreezeMMXRegs_(int save); +extern void FreezeMMXRegs_(int save); void SetFPUstate(); // max is 0x7f, when 0x80 is set, need to flush reg diff --git a/pcsx2/x86/iHw.c b/pcsx2/x86/iHw.c index db9ddea7a3..6899b014b4 100644 --- a/pcsx2/x86/iHw.c +++ b/pcsx2/x86/iHw.c @@ -468,7 +468,11 @@ static void recDmaExecI8(void (*name)(), u32 mem, int mmreg) static void recDmaExec8(void (*name)(), u32 mem, int mmreg) { - iFlushCall(0); + // Flushcall Note : DMA transfers are almost always "involved" operations + // that use memcpys and/or threading. Freeing all XMM and MMX regs is the + // best option. + + iFlushCall(FLUSH_NOCONST); if( IS_EECONSTREG(mmreg) ) { recDmaExecI8(name, mem, mmreg); } @@ -501,36 +505,45 @@ static void PrintDebug(u8 value) } } -#define CONSTWRITE_CALLTIMER(name, index, bit) { \ - if( !IS_EECONSTREG(mmreg) ) { \ - if( bit == 8 ) MOVZX32R8toR(mmreg&0xf, mmreg&0xf); \ - else if( bit == 16 ) MOVZX32R16toR(mmreg&0xf, mmreg&0xf); \ - } \ - _recPushReg(mmreg); \ - iFlushCall(0); \ - PUSH32I(index); \ - CALLFunc((uptr)name); \ - ADD32ItoR(ESP, 8); \ -} \ +// fixme: this would be more optimal as a C++ template (with bit as the template parameter) +static __forceinline void ConstWrite_ExecTimer( void (*name)(), u8 index, u8 bit, int mmreg) +{ + if( bit != 32 ) + { + if( !IS_EECONSTREG(mmreg) ) + { + if( bit == 8 ) MOVZX32R8toR(mmreg&0xf, mmreg&0xf); + else if( bit == 16 ) MOVZX32R16toR(mmreg&0xf, mmreg&0xf); + } + } + + // FlushCall Note : All counter functions are short and sweet, full flush not needed. + + _recPushReg(mmreg); + iFlushCall(0); + PUSH32I(index); + CALLFunc((uptr)name); + ADD32ItoR(ESP, 8); +} #define CONSTWRITE_TIMERS(bit) \ - case 0x10000000: CONSTWRITE_CALLTIMER(rcntWcount, 0, bit); break; \ - case 0x10000010: CONSTWRITE_CALLTIMER(rcntWmode, 0, bit); break; \ - case 0x10000020: CONSTWRITE_CALLTIMER(rcntWtarget, 0, bit); break; \ - case 0x10000030: CONSTWRITE_CALLTIMER(rcntWhold, 0, bit); break; \ + case 0x10000000: ConstWrite_ExecTimer(rcntWcount, 0, bit, mmreg); break; \ + case 0x10000010: ConstWrite_ExecTimer(rcntWmode, 0, bit, mmreg); break; \ + case 0x10000020: ConstWrite_ExecTimer(rcntWtarget, 0, bit, mmreg); break; \ + case 0x10000030: ConstWrite_ExecTimer(rcntWhold, 0, bit, mmreg); break; \ \ - case 0x10000800: CONSTWRITE_CALLTIMER(rcntWcount, 1, bit); break; \ - case 0x10000810: CONSTWRITE_CALLTIMER(rcntWmode, 1, bit); break; \ - case 0x10000820: CONSTWRITE_CALLTIMER(rcntWtarget, 1, bit); break; \ - case 0x10000830: CONSTWRITE_CALLTIMER(rcntWhold, 1, bit); break; \ + case 0x10000800: ConstWrite_ExecTimer(rcntWcount, 1, bit, mmreg); break; \ + case 0x10000810: ConstWrite_ExecTimer(rcntWmode, 1, bit, mmreg); break; \ + case 0x10000820: ConstWrite_ExecTimer(rcntWtarget, 1, bit, mmreg); break; \ + case 0x10000830: ConstWrite_ExecTimer(rcntWhold, 1, bit, mmreg); break; \ \ - case 0x10001000: CONSTWRITE_CALLTIMER(rcntWcount, 2, bit); break; \ - case 0x10001010: CONSTWRITE_CALLTIMER(rcntWmode, 2, bit); break; \ - case 0x10001020: CONSTWRITE_CALLTIMER(rcntWtarget, 2, bit); break; \ + case 0x10001000: ConstWrite_ExecTimer(rcntWcount, 2, bit, mmreg); break; \ + case 0x10001010: ConstWrite_ExecTimer(rcntWmode, 2, bit, mmreg); break; \ + case 0x10001020: ConstWrite_ExecTimer(rcntWtarget, 2, bit, mmreg); break; \ \ - case 0x10001800: CONSTWRITE_CALLTIMER(rcntWcount, 3, bit); break; \ - case 0x10001810: CONSTWRITE_CALLTIMER(rcntWmode, 3, bit); break; \ - case 0x10001820: CONSTWRITE_CALLTIMER(rcntWtarget, 3, bit); break; \ + case 0x10001800: ConstWrite_ExecTimer(rcntWcount, 3, bit, mmreg); break; \ + case 0x10001810: ConstWrite_ExecTimer(rcntWmode, 3, bit, mmreg); break; \ + case 0x10001820: ConstWrite_ExecTimer(rcntWtarget, 3, bit, mmreg); break; \ void hwConstWrite8(u32 mem, int mmreg) { @@ -638,6 +651,11 @@ void hwConstWrite8(u32 mem, int mmreg) } } +// Flushcall Note : DMA transfers are almost always "involved" operations +// that use memcpys and/or threading. Freeing all XMM and MMX regs is the +// best option (removes the need for FreezeXMMRegs()). But register +// allocation is such a mess right now that we can't do it (yet). + static void recDmaExecI16( void (*name)(), u32 mem, int mmreg ) { MOV16ItoM((uptr)&PS2MEM_HW[(mem) & 0xffff], g_cpuConstRegs[(mmreg>>16)&0x1f].UL[0]); @@ -652,6 +670,7 @@ static void recDmaExecI16( void (*name)(), u32 mem, int mmreg ) static void recDmaExec16(void (*name)(), u32 mem, int mmreg) { iFlushCall(0); + if( IS_EECONSTREG(mmreg) ) { recDmaExecI16(name, mem, mmreg); } @@ -674,7 +693,9 @@ static void recDmaExec16(void (*name)(), u32 mem, int mmreg) void hwConstWrite16(u32 mem, int mmreg) { switch(mem) { + CONSTWRITE_TIMERS(16) + case 0x10008000: // dma0 - vif0 recDmaExec16(dmaVIF0, mem, mmreg); break; @@ -804,11 +825,19 @@ static void recDmaExecI( void (*name)(), u32 mem, int mmreg ) static void recDmaExec( void (*name)(), u32 mem, int mmreg ) { + iFlushCall(0); + if( IS_EECONSTREG(mmreg) ) { recDmaExecI(name, mem, mmreg); } else { + + // fixme: This is a lot of code to be injecting into the recompiler + // for every DMA transfer. It might actually be more efficient to + // set this up as a C function call instead (depends on how often + // the register is written without actually starting a DMA xfer). + _eeMoveMMREGtoR(EAX, mmreg); TEST32ItoR(EAX, 0xffff0000); j8Ptr[6] = JNZ8(0); @@ -839,13 +868,6 @@ static void recDmaExec( void (*name)(), u32 mem, int mmreg ) } } -#define CONSTWRITE_CALLTIMER32(name, index, bit) { \ - _recPushReg(mmreg); \ - iFlushCall(0); \ - PUSH32I(index); \ - CALLFunc((uptr)name); \ - ADD32ItoR(ESP, 8); \ -} \ void hwConstWrite32(u32 mem, int mmreg) { @@ -874,23 +896,8 @@ void hwConstWrite32(u32 mem, int mmreg) } switch (mem) { - case 0x10000000: CONSTWRITE_CALLTIMER32(rcntWcount, 0, bit); break; - case 0x10000010: CONSTWRITE_CALLTIMER32(rcntWmode, 0, bit); break; - case 0x10000020: CONSTWRITE_CALLTIMER32(rcntWtarget, 0, bit); break; - case 0x10000030: CONSTWRITE_CALLTIMER32(rcntWhold, 0, bit); break; - - case 0x10000800: CONSTWRITE_CALLTIMER32(rcntWcount, 1, bit); break; - case 0x10000810: CONSTWRITE_CALLTIMER32(rcntWmode, 1, bit); break; - case 0x10000820: CONSTWRITE_CALLTIMER32(rcntWtarget, 1, bit); break; - case 0x10000830: CONSTWRITE_CALLTIMER32(rcntWhold, 1, bit); break; - - case 0x10001000: CONSTWRITE_CALLTIMER32(rcntWcount, 2, bit); break; - case 0x10001010: CONSTWRITE_CALLTIMER32(rcntWmode, 2, bit); break; - case 0x10001020: CONSTWRITE_CALLTIMER32(rcntWtarget, 2, bit); break; - - case 0x10001800: CONSTWRITE_CALLTIMER32(rcntWcount, 3, bit); break; - case 0x10001810: CONSTWRITE_CALLTIMER32(rcntWmode, 3, bit); break; - case 0x10001820: CONSTWRITE_CALLTIMER32(rcntWtarget, 3, bit); break; + + CONSTWRITE_TIMERS(32) case GIF_CTRL: @@ -980,8 +987,6 @@ void hwConstWrite32(u32 mem, int mmreg) XOR16RtoM((uptr)&PS2MEM_HW[0xe012], EAX); CALLFunc((uptr)cpuTestDMACInts); - - //x86SetJ8( j8Ptr[5] ); break; case 0x1000f000: // INTC_STAT @@ -1173,15 +1178,7 @@ void hwConstWrite64(u32 mem, int mmreg) SHR32ItoR(EAX, 16); XOR16RtoM((uptr)&PS2MEM_HW[0xe012], EAX); - - // cpuRegs.CP0.n.Status.val is checked by cpuTestDMACInts. - //MOV32MtoR(EAX, (uptr)&cpuRegs.CP0.n.Status.val); - //AND32ItoR(EAX, 0x10807); - //CMP32ItoR(EAX, 0x10801); - //j8Ptr[5] = JNE8(0); CALLFunc((uptr)cpuTestDMACInts); - - //x86SetJ8( j8Ptr[5] ); break; case 0x1000f590: // DMAC_ENABLEW @@ -1191,14 +1188,7 @@ void hwConstWrite64(u32 mem, int mmreg) case 0x1000f000: // INTC_STAT _eeWriteConstMem32OP((uptr)&PS2MEM_HW[mem&0xffff], mmreg, 2); - // note: cpuRegs.CP0.n.Status.val conditional is done by cpuTestINTCInts. - //MOV32MtoR(EAX, (uptr)&cpuRegs.CP0.n.Status.val); - //AND32ItoR(EAX, 0x10407); - //CMP32ItoR(EAX, 0x10401); - //j8Ptr[5] = JNE8(0); CALLFunc((uptr)cpuTestINTCInts); - - //x86SetJ8( j8Ptr[5] ); break; case 0x1000f010: // INTC_MASK @@ -1207,16 +1197,7 @@ void hwConstWrite64(u32 mem, int mmreg) iFlushCall(0); XOR16RtoM((uptr)&PS2MEM_HW[0xf010], EAX); - - // note: cpuRegs.CP0.n.Status.val conditional is done by cpuTestINTCInts. - //MOV32MtoR(EAX, (uptr)&cpuRegs.CP0.n.Status.val); - //AND32ItoR(EAX, 0x10407); - //CMP32ItoR(EAX, 0x10401); - //j8Ptr[5] = JNE8(0); - CALLFunc((uptr)cpuTestINTCInts); - - //x86SetJ8( j8Ptr[5] ); - + CALLFunc((uptr)cpuTestINTCInts); break; case 0x1000f130: diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 184f658644..425b9a2289 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -768,16 +768,17 @@ void _freeMMXregs() } } -void FreezeMMXRegs_(int save) +__forceinline void FreezeMMXRegs_(int save) { assert( g_EEFreezeRegs ); if( save ) { - if( g_globalMMXSaved ){ + g_globalMMXSaved++; + if( g_globalMMXSaved>1 ) + { //SysPrintf("MMX Already Saved!\n"); return; - } - g_globalMMXSaved = 1; + } #ifdef _MSC_VER __asm { @@ -807,11 +808,14 @@ void FreezeMMXRegs_(int save) } else { - if( !g_globalMMXSaved ){ + if( g_globalMMXSaved==0 ) + { //SysPrintf("MMX Not Saved!\n"); return; - } - g_globalMMXSaved = 0; + } + g_globalMMXSaved--; + + if( g_globalMMXSaved > 0 ) return; #ifdef _MSC_VER __asm { diff --git a/pcsx2/x86/ix86-32/iR5900-32.c b/pcsx2/x86/ix86-32/iR5900-32.c index bb62394ce5..b4ec630fb5 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.c +++ b/pcsx2/x86/ix86-32/iR5900-32.c @@ -2283,7 +2283,7 @@ static void iBranchTest(u32 newpc, u32 cpuBranch) if( newpc != 0xffffffff ) { CMP32ItoM((uptr)&cpuRegs.pc, newpc); - JNE32((u32)DispatcherReg - ( (u32)x86Ptr + 6 )); + JNE32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 )); } x86SetJ8( j8Ptr[0] ); @@ -2315,7 +2315,7 @@ void recSYSCALL( void ) { CMP32ItoM((uptr)&cpuRegs.pc, pc); j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT); - JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); + JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); //branch = 2; } @@ -2329,7 +2329,7 @@ void recBREAK( void ) { CMP32ItoM((uptr)&cpuRegs.pc, pc); j8Ptr[0] = JE8(0); - ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT); + ADD32ItoM((uptr)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT); RET(); x86SetJ8(j8Ptr[0]); //branch = 2; @@ -2365,16 +2365,16 @@ void recMFSA( void ) mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); if( mmreg >= 0 ) { - SSE_MOVLPS_M64_to_XMM(mmreg, (u32)&cpuRegs.sa); + SSE_MOVLPS_M64_to_XMM(mmreg, (uptr)&cpuRegs.sa); } else if( (mmreg = _checkMMXreg(MMX_GPR+_Rd_, MODE_WRITE)) >= 0 ) { - MOVDMtoMMX(mmreg, (u32)&cpuRegs.sa); + MOVDMtoMMX(mmreg, (uptr)&cpuRegs.sa); SetMMXstate(); } else { MOV32MtoR(EAX, (u32)&cpuRegs.sa); _deleteEEreg(_Rd_, 0); - MOV32RtoM((u32)&cpuRegs.GPR.r[_Rd_].UL[0], EAX); + MOV32RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UL[0], EAX); MOV32ItoM((uptr)&cpuRegs.GPR.r[_Rd_].UL[1], 0); } } @@ -2388,15 +2388,15 @@ void recMTSA( void ) int mmreg; if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) { - SSE_MOVSS_XMM_to_M32((u32)&cpuRegs.sa, mmreg); + SSE_MOVSS_XMM_to_M32((uptr)&cpuRegs.sa, mmreg); } else if( (mmreg = _checkMMXreg(MMX_GPR+_Rs_, MODE_READ)) >= 0 ) { - MOVDMMXtoM((u32)&cpuRegs.sa, mmreg); + MOVDMMXtoM((uptr)&cpuRegs.sa, mmreg); SetMMXstate(); } else { - MOV32MtoR(EAX, (u32)&cpuRegs.GPR.r[_Rs_].UL[0]); - MOV32RtoM((u32)&cpuRegs.sa, EAX); + MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UL[0]); + MOV32RtoM((uptr)&cpuRegs.sa, EAX); } } } @@ -2411,7 +2411,7 @@ void recMTSAB( void ) AND32ItoR(EAX, 0xF); XOR32ItoR(EAX, _Imm_&0xf); SHL32ItoR(EAX, 3); - MOV32RtoM((u32)&cpuRegs.sa, EAX); + MOV32RtoM((uptr)&cpuRegs.sa, EAX); } } @@ -2425,7 +2425,7 @@ void recMTSAH( void ) AND32ItoR(EAX, 0x7); XOR32ItoR(EAX, _Imm_&0x7); SHL32ItoR(EAX, 4); - MOV32RtoM((u32)&cpuRegs.sa, EAX); + MOV32RtoM((uptr)&cpuRegs.sa, EAX); } } @@ -2496,7 +2496,7 @@ void recompileNextInstruction(int delayslot) // return; // } - JMP32((u32)pblock->pFnptr - ((u32)x86Ptr + 5)); + JMP32((uptr)pblock->pFnptr - ((uptr)x86Ptr + 5)); branch = 3; return; } @@ -2895,7 +2895,7 @@ void recRecompile( u32 startpc ) MOV32ItoM((uptr)&g_lastpc, pc); CALLFunc((uptr)printfn); -// CMP32MtoR(EBP, (u32)&s_uSaveEBP); +// CMP32MtoR(EBP, (uptr)&s_uSaveEBP); // j8Ptr[0] = JE8(0); // CALLFunc((uptr)badespfn); // x86SetJ8(j8Ptr[0]); @@ -3151,7 +3151,7 @@ StartRecomp: // s_startcount = 0; // if( pc+32 < s_nEndBlock ) { // // only blocks with more than 8 insts -// //PUSH32I((u32)&lbase); +// //PUSH32I((uptr)&lbase); // //CALLFunc((uptr)QueryPerformanceCounter); // lbase.QuadPart = GetCPUTick(); // s_startcount = 1; @@ -3217,7 +3217,7 @@ StartRecomp: iBranchTest(0xffffffff, 1); if( bExecBIOS ) CheckForBIOSEnd(); - JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); + JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 )); } else { assert( branch != 3 ); @@ -3229,7 +3229,7 @@ StartRecomp: assert( pc == s_nEndBlock ); iFlushCall(FLUSH_EVERYTHING); MOV32ItoM((uptr)&cpuRegs.pc, pc); - JMP32((u32)pblock->pFnptr - ((u32)x86Ptr + 5)); + JMP32((uptr)pblock->pFnptr - ((uptr)x86Ptr + 5)); branch = 3; } else if( !branch ) {