Mostly added comments to some code. Also added a "safety" set of FreezeXMMRegs calls to gs.cpp, in a spot of code that should always be called from a frozen register state anyway (but better to be safe).

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@425 a6443dda-0b58-4228-96e9-037be469359c
2008-12-13 03:21:31 +00:00 · 2008-12-13 03:21:31 +00:00 · 5a317846a5
parent be9d0ea954
commit 5a317846a5
7 changed files with 120 additions and 51 deletions
--- a/pcsx2/CDVD.c
+++ b/pcsx2/CDVD.c
@ -672,11 +672,26 @@ void cdvdReset()

 }

+// CDVD Timing Notes...
+// As of recent IOP sync fixes, CDVD timings seem to be fairly unimportant
+// to compatibility now.  Tests have shown that setting CDVD read speeds to
+// insanely low values (equating to unrealistically fast DVD drive speeds)
+// doesn't break any games, nor do higher/lower speeds cause/fix IPU sync
+// problems anymore either.  Games do tend to issue a lot of CDVD BREAKs
+// when the CDVD latency is very low, which are something the emulator can
+// safely ignore anyway.
+
+// Notable Exception : DigitalDevilSaga PAL, in which certain movies do not
+// play unless the CDVD read speed is *insanely* high (several thousand 
+// cycles per block).  This probably has nothing to do with the CDVD though,
+// and is likely caused by some other emulation problem that just happens
+// to be "masked over" by slowing down the CDVD.
+
 #define PSX_CD_READSPEED (PSXCLK / 153600) // 1 Byte Time @ x1 (150KB = cd x 1)
-#define PSX_DVD_READSPEED (PSXCLK /1382400) // normal is 1 Byte Time @ x1 (1350KB = dvd x 1)
+#define PSX_DVD_READSPEED (PSXCLK / 1382400) // normal is 1 Byte Time @ x1 (1350KB = dvd x 1)

 void cdvdReadTimeRcnt(int mode) // Mode 0 is DVD, Mode 1 is CD
-{	
+{
 	if (cdvd.Sector == 16) //DVD TOC
 		cdvdReadTime = 30000; //simulates spin-up time, fixes hdloader
 	else
--- a/pcsx2/GS.cpp
+++ b/pcsx2/GS.cpp
@ -432,16 +432,20 @@ s32 gsOpen()

 void GS_SETEVENT()
 {
+	// Win32 Kernel calls can corrupt the XMM/MMX registers.
+	// Callers should always make sure those registers are frozen:
+	assert( !g_EEFreezeRegs || (g_globalXMMSaved > 0) );
+
 	event_set(g_hGsEvent);
 	m_mtgsCopyCommandTally = 0;
 }

-__forceinline void gsWaitGS()
+void gsWaitGS()
 {
 	if( !CHECK_MULTIGS ) return;

 	// Freeze registers because some kernel code likes to destroy them
-	FreezeXMMRegs(1); 
+	FreezeXMMRegs(1);
 	FreezeMMXRegs(1);
 	GS_SETEVENT();
 	while( *(volatile PU8*)&g_pGSRingPos != *(volatile PU8*)&g_pGSWritePos )
@ -511,8 +515,13 @@ void GSRINGBUF_DONECOPY(const u8* mem, u32 size)
 	//  24 - very slow on HT machines (+5% drop in fps)
 	//  8 - roughly 2% slower on HT machines.

+	FreezeXMMRegs(1); 
+	FreezeMMXRegs(1);
 	if( ++m_mtgsCopyCommandTally > 16 )
 		GS_SETEVENT();
+	FreezeXMMRegs(0); 
+	FreezeMMXRegs(0);
+
 }

 void gsShutdown()
@ -1830,6 +1839,9 @@ extern "C" void gsPostVsyncEnd()
 		//SysPrintf( " Sending VSync : %d \n", g_pGSvSyncCount );
 #endif
 		GSRingBufSimplePacket(GS_RINGTYPE_VSYNC, (*(u32*)(PS2MEM_GS+0x1000)&0x2000), 0, 0);
+
+		// No need to freeze MMX/XMM registers here since this
+		// code is always called from the context of a BranchTest.
 		GS_SETEVENT();
 	}
 	else
--- a/pcsx2/IPU/IPU.c
+++ b/pcsx2/IPU/IPU.c
@ -27,15 +27,22 @@
 #include "iR5900.h"
 #include "coroutine.h"

-// IPU Speedhack : Calls the IPU interrupt handlers directly instead of feeding
-// them through the EE's branch test.  Not tested extensively yet.
-#ifdef USE_IPU_SPEEDHACK
-#	define IPU_TO_INT( val )  ipu1Interrupt()
-#	define IPU_FROM_INT( val )  ipu0Interrupt()
+// Zero cycle IRQ schedules aren't really good, but the IPU uses them.
+// Better to throw the IRQ inline:
+
+#define IPU_INT0_FROM()  ipu0Interrupt()
+//#define IPU_INT0_FROM()  CPU_INT( DMAC_FROM_IPU, 0 )
+
+// IPU Inline'd IRQs : Calls the IPU interrupt handlers directly instead of
+// feeding them through the EE's branch test. (see IPU.h for details)
+
+#ifdef IPU_INLINE_IRQS
+#	define IPU_INT_TO( cycles )  ipu1Interrupt()
+#	define IPU_INT_FROM( cycles )  ipu0Interrupt()
 #	define IPU_FORCEINLINE
 #else
-#	define IPU_TO_INT( val )  CPU_INT( DMAC_TO_IPU, val )
-#	define IPU_FROM_INT( val )  CPU_INT( DMAC_FROM_IPU, val )
+#	define IPU_INT_TO( cycles )  CPU_INT( DMAC_TO_IPU, cycles )
+#	define IPU_INT_FROM( cycles )  CPU_INT( DMAC_FROM_IPU, cycles )
 #	define IPU_FORCEINLINE __forceinline
 #endif

@ -1028,7 +1035,7 @@ void IPUCMD_WRITE(u32 val) {
 			
 			if( ipuCSC(ipuRegs->cmd.DATA) ) {
 				if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) 
-					IPU_FROM_INT(0);
+					IPU_INT0_FROM();
 				return;
 			}

@ -1048,7 +1055,7 @@ void IPUCMD_WRITE(u32 val) {
 			if( ipuIDEC(val) ) {
 				// idec done, ipu0 done too
 				if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) 
-					IPU_FROM_INT(0);
+					IPU_INT0_FROM();
 				return;
 			}

@ -1062,7 +1069,7 @@ void IPUCMD_WRITE(u32 val) {
 		case SCE_IPU_BDEC:
 			if( ipuBDEC(val)) {
 				if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) 
-					IPU_FROM_INT(0);
+					IPU_INT0_FROM();
 				if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD)
 					hwIntcIrq(INTC_IPU);

@ -1135,7 +1142,7 @@ void IPUWorker()
 			}

 			if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) 
-				IPU_FROM_INT(0);
+				IPU_INT0_FROM();
 			break;		
 		case SCE_IPU_PACK:
 			if( !ipuPACK(ipuRegs->cmd.DATA) )
@ -1160,7 +1167,7 @@ void IPUWorker()
 			ipuCurCmd = 0xffffffff;
 			// CHECK!: IPU0dma remains when IDEC is done, so we need to clear it
 			if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100))
-                IPU_FROM_INT(0);
+                IPU_INT0_FROM();

 			s_routine = NULL;
 			break;
@ -1177,7 +1184,7 @@ void IPUWorker()
 			ipuRegs->cmd.BUSY = 0;
 			ipuCurCmd = 0xffffffff;
 			if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100))
-				IPU_FROM_INT(0);
+				IPU_INT0_FROM();
 			s_routine = NULL;
 			if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD)
 				hwIntcIrq(INTC_IPU);
@ -1642,7 +1649,7 @@ int IPU1dma()
 		if ((ipu1dma->chcr & 0x80) && (g_nDMATransfer&IPU_DMA_DOTIE1)) {			 //Check TIE bit of CHCR and IRQ bit of tag
 			SysPrintf("IPU1 TIE\n");

-			IPU_TO_INT(totalqwc*BIAS);
+			IPU_INT_TO(totalqwc*BIAS);
 			g_nDMATransfer &= ~(IPU_DMA_ACTV1|IPU_DMA_DOTIE1);
 			g_nDMATransfer |= IPU_DMA_TIE1;
 			return totalqwc;
@ -1651,7 +1658,7 @@ int IPU1dma()
 		g_nDMATransfer &= ~(IPU_DMA_ACTV1|IPU_DMA_DOTIE1);

 		if( (ipu1dma->chcr&0xc) == 0 ) {
-			IPU_TO_INT(totalqwc*BIAS);
+			IPU_INT_TO(totalqwc*BIAS);
 			return totalqwc;
 		}
 		else {
@ -1667,7 +1674,7 @@ int IPU1dma()

 				ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 );
 				IPU_LOG("IPU dmaIrq Set\n"); 
-				IPU_TO_INT(totalqwc*BIAS);
+				IPU_INT_TO(totalqwc*BIAS);
 				g_nDMATransfer |= IPU_DMA_TIE1;
 				return totalqwc;
 			}
@ -1676,12 +1683,12 @@ int IPU1dma()
 			{
 			case 0x00000000:
 				ipu1dma->tadr += 16;
-				IPU_TO_INT((1+totalqwc)*BIAS);
+				IPU_INT_TO((1+totalqwc)*BIAS);
 				return totalqwc;

 			case 0x70000000:
 				ipu1dma->tadr = ipu1dma->madr;
-				IPU_TO_INT((1+totalqwc)*BIAS);
+				IPU_INT_TO((1+totalqwc)*BIAS);
 				return totalqwc;
 			}
 		}
@ -1698,7 +1705,7 @@ int IPU1dma()
 		IPU_LOG("dmaIPU1 Normal size=%d, addr=%lx, fifosize=%x\n",
 			ipu1dma->qwc, ipu1dma->madr, 8 - g_BP.IFC);
 		IPU1chain();
-		IPU_TO_INT((ipu1cycles+totalqwc)*BIAS);
+		IPU_INT_TO((ipu1cycles+totalqwc)*BIAS);
 		return totalqwc;
 	}
 	else 
@ -1780,35 +1787,31 @@ int IPU1dma()
 					ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 );
 				}

-				IPU_TO_INT(ipu1cycles+totalqwc*BIAS);
+				IPU_INT_TO(ipu1cycles+totalqwc*BIAS);
 				g_nDMATransfer |= IPU_DMA_TIE1;
 				return totalqwc;
 			}
-		//}

-			if(ipu1dma->qwc == 0){
-		switch( ptag[0]&0x70000000 )
+		if(ipu1dma->qwc == 0)
 		{
-		case 0x00000000:
-			ipu1dma->tadr += 16;
-			IPU_TO_INT((ipu1cycles+totalqwc)*BIAS);
-			return totalqwc;
-
-		case 0x70000000:
-			ipu1dma->tadr = ipu1dma->madr;
-			IPU_TO_INT((ipu1cycles+totalqwc)*BIAS);
-			return totalqwc;
-		}
+			switch( ptag[0]&0x70000000 )
+			{
+				case 0x00000000:
+					ipu1dma->tadr += 16;
+				break;
+				
+				case 0x70000000:
+					ipu1dma->tadr = ipu1dma->madr;
+				break;
 			}
+		}
 	}

-	IPU_TO_INT((ipu1cycles+totalqwc)*BIAS);
+	IPU_INT_TO((ipu1cycles+totalqwc)*BIAS);
 	return totalqwc;
 }


-
-
 int FIFOfrom_write(u32 *value,int size)
 {
 	int transsize;
@ -1910,7 +1913,7 @@ int IPU0dma()
 					break;
 			}
 		}
-		IPU_FROM_INT(readsize*BIAS);
+		IPU_INT_FROM( readsize*BIAS );
 	}

 	return readsize;
@ -1932,7 +1935,7 @@ void dmaIPU1() // toIPU

 extern void GIFdma();

-IPU_FORCEINLINE void ipu0Interrupt() {
+void ipu0Interrupt() {
 	IPU_LOG("ipu0Interrupt: %x\n", cpuRegs.cycle);

 	if( g_nDMATransfer & IPU_DMA_FIREINT0 ) {
--- a/pcsx2/IPU/IPU.h
+++ b/pcsx2/IPU/IPU.h
@ -21,12 +21,20 @@

 #include "Common.h"

-// IPU Speed Hack!
-// By uncommenting the define below, the IPU can get a pretty nice speed boost,
-// of about 10%.  Additionally, the ipuInterrupt handlers in the cpuBranchTest
-// are also be removed, which helps reduce branch test overhead in *all* games.
+// IPU_INLINE_IRQS
+// Scheduling ints into the future is a purist approach to emulation, and
+// is mostly cosmetic since the emulator itself performs all actions instantly
+// (as far as the emulated CPU is concerned).  In some cases this can actually
+// cause more sync problems than it supposedly solves, due to accumulated
+// delays incurred by the recompiler's low cycle update rate and also Pcsx2
+// failing to properly handle pre-emptive DMA/IRQs or cpu exceptions.

-//#define USE_IPU_SPEEDHACK
+// Uncomment the following line to enable inline IRQs for the IPU.  Tests show
+// that it doesn't have any effect on compatibility or audio/video sync, and it
+// speeds up movie playback by some 6-8%. But it lacks the purist touch, so it's
+// not enabled by default.
+
+//#define IPU_INLINE_IRQS


 #ifdef _MSC_VER
--- a/pcsx2/R3000A.c
+++ b/pcsx2/R3000A.c
@ -32,7 +32,7 @@ u32 g_psxHasConstReg, g_psxFlushedConstReg;
 // Controls when branch tests are performed.
 u32 g_psxNextBranchCycle = 0;

-// This value is used when the IOP execution is broken to return contorl to the EE.
+// This value is used when the IOP execution is broken to return control to the EE.
 // (which happens when the IOP throws EE-bound interrupts).  It holds the value of
 // psxCycleEE (which is set to zero to facilitate the code break), so that the unrun
 // cycles can be accounted for later.
@ -42,6 +42,9 @@ s32 psxBreak = 0;
 // control is returned to the EE.
 s32 psxCycleEE = -1;

+// Used to signal to the EE when important actions that need IOP-attention have
+// happened (hsyncs, vsyncs, IOP exceptions, etc).  IOP runs code whenever this
+// is true, even if it's already running ahead a bit.
 int iopBranchAction = 0;


--- a/pcsx2/Sio.c
+++ b/pcsx2/Sio.c
@ -34,7 +34,17 @@ FILE * MemoryCard1, * MemoryCard2;
 const unsigned char cardh[4] = { 0xFF, 0xFF, 0x5a, 0x5d };
 // Memory Card Specs : Sector size etc.
 struct mc_command_0x26_tag mc_command_0x26= {'+', 512, 16, 0x4000, 0x52, 0x5A};
-#define SIO_INT() PSX_INT(16, PSXCLK/250000); /*270;*/
+
+// SIO Inline'd IRQs : Calls the SIO interrupt handlers directly instead of
+// feeding them through the IOP's branch test. (see Sio.h for details)
+
+#ifdef SIO_INLINE_IRQS
+#define SIO_INT() sioInterrupt()
+#define SIO_FORCEINLINE
+#else
+#define SIO_INT() PSX_INT(16, PSXCLK/250000);
+#define SIO_FORCEINLINE __forceinline
+#endif

 void _ReadMcd(char *data, u32 adr, int size) {
 	ReadMcd(sio.CtrlReg&0x2000?2:1, data, adr, size);
@ -348,7 +358,7 @@ void SIO_CommandWrite(u8 value,int way) {
 					_SaveMcd(sio.buf, (512+16)*sio.sector+256, 256);
 					_SaveMcd(sio.buf, (512+16)*sio.sector+512, 16);
 					sio.buf[2]='+';
-				*/	sio.buf[3]=sio.terminator;
+					sio.buf[3]=sio.terminator;*/
 					//sio.buf[sio.bufcount] = sio.terminator;
 				MEMCARDS_LOG("MC(%d) INTERNAL ERASE command 0x%02X\n", ((sio.CtrlReg&0x2000)>>13)+1, value);
 				}
@ -497,7 +507,7 @@ void sioWriteCtrl16(unsigned short value) {
 	}
 }

-__forceinline void  sioInterrupt() {
+void  sioInterrupt() {
 	PAD_LOG("Sio Interrupt\n");
 	sio.StatReg|= IRQ;
 	psxHu32(0x1070)|=0x80;
--- a/pcsx2/Sio.h
+++ b/pcsx2/Sio.h
@ -20,6 +20,24 @@
 #ifndef _SIO_H_
 #define _SIO_H_

+// SIO IRQ Timings...
+// Scheduling ints into the future is a purist approach to emulation, and
+// is mostly cosmetic since the emulator itself performs all actions instantly
+// (as far as the emulated CPU is concerned).  In some cases this can actually
+// cause more sync problems than it supposedly solves, due to accumulated
+// delays incurred by the recompiler's low cycle update rate and also Pcsx2
+// failing to properly handle pre-emptive DMA/IRQs or cpu exceptions.
+
+// The SIO is one of these cases, whereby many games seem to be a lot happier
+// if the SIO handles its IRQs instantly instead of scheduling them.
+// Uncomment the line below for SIO instant-IRQ mode.  It improves responsiveness 
+// considerably, fixes PAD latency problems in some games, and may even reduce the
+// chance of saves getting corrupted (untested).  But it lacks the purist touch,
+// so it's not enabled by default.
+
+//#define SIO_INLINE_IRQS
+
+
 typedef struct {
 	u16 StatReg;
 	u16 ModeReg;