From 5a317846a5e85b4c78e72f73c582211205bd2b15 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Sat, 13 Dec 2008 03:21:31 +0000 Subject: [PATCH] Mostly added comments to some code. Also added a "safety" set of FreezeXMMRegs calls to gs.cpp, in a spot of code that should always be called from a frozen register state anyway (but better to be safe). git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@425 a6443dda-0b58-4228-96e9-037be469359c --- pcsx2/CDVD.c | 19 ++++++++++-- pcsx2/GS.cpp | 16 ++++++++-- pcsx2/IPU/IPU.c | 79 +++++++++++++++++++++++++------------------------ pcsx2/IPU/IPU.h | 18 +++++++---- pcsx2/R3000A.c | 5 +++- pcsx2/Sio.c | 16 ++++++++-- pcsx2/Sio.h | 18 +++++++++++ 7 files changed, 120 insertions(+), 51 deletions(-) diff --git a/pcsx2/CDVD.c b/pcsx2/CDVD.c index 68314273a9..7800331499 100644 --- a/pcsx2/CDVD.c +++ b/pcsx2/CDVD.c @@ -672,11 +672,26 @@ void cdvdReset() } +// CDVD Timing Notes... +// As of recent IOP sync fixes, CDVD timings seem to be fairly unimportant +// to compatibility now. Tests have shown that setting CDVD read speeds to +// insanely low values (equating to unrealistically fast DVDdrive speeds) +// don't break any games, nor do higher/lower speeds cause/fix IPU sync +// problems anymore either. Games do tend to issue a lot of CDVD BREAKs +// when the CDVD latency is very low, which are something the emulator can +// safely ignore anyway. + +// Notable Exception : DigitalDevilSaga PAL, in which certain movies do not +// play unless the CDVD read speed is *insanely* high (several thousand +// cycles per block). This probably has nothing to do with the CDVD though, +// and is likely caused by some other emulation problem that just happens +// to be "masked over" by slowing down the CDVD. + #define PSX_CD_READSPEED (PSXCLK / 153600) // 1 Byte Time @ x1 (150KB = cd x 1) -#define PSX_DVD_READSPEED (PSXCLK /1382400) // normal is 1 Byte Time @ x1 (1350KB = dvd x 1) +#define PSX_DVD_READSPEED (PSXCLK / 1382400) // normal is 1 Byte Time @ x1 (1350KB = dvd x 1) void cdvdReadTimeRcnt(int mode) // Mode 0 is DVD, Mode 1 is CD -{ +{ if (cdvd.Sector == 16) //DVD TOC cdvdReadTime = 30000; //simulates spin-up time, fixes hdloader else diff --git a/pcsx2/GS.cpp b/pcsx2/GS.cpp index 8e856437bc..fc74c819eb 100644 --- a/pcsx2/GS.cpp +++ b/pcsx2/GS.cpp @@ -432,16 +432,20 @@ s32 gsOpen() void GS_SETEVENT() { + // Win32 Kernel calls can corrupt the XMM/MMX registers. + // Callers should always make sure those registers are frozen: + assert( !g_EEFreezeRegs || (g_globalXMMSaved > 0) ); + event_set(g_hGsEvent); m_mtgsCopyCommandTally = 0; } -__forceinline void gsWaitGS() +void gsWaitGS() { if( !CHECK_MULTIGS ) return; // Freeze registers because some kernel code likes to destroy them - FreezeXMMRegs(1); + FreezeXMMRegs(1); FreezeMMXRegs(1); GS_SETEVENT(); while( *(volatile PU8*)&g_pGSRingPos != *(volatile PU8*)&g_pGSWritePos ) @@ -511,8 +515,13 @@ void GSRINGBUF_DONECOPY(const u8* mem, u32 size) // 24 - very slow on HT machines (+5% drop in fps) // 8 - roughly 2% slower on HT machines. + FreezeXMMRegs(1); + FreezeMMXRegs(1); if( ++m_mtgsCopyCommandTally > 16 ) GS_SETEVENT(); + FreezeXMMRegs(0); + FreezeMMXRegs(0); + } void gsShutdown() @@ -1830,6 +1839,9 @@ extern "C" void gsPostVsyncEnd() //SysPrintf( " Sending VSync : %d \n", g_pGSvSyncCount ); #endif GSRingBufSimplePacket(GS_RINGTYPE_VSYNC, (*(u32*)(PS2MEM_GS+0x1000)&0x2000), 0, 0); + + // No need to freeze MMX/XMM registers here since this + // code is always called from the context of a BranchTest. GS_SETEVENT(); } else diff --git a/pcsx2/IPU/IPU.c b/pcsx2/IPU/IPU.c index adabc3c3e8..170ce5836f 100644 --- a/pcsx2/IPU/IPU.c +++ b/pcsx2/IPU/IPU.c @@ -27,15 +27,22 @@ #include "iR5900.h" #include "coroutine.h" -// IPU Speedhack : Calls the IPU interrupt handlers directly instead of feeding -// them through the EE's branch test. Not tested extensively yet. -#ifdef USE_IPU_SPEEDHACK -# define IPU_TO_INT( val ) ipu1Interrupt() -# define IPU_FROM_INT( val ) ipu0Interrupt() +// Zero cycle IRQ schedules aren't really good, but the IPU uses them. +// Better to throw the IRQ inline: + +#define IPU_INT0_FROM() ipu0Interrupt() +//#define IPU_INT0_FROM() CPU_INT( DMAC_FROM_IPU, 0 ) + +// IPU Inline'd IRQs : Calls the IPU interrupt handlers directly instead of +// feeding them through the EE's branch test. (see IPU.H for details) + +#ifdef IPU_INLINE_IRQS +# define IPU_INT_TO( cycles ) ipu1Interrupt() +# define IPU_INT_FROM( cycles ) ipu0Interrupt() # define IPU_FORCEINLINE #else -# define IPU_TO_INT( val ) CPU_INT( DMAC_TO_IPU, val ) -# define IPU_FROM_INT( val ) CPU_INT( DMAC_FROM_IPU, val ) +# define IPU_INT_TO( cycles ) CPU_INT( DMAC_TO_IPU, cycles ) +# define IPU_INT_FROM( cycles ) CPU_INT( DMAC_FROM_IPU, cycles ) # define IPU_FORCEINLINE __forceinline #endif @@ -1028,7 +1035,7 @@ void IPUCMD_WRITE(u32 val) { if( ipuCSC(ipuRegs->cmd.DATA) ) { if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) - IPU_FROM_INT(0); + IPU_INT0_FROM(); return; } @@ -1048,7 +1055,7 @@ void IPUCMD_WRITE(u32 val) { if( ipuIDEC(val) ) { // idec done, ipu0 done too if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) - IPU_FROM_INT(0); + IPU_INT0_FROM(); return; } @@ -1062,7 +1069,7 @@ void IPUCMD_WRITE(u32 val) { case SCE_IPU_BDEC: if( ipuBDEC(val)) { if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) - IPU_FROM_INT(0); + IPU_INT0_FROM(); if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) hwIntcIrq(INTC_IPU); @@ -1135,7 +1142,7 @@ void IPUWorker() } if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) - IPU_FROM_INT(0); + IPU_INT0_FROM(); break; case SCE_IPU_PACK: if( !ipuPACK(ipuRegs->cmd.DATA) ) @@ -1160,7 +1167,7 @@ void IPUWorker() ipuCurCmd = 0xffffffff; // CHECK!: IPU0dma remains when IDEC is done, so we need to clear it if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) - IPU_FROM_INT(0); + IPU_INT0_FROM(); s_routine = NULL; break; @@ -1177,7 +1184,7 @@ void IPUWorker() ipuRegs->cmd.BUSY = 0; ipuCurCmd = 0xffffffff; if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) - IPU_FROM_INT(0); + IPU_INT0_FROM(); s_routine = NULL; if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) hwIntcIrq(INTC_IPU); @@ -1642,7 +1649,7 @@ int IPU1dma() if ((ipu1dma->chcr & 0x80) && (g_nDMATransfer&IPU_DMA_DOTIE1)) { //Check TIE bit of CHCR and IRQ bit of tag SysPrintf("IPU1 TIE\n"); - IPU_TO_INT(totalqwc*BIAS); + IPU_INT_TO(totalqwc*BIAS); g_nDMATransfer &= ~(IPU_DMA_ACTV1|IPU_DMA_DOTIE1); g_nDMATransfer |= IPU_DMA_TIE1; return totalqwc; @@ -1651,7 +1658,7 @@ int IPU1dma() g_nDMATransfer &= ~(IPU_DMA_ACTV1|IPU_DMA_DOTIE1); if( (ipu1dma->chcr&0xc) == 0 ) { - IPU_TO_INT(totalqwc*BIAS); + IPU_INT_TO(totalqwc*BIAS); return totalqwc; } else { @@ -1667,7 +1674,7 @@ int IPU1dma() ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 ); IPU_LOG("IPU dmaIrq Set\n"); - IPU_TO_INT(totalqwc*BIAS); + IPU_INT_TO(totalqwc*BIAS); g_nDMATransfer |= IPU_DMA_TIE1; return totalqwc; } @@ -1676,12 +1683,12 @@ int IPU1dma() { case 0x00000000: ipu1dma->tadr += 16; - IPU_TO_INT((1+totalqwc)*BIAS); + IPU_INT_TO((1+totalqwc)*BIAS); return totalqwc; case 0x70000000: ipu1dma->tadr = ipu1dma->madr; - IPU_TO_INT((1+totalqwc)*BIAS); + IPU_INT_TO((1+totalqwc)*BIAS); return totalqwc; } } @@ -1698,7 +1705,7 @@ int IPU1dma() IPU_LOG("dmaIPU1 Normal size=%d, addr=%lx, fifosize=%x\n", ipu1dma->qwc, ipu1dma->madr, 8 - g_BP.IFC); IPU1chain(); - IPU_TO_INT((ipu1cycles+totalqwc)*BIAS); + IPU_INT_TO((ipu1cycles+totalqwc)*BIAS); return totalqwc; } else @@ -1780,35 +1787,31 @@ int IPU1dma() ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 ); } - IPU_TO_INT(ipu1cycles+totalqwc*BIAS); + IPU_INT_TO(ipu1cycles+totalqwc*BIAS); g_nDMATransfer |= IPU_DMA_TIE1; return totalqwc; } - //} - if(ipu1dma->qwc == 0){ - switch( ptag[0]&0x70000000 ) + if(ipu1dma->qwc == 0) { - case 0x00000000: - ipu1dma->tadr += 16; - IPU_TO_INT((ipu1cycles+totalqwc)*BIAS); - return totalqwc; - - case 0x70000000: - ipu1dma->tadr = ipu1dma->madr; - IPU_TO_INT((ipu1cycles+totalqwc)*BIAS); - return totalqwc; - } + switch( ptag[0]&0x70000000 ) + { + case 0x00000000: + ipu1dma->tadr += 16; + break; + + case 0x70000000: + ipu1dma->tadr = ipu1dma->madr; + break; } + } } - IPU_TO_INT((ipu1cycles+totalqwc)*BIAS); + IPU_INT_TO((ipu1cycles+totalqwc)*BIAS); return totalqwc; } - - int FIFOfrom_write(u32 *value,int size) { int transsize; @@ -1910,7 +1913,7 @@ int IPU0dma() break; } } - IPU_FROM_INT(readsize*BIAS); + IPU_INT_FROM( readsize*BIAS ); } return readsize; @@ -1932,7 +1935,7 @@ void dmaIPU1() // toIPU extern void GIFdma(); -IPU_FORCEINLINE void ipu0Interrupt() { +void ipu0Interrupt() { IPU_LOG("ipu0Interrupt: %x\n", cpuRegs.cycle); if( g_nDMATransfer & IPU_DMA_FIREINT0 ) { diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h index 82658e37f9..b0a18af5c7 100644 --- a/pcsx2/IPU/IPU.h +++ b/pcsx2/IPU/IPU.h @@ -21,12 +21,20 @@ #include "Common.h" -// IPU Speed Hack! -// By uncommenting the define below, the IPU can get a pretty nice speed boost, -// of about 10%. Additionally, the ipuInterrupt handlers in the cpuBranchTest -// are also be removed, which helps reduce branch test overhead in *all* games. +// IPU_INLINE_IRQS +// Scheduling ints into the future is a purist approach to emulation, and +// is mostly cosmetic since the emulator itself performs all actions instantly +// (as far as the emulated CPU is concerned). In some cases this can actually +// cause more sync problems than it supposedly solves, due to accumulated +// delays incurred by the recompiler's low cycle update rate and also Pcsx2 +// failing to properly handle pre-emptive DMA/IRQs or cpu exceptions. -//#define USE_IPU_SPEEDHACK +// Uncomment the following line to enable inline IRQs for the IPU. Tests show +// that it doesn't have any effect on compatibility or audio/video sync, and it +// speeds up movie playback by some 6-8%. But it lacks the purist touch, so it's +// not enabled by default. + +//#define IPU_INLINE_IRQS #ifdef _MSC_VER diff --git a/pcsx2/R3000A.c b/pcsx2/R3000A.c index 5693adaa4c..96796c1baf 100644 --- a/pcsx2/R3000A.c +++ b/pcsx2/R3000A.c @@ -32,7 +32,7 @@ u32 g_psxHasConstReg, g_psxFlushedConstReg; // Controls when branch tests are performed. u32 g_psxNextBranchCycle = 0; -// This value is used when the IOP execution is broken to return contorl to the EE. +// This value is used when the IOP execution is broken to return control to the EE. // (which happens when the IOP throws EE-bound interrupts). It holds the value of // psxCycleEE (which is set to zero to facilitate the code break), so that the unrun // cycles can be accounted for later. @@ -42,6 +42,9 @@ s32 psxBreak = 0; // control is returned to the EE. s32 psxCycleEE = -1; +// Used to signal to the EE when important actions that need IOP-attention have +// happened (hsyncs, vsyncs, IOP exceptions, etc). IOP runs code whenever this +// is true, even if it's already running ahead a bit. int iopBranchAction = 0; diff --git a/pcsx2/Sio.c b/pcsx2/Sio.c index 06850c96cc..4b467f5ca0 100644 --- a/pcsx2/Sio.c +++ b/pcsx2/Sio.c @@ -34,7 +34,17 @@ FILE * MemoryCard1, * MemoryCard2; const unsigned char cardh[4] = { 0xFF, 0xFF, 0x5a, 0x5d }; // Memory Card Specs : Sector size etc. struct mc_command_0x26_tag mc_command_0x26= {'+', 512, 16, 0x4000, 0x52, 0x5A}; -#define SIO_INT() PSX_INT(16, PSXCLK/250000); /*270;*/ + +// SIO Inline'd IRQs : Calls the SIO interrupt handlers directly instead of +// feeding them through the IOP's branch test. (see SIO.H for details) + +#ifdef SIO_INLINE_IRQS +#define SIO_INT() sioInterrupt() +#define SIO_FORCEINLINE +#else +#define SIO_INT() PSX_INT(16, PSXCLK/250000); +#define SIO_FORCEINLINE __forceinline +#endif void _ReadMcd(char *data, u32 adr, int size) { ReadMcd(sio.CtrlReg&0x2000?2:1, data, adr, size); @@ -348,7 +358,7 @@ void SIO_CommandWrite(u8 value,int way) { _SaveMcd(sio.buf, (512+16)*sio.sector+256, 256); _SaveMcd(sio.buf, (512+16)*sio.sector+512, 16); sio.buf[2]='+'; - */ sio.buf[3]=sio.terminator; + sio.buf[3]=sio.terminator;*/ //sio.buf[sio.bufcount] = sio.terminator; MEMCARDS_LOG("MC(%d) INTERNAL ERASE command 0x%02X\n", ((sio.CtrlReg&0x2000)>>13)+1, value); } @@ -497,7 +507,7 @@ void sioWriteCtrl16(unsigned short value) { } } -__forceinline void sioInterrupt() { +void sioInterrupt() { PAD_LOG("Sio Interrupt\n"); sio.StatReg|= IRQ; psxHu32(0x1070)|=0x80; diff --git a/pcsx2/Sio.h b/pcsx2/Sio.h index 4bf8c5a920..398808428b 100644 --- a/pcsx2/Sio.h +++ b/pcsx2/Sio.h @@ -20,6 +20,24 @@ #ifndef _SIO_H_ #define _SIO_H_ +// SIO IRQ Timings... +// Scheduling ints into the future is a purist approach to emulation, and +// is mostly cosmetic since the emulator itself performs all actions instantly +// (as far as the emulated CPU is concerned). In some cases this can actually +// cause more sync problems than it supposedly solves, due to accumulated +// delays incurred by the recompiler's low cycle update rate and also Pcsx2 +// failing to properly handle pre-emptive DMA/IRQs or cpu exceptions. + +// The SIO is one of these cases, where-by many games seem to be a lot happier +// if the SIO handles its IRQs instantly instead of scheduling them. +// Uncomment the line below for SIO instant-IRQ mode. It improves responsiveness +// considerably, fixes PAD latency problems in some games, and may even reduce the +// chance of saves getting corrupted (untested). But it lacks the purist touch, +// so it's not enabled by default. + +//#define SIO_INLINE_IRQS + + typedef struct { u16 StatReg; u16 ModeReg;