diff --git a/pcsx2/IPU/IPU.c b/pcsx2/IPU/IPU.c index 7d22d71ee0..8a65610158 100644 --- a/pcsx2/IPU/IPU.c +++ b/pcsx2/IPU/IPU.c @@ -27,10 +27,16 @@ #include "iR5900.h" #include "coroutine.h" -#ifdef _WIN32 -#define FASTCALL __fastcall +// IPU Speedhack : Calls the IPU interrupt handlers directly instead of feeding +// them through the EE's branch test. Not tested extensively yet. +#ifdef USE_IPU_SPEEDHACK +# define IPU_TO_INT( val ) ipu1Interrupt() +# define IPU_FROM_INT( val ) ipu0Interrupt() +# define IPU_FASTCALL #else -#define FASTCALL +# define IPU_TO_INT( val ) CPU_INT( DMAC_TO_IPU, val ) +# define IPU_FROM_INT( val ) CPU_INT( DMAC_FROM_IPU, val ) +# define IPU_FORCEINLINE __forceinline #endif #ifndef PCSX2_VIRTUAL_MEM @@ -940,7 +946,7 @@ static void ipuSETTH(u32 val) { /////////////////////// // IPU Worker Thread // /////////////////////// -#define IPU_INTERRUPT(dma) { \ +#define IPU_TO_INTERRUPT(dma) { \ hwIntcIrq(INTC_IPU); \ } @@ -960,7 +966,7 @@ void IPUCMD_WRITE(u32 val) { switch (ipuRegs->cmd.CMD) { case SCE_IPU_BCLR: ipuBCLR(val); - IPU_INTERRUPT(DMAC_TO_IPU); + IPU_TO_INTERRUPT(DMAC_TO_IPU); return; case SCE_IPU_VDEC: @@ -1021,7 +1027,8 @@ void IPUCMD_WRITE(u32 val) { g_nCmdIndex = 0; if( ipuCSC(ipuRegs->cmd.DATA) ) { - if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) CPU_INT(DMAC_FROM_IPU,0); + if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) + IPU_FROM_INT(0); return; } @@ -1040,7 +1047,8 @@ void IPUCMD_WRITE(u32 val) { case SCE_IPU_IDEC: if( ipuIDEC(val) ) { // idec done, ipu0 done too - if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) CPU_INT(DMAC_FROM_IPU,0); + if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) + IPU_FROM_INT(0); return; } @@ -1053,7 +1061,8 @@ void IPUCMD_WRITE(u32 val) { case SCE_IPU_BDEC: if( ipuBDEC(val)) { - if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) CPU_INT(DMAC_FROM_IPU,0); + if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) + IPU_FROM_INT(0); if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) hwIntcIrq(INTC_IPU); @@ -1125,7 +1134,8 @@ void IPUWorker() return; } - if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) CPU_INT(DMAC_FROM_IPU,0); + if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) + IPU_FROM_INT(0); break; case SCE_IPU_PACK: if( !ipuPACK(ipuRegs->cmd.DATA) ) @@ -1150,7 +1160,7 @@ void IPUWorker() ipuCurCmd = 0xffffffff; // CHECK!: IPU0dma remains when IDEC is done, so we need to clear it if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) - CPU_INT(DMAC_FROM_IPU,0); + IPU_FROM_INT(0); s_routine = NULL; break; @@ -1166,7 +1176,8 @@ void IPUWorker() ipuRegs->topbusy = 0; ipuRegs->cmd.BUSY = 0; ipuCurCmd = 0xffffffff; - if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) CPU_INT(DMAC_FROM_IPU,0); + if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) + IPU_FROM_INT(0); s_routine = NULL; if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) hwIntcIrq(INTC_IPU); @@ -1631,7 +1642,7 @@ int IPU1dma() if ((ipu1dma->chcr & 0x80) && (g_nDMATransfer&IPU_DMA_DOTIE1)) { //Check TIE bit of CHCR and IRQ bit of tag SysPrintf("IPU1 TIE\n"); - CPU_INT(DMAC_TO_IPU, totalqwc*BIAS); + IPU_TO_INT(totalqwc*BIAS); g_nDMATransfer &= ~(IPU_DMA_ACTV1|IPU_DMA_DOTIE1); g_nDMATransfer |= IPU_DMA_TIE1; return totalqwc; @@ -1640,7 +1651,7 @@ int IPU1dma() g_nDMATransfer &= ~(IPU_DMA_ACTV1|IPU_DMA_DOTIE1); if( (ipu1dma->chcr&0xc) == 0 ) { - CPU_INT(DMAC_TO_IPU, totalqwc*BIAS); + IPU_TO_INT(totalqwc*BIAS); return totalqwc; } else { @@ -1656,7 +1667,7 @@ int IPU1dma() ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 ); IPU_LOG("IPU dmaIrq Set\n"); - CPU_INT(DMAC_TO_IPU, totalqwc*BIAS); + IPU_TO_INT(totalqwc*BIAS); g_nDMATransfer |= IPU_DMA_TIE1; return totalqwc; } @@ -1665,12 +1676,12 @@ int IPU1dma() { case 0x00000000: ipu1dma->tadr += 16; - CPU_INT(DMAC_TO_IPU, (1+totalqwc)*BIAS); + IPU_TO_INT((1+totalqwc)*BIAS); return totalqwc; case 0x70000000: ipu1dma->tadr = ipu1dma->madr; - CPU_INT(DMAC_TO_IPU, (1+totalqwc)*BIAS); + IPU_TO_INT((1+totalqwc)*BIAS); return totalqwc; } } @@ -1687,7 +1698,7 @@ int IPU1dma() IPU_LOG("dmaIPU1 Normal size=%d, addr=%lx, fifosize=%x\n", ipu1dma->qwc, ipu1dma->madr, 8 - g_BP.IFC); IPU1chain(); - CPU_INT(DMAC_TO_IPU, (ipu1cycles+totalqwc)*BIAS); + IPU_TO_INT((ipu1cycles+totalqwc)*BIAS); return totalqwc; } else @@ -1769,7 +1780,7 @@ int IPU1dma() ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 ); } - CPU_INT(DMAC_TO_IPU, ipu1cycles+totalqwc*BIAS); + IPU_TO_INT(ipu1cycles+totalqwc*BIAS); g_nDMATransfer |= IPU_DMA_TIE1; return totalqwc; } @@ -1780,18 +1791,18 @@ int IPU1dma() { case 0x00000000: ipu1dma->tadr += 16; - CPU_INT(DMAC_TO_IPU, (ipu1cycles+totalqwc)*BIAS); + IPU_TO_INT((ipu1cycles+totalqwc)*BIAS); return totalqwc; case 0x70000000: ipu1dma->tadr = ipu1dma->madr; - CPU_INT(DMAC_TO_IPU, (ipu1cycles+totalqwc)*BIAS); + IPU_TO_INT((ipu1cycles+totalqwc)*BIAS); return totalqwc; } } } - CPU_INT(DMAC_TO_IPU, (ipu1cycles+totalqwc)*BIAS); + IPU_TO_INT((ipu1cycles+totalqwc)*BIAS); return totalqwc; } @@ -1899,8 +1910,7 @@ int IPU0dma() break; } } - CPU_INT(DMAC_FROM_IPU,readsize*BIAS); - + IPU_FROM_INT(readsize*BIAS); } return readsize; @@ -1922,7 +1932,7 @@ void dmaIPU1() // toIPU extern void GIFdma(); -__forceinline void ipu0Interrupt() { +IPU_FORCEINLINE void ipu0Interrupt() { IPU_LOG("ipu0Interrupt: %x\n", cpuRegs.cycle); if( g_nDMATransfer & IPU_DMA_FIREINT0 ) { @@ -1953,7 +1963,7 @@ __forceinline void ipu0Interrupt() { cpuRegs.interrupt &= ~(1 << 3); } -__forceinline void ipu1Interrupt() { +IPU_FORCEINLINE void ipu1Interrupt() { IPU_LOG("ipu1Interrupt %x:\n", cpuRegs.cycle); if( g_nDMATransfer & IPU_DMA_FIREINT1 ) { diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h index 9621e9aac5..82658e37f9 100644 --- a/pcsx2/IPU/IPU.h +++ b/pcsx2/IPU/IPU.h @@ -20,6 +20,15 @@ #define __IPU_H__ #include "Common.h" + +// IPU Speed Hack! +// By uncommenting the define below, the IPU can get a pretty nice speed boost, +// of about 10%. Additionally, the ipuInterrupt handlers in the cpuBranchTest +// are also be removed, which helps reduce branch test overhead in *all* games. + +//#define USE_IPU_SPEEDHACK + + #ifdef _MSC_VER #pragma pack(1) #endif diff --git a/pcsx2/R5900.c b/pcsx2/R5900.c index d65a7d019a..b9315cf7d0 100644 --- a/pcsx2/R5900.c +++ b/pcsx2/R5900.c @@ -390,8 +390,10 @@ static __forceinline void _cpuTestInterrupts() if( cpuRegs.interrupt & ( 1 | (3 << 3) | (3<<8) | (3<<10)) ) { TESTINT(0, vif0Interrupt); +#ifndef USE_IPU_SPEEDHACK TESTINT(3, ipu0Interrupt); TESTINT(4, ipu1Interrupt); +#endif TESTINT(8, SPRFROMinterrupt); TESTINT(9, SPRTOinterrupt);