Just committing an IPU speedhack I found a while back, in case anyone wants to play with it. It's disabled by default. Enable by uncommenting the #define in IPU.h -- might break some movies tho, I dunno.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@381 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
Jake.Stine 2008-12-03 18:24:06 +00:00 committed by Gregory Hainaut
parent 63398c252a
commit a555f60477
3 changed files with 46 additions and 25 deletions

View File

@ -27,10 +27,16 @@
#include "iR5900.h" #include "iR5900.h"
#include "coroutine.h" #include "coroutine.h"
#ifdef _WIN32 // IPU Speedhack : Calls the IPU interrupt handlers directly instead of feeding
#define FASTCALL __fastcall // them through the EE's branch test. Not tested extensively yet.
#ifdef USE_IPU_SPEEDHACK
# define IPU_TO_INT( val ) ipu1Interrupt()
# define IPU_FROM_INT( val ) ipu0Interrupt()
# define IPU_FASTCALL
#else #else
#define FASTCALL # define IPU_TO_INT( val ) CPU_INT( DMAC_TO_IPU, val )
# define IPU_FROM_INT( val ) CPU_INT( DMAC_FROM_IPU, val )
# define IPU_FORCEINLINE __forceinline
#endif #endif
#ifndef PCSX2_VIRTUAL_MEM #ifndef PCSX2_VIRTUAL_MEM
@ -940,7 +946,7 @@ static void ipuSETTH(u32 val) {
/////////////////////// ///////////////////////
// IPU Worker Thread // // IPU Worker Thread //
/////////////////////// ///////////////////////
#define IPU_INTERRUPT(dma) { \ #define IPU_TO_INTERRUPT(dma) { \
hwIntcIrq(INTC_IPU); \ hwIntcIrq(INTC_IPU); \
} }
@ -960,7 +966,7 @@ void IPUCMD_WRITE(u32 val) {
switch (ipuRegs->cmd.CMD) { switch (ipuRegs->cmd.CMD) {
case SCE_IPU_BCLR: case SCE_IPU_BCLR:
ipuBCLR(val); ipuBCLR(val);
IPU_INTERRUPT(DMAC_TO_IPU); IPU_TO_INTERRUPT(DMAC_TO_IPU);
return; return;
case SCE_IPU_VDEC: case SCE_IPU_VDEC:
@ -1021,7 +1027,8 @@ void IPUCMD_WRITE(u32 val) {
g_nCmdIndex = 0; g_nCmdIndex = 0;
if( ipuCSC(ipuRegs->cmd.DATA) ) { if( ipuCSC(ipuRegs->cmd.DATA) ) {
if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) CPU_INT(DMAC_FROM_IPU,0); if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100))
IPU_FROM_INT(0);
return; return;
} }
@ -1040,7 +1047,8 @@ void IPUCMD_WRITE(u32 val) {
case SCE_IPU_IDEC: case SCE_IPU_IDEC:
if( ipuIDEC(val) ) { if( ipuIDEC(val) ) {
// idec done, ipu0 done too // idec done, ipu0 done too
if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) CPU_INT(DMAC_FROM_IPU,0); if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100))
IPU_FROM_INT(0);
return; return;
} }
@ -1053,7 +1061,8 @@ void IPUCMD_WRITE(u32 val) {
case SCE_IPU_BDEC: case SCE_IPU_BDEC:
if( ipuBDEC(val)) { if( ipuBDEC(val)) {
if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) CPU_INT(DMAC_FROM_IPU,0); if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100))
IPU_FROM_INT(0);
if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD)
hwIntcIrq(INTC_IPU); hwIntcIrq(INTC_IPU);
@ -1125,7 +1134,8 @@ void IPUWorker()
return; return;
} }
if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) CPU_INT(DMAC_FROM_IPU,0); if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100))
IPU_FROM_INT(0);
break; break;
case SCE_IPU_PACK: case SCE_IPU_PACK:
if( !ipuPACK(ipuRegs->cmd.DATA) ) if( !ipuPACK(ipuRegs->cmd.DATA) )
@ -1150,7 +1160,7 @@ void IPUWorker()
ipuCurCmd = 0xffffffff; ipuCurCmd = 0xffffffff;
// CHECK!: IPU0dma remains when IDEC is done, so we need to clear it // CHECK!: IPU0dma remains when IDEC is done, so we need to clear it
if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100))
CPU_INT(DMAC_FROM_IPU,0); IPU_FROM_INT(0);
s_routine = NULL; s_routine = NULL;
break; break;
@ -1166,7 +1176,8 @@ void IPUWorker()
ipuRegs->topbusy = 0; ipuRegs->topbusy = 0;
ipuRegs->cmd.BUSY = 0; ipuRegs->cmd.BUSY = 0;
ipuCurCmd = 0xffffffff; ipuCurCmd = 0xffffffff;
if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100)) CPU_INT(DMAC_FROM_IPU,0); if(ipu0dma->qwc > 0 && (ipu0dma->chcr & 0x100))
IPU_FROM_INT(0);
s_routine = NULL; s_routine = NULL;
if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD)
hwIntcIrq(INTC_IPU); hwIntcIrq(INTC_IPU);
@ -1631,7 +1642,7 @@ int IPU1dma()
if ((ipu1dma->chcr & 0x80) && (g_nDMATransfer&IPU_DMA_DOTIE1)) { //Check TIE bit of CHCR and IRQ bit of tag if ((ipu1dma->chcr & 0x80) && (g_nDMATransfer&IPU_DMA_DOTIE1)) { //Check TIE bit of CHCR and IRQ bit of tag
SysPrintf("IPU1 TIE\n"); SysPrintf("IPU1 TIE\n");
CPU_INT(DMAC_TO_IPU, totalqwc*BIAS); IPU_TO_INT(totalqwc*BIAS);
g_nDMATransfer &= ~(IPU_DMA_ACTV1|IPU_DMA_DOTIE1); g_nDMATransfer &= ~(IPU_DMA_ACTV1|IPU_DMA_DOTIE1);
g_nDMATransfer |= IPU_DMA_TIE1; g_nDMATransfer |= IPU_DMA_TIE1;
return totalqwc; return totalqwc;
@ -1640,7 +1651,7 @@ int IPU1dma()
g_nDMATransfer &= ~(IPU_DMA_ACTV1|IPU_DMA_DOTIE1); g_nDMATransfer &= ~(IPU_DMA_ACTV1|IPU_DMA_DOTIE1);
if( (ipu1dma->chcr&0xc) == 0 ) { if( (ipu1dma->chcr&0xc) == 0 ) {
CPU_INT(DMAC_TO_IPU, totalqwc*BIAS); IPU_TO_INT(totalqwc*BIAS);
return totalqwc; return totalqwc;
} }
else { else {
@ -1656,7 +1667,7 @@ int IPU1dma()
ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 ); ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 );
IPU_LOG("IPU dmaIrq Set\n"); IPU_LOG("IPU dmaIrq Set\n");
CPU_INT(DMAC_TO_IPU, totalqwc*BIAS); IPU_TO_INT(totalqwc*BIAS);
g_nDMATransfer |= IPU_DMA_TIE1; g_nDMATransfer |= IPU_DMA_TIE1;
return totalqwc; return totalqwc;
} }
@ -1665,12 +1676,12 @@ int IPU1dma()
{ {
case 0x00000000: case 0x00000000:
ipu1dma->tadr += 16; ipu1dma->tadr += 16;
CPU_INT(DMAC_TO_IPU, (1+totalqwc)*BIAS); IPU_TO_INT((1+totalqwc)*BIAS);
return totalqwc; return totalqwc;
case 0x70000000: case 0x70000000:
ipu1dma->tadr = ipu1dma->madr; ipu1dma->tadr = ipu1dma->madr;
CPU_INT(DMAC_TO_IPU, (1+totalqwc)*BIAS); IPU_TO_INT((1+totalqwc)*BIAS);
return totalqwc; return totalqwc;
} }
} }
@ -1687,7 +1698,7 @@ int IPU1dma()
IPU_LOG("dmaIPU1 Normal size=%d, addr=%lx, fifosize=%x\n", IPU_LOG("dmaIPU1 Normal size=%d, addr=%lx, fifosize=%x\n",
ipu1dma->qwc, ipu1dma->madr, 8 - g_BP.IFC); ipu1dma->qwc, ipu1dma->madr, 8 - g_BP.IFC);
IPU1chain(); IPU1chain();
CPU_INT(DMAC_TO_IPU, (ipu1cycles+totalqwc)*BIAS); IPU_TO_INT((ipu1cycles+totalqwc)*BIAS);
return totalqwc; return totalqwc;
} }
else else
@ -1769,7 +1780,7 @@ int IPU1dma()
ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 ); ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 );
} }
CPU_INT(DMAC_TO_IPU, ipu1cycles+totalqwc*BIAS); IPU_TO_INT(ipu1cycles+totalqwc*BIAS);
g_nDMATransfer |= IPU_DMA_TIE1; g_nDMATransfer |= IPU_DMA_TIE1;
return totalqwc; return totalqwc;
} }
@ -1780,18 +1791,18 @@ int IPU1dma()
{ {
case 0x00000000: case 0x00000000:
ipu1dma->tadr += 16; ipu1dma->tadr += 16;
CPU_INT(DMAC_TO_IPU, (ipu1cycles+totalqwc)*BIAS); IPU_TO_INT((ipu1cycles+totalqwc)*BIAS);
return totalqwc; return totalqwc;
case 0x70000000: case 0x70000000:
ipu1dma->tadr = ipu1dma->madr; ipu1dma->tadr = ipu1dma->madr;
CPU_INT(DMAC_TO_IPU, (ipu1cycles+totalqwc)*BIAS); IPU_TO_INT((ipu1cycles+totalqwc)*BIAS);
return totalqwc; return totalqwc;
} }
} }
} }
CPU_INT(DMAC_TO_IPU, (ipu1cycles+totalqwc)*BIAS); IPU_TO_INT((ipu1cycles+totalqwc)*BIAS);
return totalqwc; return totalqwc;
} }
@ -1899,8 +1910,7 @@ int IPU0dma()
break; break;
} }
} }
CPU_INT(DMAC_FROM_IPU,readsize*BIAS); IPU_FROM_INT(readsize*BIAS);
} }
return readsize; return readsize;
@ -1922,7 +1932,7 @@ void dmaIPU1() // toIPU
extern void GIFdma(); extern void GIFdma();
__forceinline void ipu0Interrupt() { IPU_FORCEINLINE void ipu0Interrupt() {
IPU_LOG("ipu0Interrupt: %x\n", cpuRegs.cycle); IPU_LOG("ipu0Interrupt: %x\n", cpuRegs.cycle);
if( g_nDMATransfer & IPU_DMA_FIREINT0 ) { if( g_nDMATransfer & IPU_DMA_FIREINT0 ) {
@ -1953,7 +1963,7 @@ __forceinline void ipu0Interrupt() {
cpuRegs.interrupt &= ~(1 << 3); cpuRegs.interrupt &= ~(1 << 3);
} }
__forceinline void ipu1Interrupt() { IPU_FORCEINLINE void ipu1Interrupt() {
IPU_LOG("ipu1Interrupt %x:\n", cpuRegs.cycle); IPU_LOG("ipu1Interrupt %x:\n", cpuRegs.cycle);
if( g_nDMATransfer & IPU_DMA_FIREINT1 ) { if( g_nDMATransfer & IPU_DMA_FIREINT1 ) {

View File

@ -20,6 +20,15 @@
#define __IPU_H__ #define __IPU_H__
#include "Common.h" #include "Common.h"
// IPU Speed Hack!
// By uncommenting the define below, the IPU can get a pretty nice speed boost,
// of about 10%. Additionally, the ipuInterrupt handlers in the cpuBranchTest
// are also be removed, which helps reduce branch test overhead in *all* games.
//#define USE_IPU_SPEEDHACK
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma pack(1) #pragma pack(1)
#endif #endif

View File

@ -390,8 +390,10 @@ static __forceinline void _cpuTestInterrupts()
if( cpuRegs.interrupt & ( 1 | (3 << 3) | (3<<8) | (3<<10)) ) if( cpuRegs.interrupt & ( 1 | (3 << 3) | (3<<8) | (3<<10)) )
{ {
TESTINT(0, vif0Interrupt); TESTINT(0, vif0Interrupt);
#ifndef USE_IPU_SPEEDHACK
TESTINT(3, ipu0Interrupt); TESTINT(3, ipu0Interrupt);
TESTINT(4, ipu1Interrupt); TESTINT(4, ipu1Interrupt);
#endif
TESTINT(8, SPRFROMinterrupt); TESTINT(8, SPRFROMinterrupt);
TESTINT(9, SPRTOinterrupt); TESTINT(9, SPRTOinterrupt);