From dfd433993f7c200efb8684c930167e3ceb7c586d Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 16 Apr 2009 22:33:18 +0000 Subject: [PATCH] Minor bugfix for unpack mode 2 Fixed split videos in Gradius V Fixed Spyro hanging problem in Issue 112 Put in a hacky fix for FFX videos into IPU to compensate for the Spyro fix (which is actually correct). Implementing unpack overflow protection (Guitar Hero 3 & Tony Hawk's Project 8) Writing XGKick to a temp buffer before sending to the GS (part of the GH3 / THP8 fix) Note! THP8 and GH3 will STILL crash with any VUrecs on and MTGS on; these must all be OFF. Also use GSDX in software mode with the NLoop hack on for now. Slow, I know, but it works :P Hopefully we can fix the rec side of it soon. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@989 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Gif.cpp | 34 ++++++++++++++---------- pcsx2/IPU/IPU.cpp | 10 +++++++ pcsx2/VUops.cpp | 33 +++++++++++++++-------- pcsx2/Vif.h | 8 +++--- pcsx2/VifDma.cpp | 68 ++++++++++++++++++++++++++++++++++++++++++----- 5 files changed, 117 insertions(+), 36 deletions(-) diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index dd6056effa..59ef287d07 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -28,6 +28,7 @@ using std::min; +#define gifsplit 128 enum gifstate_t { GIF_STATE_EMPTY = 0, @@ -71,7 +72,7 @@ __forceinline void gsInterrupt() { /*if (!(vif1Regs->mskpath3 && (vif1ch->chcr & 0x100)) || (psHu32(GIF_MODE) & 0x1)) CPU_INT( 2, 64 );*/ #endif - if (gspath3done == 0) return; + if(gspath3done == 0 || gif->qwc > 0) return; } gspath3done = 0; @@ -83,6 +84,7 @@ __forceinline void gsInterrupt() { psHu32(GIF_STAT)&= ~0xE00; // OPH=0 | APATH=0 psHu32(GIF_STAT)&= ~0x1F000000; // QFC=0 hwDmacIrq(DMAC_GIF); + GIF_LOG("GIF DMA end"); } @@ -125,7 +127,7 @@ static void WRITERING_DMA(u32 *pMem, u32 qwc) int _GIFchain() { #ifdef GSPATH3FIX - u32 qwc = ((psHu32(GIF_MODE) & 0x4) && (vif1Regs->mskpath3)) ? 
min(8, (int)gif->qwc) : gif->qwc; + u32 qwc = ((psHu32(GIF_MODE) & 0x4) && (vif1Regs->mskpath3)) ? min(8, (int)gif->qwc) : min( gifsplit, (int)gif->qwc ); #else u32 qwc = gif->qwc; #endif @@ -161,7 +163,7 @@ static __forceinline void dmaGIFend() if ((psHu32(GIF_MODE) & 0x4) && gif->qwc != 0) CPU_INT(2, min( 8, (int)gif->qwc ) /** BIAS*/); else - CPU_INT(2, gif->qwc /** BIAS*/); + CPU_INT(2, min( gifsplit, (int)gif->qwc ) /** BIAS*/); } // These could probably be consolidated into one function, @@ -172,7 +174,7 @@ static __forceinline void GIFdmaEnd() if (psHu32(GIF_MODE) & 0x4) CPU_INT(2, min( 8, (int)gif->qwc ) /** BIAS*/); else - CPU_INT(2, gif->qwc /** BIAS*/); + CPU_INT(2, min( gifsplit, (int)gif->qwc ) /** BIAS*/); } void GIFdma() @@ -187,7 +189,7 @@ void GIFdma() return; } - GIF_LOG("dmaGIFstart chcr = %lx, madr = %lx, qwc = %lx\n tadr = %lx, asr0 = %lx, asr1 = %lx", gif->chcr, gif->madr, gif->qwc, gif->tadr, gif->asr0, gif->asr1); + #ifndef GSPATH3FIX if ( !(psHu32(GIF_MODE) & 0x4) ) { @@ -266,14 +268,17 @@ void GIFdma() if (((gif->qwc == 0) && (gif->chcr & 0xc) == 0)) gspath3done = 1; - else + else if(gif->qwc > 0) + { GIFdmaEnd(); return; } - else { + } + if ((gif->chcr & 0xc) == 0x4 && gspath3done == 0) + { // Chain Mode - while ((gspath3done == 0) && (gif->qwc == 0)) { //Loop if the transfers aren't intermittent + //while ((gspath3done == 0) && (gif->qwc == 0)) { //Loop if the transfers aren't intermittent ptag = (u32*)dmaGetAddr(gif->tadr); //Set memory pointer to TADR if (ptag == NULL) { //Is ptag empty? 
psHu32(DMAC_STAT)|= DMAC_STAT_BEIS; //If yes, set BEIS (BUSERR) in DMAC_STAT register @@ -311,16 +316,16 @@ void GIFdma() GIF_LOG("dmaIrq Set"); gspath3done = 1; } - } + //} } prevcycles = 0; if (!(vif1Regs->mskpath3 || (psHu32(GIF_MODE) & 0x1))) { - if (gspath3done == 0) + if (gspath3done == 0 || gif->qwc > 0) { - if ((psHu32(GIF_MODE) & 0x4) && gif->qwc != 0) + if (gif->qwc != 0) { - CPU_INT(2, min( 8, (int)gif->qwc )/** BIAS*/); + GIFdmaEnd(); } else { @@ -340,7 +345,7 @@ void GIFdma() void dmaGIF() { //We used to addd wait time for the buffer to fill here, fixing some timing problems in path 3 masking //It takes the time of 24 QW for the BUS to become ready - The Punisher, And1 Streetball - + GIF_LOG("dmaGIFstart chcr = %lx, madr = %lx, qwc = %lx\n tadr = %lx, asr0 = %lx, asr1 = %lx", gif->chcr, gif->madr, gif->qwc, gif->tadr, gif->asr0, gif->asr1); if ((psHu32(DMAC_CTRL) & 0xC) == 0xC ) { // GIF MFIFO Console::WriteLn("GIF MFIFO"); gifMFIFOInterrupt(); @@ -359,7 +364,8 @@ void dmaGIF() { gif->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag gif->chcr = ( gif->chcr & 0xFFFF ) | ( (*ptag) & 0xFFFF0000 ); //Transfer upper part of tag to CHCR bits 31-15 - dmaGIFend(); + //gspath3done = hwDmacSrcChainWithStack(gif, (ptag[0] >> 28) & 0x7); + GIFdmaEnd(); gif->qwc = 0; return; } diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index a1b7626df5..04152bfd10 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -1370,6 +1370,9 @@ int FIFOto_write(u32* pMem, int size) } \ } +#define gif ((DMACh*)&PS2MEM_HW[0xA000]) +extern void gsInterrupt(); + int IPU1dma() { u32 *ptag, *pMem; @@ -1383,6 +1386,13 @@ int IPU1dma() assert(!(g_nDMATransfer & IPU_DMA_TIE1)); + //We need to make sure GIF has flushed before sending IPU data, it seems to REALLY screw FFX videos + while(gif->chcr & 0x100) + { + GIF_LOG("Flushing gif chcr %x tadr %x madr %x qwc %x", gif->chcr, gif->tadr, gif->madr, gif->qwc); + gsInterrupt(); + } + // in kh, qwc == 0 when dma_actv1 is set if 
((g_nDMATransfer & IPU_DMA_ACTV1) && ipu1dma->qwc > 0) { diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 299803d2d6..bd8b81b0b7 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -1585,7 +1585,7 @@ void _vuLQ(VURegs * VU) { if (_Ft_ == 0) return; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Fs_].SS[0]) * 16; + addr = ((imm + VU->VI[_Fs_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) VU->VF[_Ft_].UL[0] = ptr[0]; @@ -1601,7 +1601,7 @@ void _vuLQD( VURegs * VU ) { if (_Fs_ != 0) VU->VI[_Fs_].US[0]--; if (_Ft_ == 0) return; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16) & (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) VU->VF[_Ft_].UL[0] = ptr[0]; if (_Y) VU->VF[_Ft_].UL[1] = ptr[1]; @@ -1614,7 +1614,7 @@ void _vuLQI(VURegs * VU) { u32 addr; u32 *ptr; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) VU->VF[_Ft_].UL[0] = ptr[0]; if (_Y) VU->VF[_Ft_].UL[1] = ptr[1]; @@ -1631,7 +1631,7 @@ void _vuSQ(VURegs * VU) { u32 *ptr; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Ft_].SS[0]) * 16; + addr = ((imm + VU->VI[_Ft_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) ptr[0] = VU->VF[_Fs_].UL[0]; if (_Y) ptr[1] = VU->VF[_Fs_].UL[1]; @@ -1644,7 +1644,7 @@ void _vuSQD(VURegs * VU) { u32 *ptr; if(_Ft_ != 0) VU->VI[_Ft_].US[0]--; - addr = VU->VI[_Ft_].US[0] * 16; + addr = (VU->VI[_Ft_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) ptr[0] = VU->VF[_Fs_].UL[0]; if (_Y) ptr[1] = VU->VF[_Fs_].UL[1]; @@ -1656,7 +1656,7 @@ void _vuSQI(VURegs * VU) { u32 addr; u32 *ptr; - addr = VU->VI[_Ft_].US[0] * 16; + addr = (VU->VI[_Ft_].US[0] * 16)& (VU == &VU1 ? 
0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) ptr[0] = VU->VF[_Fs_].UL[0]; if (_Y) ptr[1] = VU->VF[_Fs_].UL[1]; @@ -1673,7 +1673,7 @@ void _vuILW(VURegs * VU) { if (_Ft_ == 0) return; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Fs_].SS[0]) * 16; + addr = ((imm + VU->VI[_Fs_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) VU->VI[_Ft_].US[0] = ptr[0]; if (_Y) VU->VI[_Ft_].US[0] = ptr[2]; @@ -1687,7 +1687,7 @@ void _vuISW(VURegs * VU) { u16 *ptr; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Fs_].SS[0]) * 16; + addr = ((imm + VU->VI[_Fs_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) { ptr[0] = VU->VI[_Ft_].US[0]; ptr[1] = 0; } if (_Y) { ptr[2] = VU->VI[_Ft_].US[0]; ptr[3] = 0; } @@ -1700,7 +1700,7 @@ void _vuILWR(VURegs * VU) { u16 *ptr; if (_Ft_ == 0) return; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) VU->VI[_Ft_].US[0] = ptr[0]; if (_Y) VU->VI[_Ft_].US[0] = ptr[2]; @@ -1712,7 +1712,7 @@ void _vuISWR(VURegs * VU) { u32 addr; u16 *ptr; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16) & (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) { ptr[0] = VU->VI[_Ft_].US[0]; ptr[1] = 0; } if (_Y) { ptr[2] = VU->VI[_Ft_].US[0]; ptr[3] = 0; } @@ -2045,9 +2045,20 @@ void _vuXITOP(VURegs * VU) { void _vuXGKICK(VURegs * VU) { + u32* ptr = (u32*)GET_VU_MEM(VU, (VU->VI[_Fs_].US[0]*16) & (VU == &VU1 ? 
0x3fff : 0xfff)); + int temp = 0x4000 - ((VU->VI[_Fs_].US[0]*16) & 0x3fff); + u32 tempmem[0x8000]; + // flush all pipelines first (in the right order) _vuFlushAll(VU); - GSGIFTRANSFER1((u32*)VU->Mem, (VU->VI[_Fs_].US[0]*16) & 0x3fff); + + //Gonna be slow but reshuffles the memory so overflows wont occur + memset(tempmem, 0, sizeof(tempmem)); + memcpy(tempmem, ptr, temp); + ptr = (u32*)GET_VU_MEM(VU, 0); + memcpy(&tempmem[temp], ptr, ((VU->VI[_Fs_].US[0]*16) & 0x3fff)); + GSGIFTRANSFER1((u32*)&tempmem[0], 0); + //} else GSGIFTRANSFER1((u32*)VU->Mem, (VU->VI[_Fs_].US[0]*16) & 0x3fff); } void _vuXTOP(VURegs * VU) { diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index 64335c4dde..2f84369b5a 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -92,16 +92,16 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data) switch (reg) { case 0: - vifRegs->r0 = data; + vifRegs->r0 += data; break; case 1: - vifRegs->r1 = data; + vifRegs->r1 += data; break; case 2: - vifRegs->r2 = data; + vifRegs->r2 += data; break; case 3: - vifRegs->r3 = data; + vifRegs->r3 += data; break; jNO_DEFAULT; } diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 6e594881fe..5b57c430f7 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -80,10 +80,10 @@ struct VIFUnpackFuncTable UNPACKFUNCTYPE funcU; UNPACKFUNCTYPE funcS; - int bsize; // currently unused - int dsize; // byte size of one channel - int gsize; // size of data in bytes used for each write cycle - int qsize; // used for unpack parts, num of vectors that + u32 bsize; // currently unused + u32 dsize; // byte size of one channel + u32 gsize; // size of data in bytes used for each write cycle + u32 qsize; // used for unpack parts, num of vectors that // will be decompressed from data for 1 cycle }; @@ -333,7 +333,7 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int } -static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +static int VIFalign(u32 *data, vifCode *v, unsigned int size, const 
unsigned int VIFdmanum) { u32 *dest; u32 unpackType; @@ -485,7 +485,7 @@ static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanu } -static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned int VIFdmanum) { u32 *dest; u32 unpackType; @@ -493,6 +493,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma const VIFUnpackFuncTable *ft; VURegs * VU; u8 *cdata = (u8*)data; + u32 tempsize = 0; #ifdef _DEBUG u32 memsize = VIFdmanum ? 0x4000 : 0x1000; @@ -554,6 +555,18 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma } #endif + tempsize = (vif->tag.addr + (size / (ft->gsize * vifRegs->cycle.wl)) * + ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) + ((size / ft->gsize) * 16); + + //Sanity Check (memory overflow) + if(tempsize > (u32)(VIFdmanum ? 0x4000 : 0x1000)) + { + + // DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000); + tempsize = size; + size = 0; + } else tempsize = 0; + if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write { @@ -658,7 +671,48 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma } } - else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have + else if(tempsize) + { + int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4; + size = 0; + + + while ((tempsize >= ft->gsize) && (vifRegs->num > 0)) + { + //VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, vif->tag.addr); + func(dest, (u32*)cdata, ft->qsize); + cdata += ft->gsize; + tempsize -= ft->gsize; + + vifRegs->num--; + ++vif->cl; + if (vif->cl == vifRegs->cycle.wl) + { + dest += incdest; + v->addr = (v->addr + (incdest * 4)) & (VIFdmanum ? 0x3fff : 0xfff); + if(v->addr <= (u32)(VIFdmanum ? 
0x3000 : 0x500)) dest = (u32*)(VU->Mem + v->addr); + vif->cl = 0; + } + else + { + dest += 4; + v->addr = (v->addr + 16) & (VIFdmanum ? 0x3fff : 0xfff); + if(v->addr <= (u32)(VIFdmanum ? 0x3000 : 0x500)) dest = (u32*)(VU->Mem + v->addr); + } + } + + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } + if(tempsize > 0) size = tempsize; + + } + if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have { //VIF_LOG("warning, end with size = %d", size);