diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index dd6056effa..59ef287d07 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -28,6 +28,7 @@ using std::min; +#define gifsplit 128 enum gifstate_t { GIF_STATE_EMPTY = 0, @@ -71,7 +72,7 @@ __forceinline void gsInterrupt() { /*if (!(vif1Regs->mskpath3 && (vif1ch->chcr & 0x100)) || (psHu32(GIF_MODE) & 0x1)) CPU_INT( 2, 64 );*/ #endif - if (gspath3done == 0) return; + if(gspath3done == 0 || gif->qwc > 0) return; } gspath3done = 0; @@ -83,6 +84,7 @@ __forceinline void gsInterrupt() { psHu32(GIF_STAT)&= ~0xE00; // OPH=0 | APATH=0 psHu32(GIF_STAT)&= ~0x1F000000; // QFC=0 hwDmacIrq(DMAC_GIF); + GIF_LOG("GIF DMA end"); } @@ -125,7 +127,7 @@ static void WRITERING_DMA(u32 *pMem, u32 qwc) int _GIFchain() { #ifdef GSPATH3FIX - u32 qwc = ((psHu32(GIF_MODE) & 0x4) && (vif1Regs->mskpath3)) ? min(8, (int)gif->qwc) : gif->qwc; + u32 qwc = ((psHu32(GIF_MODE) & 0x4) && (vif1Regs->mskpath3)) ? min(8, (int)gif->qwc) : min( gifsplit, (int)gif->qwc ); #else u32 qwc = gif->qwc; #endif @@ -161,7 +163,7 @@ static __forceinline void dmaGIFend() if ((psHu32(GIF_MODE) & 0x4) && gif->qwc != 0) CPU_INT(2, min( 8, (int)gif->qwc ) /** BIAS*/); else - CPU_INT(2, gif->qwc /** BIAS*/); + CPU_INT(2, min( gifsplit, (int)gif->qwc ) /** BIAS*/); } // These could probably be consolidated into one function, @@ -172,7 +174,7 @@ static __forceinline void GIFdmaEnd() if (psHu32(GIF_MODE) & 0x4) CPU_INT(2, min( 8, (int)gif->qwc ) /** BIAS*/); else - CPU_INT(2, gif->qwc /** BIAS*/); + CPU_INT(2, min( gifsplit, (int)gif->qwc ) /** BIAS*/); } void GIFdma() @@ -187,7 +189,7 @@ void GIFdma() return; } - GIF_LOG("dmaGIFstart chcr = %lx, madr = %lx, qwc = %lx\n tadr = %lx, asr0 = %lx, asr1 = %lx", gif->chcr, gif->madr, gif->qwc, gif->tadr, gif->asr0, gif->asr1); + #ifndef GSPATH3FIX if ( !(psHu32(GIF_MODE) & 0x4) ) { @@ -266,14 +268,17 @@ void GIFdma() if (((gif->qwc == 0) && (gif->chcr & 0xc) == 0)) gspath3done = 1; - else + else if(gif->qwc > 0) + { GIFdmaEnd(); return; } - else { + } + if ((gif->chcr & 0xc) == 0x4 && gspath3done == 0) + { // Chain Mode - while ((gspath3done == 0) && (gif->qwc == 0)) { //Loop if the transfers aren't intermittent + //while ((gspath3done == 0) && (gif->qwc == 0)) { //Loop if the transfers aren't intermittent ptag = (u32*)dmaGetAddr(gif->tadr); //Set memory pointer to TADR if (ptag == NULL) { //Is ptag empty? psHu32(DMAC_STAT)|= DMAC_STAT_BEIS; //If yes, set BEIS (BUSERR) in DMAC_STAT register @@ -311,16 +316,16 @@ void GIFdma() GIF_LOG("dmaIrq Set"); gspath3done = 1; } - } + //} } prevcycles = 0; if (!(vif1Regs->mskpath3 || (psHu32(GIF_MODE) & 0x1))) { - if (gspath3done == 0) + if (gspath3done == 0 || gif->qwc > 0) { - if ((psHu32(GIF_MODE) & 0x4) && gif->qwc != 0) + if (gif->qwc != 0) { - CPU_INT(2, min( 8, (int)gif->qwc )/** BIAS*/); + GIFdmaEnd(); } else { @@ -340,7 +345,7 @@ void GIFdma() void dmaGIF() { //We used to addd wait time for the buffer to fill here, fixing some timing problems in path 3 masking //It takes the time of 24 QW for the BUS to become ready - The Punisher, And1 Streetball - + GIF_LOG("dmaGIFstart chcr = %lx, madr = %lx, qwc = %lx\n tadr = %lx, asr0 = %lx, asr1 = %lx", gif->chcr, gif->madr, gif->qwc, gif->tadr, gif->asr0, gif->asr1); if ((psHu32(DMAC_CTRL) & 0xC) == 0xC ) { // GIF MFIFO Console::WriteLn("GIF MFIFO"); gifMFIFOInterrupt(); @@ -359,7 +364,8 @@ void dmaGIF() { gif->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag gif->chcr = ( gif->chcr & 0xFFFF ) | ( (*ptag) & 0xFFFF0000 ); //Transfer upper part of tag to CHCR bits 31-15 - dmaGIFend(); + //gspath3done = hwDmacSrcChainWithStack(gif, (ptag[0] >> 28) & 0x7); + GIFdmaEnd(); gif->qwc = 0; return; } diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index a1b7626df5..04152bfd10 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -1370,6 +1370,9 @@ int FIFOto_write(u32* pMem, int size) } \ } +#define gif ((DMACh*)&PS2MEM_HW[0xA000]) +extern void gsInterrupt(); + int IPU1dma() { u32 *ptag, *pMem; @@ -1383,6 +1386,13 @@ int IPU1dma() assert(!(g_nDMATransfer & IPU_DMA_TIE1)); + //We need to make sure GIF has flushed before sending IPU data, it seems to REALLY screw FFX videos + while(gif->chcr & 0x100) + { + GIF_LOG("Flushing gif chcr %x tadr %x madr %x qwc %x", gif->chcr, gif->tadr, gif->madr, gif->qwc); + gsInterrupt(); + } + // in kh, qwc == 0 when dma_actv1 is set if ((g_nDMATransfer & IPU_DMA_ACTV1) && ipu1dma->qwc > 0) { diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 299803d2d6..bd8b81b0b7 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -1585,7 +1585,7 @@ void _vuLQ(VURegs * VU) { if (_Ft_ == 0) return; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Fs_].SS[0]) * 16; + addr = ((imm + VU->VI[_Fs_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) VU->VF[_Ft_].UL[0] = ptr[0]; @@ -1601,7 +1601,7 @@ void _vuLQD( VURegs * VU ) { if (_Fs_ != 0) VU->VI[_Fs_].US[0]--; if (_Ft_ == 0) return; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16) & (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) VU->VF[_Ft_].UL[0] = ptr[0]; if (_Y) VU->VF[_Ft_].UL[1] = ptr[1]; @@ -1614,7 +1614,7 @@ void _vuLQI(VURegs * VU) { u32 addr; u32 *ptr; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) VU->VF[_Ft_].UL[0] = ptr[0]; if (_Y) VU->VF[_Ft_].UL[1] = ptr[1]; @@ -1631,7 +1631,7 @@ void _vuSQ(VURegs * VU) { u32 *ptr; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Ft_].SS[0]) * 16; + addr = ((imm + VU->VI[_Ft_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) ptr[0] = VU->VF[_Fs_].UL[0]; if (_Y) ptr[1] = VU->VF[_Fs_].UL[1]; @@ -1644,7 +1644,7 @@ void _vuSQD(VURegs * VU) { u32 *ptr; if(_Ft_ != 0) VU->VI[_Ft_].US[0]--; - addr = VU->VI[_Ft_].US[0] * 16; + addr = (VU->VI[_Ft_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) ptr[0] = VU->VF[_Fs_].UL[0]; if (_Y) ptr[1] = VU->VF[_Fs_].UL[1]; @@ -1656,7 +1656,7 @@ void _vuSQI(VURegs * VU) { u32 addr; u32 *ptr; - addr = VU->VI[_Ft_].US[0] * 16; + addr = (VU->VI[_Ft_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) ptr[0] = VU->VF[_Fs_].UL[0]; if (_Y) ptr[1] = VU->VF[_Fs_].UL[1]; @@ -1673,7 +1673,7 @@ void _vuILW(VURegs * VU) { if (_Ft_ == 0) return; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Fs_].SS[0]) * 16; + addr = ((imm + VU->VI[_Fs_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) VU->VI[_Ft_].US[0] = ptr[0]; if (_Y) VU->VI[_Ft_].US[0] = ptr[2]; @@ -1687,7 +1687,7 @@ void _vuISW(VURegs * VU) { u16 *ptr; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Fs_].SS[0]) * 16; + addr = ((imm + VU->VI[_Fs_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) { ptr[0] = VU->VI[_Ft_].US[0]; ptr[1] = 0; } if (_Y) { ptr[2] = VU->VI[_Ft_].US[0]; ptr[3] = 0; } @@ -1700,7 +1700,7 @@ void _vuILWR(VURegs * VU) { u16 *ptr; if (_Ft_ == 0) return; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) VU->VI[_Ft_].US[0] = ptr[0]; if (_Y) VU->VI[_Ft_].US[0] = ptr[2]; @@ -1712,7 +1712,7 @@ void _vuISWR(VURegs * VU) { u32 addr; u16 *ptr; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16) & (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) { ptr[0] = VU->VI[_Ft_].US[0]; ptr[1] = 0; } if (_Y) { ptr[2] = VU->VI[_Ft_].US[0]; ptr[3] = 0; } @@ -2045,9 +2045,20 @@ void _vuXITOP(VURegs * VU) { void _vuXGKICK(VURegs * VU) { + u32* ptr = (u32*)GET_VU_MEM(VU, (VU->VI[_Fs_].US[0]*16) & (VU == &VU1 ? 0x3fff : 0xfff)); + int temp = 0x4000 - ((VU->VI[_Fs_].US[0]*16) & 0x3fff); + u32 tempmem[0x8000]; + // flush all pipelines first (in the right order) _vuFlushAll(VU); - GSGIFTRANSFER1((u32*)VU->Mem, (VU->VI[_Fs_].US[0]*16) & 0x3fff); + + //Gonna be slow but reshuffles the memory so overflows wont occur + memset(tempmem, 0, sizeof(tempmem)); + memcpy(tempmem, ptr, temp); + ptr = (u32*)GET_VU_MEM(VU, 0); + memcpy(&tempmem[temp], ptr, ((VU->VI[_Fs_].US[0]*16) & 0x3fff)); + GSGIFTRANSFER1((u32*)&tempmem[0], 0); + //} else GSGIFTRANSFER1((u32*)VU->Mem, (VU->VI[_Fs_].US[0]*16) & 0x3fff); } void _vuXTOP(VURegs * VU) { diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index 64335c4dde..2f84369b5a 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -92,16 +92,16 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data) switch (reg) { case 0: - vifRegs->r0 = data; + vifRegs->r0 += data; break; case 1: - vifRegs->r1 = data; + vifRegs->r1 += data; break; case 2: - vifRegs->r2 = data; + vifRegs->r2 += data; break; case 3: - vifRegs->r3 = data; + vifRegs->r3 += data; break; jNO_DEFAULT; } diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 6e594881fe..5b57c430f7 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -80,10 +80,10 @@ struct VIFUnpackFuncTable UNPACKFUNCTYPE funcU; UNPACKFUNCTYPE funcS; - int bsize; // currently unused - int dsize; // byte size of one channel - int gsize; // size of data in bytes used for each write cycle - int qsize; // used for unpack parts, num of vectors that + u32 bsize; // currently unused + u32 dsize; // byte size of one channel + u32 gsize; // size of data in bytes used for each write cycle + u32 qsize; // used for unpack parts, num of vectors that // will be decompressed from data for 1 cycle }; @@ -333,7 +333,7 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int } -static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +static int VIFalign(u32 *data, vifCode *v, unsigned int size, const unsigned int VIFdmanum) { u32 *dest; u32 unpackType; @@ -485,7 +485,7 @@ static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanu } -static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned int VIFdmanum) { u32 *dest; u32 unpackType; @@ -493,6 +493,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma const VIFUnpackFuncTable *ft; VURegs * VU; u8 *cdata = (u8*)data; + u32 tempsize = 0; #ifdef _DEBUG u32 memsize = VIFdmanum ? 0x4000 : 0x1000; @@ -554,6 +555,18 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma } #endif + tempsize = (vif->tag.addr + (size / (ft->gsize * vifRegs->cycle.wl)) * + ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) + ((size / ft->gsize) * 16); + + //Sanity Check (memory overflow) + if(tempsize > (u32)(VIFdmanum ? 0x4000 : 0x1000)) + { + + // DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000); + tempsize = size; + size = 0; + } else tempsize = 0; + if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write { @@ -658,7 +671,48 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma } } - else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have + else if(tempsize) + { + int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4; + size = 0; + + + while ((tempsize >= ft->gsize) && (vifRegs->num > 0)) + { + //VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, vif->tag.addr); + func(dest, (u32*)cdata, ft->qsize); + cdata += ft->gsize; + tempsize -= ft->gsize; + + vifRegs->num--; + ++vif->cl; + if (vif->cl == vifRegs->cycle.wl) + { + dest += incdest; + v->addr = (v->addr + (incdest * 4)) & (VIFdmanum ? 0x3fff : 0xfff); + if(v->addr <= (u32)(VIFdmanum ? 0x3000 : 0x500)) dest = (u32*)(VU->Mem + v->addr); + vif->cl = 0; + } + else + { + dest += 4; + v->addr = (v->addr + 16) & (VIFdmanum ? 0x3fff : 0xfff); + if(v->addr <= (u32)(VIFdmanum ? 0x3000 : 0x500)) dest = (u32*)(VU->Mem + v->addr); + } + } + + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } + if(tempsize > 0) size = tempsize; + + } + if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have { //VIF_LOG("warning, end with size = %d", size);