From 4df3f80d30acbbab927c87ff7a31a7275ae28420 Mon Sep 17 00:00:00 2001
From: refraction
Date: Sat, 11 Apr 2009 17:45:14 +0000
Subject: [PATCH] Optimized and split up the unpack call a bit so fewer checks
 are run, which should bring an overall speed increase. Also got rid of some
 duplicated pointer indirection which was all over the place.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@955 96395faa-99c1-11dd-bbfe-3dabce05a288
---
 pcsx2/Vif.cpp                      |  89 +++---
 pcsx2/Vif.h                        |  38 +--
 pcsx2/VifDma.cpp                   | 463 +++++++++++++++--------------
 pcsx2/x86/aVif.S                   |  20 +-
 pcsx2/x86/ix86-32/aVif_proc-32.asm |  20 +-
 5 files changed, 331 insertions(+), 299 deletions(-)

diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp
index f20103e72d..cb81e5f6b2 100644
--- a/pcsx2/Vif.cpp
+++ b/pcsx2/Vif.cpp
@@ -25,10 +25,10 @@
 #include "Vif.h"
 #include "VifDma.h"
 
-VIFregisters *_vifRegs;
-u32* _vifRow = NULL, *_vifCol = NULL;
-u32* _vifMaskRegs = NULL;
-vifStruct *_vif;
+VIFregisters *vifRegs;
+u32* vifRow = NULL, *vifCol = NULL;
+u32* vifMaskRegs = NULL;
+vifStruct *vif;
 
 PCSX2_ALIGNED16(u32 g_vifRow0[4]);
 PCSX2_ALIGNED16(u32 g_vifCol0[4]);
@@ -57,24 +57,24 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
 	int n;
 	u32 vifRowReg = getVifRowRegs(offnum);
 
-	if (_vifRegs->code & 0x10000000)
+	if (vifRegs->code & 0x10000000)
 	{
-		switch (_vif->cl)
+		switch (vif->cl)
 		{
 			case 0:
 				if (offnum == OFFSET_X)
-					n = (_vifRegs->mask) & 0x3;
+					n = (vifRegs->mask) & 0x3;
 				else
-					n = (_vifRegs->mask >> (offnum * 2)) & 0x3;
+					n = (vifRegs->mask >> (offnum * 2)) & 0x3;
 				break;
 			case 1:
-				n = (_vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
+				n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
 				break;
 			case 2:
-				n = (_vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
+				n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
 				break;
 			default:
-				n = (_vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
+				n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
 				break;
 		}
 	}
@@ -83,11 +83,11 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
 	switch (n)
 	{
 		case 0:
-			if ((_vif->cmd & 0x6F) == 0x6f)
+			if ((vif->cmd & 0x6F) == 0x6f)
 			{
 				dest = data;
 			}
-			else switch (_vifRegs->mode)
+			else switch (vifRegs->mode)
 			{
 				case 1:
 					dest = data + vifRowReg;
@@ -105,13 +105,12 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
 			dest = vifRowReg;
 			break;
 		case 2:
-			dest = getVifColRegs((_vif->cl > 2) ? 3 : _vif->cl);
+			dest = getVifColRegs((vif->cl > 2) ? 3 : vif->cl);
 			break;
 		case 3:
-			//Masked so don't do anything
 			break;
 	}
-//	VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data);
+//	VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,vifRegs->mode,vifRegs->r0,data);
 }
 
 template <class T>
@@ -127,78 +126,78 @@ void __fastcall UNPACK_S(u32 *dest, T *data, int size)
 
 template <class T>
 void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
 {
-	if (_vifRegs->offset == OFFSET_X)
+	if (vifRegs->offset == OFFSET_X)
 	{
 		if (size > 0)
 		{
-			writeXYZW(_vifRegs->offset, *dest++, *data++);
-			_vifRegs->offset = OFFSET_Y;
+			writeXYZW(vifRegs->offset, *dest++, *data++);
+			vifRegs->offset = OFFSET_Y;
 			size--;
 		}
 	}
 
-	if (_vifRegs->offset == OFFSET_Y)
+	if (vifRegs->offset == OFFSET_Y)
 	{
 		if (size > 0)
 		{
-			writeXYZW(_vifRegs->offset, *dest++, *data);
-			_vifRegs->offset = OFFSET_Z;
+			writeXYZW(vifRegs->offset, *dest++, *data);
+			vifRegs->offset = OFFSET_Z;
 			size--;
 		}
 	}
 
-	if (_vifRegs->offset == OFFSET_Z)
+	if (vifRegs->offset == OFFSET_Z)
 	{
-		writeXYZW(_vifRegs->offset, *dest++, *dest-2);
-		_vifRegs->offset = OFFSET_W;
+		writeXYZW(vifRegs->offset, *dest++, *dest-2);
+		vifRegs->offset = OFFSET_W;
 	}
 
-	if (_vifRegs->offset == OFFSET_W)
+	if (vifRegs->offset == OFFSET_W)
 	{
-		writeXYZW(_vifRegs->offset, *dest, *data);
-		_vifRegs->offset = OFFSET_X;
+		writeXYZW(vifRegs->offset, *dest, *data);
+		vifRegs->offset = OFFSET_X;
 	}
 }
 
 template <class T>
 void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
 {
-	if(_vifRegs->offset == OFFSET_X)
+	if(vifRegs->offset == OFFSET_X)
 	{
 		if (size > 0)
 		{
-			writeXYZW(_vifRegs->offset, *dest++, *data++);
-			_vifRegs->offset = OFFSET_Y;
+			writeXYZW(vifRegs->offset, *dest++, *data++);
+			vifRegs->offset = OFFSET_Y;
 			size--;
 		}
 	}
 
-	if(_vifRegs->offset == OFFSET_Y)
+	if(vifRegs->offset == OFFSET_Y)
 	{
 		if (size > 0)
 		{
-			writeXYZW(_vifRegs->offset, *dest++, *data++);
-			_vifRegs->offset = OFFSET_Z;
+			writeXYZW(vifRegs->offset, *dest++, *data++);
+			vifRegs->offset = OFFSET_Z;
 			size--;
 		}
 	}
 
-	if(_vifRegs->offset == OFFSET_Z)
+	if(vifRegs->offset == OFFSET_Z)
 	{
 		if (size > 0)
 		{
-			writeXYZW(_vifRegs->offset, *dest++, *data++);
-			_vifRegs->offset = OFFSET_W;
+			writeXYZW(vifRegs->offset, *dest++, *data++);
+			vifRegs->offset = OFFSET_W;
 			size--;
 		}
 	}
 
-	if(_vifRegs->offset == OFFSET_W)
+	if(vifRegs->offset == OFFSET_W)
 	{
 		//V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!)
 		//Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate
-		writeXYZW(_vifRegs->offset, *dest, *data);
-		_vifRegs->offset = OFFSET_X;
+		writeXYZW(vifRegs->offset, *dest, *data);
+		vifRegs->offset = OFFSET_X;
 	}
 }
 
@@ -207,12 +206,12 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
 {
 	while (size > 0)
 	{
-		writeXYZW(_vifRegs->offset, *dest++, *data++);
-		_vifRegs->offset++;
+		writeXYZW(vifRegs->offset, *dest++, *data++);
+		vifRegs->offset++;
 		size--;
 	}
 
-	if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X;
+	if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X;
 }
 
 void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
@@ -391,7 +390,7 @@ static __forceinline int mfifoVIF1rbTransfer()
 	return ret;
 }
 
-static __forceinline int mfifoVIF1chain()
+static __forceinline int mfifo_VIF1chain()
 {
 	int ret;
 
@@ -531,7 +530,7 @@ void vifMFIFOInterrupt()
 {
 	g_vifCycles = 0;
 
-	if (vif1.inprogress == 1) mfifoVIF1chain();
+	if (vif1.inprogress == 1) mfifo_VIF1chain();
 
 	if (vif1.irq && vif1.tag.size == 0)
 	{
diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h
index fa318d7618..e01cb32bd8 100644
--- a/pcsx2/Vif.h
+++ b/pcsx2/Vif.h
@@ -81,9 +81,9 @@ struct VIFregisters {
 extern "C"
 {
 	// these use cdecl for Asm code references.
-	extern VIFregisters *_vifRegs;
-	extern u32* _vifMaskRegs;
-	extern u32* _vifRow;
+	extern VIFregisters *vifRegs;
+	extern u32* vifMaskRegs;
+	extern u32* vifRow;
 	extern u32* _vifCol;
 }
 
@@ -92,16 +92,16 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data)
 	switch (reg)
 	{
 		case 0:
-			_vifRegs->r0 = data;
+			vifRegs->r0 = data;
 			break;
 		case 1:
-			_vifRegs->r1 = data;
+			vifRegs->r1 = data;
 			break;
 		case 2:
-			_vifRegs->r2 = data;
+			vifRegs->r2 = data;
 			break;
 		case 3:
-			_vifRegs->r3 = data;
+			vifRegs->r3 = data;
 			break;
 		jNO_DEFAULT;
 	}
@@ -113,16 +113,16 @@ static __forceinline u32 getVifRowRegs(u32 reg)
 	switch (reg)
 	{
 		case 0:
-			return _vifRegs->r0;
+			return vifRegs->r0;
 			break;
 		case 1:
-			return _vifRegs->r1;
+			return vifRegs->r1;
 			break;
 		case 2:
-			return _vifRegs->r2;
+			return vifRegs->r2;
 			break;
 		case 3:
-			return _vifRegs->r3;
+			return vifRegs->r3;
 			break;
 		jNO_DEFAULT;
 	}
@@ -133,16 +133,16 @@ static __forceinline u32 setVifColRegs(u32 reg, u32 data)
 	switch (reg)
 	{
 		case 0:
-			_vifRegs->c0 = data;
+			vifRegs->c0 = data;
 			break;
 		case 1:
-			_vifRegs->c1 = data;
+			vifRegs->c1 = data;
 			break;
 		case 2:
-			_vifRegs->c2 = data;
+			vifRegs->c2 = data;
 			break;
 		case 3:
-			_vifRegs->c3 = data;
+			vifRegs->c3 = data;
 			break;
 		jNO_DEFAULT;
 	}
@@ -154,16 +154,16 @@ static __forceinline u32 getVifColRegs(u32 reg)
 	switch (reg)
 	{
 		case 0:
-			return _vifRegs->c0;
+			return vifRegs->c0;
 			break;
 		case 1:
-			return _vifRegs->c1;
+			return vifRegs->c1;
 			break;
 		case 2:
-			return _vifRegs->c2;
+			return vifRegs->c2;
 			break;
 		case 3:
-			return _vifRegs->c3;
+			return vifRegs->c3;
 			break;
 		jNO_DEFAULT;
 	}
diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp
index e6155bc4ea..8efee3181d 100644
--- a/pcsx2/VifDma.cpp
+++ b/pcsx2/VifDma.cpp
@@ -37,10 +37,10 @@ using namespace std;			// for min / max
 extern "C"
 {
 	// Need cdecl on these for ASM references.
-	extern VIFregisters *_vifRegs;
-	extern u32* _vifMaskRegs;
-	extern u32* _vifRow;
-	extern u32* _vifCol;
+	extern VIFregisters *vifRegs;
+	extern u32* vifMaskRegs;
+	extern u32* vifRow;
+	extern u32* vifCol;
 }
 
 PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]);
@@ -48,7 +48,7 @@ PCSX2_ALIGNED16_EXTERN(u32 g_vifCol0[4]);
 PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]);
 PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]);
 
-extern vifStruct *_vif;
+extern vifStruct *vif;
 
 vifStruct vif0, vif1;
 
@@ -254,21 +254,9 @@ __forceinline static int _limit(int a, int max)
 static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum)
 {
 	const VIFUnpackFuncTable *unpack;
-	vifStruct *vif;
-	VIFregisters *vifRegs;
+
 	unpack = &VIFfuncTable[ unpackType ];
 
-	if (VIFdmanum == 0)
-	{
-		vif = &vif0;
-		vifRegs = vif0Regs;
-	}
-	else
-	{
-		vif = &vif1;
-		vifRegs = vif1Regs;
-	}
-
 	switch (unpackType)
 	{
 		case 0x0:
@@ -338,85 +326,49 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int
 	}
 
 	//This is sorted out later
-	vif->tag.addr &= ~0xf;
+	if((vif->tag.addr & 0xf) != (vifRegs->offset * 4))
+	{
+		VIFUNPACK_LOG("addr aligned to %x", vif->tag.addr);
+		vif->tag.addr = (vif->tag.addr & ~0xf) + (vifRegs->offset * 4);
+	}
+
 }
 
-static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
+static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
 {
 	u32 *dest;
 	u32 unpackType;
 	UNPACKFUNCTYPE func;
 	const VIFUnpackFuncTable *ft;
-	vifStruct *vif;
-	VIFregisters *vifRegs;
 	VURegs * VU;
 	u8 *cdata = (u8*)data;
+
 #ifdef _DEBUG
 	u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
 #endif
 
-	_mm_prefetch((char*)data, _MM_HINT_NTA);
-
 	if (VIFdmanum == 0)
 	{
 		VU = &VU0;
-		vif = &vif0;
-		vifRegs = vif0Regs;
 		assert(v->addr < memsize);
 	}
 	else
 	{
 		VU = &VU1;
-		vif = &vif1;
-		vifRegs = vif1Regs;
 		assert(v->addr < memsize);
-
-		if (vu1MicroIsSkipping())
-		{
-			// don't process since the frame is dummy
-			vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
-			return;
-		}
 	}
 
 	dest = (u32*)(VU->Mem + v->addr);
 
-	VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
+	VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
 	        VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
-
-	VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
-#ifdef _DEBUG
-	if (v->size != size)
-	{
-		VIF_LOG("*PCSX2*: warning v->size != size");
-	}
-
-	if ((v->addr + size*4) > memsize)
-	{
-		Console::Notice("*PCSX2*: fixme unpack overflow");
-		Console::WriteLn("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x",
-		                 params VIFdmanum, v->cmd & 0xf, v->size, size, v->addr);
-	}
-#endif
 
 	// The unpack type
 	unpackType = v->cmd & 0xf;
-
-	if (size == 0)
-	{
-		VIFUNPACK_LOG("*PCSX2*: Unpack %x with size 0!! v->size = %d cl = %d, wl = %d, mode %d mask %x", v->cmd, v->size, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mode, vifRegs->mask);
-	}
-
-	_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
-
-	_vifRegs = (VIFregisters*)vifRegs;
-	_vifMaskRegs = VIFdmanum ? g_vif1Masks : g_vif0Masks;
-	_vif = vif;
-	_vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
 
 	ft = &VIFfuncTable[ unpackType ];
-	func = _vif->usn ? ft->funcU : ft->funcS;
+	func = vif->usn ? ft->funcU : ft->funcS;
 
 	size <<= 2;
 
@@ -424,23 +376,12 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
 	memsize = size;
 #endif
 
-	if (v->size != (size >> 2))
-		ProcessMemSkip(size, unpackType, VIFdmanum);
-
-	if(vif->tag.addr > (u32)(VIFdmanum ? 0x4000 : 0x1000))
-	{
-		//Sanity Check (memory overflow)
-		DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
-
-	}
-
-	if (_vifRegs->offset > 0)
-	{
-		int destinc, unpacksize;
+	if(vif1Regs->offset != 0)
+	{
+		int unpacksize;
 
 		//This is just to make sure the alignment isnt loopy on a split packet
-		if(_vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
+		if(vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
 		{
 			DevCon::Error("Warning: Unpack alignment error");
 		}
@@ -449,48 +390,50 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
 		if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize)
 			VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset);
 
-		// SSE doesn't handle such small data
-		if (vifRegs->offset < (u32)ft->qsize)
+		if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
 		{
-			if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
-			{
-				Console::WriteLn("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
-			}
+			DevCon::Error("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
+		}
 
-			unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset));
-		}
-		else
-		{
-			unpacksize = 0;
-			Console::WriteLn("Unpack align offset = 0");
-		}
+		unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset));
 
-		VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset);
-		destinc = (4 - ft->qsize) + unpacksize;
+		VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset);
 
 		func(dest, (u32*)cdata, unpacksize);
 		size -= unpacksize * ft->dsize;
-		cdata += unpacksize * ft->dsize;
 
 		vifRegs->num--;
 		++vif->cl;
+
 		if (vif->cl == vifRegs->cycle.wl)
 		{
 			if (vifRegs->cycle.cl != vifRegs->cycle.wl)
-				dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
+			{
+				vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft->qsize) + unpacksize)) * 4;
+				//dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
+			}
 			else
-				dest += destinc;
+			{
+				vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
+				//dest += destinc;
+			}
 			vif->cl = 0;
+			VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
+			return size >> 2;
 		}
 		else
 		{
-			dest += destinc;
+			vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
+			dest += (4 - ft->qsize) + unpacksize;
+			cdata += unpacksize * ft->dsize;
+			VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
 		}
-
-		VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
 	}
 
-	if (vifRegs->cycle.cl >= vifRegs->cycle.wl)   // skipping write
+	if (vif->cl != 0) //Check alignment for SSE unpacks
 	{
 
 #ifdef _DEBUG
@@ -499,7 +442,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
 
 		int incdest;
 
-		if (vif->cl != 0)
+		if (vifRegs->cycle.cl >= vifRegs->cycle.wl)   // skipping write
 		{
 			// continuation from last stream
 
@@ -516,22 +459,109 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
 			if (vif->cl == vifRegs->cycle.wl)
 			{
 				dest += incdest;
+				vif->tag.addr += incdest * 4;
 				vif->cl = 0;
 				break;
 			}
 
 			dest += 4;
+			vif->tag.addr += 16;
 		}
 
-		// have to update
-		_vifRow[0] = _vifRegs->r0;
-		_vifRow[1] = _vifRegs->r1;
-		_vifRow[2] = _vifRegs->r2;
-		_vifRow[3] = _vifRegs->r3;
+		if(vifRegs->mode == 2)
+		{
+			//Update the reg rows for SSE
+			vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
+			vifRow[0] = vifRegs->r0;
+			vifRow[1] = vifRegs->r1;
+			vifRow[2] = vifRegs->r2;
+			vifRow[3] = vifRegs->r3;
+		}
 	}
+	}
+	return size>>2;
+}
 
-	if ((size >= ft->gsize) && !(v->addr&0xf))
+
+static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
+{
+	u32 *dest;
+	u32 unpackType;
+	UNPACKFUNCTYPE func;
+	const VIFUnpackFuncTable *ft;
+	VURegs * VU;
+	u8 *cdata = (u8*)data;
+
+#ifdef _DEBUG
+	u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
+#endif
+
+	_mm_prefetch((char*)data, _MM_HINT_NTA);
+
+	if (VIFdmanum == 0)
+	{
+		VU = &VU0;
+		//vifRegs = vif0Regs;
+		assert(v->addr < memsize);
+	}
+	else
+	{
+
+		VU = &VU1;
+		//vifRegs = vif1Regs;
+		assert(v->addr < memsize);
+
+		if (vu1MicroIsSkipping())
+		{
+			// don't process since the frame is dummy
+			vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
+			return;
+		}
+	}
+
+	dest = (u32*)(VU->Mem + v->addr);
+
+	VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
+	        VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
+
+	VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
+
+	// The unpack type
+	unpackType = v->cmd & 0xf;
+
+	_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
+
+	ft = &VIFfuncTable[ unpackType ];
+	func = vif->usn ? ft->funcU : ft->funcS;
+
+	size <<= 2;
+
+#ifdef _DEBUG
+	memsize = size;
+#endif
+
+
+#ifdef VIFUNPACKDEBUG
+
+	if((vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) *
+	        ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 0x4000 : 0x1000))
+	{
+		//Sanity Check (memory overflow)
+		DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
+
+	}
+#endif
+
+	if (vifRegs->cycle.cl >= vifRegs->cycle.wl)   // skipping write
+	{
+
+#ifdef _DEBUG
+		static int s_count = 0;
+#endif
+
+
+		if (size >= ft->gsize)
 		{
 			const UNPACKPARTFUNCTYPESSE* pfn;
 			int writemask;
@@ -579,6 +609,16 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
 			if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle;
 
+			if(vifRegs->mode == 2)
+			{
+				//Update the reg rows for non SSE
+				vifRegs->r0 = vifRow[0];
+				vifRegs->r1 = vifRow[1];
+				vifRegs->r2 = vifRow[2];
+				vifRegs->r3 = vifRow[3];
+			}
+
+
 			// if size is left over, update the src,dst pointers
 			if (writemask > 0)
 			{
@@ -586,92 +626,38 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
 				cdata += left * ft->gsize;
 				dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16);
 				vifRegs->num -= left;
-				_vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+				vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+				size = writemask;
+
+				if (size >= ft->dsize && vifRegs->num > 0)
+				{
+					//VIF_LOG("warning, end with size = %d", size);
+
+					/* unpack one qword */
+					vif->tag.addr += (size / ft->dsize) * 4;
+					func(dest, (u32*)cdata, size / ft->dsize);
+					size = 0;
+
+					if(vifRegs->mode == 2)
+					{
+						//Update the reg rows for SSE
+						vifRow[0] = vifRegs->r0;
+						vifRow[1] = vifRegs->r1;
+						vifRow[2] = vifRegs->r2;
+						vifRow[3] = vifRegs->r3;
+					}
+					VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
+				}
 			}
 			else
 			{
 				vifRegs->num -= size / ft->gsize;
-				if (vifRegs->num > 0) _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+				if (vifRegs->num > 0) vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
+				size = 0;
 			}
-			size = writemask;
-
-			_vifRegs->r0 = _vifRow[0];
-			_vifRegs->r1 = _vifRow[1];
-			_vifRegs->r2 = _vifRow[2];
-			_vifRegs->r3 = _vifRow[3];
-		}
-		else
-		{
-
-			if ((unpackType == 0xC) && (vifRegs->cycle.cl == vifRegs->cycle.wl))  //No use when SSE is available
-			{
-				// v4-32
-				if (v->size == (size >> 2) && (vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0))
-				{
-					vifRegs->num -= size >> 4;
-					memcpy_fast((u8*)dest, cdata, size);
-					size = 0;
-					return;
-				}
-			}
-
-			incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
-
-			while ((size >= ft->gsize) && (vifRegs->num > 0))
-			{
-				func(dest, (u32*)cdata, ft->qsize);
-				cdata += ft->gsize;
-				size -= ft->gsize;
-
-				vifRegs->num--;
-				//if(vifRegs->num == loophere) dest = (u32*)(VU->Mem);
-				++vif->cl;
-				if (vif->cl == vifRegs->cycle.wl)
-				{
-					dest += incdest;
-					vif->cl = 0;
-				}
-				else
-				{
-					dest += 4;
-				}
-
-			}
-
-			// have to update
-			_vifRow[0] = _vifRegs->r0;
-			_vifRow[1] = _vifRegs->r1;
-			_vifRow[2] = _vifRegs->r2;
-			_vifRow[3] = _vifRegs->r3;
-		}
-
-		// used for debugging vif
-//		{
-//			int i, j, k;
-//			u32* curdest = olddest;
-//			FILE* ftemp = fopen("temp.txt", s_count?"a+":"w");
-//			fprintf(ftemp, "%x %x %x\n", s_count, size, vif->tag.addr);
-//			fprintf(ftemp, "%x %x %x\n", vifRegs->code>>24, vifRegs->mode, *(u32*)&vifRegs->cycle);
-//			fprintf(ftemp, "row: %x %x %x %x\n", _vifRow[0], _vifRow[1], _vifRow[2], _vifRow[3]);
-//			//fprintf(ftemp, "row2: %x %x %x %x\n", _vifRegs->r0, _vifRegs->r1, _vifRegs->r2, _vifRegs->r3);
-//
-//			for(i = 0; i < memsize; ) {
-//				for(k = 0; k < vifRegs->cycle.wl; ++k) {
-//					for(j = 0; j <= ((vifRegs->code>>26)&3); ++j) {
-//						fprintf(ftemp, "%x ", curdest[4*k+j]);
-//					}
-//				}
-//
-//				fprintf(ftemp, "\n");
-//				curdest += 4*vifRegs->cycle.cl;
-//				i += (((vifRegs->code>>26)&3)+1)*ft->dsize*vifRegs->cycle.wl;
-//			}
-//			fclose(ftemp);
-//		}
-//		s_count++;
-
-		if (size >= ft->dsize && vifRegs->num > 0)
+		}
+		else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have
 		{
 			//VIF_LOG("warning, end with size = %d", size);
@@ -679,14 +665,20 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
 			vif->tag.addr += (size / ft->dsize) * 4;
 			func(dest, (u32*)cdata, size / ft->dsize);
 			size = 0;
-
+
+			if(vifRegs->mode == 2)
+			{
+				//Update the reg rows for SSE
+				vifRow[0] = vifRegs->r0;
+				vifRow[1] = vifRegs->r1;
+				vifRow[2] = vifRegs->r2;
+				vifRow[3] = vifRegs->r3;
+			}
 			VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
 		}
-
 	}
 	else   /* filling write */
 	{
-
 		VIF_LOG("VIFunpack - filling write");
 		if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0)
 			DevCon::Notice("Filling write warning! Size < packet size and CL != 0");
@@ -827,11 +819,16 @@ static __forceinline void vif0UNPACK(u32 *data)
 	vif0.tag.addr &= 0xfff;
 	vif0.tag.size = len;
 	vif0Regs->offset = 0;
+
+	vifRegs = (VIFregisters*)vif0Regs;
+	vifMaskRegs = g_vif0Masks;
+	vif = &vif0;
+	vifRow = g_vifRow0;
 }
 
-static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size)
+static __forceinline void vif0mpgTransfer(u32 addr, u32 *data, int size)
 {
-	/* Console::WriteLn("_vif0mpgTransfer addr=%x; size=%x", params addr, size);
+	/* Console::WriteLn("vif0mpgTransfer addr=%x; size=%x", params addr, size);
 	{
 		FILE *f = fopen("vu1.raw", "wb");
 		fwrite(data, 1, size*4, f);
@@ -935,7 +932,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
 	{
 		if (vif0.vifpacketsize < vif0.tag.size)
 		{
-			_vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
+			vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
 			vif0.tag.addr += vif0.vifpacketsize << 2;
 			vif0.tag.size -= vif0.vifpacketsize;
 			return vif0.vifpacketsize;
@@ -944,7 +941,7 @@
 		{
 			int ret;
 
-			_vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
+			vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
 			ret = vif0.tag.size;
 			vif0.tag.size = 0;
 			vif0.cmd = 0;
@@ -959,6 +956,9 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
 	{
 		/* size is less that the total size, transfer is 'in pieces' */
 		VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum);
+
+		ProcessMemSkip(vif0.vifpacketsize << 2, (vif0.cmd & 0xf), VIF0dmanum);
+
 		vif0.tag.size -= vif0.vifpacketsize;
 		FreezeXMMRegs(0);
 		return vif0.vifpacketsize;
@@ -966,14 +966,27 @@
 	else
 	{
 		/* we got all the data, transfer it fully */
-		int ret;
+		int ret = vif0.tag.size;
 
-		VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
-		ret = vif0.tag.size;
-		vif0.tag.size = 0;
-		vif0.cmd = 0;
-		FreezeXMMRegs(0);
-		return ret;
+		//Align data after a split transfer first
+		if(vif0Regs->offset != 0 || vif0.cl != 0)
+		{
+			vif0.tag.size = VIFalign(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
+			data += ret - vif0.tag.size;
+			if(vif0.tag.size > 0) VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
+			vif0.tag.size = 0;
+			vif0.cmd = 0;
+			FreezeXMMRegs(0);
+			return ret;
+		}
+		else
+		{
+			VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
+			vif0.tag.size = 0;
+			vif0.cmd = 0;
+			FreezeXMMRegs(0);
+			return ret;
+		}
 	}
 }
 
@@ -1555,11 +1568,16 @@ static __forceinline void vif1UNPACK(u32 *data)
 	vif1.cl = 0;
 	vif1.tag.addr <<= 4;
 	vif1.tag.cmd = vif1.cmd;
+
+	vifRegs = (VIFregisters*)vif1Regs;
+	vifMaskRegs = g_vif1Masks;
+	vif = &vif1;
+	vifRow = g_vifRow1;
 }
 
-static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size)
+static __forceinline void vif1mpgTransfer(u32 addr, u32 *data, int size)
 {
-	/* Console::WriteLn("_vif1mpgTransfer addr=%x; size=%x", params addr, size);
+	/* Console::WriteLn("vif1mpgTransfer addr=%x; size=%x", params addr, size);
 	{
 		FILE *f = fopen("vu1.raw", "wb");
 		fwrite(data, 1, size*4, f);
@@ -1661,7 +1679,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
 	{
 		if (vif1.vifpacketsize < vif1.tag.size)
 		{
-			_vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
+			vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
 			vif1.tag.addr += vif1.vifpacketsize << 2;
 			vif1.tag.size -= vif1.vifpacketsize;
 			return vif1.vifpacketsize;
@@ -1669,7 +1687,7 @@
 		else
 		{
 			int ret;
-			_vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
+			vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
 			ret = vif1.tag.size;
 			vif1.tag.size = 0;
 			vif1.cmd = 0;
@@ -1770,20 +1788,35 @@
 		/* size is less that the total size, transfer is 'in pieces' */
 		VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum);
+
+		ProcessMemSkip(vif1.vifpacketsize << 2, (vif1.cmd & 0xf), VIF1dmanum);
 
 		vif1.tag.size -= vif1.vifpacketsize;
 		FreezeXMMRegs(0);
 		return vif1.vifpacketsize;
 	}
 	else
 	{
-		int ret;
-
-		/* we got all the data, transfer it fully */
-		VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
-		ret = vif1.tag.size;
-		vif1.tag.size = 0;
-		vif1.cmd = 0;
-		FreezeXMMRegs(0);
-		return ret;
+		int ret = vif1.tag.size;
+
+		if(vif1Regs->offset != 0 || vif1.cl != 0)
+		{
+			vif1.tag.size = VIFalign(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
+			data += ret - vif1.tag.size;
+			if(vif1.tag.size > 0) VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
+			vif1.tag.size = 0;
+			vif1.cmd = 0;
+			FreezeXMMRegs(0);
+			return ret;
+		}
+		else
+		{
+			/* we got all the data, transfer it fully */
+			VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
+			vif1.tag.size = 0;
+			vif1.cmd = 0;
+			FreezeXMMRegs(0);
+			return ret;
+		}
 	}
 }
 
diff --git a/pcsx2/x86/aVif.S b/pcsx2/x86/aVif.S
index 05a2e9248f..e4b64685f6 100644
--- a/pcsx2/x86/aVif.S
+++ b/pcsx2/x86/aVif.S
@@ -18,9 +18,9 @@
 */
 .intel_syntax noprefix
 
-.extern _vifRegs
-.extern _vifMaskRegs
-.extern _vifRow
+.extern vifRegs
+.extern vifMaskRegs
+.extern vifRow
 
 #define VIF_ESP esp
 #define VIF_SRC esi
@@ -108,7 +108,7 @@
 
 // setting up masks
 #define UNPACK_Setup_Mask_SSE(CL) \
-	mov VIF_TMPADDR, _vifMaskRegs; \
+	mov VIF_TMPADDR, vifMaskRegs; \
 	movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
 	movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
 	movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \
@@ -118,7 +118,7 @@
 #define UNPACK_Start_Setup_Mask_SSE_0(CL) UNPACK_Setup_Mask_SSE(CL)
 
 #define UNPACK_Start_Setup_Mask_SSE_1(CL) \
-	mov VIF_TMPADDR, _vifMaskRegs; \
+	mov VIF_TMPADDR, vifMaskRegs; \
 	movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
 	movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
 	pand XMM_ROWMASK, XMM_ROW; \
@@ -129,12 +129,12 @@
 #define UNPACK_Setup_Mask_SSE_0_1(CL)
 
 #define UNPACK_Setup_Mask_SSE_1_1(CL) \
-	mov VIF_TMPADDR, _vifMaskRegs; \
+	mov VIF_TMPADDR, vifMaskRegs; \
 	movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
 
 // ignore CL, since vif.cycle.wl == 1
 #define UNPACK_Setup_Mask_SSE_2_1(CL) \
-	mov VIF_TMPADDR, _vifMaskRegs; \
+	mov VIF_TMPADDR, vifMaskRegs; \
 	movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \
 	movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \
 	movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
@@ -1312,9 +1312,9 @@
 #pragma warning(disable:4731)
 
 #define SAVE_ROW_REG_BASE \
-	mov VIF_TMPADDR, _vifRow; \
+	mov VIF_TMPADDR, vifRow; \
 	movdqa xmmword ptr [VIF_TMPADDR], XMM_ROW; \
-	mov VIF_TMPADDR, _vifRegs; \
+	mov VIF_TMPADDR, vifRegs; \
 	movss dword ptr [VIF_TMPADDR+0x100], XMM_ROW; \
 	psrldq XMM_ROW, 4; \
 	movss dword ptr [VIF_TMPADDR+0x110], XMM_ROW; \
@@ -1349,7 +1349,7 @@
 .globl UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType; \
 UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType: \
 	INIT_ARGS(); \
-	mov VIF_TMPADDR, _vifRegs; \
+	mov VIF_TMPADDR, vifRegs; \
 	movzx VIF_INC, byte ptr [VIF_TMPADDR + 0x40]; \
 	movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 0x41]; \
 	sub VIF_INC, VIF_SAVEEBX; \
diff --git a/pcsx2/x86/ix86-32/aVif_proc-32.asm b/pcsx2/x86/ix86-32/aVif_proc-32.asm
index 12c8b969b4..62fd377795 100644
--- a/pcsx2/x86/ix86-32/aVif_proc-32.asm
+++ b/pcsx2/x86/ix86-32/aVif_proc-32.asm
@@ -5,9 +5,9 @@
 
 .xmm
 
-extern _vifRegs:ptr
-extern _vifMaskRegs:ptr
-extern _vifRow:ptr
+extern vifRegs:ptr
+extern vifMaskRegs:ptr
+extern vifRow:ptr
 extern s_TempDecompress:ptr
 
 
@@ -104,7 +104,7 @@ UNPACK_Regular_SSE_2 macro r0
 
 UNPACK_Setup_Mask_SSE macro CL
-	mov eax, [_vifMaskRegs]
+	mov eax, [vifMaskRegs]
 	movdqa xmm4, [eax + 64*(CL) + 16]
 	movdqa xmm5, [eax + 64*(CL) + 32]
 	movdqa xmm3, [eax + 64*(CL)]
@@ -118,7 +118,7 @@ UNPACK_Start_Setup_Mask_SSE_0 macro CL
 	endm
 
 UNPACK_Start_Setup_Mask_SSE_1 macro CL
-	mov eax, [_vifMaskRegs]
+	mov eax, [vifMaskRegs]
 	movdqa xmm4, [eax + 64*(CL) + 16]
 	movdqa xmm5, [eax + 64*(CL) + 32]
 	pand xmm4, xmm6
@@ -132,14 +132,14 @@ UNPACK_Start_Setup_Mask_SSE_2 macro CL
 UNPACK_Setup_Mask_SSE_0_1 macro CL
 	endm
 
 UNPACK_Setup_Mask_SSE_1_1 macro CL
-	mov eax, [_vifMaskRegs]
+	mov eax, [vifMaskRegs]
 	movdqa xmm3, [eax + 64*(0)]
 	endm
 
 UNPACK_Setup_Mask_SSE_2_1 macro CL
-	mov eax, [_vifMaskRegs]
+	mov eax, [vifMaskRegs]
 	movdqa xmm4, [eax + 64*(0) + 16]
 	movdqa xmm5, [eax + 64*(0) + 32]
 	movdqa xmm3, [eax + 64*(0)]
@@ -1521,9 +1521,9 @@ UNPACK_V4_5SSE_1A macro CL, TOTALCL, MaskType, ModeType
 
 SAVE_ROW_REG_BASE macro
-	mov eax, [_vifRow]
+	mov eax, [vifRow]
 	movdqa [eax], xmm6
-	mov eax, [_vifRegs]
+	mov eax, [vifRegs]
 	movss dword ptr [eax+0100h], xmm6
 	psrldq xmm6, 4
 	movss dword ptr [eax+0110h], xmm6
@@ -1557,7 +1557,7 @@ defUNPACK_SkippingWrite macro name, MaskType, ModeType, qsize, sign, SAVE_ROW_RE
 	push ebx
 	INIT_ARGS
-	mov eax, [_vifRegs]
+	mov eax, [vifRegs]
 	movzx ecx, byte ptr [eax + 040h]
 	movzx ebx, byte ptr [eax + 041h]
 	sub ecx, ebx
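Editor's note on the split introduced above: the change in Vif0TransUnpack/Vif1TransUnpack is easier to see outside the diff. The standalone sketch below is not PCSX2 code and does not use its types; `align_partial_qword` and `bulk_unpack` are hypothetical stand-ins for VIFalign and VIFunpack, with a toy 4-word "qword" buffer, illustrating why handling the leftover words once up front lets the bulk loop skip per-element offset checks.

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Toy model: a "qword" is 4 words; a transfer may resume mid-qword.
struct Stream { size_t offset = 0; };  // words already written into the current qword

// Finish the partially written qword first (stands in for VIFalign).
static size_t align_partial_qword(Stream& s, const std::vector<int>& src, std::vector<int>& dst)
{
    if (s.offset == 0) return 0;                           // already aligned, nothing to do
    size_t need = 4 - s.offset;                            // words left in the current qword
    size_t take = std::min(need, src.size());
    dst.insert(dst.end(), src.begin(), src.begin() + take);
    s.offset = (s.offset + take) % 4;
    return take;                                           // source words consumed by the slow path
}

// Bulk path: whole qwords only, no per-word alignment checks (stands in for VIFunpack).
static void bulk_unpack(Stream& s, const std::vector<int>& src, size_t from, std::vector<int>& dst)
{
    dst.insert(dst.end(), src.begin() + from, src.end());
    s.offset = (s.offset + (src.size() - from)) % 4;
}

int main()
{
    Stream s; s.offset = 2;                                // pretend a previous packet stopped mid-qword
    std::vector<int> packet = {1, 2, 3, 4, 5, 6}, vumem;
    size_t used = align_partial_qword(s, packet, vumem);   // slow path only for the leftover words
    bulk_unpack(s, packet, used, vumem);                   // fast path for the rest of the packet
    std::printf("wrote %zu words, offset now %zu\n", vumem.size(), s.offset);
}
```

The intent of the patch is the same shape: the caller runs VIFalign only when `offset` or `cl` indicates an interrupted qword, so the main unpack path no longer re-tests alignment on every element.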