From ca2d9b0469203b977679a457c9f979444c83402d Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 25 Aug 2012 19:09:29 +0000 Subject: [PATCH] Vif Unpacks: Fixed Issue 1325 with Non-SSE4 processors. Put in some handling for MPG Overflows (VIF command, not videos :P) Fixed another SSE Unpack bug I came across - Affected THPS Project 8 git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5392 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif_Codes.cpp | 31 +++++++++++++++++++++++++------ pcsx2/x86/microVU_Misc.inl | 17 +++++++++++++++++ pcsx2/x86/newVif_Dynarec.cpp | 6 +++--- pcsx2/x86/newVif_UnpackSSE.cpp | 8 +++++--- 4 files changed, 50 insertions(+), 12 deletions(-) diff --git a/pcsx2/Vif_Codes.cpp b/pcsx2/Vif_Codes.cpp index 36d34c1832..fa34815b88 100644 --- a/pcsx2/Vif_Codes.cpp +++ b/pcsx2/Vif_Codes.cpp @@ -249,12 +249,30 @@ vifOp(vifCode_Mark) { static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) { VURegs& VUx = idx ? VU1 : VU0; + vifStruct& vifX = GetVifX; pxAssert(VUx.Micro > 0); if (idx && THREAD_VU1) { vu1Thread.WriteMicroMem(addr, (u8*)data, size*4); return; } + + + + if((addr + size *4) > (idx ? 0x4000 : 0x1000)) + { + //DevCon.Warning("Handling split MPG"); + if (!idx) CpuVU0->Clear(addr, (idx ? 0x4000 : 0x1000) - addr); + else CpuVU1->Clear(addr, (idx ? 0x4000 : 0x1000) - addr); + + memcpy_fast(VUx.Micro + addr, data, (idx ? 0x4000 : 0x1000) - addr); + size -= ((idx ? 0x4000 : 0x1000) - addr) / 4; + memcpy_fast(VUx.Micro, data, size); + + vifX.tag.addr = size * 4; + } + else + { //The compare is pretty much a waste of time, likelyhood is that the program isnt there, thats why its copying it. //Faster without. 
//if (memcmp_mmx(VUx.Micro + addr, data, size*4)) { @@ -262,7 +280,9 @@ static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) { if (!idx) CpuVU0->Clear(addr, size*4); else CpuVU1->Clear(addr, size*4); memcpy_fast(VUx.Micro + addr, data, size*4); //from tests, memcpy is 1fps faster on Grandia 3 than memcpy_fast - //} + + vifX.tag.addr += size * 4; + } } vifOp(vifCode_MPG) { @@ -282,19 +302,18 @@ vifOp(vifCode_MPG) { pass2 { if (vifX.vifpacketsize < vifX.tag.size) { // Partial Transfer if((vifX.tag.addr + vifX.vifpacketsize*4) > (idx ? 0x4000 : 0x1000)) { - DevCon.Warning("Vif%d MPG Split Overflow", idx); + //DevCon.Warning("Vif%d MPG Split Overflow", idx); } _vifCode_MPG(idx, vifX.tag.addr, data, vifX.vifpacketsize); - vifX.tag.addr += vifX.vifpacketsize * 4; - vifX.tag.size -= vifX.vifpacketsize; + vifX.tag.size -= vifX.vifpacketsize; //We can do this first as its passed as a pointer return vifX.vifpacketsize; } else { // Full Transfer if((vifX.tag.addr + vifX.tag.size*4) > (idx ? 0x4000 : 0x1000)) { - DevCon.Warning("Vif%d MPG Split Overflow", idx); + //DevCon.Warning("Vif%d MPG Split Overflow full %x", idx, vifX.tag.addr + vifX.tag.size*4); } _vifCode_MPG(idx, vifX.tag.addr, data, vifX.tag.size); - int ret = vifX.tag.size; + int ret = vifX.tag.size; vifX.tag.size = 0; vifX.cmd = 0; vifX.pass = 0; diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 73d1a8cf78..241e1adc20 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -142,6 +142,14 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW) } } +static const __aligned16 u32 SSEXYZWMask[4][4] = +{ + {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000}, + {0xffffffff, 0xffffffff, 0x00000000, 0xffffffff}, + {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff}, + {0x00000000, 0xffffffff, 0xffffffff, 0xffffffff} +}; + // Modifies the Source Reg! 
(ToDo: Optimize modXYZW = 1 cases) void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) { @@ -206,6 +214,15 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) break; } } + } + else if( dest == src ) + { + //VIF can send the same temp register as both the source and destination; in that case we just need to clear the fields we don't want. + if(!(xyzw & 0x1)) xAND.PS( dest, ptr128[SSEXYZWMask[0]]); + if(!(xyzw & 0x2)) xAND.PS( dest, ptr128[SSEXYZWMask[1]]); + if(!(xyzw & 0x4)) xAND.PS( dest, ptr128[SSEXYZWMask[2]]); + if(!(xyzw & 0x8)) xAND.PS( dest, ptr128[SSEXYZWMask[3]]); + } } diff --git a/pcsx2/x86/newVif_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp index 08f91083e5..b65299a8b4 100644 --- a/pcsx2/x86/newVif_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -160,11 +160,11 @@ void VifUnpackSSE_Dynarec::ModUnpack( int upknum, bool PostOp ) { case 0: case 1: - case 2: UnpkNoOfIterations = 4; if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration % UnpkNoOfIterations; } break; + case 2: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x3; } break; case 4: case 5: - case 6: UnpkNoOfIterations = 2; if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration % UnpkNoOfIterations; } break; + case 6: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break; case 8: break; case 9: break; @@ -195,7 +195,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine() { MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum); pxAssume(vCL == 0); - UnpkLoopIteration = 0; + // Value passed determines # of col regs we need to load SetMasks(isFill ? 
blockSize : cycleSize); diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index deb7d9405b..d6f3153f3d 100644 --- a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -46,6 +46,7 @@ VifUnpackSSE_Base::VifUnpackSSE_Base() , workReg( xmm1 ) , destReg( xmm0 ) { + UnpkLoopIteration = 0; } void VifUnpackSSE_Base::xMovDest() const { @@ -182,9 +183,10 @@ void VifUnpackSSE_Base::xUPK_V2_16() const { } else { - xMOV32 (workReg, ptr32[srcIndirect]); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 16); + xXOR.PD (destReg, destReg); + xMOV64 (workReg, ptr32[srcIndirect]); + xPUNPCK.LWD(workReg, destReg); + //xShiftR (workReg, 16); } xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0 }