diff --git a/pcsx2/Vif_Dma.h b/pcsx2/Vif_Dma.h index ce52981442..31ca98c31c 100644 --- a/pcsx2/Vif_Dma.h +++ b/pcsx2/Vif_Dma.h @@ -70,7 +70,7 @@ struct vifStruct { int pass; int cl; u8 usn; - bool start_aligned; + u8 start_aligned; u8 StructEnd; // Address of this is used to calculate end of struct }; diff --git a/pcsx2/Vif_Unpack.cpp b/pcsx2/Vif_Unpack.cpp index 469db6cb60..19943a2922 100644 --- a/pcsx2/Vif_Unpack.cpp +++ b/pcsx2/Vif_Unpack.cpp @@ -232,7 +232,12 @@ _vifT void vifUnpackSetup(const u32 *data) { vifX.tag.cmd = vifX.cmd; GetVifX.pass = 1; - vifX.start_aligned = !!((vifX.vifpacketsize-1) & 0x1); + //Ugh, things are never easy. + //Alright, in most cases with V2 and V3 we only need to know whether it's offset by 32 bits. + //However, in V3-16, if the data it requires ends on a QW boundary of the source data, + //the W vector becomes 0, so we need to know how far through the current QW the data begins. + vifX.start_aligned = 4-((vifX.vifpacketsize-1) & 0x3); + //DevCon.Warning("Aligned %d packetsize at data start %d", vifX.start_aligned, vifX.vifpacketsize - 1); } template void vifUnpackSetup<0>(const u32 *data); diff --git a/pcsx2/x86/newVif_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp index 28a3b12958..0b5ee77b13 100644 --- a/pcsx2/x86/newVif_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -161,14 +161,14 @@ void VifUnpackSSE_Dynarec::ModUnpack( int upknum, bool PostOp ) { case 0: case 1: - case 2: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x3; } break; + case 2: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x3; } break; case 4: case 5: case 6: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break; - case 8: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break; - case 9: break; + case 8: if(PostOp == true) { UnpkLoopIteration++; UnpkLoopIteration = UnpkLoopIteration & 0x1; } break; + case 9: if (PostOp == 
false) { UnpkLoopIteration++; } break; case 10: break; case 12: break; @@ -316,6 +316,10 @@ _vifT __fi void dVifUnpack(const u8* data, bool isFill) { v.block.wl = vifRegs.cycle.wl; v.block.aligned = vif.start_aligned; //MTVU doesn't have a packet size! + if ((upkType & 0xf) != 9) + v.block.aligned &= 0x1; + + //DevCon.Warning("Alignment %d", v.block.aligned); // Zero out the mask parameter if it's unused -- games leave random junk // values here which cause false recblock cache misses. v.block.mask = doMask ? vifRegs.mask : 0; diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index a534263847..bd28dba23c 100644 --- a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -260,7 +260,17 @@ void VifUnpackSSE_Base::xUPK_V3_16() const { xPUNPCK.LWD(destReg, destReg); xShiftR (destReg, 16); } -} + + //With V3-16, it takes the first vector from the next position as the W vector. + //However, if the end of this iteration of the unpack falls on a quadword boundary, W becomes 0. + //IsAligned is the position through the current QW in the VIF packet. + //UnpkLoopIteration counts where we are in the packet. + int result = (((UnpkLoopIteration/4) + 1 + (4-IsAligned)) & 0x3); + + if ((UnpkLoopIteration & 0x1) == 0 && result == 0){ + xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //Zero the last word on a QW boundary if the whole 32-bit word is used - tested on PS2. + } +} void VifUnpackSSE_Base::xUPK_V3_8() const {