diff --git a/pcsx2/x86/VifUnpackSSE.cpp b/pcsx2/x86/VifUnpackSSE.cpp index 1fa850fb1c..3117590278 100644 --- a/pcsx2/x86/VifUnpackSSE.cpp +++ b/pcsx2/x86/VifUnpackSSE.cpp @@ -26,6 +26,18 @@ static __pagealigned u8 nVifUpkExec[__pagesize*4]; +// Merges xmm vectors without modifying source reg: when x86caps.hasStreamingSIMD4Extensions is false and xyzw is not 15/12/11/8/3, src is first copied into temp and temp is merged instead, so temp gets overwritten (NOTE(review): presumably those are the cases where mVUmergeRegs would alter src - confirm) +void mergeVectors(int dest, int src, int temp, int xyzw) { + if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15) + || (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3)) { + mVUmergeRegs(dest, src, xyzw); + } + else { + SSE_MOVAPS_XMM_to_XMM(temp, src); + mVUmergeRegs(dest, temp, xyzw); + } +} + // ===================================================================================================== // VifUnpackSSE_Base Section // ===================================================================================================== diff --git a/pcsx2/x86/VifUnpackSSE.h b/pcsx2/x86/VifUnpackSSE.h index 3a4214d8dd..a3f59a61f6 100644 --- a/pcsx2/x86/VifUnpackSSE.h +++ b/pcsx2/x86/VifUnpackSSE.h @@ -26,6 +26,8 @@ using namespace x86Emitter; #if newVif +extern void mergeVectors(int dest, int src, int temp, int xyzw); + // -------------------------------------------------------------------------------------- // VifUnpackSSE_Base // -------------------------------------------------------------------------------------- diff --git a/pcsx2/x86/VifUnpackSSE_Dynarec.cpp b/pcsx2/x86/VifUnpackSSE_Dynarec.cpp index a1373be66e..8c7d29e808 100644 --- a/pcsx2/x86/VifUnpackSSE_Dynarec.cpp +++ b/pcsx2/x86/VifUnpackSSE_Dynarec.cpp @@ -111,28 +111,29 @@ _f void VifUnpackSSE_Dynarec::SetMasks(int cS) const { void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { pxAssumeDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking."); + int t = regX.Id ? 
0 : 1; // Get Temp Reg: register id 1 when regX.Id is 0, otherwise id 0, so the scratch register passed to mergeVectors never aliases regX int cc = aMin(vCL, 3); u32 m0 = (vB.mask >> (cc * 8)) & 0xff; - u32 m1 = m0 & 0xaaaa; + u32 m1 = m0 & 0xaa; u32 m2 =(~m1>>1) & m0; u32 m3 = (m1>>1) & ~m0; u32 m4 = (m1>>1) & m0; makeMergeMask(m2); makeMergeMask(m3); makeMergeMask(m4); - if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect - if (doMask&&m2) { mVUmergeRegs(regX.Id, xmmRow.Id, m2); } // Merge Row - if (doMask&&m3) { mVUmergeRegs(regX.Id, xmmCol0.Id+cc, m3); } // Merge Col - if (doMask&&m4) { mVUmergeRegs(regX.Id, xmmTemp.Id, m4); } // Merge Write Protect + if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect + if (doMask&&m2) { mergeVectors(regX.Id, xmmRow.Id, t, m2); } // Merge Row + if (doMask&&m3) { mergeVectors(regX.Id, xmmCol0.Id+cc, t, m3); } // Merge Col + if (doMask&&m4) { mergeVectors(regX.Id, xmmTemp.Id, t, m4); } // Merge Write Protect if (doMode) { u32 m5 = (~m1>>1) & ~m0; if (!doMask) m5 = 0xf; else makeMergeMask(m5); if (m5 < 0xf) { xPXOR(xmmTemp, xmmTemp); - mVUmergeRegs(xmmTemp.Id, xmmRow.Id, m5); + mergeVectors(xmmTemp.Id, xmmRow.Id, t, m5); xPADD.D(regX, xmmTemp); - if (doMode==2) mVUmergeRegs(xmmRow.Id, regX.Id, m5); + if (doMode==2) mergeVectors(xmmRow.Id, regX.Id, t, m5); } else if (m5 == 0xf) { xPADD.D(regX, xmmRow); diff --git a/pcsx2/x86/newVif_Unpack.cpp b/pcsx2/x86/newVif_Unpack.cpp index 159f7158bb..f81d62a651 100644 --- a/pcsx2/x86/newVif_Unpack.cpp +++ b/pcsx2/x86/newVif_Unpack.cpp @@ -141,7 +141,8 @@ int nVifUnpack(int idx, u8* data) { } if (ret == v.vif->tag.size) { // Full Transfer - dVifUnpack(idx, data, size, isFill); + if (newVifDynaRec) dVifUnpack(idx, data, size, isFill); + else _nVifUnpack(idx, data, size, isFill); vif->tag.size = 0; vif->cmd = 0; }