From dd2d4edffc09f11528544219f26548f6698a35ce Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Mon, 1 May 2023 08:12:43 +0100 Subject: [PATCH] VIF-JIT: Get rid of mem read for mask --- pcsx2/x86/newVif_Dynarec.cpp | 4 ++++ pcsx2/x86/newVif_UnpackSSE.cpp | 21 ++++++--------------- pcsx2/x86/newVif_UnpackSSE.h | 1 + 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/pcsx2/x86/newVif_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp index 0dc572bd55..e570fbc0fd 100644 --- a/pcsx2/x86/newVif_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -278,6 +278,10 @@ void VifUnpackSSE_Dynarec::CompileRoutine() // Value passed determines # of col regs we need to load SetMasks(isFill ? blockSize : cycleSize); + // Need a zero register for V2_32/V3 unpacks. + if ((upkNum >= 8 && upkNum <= 10) || upkNum == 4) + xXOR.PS(zeroReg, zeroReg); + while (vNum) { ShiftDisplacementWindow(dstIndirect, arg1reg); diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index 7fa93e4159..ea1b189bb0 100644 --- a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -24,14 +24,6 @@ #define xMOV64(regX, loc) xMOVUPS (regX, loc) #define xMOV128(regX, loc) xMOVUPS (regX, loc) -alignas(16) static const u32 SSEXYZWMask[4][4] = -{ - {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000}, - {0xffffffff, 0xffffffff, 0x00000000, 0xffffffff}, - {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff}, - {0x00000000, 0xffffffff, 0xffffffff, 0xffffffff} -}; - //alignas(__pagesize) static u8 nVifUpkExec[__pagesize*4]; static RecompiledCodeReserve* nVifUpkExec = NULL; @@ -46,6 +38,7 @@ VifUnpackSSE_Base::VifUnpackSSE_Base() , IsAligned(0) , dstIndirect(arg1reg) , srcIndirect(arg2reg) + , zeroReg(xmm2) , workReg(xmm1) , destReg(xmm0) { @@ -147,13 +140,13 @@ void VifUnpackSSE_Base::xUPK_V2_32() const xMOV128(workReg, ptr32[srcIndirect]); xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0 if (IsAligned) - xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //zero last word - tested on ps2 + xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2 } else { xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2 if (IsAligned) - xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //zero last word - tested on ps2 + xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2 } } @@ -187,7 +180,7 @@ void VifUnpackSSE_Base::xUPK_V3_32() const { xMOV128(destReg, ptr128[srcIndirect]); if (UnpkLoopIteration != IsAligned) - xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); + xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2 } void VifUnpackSSE_Base::xUPK_V3_16() const @@ -201,16 +194,14 @@ void VifUnpackSSE_Base::xUPK_V3_16() const int result = (((UnpkLoopIteration / 4) + 1 + (4 - IsAligned)) & 0x3); if ((UnpkLoopIteration & 0x1) == 0 && result == 0) - { - xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); //zero last word on QW boundary if whole 32bit word is used - tested on ps2 - } + xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2 } void VifUnpackSSE_Base::xUPK_V3_8() const { xPMOVXX8(destReg); if (UnpkLoopIteration != IsAligned) - xAND.PS(destReg, ptr128[SSEXYZWMask[0]]); + xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2 } void VifUnpackSSE_Base::xUPK_V4_32() const diff --git a/pcsx2/x86/newVif_UnpackSSE.h b/pcsx2/x86/newVif_UnpackSSE.h index e18969384a..d20ad72de2 100644 --- a/pcsx2/x86/newVif_UnpackSSE.h +++ b/pcsx2/x86/newVif_UnpackSSE.h @@ -39,6 +39,7 @@ public: protected: xAddressVoid dstIndirect; xAddressVoid srcIndirect; + xRegisterSSE zeroReg; xRegisterSSE workReg; xRegisterSSE destReg;