VIF-JIT: Skip src reads on fill writes using regs

Clean up some of the code
2023-05-01 07:37:47 +01:00 · 2023-05-01 07:37:47 +01:00 · fd2960c9cb
parent c2907ea58f
commit fd2960c9cb
2 changed files with 13 additions and 19 deletions
--- a/pcsx2/x86/newVif_Dynarec.cpp
+++ b/pcsx2/x86/newVif_Dynarec.cpp
@@ -95,6 +95,7 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
 		xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
 		MSKPATH3_LOG("Moving row");
 	}
+
 	if (m3 && doMask)
 	{
 		MSKPATH3_LOG("Merging Cols");
@@ -111,7 +112,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
 {
 	pxAssertDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking.");

-	int cc = std::min(vCL, 3);
+	const int cc = std::min(vCL, 3);
 	u32 m0 = (vB.mask >> (cc * 8)) & 0xff; //The actual mask example 0xE4 (protect, col, row, clear)
 	u32 m3 = ((m0 & 0xaa) >> 1) & ~m0; //all the upper bits (cols shifted right) cancelling out any write protects 0x10
 	u32 m2 = (m0 & 0x55) & (~m0 >> 1); // all the lower bits (rows)cancelling out any write protects 0x04
@@ -125,6 +126,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
 	{
 		mVUmergeRegs(regX, xmmRow, m2);
 	}
+
 	if (doMask && m3) // Merge MaskCol
 	{
 		mVUmergeRegs(regX, xRegisterSSE(xmmCol0.Id + cc), m3);
@@ -166,6 +168,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
 			}
 		}
 	}
+
 	if (doMask && m4) // Merge Write Protect
 		mVUsaveReg(regX, ptr32[dstIndirect], m4 ^ 0xf, false);
 	else
@@ -277,14 +280,11 @@ void VifUnpackSSE_Dynarec::CompileRoutine()

 	while (vNum)
 	{
-
-
 		ShiftDisplacementWindow(dstIndirect, arg1reg);

 		if (UnpkNoOfIterations == 0)
 			ShiftDisplacementWindow(srcIndirect, arg2reg); //Don't need to do this otherwise as we arent reading the source.

-
 		if (vCL < cycleSize)
 		{
 			ModUnpack(upkNum, false);
@@ -302,9 +302,14 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
 		}
 		else if (isFill)
 		{
-			//Filling doesn't need anything fancy, it's pretty much a normal write, just doesnt increment the source.
-			//DevCon.WriteLn("filling mode!");
-			xUnpack(upkNum);
+			// Filling doesn't need anything fancy, it's pretty much a normal write, just doesn't increment the source.
+			// If all vectors read a row or column or are masked, we don't need to process the source at all.
+			const int cc = std::min(vCL, 3);
+			u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
+			m0 = (m0 >> 1) | m0;
+
+			if ((m0 & 0x55) != 0x55)
+				xUnpack(upkNum);
 			xMovDest();

 			dstIndirect += 16;
@@ -322,6 +327,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()

 	if (doMode >= 2)
 		writeBackRow();
+
 	xRET();
 }

--- a/pcsx2/x86/newVif_UnpackSSE.cpp
+++ b/pcsx2/x86/newVif_UnpackSSE.cpp
@@ -77,7 +77,6 @@ void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const

 void VifUnpackSSE_Base::xUPK_S_32() const
 {
-
 	switch (UnpkLoopIteration)
 	{
 		case 0:
@@ -98,7 +97,6 @@ void VifUnpackSSE_Base::xUPK_S_32() const

 void VifUnpackSSE_Base::xUPK_S_16() const
 {
-
 	switch (UnpkLoopIteration)
 	{
 		case 0:
@@ -119,7 +117,6 @@ void VifUnpackSSE_Base::xUPK_S_16() const

 void VifUnpackSSE_Base::xUPK_S_8() const
 {
-
 	switch (UnpkLoopIteration)
 	{
 		case 0:
@@ -145,7 +142,6 @@ void VifUnpackSSE_Base::xUPK_S_8() const

 void VifUnpackSSE_Base::xUPK_V2_32() const
 {
-
 	if (UnpkLoopIteration == 0)
 	{
 		xMOV128(workReg, ptr32[srcIndirect]);
@@ -163,7 +159,6 @@ void VifUnpackSSE_Base::xUPK_V2_32() const

 void VifUnpackSSE_Base::xUPK_V2_16() const
 {
-
 	if (UnpkLoopIteration == 0)
 	{
 		xPMOVXX16(workReg);
@@ -177,7 +172,6 @@ void VifUnpackSSE_Base::xUPK_V2_16() const

 void VifUnpackSSE_Base::xUPK_V2_8() const
 {
-
 	if (UnpkLoopIteration == 0)
 	{
 		xPMOVXX8(workReg);
@@ -191,7 +185,6 @@ void VifUnpackSSE_Base::xUPK_V2_8() const

 void VifUnpackSSE_Base::xUPK_V3_32() const
 {
-
 	xMOV128(destReg, ptr128[srcIndirect]);
 	if (UnpkLoopIteration != IsAligned)
 		xAND.PS(destReg, ptr128[SSEXYZWMask[0]]);
@@ -199,7 +192,6 @@ void VifUnpackSSE_Base::xUPK_V3_32() const

 void VifUnpackSSE_Base::xUPK_V3_16() const
 {
-
 	xPMOVXX16(destReg);

 	//With V3-16, it takes the first vector from the next position as the W vector
@@ -216,7 +208,6 @@ void VifUnpackSSE_Base::xUPK_V3_16() const

 void VifUnpackSSE_Base::xUPK_V3_8() const
 {
-
 	xPMOVXX8(destReg);
 	if (UnpkLoopIteration != IsAligned)
 		xAND.PS(destReg, ptr128[SSEXYZWMask[0]]);
@@ -239,7 +230,6 @@ void VifUnpackSSE_Base::xUPK_V4_8() const

 void VifUnpackSSE_Base::xUPK_V4_5() const
 {
-
 	xMOV16      (workReg, ptr32[srcIndirect]);
 	xPSHUF.D    (workReg, workReg, _v0);
 	xPSLL.D     (workReg, 3);           // ABG|R5.000
@@ -278,7 +268,6 @@ void VifUnpackSSE_Base::xUnpack(int upknum) const
 		case 14: xUPK_V4_8();  break;
 		case 15: xUPK_V4_5();  break;

-
 		case 3:
 		case 7:
 		case 11:
@@ -313,7 +302,6 @@ void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const
 // ecx = dest, edx = src
 static void nVifGen(int usn, int mask, int curCycle)
 {
-
 	int usnpart  = usn * 2 * 16;
 	int maskpart = mask * 16;