VIF: Improve IR setup for skipped unpack inputs/writes

This commit is contained in:
refractionpcsx2 2023-05-03 00:25:15 +01:00
parent fe4788ae3f
commit 341f377e6d
3 changed files with 105 additions and 24 deletions

View File

@ -90,15 +90,15 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge) u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)
u32 m2 = (m0 & 0x55555555) & (~m0 >> 1); // 0x1000100 & 0xFE7EFF7F = 0x00000100 Just the row u32 m2 = (m0 & 0x55555555) & (~m0 >> 1); // 0x1000100 & 0xFE7EFF7F = 0x00000100 Just the row
if ((m2 && doMask) || doMode) if ((doMask && m2) || doMode)
{ {
xMOVAPS(xmmRow, ptr128[&vif.MaskRow]); xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
MSKPATH3_LOG("Moving row"); MSKPATH3_LOG("Moving row");
} }
if (m3 && doMask) if (doMask && m3)
{ {
MSKPATH3_LOG("Merging Cols"); VIF_LOG("Merging Cols");
xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]); xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1); if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2); if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
@ -122,10 +122,6 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
makeMergeMask(m3); makeMergeMask(m3);
makeMergeMask(m4); makeMergeMask(m4);
// Everything is write protected, don't touch it, saveReg can't handle a mask of 0.
if (doMask && m4 == 0xf)
return;
if (doMask && m2) // Merge MaskRow if (doMask && m2) // Merge MaskRow
{ {
mVUmergeRegs(regX, xmmRow, m2); mVUmergeRegs(regX, xmmRow, m2);
@ -184,8 +180,7 @@ void VifUnpackSSE_Dynarec::writeBackRow() const
const int idx = v.idx; const int idx = v.idx;
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow); xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
DevCon.WriteLn("nVif: writing back row reg! [doMode = %d]", doMode); VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode);
// ToDo: Do we need to write back to vifregs.rX too!? :/
} }
static void ShiftDisplacementWindow(xAddressVoid& addr, const xRegisterLong& modReg) static void ShiftDisplacementWindow(xAddressVoid& addr, const xRegisterLong& modReg)
@ -263,6 +258,25 @@ void VifUnpackSSE_Dynarec::ModUnpack(int upknum, bool PostOp)
} }
} }
void VifUnpackSSE_Dynarec::ProcessMasks()
{
skipProcessing = false;
inputMasked = false;
if (!doMask)
return;
const int cc = std::min(vCL, 3);
const u32 full_mask = (vB.mask >> (cc * 8)) & 0xff;
const u32 rowcol_mask = ((full_mask >> 1) | full_mask) & 0x55; // Rows or Cols being written instead of data, or protected.
// Every channel is write protected for this cycle, no need to process anything.
skipProcessing = full_mask == 0xff;
// All channels are masked, no reason to process anything here.
inputMasked = rowcol_mask == 0x55;
}
void VifUnpackSSE_Dynarec::CompileRoutine() void VifUnpackSSE_Dynarec::CompileRoutine()
{ {
const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2) const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2)
@ -275,7 +289,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
uint vNum = vB.num ? vB.num : 256; uint vNum = vB.num ? vB.num : 256;
doMode = (upkNum == 0xf) ? 0 : doMode; // V4_5 has no mode feature. doMode = (upkNum == 0xf) ? 0 : doMode; // V4_5 has no mode feature.
UnpkNoOfIterations = 0; UnpkNoOfIterations = 0;
MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum); VIF_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
pxAssume(vCL == 0); pxAssume(vCL == 0);
@ -293,6 +307,9 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
if (UnpkNoOfIterations == 0) if (UnpkNoOfIterations == 0)
ShiftDisplacementWindow(srcIndirect, arg2reg); //Don't need to do this otherwise as we arent reading the source. ShiftDisplacementWindow(srcIndirect, arg2reg); //Don't need to do this otherwise as we arent reading the source.
// Determine if reads/processing can be skipped.
ProcessMasks();
if (vCL < cycleSize) if (vCL < cycleSize)
{ {
ModUnpack(upkNum, false); ModUnpack(upkNum, false);
@ -300,7 +317,6 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
xMovDest(); xMovDest();
ModUnpack(upkNum, true); ModUnpack(upkNum, true);
dstIndirect += 16; dstIndirect += 16;
srcIndirect += vift; srcIndirect += vift;
@ -311,13 +327,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
else if (isFill) else if (isFill)
{ {
// Filling doesn't need anything fancy, it's pretty much a normal write, just doesnt increment the source. // Filling doesn't need anything fancy, it's pretty much a normal write, just doesnt increment the source.
// If all vectors read a row or column or are masked, we don't need to process the source at all. xUnpack(upkNum);
const int cc = std::min(vCL, 3);
u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
m0 = (m0 >> 1) | m0;
if ((m0 & 0x55) != 0x55)
xUnpack(upkNum);
xMovDest(); xMovDest();
dstIndirect += 16; dstIndirect += 16;

View File

@ -46,8 +46,13 @@ VifUnpackSSE_Base::VifUnpackSSE_Base()
void VifUnpackSSE_Base::xMovDest() const void VifUnpackSSE_Base::xMovDest() const
{ {
if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); } if (!IsWriteProtectedOp())
else { doMaskWrite(destReg); } {
if (IsUnmaskedOp())
xMOVAPS(ptr[dstIndirect], destReg);
else
doMaskWrite(destReg);
}
} }
void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const
@ -70,10 +75,15 @@ void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const
void VifUnpackSSE_Base::xUPK_S_32() const void VifUnpackSSE_Base::xUPK_S_32() const
{ {
if (UnpkLoopIteration == 0)
xMOV128(workReg, ptr32[srcIndirect]);
if (IsInputMasked())
return;
switch (UnpkLoopIteration) switch (UnpkLoopIteration)
{ {
case 0: case 0:
xMOV128(workReg, ptr32[srcIndirect]);
xPSHUF.D(destReg, workReg, _v0); xPSHUF.D(destReg, workReg, _v0);
break; break;
case 1: case 1:
@ -90,10 +100,15 @@ void VifUnpackSSE_Base::xUPK_S_32() const
void VifUnpackSSE_Base::xUPK_S_16() const void VifUnpackSSE_Base::xUPK_S_16() const
{ {
if (UnpkLoopIteration == 0)
xPMOVXX16(workReg);
if (IsInputMasked())
return;
switch (UnpkLoopIteration) switch (UnpkLoopIteration)
{ {
case 0: case 0:
xPMOVXX16(workReg);
xPSHUF.D(destReg, workReg, _v0); xPSHUF.D(destReg, workReg, _v0);
break; break;
case 1: case 1:
@ -110,10 +125,15 @@ void VifUnpackSSE_Base::xUPK_S_16() const
void VifUnpackSSE_Base::xUPK_S_8() const void VifUnpackSSE_Base::xUPK_S_8() const
{ {
if (UnpkLoopIteration == 0)
xPMOVXX8(workReg);
if (IsInputMasked())
return;
switch (UnpkLoopIteration) switch (UnpkLoopIteration)
{ {
case 0: case 0:
xPMOVXX8(workReg);
xPSHUF.D(destReg, workReg, _v0); xPSHUF.D(destReg, workReg, _v0);
break; break;
case 1: case 1:
@ -138,12 +158,19 @@ void VifUnpackSSE_Base::xUPK_V2_32() const
if (UnpkLoopIteration == 0) if (UnpkLoopIteration == 0)
{ {
xMOV128(workReg, ptr32[srcIndirect]); xMOV128(workReg, ptr32[srcIndirect]);
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0 xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
if (IsAligned) if (IsAligned)
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2 xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
} }
else else
{ {
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2 xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
if (IsAligned) if (IsAligned)
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2 xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
@ -155,10 +182,17 @@ void VifUnpackSSE_Base::xUPK_V2_16() const
if (UnpkLoopIteration == 0) if (UnpkLoopIteration == 0)
{ {
xPMOVXX16(workReg); xPMOVXX16(workReg);
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0 xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
} }
else else
{ {
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2 xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
} }
} }
@ -168,16 +202,26 @@ void VifUnpackSSE_Base::xUPK_V2_8() const
if (UnpkLoopIteration == 0) if (UnpkLoopIteration == 0)
{ {
xPMOVXX8(workReg); xPMOVXX8(workReg);
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0 xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
} }
else else
{ {
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2 xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
} }
} }
void VifUnpackSSE_Base::xUPK_V3_32() const void VifUnpackSSE_Base::xUPK_V3_32() const
{ {
if (IsInputMasked())
return;
xMOV128(destReg, ptr128[srcIndirect]); xMOV128(destReg, ptr128[srcIndirect]);
if (UnpkLoopIteration != IsAligned) if (UnpkLoopIteration != IsAligned)
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2 xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
@ -185,6 +229,9 @@ void VifUnpackSSE_Base::xUPK_V3_32() const
void VifUnpackSSE_Base::xUPK_V3_16() const void VifUnpackSSE_Base::xUPK_V3_16() const
{ {
if (IsInputMasked())
return;
xPMOVXX16(destReg); xPMOVXX16(destReg);
//With V3-16, it takes the first vector from the next position as the W vector //With V3-16, it takes the first vector from the next position as the W vector
@ -199,6 +246,9 @@ void VifUnpackSSE_Base::xUPK_V3_16() const
void VifUnpackSSE_Base::xUPK_V3_8() const void VifUnpackSSE_Base::xUPK_V3_8() const
{ {
if (IsInputMasked())
return;
xPMOVXX8(destReg); xPMOVXX8(destReg);
if (UnpkLoopIteration != IsAligned) if (UnpkLoopIteration != IsAligned)
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2 xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
@ -206,21 +256,33 @@ void VifUnpackSSE_Base::xUPK_V3_8() const
void VifUnpackSSE_Base::xUPK_V4_32() const void VifUnpackSSE_Base::xUPK_V4_32() const
{ {
if (IsInputMasked())
return;
xMOV128(destReg, ptr32[srcIndirect]); xMOV128(destReg, ptr32[srcIndirect]);
} }
void VifUnpackSSE_Base::xUPK_V4_16() const void VifUnpackSSE_Base::xUPK_V4_16() const
{ {
if (IsInputMasked())
return;
xPMOVXX16(destReg); xPMOVXX16(destReg);
} }
void VifUnpackSSE_Base::xUPK_V4_8() const void VifUnpackSSE_Base::xUPK_V4_8() const
{ {
if (IsInputMasked())
return;
xPMOVXX8(destReg); xPMOVXX8(destReg);
} }
void VifUnpackSSE_Base::xUPK_V4_5() const void VifUnpackSSE_Base::xUPK_V4_5() const
{ {
if (IsInputMasked())
return;
xMOV16 (workReg, ptr32[srcIndirect]); xMOV16 (workReg, ptr32[srcIndirect]);
xPSHUF.D (workReg, workReg, _v0); xPSHUF.D (workReg, workReg, _v0);
xPSLL.D (workReg, 3); // ABG|R5.000 xPSLL.D (workReg, 3); // ABG|R5.000

View File

@ -48,6 +48,8 @@ public:
virtual ~VifUnpackSSE_Base() = default; virtual ~VifUnpackSSE_Base() = default;
virtual void xUnpack(int upktype) const; virtual void xUnpack(int upktype) const;
virtual bool IsWriteProtectedOp() const = 0;
virtual bool IsInputMasked() const = 0;
virtual bool IsUnmaskedOp() const = 0; virtual bool IsUnmaskedOp() const = 0;
virtual void xMovDest() const; virtual void xMovDest() const;
@ -90,6 +92,8 @@ public:
VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_); VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_);
virtual ~VifUnpackSSE_Simple() = default; virtual ~VifUnpackSSE_Simple() = default;
virtual bool IsWriteProtectedOp() const { return false; }
virtual bool IsInputMasked() const { return false; }
virtual bool IsUnmaskedOp() const { return !doMask; } virtual bool IsUnmaskedOp() const { return !doMask; }
protected: protected:
@ -105,7 +109,9 @@ class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base
public: public:
bool isFill; bool isFill;
int doMode; // two bit value representing... something! int doMode; // two bit value representing difference mode
bool skipProcessing;
bool inputMasked;
protected: protected:
const nVifStruct& v; // vif0 or vif1 const nVifStruct& v; // vif0 or vif1
@ -125,9 +131,12 @@ public:
virtual ~VifUnpackSSE_Dynarec() = default; virtual ~VifUnpackSSE_Dynarec() = default;
virtual bool IsWriteProtectedOp() const { return skipProcessing; }
virtual bool IsInputMasked() const { return inputMasked; }
virtual bool IsUnmaskedOp() const { return !doMode && !doMask; } virtual bool IsUnmaskedOp() const { return !doMode && !doMask; }
void ModUnpack(int upknum, bool PostOp); void ModUnpack(int upknum, bool PostOp);
void ProcessMasks();
void CompileRoutine(); void CompileRoutine();