VIF: Improve IR setup for skipped unpack inputs/writes

This commit is contained in:
refractionpcsx2 2023-05-03 00:25:15 +01:00
parent fe4788ae3f
commit 341f377e6d
3 changed files with 105 additions and 24 deletions

View File

@ -90,15 +90,15 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)
u32 m2 = (m0 & 0x55555555) & (~m0 >> 1); // 0x1000100 & 0xFE7EFF7F = 0x00000100 Just the row
if ((m2 && doMask) || doMode)
if ((doMask && m2) || doMode)
{
xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
MSKPATH3_LOG("Moving row");
}
if (m3 && doMask)
if (doMask && m3)
{
MSKPATH3_LOG("Merging Cols");
VIF_LOG("Merging Cols");
xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
@ -122,10 +122,6 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
makeMergeMask(m3);
makeMergeMask(m4);
// Everything is write protected, don't touch it, saveReg can't handle a mask of 0.
if (doMask && m4 == 0xf)
return;
if (doMask && m2) // Merge MaskRow
{
mVUmergeRegs(regX, xmmRow, m2);
@ -184,8 +180,7 @@ void VifUnpackSSE_Dynarec::writeBackRow() const
const int idx = v.idx;
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
DevCon.WriteLn("nVif: writing back row reg! [doMode = %d]", doMode);
// ToDo: Do we need to write back to vifregs.rX too!? :/
VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode);
}
static void ShiftDisplacementWindow(xAddressVoid& addr, const xRegisterLong& modReg)
@ -263,6 +258,25 @@ void VifUnpackSSE_Dynarec::ModUnpack(int upknum, bool PostOp)
}
}
void VifUnpackSSE_Dynarec::ProcessMasks()
{
skipProcessing = false;
inputMasked = false;
if (!doMask)
return;
const int cc = std::min(vCL, 3);
const u32 full_mask = (vB.mask >> (cc * 8)) & 0xff;
const u32 rowcol_mask = ((full_mask >> 1) | full_mask) & 0x55; // Rows or Cols being written instead of data, or protected.
// Every channel is write protected for this cycle, no need to process anything.
skipProcessing = full_mask == 0xff;
// All channels are masked, no reason to process anything here.
inputMasked = rowcol_mask == 0x55;
}
void VifUnpackSSE_Dynarec::CompileRoutine()
{
const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2)
@ -275,7 +289,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
uint vNum = vB.num ? vB.num : 256;
doMode = (upkNum == 0xf) ? 0 : doMode; // V4_5 has no mode feature.
UnpkNoOfIterations = 0;
MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
VIF_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
pxAssume(vCL == 0);
@ -293,6 +307,9 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
if (UnpkNoOfIterations == 0)
ShiftDisplacementWindow(srcIndirect, arg2reg); //Don't need to do this otherwise as we arent reading the source.
// Determine if reads/processing can be skipped.
ProcessMasks();
if (vCL < cycleSize)
{
ModUnpack(upkNum, false);
@ -300,7 +317,6 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
xMovDest();
ModUnpack(upkNum, true);
dstIndirect += 16;
srcIndirect += vift;
@ -311,13 +327,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
else if (isFill)
{
// Filling doesn't need anything fancy, it's pretty much a normal write, it just doesn't increment the source.
// If all vectors read a row or column or are masked, we don't need to process the source at all.
const int cc = std::min(vCL, 3);
u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
m0 = (m0 >> 1) | m0;
if ((m0 & 0x55) != 0x55)
xUnpack(upkNum);
xUnpack(upkNum);
xMovDest();
dstIndirect += 16;

View File

@ -46,8 +46,13 @@ VifUnpackSSE_Base::VifUnpackSSE_Base()
void VifUnpackSSE_Base::xMovDest() const
{
if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); }
else { doMaskWrite(destReg); }
if (!IsWriteProtectedOp())
{
if (IsUnmaskedOp())
xMOVAPS(ptr[dstIndirect], destReg);
else
doMaskWrite(destReg);
}
}
void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const
@ -70,10 +75,15 @@ void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const
void VifUnpackSSE_Base::xUPK_S_32() const
{
if (UnpkLoopIteration == 0)
xMOV128(workReg, ptr32[srcIndirect]);
if (IsInputMasked())
return;
switch (UnpkLoopIteration)
{
case 0:
xMOV128(workReg, ptr32[srcIndirect]);
xPSHUF.D(destReg, workReg, _v0);
break;
case 1:
@ -90,10 +100,15 @@ void VifUnpackSSE_Base::xUPK_S_32() const
void VifUnpackSSE_Base::xUPK_S_16() const
{
if (UnpkLoopIteration == 0)
xPMOVXX16(workReg);
if (IsInputMasked())
return;
switch (UnpkLoopIteration)
{
case 0:
xPMOVXX16(workReg);
xPSHUF.D(destReg, workReg, _v0);
break;
case 1:
@ -110,10 +125,15 @@ void VifUnpackSSE_Base::xUPK_S_16() const
void VifUnpackSSE_Base::xUPK_S_8() const
{
if (UnpkLoopIteration == 0)
xPMOVXX8(workReg);
if (IsInputMasked())
return;
switch (UnpkLoopIteration)
{
case 0:
xPMOVXX8(workReg);
xPSHUF.D(destReg, workReg, _v0);
break;
case 1:
@ -138,12 +158,19 @@ void VifUnpackSSE_Base::xUPK_V2_32() const
if (UnpkLoopIteration == 0)
{
xMOV128(workReg, ptr32[srcIndirect]);
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
if (IsAligned)
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
}
else
{
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
if (IsAligned)
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
@ -155,10 +182,17 @@ void VifUnpackSSE_Base::xUPK_V2_16() const
if (UnpkLoopIteration == 0)
{
xPMOVXX16(workReg);
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
}
else
{
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
}
}
@ -168,16 +202,26 @@ void VifUnpackSSE_Base::xUPK_V2_8() const
if (UnpkLoopIteration == 0)
{
xPMOVXX8(workReg);
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
}
else
{
if (IsInputMasked())
return;
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
}
}
void VifUnpackSSE_Base::xUPK_V3_32() const
{
if (IsInputMasked())
return;
xMOV128(destReg, ptr128[srcIndirect]);
if (UnpkLoopIteration != IsAligned)
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
@ -185,6 +229,9 @@ void VifUnpackSSE_Base::xUPK_V3_32() const
void VifUnpackSSE_Base::xUPK_V3_16() const
{
if (IsInputMasked())
return;
xPMOVXX16(destReg);
//With V3-16, it takes the first vector from the next position as the W vector
@ -199,6 +246,9 @@ void VifUnpackSSE_Base::xUPK_V3_16() const
void VifUnpackSSE_Base::xUPK_V3_8() const
{
if (IsInputMasked())
return;
xPMOVXX8(destReg);
if (UnpkLoopIteration != IsAligned)
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
@ -206,21 +256,33 @@ void VifUnpackSSE_Base::xUPK_V3_8() const
// V4-32 unpack: emits a 128-bit move from the source into destReg,
// unless the input is reported as masked, in which case nothing is emitted.
void VifUnpackSSE_Base::xUPK_V4_32() const
{
	if (!IsInputMasked())
		xMOV128(destReg, ptr32[srcIndirect]);
}
// V4-16 unpack: emits the 16-bit extending load into destReg,
// unless the input is reported as masked, in which case nothing is emitted.
void VifUnpackSSE_Base::xUPK_V4_16() const
{
	if (!IsInputMasked())
		xPMOVXX16(destReg);
}
// V4-8 unpack: emits the 8-bit extending load into destReg,
// unless the input is reported as masked, in which case nothing is emitted.
void VifUnpackSSE_Base::xUPK_V4_8() const
{
	if (!IsInputMasked())
		xPMOVXX8(destReg);
}
void VifUnpackSSE_Base::xUPK_V4_5() const
{
if (IsInputMasked())
return;
xMOV16 (workReg, ptr32[srcIndirect]);
xPSHUF.D (workReg, workReg, _v0);
xPSLL.D (workReg, 3); // ABG|R5.000

View File

@ -48,6 +48,8 @@ public:
virtual ~VifUnpackSSE_Base() = default;
virtual void xUnpack(int upktype) const;
virtual bool IsWriteProtectedOp() const = 0;
virtual bool IsInputMasked() const = 0;
virtual bool IsUnmaskedOp() const = 0;
virtual void xMovDest() const;
@ -90,6 +92,8 @@ public:
VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_);
virtual ~VifUnpackSSE_Simple() = default;
virtual bool IsWriteProtectedOp() const { return false; }
virtual bool IsInputMasked() const { return false; }
virtual bool IsUnmaskedOp() const { return !doMask; }
protected:
@ -105,7 +109,9 @@ class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base
public:
bool isFill;
int doMode; // two bit value representing... something!
int doMode; // two bit value representing difference mode
bool skipProcessing;
bool inputMasked;
protected:
const nVifStruct& v; // vif0 or vif1
@ -125,9 +131,12 @@ public:
virtual ~VifUnpackSSE_Dynarec() = default;
virtual bool IsWriteProtectedOp() const { return skipProcessing; }
virtual bool IsInputMasked() const { return inputMasked; }
virtual bool IsUnmaskedOp() const { return !doMode && !doMask; }
void ModUnpack(int upknum, bool PostOp);
void ProcessMasks();
void CompileRoutine();