mirror of https://github.com/PCSX2/pcsx2.git
VIF: Improve IR setup for skipped unpack inputs/writes
This commit is contained in:
parent
fe4788ae3f
commit
341f377e6d
|
@ -90,15 +90,15 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
|
||||||
u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)
|
u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)
|
||||||
u32 m2 = (m0 & 0x55555555) & (~m0 >> 1); // 0x1000100 & 0xFE7EFF7F = 0x00000100 Just the row
|
u32 m2 = (m0 & 0x55555555) & (~m0 >> 1); // 0x1000100 & 0xFE7EFF7F = 0x00000100 Just the row
|
||||||
|
|
||||||
if ((m2 && doMask) || doMode)
|
if ((doMask && m2) || doMode)
|
||||||
{
|
{
|
||||||
xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
|
xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
|
||||||
MSKPATH3_LOG("Moving row");
|
MSKPATH3_LOG("Moving row");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m3 && doMask)
|
if (doMask && m3)
|
||||||
{
|
{
|
||||||
MSKPATH3_LOG("Merging Cols");
|
VIF_LOG("Merging Cols");
|
||||||
xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
|
xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
|
||||||
if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
|
if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
|
||||||
if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
|
if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
|
||||||
|
@ -122,10 +122,6 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
|
||||||
makeMergeMask(m3);
|
makeMergeMask(m3);
|
||||||
makeMergeMask(m4);
|
makeMergeMask(m4);
|
||||||
|
|
||||||
// Everything is write protected, don't touch it, saveReg can't handle a mask of 0.
|
|
||||||
if (doMask && m4 == 0xf)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (doMask && m2) // Merge MaskRow
|
if (doMask && m2) // Merge MaskRow
|
||||||
{
|
{
|
||||||
mVUmergeRegs(regX, xmmRow, m2);
|
mVUmergeRegs(regX, xmmRow, m2);
|
||||||
|
@ -184,8 +180,7 @@ void VifUnpackSSE_Dynarec::writeBackRow() const
|
||||||
const int idx = v.idx;
|
const int idx = v.idx;
|
||||||
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
|
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
|
||||||
|
|
||||||
DevCon.WriteLn("nVif: writing back row reg! [doMode = %d]", doMode);
|
VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode);
|
||||||
// ToDo: Do we need to write back to vifregs.rX too!? :/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ShiftDisplacementWindow(xAddressVoid& addr, const xRegisterLong& modReg)
|
static void ShiftDisplacementWindow(xAddressVoid& addr, const xRegisterLong& modReg)
|
||||||
|
@ -263,6 +258,25 @@ void VifUnpackSSE_Dynarec::ModUnpack(int upknum, bool PostOp)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VifUnpackSSE_Dynarec::ProcessMasks()
|
||||||
|
{
|
||||||
|
skipProcessing = false;
|
||||||
|
inputMasked = false;
|
||||||
|
|
||||||
|
if (!doMask)
|
||||||
|
return;
|
||||||
|
|
||||||
|
const int cc = std::min(vCL, 3);
|
||||||
|
const u32 full_mask = (vB.mask >> (cc * 8)) & 0xff;
|
||||||
|
const u32 rowcol_mask = ((full_mask >> 1) | full_mask) & 0x55; // Rows or Cols being written instead of data, or protected.
|
||||||
|
|
||||||
|
// Every channel is write protected for this cycle, no need to process anything.
|
||||||
|
skipProcessing = full_mask == 0xff;
|
||||||
|
|
||||||
|
// All channels are masked, no reason to process anything here.
|
||||||
|
inputMasked = rowcol_mask == 0x55;
|
||||||
|
}
|
||||||
|
|
||||||
void VifUnpackSSE_Dynarec::CompileRoutine()
|
void VifUnpackSSE_Dynarec::CompileRoutine()
|
||||||
{
|
{
|
||||||
const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2)
|
const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2)
|
||||||
|
@ -275,7 +289,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
|
||||||
uint vNum = vB.num ? vB.num : 256;
|
uint vNum = vB.num ? vB.num : 256;
|
||||||
doMode = (upkNum == 0xf) ? 0 : doMode; // V4_5 has no mode feature.
|
doMode = (upkNum == 0xf) ? 0 : doMode; // V4_5 has no mode feature.
|
||||||
UnpkNoOfIterations = 0;
|
UnpkNoOfIterations = 0;
|
||||||
MSKPATH3_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
|
VIF_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
|
||||||
|
|
||||||
pxAssume(vCL == 0);
|
pxAssume(vCL == 0);
|
||||||
|
|
||||||
|
@ -293,6 +307,9 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
|
||||||
if (UnpkNoOfIterations == 0)
|
if (UnpkNoOfIterations == 0)
|
||||||
ShiftDisplacementWindow(srcIndirect, arg2reg); //Don't need to do this otherwise as we arent reading the source.
|
ShiftDisplacementWindow(srcIndirect, arg2reg); //Don't need to do this otherwise as we arent reading the source.
|
||||||
|
|
||||||
|
// Determine if reads/processing can be skipped.
|
||||||
|
ProcessMasks();
|
||||||
|
|
||||||
if (vCL < cycleSize)
|
if (vCL < cycleSize)
|
||||||
{
|
{
|
||||||
ModUnpack(upkNum, false);
|
ModUnpack(upkNum, false);
|
||||||
|
@ -300,7 +317,6 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
|
||||||
xMovDest();
|
xMovDest();
|
||||||
ModUnpack(upkNum, true);
|
ModUnpack(upkNum, true);
|
||||||
|
|
||||||
|
|
||||||
dstIndirect += 16;
|
dstIndirect += 16;
|
||||||
srcIndirect += vift;
|
srcIndirect += vift;
|
||||||
|
|
||||||
|
@ -311,13 +327,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
|
||||||
else if (isFill)
|
else if (isFill)
|
||||||
{
|
{
|
||||||
// Filling doesn't need anything fancy, it's pretty much a normal write, just doesnt increment the source.
|
// Filling doesn't need anything fancy, it's pretty much a normal write, just doesnt increment the source.
|
||||||
// If all vectors read a row or column or are masked, we don't need to process the source at all.
|
xUnpack(upkNum);
|
||||||
const int cc = std::min(vCL, 3);
|
|
||||||
u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
|
|
||||||
m0 = (m0 >> 1) | m0;
|
|
||||||
|
|
||||||
if ((m0 & 0x55) != 0x55)
|
|
||||||
xUnpack(upkNum);
|
|
||||||
xMovDest();
|
xMovDest();
|
||||||
|
|
||||||
dstIndirect += 16;
|
dstIndirect += 16;
|
||||||
|
|
|
@ -46,8 +46,13 @@ VifUnpackSSE_Base::VifUnpackSSE_Base()
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xMovDest() const
|
void VifUnpackSSE_Base::xMovDest() const
|
||||||
{
|
{
|
||||||
if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); }
|
if (!IsWriteProtectedOp())
|
||||||
else { doMaskWrite(destReg); }
|
{
|
||||||
|
if (IsUnmaskedOp())
|
||||||
|
xMOVAPS(ptr[dstIndirect], destReg);
|
||||||
|
else
|
||||||
|
doMaskWrite(destReg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const
|
void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const
|
||||||
|
@ -70,10 +75,15 @@ void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xUPK_S_32() const
|
void VifUnpackSSE_Base::xUPK_S_32() const
|
||||||
{
|
{
|
||||||
|
if (UnpkLoopIteration == 0)
|
||||||
|
xMOV128(workReg, ptr32[srcIndirect]);
|
||||||
|
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
switch (UnpkLoopIteration)
|
switch (UnpkLoopIteration)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
xMOV128(workReg, ptr32[srcIndirect]);
|
|
||||||
xPSHUF.D(destReg, workReg, _v0);
|
xPSHUF.D(destReg, workReg, _v0);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
|
@ -90,10 +100,15 @@ void VifUnpackSSE_Base::xUPK_S_32() const
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xUPK_S_16() const
|
void VifUnpackSSE_Base::xUPK_S_16() const
|
||||||
{
|
{
|
||||||
|
if (UnpkLoopIteration == 0)
|
||||||
|
xPMOVXX16(workReg);
|
||||||
|
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
switch (UnpkLoopIteration)
|
switch (UnpkLoopIteration)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
xPMOVXX16(workReg);
|
|
||||||
xPSHUF.D(destReg, workReg, _v0);
|
xPSHUF.D(destReg, workReg, _v0);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
|
@ -110,10 +125,15 @@ void VifUnpackSSE_Base::xUPK_S_16() const
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xUPK_S_8() const
|
void VifUnpackSSE_Base::xUPK_S_8() const
|
||||||
{
|
{
|
||||||
|
if (UnpkLoopIteration == 0)
|
||||||
|
xPMOVXX8(workReg);
|
||||||
|
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
switch (UnpkLoopIteration)
|
switch (UnpkLoopIteration)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
xPMOVXX8(workReg);
|
|
||||||
xPSHUF.D(destReg, workReg, _v0);
|
xPSHUF.D(destReg, workReg, _v0);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
|
@ -138,12 +158,19 @@ void VifUnpackSSE_Base::xUPK_V2_32() const
|
||||||
if (UnpkLoopIteration == 0)
|
if (UnpkLoopIteration == 0)
|
||||||
{
|
{
|
||||||
xMOV128(workReg, ptr32[srcIndirect]);
|
xMOV128(workReg, ptr32[srcIndirect]);
|
||||||
|
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
|
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
|
||||||
if (IsAligned)
|
if (IsAligned)
|
||||||
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
|
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
|
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
|
||||||
if (IsAligned)
|
if (IsAligned)
|
||||||
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
|
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
|
||||||
|
@ -155,10 +182,17 @@ void VifUnpackSSE_Base::xUPK_V2_16() const
|
||||||
if (UnpkLoopIteration == 0)
|
if (UnpkLoopIteration == 0)
|
||||||
{
|
{
|
||||||
xPMOVXX16(workReg);
|
xPMOVXX16(workReg);
|
||||||
|
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
|
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
|
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -168,16 +202,26 @@ void VifUnpackSSE_Base::xUPK_V2_8() const
|
||||||
if (UnpkLoopIteration == 0)
|
if (UnpkLoopIteration == 0)
|
||||||
{
|
{
|
||||||
xPMOVXX8(workReg);
|
xPMOVXX8(workReg);
|
||||||
|
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
|
xPSHUF.D(destReg, workReg, 0x44); //v1v0v1v0
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
|
xPSHUF.D(destReg, workReg, 0xEE); //v3v2v3v2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xUPK_V3_32() const
|
void VifUnpackSSE_Base::xUPK_V3_32() const
|
||||||
{
|
{
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xMOV128(destReg, ptr128[srcIndirect]);
|
xMOV128(destReg, ptr128[srcIndirect]);
|
||||||
if (UnpkLoopIteration != IsAligned)
|
if (UnpkLoopIteration != IsAligned)
|
||||||
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
|
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
|
||||||
|
@ -185,6 +229,9 @@ void VifUnpackSSE_Base::xUPK_V3_32() const
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xUPK_V3_16() const
|
void VifUnpackSSE_Base::xUPK_V3_16() const
|
||||||
{
|
{
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xPMOVXX16(destReg);
|
xPMOVXX16(destReg);
|
||||||
|
|
||||||
//With V3-16, it takes the first vector from the next position as the W vector
|
//With V3-16, it takes the first vector from the next position as the W vector
|
||||||
|
@ -199,6 +246,9 @@ void VifUnpackSSE_Base::xUPK_V3_16() const
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xUPK_V3_8() const
|
void VifUnpackSSE_Base::xUPK_V3_8() const
|
||||||
{
|
{
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xPMOVXX8(destReg);
|
xPMOVXX8(destReg);
|
||||||
if (UnpkLoopIteration != IsAligned)
|
if (UnpkLoopIteration != IsAligned)
|
||||||
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
|
xBLEND.PS(destReg, zeroReg, 0x8); //zero last word - tested on ps2
|
||||||
|
@ -206,21 +256,33 @@ void VifUnpackSSE_Base::xUPK_V3_8() const
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xUPK_V4_32() const
|
void VifUnpackSSE_Base::xUPK_V4_32() const
|
||||||
{
|
{
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xMOV128(destReg, ptr32[srcIndirect]);
|
xMOV128(destReg, ptr32[srcIndirect]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xUPK_V4_16() const
|
void VifUnpackSSE_Base::xUPK_V4_16() const
|
||||||
{
|
{
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xPMOVXX16(destReg);
|
xPMOVXX16(destReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xUPK_V4_8() const
|
void VifUnpackSSE_Base::xUPK_V4_8() const
|
||||||
{
|
{
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xPMOVXX8(destReg);
|
xPMOVXX8(destReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VifUnpackSSE_Base::xUPK_V4_5() const
|
void VifUnpackSSE_Base::xUPK_V4_5() const
|
||||||
{
|
{
|
||||||
|
if (IsInputMasked())
|
||||||
|
return;
|
||||||
|
|
||||||
xMOV16 (workReg, ptr32[srcIndirect]);
|
xMOV16 (workReg, ptr32[srcIndirect]);
|
||||||
xPSHUF.D (workReg, workReg, _v0);
|
xPSHUF.D (workReg, workReg, _v0);
|
||||||
xPSLL.D (workReg, 3); // ABG|R5.000
|
xPSLL.D (workReg, 3); // ABG|R5.000
|
||||||
|
|
|
@ -48,6 +48,8 @@ public:
|
||||||
virtual ~VifUnpackSSE_Base() = default;
|
virtual ~VifUnpackSSE_Base() = default;
|
||||||
|
|
||||||
virtual void xUnpack(int upktype) const;
|
virtual void xUnpack(int upktype) const;
|
||||||
|
virtual bool IsWriteProtectedOp() const = 0;
|
||||||
|
virtual bool IsInputMasked() const = 0;
|
||||||
virtual bool IsUnmaskedOp() const = 0;
|
virtual bool IsUnmaskedOp() const = 0;
|
||||||
virtual void xMovDest() const;
|
virtual void xMovDest() const;
|
||||||
|
|
||||||
|
@ -90,6 +92,8 @@ public:
|
||||||
VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_);
|
VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_);
|
||||||
virtual ~VifUnpackSSE_Simple() = default;
|
virtual ~VifUnpackSSE_Simple() = default;
|
||||||
|
|
||||||
|
virtual bool IsWriteProtectedOp() const { return false; }
|
||||||
|
virtual bool IsInputMasked() const { return false; }
|
||||||
virtual bool IsUnmaskedOp() const { return !doMask; }
|
virtual bool IsUnmaskedOp() const { return !doMask; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -105,7 +109,9 @@ class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base
|
||||||
|
|
||||||
public:
|
public:
|
||||||
bool isFill;
|
bool isFill;
|
||||||
int doMode; // two bit value representing... something!
|
int doMode; // two bit value representing difference mode
|
||||||
|
bool skipProcessing;
|
||||||
|
bool inputMasked;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const nVifStruct& v; // vif0 or vif1
|
const nVifStruct& v; // vif0 or vif1
|
||||||
|
@ -125,9 +131,12 @@ public:
|
||||||
|
|
||||||
virtual ~VifUnpackSSE_Dynarec() = default;
|
virtual ~VifUnpackSSE_Dynarec() = default;
|
||||||
|
|
||||||
|
virtual bool IsWriteProtectedOp() const { return skipProcessing; }
|
||||||
|
virtual bool IsInputMasked() const { return inputMasked; }
|
||||||
virtual bool IsUnmaskedOp() const { return !doMode && !doMask; }
|
virtual bool IsUnmaskedOp() const { return !doMode && !doMask; }
|
||||||
|
|
||||||
void ModUnpack(int upknum, bool PostOp);
|
void ModUnpack(int upknum, bool PostOp);
|
||||||
|
void ProcessMasks();
|
||||||
void CompileRoutine();
|
void CompileRoutine();
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue