mirror of https://github.com/PCSX2/pcsx2.git
Vif: Don't reserve regs for unused columns
We can reuse those for other purposes
This commit is contained in:
parent
6334082e6f
commit
29a98f317e
|
@ -51,18 +51,18 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
|
||||||
|
|
||||||
if ((doMask && m2) || doMode)
|
if ((doMask && m2) || doMode)
|
||||||
{
|
{
|
||||||
xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
|
xMOVAPS(rowReg, ptr128[&vif.MaskRow]);
|
||||||
MSKPATH3_LOG("Moving row");
|
MSKPATH3_LOG("Moving row");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (doMask && m3)
|
if (doMask && m3)
|
||||||
{
|
{
|
||||||
VIF_LOG("Merging Cols");
|
VIF_LOG("Merging Cols");
|
||||||
xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
|
xMOVAPS(colRegs[0], ptr128[&vif.MaskCol]);
|
||||||
if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
|
if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(colRegs[1], colRegs[0], _v1);
|
||||||
if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
|
if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(colRegs[2], colRegs[0], _v2);
|
||||||
if ((cS >= 4) && (m3 & 0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
|
if ((cS >= 4) && (m3 & 0xff000000)) xPSHUF.D(colRegs[3], colRegs[0], _v3);
|
||||||
if ((cS >= 1) && (m3 & 0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0);
|
if ((cS >= 1) && (m3 & 0x000000ff)) xPSHUF.D(colRegs[0], colRegs[0], _v0);
|
||||||
}
|
}
|
||||||
//if (doMask||doMode) loadRowCol((nVifStruct&)v);
|
//if (doMask||doMode) loadRowCol((nVifStruct&)v);
|
||||||
}
|
}
|
||||||
|
@ -83,12 +83,12 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
|
||||||
|
|
||||||
if (doMask && m2) // Merge MaskRow
|
if (doMask && m2) // Merge MaskRow
|
||||||
{
|
{
|
||||||
mVUmergeRegs(regX, xmmRow, m2);
|
mVUmergeRegs(regX, rowReg, m2);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (doMask && m3) // Merge MaskCol
|
if (doMask && m3) // Merge MaskCol
|
||||||
{
|
{
|
||||||
mVUmergeRegs(regX, xRegisterSSE(xmmCol0.Id + cc), m3);
|
mVUmergeRegs(regX, colRegs[cc], m3);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (doMode)
|
if (doMode)
|
||||||
|
@ -102,28 +102,28 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
|
||||||
{
|
{
|
||||||
if (doMode == 3)
|
if (doMode == 3)
|
||||||
{
|
{
|
||||||
mVUmergeRegs(xmmRow, regX, m5);
|
mVUmergeRegs(rowReg, regX, m5);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
xPXOR(xmmTemp, xmmTemp);
|
xPXOR(tmpReg, tmpReg);
|
||||||
mVUmergeRegs(xmmTemp, xmmRow, m5);
|
mVUmergeRegs(tmpReg, rowReg, m5);
|
||||||
xPADD.D(regX, xmmTemp);
|
xPADD.D(regX, tmpReg);
|
||||||
if (doMode == 2)
|
if (doMode == 2)
|
||||||
mVUmergeRegs(xmmRow, regX, m5);
|
mVUmergeRegs(rowReg, regX, m5);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (doMode == 3)
|
if (doMode == 3)
|
||||||
{
|
{
|
||||||
xMOVAPS(xmmRow, regX);
|
xMOVAPS(rowReg, regX);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
xPADD.D(regX, xmmRow);
|
xPADD.D(regX, rowReg);
|
||||||
if (doMode == 2)
|
if (doMode == 2)
|
||||||
xMOVAPS(xmmRow, regX);
|
xMOVAPS(rowReg, regX);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -137,7 +137,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
|
||||||
void VifUnpackSSE_Dynarec::writeBackRow() const
|
void VifUnpackSSE_Dynarec::writeBackRow() const
|
||||||
{
|
{
|
||||||
const int idx = v.idx;
|
const int idx = v.idx;
|
||||||
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
|
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], rowReg);
|
||||||
|
|
||||||
VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode);
|
VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode);
|
||||||
}
|
}
|
||||||
|
@ -262,39 +262,69 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
|
||||||
const u32 m0 = vB.mask;
|
const u32 m0 = vB.mask;
|
||||||
const u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0;
|
const u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0;
|
||||||
const u32 m2 = (m0 & 0x55555555) & (~m0 >> 1);
|
const u32 m2 = (m0 & 0x55555555) & (~m0 >> 1);
|
||||||
const bool needXmmRow = ((doMask && m2) || doMode);
|
|
||||||
// see doMaskWrite()
|
// see doMaskWrite()
|
||||||
const u32 m4 = (m0 & ~((m3 << 1) | m2)) & 0x55555555;
|
const u32 m4 = (m0 & ~((m3 << 1) | m2)) & 0x55555555;
|
||||||
const u32 m5 = ~(m2 | m3 | m4) & 0x0f0f0f0f;
|
const u32 m5 = ~(m2 | m3 | m4) & 0x0f0f0f0f;
|
||||||
const bool needXmmTemp = doMode && (doMode != 3) && doMask && m5 != 0x0f0f0f0f;
|
|
||||||
|
|
||||||
// Backup non-volatile registers if needed
|
int regsUsed = 2;
|
||||||
if (needXmmZero || needXmmRow || needXmmTemp)
|
// Allocate column registers
|
||||||
|
if (doMask && m3)
|
||||||
{
|
{
|
||||||
int toBackup = 0;
|
colRegs[0] = xRegisterSSE(regsUsed++);
|
||||||
if (needXmmRow)
|
|
||||||
toBackup++;
|
|
||||||
if (needXmmTemp)
|
|
||||||
toBackup++;
|
|
||||||
if (needXmmZero)
|
|
||||||
toBackup++;
|
|
||||||
|
|
||||||
xSUB(rsp, 8 + 16 * toBackup);
|
const int cS = isFill ? blockSize : cycleSize;
|
||||||
|
if ((cS >= 2) && (m3 & 0x0000ff00))
|
||||||
int idx = 0;
|
colRegs[1] = xRegisterSSE(regsUsed++);
|
||||||
if (needXmmRow)
|
if ((cS >= 3) && (m3 & 0x00ff0000))
|
||||||
{
|
colRegs[2] = xRegisterSSE(regsUsed++);
|
||||||
xMOVAPS(ptr128[rsp], xmmRow); // xmm6
|
if ((cS >= 4) && (m3 & 0xff000000))
|
||||||
idx++;
|
colRegs[3] = xRegisterSSE(regsUsed++);
|
||||||
}
|
// Column 0 already accounted for
|
||||||
if (needXmmTemp)
|
|
||||||
{
|
|
||||||
xMOVAPS(ptr128[rsp + 16 * idx], xmmTemp); // xmm7
|
|
||||||
idx++;
|
|
||||||
}
|
|
||||||
if (needXmmZero)
|
|
||||||
xMOVAPS(ptr128[rsp + 16 * idx], zeroReg); // xmm15
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::array<xRegisterSSE, 3> nonVolatileRegs;
|
||||||
|
|
||||||
|
// Allocate row register
|
||||||
|
if ((doMask && m2) || doMode)
|
||||||
|
{
|
||||||
|
if (regsUsed - 6 >= 0)
|
||||||
|
nonVolatileRegs[regsUsed - 6] = rowReg;
|
||||||
|
rowReg = xRegisterSSE(regsUsed++);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate temp register
|
||||||
|
if (doMode && (doMode != 3) &&
|
||||||
|
doMask && m5 != 0x0f0f0f0f)
|
||||||
|
{
|
||||||
|
if (regsUsed - 6 >= 0)
|
||||||
|
nonVolatileRegs[regsUsed - 6] = tmpReg;
|
||||||
|
tmpReg = xRegisterSSE(regsUsed++);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate zero register
|
||||||
|
if (needXmmZero)
|
||||||
|
{
|
||||||
|
if (regsUsed - 6 >= 0)
|
||||||
|
nonVolatileRegs[regsUsed - 6] = zeroReg;
|
||||||
|
zeroReg = xRegisterSSE(regsUsed++);
|
||||||
|
}
|
||||||
|
|
||||||
|
regsUsed -= 6;
|
||||||
|
// Backup non-volatile registers if needed
|
||||||
|
if (regsUsed > 0)
|
||||||
|
{
|
||||||
|
xSUB(rsp, 8 + 16 * regsUsed);
|
||||||
|
for (int i = 0; i < regsUsed; i++)
|
||||||
|
xMOVAPS(ptr128[rsp + 16 * i], nonVolatileRegs[i]);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
colRegs[0] = xmm2;
|
||||||
|
colRegs[1] = xmm3;
|
||||||
|
colRegs[2] = xmm4;
|
||||||
|
colRegs[3] = xmm5;
|
||||||
|
rowReg = xmm6;
|
||||||
|
tmpReg = xmm7;
|
||||||
|
// zeroReg already set;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Value passed determines # of col regs we need to load
|
// Value passed determines # of col regs we need to load
|
||||||
|
@ -352,25 +382,11 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
// Restore non-volatile registers
|
// Restore non-volatile registers
|
||||||
if (needXmmZero || needXmmRow || needXmmTemp)
|
if (regsUsed > 0)
|
||||||
{
|
{
|
||||||
int toRestore = 0;
|
for (int i = 0; i < regsUsed; i++)
|
||||||
if (needXmmRow)
|
xMOVAPS(nonVolatileRegs[i], ptr128[rsp + 16 * i]);
|
||||||
{
|
xADD(rsp, 8 + 16 * regsUsed);
|
||||||
xMOVAPS(xmmRow, ptr128[rsp]); // xmm6
|
|
||||||
toRestore++;
|
|
||||||
}
|
|
||||||
if (needXmmTemp)
|
|
||||||
{
|
|
||||||
xMOVAPS(xmmTemp, ptr128[rsp + 16 * toRestore]); // xmm7
|
|
||||||
toRestore++;
|
|
||||||
}
|
|
||||||
if (needXmmZero)
|
|
||||||
{
|
|
||||||
xMOVAPS(zeroReg, ptr128[rsp + 16 * toRestore]); // xmm15
|
|
||||||
toRestore++;
|
|
||||||
}
|
|
||||||
xADD(rsp, 8 + 16 * toRestore);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -102,6 +102,10 @@ protected:
|
||||||
const nVifBlock& vB; // some pre-collected data from VifStruct
|
const nVifBlock& vB; // some pre-collected data from VifStruct
|
||||||
int vCL; // internal copy of vif->cl
|
int vCL; // internal copy of vif->cl
|
||||||
|
|
||||||
|
std::array<xRegisterSSE, 4> colRegs;
|
||||||
|
xRegisterSSE rowReg;
|
||||||
|
xRegisterSSE tmpReg;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_);
|
VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_);
|
||||||
VifUnpackSSE_Dynarec(const VifUnpackSSE_Dynarec& src) // copy constructor
|
VifUnpackSSE_Dynarec(const VifUnpackSSE_Dynarec& src) // copy constructor
|
||||||
|
|
|
@ -17,9 +17,3 @@ extern void mVUsaveReg(const xRegisterSSE& reg, xAddressVoid ptr, int xyzw, boo
|
||||||
#define _v1 0x55
|
#define _v1 0x55
|
||||||
#define _v2 0xaa
|
#define _v2 0xaa
|
||||||
#define _v3 0xff
|
#define _v3 0xff
|
||||||
#define xmmCol0 xmm2
|
|
||||||
#define xmmCol1 xmm3
|
|
||||||
#define xmmCol2 xmm4
|
|
||||||
#define xmmCol3 xmm5
|
|
||||||
#define xmmRow xmm6
|
|
||||||
#define xmmTemp xmm7
|
|
||||||
|
|
Loading…
Reference in New Issue