GS: Pixel loop optimizations

This commit is contained in:
TellowKrinkle 2021-03-01 05:43:48 -06:00 committed by refractionpcsx2
parent 86a2d73931
commit 39c7f11b98
6 changed files with 76 additions and 56 deletions

View File

@ -429,11 +429,6 @@ GSLocalMemory::~GSLocalMemory()
} }
} }
// Creates the GSOffset for the given (bp, bw, psm) triple, using the swizzle
// info stored in the m_psm entry selected by psm.
GSOffset GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
{
	const auto& swizzle = m_psm[psm].info;
	return GSOffset(swizzle, bp, bw, psm);
}
GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF) GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
{ {
uint32 fbp = FRAME.Block(); uint32 fbp = FRAME.Block();
@ -1178,6 +1173,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
int sx = (int)TRXPOS.DSAX; int sx = (int)TRXPOS.DSAX;
int ex = sx + (int)TRXREG.RRW; int ex = sx + (int)TRXREG.RRW;
GSOffset off = GetOffset(bp, bw, BITBLTBUF.DPSM);
switch (BITBLTBUF.DPSM) switch (BITBLTBUF.DPSM)
{ {
case PSM_PSMCT32: case PSM_PSMCT32:
@ -1187,7 +1184,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
while (len > 0) while (len > 0)
{ {
uint32 addr = psm->info.pa(0, y, bp, bw); uint32 addr = off.assertSizesMatch(swizzle32).pa(0, y);
int* offset = psm->rowOffset[y & 7]; int* offset = psm->rowOffset[y & 7];
for (; len > 0 && x < ex; len--, x++, pd++) for (; len > 0 && x < ex; len--, x++, pd++)
@ -1211,7 +1208,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
while (len > 0) while (len > 0)
{ {
uint32 addr = psm->info.pa(0, y, bp, bw); uint32 addr = off.assertSizesMatch(swizzle32).pa(0, y);
int* offset = psm->rowOffset[y & 7]; int* offset = psm->rowOffset[y & 7];
for (; len > 0 && x < ex; len--, x++, pb += 3) for (; len > 0 && x < ex; len--, x++, pb += 3)
@ -1237,7 +1234,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
while (len > 0) while (len > 0)
{ {
uint32 addr = psm->info.pa(0, y, bp, bw); uint32 addr = off.assertSizesMatch(swizzle16).pa(0, y);
int* offset = psm->rowOffset[y & 7]; int* offset = psm->rowOffset[y & 7];
for (; len > 0 && x < ex; len--, x++, pw++) for (; len > 0 && x < ex; len--, x++, pw++)
@ -1258,7 +1255,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
while (len > 0) while (len > 0)
{ {
uint32 addr = psm->info.pa(0, y, bp, bw); uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8).pa(0, y);
int* offset = psm->rowOffset[y & 7]; int* offset = psm->rowOffset[y & 7];
for (; len > 0 && x < ex; len--, x++, pb++) for (; len > 0 && x < ex; len--, x++, pb++)
@ -1279,7 +1276,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
while (len > 0) while (len > 0)
{ {
uint32 addr = psm->info.pa(0, y, bp, bw); uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4).pa(0, y);
int* offset = psm->rowOffset[y & 7]; int* offset = psm->rowOffset[y & 7];
for (; len > 0 && x < ex; len--, x += 2, pb++) for (; len > 0 && x < ex; len--, x += 2, pb++)
@ -1301,7 +1298,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
while (len > 0) while (len > 0)
{ {
uint32 addr = psm->info.pa(0, y, bp, bw); uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H).pa(0, y);
int* offset = psm->rowOffset[y & 7]; int* offset = psm->rowOffset[y & 7];
for (; len > 0 && x < ex; len--, x++, pb++) for (; len > 0 && x < ex; len--, x++, pb++)
@ -1322,7 +1319,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
while (len > 0) while (len > 0)
{ {
uint32 addr = psm->info.pa(0, y, bp, bw); uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL).pa(0, y);
int* offset = psm->rowOffset[y & 7]; int* offset = psm->rowOffset[y & 7];
for (; len > 0 && x < ex; len--, x += 2, pb++) for (; len > 0 && x < ex; len--, x += 2, pb++)
@ -1344,7 +1341,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
while (len > 0) while (len > 0)
{ {
uint32 addr = psm->info.pa(0, y, bp, bw); uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH).pa(0, y);
int* offset = psm->rowOffset[y & 7]; int* offset = psm->rowOffset[y & 7];
for (; len > 0 && x < ex; len--, x += 2, pb++) for (; len > 0 && x < ex; len--, x += 2, pb++)
@ -1387,6 +1384,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
int sx = (int)TRXPOS.SSAX; int sx = (int)TRXPOS.SSAX;
int ex = sx + (int)TRXREG.RRW; int ex = sx + (int)TRXREG.RRW;
GSOffset off = GetOffset(bp, bw, BITBLTBUF.SPSM);
// printf("spsm=%d x=%d ex=%d y=%d len=%d\n", BITBLTBUF.SPSM, x, ex, y, len); // printf("spsm=%d x=%d ex=%d y=%d len=%d\n", BITBLTBUF.SPSM, x, ex, y, len);
switch (BITBLTBUF.SPSM) switch (BITBLTBUF.SPSM)
@ -1401,7 +1400,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while (len > 0) while (len > 0)
{ {
int* RESTRICT offset = psm->rowOffset[y & 7]; int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->info.pa(0, y, bp, bw)]; uint32* RESTRICT ps = &m_vm32[off.assertSizesMatch(swizzle32).pa(0, y)];
for (; len > 0 && x < ex && (x & 7); len--, x++, pd++) for (; len > 0 && x < ex && (x & 7); len--, x++, pd++)
{ {
@ -1443,7 +1442,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while (len > 0) while (len > 0)
{ {
int* RESTRICT offset = psm->rowOffset[y & 7]; int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->info.pa(0, y, bp, bw)]; uint32* RESTRICT ps = &m_vm32[off.assertSizesMatch(swizzle32).pa(0, y)];
for (; len > 0 && x < ex; len--, x++, pb += 3) for (; len > 0 && x < ex; len--, x++, pb += 3)
{ {
@ -1473,7 +1472,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while (len > 0) while (len > 0)
{ {
int* RESTRICT offset = psm->rowOffset[y & 7]; int* RESTRICT offset = psm->rowOffset[y & 7];
uint16* RESTRICT ps = &m_vm16[psm->info.pa(0, y, bp, bw)]; uint16* RESTRICT ps = &m_vm16[off.assertSizesMatch(swizzle16).pa(0, y)];
for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4) for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4)
{ {
@ -1502,7 +1501,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while (len > 0) while (len > 0)
{ {
int* RESTRICT offset = psm->rowOffset[y & 7]; int* RESTRICT offset = psm->rowOffset[y & 7];
uint8* RESTRICT ps = &m_vm8[psm->info.pa(0, y, bp, bw)]; uint8* RESTRICT ps = &m_vm8[GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8).pa(0, y)];
for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4) for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
{ {
@ -1530,7 +1529,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while (len > 0) while (len > 0)
{ {
uint32 addr = psm->info.pa(0, y, bp, bw); uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4).pa(0, y);
int* RESTRICT offset = psm->rowOffset[y & 7]; int* RESTRICT offset = psm->rowOffset[y & 7];
for (; len > 0 && x < ex; len--, x += 2, pb++) for (; len > 0 && x < ex; len--, x += 2, pb++)
@ -1552,7 +1551,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while (len > 0) while (len > 0)
{ {
int* RESTRICT offset = psm->rowOffset[y & 7]; int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->info.pa(0, y, bp, bw)]; uint32* RESTRICT ps = &m_vm32[GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H).pa(0, y)];
for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4) for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
{ {
@ -1581,7 +1580,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while (len > 0) while (len > 0)
{ {
int* offset = psm->rowOffset[y & 7]; int* offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->info.pa(0, y, bp, bw)]; uint32* RESTRICT ps = &m_vm32[GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL).pa(0, y)];
for (; len > 0 && x < ex; len--, x += 2, pb++) for (; len > 0 && x < ex; len--, x += 2, pb++)
{ {
@ -1605,7 +1604,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while (len > 0) while (len > 0)
{ {
int* RESTRICT offset = psm->rowOffset[y & 7]; int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->info.pa(0, y, bp, bw)]; uint32* RESTRICT ps = &m_vm32[GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH).pa(0, y)];
for (; len > 0 && x < ex; len--, x += 2, pb++) for (; len > 0 && x < ex; len--, x += 2, pb++)
{ {
@ -1633,7 +1632,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
void GSLocalMemory::ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock32(src, read_dst, dstpitch); GSBlock::ReadBlock32(src, read_dst, dstpitch);
}); });
@ -1643,14 +1642,14 @@ void GSLocalMemory::ReadTexture24(const GSOffset& off, const GSVector4i& r, uint
{ {
if (TEXA.AEM) if (TEXA.AEM)
{ {
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA); GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA);
}); });
} }
else else
{ {
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA); GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA);
}); });
@ -1659,7 +1658,7 @@ void GSLocalMemory::ReadTexture24(const GSOffset& off, const GSVector4i& r, uint
void GSLocalMemory::ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
foreachBlock(off, this, r, dst, dstpitch, 16, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 16, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock16(src, read_dst, dstpitch); GSBlock::ReadBlock16(src, read_dst, dstpitch);
}); });
@ -1681,14 +1680,14 @@ void GSLocalMemory::ReadTexture16(const GSOffset& off, const GSVector4i& r, uint
{ {
if (TEXA.AEM) if (TEXA.AEM)
{ {
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA); GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA);
}); });
} }
else else
{ {
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA); GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA);
}); });
@ -1699,7 +1698,7 @@ void GSLocalMemory::ReadTexture8(const GSOffset& off, const GSVector4i& r, uint8
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle8), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal); GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal);
}); });
@ -1709,7 +1708,7 @@ void GSLocalMemory::ReadTexture4(const GSOffset& off, const GSVector4i& r, uint8
{ {
const uint64* pal = m_clut; const uint64* pal = m_clut;
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle4), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal); GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal);
}); });
@ -1719,7 +1718,7 @@ void GSLocalMemory::ReadTexture8H(const GSOffset& off, const GSVector4i& r, uint
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal); GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal);
}); });
@ -1729,7 +1728,7 @@ void GSLocalMemory::ReadTexture4HL(const GSOffset& off, const GSVector4i& r, uin
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal); GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal);
}); });
@ -1739,7 +1738,7 @@ void GSLocalMemory::ReadTexture4HH(const GSOffset& off, const GSVector4i& r, uin
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal); GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal);
}); });
@ -1900,7 +1899,7 @@ void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8*
void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle8), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock8(src, read_dst, dstpitch); GSBlock::ReadBlock8(src, read_dst, dstpitch);
}); });
@ -1908,7 +1907,7 @@ void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint
void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle4), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock4P(src, read_dst, dstpitch); GSBlock::ReadBlock4P(src, read_dst, dstpitch);
}); });
@ -1916,7 +1915,7 @@ void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint
void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock8HP(src, read_dst, dstpitch); GSBlock::ReadBlock8HP(src, read_dst, dstpitch);
}); });
@ -1924,7 +1923,7 @@ void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uin
void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock4HLP(src, read_dst, dstpitch); GSBlock::ReadBlock4HLP(src, read_dst, dstpitch);
}); });
@ -1932,7 +1931,7 @@ void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, ui
void GSLocalMemory::ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src) foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock4HHP(src, read_dst, dstpitch); GSBlock::ReadBlock4HHP(src, read_dst, dstpitch);
}); });

View File

@ -298,6 +298,10 @@ public:
{ {
pageLooperForRect(rect).loopPages(std::forward<Fn>(fn)); pageLooperForRect(rect).loopPages(std::forward<Fn>(fn));
} }
/// Use compile-time dimensions from `swz` as a performance optimization
/// Returns a copy of this offset whose page mask, block mask, page shifts (X/Y)
/// and block shifts (X/Y) have been replaced by the values stored in `swz`
/// Also asserts if your assumption was wrong, i.e. if any of those fields did
/// not already equal the corresponding field of `swz`
constexpr GSOffset assertSizesMatch(const GSSwizzleInfo& swz) const;
}; };
class GSSwizzleInfo class GSSwizzleInfo
@ -372,6 +376,20 @@ constexpr inline GSOffset::GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32
{ {
} }
/// Builds a copy of this offset in which every swizzle dimension (page mask,
/// block mask, page shifts and block shifts) is taken from `swz` instead of
/// from this object.
/// Each field is ASSERTed to already equal the value in `swz` before it is
/// overwritten, so a wrong assumption about the format is caught wherever
/// ASSERT is active.
constexpr GSOffset GSOffset::assertSizesMatch(const GSSwizzleInfo& swz) const
{
	GSOffset o = *this;

	// Masks
	ASSERT(o.m_pageMask == swz.m_pageMask);
	o.m_pageMask = swz.m_pageMask;
	ASSERT(o.m_blockMask == swz.m_blockMask);
	o.m_blockMask = swz.m_blockMask;

	// Page shifts
	ASSERT(o.m_pageShiftX == swz.m_pageShiftX);
	o.m_pageShiftX = swz.m_pageShiftX;
	ASSERT(o.m_pageShiftY == swz.m_pageShiftY);
	o.m_pageShiftY = swz.m_pageShiftY;

	// Block shifts
	ASSERT(o.m_blockShiftX == swz.m_blockShiftX);
	o.m_blockShiftX = swz.m_blockShiftX;
	ASSERT(o.m_blockShiftY == swz.m_blockShiftY);
	o.m_blockShiftY = swz.m_blockShiftY;

	return o;
}
class GSLocalMemory : public GSAlignedClass<32> class GSLocalMemory : public GSAlignedClass<32>
{ {
public: public:
@ -478,7 +496,10 @@ public:
GSLocalMemory(); GSLocalMemory();
virtual ~GSLocalMemory(); virtual ~GSLocalMemory();
/// Returns a GSOffset constructed from the swizzle info in m_psm[psm] together
/// with bp, bw and psm.
/// NOTE(review): psm indexes m_psm directly with no range check visible here;
/// presumably callers only pass valid format values — confirm.
GSOffset GetOffset(uint32 bp, uint32 bw, uint32 psm); GSOffset GetOffset(uint32 bp, uint32 bw, uint32 psm) const
{
return GSOffset(m_psm[psm].info, bp, bw, psm);
}
GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0); std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);

View File

@ -1650,10 +1650,14 @@ void GSState::Move()
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format) // TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
GSOffset spo = m_mem.GetOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM); int sbp = m_env.BITBLTBUF.SBP;
GSOffset dpo = m_mem.GetOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM); int sbw = m_env.BITBLTBUF.SBW;
int dbp = m_env.BITBLTBUF.DBP;
int dbw = m_env.BITBLTBUF.DBW;
GSOffset spo = m_mem.GetOffset(sbp, sbw, m_env.BITBLTBUF.SPSM);
GSOffset dpo = m_mem.GetOffset(dbp, dbw, m_env.BITBLTBUF.DPSM);
auto copy = [&](auto&& pxCopyFn) auto copy = [&](const GSOffset& dpo, const GSOffset& spo, auto&& pxCopyFn)
{ {
if (xinc > 0) if (xinc > 0)
{ {
@ -1691,14 +1695,14 @@ void GSState::Move()
{ {
if (spsm.trbpp == 32) if (spsm.trbpp == 32)
{ {
copy([&](uint32 doff, uint32 soff) copy(dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [&](uint32 doff, uint32 soff)
{ {
m_mem.m_vm32[doff] = m_mem.m_vm32[soff]; m_mem.m_vm32[doff] = m_mem.m_vm32[soff];
}); });
} }
else if (spsm.trbpp == 24) else if (spsm.trbpp == 24)
{ {
copy([&](uint32 doff, uint32 soff) copy(dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [&](uint32 doff, uint32 soff)
{ {
uint32& d = m_mem.m_vm32[doff]; uint32& d = m_mem.m_vm32[doff];
d = (d & 0xff000000) | (m_mem.m_vm32[soff] & 0x00ffffff); d = (d & 0xff000000) | (m_mem.m_vm32[soff] & 0x00ffffff);
@ -1706,7 +1710,7 @@ void GSState::Move()
} }
else // if(spsm.trbpp == 16) else // if(spsm.trbpp == 16)
{ {
copy([&](uint32 doff, uint32 soff) copy(dpo.assertSizesMatch(GSLocalMemory::swizzle16), spo.assertSizesMatch(GSLocalMemory::swizzle16), [&](uint32 doff, uint32 soff)
{ {
m_mem.m_vm16[doff] = m_mem.m_vm16[soff]; m_mem.m_vm16[doff] = m_mem.m_vm16[soff];
}); });
@ -1714,21 +1718,21 @@ void GSState::Move()
} }
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8) else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
{ {
copy([&](uint32 doff, uint32 soff) copy(GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT8), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT8), [&](uint32 doff, uint32 soff)
{ {
m_mem.m_vm8[doff] = m_mem.m_vm8[soff]; m_mem.m_vm8[doff] = m_mem.m_vm8[soff];
}); });
} }
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4) else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
{ {
copy([&](uint32 doff, uint32 soff) copy(GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT4), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT4), [&](uint32 doff, uint32 soff)
{ {
m_mem.WritePixel4(doff, m_mem.ReadPixel4(soff)); m_mem.WritePixel4(doff, m_mem.ReadPixel4(soff));
}); });
} }
else else
{ {
copy([&](uint32 doff, uint32 soff) copy(dpo, spo, [&](uint32 doff, uint32 soff)
{ {
(m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff)); (m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff));
}); });

View File

@ -63,12 +63,12 @@ public:
{ {
} }
/// Equality: true only when both the x and y components match those of `v`.
bool operator==(const GSVector2T& v) const constexpr bool operator==(const GSVector2T& v) const
{ {
return x == v.x && y == v.y; return x == v.x && y == v.y;
} }
/// Inequality: true when either the x or the y component differs from that of `v`.
bool operator!=(const GSVector2T& v) const constexpr bool operator!=(const GSVector2T& v) const
{ {
return x != v.x || y != v.y; return x != v.x || y != v.y;
} }

View File

@ -1827,7 +1827,7 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel32 // Based on WritePixel32
for (int y = r.top; y < r.bottom; y++) for (int y = r.top; y < r.bottom; y++)
{ {
GSOffset::PAHelper pa = off.paMulti(r.left, y); GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y);
for (; pa.x() < r.right; pa.incX()) for (; pa.x() < r.right; pa.incX())
{ {
@ -1840,7 +1840,7 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel24 // Based on WritePixel24
for (int y = r.top; y < r.bottom; y++) for (int y = r.top; y < r.bottom; y++)
{ {
GSOffset::PAHelper pa = off.paMulti(r.left, y); GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y);
for (; pa.x() < r.right; pa.incX()) for (; pa.x() < r.right; pa.incX())
{ {
@ -1855,7 +1855,7 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel16 // Based on WritePixel16
for(int y = r.top; y < r.bottom; y++) for(int y = r.top; y < r.bottom; y++)
{ {
GSOffset::PAHelper pa = off.paMulti(r.left, y); GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(r.left, y);
for(int x = r.left; x < r.right; x++) for(int x = r.left; x < r.right; x++)
{ {

View File

@ -21,10 +21,6 @@
#include "GSLocalMemory.h" #include "GSLocalMemory.h"
GSLocalMemory::psm_t GSLocalMemory::m_psm[64]; GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
// Stub definition: ignores its arguments and unconditionally calls abort(),
// so it never returns a GSOffset.
// NOTE(review): presumably only here to satisfy the linker in this translation
// unit — confirm nothing is expected to call it.
GSOffset GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
{
abort();
}
void* vmalloc(size_t size, bool code) void* vmalloc(size_t size, bool code)
{ {