mirror of https://github.com/PCSX2/pcsx2.git
GS: Pixel loop optimizations
This commit is contained in:
parent
86a2d73931
commit
39c7f11b98
|
@ -429,11 +429,6 @@ GSLocalMemory::~GSLocalMemory()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GSOffset GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
|
||||||
{
|
|
||||||
return GSOffset(m_psm[psm].info, bp, bw, psm);
|
|
||||||
}
|
|
||||||
|
|
||||||
GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
|
GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
|
||||||
{
|
{
|
||||||
uint32 fbp = FRAME.Block();
|
uint32 fbp = FRAME.Block();
|
||||||
|
@ -1178,6 +1173,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
int sx = (int)TRXPOS.DSAX;
|
int sx = (int)TRXPOS.DSAX;
|
||||||
int ex = sx + (int)TRXREG.RRW;
|
int ex = sx + (int)TRXREG.RRW;
|
||||||
|
|
||||||
|
GSOffset off = GetOffset(bp, bw, BITBLTBUF.DPSM);
|
||||||
|
|
||||||
switch (BITBLTBUF.DPSM)
|
switch (BITBLTBUF.DPSM)
|
||||||
{
|
{
|
||||||
case PSM_PSMCT32:
|
case PSM_PSMCT32:
|
||||||
|
@ -1187,7 +1184,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
uint32 addr = psm->info.pa(0, y, bp, bw);
|
uint32 addr = off.assertSizesMatch(swizzle32).pa(0, y);
|
||||||
int* offset = psm->rowOffset[y & 7];
|
int* offset = psm->rowOffset[y & 7];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x++, pd++)
|
for (; len > 0 && x < ex; len--, x++, pd++)
|
||||||
|
@ -1211,7 +1208,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
uint32 addr = psm->info.pa(0, y, bp, bw);
|
uint32 addr = off.assertSizesMatch(swizzle32).pa(0, y);
|
||||||
int* offset = psm->rowOffset[y & 7];
|
int* offset = psm->rowOffset[y & 7];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x++, pb += 3)
|
for (; len > 0 && x < ex; len--, x++, pb += 3)
|
||||||
|
@ -1237,7 +1234,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
uint32 addr = psm->info.pa(0, y, bp, bw);
|
uint32 addr = off.assertSizesMatch(swizzle16).pa(0, y);
|
||||||
int* offset = psm->rowOffset[y & 7];
|
int* offset = psm->rowOffset[y & 7];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x++, pw++)
|
for (; len > 0 && x < ex; len--, x++, pw++)
|
||||||
|
@ -1258,7 +1255,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
uint32 addr = psm->info.pa(0, y, bp, bw);
|
uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8).pa(0, y);
|
||||||
int* offset = psm->rowOffset[y & 7];
|
int* offset = psm->rowOffset[y & 7];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x++, pb++)
|
for (; len > 0 && x < ex; len--, x++, pb++)
|
||||||
|
@ -1279,7 +1276,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
uint32 addr = psm->info.pa(0, y, bp, bw);
|
uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4).pa(0, y);
|
||||||
int* offset = psm->rowOffset[y & 7];
|
int* offset = psm->rowOffset[y & 7];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
||||||
|
@ -1301,7 +1298,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
uint32 addr = psm->info.pa(0, y, bp, bw);
|
uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H).pa(0, y);
|
||||||
int* offset = psm->rowOffset[y & 7];
|
int* offset = psm->rowOffset[y & 7];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x++, pb++)
|
for (; len > 0 && x < ex; len--, x++, pb++)
|
||||||
|
@ -1322,7 +1319,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
uint32 addr = psm->info.pa(0, y, bp, bw);
|
uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL).pa(0, y);
|
||||||
int* offset = psm->rowOffset[y & 7];
|
int* offset = psm->rowOffset[y & 7];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
||||||
|
@ -1344,7 +1341,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
uint32 addr = psm->info.pa(0, y, bp, bw);
|
uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH).pa(0, y);
|
||||||
int* offset = psm->rowOffset[y & 7];
|
int* offset = psm->rowOffset[y & 7];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
||||||
|
@ -1387,6 +1384,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
int sx = (int)TRXPOS.SSAX;
|
int sx = (int)TRXPOS.SSAX;
|
||||||
int ex = sx + (int)TRXREG.RRW;
|
int ex = sx + (int)TRXREG.RRW;
|
||||||
|
|
||||||
|
GSOffset off = GetOffset(bp, bw, BITBLTBUF.SPSM);
|
||||||
|
|
||||||
// printf("spsm=%d x=%d ex=%d y=%d len=%d\n", BITBLTBUF.SPSM, x, ex, y, len);
|
// printf("spsm=%d x=%d ex=%d y=%d len=%d\n", BITBLTBUF.SPSM, x, ex, y, len);
|
||||||
|
|
||||||
switch (BITBLTBUF.SPSM)
|
switch (BITBLTBUF.SPSM)
|
||||||
|
@ -1401,7 +1400,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||||
uint32* RESTRICT ps = &m_vm32[psm->info.pa(0, y, bp, bw)];
|
uint32* RESTRICT ps = &m_vm32[off.assertSizesMatch(swizzle32).pa(0, y)];
|
||||||
|
|
||||||
for (; len > 0 && x < ex && (x & 7); len--, x++, pd++)
|
for (; len > 0 && x < ex && (x & 7); len--, x++, pd++)
|
||||||
{
|
{
|
||||||
|
@ -1443,7 +1442,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||||
uint32* RESTRICT ps = &m_vm32[psm->info.pa(0, y, bp, bw)];
|
uint32* RESTRICT ps = &m_vm32[off.assertSizesMatch(swizzle32).pa(0, y)];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x++, pb += 3)
|
for (; len > 0 && x < ex; len--, x++, pb += 3)
|
||||||
{
|
{
|
||||||
|
@ -1473,7 +1472,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||||
uint16* RESTRICT ps = &m_vm16[psm->info.pa(0, y, bp, bw)];
|
uint16* RESTRICT ps = &m_vm16[off.assertSizesMatch(swizzle16).pa(0, y)];
|
||||||
|
|
||||||
for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4)
|
for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4)
|
||||||
{
|
{
|
||||||
|
@ -1502,7 +1501,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||||
uint8* RESTRICT ps = &m_vm8[psm->info.pa(0, y, bp, bw)];
|
uint8* RESTRICT ps = &m_vm8[GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8).pa(0, y)];
|
||||||
|
|
||||||
for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
|
for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
|
||||||
{
|
{
|
||||||
|
@ -1530,7 +1529,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
uint32 addr = psm->info.pa(0, y, bp, bw);
|
uint32 addr = GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4).pa(0, y);
|
||||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
||||||
|
@ -1552,7 +1551,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||||
uint32* RESTRICT ps = &m_vm32[psm->info.pa(0, y, bp, bw)];
|
uint32* RESTRICT ps = &m_vm32[GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H).pa(0, y)];
|
||||||
|
|
||||||
for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
|
for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
|
||||||
{
|
{
|
||||||
|
@ -1581,7 +1580,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
int* offset = psm->rowOffset[y & 7];
|
int* offset = psm->rowOffset[y & 7];
|
||||||
uint32* RESTRICT ps = &m_vm32[psm->info.pa(0, y, bp, bw)];
|
uint32* RESTRICT ps = &m_vm32[GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL).pa(0, y)];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
||||||
{
|
{
|
||||||
|
@ -1605,7 +1604,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||||
uint32* RESTRICT ps = &m_vm32[psm->info.pa(0, y, bp, bw)];
|
uint32* RESTRICT ps = &m_vm32[GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH).pa(0, y)];
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
for (; len > 0 && x < ex; len--, x += 2, pb++)
|
||||||
{
|
{
|
||||||
|
@ -1633,7 +1632,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock32(src, read_dst, dstpitch);
|
GSBlock::ReadBlock32(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
|
@ -1643,14 +1642,14 @@ void GSLocalMemory::ReadTexture24(const GSOffset& off, const GSVector4i& r, uint
|
||||||
{
|
{
|
||||||
if (TEXA.AEM)
|
if (TEXA.AEM)
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA);
|
||||||
});
|
});
|
||||||
|
@ -1659,7 +1658,7 @@ void GSLocalMemory::ReadTexture24(const GSOffset& off, const GSVector4i& r, uint
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 16, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 16, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock16(src, read_dst, dstpitch);
|
GSBlock::ReadBlock16(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
|
@ -1681,14 +1680,14 @@ void GSLocalMemory::ReadTexture16(const GSOffset& off, const GSVector4i& r, uint
|
||||||
{
|
{
|
||||||
if (TEXA.AEM)
|
if (TEXA.AEM)
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA);
|
||||||
});
|
});
|
||||||
|
@ -1699,7 +1698,7 @@ void GSLocalMemory::ReadTexture8(const GSOffset& off, const GSVector4i& r, uint8
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const uint32* pal = m_clut;
|
||||||
|
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle8), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal);
|
||||||
});
|
});
|
||||||
|
@ -1709,7 +1708,7 @@ void GSLocalMemory::ReadTexture4(const GSOffset& off, const GSVector4i& r, uint8
|
||||||
{
|
{
|
||||||
const uint64* pal = m_clut;
|
const uint64* pal = m_clut;
|
||||||
|
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle4), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal);
|
||||||
});
|
});
|
||||||
|
@ -1719,7 +1718,7 @@ void GSLocalMemory::ReadTexture8H(const GSOffset& off, const GSVector4i& r, uint
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const uint32* pal = m_clut;
|
||||||
|
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal);
|
||||||
});
|
});
|
||||||
|
@ -1729,7 +1728,7 @@ void GSLocalMemory::ReadTexture4HL(const GSOffset& off, const GSVector4i& r, uin
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const uint32* pal = m_clut;
|
||||||
|
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal);
|
||||||
});
|
});
|
||||||
|
@ -1739,7 +1738,7 @@ void GSLocalMemory::ReadTexture4HH(const GSOffset& off, const GSVector4i& r, uin
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const uint32* pal = m_clut;
|
||||||
|
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal);
|
||||||
});
|
});
|
||||||
|
@ -1900,7 +1899,7 @@ void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8*
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle8), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock8(src, read_dst, dstpitch);
|
GSBlock::ReadBlock8(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
|
@ -1908,7 +1907,7 @@ void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle4), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock4P(src, read_dst, dstpitch);
|
GSBlock::ReadBlock4P(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
|
@ -1916,7 +1915,7 @@ void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock8HP(src, read_dst, dstpitch);
|
GSBlock::ReadBlock8HP(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
|
@ -1924,7 +1923,7 @@ void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uin
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock4HLP(src, read_dst, dstpitch);
|
GSBlock::ReadBlock4HLP(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
|
@ -1932,7 +1931,7 @@ void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, ui
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock4HHP(src, read_dst, dstpitch);
|
GSBlock::ReadBlock4HHP(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
|
|
|
@ -298,6 +298,10 @@ public:
|
||||||
{
|
{
|
||||||
pageLooperForRect(rect).loopPages(std::forward<Fn>(fn));
|
pageLooperForRect(rect).loopPages(std::forward<Fn>(fn));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Use compile-time dimensions from `swz` as a performance optimization
|
||||||
|
/// Also asserts if your assumption was wrong
|
||||||
|
constexpr GSOffset assertSizesMatch(const GSSwizzleInfo& swz) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSSwizzleInfo
|
class GSSwizzleInfo
|
||||||
|
@ -372,6 +376,20 @@ constexpr inline GSOffset::GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr GSOffset GSOffset::assertSizesMatch(const GSSwizzleInfo& swz) const
|
||||||
|
{
|
||||||
|
GSOffset o = *this;
|
||||||
|
#define MATCH(x) ASSERT(o.x == swz.x); o.x = swz.x;
|
||||||
|
MATCH(m_pageMask)
|
||||||
|
MATCH(m_blockMask)
|
||||||
|
MATCH(m_pageShiftX)
|
||||||
|
MATCH(m_pageShiftY)
|
||||||
|
MATCH(m_blockShiftX)
|
||||||
|
MATCH(m_blockShiftY)
|
||||||
|
#undef MATCH
|
||||||
|
return o;
|
||||||
|
}
|
||||||
|
|
||||||
class GSLocalMemory : public GSAlignedClass<32>
|
class GSLocalMemory : public GSAlignedClass<32>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -478,7 +496,10 @@ public:
|
||||||
GSLocalMemory();
|
GSLocalMemory();
|
||||||
virtual ~GSLocalMemory();
|
virtual ~GSLocalMemory();
|
||||||
|
|
||||||
GSOffset GetOffset(uint32 bp, uint32 bw, uint32 psm);
|
GSOffset GetOffset(uint32 bp, uint32 bw, uint32 psm) const
|
||||||
|
{
|
||||||
|
return GSOffset(m_psm[psm].info, bp, bw, psm);
|
||||||
|
}
|
||||||
GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
|
GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
|
||||||
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
|
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
|
||||||
std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
|
std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
|
||||||
|
|
|
@ -1650,10 +1650,14 @@ void GSState::Move()
|
||||||
|
|
||||||
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
|
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
|
||||||
|
|
||||||
GSOffset spo = m_mem.GetOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM);
|
int sbp = m_env.BITBLTBUF.SBP;
|
||||||
GSOffset dpo = m_mem.GetOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM);
|
int sbw = m_env.BITBLTBUF.SBW;
|
||||||
|
int dbp = m_env.BITBLTBUF.DBP;
|
||||||
|
int dbw = m_env.BITBLTBUF.DBW;
|
||||||
|
GSOffset spo = m_mem.GetOffset(sbp, sbw, m_env.BITBLTBUF.SPSM);
|
||||||
|
GSOffset dpo = m_mem.GetOffset(dbp, dbw, m_env.BITBLTBUF.DPSM);
|
||||||
|
|
||||||
auto copy = [&](auto&& pxCopyFn)
|
auto copy = [&](const GSOffset& dpo, const GSOffset& spo, auto&& pxCopyFn)
|
||||||
{
|
{
|
||||||
if (xinc > 0)
|
if (xinc > 0)
|
||||||
{
|
{
|
||||||
|
@ -1691,14 +1695,14 @@ void GSState::Move()
|
||||||
{
|
{
|
||||||
if (spsm.trbpp == 32)
|
if (spsm.trbpp == 32)
|
||||||
{
|
{
|
||||||
copy([&](uint32 doff, uint32 soff)
|
copy(dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
m_mem.m_vm32[doff] = m_mem.m_vm32[soff];
|
m_mem.m_vm32[doff] = m_mem.m_vm32[soff];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else if (spsm.trbpp == 24)
|
else if (spsm.trbpp == 24)
|
||||||
{
|
{
|
||||||
copy([&](uint32 doff, uint32 soff)
|
copy(dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
uint32& d = m_mem.m_vm32[doff];
|
uint32& d = m_mem.m_vm32[doff];
|
||||||
d = (d & 0xff000000) | (m_mem.m_vm32[soff] & 0x00ffffff);
|
d = (d & 0xff000000) | (m_mem.m_vm32[soff] & 0x00ffffff);
|
||||||
|
@ -1706,7 +1710,7 @@ void GSState::Move()
|
||||||
}
|
}
|
||||||
else // if(spsm.trbpp == 16)
|
else // if(spsm.trbpp == 16)
|
||||||
{
|
{
|
||||||
copy([&](uint32 doff, uint32 soff)
|
copy(dpo.assertSizesMatch(GSLocalMemory::swizzle16), spo.assertSizesMatch(GSLocalMemory::swizzle16), [&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
m_mem.m_vm16[doff] = m_mem.m_vm16[soff];
|
m_mem.m_vm16[doff] = m_mem.m_vm16[soff];
|
||||||
});
|
});
|
||||||
|
@ -1714,21 +1718,21 @@ void GSState::Move()
|
||||||
}
|
}
|
||||||
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
|
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
|
||||||
{
|
{
|
||||||
copy([&](uint32 doff, uint32 soff)
|
copy(GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT8), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT8), [&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
m_mem.m_vm8[doff] = m_mem.m_vm8[soff];
|
m_mem.m_vm8[doff] = m_mem.m_vm8[soff];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
|
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
|
||||||
{
|
{
|
||||||
copy([&](uint32 doff, uint32 soff)
|
copy(GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT4), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT4), [&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
m_mem.WritePixel4(doff, m_mem.ReadPixel4(soff));
|
m_mem.WritePixel4(doff, m_mem.ReadPixel4(soff));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
copy([&](uint32 doff, uint32 soff)
|
copy(dpo, spo, [&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
(m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff));
|
(m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff));
|
||||||
});
|
});
|
||||||
|
|
|
@ -63,12 +63,12 @@ public:
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator==(const GSVector2T& v) const
|
constexpr bool operator==(const GSVector2T& v) const
|
||||||
{
|
{
|
||||||
return x == v.x && y == v.y;
|
return x == v.x && y == v.y;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator!=(const GSVector2T& v) const
|
constexpr bool operator!=(const GSVector2T& v) const
|
||||||
{
|
{
|
||||||
return x != v.x || y != v.y;
|
return x != v.x || y != v.y;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1827,7 +1827,7 @@ void GSRendererHW::OI_GsMemClear()
|
||||||
// Based on WritePixel32
|
// Based on WritePixel32
|
||||||
for (int y = r.top; y < r.bottom; y++)
|
for (int y = r.top; y < r.bottom; y++)
|
||||||
{
|
{
|
||||||
GSOffset::PAHelper pa = off.paMulti(r.left, y);
|
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y);
|
||||||
|
|
||||||
for (; pa.x() < r.right; pa.incX())
|
for (; pa.x() < r.right; pa.incX())
|
||||||
{
|
{
|
||||||
|
@ -1840,7 +1840,7 @@ void GSRendererHW::OI_GsMemClear()
|
||||||
// Based on WritePixel24
|
// Based on WritePixel24
|
||||||
for (int y = r.top; y < r.bottom; y++)
|
for (int y = r.top; y < r.bottom; y++)
|
||||||
{
|
{
|
||||||
GSOffset::PAHelper pa = off.paMulti(r.left, y);
|
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y);
|
||||||
|
|
||||||
for (; pa.x() < r.right; pa.incX())
|
for (; pa.x() < r.right; pa.incX())
|
||||||
{
|
{
|
||||||
|
@ -1855,7 +1855,7 @@ void GSRendererHW::OI_GsMemClear()
|
||||||
// Based on WritePixel16
|
// Based on WritePixel16
|
||||||
for(int y = r.top; y < r.bottom; y++)
|
for(int y = r.top; y < r.bottom; y++)
|
||||||
{
|
{
|
||||||
GSOffset::PAHelper pa = off.paMulti(r.left, y);
|
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(r.left, y);
|
||||||
|
|
||||||
for(int x = r.left; x < r.right; x++)
|
for(int x = r.left; x < r.right; x++)
|
||||||
{
|
{
|
||||||
|
|
|
@ -21,10 +21,6 @@
|
||||||
#include "GSLocalMemory.h"
|
#include "GSLocalMemory.h"
|
||||||
|
|
||||||
GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
|
GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
|
||||||
GSOffset GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
|
||||||
{
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
void* vmalloc(size_t size, bool code)
|
void* vmalloc(size_t size, bool code)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue