GS: Switch back to row+column for pixel lookups

The code for the full calculation was far too complicated to run for every pixel in a loop.
This commit is contained in:
TellowKrinkle 2021-03-02 20:51:27 -06:00 committed by refractionpcsx2
parent b2fb6c7804
commit b901c6af71
8 changed files with 393 additions and 288 deletions

View File

@ -197,13 +197,14 @@ template <int n>
void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{ {
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32); GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV); GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
int x = TEXCLUT.COU << 4;
uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4); uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
for (int i = 0; i < n; pa.incX(), i++) for (int i = 0; i < n; x++, i++)
{ {
uint32 c = m_mem->m_vm32[pa.value()]; uint32 c = m_mem->m_vm32[pa.value(x)];
clut[i] = (uint16)(c & 0xffff); clut[i] = (uint16)(c & 0xffff);
clut[i + 256] = (uint16)(c >> 16); clut[i + 256] = (uint16)(c >> 16);
@ -214,13 +215,14 @@ template <int n>
void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{ {
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16); GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV); GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
int x = TEXCLUT.COU << 4;
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4); uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for (int i = 0; i < n; pa.incX(), i++) for (int i = 0; i < n; x++, i++)
{ {
clut[i] = m_mem->m_vm16[pa.value()]; clut[i] = m_mem->m_vm16[pa.value(x)];
} }
} }
@ -228,13 +230,14 @@ template <int n>
void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{ {
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S); GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV); GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
int x = TEXCLUT.COU << 4;
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4); uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for (int i = 0; i < n; pa.incX(), i++) for (int i = 0; i < n; x++, i++)
{ {
clut[i] = m_mem->m_vm16[pa.value()]; clut[i] = m_mem->m_vm16[pa.value(x)];
} }
} }

View File

@ -41,14 +41,14 @@ static void foreachBlock(const GSOffset& off, GSLocalMemory* mem, const GSVector
// //
GSPageOffsetTable<32, 64> GSLocalMemory::pageOffset32; constexpr GSSwizzleInfo GSLocalMemory::swizzle32;
GSPageOffsetTable<32, 64> GSLocalMemory::pageOffset32Z; constexpr GSSwizzleInfo GSLocalMemory::swizzle32Z;
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16; constexpr GSSwizzleInfo GSLocalMemory::swizzle16;
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16S; constexpr GSSwizzleInfo GSLocalMemory::swizzle16S;
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16Z; constexpr GSSwizzleInfo GSLocalMemory::swizzle16Z;
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16SZ; constexpr GSSwizzleInfo GSLocalMemory::swizzle16SZ;
GSPageOffsetTable<64, 128> GSLocalMemory::pageOffset8; constexpr GSSwizzleInfo GSLocalMemory::swizzle8;
GSPageOffsetTable<128, 128> GSLocalMemory::pageOffset4; constexpr GSSwizzleInfo GSLocalMemory::swizzle4;
// //
@ -56,21 +56,6 @@ GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
// //
template <int PageHeight, int PageWidth, int ColHeight, int ColWidth, typename Col>
static void setupPageOffsetTable(GSPageOffsetTable<PageHeight, PageWidth>& table, const GSBlockSwizzleTable& block, Col (&col)[ColHeight][ColWidth])
{
int blockSize = ColHeight * ColWidth;
for (int y = 0; y < PageHeight; y++)
{
for (int x = 0; x < 256; x++)
{
int colOff = col[y % ColHeight][x % ColWidth];
int blockOff = block.lookup(x / ColWidth, y / ColHeight);
table.value[y].value[x] = blockOff * blockSize + colOff;
}
}
}
GSLocalMemory::GSLocalMemory() GSLocalMemory::GSLocalMemory()
: m_clut(this) : m_clut(this)
{ {
@ -101,15 +86,6 @@ GSLocalMemory::GSLocalMemory()
memset(m_vm8, 0, m_vmsize); memset(m_vm8, 0, m_vmsize);
setupPageOffsetTable(pageOffset32, blockTable32, columnTable32);
setupPageOffsetTable(pageOffset32Z, blockTable32Z, columnTable32);
setupPageOffsetTable(pageOffset16, blockTable16, columnTable16);
setupPageOffsetTable(pageOffset16S, blockTable16S, columnTable16);
setupPageOffsetTable(pageOffset16Z, blockTable16Z, columnTable16);
setupPageOffsetTable(pageOffset16SZ, blockTable16SZ, columnTable16);
setupPageOffsetTable(pageOffset8, blockTable8, columnTable8);
setupPageOffsetTable(pageOffset4, blockTable4, columnTable4);
for (size_t i = 0; i < countof(m_psm); i++) for (size_t i = 0; i < countof(m_psm); i++)
{ {
m_psm[i].info = GSLocalMemory::swizzle32; m_psm[i].info = GSLocalMemory::swizzle32;
@ -1108,20 +1084,19 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
auto copy = [&](int len, const GSOffset& off, auto&& fn) auto copy = [&](int len, const GSOffset& off, auto&& fn)
{ {
GSOffset::PAHelper pa = off.paMulti(x, y); GSOffset::PAHelper pa = off.paMulti(y);
for (; len > 0; len--) for (; len > 0; len--)
{ {
fn(pa); fn(pa);
pa.incX(); x++;
if (pa.x() >= ex) if (x >= ex)
{ {
y++; y++;
pa = off.paMulti(sx, y); x = sx;
pa = off.paMulti(y);
} }
} }
x = pa.x();
}; };
GSOffset off = GetOffset(bp, bw, BITBLTBUF.DPSM); GSOffset off = GetOffset(bp, bw, BITBLTBUF.DPSM);
@ -1132,7 +1107,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMZ32: case PSM_PSMZ32:
copy(len / 4, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa) copy(len / 4, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
{ {
WritePixel32(pa.value(), *pd); WritePixel32(pa.value(x), *pd);
pd++; pd++;
}); });
break; break;
@ -1141,7 +1116,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMZ24: case PSM_PSMZ24:
copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa) copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
{ {
WritePixel24(pa.value(), *(uint32*)pb); WritePixel24(pa.value(x), *(uint32*)pb);
pb += 3; pb += 3;
}); });
break; break;
@ -1152,7 +1127,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMZ16S: case PSM_PSMZ16S:
copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa) copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa)
{ {
WritePixel16(pa.value(), *pw); WritePixel16(pa.value(x), *pw);
pw++; pw++;
}); });
break; break;
@ -1160,7 +1135,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMT8: case PSM_PSMT8:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa) copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa)
{ {
WritePixel8(pa.value(), *pb); WritePixel8(pa.value(x), *pb);
pb++; pb++;
}); });
break; break;
@ -1168,9 +1143,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMT4: case PSM_PSMT4:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa) copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa)
{ {
WritePixel4(pa.value(), *pb & 0xf); WritePixel4(pa.value(x++), *pb & 0xf);
pa.incX(); WritePixel4(pa.value(x), *pb >> 4);
WritePixel4(pa.value(), *pb >> 4);
pb++; pb++;
}); });
break; break;
@ -1178,7 +1152,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMT8H: case PSM_PSMT8H:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa) copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa)
{ {
WritePixel8H(pa.value(), *pb); WritePixel8H(pa.value(x), *pb);
pb++; pb++;
}); });
break; break;
@ -1186,9 +1160,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMT4HL: case PSM_PSMT4HL:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa) copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa)
{ {
WritePixel4HL(pa.value(), *pb & 0xf); WritePixel4HL(pa.value(x++), *pb & 0xf);
pa.incX(); WritePixel4HL(pa.value(x), *pb >> 4);
WritePixel4HL(pa.value(), *pb >> 4);
pb++; pb++;
}); });
break; break;
@ -1196,9 +1169,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMT4HH: case PSM_PSMT4HH:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa) copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa)
{ {
WritePixel4HH(pa.value(), *pb & 0xf); WritePixel4HH(pa.value(x++), *pb & 0xf);
pa.incX(); WritePixel4HH(pa.value(x), *pb >> 4);
WritePixel4HH(pa.value(), *pb >> 4);
pb++; pb++;
}); });
break; break;
@ -1230,20 +1202,19 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
auto copy = [&](int len, const GSOffset& off, auto&& fn) auto copy = [&](int len, const GSOffset& off, auto&& fn)
{ {
GSOffset::PAHelper pa = off.paMulti(x, y); GSOffset::PAHelper pa = off.paMulti(y);
for (; len > 0; len--) for (; len > 0; len--)
{ {
fn(pa); fn(pa);
pa.incX(); x++;
if (pa.x() >= ex) if (x >= ex)
{ {
y++; y++;
pa = off.paMulti(sx, y); x = sx;
pa = off.paMulti(y);
} }
} }
x = pa.x();
}; };
GSOffset off = GetOffset(bp, bw, BITBLTBUF.SPSM); GSOffset off = GetOffset(bp, bw, BITBLTBUF.SPSM);
@ -1259,49 +1230,48 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
len /= 4; len /= 4;
GSOffset::PAHelper pa = off.assertSizesMatch(swizzle32).paMulti(x, y); GSOffset::PAHelper pa = off.assertSizesMatch(swizzle32).paMulti(y);
while (len > 0) while (len > 0)
{ {
for (; len > 0 && pa.x() < ex && (pa.x() & 7); len--, pa.incX(), pd++) for (; len > 0 && x < ex && (x & 7); len--, x++, pd++)
{ {
*pd = m_vm32[pa.value()]; *pd = m_vm32[pa.value(x)];
} }
// aligned to a column // aligned to a column
for (int ex8 = ex - 8; len >= 8 && pa.x() <= ex8; len -= 8, pd += 8) for (int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8)
{ {
uint32* ps = m_vm32 + pa.value(); uint32* ps = m_vm32 + pa.value(x);
GSVector4i::store<false>(&pd[0], GSVector4i::load(ps + 0, ps + 4)); GSVector4i::store<false>(&pd[0], GSVector4i::load(ps + 0, ps + 4));
GSVector4i::store<false>(&pd[4], GSVector4i::load(ps + 8, ps + 12)); GSVector4i::store<false>(&pd[4], GSVector4i::load(ps + 8, ps + 12));
for (int i = 0; i < 8; i++, pa.incX()) for (int i = 0; i < 8; i++)
ASSERT(pd[i] == m_vm32[pa.value()]); ASSERT(pd[i] == m_vm32[pa.value(x + i)]);
} }
for (; len > 0 && pa.x() < ex; len--, pa.incX(), pd++) for (; len > 0 && x < ex; len--, x++, pd++)
{ {
*pd = m_vm32[pa.value()]; *pd = m_vm32[pa.value(x)];
} }
if (pa.x() == ex) if (x == ex)
{ {
y++; y++;
pa = off.assertSizesMatch(swizzle32).paMulti(sx, y); x = sx;
pa = off.assertSizesMatch(swizzle32).paMulti(y);
}
} }
} }
x = pa.x();
break; break;
}
case PSM_PSMCT24: case PSM_PSMCT24:
case PSM_PSMZ24: case PSM_PSMZ24:
copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa) copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
{ {
uint32 c = m_vm32[pa.value()]; uint32 c = m_vm32[pa.value(x)];
pb[0] = (uint8)(c); pb[0] = (uint8)(c);
pb[1] = (uint8)(c >> 8); pb[1] = (uint8)(c >> 8);
pb[2] = (uint8)(c >> 16); pb[2] = (uint8)(c >> 16);
@ -1315,7 +1285,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMZ16S: case PSM_PSMZ16S:
copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa) copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa)
{ {
*pw = m_vm16[pa.value()]; *pw = m_vm16[pa.value(x)];
pw++; pw++;
}); });
break; break;
@ -1323,7 +1293,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMT8: case PSM_PSMT8:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa) copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa)
{ {
*pb = m_vm8[pa.value()]; *pb = m_vm8[pa.value(x)];
pb++; pb++;
}); });
break; break;
@ -1331,9 +1301,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMT4: case PSM_PSMT4:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa) copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa)
{ {
uint8 low = ReadPixel4(pa.value()); uint8 low = ReadPixel4(pa.value(x++));
pa.incX(); uint8 high = ReadPixel4(pa.value(x));
uint8 high = ReadPixel4(pa.value());
*pb = low | (high << 4); *pb = low | (high << 4);
pb++; pb++;
}); });
@ -1342,7 +1311,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMT8H: case PSM_PSMT8H:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa) copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa)
{ {
*pb = (uint8)(m_vm32[pa.value()] >> 24); *pb = (uint8)(m_vm32[pa.value(x)] >> 24);
pb++; pb++;
}); });
break; break;
@ -1350,9 +1319,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMT4HL: case PSM_PSMT4HL:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa) copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa)
{ {
uint32 c0 = m_vm32[pa.value()] >> 24 & 0x0f; uint32 c0 = m_vm32[pa.value(x++)] >> 24 & 0x0f;
pa.incX(); uint32 c1 = m_vm32[pa.value(x)] >> 20 & 0xf0;
uint32 c1 = m_vm32[pa.value()] >> 20 & 0xf0;
*pb = (uint8)(c0 | c1); *pb = (uint8)(c0 | c1);
pb++; pb++;
}); });
@ -1361,9 +1329,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMT4HH: case PSM_PSMT4HH:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa) copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa)
{ {
uint32 c0 = m_vm32[pa.value()] >> 28 & 0x0f; uint32 c0 = m_vm32[pa.value(x++)] >> 28 & 0x0f;
pa.incX(); uint32 c1 = m_vm32[pa.value(x)] >> 24 & 0xf0;
uint32 c1 = m_vm32[pa.value()] >> 24 & 0xf0;
*pb = (uint8)(c0 | c1); *pb = (uint8)(c0 | c1);
pb++; pb++;
}); });

View File

@ -41,38 +41,66 @@ struct GSPixelOffset4
uint32 fbp, zbp, fpsm, zpsm, bw; uint32 fbp, zbp, fpsm, zpsm, bw;
}; };
struct alignas(128) GSPageOffsetRow class GSOffset;
{
// Maximum page width is 128, but store mirror for unaligned simd loads
uint32 value[256];
};
template <int Height, int Width> class GSSwizzleInfo
struct GSPageOffsetTable
{
GSPageOffsetRow value[Height];
};
class GSSwizzleInfo;
class GSOffset
{ {
friend class GSOffset;
/// Table for storing swizzling of blocks within a page /// Table for storing swizzling of blocks within a page
const GSBlockSwizzleTable* m_blockSwizzle; const GSBlockSwizzleTable* m_blockSwizzle;
/// Table for storing swizzling of pixels within a page /// Table for storing swizzling of pixels within a page in the y dimension
const GSPageOffsetRow* m_pixelSwizzle; const int* m_pixelSwizzleCol;
/// Array of tables for storing swizzling of pixels in the x dimension
const GSPixelRowOffsetTable* const* m_pixelSwizzleRow;
GSVector2i m_pageMask; ///< Mask for getting the offset of a pixel that's within a page (may also be used as page dimensions - 1) GSVector2i m_pageMask; ///< Mask for getting the offset of a pixel that's within a page (may also be used as page dimensions - 1)
GSVector2i m_blockMask; ///< Mask for getting the offset of a pixel that's within a block (may also be used as block dimensions - 1) GSVector2i m_blockMask; ///< Mask for getting the offset of a pixel that's within a block (may also be used as block dimensions - 1)
uint8 m_pageShiftX; ///< Amount to rshift x value by to get page x offset int m_pixelRowMask; ///< Mask for getting the offset in m_pixelSwizzleRow for a given y value
uint8 m_pageShiftY; ///< Amount to rshift y value by to get page y offset uint8 m_pageShiftX; ///< Amount to rshift x value by to get page offset
uint8 m_blockShiftX; ///< Amount to rshift x value by to get block x offset uint8 m_pageShiftY; ///< Amount to rshift y value by to get page offset
uint8 m_blockShiftY; ///< Amount to rshift y value by to get block y offset uint8 m_blockShiftX; ///< Amount to rshift x value by to get offset in block
uint8 m_blockShiftY; ///< Amount to rshift y value by to get offset in block
static constexpr uint8 ilog2(uint32 i) { return i < 2 ? 0 : 1 + ilog2(i >> 1); }
public:
GSSwizzleInfo() = default;
/// @param blockSize Size of block in pixels
template <int PageWidth, int PageHeight, int BlocksWide, int BlocksHigh, int PixelRowMask>
constexpr GSSwizzleInfo(GSSwizzleTableList<PageHeight, PageWidth, BlocksHigh, BlocksWide, PixelRowMask> list)
: m_blockSwizzle(&list.block)
, m_pixelSwizzleCol(list.col.value)
, m_pixelSwizzleRow(list.row.rows)
, m_pageMask{PageWidth - 1, PageHeight - 1}
, m_blockMask{(PageWidth / BlocksWide) - 1, (PageHeight / BlocksHigh) - 1}
, m_pixelRowMask(PixelRowMask)
, m_pageShiftX(ilog2(PageWidth)), m_pageShiftY(ilog2(PageHeight))
, m_blockShiftX(ilog2(PageWidth / BlocksWide)), m_blockShiftY(ilog2(PageHeight / BlocksHigh))
{
static_assert(1 << ilog2(PageWidth) == PageWidth, "PageWidth must be a power of 2");
static_assert(1 << ilog2(PageHeight) == PageHeight, "PageHeight must be a power of 2");
}
/// Get the block number of the given pixel
uint32 bn(int x, int y, uint32 bp, uint32 bw) const;
/// Get the address of the given pixel
uint32 pa(int x, int y, uint32 bp, uint32 bw) const;
};
class GSOffset : GSSwizzleInfo
{
int m_bp; ///< Offset's base pointer (same measurement as GS) int m_bp; ///< Offset's base pointer (same measurement as GS)
int m_bwPg; ///< Offset's buffer width in pages (not equal to bw in GS for 8 and 4-bit textures) int m_bwPg; ///< Offset's buffer width in pages (not equal to bw in GS for 8 and 4-bit textures)
int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffset algorithms) int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffset algorithms)
public: public:
GSOffset() = default; GSOffset() = default;
constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm); constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
: GSSwizzleInfo(swz)
, m_bp(bp)
, m_bwPg(bw >> (m_pageShiftX - 6))
, m_psm(psm)
{
}
/// Help the optimizer by using this method instead of GSLocalMemory::GetOffset when the PSM is known /// Help the optimizer by using this method instead of GSLocalMemory::GetOffset when the PSM is known
constexpr static GSOffset fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm); constexpr static GSOffset fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm);
@ -182,49 +210,37 @@ public:
class PAHelper class PAHelper
{ {
/// Pixel swizzle array /// Pixel swizzle array
const GSPageOffsetRow* m_pixelSwizzle; const GSPixelRowOffsetTable* m_pixelSwizzleRow;
int m_pageMaskX; ///< Mask for getting offset within a page int m_base;
int m_base; ///< Address for origin x
int m_x; ///< Current x position
int m_shift; ///< Amount to lshift x to get offset due to page after clearing with pageMaskX
int m_mask; ///< Mask to stay in bounds
public: public:
PAHelper() = default; PAHelper() = default;
PAHelper(const GSOffset& off, int x, int y) PAHelper(const GSOffset& off, int y)
{ {
m_pixelSwizzle = off.m_pixelSwizzle + (y & off.m_pageMask.y); m_pixelSwizzleRow = off.m_pixelSwizzleRow[y & off.m_pixelRowMask];
m_base = off.m_bp << (off.m_pageShiftX + off.m_pageShiftY - 5); m_base = off.m_bp << (off.m_pageShiftX + off.m_pageShiftY - 5);
m_base += ((y & ~off.m_pageMask.y) * off.m_bwPg) << off.m_pageShiftX; m_base += ((y & ~off.m_pageMask.y) * off.m_bwPg) << off.m_pageShiftX;
m_pageMaskX = off.m_pageMask.x; m_base &= (MAX_PAGES << (off.m_pageShiftX + off.m_pageShiftY)) - 1;
m_shift = off.m_pageShiftY; m_base += off.m_pixelSwizzleCol[y & off.m_pageMask.y];
m_x = x;
m_mask = (MAX_PAGES << (off.m_pageShiftX + off.m_pageShiftY)) - 1;
} }
/// Get current x value
int x() const { return m_x; }
/// Increment x value
void incX() { m_x++; }
/// Decrement x value
void decX() { m_x--; }
/// Get current pixel address /// Get current pixel address
uint32 value() const uint32 value(size_t x) const
{ {
int x = (m_x & ~m_pageMaskX) << m_shift; return m_base + (*m_pixelSwizzleRow)[x];
return (m_base + x + m_pixelSwizzle->value[m_x & m_pageMaskX]) & m_mask;
} }
}; };
/// Get the address of the given pixel /// Get the address of the given pixel
uint32 pa(int x, int y) const uint32 pa(int x, int y) const
{ {
return PAHelper(*this, x, y).value(); return PAHelper(*this, y).value(x);
} }
/// Get a helper class for efficiently calculating multiple pixel addresses in a line (along the x axis) /// Get a helper class for efficiently calculating multiple pixel addresses in a line (along the x axis)
PAHelper paMulti(int x, int y) const PAHelper paMulti(int y) const
{ {
return PAHelper(*this, x, y); return PAHelper(*this, y);
} }
/// Loop over the pixels in the given rectangle /// Loop over the pixels in the given rectangle
@ -236,11 +252,10 @@ public:
for (int y = r.top; y < r.bottom; y++, px = reinterpret_cast<Src*>(reinterpret_cast<uint8*>(px) + pitch)) for (int y = r.top; y < r.bottom; y++, px = reinterpret_cast<Src*>(reinterpret_cast<uint8*>(px) + pitch))
{ {
PAHelper pa = paMulti(r.left, y); PAHelper pa = paMulti(y);
while (pa.x() < r.right) for (int x = r.left; x < r.right; x++)
{ {
fn(vm + pa.value(), px + pa.x()); fn(vm + pa.value(x), px + x);
pa.incX();
} }
} }
} }
@ -314,93 +329,30 @@ public:
/// Use compile-time dimensions from `swz` as a performance optimization /// Use compile-time dimensions from `swz` as a performance optimization
/// Also asserts if your assumption was wrong /// Also asserts if your assumption was wrong
constexpr GSOffset assertSizesMatch(const GSSwizzleInfo& swz) const; constexpr GSOffset assertSizesMatch(const GSSwizzleInfo& swz) const
};
class GSSwizzleInfo
{
friend class GSOffset;
/// Table for storing swizzling of blocks within a page
const GSBlockSwizzleTable* m_blockSwizzle;
/// Table for storing swizzling of pixels within a page
const GSPageOffsetRow* m_pixelSwizzle;
GSVector2i m_pageMask; ///< Mask for getting the offset of a pixel that's within a page (may also be used as page dimensions - 1)
GSVector2i m_blockMask; ///< Mask for getting the offset of a pixel that's within a block (may also be used as block dimensions - 1)
uint8 m_pageShiftX; ///< Amount to rshift x value by to get page offset
uint8 m_pageShiftY; ///< Amount to rshift y value by to get page offset
uint8 m_blockShiftX; ///< Amount to rshift x value by to get offset in block
uint8 m_blockShiftY; ///< Amount to rshift y value by to get offset in block
static constexpr uint8 ilog2(uint32 i) { return i < 2 ? 0 : 1 + ilog2(i>>1); }
public:
GSSwizzleInfo() = default;
/// @param PageWidth Width of page in pixels
/// @param PageHeight Height of page in pixels
/// @param blockSize Size of block in pixels
template <int PageWidth, int PageHeight>
constexpr GSSwizzleInfo(GSVector2i blockSize, const GSBlockSwizzleTable* blockSwizzle, const GSPageOffsetTable<PageHeight, PageWidth>* pxSwizzle)
: m_blockSwizzle(blockSwizzle)
, m_pixelSwizzle(pxSwizzle->value)
, m_pageMask{PageWidth - 1, PageHeight - 1}
, m_blockMask{blockSize.x - 1, blockSize.y - 1}
, m_pageShiftX(ilog2(PageWidth)), m_pageShiftY(ilog2(PageHeight))
, m_blockShiftX(ilog2(blockSize.x)), m_blockShiftY(ilog2(blockSize.y))
{ {
static_assert(1 << ilog2(PageWidth) == PageWidth, "PageWidth must be a power of 2");
static_assert(1 << ilog2(PageHeight) == PageHeight, "PageHeight must be a power of 2");
}
/// Get the block number of the given pixel
uint32 bn(int x, int y, uint32 bp, uint32 bw) const
{
return GSOffset(*this, bp, bw, 0).bn(x, y);
}
/// Get the address of the given pixel
uint32 pa(int x, int y, uint32 bp, uint32 bw) const
{
return GSOffset(*this, bp, bw, 0).pa(x, y);
}
/// Loop over all the pages in the given rect, calling `fn` on each
template <typename Fn>
void loopPages(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
{
GSOffset(*this, bp, bw, 0).loopPages(rect, std::forward<Fn>(fn));
}
/// Loop over all the blocks in the given rect, calling `fn` on each
template <typename Fn>
void loopBlocks(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
{
GSOffset(*this, bp, bw, 0).loopBlocks(rect, std::forward<Fn>(fn));
}
};
constexpr inline GSOffset::GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
: m_blockSwizzle(swz.m_blockSwizzle)
, m_pixelSwizzle(swz.m_pixelSwizzle)
, m_pageMask(swz.m_pageMask), m_blockMask(swz.m_blockMask)
, m_pageShiftX(swz.m_pageShiftX), m_pageShiftY(swz.m_pageShiftY)
, m_blockShiftX(swz.m_blockShiftX), m_blockShiftY(swz.m_blockShiftY)
, m_bp(bp)
, m_bwPg(bw >> (m_pageShiftX - 6))
, m_psm(psm)
{
}
constexpr GSOffset GSOffset::assertSizesMatch(const GSSwizzleInfo& swz) const
{
GSOffset o = *this; GSOffset o = *this;
#define MATCH(x) ASSERT(o.x == swz.x); o.x = swz.x; #define MATCH(x) ASSERT(o.x == swz.x); o.x = swz.x;
MATCH(m_pageMask) MATCH(m_pageMask)
MATCH(m_blockMask) MATCH(m_blockMask)
MATCH(m_pixelRowMask)
MATCH(m_pageShiftX) MATCH(m_pageShiftX)
MATCH(m_pageShiftY) MATCH(m_pageShiftY)
MATCH(m_blockShiftX) MATCH(m_blockShiftX)
MATCH(m_blockShiftY) MATCH(m_blockShiftY)
#undef MATCH #undef MATCH
return o; return o;
}
};
inline uint32 GSSwizzleInfo::bn(int x, int y, uint32 bp, uint32 bw) const
{
return GSOffset(*this, bp, bw, 0).bn(x, y);
}
inline uint32 GSSwizzleInfo::pa(int x, int y, uint32 bp, uint32 bw) const
{
return GSOffset(*this, bp, bw, 0).pa(x, y);
} }
class GSLocalMemory : public GSAlignedClass<32> class GSLocalMemory : public GSAlignedClass<32>
@ -452,24 +404,15 @@ public:
protected: protected:
bool m_use_fifo_alloc; bool m_use_fifo_alloc;
static GSPageOffsetTable<32, 64> pageOffset32;
static GSPageOffsetTable<32, 64> pageOffset32Z;
static GSPageOffsetTable<64, 64> pageOffset16;
static GSPageOffsetTable<64, 64> pageOffset16S;
static GSPageOffsetTable<64, 64> pageOffset16Z;
static GSPageOffsetTable<64, 64> pageOffset16SZ;
static GSPageOffsetTable<64, 128> pageOffset8;
static GSPageOffsetTable<128, 128> pageOffset4;
public: public:
static constexpr GSSwizzleInfo swizzle32{{8, 8}, &blockTable32, &pageOffset32}; static constexpr GSSwizzleInfo swizzle32 {swizzleTables32};
static constexpr GSSwizzleInfo swizzle32Z{{8, 8}, &blockTable32Z, &pageOffset32Z}; static constexpr GSSwizzleInfo swizzle32Z {swizzleTables32Z};
static constexpr GSSwizzleInfo swizzle16{{16, 8}, &blockTable16, &pageOffset16}; static constexpr GSSwizzleInfo swizzle16 {swizzleTables16};
static constexpr GSSwizzleInfo swizzle16S{{16, 8}, &blockTable16S, &pageOffset16S}; static constexpr GSSwizzleInfo swizzle16S {swizzleTables16S};
static constexpr GSSwizzleInfo swizzle16Z{{16, 8}, &blockTable16Z, &pageOffset16Z}; static constexpr GSSwizzleInfo swizzle16Z {swizzleTables16Z};
static constexpr GSSwizzleInfo swizzle16SZ{{16, 8}, &blockTable16SZ, &pageOffset16SZ}; static constexpr GSSwizzleInfo swizzle16SZ {swizzleTables16SZ};
static constexpr GSSwizzleInfo swizzle8{{16, 16}, &blockTable8, &pageOffset8}; static constexpr GSSwizzleInfo swizzle8 {swizzleTables8};
static constexpr GSSwizzleInfo swizzle4{{32, 16}, &blockTable4, &pageOffset4}; static constexpr GSSwizzleInfo swizzle4 {swizzleTables4};
protected: protected:
__forceinline static uint32 Expand24To32(uint32 c, const GIFRegTEXA& TEXA) __forceinline static uint32 Expand24To32(uint32 c, const GIFRegTEXA& TEXA)

View File

@ -1663,14 +1663,12 @@ void GSState::Move()
{ {
for (int y = 0; y < h; y++, sy += yinc, dy += yinc) for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{ {
GSOffset::PAHelper s = spo.paMulti(sx, sy); GSOffset::PAHelper s = spo.paMulti(sy);
GSOffset::PAHelper d = dpo.paMulti(dx, dy); GSOffset::PAHelper d = dpo.paMulti(dy);
for (int x = 0; x < w; x++) for (int x = 0; x < w; x++)
{ {
pxCopyFn(d.value(), s.value()); pxCopyFn(d.value(dx + x), s.value(sx + x));
s.incX();
d.incX();
} }
} }
} }
@ -1678,14 +1676,12 @@ void GSState::Move()
{ {
for (int y = 0; y < h; y++, sy += yinc, dy += yinc) for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{ {
GSOffset::PAHelper s = spo.paMulti(sx, sy); GSOffset::PAHelper s = spo.paMulti(sy);
GSOffset::PAHelper d = dpo.paMulti(dx, dy); GSOffset::PAHelper d = dpo.paMulti(dy);
for (int x = 0; x < w; x++) for (int x = 0; x < w; x++)
{ {
pxCopyFn(d.value(), s.value()); pxCopyFn(d.value(dx - x), s.value(sx - x));
s.decX();
d.decX();
} }
} }
} }

View File

@ -20,8 +20,8 @@
#include "GS_types.h" #include "GS_types.h"
template <int Width, int Height> template <int Width, int Height>
static constexpr GSBlockSwizzleTable makeSwizzleTable(const uint8 (&arr)[Height][Width]) { static constexpr GSSizedBlockSwizzleTable<Height, Width> makeSwizzleTable(const uint8 (&arr)[Height][Width]) {
GSBlockSwizzleTable table = {}; GSSizedBlockSwizzleTable<Height, Width> table = {};
for (int y = 0; y < 8; y++) { for (int y = 0; y < 8; y++) {
for (int x = 0; x < 8; x++) { for (int x = 0; x < 8; x++) {
table.value[y][x] = arr[y % Height][x % Width]; table.value[y][x] = arr[y % Height][x % Width];
@ -114,14 +114,14 @@ static constexpr uint8 _blockTable4[8][4] =
{ 21, 23, 29, 31 } { 21, 23, 29, 31 }
}; };
constexpr GSBlockSwizzleTable blockTable32 = makeSwizzleTable(_blockTable32); constexpr GSSizedBlockSwizzleTable<4, 8> blockTable32 = makeSwizzleTable(_blockTable32);
constexpr GSBlockSwizzleTable blockTable32Z = makeSwizzleTable(_blockTable32Z); constexpr GSSizedBlockSwizzleTable<4, 8> blockTable32Z = makeSwizzleTable(_blockTable32Z);
constexpr GSBlockSwizzleTable blockTable16 = makeSwizzleTable(_blockTable16); constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16 = makeSwizzleTable(_blockTable16);
constexpr GSBlockSwizzleTable blockTable16S = makeSwizzleTable(_blockTable16S); constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16S = makeSwizzleTable(_blockTable16S);
constexpr GSBlockSwizzleTable blockTable16Z = makeSwizzleTable(_blockTable16Z); constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16Z = makeSwizzleTable(_blockTable16Z);
constexpr GSBlockSwizzleTable blockTable16SZ = makeSwizzleTable(_blockTable16SZ); constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16SZ = makeSwizzleTable(_blockTable16SZ);
constexpr GSBlockSwizzleTable blockTable8 = makeSwizzleTable(_blockTable8); constexpr GSSizedBlockSwizzleTable<4, 8> blockTable8 = makeSwizzleTable(_blockTable8);
constexpr GSBlockSwizzleTable blockTable4 = makeSwizzleTable(_blockTable4); constexpr GSSizedBlockSwizzleTable<8, 4> blockTable4 = makeSwizzleTable(_blockTable4);
constexpr uint8 columnTable32[8][8] = constexpr uint8 columnTable32[8][8] =
{ {
@ -290,3 +290,75 @@ constexpr uint8 clutTableT16I4[16] =
0, 2, 8, 10, 16, 18, 24, 26, 0, 2, 8, 10, 16, 18, 24, 26,
4, 6, 12, 14, 20, 22, 28, 30 4, 6, 12, 14, 20, 22, 28, 30
}; };
/// Computes the offset, in pixels, of pixel (x, y) from the start of the page that holds x = 0
/// @param blockTable block ID for each [block row][block column] of a page
/// @param colTable   offset of each pixel inside a block, indexed [row in block][column in block]
/// @param x          horizontal pixel coordinate; may extend past the first page, each further page adds one page worth of pixels
/// @param y          vertical pixel coordinate; not wrapped here, so y / ColHeight must be a valid row of blockTable
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
constexpr int pxOffset(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int x, int y)
{
	// All sizes follow from the dimensions of the two tables
	const int pixelsPerBlock = ColHeight * ColWidth;
	const int pixelsPerPage = pixelsPerBlock * BlocksHigh * BlocksWide;
	const int pagePixelWidth = BlocksWide * ColWidth;

	// Split x into "which page" and "where inside that page"
	const int pageIndex = x / pagePixelWidth;
	const int xInPage = x % pagePixelWidth;

	// Locate the block inside the page, then the pixel inside the block
	const int blockIndex = blockTable[y / ColHeight][xInPage / ColWidth];
	const int offsetInBlock = colTable[y % ColHeight][xInPage % ColWidth];

	return pageIndex * pixelsPerPage + blockIndex * pixelsPerBlock + offsetInBlock;
}
/// Builds the table of offsets of the x = 0 pixel for every row of one page
/// The resulting table has BlocksHigh * ColHeight entries (one per pixel row of the page)
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
constexpr GSPixelColOffsetTable<BlocksHigh * ColHeight> makeColOffsetTable(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth])
{
	constexpr int pageHeight = BlocksHigh * ColHeight;
	GSPixelColOffsetTable<pageHeight> offsets = {};
	for (int row = 0; row != pageHeight; ++row)
	{
		// Only the left-most pixel of each row is recorded; other x values come from the row offset tables
		offsets.value[row] = pxOffset(blockTable, colTable, 0, row);
	}
	return offsets;
}
/// Builds the table of offsets of every x on row y, relative to the x = 0 pixel of that same row
/// @param blockTable block ID for each [block row][block column] of a page
/// @param colTable   offset of each pixel inside a block
/// @param y          row to generate the offsets for
/// @return table with one entry per x covering the whole length of GSPixelRowOffsetTable::value
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
constexpr GSSizedPixelRowOffsetTable<BlocksWide * ColWidth> makeRowOffsetTable(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int y)
{
	GSSizedPixelRowOffsetTable<BlocksWide * ColWidth> table = {};
	// Take the entry count from the array itself instead of repeating its length here,
	// so the loop can never run past the end if the table size changes
	constexpr int entryCount = static_cast<int>(sizeof(table.value) / sizeof(table.value[0]));
	const int base = pxOffset(blockTable, colTable, 0, y);
	for (int x = 0; x < entryCount; x++)
	{
		table.value[x] = pxOffset(blockTable, colTable, x, y) - base;
	}
	return table;
}
// Column offset tables (offset of the x = 0 pixel of each row), one per block table.
// Each is generated at compile time from a block table and the matching column table;
// the template argument is the number of rows in the table (block rows * column table rows).
constexpr GSPixelColOffsetTable< 32> pixelColOffset32 = makeColOffsetTable(_blockTable32, columnTable32);
constexpr GSPixelColOffsetTable< 32> pixelColOffset32Z = makeColOffsetTable(_blockTable32Z, columnTable32);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16 = makeColOffsetTable(_blockTable16, columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16S = makeColOffsetTable(_blockTable16S, columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16Z = makeColOffsetTable(_blockTable16Z, columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16SZ = makeColOffsetTable(_blockTable16SZ, columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset8 = makeColOffsetTable(_blockTable8, columnTable8);
constexpr GSPixelColOffsetTable<128> pixelColOffset4 = makeColOffsetTable(_blockTable4, columnTable4);
// Row offset tables (offset of each x relative to x = 0 on the same row), stored as static members of GSTables.
// These can't be constexpr due to a GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99901
// The 32 and 16 bit variants are each generated from row 0 only.
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset32 = makeRowOffsetTable(_blockTable32, columnTable32, 0);
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset32Z = makeRowOffsetTable(_blockTable32Z, columnTable32, 0);
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16 = makeRowOffsetTable(_blockTable16, columnTable16, 0);
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16S = makeRowOffsetTable(_blockTable16S, columnTable16, 0);
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16Z = makeRowOffsetTable(_blockTable16Z, columnTable16, 0);
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16SZ = makeRowOffsetTable(_blockTable16SZ, columnTable16, 0);
// The 8 bit variant needs two tables: [0] is generated from row 0 and [1] from row 2
CONSTINIT const GSSizedPixelRowOffsetTable<128> GSTables::_pixelRowOffset8[2] =
{
makeRowOffsetTable(_blockTable8, columnTable8, 0),
makeRowOffsetTable(_blockTable8, columnTable8, 2),
};
// Same for the 4 bit variant: [0] is generated from row 0 and [1] from row 2
CONSTINIT const GSSizedPixelRowOffsetTable<128> GSTables::_pixelRowOffset4[2] =
{
makeRowOffsetTable(_blockTable4, columnTable4, 0),
makeRowOffsetTable(_blockTable4, columnTable4, 2),
};
// Namespace-scope definitions of the static constexpr members of GSTables.
// Their initializers live on the in-class declarations, so none are repeated here.
// NOTE(review): since C++17 static constexpr members are implicitly inline, which makes these
// definitions redundant (but still permitted) - confirm the minimum language standard before removing them
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset32;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset32Z;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16S;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16Z;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16SZ;
constexpr GSPixelRowOffsetTableList<128, 7> GSTables::pixelRowOffset8;
constexpr GSPixelRowOffsetTableList<128, 7> GSTables::pixelRowOffset4;

View File

@ -29,14 +29,88 @@ struct alignas(64) GSBlockSwizzleTable
} }
}; };
extern const GSBlockSwizzleTable blockTable32; /// Adds sizes to GSBlockSwizzleTable to feel better about not making mistakes
extern const GSBlockSwizzleTable blockTable32Z; template <int Height, int Width>
extern const GSBlockSwizzleTable blockTable16; struct GSSizedBlockSwizzleTable : public GSBlockSwizzleTable
extern const GSBlockSwizzleTable blockTable16S; {
extern const GSBlockSwizzleTable blockTable16Z; };
extern const GSBlockSwizzleTable blockTable16SZ;
extern const GSBlockSwizzleTable blockTable8; /// Table for storing offsets of x = 0 pixels from the beginning of the page
extern const GSBlockSwizzleTable blockTable4; /// Add values from a GSPixelRowOffsetTable to get the pixels for x != 0
template <int Height>
struct alignas(128) GSPixelColOffsetTable
{
	/// One entry per row, zero-initialized by default
	int value[Height] = {};

	/// Returns the entry for row y, with y wrapped into [0, Height)
	/// Negative y wraps around from the end of the table (e.g. -1 maps to Height - 1)
	int operator[](int y) const
	{
		// `%` truncates toward zero, so a negative y would produce a negative remainder
		// and index in front of the array; shift such remainders back into range
		int row = y % Height;
		if (row < 0)
			row += Height;
		return value[row];
	}
};
/// Offsets of the pixels at x != 0, measured from the x = 0 pixel on the same row
/// In contrast to the column offset tables, this one spans the maximum texture width, so lookups need no masking
struct alignas(128) GSPixelRowOffsetTable
{
	/// Offset for every possible x, zero-initialized by default
	int value[2048] = {};

	/// Returns the offset for column x; x must lie inside the table (checked by ASSERT only)
	int operator[](size_t x) const
	{
		ASSERT(x < 2048);
		const int* const entry = value + x;
		return *entry;
	}
};
/// GSPixelRowOffsetTable tagged with a page width
/// PageWidth adds no data or behavior; it only lets the type system catch tables being mixed up
template <int PageWidth>
struct GSSizedPixelRowOffsetTable : GSPixelRowOffsetTable {};
/// Set of row offset tables, selected by row
/// For some swizzlings (PSMT8 and PSMT4) the row offsets depend on which column the row belongs to
/// Those follow an a a b b b b a a pattern that repeats every 8 rows
/// Indexing with y & 7 always finds the right table, but indexing with y & Mask, where Mask is a
/// compile time constant, should give the compiler more room to optimize
/// PageWidth is not used by the struct itself, it only distinguishes the types
template <int PageWidth, int Mask>
struct alignas(sizeof(void*) * 8) GSPixelRowOffsetTableList
{
	/// Table to use for each value of y & 7
	const GSPixelRowOffsetTable* rows[8];

	/// Returns the row offset table for pixels on row y
	const GSPixelRowOffsetTable& operator[](int y) const
	{
		const GSPixelRowOffsetTable* const selected = rows[y & Mask];
		return *selected;
	}
};
/// Complete set of lookup tables for one swizzle
/// The template arguments carry the table dimensions so that objects built from a list can pick them up
/// Members are references, so the referenced tables must outlive the list
template <int PageHeight, int PageWidth, int BlockHeight, int BlockWidth, int RowMask>
struct GSSwizzleTableList
{
	/// Block swizzle table
	const GSSizedBlockSwizzleTable<BlockHeight, BlockWidth>& block;
	/// Offsets of the x = 0 pixel of each row
	const GSPixelColOffsetTable<PageHeight>& col;
	/// Offsets of the remaining pixels of a row, relative to its x = 0 pixel
	const GSPixelRowOffsetTableList<PageWidth, RowMask>& row;
};
/// Bundles the block, column and row tables of one swizzle into a GSSwizzleTableList
/// All template arguments are deduced from the tables passed in
/// The result stores references to the arguments, it does not copy them
template <int PageHeight, int PageWidth, int BlockHeight, int BlockWidth, int RowMask>
constexpr GSSwizzleTableList<PageHeight, PageWidth, BlockHeight, BlockWidth, RowMask>
makeSwizzleTableList(
	const GSSizedBlockSwizzleTable<BlockHeight, BlockWidth>& block,
	const GSPixelColOffsetTable<PageHeight>& col,
	const GSPixelRowOffsetTableList<PageWidth, RowMask>& row)
{
	using TableList = GSSwizzleTableList<PageHeight, PageWidth, BlockHeight, BlockWidth, RowMask>;
	return TableList{block, col, row};
}
// Block swizzle tables, one per block layout; the template arguments are <Height, Width>
extern const GSSizedBlockSwizzleTable<4, 8> blockTable32;
extern const GSSizedBlockSwizzleTable<4, 8> blockTable32Z;
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16;
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16S;
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16Z;
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16SZ;
extern const GSSizedBlockSwizzleTable<4, 8> blockTable8;
extern const GSSizedBlockSwizzleTable<8, 4> blockTable4;
extern const uint8 columnTable32[8][8]; extern const uint8 columnTable32[8][8];
extern const uint8 columnTable16[8][16]; extern const uint8 columnTable16[8][16];
extern const uint8 columnTable8[16][16]; extern const uint8 columnTable8[16][16];
@ -45,3 +119,57 @@ extern const uint8 clutTableT32I8[128];
extern const uint8 clutTableT32I4[16]; extern const uint8 clutTableT32I4[16];
extern const uint8 clutTableT16I8[32]; extern const uint8 clutTableT16I8[32];
extern const uint8 clutTableT16I4[16]; extern const uint8 clutTableT16I4[16];
// Column offset tables (offset of the x = 0 pixel of each row); the template argument is the number of rows
extern const GSPixelColOffsetTable< 32> pixelColOffset32;
extern const GSPixelColOffsetTable< 32> pixelColOffset32Z;
extern const GSPixelColOffsetTable< 64> pixelColOffset16;
extern const GSPixelColOffsetTable< 64> pixelColOffset16S;
extern const GSPixelColOffsetTable< 64> pixelColOffset16Z;
extern const GSPixelColOffsetTable< 64> pixelColOffset16SZ;
extern const GSPixelColOffsetTable< 64> pixelColOffset8;
extern const GSPixelColOffsetTable<128> pixelColOffset4;
/// Builds a row offset table list for swizzles where every row uses the same table
/// The resulting Mask is 0, so lookups always land on the first entry
template <int PageWidth>
constexpr GSPixelRowOffsetTableList<PageWidth, 0> makeRowOffsetTableList(
	const GSSizedPixelRowOffsetTable<PageWidth>* a)
{
	return GSPixelRowOffsetTableList<PageWidth, 0>{{a, a, a, a, a, a, a, a}};
}
/// Builds a row offset table list for swizzles that alternate between two tables
/// Rows are assigned in the a a b b b b a a pattern and selected with y & 7
template <int PageWidth>
constexpr GSPixelRowOffsetTableList<PageWidth, 7> makeRowOffsetTableList(
	const GSSizedPixelRowOffsetTable<PageWidth>* a,
	const GSSizedPixelRowOffsetTable<PageWidth>* b)
{
	return GSPixelRowOffsetTableList<PageWidth, 7>{{a, a, b, b, b, b, a, a}};
}
/// Just here to force external linkage so we don't end up with multiple copies of pixelRowOffset*
struct GSTables
{
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset32;
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset32Z;
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16;
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16S;
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16Z;
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16SZ;
static const GSSizedPixelRowOffsetTable<128> _pixelRowOffset8[2];
static const GSSizedPixelRowOffsetTable<128> _pixelRowOffset4[2];
static constexpr auto pixelRowOffset32 = makeRowOffsetTableList(&_pixelRowOffset32);
static constexpr auto pixelRowOffset32Z = makeRowOffsetTableList(&_pixelRowOffset32Z);
static constexpr auto pixelRowOffset16 = makeRowOffsetTableList(&_pixelRowOffset16);
static constexpr auto pixelRowOffset16S = makeRowOffsetTableList(&_pixelRowOffset16S);
static constexpr auto pixelRowOffset16Z = makeRowOffsetTableList(&_pixelRowOffset16Z);
static constexpr auto pixelRowOffset16SZ = makeRowOffsetTableList(&_pixelRowOffset16SZ);
static constexpr auto pixelRowOffset8 = makeRowOffsetTableList(&_pixelRowOffset8[0], &_pixelRowOffset8[1]);
static constexpr auto pixelRowOffset4 = makeRowOffsetTableList(&_pixelRowOffset4[0], &_pixelRowOffset4[1]);
};
// One GSSwizzleTableList per swizzle, bundling its block, column offset and row offset tables.
// The concrete GSSwizzleTableList type (and so the page/block dimensions) is deduced from the tables passed in.
// Each list holds references to the tables rather than copies.
constexpr auto swizzleTables32 = makeSwizzleTableList(blockTable32, pixelColOffset32, GSTables::pixelRowOffset32 );
constexpr auto swizzleTables32Z = makeSwizzleTableList(blockTable32Z, pixelColOffset32Z, GSTables::pixelRowOffset32Z );
constexpr auto swizzleTables16 = makeSwizzleTableList(blockTable16, pixelColOffset16, GSTables::pixelRowOffset16 );
constexpr auto swizzleTables16Z = makeSwizzleTableList(blockTable16Z, pixelColOffset16Z, GSTables::pixelRowOffset16Z );
constexpr auto swizzleTables16S = makeSwizzleTableList(blockTable16S, pixelColOffset16S, GSTables::pixelRowOffset16S );
constexpr auto swizzleTables16SZ = makeSwizzleTableList(blockTable16SZ, pixelColOffset16SZ, GSTables::pixelRowOffset16SZ);
constexpr auto swizzleTables8 = makeSwizzleTableList(blockTable8, pixelColOffset8, GSTables::pixelRowOffset8 );
constexpr auto swizzleTables4 = makeSwizzleTableList(blockTable4, pixelColOffset4, GSTables::pixelRowOffset4 );

View File

@ -907,26 +907,22 @@ void GSRendererHW::SwSpriteRender()
for (int y = 0; y < h; y++, ++sy, ++dy) for (int y = 0; y < h; y++, ++sy, ++dy)
{ {
GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sx, sy) : GSOffset::PAHelper(); GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sy) : GSOffset::PAHelper();
GSOffset::PAHelper dpa = dpo.paMulti(dx, dy); GSOffset::PAHelper dpa = dpo.paMulti(dy);
ASSERT(w % 2 == 0); ASSERT(w % 2 == 0);
for (int x = 0; x < w; x += 2) for (int x = 0; x < w; x += 2)
{ {
uint32 di = dpa.value(); uint32 di = dpa.value(dx + x);
dpa.incX(); ASSERT(di + 1 == dpa.value(dx + x + 1)); // Destination pixel pair is adjacent in memory
ASSERT(di + 1 == dpa.value()); // Destination pixel pair is adjacent in memory
dpa.incX();
GSVector4i sc; GSVector4i sc;
if (texture_mapping_enabled) if (texture_mapping_enabled)
{ {
uint32 si = spa.value(); uint32 si = spa.value(sx + x);
spa.incX();
// Read 2 source pixel colors // Read 2 source pixel colors
ASSERT((si + 1) == spa.value()); // Source pixel pair is adjacent in memory ASSERT((si + 1) == spa.value(sx + x + 1)); // Source pixel pair is adjacent in memory
spa.incX();
sc = GSVector4i::loadl(&m_mem.m_vm32[si]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr sc = GSVector4i::loadl(&m_mem.m_vm32[si]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
// Apply TFX // Apply TFX
@ -1827,11 +1823,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel32 // Based on WritePixel32
for (int y = r.top; y < r.bottom; y++) for (int y = r.top; y < r.bottom; y++)
{ {
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y); GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(y);
for (; pa.x() < r.right; pa.incX()) for (int x = r.left; x < r.right; x++)
{ {
m_mem.m_vm32[pa.value()] = 0; // Here the constant color m_mem.m_vm32[pa.value(x)] = 0; // Here the constant color
} }
} }
} }
@ -1840,11 +1836,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel24 // Based on WritePixel24
for (int y = r.top; y < r.bottom; y++) for (int y = r.top; y < r.bottom; y++)
{ {
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y); GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(y);
for (; pa.x() < r.right; pa.incX()) for (int x = r.left; x < r.right; x++)
{ {
m_mem.m_vm32[pa.value()] &= 0xff000000; // Clear the color m_mem.m_vm32[pa.value(x)] &= 0xff000000; // Clear the color
} }
} }
} }
@ -1855,11 +1851,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel16 // Based on WritePixel16
for(int y = r.top; y < r.bottom; y++) for(int y = r.top; y < r.bottom; y++)
{ {
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(r.left, y); GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(y);
for(int x = r.left; x < r.right; x++) for(int x = r.left; x < r.right; x++)
{ {
m_mem.m_vm16[pa.value()] = 0; // Here the constant color m_mem.m_vm16[pa.value(x)] = 0; // Here the constant color
} }
} }
#endif #endif

View File

@ -2942,11 +2942,11 @@ void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, uint32 c
for (int y = r.y; y < r.w; y++) for (int y = r.y; y < r.w; y++)
{ {
GSOffset::PAHelper pa = off.paMulti(r.x, y); GSOffset::PAHelper pa = off.paMulti(y);
for (; pa.x() < r.z; pa.incX()) for (int x = r.x; x < r.z; x++)
{ {
T& d = vm[pa.value()]; T& d = vm[pa.value(x)];
d = (T)(!masked ? c : (c | (d & m))); d = (T)(!masked ? c : (c | (d & m)));
} }
} }