mirror of https://github.com/PCSX2/pcsx2.git
GS: Switch back to row+column for pixel lookups
Code for the full calculation was way too complicated to run for every pixel in a loop
This commit is contained in:
parent
b2fb6c7804
commit
b901c6af71
|
@ -197,13 +197,14 @@ template <int n>
|
|||
void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||
{
|
||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
|
||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
|
||||
|
||||
int x = TEXCLUT.COU << 4;
|
||||
uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
|
||||
|
||||
for (int i = 0; i < n; pa.incX(), i++)
|
||||
for (int i = 0; i < n; x++, i++)
|
||||
{
|
||||
uint32 c = m_mem->m_vm32[pa.value()];
|
||||
uint32 c = m_mem->m_vm32[pa.value(x)];
|
||||
|
||||
clut[i] = (uint16)(c & 0xffff);
|
||||
clut[i + 256] = (uint16)(c >> 16);
|
||||
|
@ -214,13 +215,14 @@ template <int n>
|
|||
void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||
{
|
||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
|
||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
|
||||
|
||||
int x = TEXCLUT.COU << 4;
|
||||
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
||||
|
||||
for (int i = 0; i < n; pa.incX(), i++)
|
||||
for (int i = 0; i < n; x++, i++)
|
||||
{
|
||||
clut[i] = m_mem->m_vm16[pa.value()];
|
||||
clut[i] = m_mem->m_vm16[pa.value(x)];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -228,13 +230,14 @@ template <int n>
|
|||
void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||
{
|
||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
|
||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
|
||||
|
||||
int x = TEXCLUT.COU << 4;
|
||||
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
||||
|
||||
for (int i = 0; i < n; pa.incX(), i++)
|
||||
for (int i = 0; i < n; x++, i++)
|
||||
{
|
||||
clut[i] = m_mem->m_vm16[pa.value()];
|
||||
clut[i] = m_mem->m_vm16[pa.value(x)];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,14 +41,14 @@ static void foreachBlock(const GSOffset& off, GSLocalMemory* mem, const GSVector
|
|||
|
||||
//
|
||||
|
||||
GSPageOffsetTable<32, 64> GSLocalMemory::pageOffset32;
|
||||
GSPageOffsetTable<32, 64> GSLocalMemory::pageOffset32Z;
|
||||
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16;
|
||||
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16S;
|
||||
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16Z;
|
||||
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16SZ;
|
||||
GSPageOffsetTable<64, 128> GSLocalMemory::pageOffset8;
|
||||
GSPageOffsetTable<128, 128> GSLocalMemory::pageOffset4;
|
||||
constexpr GSSwizzleInfo GSLocalMemory::swizzle32;
|
||||
constexpr GSSwizzleInfo GSLocalMemory::swizzle32Z;
|
||||
constexpr GSSwizzleInfo GSLocalMemory::swizzle16;
|
||||
constexpr GSSwizzleInfo GSLocalMemory::swizzle16S;
|
||||
constexpr GSSwizzleInfo GSLocalMemory::swizzle16Z;
|
||||
constexpr GSSwizzleInfo GSLocalMemory::swizzle16SZ;
|
||||
constexpr GSSwizzleInfo GSLocalMemory::swizzle8;
|
||||
constexpr GSSwizzleInfo GSLocalMemory::swizzle4;
|
||||
|
||||
//
|
||||
|
||||
|
@ -56,21 +56,6 @@ GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
|
|||
|
||||
//
|
||||
|
||||
template <int PageHeight, int PageWidth, int ColHeight, int ColWidth, typename Col>
|
||||
static void setupPageOffsetTable(GSPageOffsetTable<PageHeight, PageWidth>& table, const GSBlockSwizzleTable& block, Col (&col)[ColHeight][ColWidth])
|
||||
{
|
||||
int blockSize = ColHeight * ColWidth;
|
||||
for (int y = 0; y < PageHeight; y++)
|
||||
{
|
||||
for (int x = 0; x < 256; x++)
|
||||
{
|
||||
int colOff = col[y % ColHeight][x % ColWidth];
|
||||
int blockOff = block.lookup(x / ColWidth, y / ColHeight);
|
||||
table.value[y].value[x] = blockOff * blockSize + colOff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GSLocalMemory::GSLocalMemory()
|
||||
: m_clut(this)
|
||||
{
|
||||
|
@ -101,15 +86,6 @@ GSLocalMemory::GSLocalMemory()
|
|||
|
||||
memset(m_vm8, 0, m_vmsize);
|
||||
|
||||
setupPageOffsetTable(pageOffset32, blockTable32, columnTable32);
|
||||
setupPageOffsetTable(pageOffset32Z, blockTable32Z, columnTable32);
|
||||
setupPageOffsetTable(pageOffset16, blockTable16, columnTable16);
|
||||
setupPageOffsetTable(pageOffset16S, blockTable16S, columnTable16);
|
||||
setupPageOffsetTable(pageOffset16Z, blockTable16Z, columnTable16);
|
||||
setupPageOffsetTable(pageOffset16SZ, blockTable16SZ, columnTable16);
|
||||
setupPageOffsetTable(pageOffset8, blockTable8, columnTable8);
|
||||
setupPageOffsetTable(pageOffset4, blockTable4, columnTable4);
|
||||
|
||||
for (size_t i = 0; i < countof(m_psm); i++)
|
||||
{
|
||||
m_psm[i].info = GSLocalMemory::swizzle32;
|
||||
|
@ -1108,20 +1084,19 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
|||
|
||||
auto copy = [&](int len, const GSOffset& off, auto&& fn)
|
||||
{
|
||||
GSOffset::PAHelper pa = off.paMulti(x, y);
|
||||
GSOffset::PAHelper pa = off.paMulti(y);
|
||||
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
fn(pa);
|
||||
pa.incX();
|
||||
if (pa.x() >= ex)
|
||||
x++;
|
||||
if (x >= ex)
|
||||
{
|
||||
y++;
|
||||
pa = off.paMulti(sx, y);
|
||||
x = sx;
|
||||
pa = off.paMulti(y);
|
||||
}
|
||||
}
|
||||
|
||||
x = pa.x();
|
||||
};
|
||||
|
||||
GSOffset off = GetOffset(bp, bw, BITBLTBUF.DPSM);
|
||||
|
@ -1132,7 +1107,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
|||
case PSM_PSMZ32:
|
||||
copy(len / 4, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
WritePixel32(pa.value(), *pd);
|
||||
WritePixel32(pa.value(x), *pd);
|
||||
pd++;
|
||||
});
|
||||
break;
|
||||
|
@ -1141,7 +1116,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
|||
case PSM_PSMZ24:
|
||||
copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
WritePixel24(pa.value(), *(uint32*)pb);
|
||||
WritePixel24(pa.value(x), *(uint32*)pb);
|
||||
pb += 3;
|
||||
});
|
||||
break;
|
||||
|
@ -1152,7 +1127,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
|||
case PSM_PSMZ16S:
|
||||
copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
WritePixel16(pa.value(), *pw);
|
||||
WritePixel16(pa.value(x), *pw);
|
||||
pw++;
|
||||
});
|
||||
break;
|
||||
|
@ -1160,7 +1135,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
|||
case PSM_PSMT8:
|
||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
WritePixel8(pa.value(), *pb);
|
||||
WritePixel8(pa.value(x), *pb);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
@ -1168,9 +1143,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
|||
case PSM_PSMT4:
|
||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
WritePixel4(pa.value(), *pb & 0xf);
|
||||
pa.incX();
|
||||
WritePixel4(pa.value(), *pb >> 4);
|
||||
WritePixel4(pa.value(x++), *pb & 0xf);
|
||||
WritePixel4(pa.value(x), *pb >> 4);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
@ -1178,7 +1152,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
|||
case PSM_PSMT8H:
|
||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
WritePixel8H(pa.value(), *pb);
|
||||
WritePixel8H(pa.value(x), *pb);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
@ -1186,9 +1160,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
|||
case PSM_PSMT4HL:
|
||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
WritePixel4HL(pa.value(), *pb & 0xf);
|
||||
pa.incX();
|
||||
WritePixel4HL(pa.value(), *pb >> 4);
|
||||
WritePixel4HL(pa.value(x++), *pb & 0xf);
|
||||
WritePixel4HL(pa.value(x), *pb >> 4);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
@ -1196,9 +1169,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
|||
case PSM_PSMT4HH:
|
||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
WritePixel4HH(pa.value(), *pb & 0xf);
|
||||
pa.incX();
|
||||
WritePixel4HH(pa.value(), *pb >> 4);
|
||||
WritePixel4HH(pa.value(x++), *pb & 0xf);
|
||||
WritePixel4HH(pa.value(x), *pb >> 4);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
@ -1230,20 +1202,19 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
|
||||
auto copy = [&](int len, const GSOffset& off, auto&& fn)
|
||||
{
|
||||
GSOffset::PAHelper pa = off.paMulti(x, y);
|
||||
GSOffset::PAHelper pa = off.paMulti(y);
|
||||
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
fn(pa);
|
||||
pa.incX();
|
||||
if (pa.x() >= ex)
|
||||
x++;
|
||||
if (x >= ex)
|
||||
{
|
||||
y++;
|
||||
pa = off.paMulti(sx, y);
|
||||
x = sx;
|
||||
pa = off.paMulti(y);
|
||||
}
|
||||
}
|
||||
|
||||
x = pa.x();
|
||||
};
|
||||
|
||||
GSOffset off = GetOffset(bp, bw, BITBLTBUF.SPSM);
|
||||
|
@ -1259,49 +1230,48 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
|
||||
len /= 4;
|
||||
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(swizzle32).paMulti(x, y);
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(swizzle32).paMulti(y);
|
||||
|
||||
while (len > 0)
|
||||
{
|
||||
for (; len > 0 && pa.x() < ex && (pa.x() & 7); len--, pa.incX(), pd++)
|
||||
for (; len > 0 && x < ex && (x & 7); len--, x++, pd++)
|
||||
{
|
||||
*pd = m_vm32[pa.value()];
|
||||
*pd = m_vm32[pa.value(x)];
|
||||
}
|
||||
|
||||
// aligned to a column
|
||||
|
||||
for (int ex8 = ex - 8; len >= 8 && pa.x() <= ex8; len -= 8, pd += 8)
|
||||
for (int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8)
|
||||
{
|
||||
uint32* ps = m_vm32 + pa.value();
|
||||
uint32* ps = m_vm32 + pa.value(x);
|
||||
|
||||
GSVector4i::store<false>(&pd[0], GSVector4i::load(ps + 0, ps + 4));
|
||||
GSVector4i::store<false>(&pd[4], GSVector4i::load(ps + 8, ps + 12));
|
||||
|
||||
for (int i = 0; i < 8; i++, pa.incX())
|
||||
ASSERT(pd[i] == m_vm32[pa.value()]);
|
||||
for (int i = 0; i < 8; i++)
|
||||
ASSERT(pd[i] == m_vm32[pa.value(x + i)]);
|
||||
}
|
||||
|
||||
for (; len > 0 && pa.x() < ex; len--, pa.incX(), pd++)
|
||||
for (; len > 0 && x < ex; len--, x++, pd++)
|
||||
{
|
||||
*pd = m_vm32[pa.value()];
|
||||
*pd = m_vm32[pa.value(x)];
|
||||
}
|
||||
|
||||
if (pa.x() == ex)
|
||||
if (x == ex)
|
||||
{
|
||||
y++;
|
||||
pa = off.assertSizesMatch(swizzle32).paMulti(sx, y);
|
||||
x = sx;
|
||||
pa = off.assertSizesMatch(swizzle32).paMulti(y);
|
||||
}
|
||||
}
|
||||
|
||||
x = pa.x();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PSM_PSMCT24:
|
||||
case PSM_PSMZ24:
|
||||
copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
uint32 c = m_vm32[pa.value()];
|
||||
uint32 c = m_vm32[pa.value(x)];
|
||||
pb[0] = (uint8)(c);
|
||||
pb[1] = (uint8)(c >> 8);
|
||||
pb[2] = (uint8)(c >> 16);
|
||||
|
@ -1315,7 +1285,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
case PSM_PSMZ16S:
|
||||
copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
*pw = m_vm16[pa.value()];
|
||||
*pw = m_vm16[pa.value(x)];
|
||||
pw++;
|
||||
});
|
||||
break;
|
||||
|
@ -1323,7 +1293,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
case PSM_PSMT8:
|
||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
*pb = m_vm8[pa.value()];
|
||||
*pb = m_vm8[pa.value(x)];
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
@ -1331,9 +1301,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
case PSM_PSMT4:
|
||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
uint8 low = ReadPixel4(pa.value());
|
||||
pa.incX();
|
||||
uint8 high = ReadPixel4(pa.value());
|
||||
uint8 low = ReadPixel4(pa.value(x++));
|
||||
uint8 high = ReadPixel4(pa.value(x));
|
||||
*pb = low | (high << 4);
|
||||
pb++;
|
||||
});
|
||||
|
@ -1342,7 +1311,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
case PSM_PSMT8H:
|
||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
*pb = (uint8)(m_vm32[pa.value()] >> 24);
|
||||
*pb = (uint8)(m_vm32[pa.value(x)] >> 24);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
@ -1350,9 +1319,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
case PSM_PSMT4HL:
|
||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
uint32 c0 = m_vm32[pa.value()] >> 24 & 0x0f;
|
||||
pa.incX();
|
||||
uint32 c1 = m_vm32[pa.value()] >> 20 & 0xf0;
|
||||
uint32 c0 = m_vm32[pa.value(x++)] >> 24 & 0x0f;
|
||||
uint32 c1 = m_vm32[pa.value(x)] >> 20 & 0xf0;
|
||||
*pb = (uint8)(c0 | c1);
|
||||
pb++;
|
||||
});
|
||||
|
@ -1361,9 +1329,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
case PSM_PSMT4HH:
|
||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa)
|
||||
{
|
||||
uint32 c0 = m_vm32[pa.value()] >> 28 & 0x0f;
|
||||
pa.incX();
|
||||
uint32 c1 = m_vm32[pa.value()] >> 24 & 0xf0;
|
||||
uint32 c0 = m_vm32[pa.value(x++)] >> 28 & 0x0f;
|
||||
uint32 c1 = m_vm32[pa.value(x)] >> 24 & 0xf0;
|
||||
*pb = (uint8)(c0 | c1);
|
||||
pb++;
|
||||
});
|
||||
|
|
|
@ -41,38 +41,66 @@ struct GSPixelOffset4
|
|||
uint32 fbp, zbp, fpsm, zpsm, bw;
|
||||
};
|
||||
|
||||
struct alignas(128) GSPageOffsetRow
|
||||
{
|
||||
// Maximum page width is 128, but store mirror for unaligned simd loads
|
||||
uint32 value[256];
|
||||
};
|
||||
class GSOffset;
|
||||
|
||||
template <int Height, int Width>
|
||||
struct GSPageOffsetTable
|
||||
{
|
||||
GSPageOffsetRow value[Height];
|
||||
};
|
||||
|
||||
class GSSwizzleInfo;
|
||||
|
||||
class GSOffset
|
||||
class GSSwizzleInfo
|
||||
{
|
||||
friend class GSOffset;
|
||||
/// Table for storing swizzling of blocks within a page
|
||||
const GSBlockSwizzleTable* m_blockSwizzle;
|
||||
/// Table for storing swizzling of pixels within a page
|
||||
const GSPageOffsetRow* m_pixelSwizzle;
|
||||
/// Table for storing swizzling of pixels within a page in the y dimension
|
||||
const int* m_pixelSwizzleCol;
|
||||
/// Array of tables for storing swizzling of pixels in the x dimension
|
||||
const GSPixelRowOffsetTable* const* m_pixelSwizzleRow;
|
||||
GSVector2i m_pageMask; ///< Mask for getting the offset of a pixel that's within a page (may also be used as page dimensions - 1)
|
||||
GSVector2i m_blockMask; ///< Mask for getting the offset of a pixel that's within a block (may also be used as block dimensions - 1)
|
||||
uint8 m_pageShiftX; ///< Amount to rshift x value by to get page x offset
|
||||
uint8 m_pageShiftY; ///< Amount to rshift y value by to get page y offset
|
||||
uint8 m_blockShiftX; ///< Amount to rshift x value by to get block x offset
|
||||
uint8 m_blockShiftY; ///< Amount to rshift y value by to get block y offset
|
||||
int m_pixelRowMask; ///< Mask for getting the offset in m_pixelSwizzleRow for a given y value
|
||||
uint8 m_pageShiftX; ///< Amount to rshift x value by to get page offset
|
||||
uint8 m_pageShiftY; ///< Amount to rshift y value by to get page offset
|
||||
uint8 m_blockShiftX; ///< Amount to rshift x value by to get offset in block
|
||||
uint8 m_blockShiftY; ///< Amount to rshift y value by to get offset in block
|
||||
static constexpr uint8 ilog2(uint32 i) { return i < 2 ? 0 : 1 + ilog2(i >> 1); }
|
||||
|
||||
public:
|
||||
GSSwizzleInfo() = default;
|
||||
|
||||
/// @param blockSize Size of block in pixels
|
||||
template <int PageWidth, int PageHeight, int BlocksWide, int BlocksHigh, int PixelRowMask>
|
||||
constexpr GSSwizzleInfo(GSSwizzleTableList<PageHeight, PageWidth, BlocksHigh, BlocksWide, PixelRowMask> list)
|
||||
: m_blockSwizzle(&list.block)
|
||||
, m_pixelSwizzleCol(list.col.value)
|
||||
, m_pixelSwizzleRow(list.row.rows)
|
||||
, m_pageMask{PageWidth - 1, PageHeight - 1}
|
||||
, m_blockMask{(PageWidth / BlocksWide) - 1, (PageHeight / BlocksHigh) - 1}
|
||||
, m_pixelRowMask(PixelRowMask)
|
||||
, m_pageShiftX(ilog2(PageWidth)), m_pageShiftY(ilog2(PageHeight))
|
||||
, m_blockShiftX(ilog2(PageWidth / BlocksWide)), m_blockShiftY(ilog2(PageHeight / BlocksHigh))
|
||||
{
|
||||
static_assert(1 << ilog2(PageWidth) == PageWidth, "PageWidth must be a power of 2");
|
||||
static_assert(1 << ilog2(PageHeight) == PageHeight, "PageHeight must be a power of 2");
|
||||
}
|
||||
|
||||
/// Get the block number of the given pixel
|
||||
uint32 bn(int x, int y, uint32 bp, uint32 bw) const;
|
||||
|
||||
/// Get the address of the given pixel
|
||||
uint32 pa(int x, int y, uint32 bp, uint32 bw) const;
|
||||
};
|
||||
|
||||
class GSOffset : GSSwizzleInfo
|
||||
{
|
||||
int m_bp; ///< Offset's base pointer (same measurement as GS)
|
||||
int m_bwPg; ///< Offset's buffer width in pages (not equal to bw in GS for 8 and 4-bit textures)
|
||||
int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffset algorithms)
|
||||
public:
|
||||
GSOffset() = default;
|
||||
constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm);
|
||||
constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
|
||||
: GSSwizzleInfo(swz)
|
||||
, m_bp(bp)
|
||||
, m_bwPg(bw >> (m_pageShiftX - 6))
|
||||
, m_psm(psm)
|
||||
{
|
||||
}
|
||||
/// Help the optimizer by using this method instead of GSLocalMemory::GetOffset when the PSM is known
|
||||
constexpr static GSOffset fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm);
|
||||
|
||||
|
@ -182,49 +210,37 @@ public:
|
|||
class PAHelper
|
||||
{
|
||||
/// Pixel swizzle array
|
||||
const GSPageOffsetRow* m_pixelSwizzle;
|
||||
int m_pageMaskX; ///< Mask for getting offset within a page
|
||||
int m_base; ///< Address for origin x
|
||||
int m_x; ///< Current x position
|
||||
int m_shift; ///< Amount to lshift x to get offset due to page after clearing with pageMaskX
|
||||
int m_mask; ///< Mask to stay in bounds
|
||||
const GSPixelRowOffsetTable* m_pixelSwizzleRow;
|
||||
int m_base;
|
||||
|
||||
public:
|
||||
PAHelper() = default;
|
||||
PAHelper(const GSOffset& off, int x, int y)
|
||||
PAHelper(const GSOffset& off, int y)
|
||||
{
|
||||
m_pixelSwizzle = off.m_pixelSwizzle + (y & off.m_pageMask.y);
|
||||
m_pixelSwizzleRow = off.m_pixelSwizzleRow[y & off.m_pixelRowMask];
|
||||
m_base = off.m_bp << (off.m_pageShiftX + off.m_pageShiftY - 5);
|
||||
m_base += ((y & ~off.m_pageMask.y) * off.m_bwPg) << off.m_pageShiftX;
|
||||
m_pageMaskX = off.m_pageMask.x;
|
||||
m_shift = off.m_pageShiftY;
|
||||
m_x = x;
|
||||
m_mask = (MAX_PAGES << (off.m_pageShiftX + off.m_pageShiftY)) - 1;
|
||||
m_base &= (MAX_PAGES << (off.m_pageShiftX + off.m_pageShiftY)) - 1;
|
||||
m_base += off.m_pixelSwizzleCol[y & off.m_pageMask.y];
|
||||
}
|
||||
|
||||
/// Get current x value
|
||||
int x() const { return m_x; }
|
||||
/// Increment x value
|
||||
void incX() { m_x++; }
|
||||
/// Decrement x value
|
||||
void decX() { m_x--; }
|
||||
/// Get current pixel address
|
||||
uint32 value() const
|
||||
uint32 value(size_t x) const
|
||||
{
|
||||
int x = (m_x & ~m_pageMaskX) << m_shift;
|
||||
return (m_base + x + m_pixelSwizzle->value[m_x & m_pageMaskX]) & m_mask;
|
||||
return m_base + (*m_pixelSwizzleRow)[x];
|
||||
}
|
||||
};
|
||||
|
||||
/// Get the address of the given pixel
|
||||
uint32 pa(int x, int y) const
|
||||
{
|
||||
return PAHelper(*this, x, y).value();
|
||||
return PAHelper(*this, y).value(x);
|
||||
}
|
||||
|
||||
/// Get a helper class for efficiently calculating multiple pixel addresses in a line (along the x axis)
|
||||
PAHelper paMulti(int x, int y) const
|
||||
PAHelper paMulti(int y) const
|
||||
{
|
||||
return PAHelper(*this, x, y);
|
||||
return PAHelper(*this, y);
|
||||
}
|
||||
|
||||
/// Loop over the pixels in the given rectangle
|
||||
|
@ -236,11 +252,10 @@ public:
|
|||
|
||||
for (int y = r.top; y < r.bottom; y++, px = reinterpret_cast<Src*>(reinterpret_cast<uint8*>(px) + pitch))
|
||||
{
|
||||
PAHelper pa = paMulti(r.left, y);
|
||||
while (pa.x() < r.right)
|
||||
PAHelper pa = paMulti(y);
|
||||
for (int x = r.left; x < r.right; x++)
|
||||
{
|
||||
fn(vm + pa.value(), px + pa.x());
|
||||
pa.incX();
|
||||
fn(vm + pa.value(x), px + x);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -314,93 +329,30 @@ public:
|
|||
|
||||
/// Use compile-time dimensions from `swz` as a performance optimization
|
||||
/// Also asserts if your assumption was wrong
|
||||
constexpr GSOffset assertSizesMatch(const GSSwizzleInfo& swz) const;
|
||||
};
|
||||
|
||||
class GSSwizzleInfo
|
||||
{
|
||||
friend class GSOffset;
|
||||
/// Table for storing swizzling of blocks within a page
|
||||
const GSBlockSwizzleTable* m_blockSwizzle;
|
||||
/// Table for storing swizzling of pixels within a page
|
||||
const GSPageOffsetRow* m_pixelSwizzle;
|
||||
GSVector2i m_pageMask; ///< Mask for getting the offset of a pixel that's within a page (may also be used as page dimensions - 1)
|
||||
GSVector2i m_blockMask; ///< Mask for getting the offset of a pixel that's within a block (may also be used as block dimensions - 1)
|
||||
uint8 m_pageShiftX; ///< Amount to rshift x value by to get page offset
|
||||
uint8 m_pageShiftY; ///< Amount to rshift y value by to get page offset
|
||||
uint8 m_blockShiftX; ///< Amount to rshift x value by to get offset in block
|
||||
uint8 m_blockShiftY; ///< Amount to rshift y value by to get offset in block
|
||||
static constexpr uint8 ilog2(uint32 i) { return i < 2 ? 0 : 1 + ilog2(i>>1); }
|
||||
public:
|
||||
GSSwizzleInfo() = default;
|
||||
|
||||
/// @param PageWidth Width of page in pixels
|
||||
/// @param PageHeight Height of page in pixels
|
||||
/// @param blockSize Size of block in pixels
|
||||
template <int PageWidth, int PageHeight>
|
||||
constexpr GSSwizzleInfo(GSVector2i blockSize, const GSBlockSwizzleTable* blockSwizzle, const GSPageOffsetTable<PageHeight, PageWidth>* pxSwizzle)
|
||||
: m_blockSwizzle(blockSwizzle)
|
||||
, m_pixelSwizzle(pxSwizzle->value)
|
||||
, m_pageMask{PageWidth - 1, PageHeight - 1}
|
||||
, m_blockMask{blockSize.x - 1, blockSize.y - 1}
|
||||
, m_pageShiftX(ilog2(PageWidth)), m_pageShiftY(ilog2(PageHeight))
|
||||
, m_blockShiftX(ilog2(blockSize.x)), m_blockShiftY(ilog2(blockSize.y))
|
||||
constexpr GSOffset assertSizesMatch(const GSSwizzleInfo& swz) const
|
||||
{
|
||||
static_assert(1 << ilog2(PageWidth) == PageWidth, "PageWidth must be a power of 2");
|
||||
static_assert(1 << ilog2(PageHeight) == PageHeight, "PageHeight must be a power of 2");
|
||||
}
|
||||
|
||||
/// Get the block number of the given pixel
|
||||
uint32 bn(int x, int y, uint32 bp, uint32 bw) const
|
||||
{
|
||||
return GSOffset(*this, bp, bw, 0).bn(x, y);
|
||||
}
|
||||
|
||||
/// Get the address of the given pixel
|
||||
uint32 pa(int x, int y, uint32 bp, uint32 bw) const
|
||||
{
|
||||
return GSOffset(*this, bp, bw, 0).pa(x, y);
|
||||
}
|
||||
|
||||
/// Loop over all the pages in the given rect, calling `fn` on each
|
||||
template <typename Fn>
|
||||
void loopPages(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
|
||||
{
|
||||
GSOffset(*this, bp, bw, 0).loopPages(rect, std::forward<Fn>(fn));
|
||||
}
|
||||
|
||||
/// Loop over all the blocks in the given rect, calling `fn` on each
|
||||
template <typename Fn>
|
||||
void loopBlocks(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
|
||||
{
|
||||
GSOffset(*this, bp, bw, 0).loopBlocks(rect, std::forward<Fn>(fn));
|
||||
GSOffset o = *this;
|
||||
#define MATCH(x) ASSERT(o.x == swz.x); o.x = swz.x;
|
||||
MATCH(m_pageMask)
|
||||
MATCH(m_blockMask)
|
||||
MATCH(m_pixelRowMask)
|
||||
MATCH(m_pageShiftX)
|
||||
MATCH(m_pageShiftY)
|
||||
MATCH(m_blockShiftX)
|
||||
MATCH(m_blockShiftY)
|
||||
#undef MATCH
|
||||
return o;
|
||||
}
|
||||
};
|
||||
|
||||
constexpr inline GSOffset::GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
|
||||
: m_blockSwizzle(swz.m_blockSwizzle)
|
||||
, m_pixelSwizzle(swz.m_pixelSwizzle)
|
||||
, m_pageMask(swz.m_pageMask), m_blockMask(swz.m_blockMask)
|
||||
, m_pageShiftX(swz.m_pageShiftX), m_pageShiftY(swz.m_pageShiftY)
|
||||
, m_blockShiftX(swz.m_blockShiftX), m_blockShiftY(swz.m_blockShiftY)
|
||||
, m_bp(bp)
|
||||
, m_bwPg(bw >> (m_pageShiftX - 6))
|
||||
, m_psm(psm)
|
||||
inline uint32 GSSwizzleInfo::bn(int x, int y, uint32 bp, uint32 bw) const
|
||||
{
|
||||
return GSOffset(*this, bp, bw, 0).bn(x, y);
|
||||
}
|
||||
|
||||
constexpr GSOffset GSOffset::assertSizesMatch(const GSSwizzleInfo& swz) const
|
||||
inline uint32 GSSwizzleInfo::pa(int x, int y, uint32 bp, uint32 bw) const
|
||||
{
|
||||
GSOffset o = *this;
|
||||
#define MATCH(x) ASSERT(o.x == swz.x); o.x = swz.x;
|
||||
MATCH(m_pageMask)
|
||||
MATCH(m_blockMask)
|
||||
MATCH(m_pageShiftX)
|
||||
MATCH(m_pageShiftY)
|
||||
MATCH(m_blockShiftX)
|
||||
MATCH(m_blockShiftY)
|
||||
#undef MATCH
|
||||
return o;
|
||||
return GSOffset(*this, bp, bw, 0).pa(x, y);
|
||||
}
|
||||
|
||||
class GSLocalMemory : public GSAlignedClass<32>
|
||||
|
@ -452,24 +404,15 @@ public:
|
|||
protected:
|
||||
bool m_use_fifo_alloc;
|
||||
|
||||
static GSPageOffsetTable<32, 64> pageOffset32;
|
||||
static GSPageOffsetTable<32, 64> pageOffset32Z;
|
||||
static GSPageOffsetTable<64, 64> pageOffset16;
|
||||
static GSPageOffsetTable<64, 64> pageOffset16S;
|
||||
static GSPageOffsetTable<64, 64> pageOffset16Z;
|
||||
static GSPageOffsetTable<64, 64> pageOffset16SZ;
|
||||
static GSPageOffsetTable<64, 128> pageOffset8;
|
||||
static GSPageOffsetTable<128, 128> pageOffset4;
|
||||
|
||||
public:
|
||||
static constexpr GSSwizzleInfo swizzle32{{8, 8}, &blockTable32, &pageOffset32};
|
||||
static constexpr GSSwizzleInfo swizzle32Z{{8, 8}, &blockTable32Z, &pageOffset32Z};
|
||||
static constexpr GSSwizzleInfo swizzle16{{16, 8}, &blockTable16, &pageOffset16};
|
||||
static constexpr GSSwizzleInfo swizzle16S{{16, 8}, &blockTable16S, &pageOffset16S};
|
||||
static constexpr GSSwizzleInfo swizzle16Z{{16, 8}, &blockTable16Z, &pageOffset16Z};
|
||||
static constexpr GSSwizzleInfo swizzle16SZ{{16, 8}, &blockTable16SZ, &pageOffset16SZ};
|
||||
static constexpr GSSwizzleInfo swizzle8{{16, 16}, &blockTable8, &pageOffset8};
|
||||
static constexpr GSSwizzleInfo swizzle4{{32, 16}, &blockTable4, &pageOffset4};
|
||||
static constexpr GSSwizzleInfo swizzle32 {swizzleTables32};
|
||||
static constexpr GSSwizzleInfo swizzle32Z {swizzleTables32Z};
|
||||
static constexpr GSSwizzleInfo swizzle16 {swizzleTables16};
|
||||
static constexpr GSSwizzleInfo swizzle16S {swizzleTables16S};
|
||||
static constexpr GSSwizzleInfo swizzle16Z {swizzleTables16Z};
|
||||
static constexpr GSSwizzleInfo swizzle16SZ {swizzleTables16SZ};
|
||||
static constexpr GSSwizzleInfo swizzle8 {swizzleTables8};
|
||||
static constexpr GSSwizzleInfo swizzle4 {swizzleTables4};
|
||||
|
||||
protected:
|
||||
__forceinline static uint32 Expand24To32(uint32 c, const GIFRegTEXA& TEXA)
|
||||
|
|
|
@ -1663,14 +1663,12 @@ void GSState::Move()
|
|||
{
|
||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
||||
{
|
||||
GSOffset::PAHelper s = spo.paMulti(sx, sy);
|
||||
GSOffset::PAHelper d = dpo.paMulti(dx, dy);
|
||||
GSOffset::PAHelper s = spo.paMulti(sy);
|
||||
GSOffset::PAHelper d = dpo.paMulti(dy);
|
||||
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
pxCopyFn(d.value(), s.value());
|
||||
s.incX();
|
||||
d.incX();
|
||||
pxCopyFn(d.value(dx + x), s.value(sx + x));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1678,14 +1676,12 @@ void GSState::Move()
|
|||
{
|
||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
||||
{
|
||||
GSOffset::PAHelper s = spo.paMulti(sx, sy);
|
||||
GSOffset::PAHelper d = dpo.paMulti(dx, dy);
|
||||
GSOffset::PAHelper s = spo.paMulti(sy);
|
||||
GSOffset::PAHelper d = dpo.paMulti(dy);
|
||||
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
pxCopyFn(d.value(), s.value());
|
||||
s.decX();
|
||||
d.decX();
|
||||
pxCopyFn(d.value(dx - x), s.value(sx - x));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
#include "GS_types.h"
|
||||
|
||||
template <int Width, int Height>
|
||||
static constexpr GSBlockSwizzleTable makeSwizzleTable(const uint8 (&arr)[Height][Width]) {
|
||||
GSBlockSwizzleTable table = {};
|
||||
static constexpr GSSizedBlockSwizzleTable<Height, Width> makeSwizzleTable(const uint8 (&arr)[Height][Width]) {
|
||||
GSSizedBlockSwizzleTable<Height, Width> table = {};
|
||||
for (int y = 0; y < 8; y++) {
|
||||
for (int x = 0; x < 8; x++) {
|
||||
table.value[y][x] = arr[y % Height][x % Width];
|
||||
|
@ -114,14 +114,14 @@ static constexpr uint8 _blockTable4[8][4] =
|
|||
{ 21, 23, 29, 31 }
|
||||
};
|
||||
|
||||
constexpr GSBlockSwizzleTable blockTable32 = makeSwizzleTable(_blockTable32);
|
||||
constexpr GSBlockSwizzleTable blockTable32Z = makeSwizzleTable(_blockTable32Z);
|
||||
constexpr GSBlockSwizzleTable blockTable16 = makeSwizzleTable(_blockTable16);
|
||||
constexpr GSBlockSwizzleTable blockTable16S = makeSwizzleTable(_blockTable16S);
|
||||
constexpr GSBlockSwizzleTable blockTable16Z = makeSwizzleTable(_blockTable16Z);
|
||||
constexpr GSBlockSwizzleTable blockTable16SZ = makeSwizzleTable(_blockTable16SZ);
|
||||
constexpr GSBlockSwizzleTable blockTable8 = makeSwizzleTable(_blockTable8);
|
||||
constexpr GSBlockSwizzleTable blockTable4 = makeSwizzleTable(_blockTable4);
|
||||
constexpr GSSizedBlockSwizzleTable<4, 8> blockTable32 = makeSwizzleTable(_blockTable32);
|
||||
constexpr GSSizedBlockSwizzleTable<4, 8> blockTable32Z = makeSwizzleTable(_blockTable32Z);
|
||||
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16 = makeSwizzleTable(_blockTable16);
|
||||
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16S = makeSwizzleTable(_blockTable16S);
|
||||
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16Z = makeSwizzleTable(_blockTable16Z);
|
||||
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16SZ = makeSwizzleTable(_blockTable16SZ);
|
||||
constexpr GSSizedBlockSwizzleTable<4, 8> blockTable8 = makeSwizzleTable(_blockTable8);
|
||||
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable4 = makeSwizzleTable(_blockTable4);
|
||||
|
||||
constexpr uint8 columnTable32[8][8] =
|
||||
{
|
||||
|
@ -290,3 +290,75 @@ constexpr uint8 clutTableT16I4[16] =
|
|||
0, 2, 8, 10, 16, 18, 24, 26,
|
||||
4, 6, 12, 14, 20, 22, 28, 30
|
||||
};
|
||||
|
||||
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
|
||||
constexpr int pxOffset(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int x, int y)
|
||||
{
|
||||
int blockSize = ColHeight * ColWidth;
|
||||
int pageSize = blockSize * BlocksHigh * BlocksWide;
|
||||
int pageWidth = BlocksWide * ColWidth;
|
||||
int pageX = x / pageWidth;
|
||||
int subpageX = x % pageWidth;
|
||||
int blockID = blockTable[y / ColHeight][subpageX / ColWidth];
|
||||
int sublockOffset = colTable[y % ColHeight][subpageX % ColWidth];
|
||||
return pageX * pageSize + blockID * blockSize + sublockOffset;
|
||||
}
|
||||
|
||||
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
|
||||
constexpr GSPixelColOffsetTable<BlocksHigh * ColHeight> makeColOffsetTable(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth])
|
||||
{
|
||||
constexpr int size = BlocksHigh * ColHeight;
|
||||
GSPixelColOffsetTable<size> table = {};
|
||||
for (int y = 0; y < size; y++)
|
||||
{
|
||||
table.value[y] = pxOffset(blockTable, colTable, 0, y);
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
|
||||
constexpr GSSizedPixelRowOffsetTable<BlocksWide * ColWidth> makeRowOffsetTable(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int y)
|
||||
{
|
||||
int base = pxOffset(blockTable, colTable, 0, y);
|
||||
GSSizedPixelRowOffsetTable<BlocksWide * ColWidth> table = {};
|
||||
for (int x = 0; x < 2048; x++)
|
||||
{
|
||||
table.value[x] = pxOffset(blockTable, colTable, x, y) - base;
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
// Column offset tables, one per swizzle, each built from that swizzle's block table and column table
constexpr GSPixelColOffsetTable< 32> pixelColOffset32   = makeColOffsetTable(_blockTable32,   columnTable32);
constexpr GSPixelColOffsetTable< 32> pixelColOffset32Z  = makeColOffsetTable(_blockTable32Z,  columnTable32);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16   = makeColOffsetTable(_blockTable16,   columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16S  = makeColOffsetTable(_blockTable16S,  columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16Z  = makeColOffsetTable(_blockTable16Z,  columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16SZ = makeColOffsetTable(_blockTable16SZ, columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset8    = makeColOffsetTable(_blockTable8,    columnTable8);
constexpr GSPixelColOffsetTable<128> pixelColOffset4    = makeColOffsetTable(_blockTable4,    columnTable4);
|
||||
// These can't be constexpr due to a GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99901
|
||||
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset32 = makeRowOffsetTable(_blockTable32, columnTable32, 0);
|
||||
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset32Z = makeRowOffsetTable(_blockTable32Z, columnTable32, 0);
|
||||
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16 = makeRowOffsetTable(_blockTable16, columnTable16, 0);
|
||||
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16S = makeRowOffsetTable(_blockTable16S, columnTable16, 0);
|
||||
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16Z = makeRowOffsetTable(_blockTable16Z, columnTable16, 0);
|
||||
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16SZ = makeRowOffsetTable(_blockTable16SZ, columnTable16, 0);
|
||||
CONSTINIT const GSSizedPixelRowOffsetTable<128> GSTables::_pixelRowOffset8[2] =
|
||||
{
|
||||
makeRowOffsetTable(_blockTable8, columnTable8, 0),
|
||||
makeRowOffsetTable(_blockTable8, columnTable8, 2),
|
||||
};
|
||||
CONSTINIT const GSSizedPixelRowOffsetTable<128> GSTables::_pixelRowOffset4[2] =
|
||||
{
|
||||
makeRowOffsetTable(_blockTable4, columnTable4, 0),
|
||||
makeRowOffsetTable(_blockTable4, columnTable4, 2),
|
||||
};
|
||||
|
||||
// Namespace-scope definitions of the constexpr static members of GSTables
// NOTE(review): presumably only needed when building as pre-C++17 - confirm before removing
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset32;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset32Z;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16S;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16Z;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16SZ;
constexpr GSPixelRowOffsetTableList<128, 7> GSTables::pixelRowOffset8;
constexpr GSPixelRowOffsetTableList<128, 7> GSTables::pixelRowOffset4;
|
||||
|
|
|
@ -29,14 +29,88 @@ struct alignas(64) GSBlockSwizzleTable
|
|||
}
|
||||
};
|
||||
|
||||
extern const GSBlockSwizzleTable blockTable32;
|
||||
extern const GSBlockSwizzleTable blockTable32Z;
|
||||
extern const GSBlockSwizzleTable blockTable16;
|
||||
extern const GSBlockSwizzleTable blockTable16S;
|
||||
extern const GSBlockSwizzleTable blockTable16Z;
|
||||
extern const GSBlockSwizzleTable blockTable16SZ;
|
||||
extern const GSBlockSwizzleTable blockTable8;
|
||||
extern const GSBlockSwizzleTable blockTable4;
|
||||
/// Adds sizes to GSBlockSwizzleTable for to feel better about not making mistakes
|
||||
template <int Height, int Width>
|
||||
struct GSSizedBlockSwizzleTable : public GSBlockSwizzleTable
|
||||
{
|
||||
};
|
||||
|
||||
/// Table for storing offsets of x = 0 pixels from the beginning of the page
/// Add values from a GSPixelRowOffsetTable to get the pixels for x != 0
template <int Height>
struct alignas(128) GSPixelColOffsetTable
{
	/// One offset per row, zero-initialized by default
	int value[Height] = {};

	/// Returns the offset for row y, wrapping y with y % Height
	/// constexpr so that tables stored as constexpr objects can also be read at compile time
	constexpr int operator[](int y) const
	{
		return value[y % Height];
	}
};
|
||||
|
||||
/// Table for storing offsets of x != 0 pixels from the pixel at the same y where x = 0
|
||||
/// Unlike ColOffsets, this table stretches to the maximum size of a texture so no masking is needed
|
||||
struct alignas(128) GSPixelRowOffsetTable
|
||||
{
|
||||
int value[2048] = {};
|
||||
|
||||
int operator[](size_t x) const
|
||||
{
|
||||
ASSERT(x < 2048);
|
||||
return value[x];
|
||||
}
|
||||
};
|
||||
|
||||
/// Adds size to GSPixelRowOffsetTable to feel better about not making mistakes
|
||||
template <int PageWidth>
|
||||
struct GSSizedPixelRowOffsetTable : public GSPixelRowOffsetTable
|
||||
{
|
||||
};
|
||||
|
||||
/// List of row offset tables
|
||||
/// Some swizzlings (PSMT8 and PSMT4) have different row offsets depending on which column they're a part of
|
||||
/// The ones that do use an a a b b b b a a pattern that repeats every 8 rows.
|
||||
/// You can always look up the correct row in this list with y & 7, but if you use y & Mask where Mask is known at compile time, the compiler should be able to optimize better
|
||||
template <int PageWidth, int Mask>
|
||||
struct alignas(sizeof(void*) * 8) GSPixelRowOffsetTableList
|
||||
{
|
||||
const GSPixelRowOffsetTable* rows[8];
|
||||
|
||||
const GSPixelRowOffsetTable& operator[](int y) const
|
||||
{
|
||||
return *rows[y & Mask];
|
||||
}
|
||||
};
|
||||
|
||||
/// Full pixel offset table
|
||||
/// Template values are for objects constructing from one of these tables
|
||||
template <int PageHeight, int PageWidth, int BlockHeight, int BlockWidth, int RowMask>
|
||||
struct GSSwizzleTableList
|
||||
{
|
||||
const GSSizedBlockSwizzleTable<BlockHeight, BlockWidth>& block;
|
||||
const GSPixelColOffsetTable<PageHeight>& col;
|
||||
const GSPixelRowOffsetTableList<PageWidth, RowMask>& row;
|
||||
};
|
||||
|
||||
/// List of all tables for a given swizzle for easy setup
|
||||
template <int PageHeight, int PageWidth, int BlockHeight, int BlockWidth, int RowMask>
|
||||
constexpr GSSwizzleTableList<PageHeight, PageWidth, BlockHeight, BlockWidth, RowMask>
|
||||
makeSwizzleTableList(
|
||||
const GSSizedBlockSwizzleTable<BlockHeight, BlockWidth>& block,
|
||||
const GSPixelColOffsetTable<PageHeight>& col,
|
||||
const GSPixelRowOffsetTableList<PageWidth, RowMask>& row)
|
||||
{
|
||||
return {block, col, row};
|
||||
}
|
||||
|
||||
extern const GSSizedBlockSwizzleTable<4, 8> blockTable32;
|
||||
extern const GSSizedBlockSwizzleTable<4, 8> blockTable32Z;
|
||||
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16;
|
||||
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16S;
|
||||
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16Z;
|
||||
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16SZ;
|
||||
extern const GSSizedBlockSwizzleTable<4, 8> blockTable8;
|
||||
extern const GSSizedBlockSwizzleTable<8, 4> blockTable4;
|
||||
extern const uint8 columnTable32[8][8];
|
||||
extern const uint8 columnTable16[8][16];
|
||||
extern const uint8 columnTable8[16][16];
|
||||
|
@ -45,3 +119,57 @@ extern const uint8 clutTableT32I8[128];
|
|||
extern const uint8 clutTableT32I4[16];
|
||||
extern const uint8 clutTableT16I8[32];
|
||||
extern const uint8 clutTableT16I4[16];
|
||||
extern const GSPixelColOffsetTable< 32> pixelColOffset32;
|
||||
extern const GSPixelColOffsetTable< 32> pixelColOffset32Z;
|
||||
extern const GSPixelColOffsetTable< 64> pixelColOffset16;
|
||||
extern const GSPixelColOffsetTable< 64> pixelColOffset16S;
|
||||
extern const GSPixelColOffsetTable< 64> pixelColOffset16Z;
|
||||
extern const GSPixelColOffsetTable< 64> pixelColOffset16SZ;
|
||||
extern const GSPixelColOffsetTable< 64> pixelColOffset8;
|
||||
extern const GSPixelColOffsetTable<128> pixelColOffset4;
|
||||
|
||||
template <int PageWidth>
|
||||
constexpr GSPixelRowOffsetTableList<PageWidth, 0> makeRowOffsetTableList(
|
||||
const GSSizedPixelRowOffsetTable<PageWidth>* a)
|
||||
{
|
||||
return {{a, a, a, a, a, a, a, a}};
|
||||
}
|
||||
|
||||
template <int PageWidth>
|
||||
constexpr GSPixelRowOffsetTableList<PageWidth, 7> makeRowOffsetTableList(
|
||||
const GSSizedPixelRowOffsetTable<PageWidth>* a,
|
||||
const GSSizedPixelRowOffsetTable<PageWidth>* b)
|
||||
{
|
||||
return {{a, a, b, b, b, b, a, a}};
|
||||
}
|
||||
|
||||
/// Just here to force external linkage so we don't end up with multiple copies of pixelRowOffset*
|
||||
struct GSTables
|
||||
{
|
||||
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset32;
|
||||
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset32Z;
|
||||
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16;
|
||||
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16S;
|
||||
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16Z;
|
||||
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16SZ;
|
||||
static const GSSizedPixelRowOffsetTable<128> _pixelRowOffset8[2];
|
||||
static const GSSizedPixelRowOffsetTable<128> _pixelRowOffset4[2];
|
||||
|
||||
static constexpr auto pixelRowOffset32 = makeRowOffsetTableList(&_pixelRowOffset32);
|
||||
static constexpr auto pixelRowOffset32Z = makeRowOffsetTableList(&_pixelRowOffset32Z);
|
||||
static constexpr auto pixelRowOffset16 = makeRowOffsetTableList(&_pixelRowOffset16);
|
||||
static constexpr auto pixelRowOffset16S = makeRowOffsetTableList(&_pixelRowOffset16S);
|
||||
static constexpr auto pixelRowOffset16Z = makeRowOffsetTableList(&_pixelRowOffset16Z);
|
||||
static constexpr auto pixelRowOffset16SZ = makeRowOffsetTableList(&_pixelRowOffset16SZ);
|
||||
static constexpr auto pixelRowOffset8 = makeRowOffsetTableList(&_pixelRowOffset8[0], &_pixelRowOffset8[1]);
|
||||
static constexpr auto pixelRowOffset4 = makeRowOffsetTableList(&_pixelRowOffset4[0], &_pixelRowOffset4[1]);
|
||||
};
|
||||
|
||||
constexpr auto swizzleTables32 = makeSwizzleTableList(blockTable32, pixelColOffset32, GSTables::pixelRowOffset32 );
|
||||
constexpr auto swizzleTables32Z = makeSwizzleTableList(blockTable32Z, pixelColOffset32Z, GSTables::pixelRowOffset32Z );
|
||||
constexpr auto swizzleTables16 = makeSwizzleTableList(blockTable16, pixelColOffset16, GSTables::pixelRowOffset16 );
|
||||
constexpr auto swizzleTables16Z = makeSwizzleTableList(blockTable16Z, pixelColOffset16Z, GSTables::pixelRowOffset16Z );
|
||||
constexpr auto swizzleTables16S = makeSwizzleTableList(blockTable16S, pixelColOffset16S, GSTables::pixelRowOffset16S );
|
||||
constexpr auto swizzleTables16SZ = makeSwizzleTableList(blockTable16SZ, pixelColOffset16SZ, GSTables::pixelRowOffset16SZ);
|
||||
constexpr auto swizzleTables8 = makeSwizzleTableList(blockTable8, pixelColOffset8, GSTables::pixelRowOffset8 );
|
||||
constexpr auto swizzleTables4 = makeSwizzleTableList(blockTable4, pixelColOffset4, GSTables::pixelRowOffset4 );
|
||||
|
|
|
@ -907,26 +907,22 @@ void GSRendererHW::SwSpriteRender()
|
|||
|
||||
for (int y = 0; y < h; y++, ++sy, ++dy)
|
||||
{
|
||||
GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sx, sy) : GSOffset::PAHelper();
|
||||
GSOffset::PAHelper dpa = dpo.paMulti(dx, dy);
|
||||
GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sy) : GSOffset::PAHelper();
|
||||
GSOffset::PAHelper dpa = dpo.paMulti(dy);
|
||||
|
||||
ASSERT(w % 2 == 0);
|
||||
|
||||
for (int x = 0; x < w; x += 2)
|
||||
{
|
||||
uint32 di = dpa.value();
|
||||
dpa.incX();
|
||||
ASSERT(di + 1 == dpa.value()); // Destination pixel pair is adjacent in memory
|
||||
dpa.incX();
|
||||
uint32 di = dpa.value(dx + x);
|
||||
ASSERT(di + 1 == dpa.value(dx + x + 1)); // Destination pixel pair is adjacent in memory
|
||||
|
||||
GSVector4i sc;
|
||||
if (texture_mapping_enabled)
|
||||
{
|
||||
uint32 si = spa.value();
|
||||
spa.incX();
|
||||
uint32 si = spa.value(sx + x);
|
||||
// Read 2 source pixel colors
|
||||
ASSERT((si + 1) == spa.value()); // Source pixel pair is adjacent in memory
|
||||
spa.incX();
|
||||
ASSERT((si + 1) == spa.value(sx + x + 1)); // Source pixel pair is adjacent in memory
|
||||
sc = GSVector4i::loadl(&m_mem.m_vm32[si]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
||||
|
||||
// Apply TFX
|
||||
|
@ -1827,11 +1823,11 @@ void GSRendererHW::OI_GsMemClear()
|
|||
// Based on WritePixel32
|
||||
for (int y = r.top; y < r.bottom; y++)
|
||||
{
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y);
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(y);
|
||||
|
||||
for (; pa.x() < r.right; pa.incX())
|
||||
for (int x = r.left; x < r.right; x++)
|
||||
{
|
||||
m_mem.m_vm32[pa.value()] = 0; // Here the constant color
|
||||
m_mem.m_vm32[pa.value(x)] = 0; // Here the constant color
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1840,11 +1836,11 @@ void GSRendererHW::OI_GsMemClear()
|
|||
// Based on WritePixel24
|
||||
for (int y = r.top; y < r.bottom; y++)
|
||||
{
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y);
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(y);
|
||||
|
||||
for (; pa.x() < r.right; pa.incX())
|
||||
for (int x = r.left; x < r.right; x++)
|
||||
{
|
||||
m_mem.m_vm32[pa.value()] &= 0xff000000; // Clear the color
|
||||
m_mem.m_vm32[pa.value(x)] &= 0xff000000; // Clear the color
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1855,11 +1851,11 @@ void GSRendererHW::OI_GsMemClear()
|
|||
// Based on WritePixel16
|
||||
for(int y = r.top; y < r.bottom; y++)
|
||||
{
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(r.left, y);
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(y);
|
||||
|
||||
for(int x = r.left; x < r.right; x++)
|
||||
{
|
||||
m_mem.m_vm16[pa.value()] = 0; // Here the constant color
|
||||
m_mem.m_vm16[pa.value(x)] = 0; // Here the constant color
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -2942,11 +2942,11 @@ void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, uint32 c
|
|||
|
||||
for (int y = r.y; y < r.w; y++)
|
||||
{
|
||||
GSOffset::PAHelper pa = off.paMulti(r.x, y);
|
||||
GSOffset::PAHelper pa = off.paMulti(y);
|
||||
|
||||
for (; pa.x() < r.z; pa.incX())
|
||||
for (int x = r.x; x < r.z; x++)
|
||||
{
|
||||
T& d = vm[pa.value()];
|
||||
T& d = vm[pa.value(x)];
|
||||
d = (T)(!masked ? c : (c | (d & m)));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue