GS: Switch back to row+column for pixel lookups

Code for the full calculation was way too complicated to run for every pixel in a loop
This commit is contained in:
TellowKrinkle 2021-03-02 20:51:27 -06:00 committed by refractionpcsx2
parent b2fb6c7804
commit b901c6af71
8 changed files with 393 additions and 288 deletions

View File

@ -197,13 +197,14 @@ template <int n>
void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
int x = TEXCLUT.COU << 4;
uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
for (int i = 0; i < n; pa.incX(), i++)
for (int i = 0; i < n; x++, i++)
{
uint32 c = m_mem->m_vm32[pa.value()];
uint32 c = m_mem->m_vm32[pa.value(x)];
clut[i] = (uint16)(c & 0xffff);
clut[i + 256] = (uint16)(c >> 16);
@ -214,13 +215,14 @@ template <int n>
void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
int x = TEXCLUT.COU << 4;
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for (int i = 0; i < n; pa.incX(), i++)
for (int i = 0; i < n; x++, i++)
{
clut[i] = m_mem->m_vm16[pa.value()];
clut[i] = m_mem->m_vm16[pa.value(x)];
}
}
@ -228,13 +230,14 @@ template <int n>
void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
int x = TEXCLUT.COU << 4;
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for (int i = 0; i < n; pa.incX(), i++)
for (int i = 0; i < n; x++, i++)
{
clut[i] = m_mem->m_vm16[pa.value()];
clut[i] = m_mem->m_vm16[pa.value(x)];
}
}

View File

@ -41,14 +41,14 @@ static void foreachBlock(const GSOffset& off, GSLocalMemory* mem, const GSVector
//
GSPageOffsetTable<32, 64> GSLocalMemory::pageOffset32;
GSPageOffsetTable<32, 64> GSLocalMemory::pageOffset32Z;
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16;
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16S;
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16Z;
GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16SZ;
GSPageOffsetTable<64, 128> GSLocalMemory::pageOffset8;
GSPageOffsetTable<128, 128> GSLocalMemory::pageOffset4;
constexpr GSSwizzleInfo GSLocalMemory::swizzle32;
constexpr GSSwizzleInfo GSLocalMemory::swizzle32Z;
constexpr GSSwizzleInfo GSLocalMemory::swizzle16;
constexpr GSSwizzleInfo GSLocalMemory::swizzle16S;
constexpr GSSwizzleInfo GSLocalMemory::swizzle16Z;
constexpr GSSwizzleInfo GSLocalMemory::swizzle16SZ;
constexpr GSSwizzleInfo GSLocalMemory::swizzle8;
constexpr GSSwizzleInfo GSLocalMemory::swizzle4;
//
@ -56,21 +56,6 @@ GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
//
template <int PageHeight, int PageWidth, int ColHeight, int ColWidth, typename Col>
static void setupPageOffsetTable(GSPageOffsetTable<PageHeight, PageWidth>& table, const GSBlockSwizzleTable& block, Col (&col)[ColHeight][ColWidth])
{
int blockSize = ColHeight * ColWidth;
for (int y = 0; y < PageHeight; y++)
{
for (int x = 0; x < 256; x++)
{
int colOff = col[y % ColHeight][x % ColWidth];
int blockOff = block.lookup(x / ColWidth, y / ColHeight);
table.value[y].value[x] = blockOff * blockSize + colOff;
}
}
}
GSLocalMemory::GSLocalMemory()
: m_clut(this)
{
@ -101,15 +86,6 @@ GSLocalMemory::GSLocalMemory()
memset(m_vm8, 0, m_vmsize);
setupPageOffsetTable(pageOffset32, blockTable32, columnTable32);
setupPageOffsetTable(pageOffset32Z, blockTable32Z, columnTable32);
setupPageOffsetTable(pageOffset16, blockTable16, columnTable16);
setupPageOffsetTable(pageOffset16S, blockTable16S, columnTable16);
setupPageOffsetTable(pageOffset16Z, blockTable16Z, columnTable16);
setupPageOffsetTable(pageOffset16SZ, blockTable16SZ, columnTable16);
setupPageOffsetTable(pageOffset8, blockTable8, columnTable8);
setupPageOffsetTable(pageOffset4, blockTable4, columnTable4);
for (size_t i = 0; i < countof(m_psm); i++)
{
m_psm[i].info = GSLocalMemory::swizzle32;
@ -1108,20 +1084,19 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
auto copy = [&](int len, const GSOffset& off, auto&& fn)
{
GSOffset::PAHelper pa = off.paMulti(x, y);
GSOffset::PAHelper pa = off.paMulti(y);
for (; len > 0; len--)
{
fn(pa);
pa.incX();
if (pa.x() >= ex)
x++;
if (x >= ex)
{
y++;
pa = off.paMulti(sx, y);
x = sx;
pa = off.paMulti(y);
}
}
x = pa.x();
};
GSOffset off = GetOffset(bp, bw, BITBLTBUF.DPSM);
@ -1132,7 +1107,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMZ32:
copy(len / 4, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
{
WritePixel32(pa.value(), *pd);
WritePixel32(pa.value(x), *pd);
pd++;
});
break;
@ -1141,7 +1116,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMZ24:
copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
{
WritePixel24(pa.value(), *(uint32*)pb);
WritePixel24(pa.value(x), *(uint32*)pb);
pb += 3;
});
break;
@ -1152,7 +1127,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMZ16S:
copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa)
{
WritePixel16(pa.value(), *pw);
WritePixel16(pa.value(x), *pw);
pw++;
});
break;
@ -1160,7 +1135,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMT8:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa)
{
WritePixel8(pa.value(), *pb);
WritePixel8(pa.value(x), *pb);
pb++;
});
break;
@ -1168,9 +1143,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMT4:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa)
{
WritePixel4(pa.value(), *pb & 0xf);
pa.incX();
WritePixel4(pa.value(), *pb >> 4);
WritePixel4(pa.value(x++), *pb & 0xf);
WritePixel4(pa.value(x), *pb >> 4);
pb++;
});
break;
@ -1178,7 +1152,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMT8H:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa)
{
WritePixel8H(pa.value(), *pb);
WritePixel8H(pa.value(x), *pb);
pb++;
});
break;
@ -1186,9 +1160,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMT4HL:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa)
{
WritePixel4HL(pa.value(), *pb & 0xf);
pa.incX();
WritePixel4HL(pa.value(), *pb >> 4);
WritePixel4HL(pa.value(x++), *pb & 0xf);
WritePixel4HL(pa.value(x), *pb >> 4);
pb++;
});
break;
@ -1196,9 +1169,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
case PSM_PSMT4HH:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa)
{
WritePixel4HH(pa.value(), *pb & 0xf);
pa.incX();
WritePixel4HH(pa.value(), *pb >> 4);
WritePixel4HH(pa.value(x++), *pb & 0xf);
WritePixel4HH(pa.value(x), *pb >> 4);
pb++;
});
break;
@ -1230,20 +1202,19 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
auto copy = [&](int len, const GSOffset& off, auto&& fn)
{
GSOffset::PAHelper pa = off.paMulti(x, y);
GSOffset::PAHelper pa = off.paMulti(y);
for (; len > 0; len--)
{
fn(pa);
pa.incX();
if (pa.x() >= ex)
x++;
if (x >= ex)
{
y++;
pa = off.paMulti(sx, y);
x = sx;
pa = off.paMulti(y);
}
}
x = pa.x();
};
GSOffset off = GetOffset(bp, bw, BITBLTBUF.SPSM);
@ -1259,49 +1230,48 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
len /= 4;
GSOffset::PAHelper pa = off.assertSizesMatch(swizzle32).paMulti(x, y);
GSOffset::PAHelper pa = off.assertSizesMatch(swizzle32).paMulti(y);
while (len > 0)
{
for (; len > 0 && pa.x() < ex && (pa.x() & 7); len--, pa.incX(), pd++)
for (; len > 0 && x < ex && (x & 7); len--, x++, pd++)
{
*pd = m_vm32[pa.value()];
*pd = m_vm32[pa.value(x)];
}
// aligned to a column
for (int ex8 = ex - 8; len >= 8 && pa.x() <= ex8; len -= 8, pd += 8)
for (int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8)
{
uint32* ps = m_vm32 + pa.value();
uint32* ps = m_vm32 + pa.value(x);
GSVector4i::store<false>(&pd[0], GSVector4i::load(ps + 0, ps + 4));
GSVector4i::store<false>(&pd[4], GSVector4i::load(ps + 8, ps + 12));
for (int i = 0; i < 8; i++, pa.incX())
ASSERT(pd[i] == m_vm32[pa.value()]);
for (int i = 0; i < 8; i++)
ASSERT(pd[i] == m_vm32[pa.value(x + i)]);
}
for (; len > 0 && pa.x() < ex; len--, pa.incX(), pd++)
for (; len > 0 && x < ex; len--, x++, pd++)
{
*pd = m_vm32[pa.value()];
*pd = m_vm32[pa.value(x)];
}
if (pa.x() == ex)
if (x == ex)
{
y++;
pa = off.assertSizesMatch(swizzle32).paMulti(sx, y);
x = sx;
pa = off.assertSizesMatch(swizzle32).paMulti(y);
}
}
x = pa.x();
break;
}
break;
case PSM_PSMCT24:
case PSM_PSMZ24:
copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
{
uint32 c = m_vm32[pa.value()];
uint32 c = m_vm32[pa.value(x)];
pb[0] = (uint8)(c);
pb[1] = (uint8)(c >> 8);
pb[2] = (uint8)(c >> 16);
@ -1315,7 +1285,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMZ16S:
copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa)
{
*pw = m_vm16[pa.value()];
*pw = m_vm16[pa.value(x)];
pw++;
});
break;
@ -1323,7 +1293,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMT8:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa)
{
*pb = m_vm8[pa.value()];
*pb = m_vm8[pa.value(x)];
pb++;
});
break;
@ -1331,9 +1301,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMT4:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa)
{
uint8 low = ReadPixel4(pa.value());
pa.incX();
uint8 high = ReadPixel4(pa.value());
uint8 low = ReadPixel4(pa.value(x++));
uint8 high = ReadPixel4(pa.value(x));
*pb = low | (high << 4);
pb++;
});
@ -1342,7 +1311,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMT8H:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa)
{
*pb = (uint8)(m_vm32[pa.value()] >> 24);
*pb = (uint8)(m_vm32[pa.value(x)] >> 24);
pb++;
});
break;
@ -1350,9 +1319,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMT4HL:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa)
{
uint32 c0 = m_vm32[pa.value()] >> 24 & 0x0f;
pa.incX();
uint32 c1 = m_vm32[pa.value()] >> 20 & 0xf0;
uint32 c0 = m_vm32[pa.value(x++)] >> 24 & 0x0f;
uint32 c1 = m_vm32[pa.value(x)] >> 20 & 0xf0;
*pb = (uint8)(c0 | c1);
pb++;
});
@ -1361,9 +1329,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMT4HH:
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa)
{
uint32 c0 = m_vm32[pa.value()] >> 28 & 0x0f;
pa.incX();
uint32 c1 = m_vm32[pa.value()] >> 24 & 0xf0;
uint32 c0 = m_vm32[pa.value(x++)] >> 28 & 0x0f;
uint32 c1 = m_vm32[pa.value(x)] >> 24 & 0xf0;
*pb = (uint8)(c0 | c1);
pb++;
});

View File

@ -41,38 +41,66 @@ struct GSPixelOffset4
uint32 fbp, zbp, fpsm, zpsm, bw;
};
struct alignas(128) GSPageOffsetRow
{
// Maximum page width is 128, but store mirror for unaligned simd loads
uint32 value[256];
};
class GSOffset;
template <int Height, int Width>
struct GSPageOffsetTable
{
GSPageOffsetRow value[Height];
};
class GSSwizzleInfo;
class GSOffset
class GSSwizzleInfo
{
friend class GSOffset;
/// Table for storing swizzling of blocks within a page
const GSBlockSwizzleTable* m_blockSwizzle;
/// Table for storing swizzling of pixels within a page
const GSPageOffsetRow* m_pixelSwizzle;
/// Table for storing swizzling of pixels within a page in the y dimension
const int* m_pixelSwizzleCol;
/// Array of tables for storing swizzling of pixels in the x dimension
const GSPixelRowOffsetTable* const* m_pixelSwizzleRow;
GSVector2i m_pageMask; ///< Mask for getting the offset of a pixel that's within a page (may also be used as page dimensions - 1)
GSVector2i m_blockMask; ///< Mask for getting the offset of a pixel that's within a block (may also be used as block dimensions - 1)
uint8 m_pageShiftX; ///< Amount to rshift x value by to get page x offset
uint8 m_pageShiftY; ///< Amount to rshift y value by to get page y offset
uint8 m_blockShiftX; ///< Amount to rshift x value by to get block x offset
uint8 m_blockShiftY; ///< Amount to rshift y value by to get block y offset
int m_pixelRowMask; ///< Mask for getting the offset in m_pixelSwizzleRow for a given y value
uint8 m_pageShiftX; ///< Amount to rshift x value by to get page offset
uint8 m_pageShiftY; ///< Amount to rshift y value by to get page offset
uint8 m_blockShiftX; ///< Amount to rshift x value by to get offset in block
uint8 m_blockShiftY; ///< Amount to rshift y value by to get offset in block
static constexpr uint8 ilog2(uint32 i) { return i < 2 ? 0 : 1 + ilog2(i >> 1); }
public:
GSSwizzleInfo() = default;
/// @param list Block, column and row swizzle tables to build this swizzle info from
template <int PageWidth, int PageHeight, int BlocksWide, int BlocksHigh, int PixelRowMask>
constexpr GSSwizzleInfo(GSSwizzleTableList<PageHeight, PageWidth, BlocksHigh, BlocksWide, PixelRowMask> list)
: m_blockSwizzle(&list.block)
, m_pixelSwizzleCol(list.col.value)
, m_pixelSwizzleRow(list.row.rows)
, m_pageMask{PageWidth - 1, PageHeight - 1}
, m_blockMask{(PageWidth / BlocksWide) - 1, (PageHeight / BlocksHigh) - 1}
, m_pixelRowMask(PixelRowMask)
, m_pageShiftX(ilog2(PageWidth)), m_pageShiftY(ilog2(PageHeight))
, m_blockShiftX(ilog2(PageWidth / BlocksWide)), m_blockShiftY(ilog2(PageHeight / BlocksHigh))
{
static_assert(1 << ilog2(PageWidth) == PageWidth, "PageWidth must be a power of 2");
static_assert(1 << ilog2(PageHeight) == PageHeight, "PageHeight must be a power of 2");
}
/// Get the block number of the given pixel
uint32 bn(int x, int y, uint32 bp, uint32 bw) const;
/// Get the address of the given pixel
uint32 pa(int x, int y, uint32 bp, uint32 bw) const;
};
class GSOffset : GSSwizzleInfo
{
int m_bp; ///< Offset's base pointer (same measurement as GS)
int m_bwPg; ///< Offset's buffer width in pages (not equal to bw in GS for 8 and 4-bit textures)
int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffset algorithms)
public:
GSOffset() = default;
constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm);
constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
: GSSwizzleInfo(swz)
, m_bp(bp)
, m_bwPg(bw >> (m_pageShiftX - 6))
, m_psm(psm)
{
}
/// Help the optimizer by using this method instead of GSLocalMemory::GetOffset when the PSM is known
constexpr static GSOffset fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm);
@ -182,49 +210,37 @@ public:
class PAHelper
{
/// Pixel swizzle array
const GSPageOffsetRow* m_pixelSwizzle;
int m_pageMaskX; ///< Mask for getting offset within a page
int m_base; ///< Address for origin x
int m_x; ///< Current x position
int m_shift; ///< Amount to lshift x to get offset due to page after clearing with pageMaskX
int m_mask; ///< Mask to stay in bounds
const GSPixelRowOffsetTable* m_pixelSwizzleRow;
int m_base;
public:
PAHelper() = default;
PAHelper(const GSOffset& off, int x, int y)
PAHelper(const GSOffset& off, int y)
{
m_pixelSwizzle = off.m_pixelSwizzle + (y & off.m_pageMask.y);
m_pixelSwizzleRow = off.m_pixelSwizzleRow[y & off.m_pixelRowMask];
m_base = off.m_bp << (off.m_pageShiftX + off.m_pageShiftY - 5);
m_base += ((y & ~off.m_pageMask.y) * off.m_bwPg) << off.m_pageShiftX;
m_pageMaskX = off.m_pageMask.x;
m_shift = off.m_pageShiftY;
m_x = x;
m_mask = (MAX_PAGES << (off.m_pageShiftX + off.m_pageShiftY)) - 1;
m_base &= (MAX_PAGES << (off.m_pageShiftX + off.m_pageShiftY)) - 1;
m_base += off.m_pixelSwizzleCol[y & off.m_pageMask.y];
}
/// Get current x value
int x() const { return m_x; }
/// Increment x value
void incX() { m_x++; }
/// Decrement x value
void decX() { m_x--; }
/// Get current pixel address
uint32 value() const
uint32 value(size_t x) const
{
int x = (m_x & ~m_pageMaskX) << m_shift;
return (m_base + x + m_pixelSwizzle->value[m_x & m_pageMaskX]) & m_mask;
return m_base + (*m_pixelSwizzleRow)[x];
}
};
/// Get the address of the given pixel
uint32 pa(int x, int y) const
{
return PAHelper(*this, x, y).value();
return PAHelper(*this, y).value(x);
}
/// Get a helper class for efficiently calculating multiple pixel addresses in a line (along the x axis)
PAHelper paMulti(int x, int y) const
PAHelper paMulti(int y) const
{
return PAHelper(*this, x, y);
return PAHelper(*this, y);
}
/// Loop over the pixels in the given rectangle
@ -236,11 +252,10 @@ public:
for (int y = r.top; y < r.bottom; y++, px = reinterpret_cast<Src*>(reinterpret_cast<uint8*>(px) + pitch))
{
PAHelper pa = paMulti(r.left, y);
while (pa.x() < r.right)
PAHelper pa = paMulti(y);
for (int x = r.left; x < r.right; x++)
{
fn(vm + pa.value(), px + pa.x());
pa.incX();
fn(vm + pa.value(x), px + x);
}
}
}
@ -314,93 +329,30 @@ public:
/// Use compile-time dimensions from `swz` as a performance optimization
/// Also asserts if your assumption was wrong
constexpr GSOffset assertSizesMatch(const GSSwizzleInfo& swz) const;
};
class GSSwizzleInfo
{
friend class GSOffset;
/// Table for storing swizzling of blocks within a page
const GSBlockSwizzleTable* m_blockSwizzle;
/// Table for storing swizzling of pixels within a page
const GSPageOffsetRow* m_pixelSwizzle;
GSVector2i m_pageMask; ///< Mask for getting the offset of a pixel that's within a page (may also be used as page dimensions - 1)
GSVector2i m_blockMask; ///< Mask for getting the offset of a pixel that's within a block (may also be used as block dimensions - 1)
uint8 m_pageShiftX; ///< Amount to rshift x value by to get page offset
uint8 m_pageShiftY; ///< Amount to rshift y value by to get page offset
uint8 m_blockShiftX; ///< Amount to rshift x value by to get offset in block
uint8 m_blockShiftY; ///< Amount to rshift y value by to get offset in block
static constexpr uint8 ilog2(uint32 i) { return i < 2 ? 0 : 1 + ilog2(i>>1); }
public:
GSSwizzleInfo() = default;
/// @param PageWidth Width of page in pixels
/// @param PageHeight Height of page in pixels
/// @param blockSize Size of block in pixels
template <int PageWidth, int PageHeight>
constexpr GSSwizzleInfo(GSVector2i blockSize, const GSBlockSwizzleTable* blockSwizzle, const GSPageOffsetTable<PageHeight, PageWidth>* pxSwizzle)
: m_blockSwizzle(blockSwizzle)
, m_pixelSwizzle(pxSwizzle->value)
, m_pageMask{PageWidth - 1, PageHeight - 1}
, m_blockMask{blockSize.x - 1, blockSize.y - 1}
, m_pageShiftX(ilog2(PageWidth)), m_pageShiftY(ilog2(PageHeight))
, m_blockShiftX(ilog2(blockSize.x)), m_blockShiftY(ilog2(blockSize.y))
constexpr GSOffset assertSizesMatch(const GSSwizzleInfo& swz) const
{
static_assert(1 << ilog2(PageWidth) == PageWidth, "PageWidth must be a power of 2");
static_assert(1 << ilog2(PageHeight) == PageHeight, "PageHeight must be a power of 2");
}
/// Get the block number of the given pixel
uint32 bn(int x, int y, uint32 bp, uint32 bw) const
{
return GSOffset(*this, bp, bw, 0).bn(x, y);
}
/// Get the address of the given pixel
uint32 pa(int x, int y, uint32 bp, uint32 bw) const
{
return GSOffset(*this, bp, bw, 0).pa(x, y);
}
/// Loop over all the pages in the given rect, calling `fn` on each
template <typename Fn>
void loopPages(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
{
GSOffset(*this, bp, bw, 0).loopPages(rect, std::forward<Fn>(fn));
}
/// Loop over all the blocks in the given rect, calling `fn` on each
template <typename Fn>
void loopBlocks(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
{
GSOffset(*this, bp, bw, 0).loopBlocks(rect, std::forward<Fn>(fn));
GSOffset o = *this;
#define MATCH(x) ASSERT(o.x == swz.x); o.x = swz.x;
MATCH(m_pageMask)
MATCH(m_blockMask)
MATCH(m_pixelRowMask)
MATCH(m_pageShiftX)
MATCH(m_pageShiftY)
MATCH(m_blockShiftX)
MATCH(m_blockShiftY)
#undef MATCH
return o;
}
};
constexpr inline GSOffset::GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
: m_blockSwizzle(swz.m_blockSwizzle)
, m_pixelSwizzle(swz.m_pixelSwizzle)
, m_pageMask(swz.m_pageMask), m_blockMask(swz.m_blockMask)
, m_pageShiftX(swz.m_pageShiftX), m_pageShiftY(swz.m_pageShiftY)
, m_blockShiftX(swz.m_blockShiftX), m_blockShiftY(swz.m_blockShiftY)
, m_bp(bp)
, m_bwPg(bw >> (m_pageShiftX - 6))
, m_psm(psm)
inline uint32 GSSwizzleInfo::bn(int x, int y, uint32 bp, uint32 bw) const
{
return GSOffset(*this, bp, bw, 0).bn(x, y);
}
constexpr GSOffset GSOffset::assertSizesMatch(const GSSwizzleInfo& swz) const
inline uint32 GSSwizzleInfo::pa(int x, int y, uint32 bp, uint32 bw) const
{
GSOffset o = *this;
#define MATCH(x) ASSERT(o.x == swz.x); o.x = swz.x;
MATCH(m_pageMask)
MATCH(m_blockMask)
MATCH(m_pageShiftX)
MATCH(m_pageShiftY)
MATCH(m_blockShiftX)
MATCH(m_blockShiftY)
#undef MATCH
return o;
return GSOffset(*this, bp, bw, 0).pa(x, y);
}
class GSLocalMemory : public GSAlignedClass<32>
@ -452,24 +404,15 @@ public:
protected:
bool m_use_fifo_alloc;
static GSPageOffsetTable<32, 64> pageOffset32;
static GSPageOffsetTable<32, 64> pageOffset32Z;
static GSPageOffsetTable<64, 64> pageOffset16;
static GSPageOffsetTable<64, 64> pageOffset16S;
static GSPageOffsetTable<64, 64> pageOffset16Z;
static GSPageOffsetTable<64, 64> pageOffset16SZ;
static GSPageOffsetTable<64, 128> pageOffset8;
static GSPageOffsetTable<128, 128> pageOffset4;
public:
static constexpr GSSwizzleInfo swizzle32{{8, 8}, &blockTable32, &pageOffset32};
static constexpr GSSwizzleInfo swizzle32Z{{8, 8}, &blockTable32Z, &pageOffset32Z};
static constexpr GSSwizzleInfo swizzle16{{16, 8}, &blockTable16, &pageOffset16};
static constexpr GSSwizzleInfo swizzle16S{{16, 8}, &blockTable16S, &pageOffset16S};
static constexpr GSSwizzleInfo swizzle16Z{{16, 8}, &blockTable16Z, &pageOffset16Z};
static constexpr GSSwizzleInfo swizzle16SZ{{16, 8}, &blockTable16SZ, &pageOffset16SZ};
static constexpr GSSwizzleInfo swizzle8{{16, 16}, &blockTable8, &pageOffset8};
static constexpr GSSwizzleInfo swizzle4{{32, 16}, &blockTable4, &pageOffset4};
static constexpr GSSwizzleInfo swizzle32 {swizzleTables32};
static constexpr GSSwizzleInfo swizzle32Z {swizzleTables32Z};
static constexpr GSSwizzleInfo swizzle16 {swizzleTables16};
static constexpr GSSwizzleInfo swizzle16S {swizzleTables16S};
static constexpr GSSwizzleInfo swizzle16Z {swizzleTables16Z};
static constexpr GSSwizzleInfo swizzle16SZ {swizzleTables16SZ};
static constexpr GSSwizzleInfo swizzle8 {swizzleTables8};
static constexpr GSSwizzleInfo swizzle4 {swizzleTables4};
protected:
__forceinline static uint32 Expand24To32(uint32 c, const GIFRegTEXA& TEXA)

View File

@ -1663,14 +1663,12 @@ void GSState::Move()
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
GSOffset::PAHelper s = spo.paMulti(sx, sy);
GSOffset::PAHelper d = dpo.paMulti(dx, dy);
GSOffset::PAHelper s = spo.paMulti(sy);
GSOffset::PAHelper d = dpo.paMulti(dy);
for (int x = 0; x < w; x++)
{
pxCopyFn(d.value(), s.value());
s.incX();
d.incX();
pxCopyFn(d.value(dx + x), s.value(sx + x));
}
}
}
@ -1678,14 +1676,12 @@ void GSState::Move()
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
GSOffset::PAHelper s = spo.paMulti(sx, sy);
GSOffset::PAHelper d = dpo.paMulti(dx, dy);
GSOffset::PAHelper s = spo.paMulti(sy);
GSOffset::PAHelper d = dpo.paMulti(dy);
for (int x = 0; x < w; x++)
{
pxCopyFn(d.value(), s.value());
s.decX();
d.decX();
pxCopyFn(d.value(dx - x), s.value(sx - x));
}
}
}

View File

@ -20,8 +20,8 @@
#include "GS_types.h"
template <int Width, int Height>
static constexpr GSBlockSwizzleTable makeSwizzleTable(const uint8 (&arr)[Height][Width]) {
GSBlockSwizzleTable table = {};
static constexpr GSSizedBlockSwizzleTable<Height, Width> makeSwizzleTable(const uint8 (&arr)[Height][Width]) {
GSSizedBlockSwizzleTable<Height, Width> table = {};
for (int y = 0; y < 8; y++) {
for (int x = 0; x < 8; x++) {
table.value[y][x] = arr[y % Height][x % Width];
@ -114,14 +114,14 @@ static constexpr uint8 _blockTable4[8][4] =
{ 21, 23, 29, 31 }
};
constexpr GSBlockSwizzleTable blockTable32 = makeSwizzleTable(_blockTable32);
constexpr GSBlockSwizzleTable blockTable32Z = makeSwizzleTable(_blockTable32Z);
constexpr GSBlockSwizzleTable blockTable16 = makeSwizzleTable(_blockTable16);
constexpr GSBlockSwizzleTable blockTable16S = makeSwizzleTable(_blockTable16S);
constexpr GSBlockSwizzleTable blockTable16Z = makeSwizzleTable(_blockTable16Z);
constexpr GSBlockSwizzleTable blockTable16SZ = makeSwizzleTable(_blockTable16SZ);
constexpr GSBlockSwizzleTable blockTable8 = makeSwizzleTable(_blockTable8);
constexpr GSBlockSwizzleTable blockTable4 = makeSwizzleTable(_blockTable4);
constexpr GSSizedBlockSwizzleTable<4, 8> blockTable32 = makeSwizzleTable(_blockTable32);
constexpr GSSizedBlockSwizzleTable<4, 8> blockTable32Z = makeSwizzleTable(_blockTable32Z);
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16 = makeSwizzleTable(_blockTable16);
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16S = makeSwizzleTable(_blockTable16S);
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16Z = makeSwizzleTable(_blockTable16Z);
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16SZ = makeSwizzleTable(_blockTable16SZ);
constexpr GSSizedBlockSwizzleTable<4, 8> blockTable8 = makeSwizzleTable(_blockTable8);
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable4 = makeSwizzleTable(_blockTable4);
constexpr uint8 columnTable32[8][8] =
{
@ -290,3 +290,75 @@ constexpr uint8 clutTableT16I4[16] =
0, 2, 8, 10, 16, 18, 24, 26,
4, 6, 12, 14, 20, 22, 28, 30
};
/// Compute the offset (in pixels) of the pixel at (x, y) from the start of the page row containing it
/// `x` may extend past the first page; each whole page crossed adds one page worth of pixels
/// `y` is used to index `blockTable` directly, so it must lie within a single page
/// @param blockTable Block number of each block within a page, indexed [blockY][blockX]
/// @param colTable Offset of each pixel within a block, indexed [y][x]
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
constexpr int pxOffset(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int x, int y)
{
	constexpr int pixelsPerBlock = ColHeight * ColWidth;
	constexpr int pixelsPerPage = pixelsPerBlock * BlocksHigh * BlocksWide;
	constexpr int pagePixelWidth = BlocksWide * ColWidth;

	// Split x into "which page" and "where inside that page"
	const int xInPage = x % pagePixelWidth;
	const int pageOffset = (x / pagePixelWidth) * pixelsPerPage;

	// Block lookup first, then the position of the pixel inside that block
	const int blockOffset = blockTable[y / ColHeight][xInPage / ColWidth] * pixelsPerBlock;
	const int pixelOffset = colTable[y % ColHeight][xInPage % ColWidth];

	return pageOffset + blockOffset + pixelOffset;
}
/// Build the column offset table for a swizzle: entry y is the offset of the x = 0 pixel of row y
/// The table has one entry for every row of a page (BlocksHigh * ColHeight)
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
constexpr GSPixelColOffsetTable<BlocksHigh * ColHeight> makeColOffsetTable(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth])
{
	constexpr int pageHeight = BlocksHigh * ColHeight;
	GSPixelColOffsetTable<pageHeight> offsets = {};
	int row = 0;
	while (row < pageHeight)
	{
		offsets.value[row] = pxOffset(blockTable, colTable, 0, row);
		++row;
	}
	return offsets;
}
/// Build the row offset table for row `y` of a swizzle
/// Entry x is the offset of pixel (x, y) relative to pixel (0, y), so the first entry is always 0
/// All 2048 entries are filled, so x values beyond the first page are covered as well
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
constexpr GSSizedPixelRowOffsetTable<BlocksWide * ColWidth> makeRowOffsetTable(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int y)
{
	GSSizedPixelRowOffsetTable<BlocksWide * ColWidth> offsets = {};
	// Offset of the x = 0 pixel of this row, subtracted so the table only holds the x-dependent part
	const int rowStart = pxOffset(blockTable, colTable, 0, y);
	for (int col = 0; col < 2048; ++col)
	{
		offsets.value[col] = pxOffset(blockTable, colTable, col, y) - rowStart;
	}
	return offsets;
}
// Column offset tables, one per swizzle: entry y is the offset of the x = 0 pixel of row y (see makeColOffsetTable)
constexpr GSPixelColOffsetTable< 32> pixelColOffset32 = makeColOffsetTable(_blockTable32, columnTable32);
constexpr GSPixelColOffsetTable< 32> pixelColOffset32Z = makeColOffsetTable(_blockTable32Z, columnTable32);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16 = makeColOffsetTable(_blockTable16, columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16S = makeColOffsetTable(_blockTable16S, columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16Z = makeColOffsetTable(_blockTable16Z, columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset16SZ = makeColOffsetTable(_blockTable16SZ, columnTable16);
constexpr GSPixelColOffsetTable< 64> pixelColOffset8 = makeColOffsetTable(_blockTable8, columnTable8);
constexpr GSPixelColOffsetTable<128> pixelColOffset4 = makeColOffsetTable(_blockTable4, columnTable4);
// Row offset tables: entry x is the offset of pixel (x, y) relative to pixel (0, y) (see makeRowOffsetTable)
// The 32 and 16 bit swizzles each get a single table, built from row 0
// These can't be constexpr due to a GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99901
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset32 = makeRowOffsetTable(_blockTable32, columnTable32, 0);
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset32Z = makeRowOffsetTable(_blockTable32Z, columnTable32, 0);
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16 = makeRowOffsetTable(_blockTable16, columnTable16, 0);
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16S = makeRowOffsetTable(_blockTable16S, columnTable16, 0);
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16Z = makeRowOffsetTable(_blockTable16Z, columnTable16, 0);
CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16SZ = makeRowOffsetTable(_blockTable16SZ, columnTable16, 0);
// The 8 and 4 bit swizzles get two tables each, built from rows 0 and 2
// NOTE(review): presumably these are the two variants of the repeating row pattern those formats use - confirm against the GSPixelRowOffsetTableList declaration
CONSTINIT const GSSizedPixelRowOffsetTable<128> GSTables::_pixelRowOffset8[2] =
{
makeRowOffsetTable(_blockTable8, columnTable8, 0),
makeRowOffsetTable(_blockTable8, columnTable8, 2),
};
CONSTINIT const GSSizedPixelRowOffsetTable<128> GSTables::_pixelRowOffset4[2] =
{
makeRowOffsetTable(_blockTable4, columnTable4, 0),
makeRowOffsetTable(_blockTable4, columnTable4, 2),
};
// Out-of-class definitions of the row table lists; their initializers are not visible here
// The second template argument is the mask applied to y when picking a row table: 0 always selects the first table, 7 selects by y & 7
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset32;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset32Z;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16S;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16Z;
constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16SZ;
constexpr GSPixelRowOffsetTableList<128, 7> GSTables::pixelRowOffset8;
constexpr GSPixelRowOffsetTableList<128, 7> GSTables::pixelRowOffset4;

View File

@ -29,14 +29,88 @@ struct alignas(64) GSBlockSwizzleTable
}
};
extern const GSBlockSwizzleTable blockTable32;
extern const GSBlockSwizzleTable blockTable32Z;
extern const GSBlockSwizzleTable blockTable16;
extern const GSBlockSwizzleTable blockTable16S;
extern const GSBlockSwizzleTable blockTable16Z;
extern const GSBlockSwizzleTable blockTable16SZ;
extern const GSBlockSwizzleTable blockTable8;
extern const GSBlockSwizzleTable blockTable4;
/// GSBlockSwizzleTable tagged with its dimensions
/// The template arguments add no storage or behaviour; they only make tables of different sizes distinct types,
/// so passing a table of the wrong size is a compile error
template <int Height, int Width>
struct GSSizedBlockSwizzleTable : GSBlockSwizzleTable
{
};
/// Table for storing offsets of x = 0 pixels from the beginning of the page
/// Add values from a GSPixelRowOffsetTable to get the pixels for x != 0
/// @tparam Height Number of rows stored in the table
template <int Height>
struct alignas(128) GSPixelColOffsetTable
{
	/// Offset of the x = 0 pixel of each row
	int value[Height] = {};

	/// Get the offset for row `y`
	/// `y` wraps around `Height` in both directions, so negative rows are valid too
	int operator[](int y) const
	{
		// The built-in % keeps the sign of y, so a negative y would index in front of the array
		// Fold the remainder back into [0, Height) to get the same wrapping a page mask would give
		int row = y % Height;
		if (row < 0)
			row += Height;
		return value[row];
	}
};
/// Offsets of the pixels in one row, relative to the pixel at x = 0 of the same row
/// The table covers x values up to 2047, so lookups need no masking (unlike GSPixelColOffsetTable)
struct alignas(128) GSPixelRowOffsetTable
{
	/// Offset for each x
	int value[2048] = {};

	/// Get the offset for `x`, which must be inside the table
	int operator[](size_t x) const
	{
		ASSERT(x < sizeof(value) / sizeof(value[0]));
		return value[x];
	}
};
/// GSPixelRowOffsetTable tagged with a page width
/// The template argument adds no storage or behaviour; it only makes tables for different page widths distinct types
template <int PageWidth>
struct GSSizedPixelRowOffsetTable : GSPixelRowOffsetTable
{
};
/// The row offset tables of one swizzle, selected by y
/// Most swizzles use the same row offsets for every row, but PSMT8 and PSMT4 use different ones depending on which column a row belongs to
/// Those follow an `a a b b b b a a` pattern that repeats every 8 rows
/// `y & 7` always finds the right entry; `Mask` lets a swizzle use a smaller mask that is known at compile time, which the compiler should be able to optimize better
template <int PageWidth, int Mask>
struct alignas(sizeof(void*) * 8) GSPixelRowOffsetTableList
{
	/// One table pointer for each of the 8 rows of the pattern
	const GSPixelRowOffsetTable* rows[8];

	/// Get the row offset table to use for row `y`
	const GSPixelRowOffsetTable& operator[](int y) const
	{
		const GSPixelRowOffsetTable* const table = rows[y & Mask];
		return *table;
	}
};
/// Full pixel offset table
/// Template values are for objects constructing from one of these tables
/// Holds references only, so the referenced tables must outlive any GSSwizzleTableList that points at them
template <int PageHeight, int PageWidth, int BlockHeight, int BlockWidth, int RowMask>
struct GSSwizzleTableList
{
// Block swizzle table, typed with the block dimensions
const GSSizedBlockSwizzleTable<BlockHeight, BlockWidth>& block;
// Offsets of the x = 0 pixel of each row, one entry per row of a page
const GSPixelColOffsetTable<PageHeight>& col;
// Row offset tables, selected per row with y & RowMask
const GSPixelRowOffsetTableList<PageWidth, RowMask>& row;
};
/// List of all tables for a given swizzle for easy setup
template <int PageHeight, int PageWidth, int BlockHeight, int BlockWidth, int RowMask>
constexpr GSSwizzleTableList<PageHeight, PageWidth, BlockHeight, BlockWidth, RowMask>
makeSwizzleTableList(
const GSSizedBlockSwizzleTable<BlockHeight, BlockWidth>& block,
const GSPixelColOffsetTable<PageHeight>& col,
const GSPixelRowOffsetTableList<PageWidth, RowMask>& row)
{
return {block, col, row};
}
extern const GSSizedBlockSwizzleTable<4, 8> blockTable32;
extern const GSSizedBlockSwizzleTable<4, 8> blockTable32Z;
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16;
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16S;
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16Z;
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16SZ;
extern const GSSizedBlockSwizzleTable<4, 8> blockTable8;
extern const GSSizedBlockSwizzleTable<8, 4> blockTable4;
extern const uint8 columnTable32[8][8];
extern const uint8 columnTable16[8][16];
extern const uint8 columnTable8[16][16];
@ -45,3 +119,57 @@ extern const uint8 clutTableT32I8[128];
extern const uint8 clutTableT32I4[16];
extern const uint8 clutTableT16I8[32];
extern const uint8 clutTableT16I4[16];
extern const GSPixelColOffsetTable< 32> pixelColOffset32;
extern const GSPixelColOffsetTable< 32> pixelColOffset32Z;
extern const GSPixelColOffsetTable< 64> pixelColOffset16;
extern const GSPixelColOffsetTable< 64> pixelColOffset16S;
extern const GSPixelColOffsetTable< 64> pixelColOffset16Z;
extern const GSPixelColOffsetTable< 64> pixelColOffset16SZ;
extern const GSPixelColOffsetTable< 64> pixelColOffset8;
extern const GSPixelColOffsetTable<128> pixelColOffset4;
template <int PageWidth>
constexpr GSPixelRowOffsetTableList<PageWidth, 0> makeRowOffsetTableList(
const GSSizedPixelRowOffsetTable<PageWidth>* a)
{
return {{a, a, a, a, a, a, a, a}};
}
template <int PageWidth>
constexpr GSPixelRowOffsetTableList<PageWidth, 7> makeRowOffsetTableList(
const GSSizedPixelRowOffsetTable<PageWidth>* a,
const GSSizedPixelRowOffsetTable<PageWidth>* b)
{
return {{a, a, b, b, b, b, a, a}};
}
/// Just here to force external linkage so we don't end up with multiple copies of pixelRowOffset*
struct GSTables
{
// Underlying row offset tables; only declared here, the definitions are not part of this struct
// The template argument is the PageWidth tag of each table
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset32;
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset32Z;
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16;
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16S;
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16Z;
static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16SZ;
// These two come as a pair of tables each, which get combined into one list below
static const GSSizedPixelRowOffsetTable<128> _pixelRowOffset8[2];
static const GSSizedPixelRowOffsetTable<128> _pixelRowOffset4[2];
// Row offset table lists built from a single table (same table for every row)
static constexpr auto pixelRowOffset32 = makeRowOffsetTableList(&_pixelRowOffset32);
static constexpr auto pixelRowOffset32Z = makeRowOffsetTableList(&_pixelRowOffset32Z);
static constexpr auto pixelRowOffset16 = makeRowOffsetTableList(&_pixelRowOffset16);
static constexpr auto pixelRowOffset16S = makeRowOffsetTableList(&_pixelRowOffset16S);
static constexpr auto pixelRowOffset16Z = makeRowOffsetTableList(&_pixelRowOffset16Z);
static constexpr auto pixelRowOffset16SZ = makeRowOffsetTableList(&_pixelRowOffset16SZ);
// Row offset table lists built from two tables, selected per row by the two-table overload
static constexpr auto pixelRowOffset8 = makeRowOffsetTableList(&_pixelRowOffset8[0], &_pixelRowOffset8[1]);
static constexpr auto pixelRowOffset4 = makeRowOffsetTableList(&_pixelRowOffset4[0], &_pixelRowOffset4[1]);
};
// One GSSwizzleTableList per swizzle, bundling its block table, column offset table and row offset table list
// Each list only holds references to the tables named in its initializer
constexpr auto swizzleTables32 = makeSwizzleTableList(blockTable32, pixelColOffset32, GSTables::pixelRowOffset32 );
constexpr auto swizzleTables32Z = makeSwizzleTableList(blockTable32Z, pixelColOffset32Z, GSTables::pixelRowOffset32Z );
constexpr auto swizzleTables16 = makeSwizzleTableList(blockTable16, pixelColOffset16, GSTables::pixelRowOffset16 );
constexpr auto swizzleTables16Z = makeSwizzleTableList(blockTable16Z, pixelColOffset16Z, GSTables::pixelRowOffset16Z );
constexpr auto swizzleTables16S = makeSwizzleTableList(blockTable16S, pixelColOffset16S, GSTables::pixelRowOffset16S );
constexpr auto swizzleTables16SZ = makeSwizzleTableList(blockTable16SZ, pixelColOffset16SZ, GSTables::pixelRowOffset16SZ);
constexpr auto swizzleTables8 = makeSwizzleTableList(blockTable8, pixelColOffset8, GSTables::pixelRowOffset8 );
constexpr auto swizzleTables4 = makeSwizzleTableList(blockTable4, pixelColOffset4, GSTables::pixelRowOffset4 );

View File

@ -907,26 +907,22 @@ void GSRendererHW::SwSpriteRender()
for (int y = 0; y < h; y++, ++sy, ++dy)
{
GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sx, sy) : GSOffset::PAHelper();
GSOffset::PAHelper dpa = dpo.paMulti(dx, dy);
GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sy) : GSOffset::PAHelper();
GSOffset::PAHelper dpa = dpo.paMulti(dy);
ASSERT(w % 2 == 0);
for (int x = 0; x < w; x += 2)
{
uint32 di = dpa.value();
dpa.incX();
ASSERT(di + 1 == dpa.value()); // Destination pixel pair is adjacent in memory
dpa.incX();
uint32 di = dpa.value(dx + x);
ASSERT(di + 1 == dpa.value(dx + x + 1)); // Destination pixel pair is adjacent in memory
GSVector4i sc;
if (texture_mapping_enabled)
{
uint32 si = spa.value();
spa.incX();
uint32 si = spa.value(sx + x);
// Read 2 source pixel colors
ASSERT((si + 1) == spa.value()); // Source pixel pair is adjacent in memory
spa.incX();
ASSERT((si + 1) == spa.value(sx + x + 1)); // Source pixel pair is adjacent in memory
sc = GSVector4i::loadl(&m_mem.m_vm32[si]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
// Apply TFX
@ -1827,11 +1823,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel32
for (int y = r.top; y < r.bottom; y++)
{
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y);
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(y);
for (; pa.x() < r.right; pa.incX())
for (int x = r.left; x < r.right; x++)
{
m_mem.m_vm32[pa.value()] = 0; // Here the constant color
m_mem.m_vm32[pa.value(x)] = 0; // Here the constant color
}
}
}
@ -1840,11 +1836,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel24
for (int y = r.top; y < r.bottom; y++)
{
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y);
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(y);
for (; pa.x() < r.right; pa.incX())
for (int x = r.left; x < r.right; x++)
{
m_mem.m_vm32[pa.value()] &= 0xff000000; // Clear the color
m_mem.m_vm32[pa.value(x)] &= 0xff000000; // Clear the color
}
}
}
@ -1855,11 +1851,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel16
for(int y = r.top; y < r.bottom; y++)
{
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(r.left, y);
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(y);
for(int x = r.left; x < r.right; x++)
{
m_mem.m_vm16[pa.value()] = 0; // Here the constant color
m_mem.m_vm16[pa.value(x)] = 0; // Here the constant color
}
}
#endif

View File

@ -2942,11 +2942,11 @@ void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, uint32 c
for (int y = r.y; y < r.w; y++)
{
GSOffset::PAHelper pa = off.paMulti(r.x, y);
GSOffset::PAHelper pa = off.paMulti(y);
for (; pa.x() < r.z; pa.incX())
for (int x = r.x; x < r.z; x++)
{
T& d = vm[pa.value()];
T& d = vm[pa.value(x)];
d = (T)(!masked ? c : (c | (d & m)));
}
}