From b901c6af710656062cd4dc24201ca5f469c775b3 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Tue, 2 Mar 2021 20:51:27 -0600 Subject: [PATCH] GS: Switch back to row+column for pixel lookups Code for the full calculation was way too complicated to run for every pixel in a loop --- pcsx2/GS/GSClut.cpp | 21 +- pcsx2/GS/GSLocalMemory.cpp | 137 +++++-------- pcsx2/GS/GSLocalMemory.h | 233 +++++++++-------------- pcsx2/GS/GSState.cpp | 16 +- pcsx2/GS/GSTables.cpp | 92 ++++++++- pcsx2/GS/GSTables.h | 144 +++++++++++++- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 32 ++-- pcsx2/GS/Renderers/SW/GSDrawScanline.cpp | 6 +- 8 files changed, 393 insertions(+), 288 deletions(-) diff --git a/pcsx2/GS/GSClut.cpp b/pcsx2/GS/GSClut.cpp index 90f0f8359b..ceed73402e 100644 --- a/pcsx2/GS/GSClut.cpp +++ b/pcsx2/GS/GSClut.cpp @@ -197,13 +197,14 @@ template void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) { GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32); - GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV); + GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV); + int x = TEXCLUT.COU << 4; uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4); - for (int i = 0; i < n; pa.incX(), i++) + for (int i = 0; i < n; x++, i++) { - uint32 c = m_mem->m_vm32[pa.value()]; + uint32 c = m_mem->m_vm32[pa.value(x)]; clut[i] = (uint16)(c & 0xffff); clut[i + 256] = (uint16)(c >> 16); @@ -214,13 +215,14 @@ template void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) { GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16); - GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV); + GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV); + int x = TEXCLUT.COU << 4; uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4); - for (int i = 0; i < n; pa.incX(), i++) + for (int i = 0; i < n; x++, i++) { - clut[i] = m_mem->m_vm16[pa.value()]; + clut[i] = m_mem->m_vm16[pa.value(x)]; } } @@ -228,13 +230,14 @@ template void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) { GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S); - GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV); + GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV); + int x = TEXCLUT.COU << 4; uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4); - for (int i = 0; i < n; pa.incX(), i++) + for (int i = 0; i < n; x++, i++) { - clut[i] = m_mem->m_vm16[pa.value()]; + clut[i] = m_mem->m_vm16[pa.value(x)]; } } diff --git a/pcsx2/GS/GSLocalMemory.cpp b/pcsx2/GS/GSLocalMemory.cpp index 57b1428aa9..b021de4f9b 100644 --- a/pcsx2/GS/GSLocalMemory.cpp +++ b/pcsx2/GS/GSLocalMemory.cpp @@ -41,14 +41,14 @@ static void foreachBlock(const GSOffset& off, GSLocalMemory* mem, const GSVector // -GSPageOffsetTable<32, 64> GSLocalMemory::pageOffset32; -GSPageOffsetTable<32, 64> GSLocalMemory::pageOffset32Z; -GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16; -GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16S; -GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16Z; -GSPageOffsetTable<64, 64> GSLocalMemory::pageOffset16SZ; -GSPageOffsetTable<64, 128> GSLocalMemory::pageOffset8; -GSPageOffsetTable<128, 128> GSLocalMemory::pageOffset4; +constexpr GSSwizzleInfo GSLocalMemory::swizzle32; +constexpr GSSwizzleInfo GSLocalMemory::swizzle32Z; +constexpr GSSwizzleInfo GSLocalMemory::swizzle16; +constexpr GSSwizzleInfo GSLocalMemory::swizzle16S; +constexpr GSSwizzleInfo GSLocalMemory::swizzle16Z; +constexpr GSSwizzleInfo GSLocalMemory::swizzle16SZ; +constexpr GSSwizzleInfo GSLocalMemory::swizzle8; +constexpr GSSwizzleInfo GSLocalMemory::swizzle4; // @@ -56,21 +56,6 @@ GSLocalMemory::psm_t GSLocalMemory::m_psm[64]; // -template -static void setupPageOffsetTable(GSPageOffsetTable& table, const GSBlockSwizzleTable& block, Col (&col)[ColHeight][ColWidth]) -{ - int blockSize = ColHeight * ColWidth; - for (int y = 0; y < PageHeight; y++) - { - for (int x = 0; x < 256; x++) - { - int colOff = col[y % ColHeight][x % ColWidth]; - int blockOff = block.lookup(x / ColWidth, y / ColHeight); - table.value[y].value[x] = blockOff * blockSize + colOff; - } - } -} - GSLocalMemory::GSLocalMemory() : m_clut(this) { @@ -101,15 +86,6 @@ GSLocalMemory::GSLocalMemory() memset(m_vm8, 0, m_vmsize); - setupPageOffsetTable(pageOffset32, blockTable32, columnTable32); - setupPageOffsetTable(pageOffset32Z, blockTable32Z, columnTable32); - setupPageOffsetTable(pageOffset16, blockTable16, columnTable16); - setupPageOffsetTable(pageOffset16S, blockTable16S, columnTable16); - setupPageOffsetTable(pageOffset16Z, blockTable16Z, columnTable16); - setupPageOffsetTable(pageOffset16SZ, blockTable16SZ, columnTable16); - setupPageOffsetTable(pageOffset8, blockTable8, columnTable8); - setupPageOffsetTable(pageOffset4, blockTable4, columnTable4); - for (size_t i = 0; i < countof(m_psm); i++) { m_psm[i].info = GSLocalMemory::swizzle32; @@ -1108,20 +1084,19 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF auto copy = [&](int len, const GSOffset& off, auto&& fn) { - GSOffset::PAHelper pa = off.paMulti(x, y); + GSOffset::PAHelper pa = off.paMulti(y); for (; len > 0; len--) { fn(pa); - pa.incX(); - if (pa.x() >= ex) + x++; + if (x >= ex) { y++; - pa = off.paMulti(sx, y); + x = sx; + pa = off.paMulti(y); } } - - x = pa.x(); }; GSOffset off = GetOffset(bp, bw, BITBLTBUF.DPSM); @@ -1132,7 +1107,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF case PSM_PSMZ32: copy(len / 4, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa) { - WritePixel32(pa.value(), *pd); + WritePixel32(pa.value(x), *pd); pd++; }); break; @@ -1141,7 +1116,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF case PSM_PSMZ24: copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa) { - WritePixel24(pa.value(), *(uint32*)pb); + WritePixel24(pa.value(x), *(uint32*)pb); pb += 3; }); break; @@ -1152,7 +1127,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF case PSM_PSMZ16S: copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa) { - WritePixel16(pa.value(), *pw); + WritePixel16(pa.value(x), *pw); pw++; }); break; @@ -1160,7 +1135,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF case PSM_PSMT8: copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa) { - WritePixel8(pa.value(), *pb); + WritePixel8(pa.value(x), *pb); pb++; }); break; @@ -1168,9 +1143,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF case PSM_PSMT4: copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa) { - WritePixel4(pa.value(), *pb & 0xf); - pa.incX(); - WritePixel4(pa.value(), *pb >> 4); + WritePixel4(pa.value(x++), *pb & 0xf); + WritePixel4(pa.value(x), *pb >> 4); pb++; }); break; @@ -1178,7 +1152,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF case PSM_PSMT8H: copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa) { - WritePixel8H(pa.value(), *pb); + WritePixel8H(pa.value(x), *pb); pb++; }); break; @@ -1186,9 +1160,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF case PSM_PSMT4HL: copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa) { - WritePixel4HL(pa.value(), *pb & 0xf); - pa.incX(); - WritePixel4HL(pa.value(), *pb >> 4); + WritePixel4HL(pa.value(x++), *pb & 0xf); + WritePixel4HL(pa.value(x), *pb >> 4); pb++; }); break; @@ -1196,9 +1169,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF case PSM_PSMT4HH: copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa) { - WritePixel4HH(pa.value(), *pb & 0xf); - pa.incX(); - WritePixel4HH(pa.value(), *pb >> 4); + WritePixel4HH(pa.value(x++), *pb & 0xf); + WritePixel4HH(pa.value(x), *pb >> 4); pb++; }); break; @@ -1230,20 +1202,19 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB auto copy = [&](int len, const GSOffset& off, auto&& fn) { - GSOffset::PAHelper pa = off.paMulti(x, y); + GSOffset::PAHelper pa = off.paMulti(y); for (; len > 0; len--) { fn(pa); - pa.incX(); - if (pa.x() >= ex) + x++; + if (x >= ex) { y++; - pa = off.paMulti(sx, y); + x = sx; + pa = off.paMulti(y); } } - - x = pa.x(); }; GSOffset off = GetOffset(bp, bw, BITBLTBUF.SPSM); @@ -1259,49 +1230,48 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB len /= 4; - GSOffset::PAHelper pa = off.assertSizesMatch(swizzle32).paMulti(x, y); + GSOffset::PAHelper pa = off.assertSizesMatch(swizzle32).paMulti(y); while (len > 0) { - for (; len > 0 && pa.x() < ex && (pa.x() & 7); len--, pa.incX(), pd++) + for (; len > 0 && x < ex && (x & 7); len--, x++, pd++) { - *pd = m_vm32[pa.value()]; + *pd = m_vm32[pa.value(x)]; } // aligned to a column - for (int ex8 = ex - 8; len >= 8 && pa.x() <= ex8; len -= 8, pd += 8) + for (int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8) { - uint32* ps = m_vm32 + pa.value(); + uint32* ps = m_vm32 + pa.value(x); GSVector4i::store(&pd[0], GSVector4i::load(ps + 0, ps + 4)); GSVector4i::store(&pd[4], GSVector4i::load(ps + 8, ps + 12)); - for (int i = 0; i < 8; i++, pa.incX()) - ASSERT(pd[i] == m_vm32[pa.value()]); + for (int i = 0; i < 8; i++) + ASSERT(pd[i] == m_vm32[pa.value(x + i)]); } - for (; len > 0 && pa.x() < ex; len--, pa.incX(), pd++) + for (; len > 0 && x < ex; len--, x++, pd++) { - *pd = m_vm32[pa.value()]; + *pd = m_vm32[pa.value(x)]; } - if (pa.x() == ex) + if (x == ex) { y++; - pa = off.assertSizesMatch(swizzle32).paMulti(sx, y); + x = sx; + pa = off.assertSizesMatch(swizzle32).paMulti(y); } } - - x = pa.x(); - break; } + break; case PSM_PSMCT24: case PSM_PSMZ24: copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa) { - uint32 c = m_vm32[pa.value()]; + uint32 c = m_vm32[pa.value(x)]; pb[0] = (uint8)(c); pb[1] = (uint8)(c >> 8); pb[2] = (uint8)(c >> 16); @@ -1315,7 +1285,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB case PSM_PSMZ16S: copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa) { - *pw = m_vm16[pa.value()]; + *pw = m_vm16[pa.value(x)]; pw++; }); break; @@ -1323,7 +1293,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB case PSM_PSMT8: copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa) { - *pb = m_vm8[pa.value()]; + *pb = m_vm8[pa.value(x)]; pb++; }); break; @@ -1331,9 +1301,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB case PSM_PSMT4: copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa) { - uint8 low = ReadPixel4(pa.value()); - pa.incX(); - uint8 high = ReadPixel4(pa.value()); + uint8 low = ReadPixel4(pa.value(x++)); + uint8 high = ReadPixel4(pa.value(x)); *pb = low | (high << 4); pb++; }); @@ -1342,7 +1311,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB case PSM_PSMT8H: copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa) { - *pb = (uint8)(m_vm32[pa.value()] >> 24); + *pb = (uint8)(m_vm32[pa.value(x)] >> 24); pb++; }); break; @@ -1350,9 +1319,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB case PSM_PSMT4HL: copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa) { - uint32 c0 = m_vm32[pa.value()] >> 24 & 0x0f; - pa.incX(); - uint32 c1 = m_vm32[pa.value()] >> 20 & 0xf0; + uint32 c0 = m_vm32[pa.value(x++)] >> 24 & 0x0f; + uint32 c1 = m_vm32[pa.value(x)] >> 20 & 0xf0; *pb = (uint8)(c0 | c1); pb++; }); @@ -1361,9 +1329,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB case PSM_PSMT4HH: copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa) { - uint32 c0 = m_vm32[pa.value()] >> 28 & 0x0f; - pa.incX(); - uint32 c1 = m_vm32[pa.value()] >> 24 & 0xf0; + uint32 c0 = m_vm32[pa.value(x++)] >> 28 & 0x0f; + uint32 c1 = m_vm32[pa.value(x)] >> 24 & 0xf0; *pb = (uint8)(c0 | c1); pb++; }); diff --git a/pcsx2/GS/GSLocalMemory.h b/pcsx2/GS/GSLocalMemory.h index ff8e165004..04d393a462 100644 --- a/pcsx2/GS/GSLocalMemory.h +++ b/pcsx2/GS/GSLocalMemory.h @@ -41,38 +41,66 @@ struct GSPixelOffset4 uint32 fbp, zbp, fpsm, zpsm, bw; }; -struct alignas(128) GSPageOffsetRow -{ - // Maximum page width is 128, but store mirror for unaligned simd loads - uint32 value[256]; -}; +class GSOffset; -template -struct GSPageOffsetTable -{ - GSPageOffsetRow value[Height]; -}; - -class GSSwizzleInfo; - -class GSOffset +class GSSwizzleInfo { + friend class GSOffset; /// Table for storing swizzling of blocks within a page const GSBlockSwizzleTable* m_blockSwizzle; - /// Table for storing swizzling of pixels within a page - const GSPageOffsetRow* m_pixelSwizzle; + /// Table for storing swizzling of pixels within a page in the y dimension + const int* m_pixelSwizzleCol; + /// Array of tables for storing swizzling of pixels in the x dimension + const GSPixelRowOffsetTable* const* m_pixelSwizzleRow; GSVector2i m_pageMask; ///< Mask for getting the offset of a pixel that's within a page (may also be used as page dimensions - 1) GSVector2i m_blockMask; ///< Mask for getting the offset of a pixel that's within a block (may also be used as block dimensions - 1) - uint8 m_pageShiftX; ///< Amount to rshift x value by to get page x offset - uint8 m_pageShiftY; ///< Amount to rshift y value by to get page y offset - uint8 m_blockShiftX; ///< Amount to rshift x value by to get block x offset - uint8 m_blockShiftY; ///< Amount to rshift y value by to get block y offset + int m_pixelRowMask; ///< Mask for getting the offset in m_pixelSwizzleRow for a given y value + uint8 m_pageShiftX; ///< Amount to rshift x value by to get page offset + uint8 m_pageShiftY; ///< Amount to rshift y value by to get page offset + uint8 m_blockShiftX; ///< Amount to rshift x value by to get offset in block + uint8 m_blockShiftY; ///< Amount to rshift y value by to get offset in block + static constexpr uint8 ilog2(uint32 i) { return i < 2 ? 0 : 1 + ilog2(i >> 1); } + +public: + GSSwizzleInfo() = default; + + /// @param blockSize Size of block in pixels + template + constexpr GSSwizzleInfo(GSSwizzleTableList list) + : m_blockSwizzle(&list.block) + , m_pixelSwizzleCol(list.col.value) + , m_pixelSwizzleRow(list.row.rows) + , m_pageMask{PageWidth - 1, PageHeight - 1} + , m_blockMask{(PageWidth / BlocksWide) - 1, (PageHeight / BlocksHigh) - 1} + , m_pixelRowMask(PixelRowMask) + , m_pageShiftX(ilog2(PageWidth)), m_pageShiftY(ilog2(PageHeight)) + , m_blockShiftX(ilog2(PageWidth / BlocksWide)), m_blockShiftY(ilog2(PageHeight / BlocksHigh)) + { + static_assert(1 << ilog2(PageWidth) == PageWidth, "PageWidth must be a power of 2"); + static_assert(1 << ilog2(PageHeight) == PageHeight, "PageHeight must be a power of 2"); + } + + /// Get the block number of the given pixel + uint32 bn(int x, int y, uint32 bp, uint32 bw) const; + + /// Get the address of the given pixel + uint32 pa(int x, int y, uint32 bp, uint32 bw) const; +}; + +class GSOffset : GSSwizzleInfo +{ int m_bp; ///< Offset's base pointer (same measurement as GS) int m_bwPg; ///< Offset's buffer width in pages (not equal to bw in GS for 8 and 4-bit textures) int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffset algorithms) public: GSOffset() = default; - constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm); + constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm) + : GSSwizzleInfo(swz) + , m_bp(bp) + , m_bwPg(bw >> (m_pageShiftX - 6)) + , m_psm(psm) + { + } /// Help the optimizer by using this method instead of GSLocalMemory::GetOffset when the PSM is known constexpr static GSOffset fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm); @@ -182,49 +210,37 @@ public: class PAHelper { /// Pixel swizzle array - const GSPageOffsetRow* m_pixelSwizzle; - int m_pageMaskX; ///< Mask for getting offset within a page - int m_base; ///< Address for origin x - int m_x; ///< Current x position - int m_shift; ///< Amount to lshift x to get offset due to page after clearing with pageMaskX - int m_mask; ///< Mask to stay in bounds + const GSPixelRowOffsetTable* m_pixelSwizzleRow; + int m_base; + public: PAHelper() = default; - PAHelper(const GSOffset& off, int x, int y) + PAHelper(const GSOffset& off, int y) { - m_pixelSwizzle = off.m_pixelSwizzle + (y & off.m_pageMask.y); + m_pixelSwizzleRow = off.m_pixelSwizzleRow[y & off.m_pixelRowMask]; m_base = off.m_bp << (off.m_pageShiftX + off.m_pageShiftY - 5); m_base += ((y & ~off.m_pageMask.y) * off.m_bwPg) << off.m_pageShiftX; - m_pageMaskX = off.m_pageMask.x; - m_shift = off.m_pageShiftY; - m_x = x; - m_mask = (MAX_PAGES << (off.m_pageShiftX + off.m_pageShiftY)) - 1; + m_base &= (MAX_PAGES << (off.m_pageShiftX + off.m_pageShiftY)) - 1; + m_base += off.m_pixelSwizzleCol[y & off.m_pageMask.y]; } - /// Get current x value - int x() const { return m_x; } - /// Increment x value - void incX() { m_x++; } - /// Decrement x value - void decX() { m_x--; } /// Get current pixel address - uint32 value() const + uint32 value(size_t x) const { - int x = (m_x & ~m_pageMaskX) << m_shift; - return (m_base + x + m_pixelSwizzle->value[m_x & m_pageMaskX]) & m_mask; + return m_base + (*m_pixelSwizzleRow)[x]; } }; /// Get the address of the given pixel uint32 pa(int x, int y) const { - return PAHelper(*this, x, y).value(); + return PAHelper(*this, y).value(x); } /// Get a helper class for efficiently calculating multiple pixel addresses in a line (along the x axis) - PAHelper paMulti(int x, int y) const + PAHelper paMulti(int y) const { - return PAHelper(*this, x, y); + return PAHelper(*this, y); } /// Loop over the pixels in the given rectangle @@ -236,11 +252,10 @@ public: for (int y = r.top; y < r.bottom; y++, px = reinterpret_cast(reinterpret_cast(px) + pitch)) { - PAHelper pa = paMulti(r.left, y); - while (pa.x() < r.right) + PAHelper pa = paMulti(y); + for (int x = r.left; x < r.right; x++) { - fn(vm + pa.value(), px + pa.x()); - pa.incX(); + fn(vm + pa.value(x), px + x); } } } @@ -314,93 +329,30 @@ public: /// Use compile-time dimensions from `swz` as a performance optimization /// Also asserts if your assumption was wrong - constexpr GSOffset assertSizesMatch(const GSSwizzleInfo& swz) const; -}; - -class GSSwizzleInfo -{ - friend class GSOffset; - /// Table for storing swizzling of blocks within a page - const GSBlockSwizzleTable* m_blockSwizzle; - /// Table for storing swizzling of pixels within a page - const GSPageOffsetRow* m_pixelSwizzle; - GSVector2i m_pageMask; ///< Mask for getting the offset of a pixel that's within a page (may also be used as page dimensions - 1) - GSVector2i m_blockMask; ///< Mask for getting the offset of a pixel that's within a block (may also be used as block dimensions - 1) - uint8 m_pageShiftX; ///< Amount to rshift x value by to get page offset - uint8 m_pageShiftY; ///< Amount to rshift y value by to get page offset - uint8 m_blockShiftX; ///< Amount to rshift x value by to get offset in block - uint8 m_blockShiftY; ///< Amount to rshift y value by to get offset in block - static constexpr uint8 ilog2(uint32 i) { return i < 2 ? 0 : 1 + ilog2(i>>1); } -public: - GSSwizzleInfo() = default; - - /// @param PageWidth Width of page in pixels - /// @param PageHeight Height of page in pixels - /// @param blockSize Size of block in pixels - template - constexpr GSSwizzleInfo(GSVector2i blockSize, const GSBlockSwizzleTable* blockSwizzle, const GSPageOffsetTable* pxSwizzle) - : m_blockSwizzle(blockSwizzle) - , m_pixelSwizzle(pxSwizzle->value) - , m_pageMask{PageWidth - 1, PageHeight - 1} - , m_blockMask{blockSize.x - 1, blockSize.y - 1} - , m_pageShiftX(ilog2(PageWidth)), m_pageShiftY(ilog2(PageHeight)) - , m_blockShiftX(ilog2(blockSize.x)), m_blockShiftY(ilog2(blockSize.y)) + constexpr GSOffset assertSizesMatch(const GSSwizzleInfo& swz) const { - static_assert(1 << ilog2(PageWidth) == PageWidth, "PageWidth must be a power of 2"); - static_assert(1 << ilog2(PageHeight) == PageHeight, "PageHeight must be a power of 2"); - } - - /// Get the block number of the given pixel - uint32 bn(int x, int y, uint32 bp, uint32 bw) const - { - return GSOffset(*this, bp, bw, 0).bn(x, y); - } - - /// Get the address of the given pixel - uint32 pa(int x, int y, uint32 bp, uint32 bw) const - { - return GSOffset(*this, bp, bw, 0).pa(x, y); - } - - /// Loop over all the pages in the given rect, calling `fn` on each - template - void loopPages(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const - { - GSOffset(*this, bp, bw, 0).loopPages(rect, std::forward(fn)); - } - - /// Loop over all the blocks in the given rect, calling `fn` on each - template - void loopBlocks(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const - { - GSOffset(*this, bp, bw, 0).loopBlocks(rect, std::forward(fn)); + GSOffset o = *this; +#define MATCH(x) ASSERT(o.x == swz.x); o.x = swz.x; + MATCH(m_pageMask) + MATCH(m_blockMask) + MATCH(m_pixelRowMask) + MATCH(m_pageShiftX) + MATCH(m_pageShiftY) + MATCH(m_blockShiftX) + MATCH(m_blockShiftY) +#undef MATCH + return o; } }; -constexpr inline GSOffset::GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm) - : m_blockSwizzle(swz.m_blockSwizzle) - , m_pixelSwizzle(swz.m_pixelSwizzle) - , m_pageMask(swz.m_pageMask), m_blockMask(swz.m_blockMask) - , m_pageShiftX(swz.m_pageShiftX), m_pageShiftY(swz.m_pageShiftY) - , m_blockShiftX(swz.m_blockShiftX), m_blockShiftY(swz.m_blockShiftY) - , m_bp(bp) - , m_bwPg(bw >> (m_pageShiftX - 6)) - , m_psm(psm) +inline uint32 GSSwizzleInfo::bn(int x, int y, uint32 bp, uint32 bw) const { + return GSOffset(*this, bp, bw, 0).bn(x, y); } -constexpr GSOffset GSOffset::assertSizesMatch(const GSSwizzleInfo& swz) const +inline uint32 GSSwizzleInfo::pa(int x, int y, uint32 bp, uint32 bw) const { - GSOffset o = *this; -#define MATCH(x) ASSERT(o.x == swz.x); o.x = swz.x; - MATCH(m_pageMask) - MATCH(m_blockMask) - MATCH(m_pageShiftX) - MATCH(m_pageShiftY) - MATCH(m_blockShiftX) - MATCH(m_blockShiftY) -#undef MATCH - return o; + return GSOffset(*this, bp, bw, 0).pa(x, y); } class GSLocalMemory : public GSAlignedClass<32> @@ -452,24 +404,15 @@ public: protected: bool m_use_fifo_alloc; - static GSPageOffsetTable<32, 64> pageOffset32; - static GSPageOffsetTable<32, 64> pageOffset32Z; - static GSPageOffsetTable<64, 64> pageOffset16; - static GSPageOffsetTable<64, 64> pageOffset16S; - static GSPageOffsetTable<64, 64> pageOffset16Z; - static GSPageOffsetTable<64, 64> pageOffset16SZ; - static GSPageOffsetTable<64, 128> pageOffset8; - static GSPageOffsetTable<128, 128> pageOffset4; - public: - static constexpr GSSwizzleInfo swizzle32{{8, 8}, &blockTable32, &pageOffset32}; - static constexpr GSSwizzleInfo swizzle32Z{{8, 8}, &blockTable32Z, &pageOffset32Z}; - static constexpr GSSwizzleInfo swizzle16{{16, 8}, &blockTable16, &pageOffset16}; - static constexpr GSSwizzleInfo swizzle16S{{16, 8}, &blockTable16S, &pageOffset16S}; - static constexpr GSSwizzleInfo swizzle16Z{{16, 8}, &blockTable16Z, &pageOffset16Z}; - static constexpr GSSwizzleInfo swizzle16SZ{{16, 8}, &blockTable16SZ, &pageOffset16SZ}; - static constexpr GSSwizzleInfo swizzle8{{16, 16}, &blockTable8, &pageOffset8}; - static constexpr GSSwizzleInfo swizzle4{{32, 16}, &blockTable4, &pageOffset4}; + static constexpr GSSwizzleInfo swizzle32 {swizzleTables32}; + static constexpr GSSwizzleInfo swizzle32Z {swizzleTables32Z}; + static constexpr GSSwizzleInfo swizzle16 {swizzleTables16}; + static constexpr GSSwizzleInfo swizzle16S {swizzleTables16S}; + static constexpr GSSwizzleInfo swizzle16Z {swizzleTables16Z}; + static constexpr GSSwizzleInfo swizzle16SZ {swizzleTables16SZ}; + static constexpr GSSwizzleInfo swizzle8 {swizzleTables8}; + static constexpr GSSwizzleInfo swizzle4 {swizzleTables4}; protected: __forceinline static uint32 Expand24To32(uint32 c, const GIFRegTEXA& TEXA) diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 90241e7e14..1387066469 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -1663,14 +1663,12 @@ void GSState::Move() { for (int y = 0; y < h; y++, sy += yinc, dy += yinc) { - GSOffset::PAHelper s = spo.paMulti(sx, sy); - GSOffset::PAHelper d = dpo.paMulti(dx, dy); + GSOffset::PAHelper s = spo.paMulti(sy); + GSOffset::PAHelper d = dpo.paMulti(dy); for (int x = 0; x < w; x++) { - pxCopyFn(d.value(), s.value()); - s.incX(); - d.incX(); + pxCopyFn(d.value(dx + x), s.value(sx + x)); } } } @@ -1678,14 +1676,12 @@ void GSState::Move() { for (int y = 0; y < h; y++, sy += yinc, dy += yinc) { - GSOffset::PAHelper s = spo.paMulti(sx, sy); - GSOffset::PAHelper d = dpo.paMulti(dx, dy); + GSOffset::PAHelper s = spo.paMulti(sy); + GSOffset::PAHelper d = dpo.paMulti(dy); for (int x = 0; x < w; x++) { - pxCopyFn(d.value(), s.value()); - s.decX(); - d.decX(); + pxCopyFn(d.value(dx - x), s.value(sx - x)); } } } diff --git a/pcsx2/GS/GSTables.cpp b/pcsx2/GS/GSTables.cpp index dad675f8e5..7a68fa2ca2 100644 --- a/pcsx2/GS/GSTables.cpp +++ b/pcsx2/GS/GSTables.cpp @@ -20,8 +20,8 @@ #include "GS_types.h" template -static constexpr GSBlockSwizzleTable makeSwizzleTable(const uint8 (&arr)[Height][Width]) { - GSBlockSwizzleTable table = {}; +static constexpr GSSizedBlockSwizzleTable makeSwizzleTable(const uint8 (&arr)[Height][Width]) { + GSSizedBlockSwizzleTable table = {}; for (int y = 0; y < 8; y++) { for (int x = 0; x < 8; x++) { table.value[y][x] = arr[y % Height][x % Width]; @@ -114,14 +114,14 @@ static constexpr uint8 _blockTable4[8][4] = { 21, 23, 29, 31 } }; -constexpr GSBlockSwizzleTable blockTable32 = makeSwizzleTable(_blockTable32); -constexpr GSBlockSwizzleTable blockTable32Z = makeSwizzleTable(_blockTable32Z); -constexpr GSBlockSwizzleTable blockTable16 = makeSwizzleTable(_blockTable16); -constexpr GSBlockSwizzleTable blockTable16S = makeSwizzleTable(_blockTable16S); -constexpr GSBlockSwizzleTable blockTable16Z = makeSwizzleTable(_blockTable16Z); -constexpr GSBlockSwizzleTable blockTable16SZ = makeSwizzleTable(_blockTable16SZ); -constexpr GSBlockSwizzleTable blockTable8 = makeSwizzleTable(_blockTable8); -constexpr GSBlockSwizzleTable blockTable4 = makeSwizzleTable(_blockTable4); +constexpr GSSizedBlockSwizzleTable<4, 8> blockTable32 = makeSwizzleTable(_blockTable32); +constexpr GSSizedBlockSwizzleTable<4, 8> blockTable32Z = makeSwizzleTable(_blockTable32Z); +constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16 = makeSwizzleTable(_blockTable16); +constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16S = makeSwizzleTable(_blockTable16S); +constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16Z = makeSwizzleTable(_blockTable16Z); +constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16SZ = makeSwizzleTable(_blockTable16SZ); +constexpr GSSizedBlockSwizzleTable<4, 8> blockTable8 = makeSwizzleTable(_blockTable8); +constexpr GSSizedBlockSwizzleTable<8, 4> blockTable4 = makeSwizzleTable(_blockTable4); constexpr uint8 columnTable32[8][8] = { @@ -290,3 +290,75 @@ constexpr uint8 clutTableT16I4[16] = 0, 2, 8, 10, 16, 18, 24, 26, 4, 6, 12, 14, 20, 22, 28, 30 }; + +template +constexpr int pxOffset(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int x, int y) +{ + int blockSize = ColHeight * ColWidth; + int pageSize = blockSize * BlocksHigh * BlocksWide; + int pageWidth = BlocksWide * ColWidth; + int pageX = x / pageWidth; + int subpageX = x % pageWidth; + int blockID = blockTable[y / ColHeight][subpageX / ColWidth]; + int sublockOffset = colTable[y % ColHeight][subpageX % ColWidth]; + return pageX * pageSize + blockID * blockSize + sublockOffset; +} + +template +constexpr GSPixelColOffsetTable makeColOffsetTable(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth]) +{ + constexpr int size = BlocksHigh * ColHeight; + GSPixelColOffsetTable table = {}; + for (int y = 0; y < size; y++) + { + table.value[y] = pxOffset(blockTable, colTable, 0, y); + } + return table; +} + +template +constexpr GSSizedPixelRowOffsetTable makeRowOffsetTable(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int y) +{ + int base = pxOffset(blockTable, colTable, 0, y); + GSSizedPixelRowOffsetTable table = {}; + for (int x = 0; x < 2048; x++) + { + table.value[x] = pxOffset(blockTable, colTable, x, y) - base; + } + return table; +} + +constexpr GSPixelColOffsetTable< 32> pixelColOffset32 = makeColOffsetTable(_blockTable32, columnTable32); +constexpr GSPixelColOffsetTable< 32> pixelColOffset32Z = makeColOffsetTable(_blockTable32Z, columnTable32); +constexpr GSPixelColOffsetTable< 64> pixelColOffset16 = makeColOffsetTable(_blockTable16, columnTable16); +constexpr GSPixelColOffsetTable< 64> pixelColOffset16S = makeColOffsetTable(_blockTable16S, columnTable16); +constexpr GSPixelColOffsetTable< 64> pixelColOffset16Z = makeColOffsetTable(_blockTable16Z, columnTable16); +constexpr GSPixelColOffsetTable< 64> pixelColOffset16SZ = makeColOffsetTable(_blockTable16SZ, columnTable16); +constexpr GSPixelColOffsetTable< 64> pixelColOffset8 = makeColOffsetTable(_blockTable8, columnTable8); +constexpr GSPixelColOffsetTable<128> pixelColOffset4 = makeColOffsetTable(_blockTable4, columnTable4); +// These can't be constexpr due to a GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99901 +CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset32 = makeRowOffsetTable(_blockTable32, columnTable32, 0); +CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset32Z = makeRowOffsetTable(_blockTable32Z, columnTable32, 0); +CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16 = makeRowOffsetTable(_blockTable16, columnTable16, 0); +CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16S = makeRowOffsetTable(_blockTable16S, columnTable16, 0); +CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16Z = makeRowOffsetTable(_blockTable16Z, columnTable16, 0); +CONSTINIT const GSSizedPixelRowOffsetTable< 64> GSTables::_pixelRowOffset16SZ = makeRowOffsetTable(_blockTable16SZ, columnTable16, 0); +CONSTINIT const GSSizedPixelRowOffsetTable<128> GSTables::_pixelRowOffset8[2] = +{ + makeRowOffsetTable(_blockTable8, columnTable8, 0), + makeRowOffsetTable(_blockTable8, columnTable8, 2), +}; +CONSTINIT const GSSizedPixelRowOffsetTable<128> GSTables::_pixelRowOffset4[2] = +{ + makeRowOffsetTable(_blockTable4, columnTable4, 0), + makeRowOffsetTable(_blockTable4, columnTable4, 2), +}; + +constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset32; +constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset32Z; +constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16; +constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16S; +constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16Z; +constexpr GSPixelRowOffsetTableList< 64, 0> GSTables::pixelRowOffset16SZ; +constexpr GSPixelRowOffsetTableList<128, 7> GSTables::pixelRowOffset8; +constexpr GSPixelRowOffsetTableList<128, 7> GSTables::pixelRowOffset4; diff --git a/pcsx2/GS/GSTables.h b/pcsx2/GS/GSTables.h index c2e728be21..6b09222d73 100644 --- a/pcsx2/GS/GSTables.h +++ b/pcsx2/GS/GSTables.h @@ -29,14 +29,88 @@ struct alignas(64) GSBlockSwizzleTable } }; -extern const GSBlockSwizzleTable blockTable32; -extern const GSBlockSwizzleTable blockTable32Z; -extern const GSBlockSwizzleTable blockTable16; -extern const GSBlockSwizzleTable blockTable16S; -extern const GSBlockSwizzleTable blockTable16Z; -extern const GSBlockSwizzleTable blockTable16SZ; -extern const GSBlockSwizzleTable blockTable8; -extern const GSBlockSwizzleTable blockTable4; +/// Adds sizes to GSBlockSwizzleTable for to feel better about not making mistakes +template +struct GSSizedBlockSwizzleTable : public GSBlockSwizzleTable +{ +}; + +/// Table for storing offsets of x = 0 pixels from the beginning of the page +/// Add values from a GSPixelRowOffsetTable to get the pixels for x != 0 +template +struct alignas(128) GSPixelColOffsetTable +{ + int value[Height] = {}; + + int operator[](int y) const + { + return value[y % Height]; + } +}; + +/// Table for storing offsets of x != 0 pixels from the pixel at the same y where x = 0 +/// Unlike ColOffsets, this table stretches to the maximum size of a texture so no masking is needed +struct alignas(128) GSPixelRowOffsetTable +{ + int value[2048] = {}; + + int operator[](size_t x) const + { + ASSERT(x < 2048); + return value[x]; + } +}; + +/// Adds size to GSPixelRowOffsetTable to feel better about not making mistakes +template +struct GSSizedPixelRowOffsetTable : public GSPixelRowOffsetTable +{ +}; + +/// List of row offset tables +/// Some swizzlings (PSMT8 and PSMT4) have different row offsets depending on which column they're a part of +/// The ones that do use an a a b b b b a a pattern that repeats every 8 rows. +/// You can always look up the correct row in this list with y & 7, but if you use y & Mask where Mask is known at compile time, the compiler should be able to optimize better +template +struct alignas(sizeof(void*) * 8) GSPixelRowOffsetTableList +{ + const GSPixelRowOffsetTable* rows[8]; + + const GSPixelRowOffsetTable& operator[](int y) const + { + return *rows[y & Mask]; + } +}; + +/// Full pixel offset table +/// Template values are for objects constructing from one of these tables +template +struct GSSwizzleTableList +{ + const GSSizedBlockSwizzleTable& block; + const GSPixelColOffsetTable& col; + const GSPixelRowOffsetTableList& row; +}; + +/// List of all tables for a given swizzle for easy setup +template +constexpr GSSwizzleTableList +makeSwizzleTableList( + const GSSizedBlockSwizzleTable& block, + const GSPixelColOffsetTable& col, + const GSPixelRowOffsetTableList& row) +{ + return {block, col, row}; +} + +extern const GSSizedBlockSwizzleTable<4, 8> blockTable32; +extern const GSSizedBlockSwizzleTable<4, 8> blockTable32Z; +extern const GSSizedBlockSwizzleTable<8, 4> blockTable16; +extern const GSSizedBlockSwizzleTable<8, 4> blockTable16S; +extern const GSSizedBlockSwizzleTable<8, 4> blockTable16Z; +extern const GSSizedBlockSwizzleTable<8, 4> blockTable16SZ; +extern const GSSizedBlockSwizzleTable<4, 8> blockTable8; +extern const GSSizedBlockSwizzleTable<8, 4> blockTable4; extern const uint8 columnTable32[8][8]; extern const uint8 columnTable16[8][16]; extern const uint8 columnTable8[16][16]; @@ -45,3 +119,57 @@ extern const uint8 clutTableT32I8[128]; extern const uint8 clutTableT32I4[16]; extern const uint8 clutTableT16I8[32]; extern const uint8 clutTableT16I4[16]; +extern const GSPixelColOffsetTable< 32> pixelColOffset32; +extern const GSPixelColOffsetTable< 32> pixelColOffset32Z; +extern const GSPixelColOffsetTable< 64> pixelColOffset16; +extern const GSPixelColOffsetTable< 64> pixelColOffset16S; +extern const GSPixelColOffsetTable< 64> pixelColOffset16Z; +extern const GSPixelColOffsetTable< 64> pixelColOffset16SZ; +extern const GSPixelColOffsetTable< 64> pixelColOffset8; +extern const GSPixelColOffsetTable<128> pixelColOffset4; + +template +constexpr GSPixelRowOffsetTableList makeRowOffsetTableList( + const GSSizedPixelRowOffsetTable* a) +{ + return {{a, a, a, a, a, a, a, a}}; +} + +template +constexpr GSPixelRowOffsetTableList makeRowOffsetTableList( + const GSSizedPixelRowOffsetTable* a, + const GSSizedPixelRowOffsetTable* b) +{ + return {{a, a, b, b, b, b, a, a}}; +} + +/// Just here to force external linkage so we don't end up with multiple copies of pixelRowOffset* +struct GSTables +{ + static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset32; + static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset32Z; + static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16; + static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16S; + static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16Z; + static const GSSizedPixelRowOffsetTable< 64> _pixelRowOffset16SZ; + static const GSSizedPixelRowOffsetTable<128> _pixelRowOffset8[2]; + static const GSSizedPixelRowOffsetTable<128> _pixelRowOffset4[2]; + + static constexpr auto pixelRowOffset32 = makeRowOffsetTableList(&_pixelRowOffset32); + static constexpr auto pixelRowOffset32Z = makeRowOffsetTableList(&_pixelRowOffset32Z); + static constexpr auto pixelRowOffset16 = makeRowOffsetTableList(&_pixelRowOffset16); + static constexpr auto pixelRowOffset16S = makeRowOffsetTableList(&_pixelRowOffset16S); + static constexpr auto pixelRowOffset16Z = makeRowOffsetTableList(&_pixelRowOffset16Z); + static constexpr auto pixelRowOffset16SZ = makeRowOffsetTableList(&_pixelRowOffset16SZ); + static constexpr auto pixelRowOffset8 = makeRowOffsetTableList(&_pixelRowOffset8[0], &_pixelRowOffset8[1]); + static constexpr auto pixelRowOffset4 = makeRowOffsetTableList(&_pixelRowOffset4[0], &_pixelRowOffset4[1]); +}; + +constexpr auto swizzleTables32 = makeSwizzleTableList(blockTable32, pixelColOffset32, GSTables::pixelRowOffset32 ); +constexpr auto swizzleTables32Z = makeSwizzleTableList(blockTable32Z, pixelColOffset32Z, GSTables::pixelRowOffset32Z ); +constexpr auto swizzleTables16 = makeSwizzleTableList(blockTable16, pixelColOffset16, GSTables::pixelRowOffset16 ); +constexpr auto swizzleTables16Z = makeSwizzleTableList(blockTable16Z, pixelColOffset16Z, GSTables::pixelRowOffset16Z ); +constexpr auto swizzleTables16S = makeSwizzleTableList(blockTable16S, pixelColOffset16S, GSTables::pixelRowOffset16S ); +constexpr auto swizzleTables16SZ = makeSwizzleTableList(blockTable16SZ, pixelColOffset16SZ, GSTables::pixelRowOffset16SZ); +constexpr auto swizzleTables8 = makeSwizzleTableList(blockTable8, pixelColOffset8, GSTables::pixelRowOffset8 ); +constexpr auto swizzleTables4 = makeSwizzleTableList(blockTable4, pixelColOffset4, GSTables::pixelRowOffset4 ); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index d8c472cfa5..9c40a7cc83 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -907,26 +907,22 @@ void GSRendererHW::SwSpriteRender() for (int y = 0; y < h; y++, ++sy, ++dy) { - GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sx, sy) : GSOffset::PAHelper(); - GSOffset::PAHelper dpa = dpo.paMulti(dx, dy); + GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sy) : GSOffset::PAHelper(); + GSOffset::PAHelper dpa = dpo.paMulti(dy); ASSERT(w % 2 == 0); for (int x = 0; x < w; x += 2) { - uint32 di = dpa.value(); - dpa.incX(); - ASSERT(di + 1 == dpa.value()); // Destination pixel pair is adjacent in memory - dpa.incX(); + uint32 di = dpa.value(dx + x); + ASSERT(di + 1 == dpa.value(dx + x + 1)); // Destination pixel pair is adjacent in memory GSVector4i sc; if (texture_mapping_enabled) { - uint32 si = spa.value(); - spa.incX(); + uint32 si = spa.value(sx + x); // Read 2 source pixel colors - ASSERT((si + 1) == spa.value()); // Source pixel pair is adjacent in memory - spa.incX(); + ASSERT((si + 1) == spa.value(sx + x + 1)); // Source pixel pair is adjacent in memory sc = GSVector4i::loadl(&m_mem.m_vm32[si]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr // Apply TFX @@ -1827,11 +1823,11 @@ void GSRendererHW::OI_GsMemClear() // Based on WritePixel32 for (int y = r.top; y < r.bottom; y++) { - GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y); + GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(y); - for (; pa.x() < r.right; pa.incX()) + for (int x = r.left; x < r.right; x++) { - m_mem.m_vm32[pa.value()] = 0; // Here the constant color + m_mem.m_vm32[pa.value(x)] = 0; // Here the constant color } } } @@ -1840,11 +1836,11 @@ void GSRendererHW::OI_GsMemClear() // Based on WritePixel24 for (int y = r.top; y < r.bottom; y++) { - GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(r.left, y); + GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(y); - for (; pa.x() < r.right; pa.incX()) + for (int x = r.left; x < r.right; x++) { - m_mem.m_vm32[pa.value()] &= 0xff000000; // Clear the color + m_mem.m_vm32[pa.value(x)] &= 0xff000000; // Clear the color } } } @@ -1855,11 +1851,11 @@ void GSRendererHW::OI_GsMemClear() // Based on WritePixel16 for(int y = r.top; y < r.bottom; y++) { - GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(r.left, y); + GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(y); for(int x = r.left; x < r.right; x++) { - m_mem.m_vm16[pa.value()] = 0; // Here the constant color + m_mem.m_vm16[pa.value(x)] = 0; // Here the constant color } } #endif diff --git a/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp b/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp index 0bedbd5ecb..c25fa4dddc 100644 --- a/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp +++ b/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp @@ -2942,11 +2942,11 @@ void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, uint32 c for (int y = r.y; y < r.w; y++) { - GSOffset::PAHelper pa = off.paMulti(r.x, y); + GSOffset::PAHelper pa = off.paMulti(y); - for (; pa.x() < r.z; pa.incX()) + for (int x = r.x; x < r.z; x++) { - T& d = vm[pa.value()]; + T& d = vm[pa.value(x)]; d = (T)(!masked ? c : (c | (d & m))); } }