GS: Switch to new non-cached GSOffset

This commit is contained in:
TellowKrinkle 2021-02-23 19:37:35 -06:00 committed by refractionpcsx2
parent d9defb19f9
commit 8eb50c3517
18 changed files with 450 additions and 693 deletions

View File

@ -196,16 +196,14 @@ void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& T
template <int n> template <int n>
void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{ {
GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32); GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
uint32* RESTRICT s = &m_mem->m_vm32[off->pixel.row[TEXCLUT.COV]];
int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4];
uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4); uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
for (int i = 0; i < n; i++) for (int i = 0; i < n; pa.incX(), i++)
{ {
uint32 c = s[col[i]]; uint32 c = m_mem->m_vm32[pa.value()];
clut[i] = (uint16)(c & 0xffff); clut[i] = (uint16)(c & 0xffff);
clut[i + 256] = (uint16)(c >> 16); clut[i + 256] = (uint16)(c >> 16);
@ -215,32 +213,28 @@ void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCL
template <int n> template <int n>
void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{ {
GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16); GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
uint16* RESTRICT s = &m_mem->m_vm16[off->pixel.row[TEXCLUT.COV]];
int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4];
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4); uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for (int i = 0; i < n; i++) for (int i = 0; i < n; pa.incX(), i++)
{ {
clut[i] = s[col[i]]; clut[i] = m_mem->m_vm16[pa.value()];
} }
} }
template <int n> template <int n>
void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{ {
GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S); GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
uint16* RESTRICT s = &m_mem->m_vm16[off->pixel.row[TEXCLUT.COV]];
int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4];
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4); uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for (int i = 0; i < n; i++) for (int i = 0; i < n; pa.incX(), i++)
{ {
clut[i] = s[col[i]]; clut[i] = m_mem->m_vm16[pa.value()];
} }
} }

View File

@ -48,9 +48,9 @@ public:
struct struct
{ {
GSOffset* fb; GSOffset fb;
GSOffset* zb; GSOffset zb;
GSOffset* tex; GSOffset tex;
GSPixelOffset* fzb; GSPixelOffset* fzb;
GSPixelOffset4* fzb4; GSPixelOffset4* fzb4;
} offset; } offset;

View File

@ -35,6 +35,28 @@
#define FOREACH_BLOCK_END }} #define FOREACH_BLOCK_END }}
template <typename Fn>
static void foreachBlock(const GSOffset& off, GSLocalMemory* mem, const GSVector4i& r, uint8* dst, int dstpitch, int bpp, Fn&& fn)
{
ASSERT(off.isBlockAligned(r));
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
int right = r.right >> off.blockShiftX();
int bottom = r.bottom >> off.blockShiftY();
int offset = dstpitch << off.blockShiftY();
int xAdd = (1 << off.blockShiftX()) * (bpp / 8);
for (; bn.blkY() < bottom; bn.nextBlockY(), dst += offset)
{
for (int x = 0; bn.blkX() < right; bn.nextBlockX(), x += xAdd)
{
const uint8* src = mem->BlockPtr(bn.value());
uint8* read_dst = dst + x;
fn(read_dst, src);
}
}
}
// //
uint32 GSLocalMemory::pageOffset32[32][32][64]; uint32 GSLocalMemory::pageOffset32[32][32][64];
@ -487,22 +509,9 @@ GSLocalMemory::~GSLocalMemory()
} }
} }
GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm) GSOffset GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
{ {
uint32 hash = bp | (bw << 14) | (psm << 20); return GSOffset(m_psm[psm].info, bp, bw, psm);
auto i = m_omap.find(hash);
if (i != m_omap.end())
{
return i->second;
}
GSOffset* off = new GSOffset(bp, bw, psm);
m_omap[hash] = off;
return off;
} }
GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF) GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
@ -629,19 +638,18 @@ std::vector<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
int tw = std::max<int>(1 << TEX0.TW, bs.x); int tw = std::max<int>(1 << TEX0.TW, bs.x);
int th = std::max<int>(1 << TEX0.TH, bs.y); int th = std::max<int>(1 << TEX0.TH, bs.y);
const GSOffset* off = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); GSOffset off = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSOffset::BNHelper bn = off.bnMulti(0, 0);
std::unordered_map<uint32, std::unordered_set<uint32>> tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks) std::unordered_map<uint32, std::unordered_set<uint32>> tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
for (int y = 0; y < th; y += bs.y) for (; bn.blkY() < (th >> off.blockShiftY()); bn.nextBlockY())
{ {
uint32 base = off->block.row[y >> 3]; for (; bn.blkX() < (tw >> off.blockShiftX()); bn.nextBlockX())
for (int x = 0, i = y << 7; x < tw; x += bs.x, i += bs.x)
{ {
uint32 page = ((base + off->block.col[x >> 3]) >> 5) % MAX_PAGES; uint32 page = (bn.value() >> 5) % MAX_PAGES;
tmp[page].insert(i >> 3); // ((y << 7) | x) >> 3 tmp[page].insert((bn.blkY() << 7) + bn.blkX());
} }
} }
@ -1703,42 +1711,38 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
/////////////////// ///////////////////
void GSLocalMemory::ReadTexture32(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(r, 8, 8, 32) foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock32(src, read_dst, dstpitch); GSBlock::ReadBlock32(src, read_dst, dstpitch);
} });
FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
if (TEXA.AEM) if (TEXA.AEM)
{ {
FOREACH_BLOCK_START(r, 8, 8, 32) foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA); GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA);
} });
FOREACH_BLOCK_END
} }
else else
{ {
FOREACH_BLOCK_START(r, 8, 8, 32) foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA); GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA);
} });
FOREACH_BLOCK_END
} }
} }
void GSLocalMemory::ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(r, 16, 8, 16) foreachBlock(off, this, r, dst, dstpitch, 16, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock16(src, read_dst, dstpitch); GSBlock::ReadBlock16(src, read_dst, dstpitch);
} });
FOREACH_BLOCK_END
// Convert packed RGB scanline to 32 bits RGBA // Convert packed RGB scanline to 32 bits RGBA
ASSERT(dstpitch >= r.width() * 4); ASSERT(dstpitch >= r.width() * 4);
@ -1753,79 +1757,72 @@ void GSLocalMemory::ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVecto
} }
} }
void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture16(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
if (TEXA.AEM) if (TEXA.AEM)
{ {
FOREACH_BLOCK_START(r, 16, 8, 32) foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA); GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA);
} });
FOREACH_BLOCK_END
} }
else else
{ {
FOREACH_BLOCK_START(r, 16, 8, 32) foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA); GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA);
} });
FOREACH_BLOCK_END
} }
} }
void GSLocalMemory::ReadTexture8(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture8(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 16, 16, 32) foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal); GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal);
} });
FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture4(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
const uint64* pal = m_clut; const uint64* pal = m_clut;
FOREACH_BLOCK_START(r, 32, 16, 32) foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal); GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal);
} });
FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture8H(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture8H(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 8, 8, 32) foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal); GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal);
} });
FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4HL(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture4HL(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 8, 8, 32) foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal); GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal);
} });
FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4HH(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture4HH(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 8, 8, 32) foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal); GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal);
} });
FOREACH_BLOCK_END
} }
/////////////////// ///////////////////
@ -1902,9 +1899,9 @@ void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, con
/////////////////// ///////////////////
void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
const psm_t& psm = m_psm[off->psm]; const psm_t& psm = m_psm[off.psm()];
readTexel rt = psm.rt; readTexel rt = psm.rt;
readTexture rtx = psm.rtx; readTexture rtx = psm.rtx;
@ -1913,9 +1910,9 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i&
{ {
GIFRegTEX0 TEX0; GIFRegTEX0 TEX0;
TEX0.TBP0 = off->bp; TEX0.TBP0 = off.bp();
TEX0.TBW = off->bw; TEX0.TBW = off.bw();
TEX0.PSM = off->psm; TEX0.PSM = off.psm();
GSVector4i cr = r.ralign<Align_Inside>(psm.bs); GSVector4i cr = r.ralign<Align_Inside>(psm.bs);
@ -1981,49 +1978,44 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i&
// 32/8 // 32/8
void GSLocalMemory::ReadTexture8P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(r, 16, 16, 8) foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock8(src, read_dst, dstpitch); GSBlock::ReadBlock8(src, read_dst, dstpitch);
} });
FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(r, 32, 16, 8) foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock4P(src, read_dst, dstpitch); GSBlock::ReadBlock4P(src, read_dst, dstpitch);
} });
FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture8HP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(r, 8, 8, 8) foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock8HP(src, read_dst, dstpitch); GSBlock::ReadBlock8HP(src, read_dst, dstpitch);
} });
FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4HLP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(r, 8, 8, 8) foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock4HLP(src, read_dst, dstpitch); GSBlock::ReadBlock4HLP(src, read_dst, dstpitch);
} });
FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4HHP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) void GSLocalMemory::ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(r, 8, 8, 8) foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{ {
GSBlock::ReadBlock4HHP(src, read_dst, dstpitch); GSBlock::ReadBlock4HHP(src, read_dst, dstpitch);
} });
FOREACH_BLOCK_END
} }
// //
@ -2103,14 +2095,14 @@ void GSLocalMemory::SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32
namespace namespace
{ {
/// Helper for GSOffsetNew::pageLooperForRect /// Helper for GSOffset::pageLooperForRect
struct alignas(16) TextureAligned struct alignas(16) TextureAligned
{ {
int ox1, oy1, ox2, oy2; ///< Block-aligned outer rect (smallest rectangle containing the original that is block-aligned) int ox1, oy1, ox2, oy2; ///< Block-aligned outer rect (smallest rectangle containing the original that is block-aligned)
int ix1, iy1, ix2, iy2; ///< Page-aligned inner rect (largest rectangle inside original that is page-aligned) int ix1, iy1, ix2, iy2; ///< Page-aligned inner rect (largest rectangle inside original that is page-aligned)
}; };
/// Helper for GSOffsetNew::pageLooperForRect /// Helper for GSOffset::pageLooperForRect
TextureAligned align(const GSVector4i& rect, const GSVector2i& blockMask, const GSVector2i& pageMask, int blockShiftX, int blockShiftY) TextureAligned align(const GSVector4i& rect, const GSVector2i& blockMask, const GSVector2i& pageMask, int blockShiftX, int blockShiftY)
{ {
GSVector4i outer = rect.ralign_presub<Align_Outside>(blockMask); GSVector4i outer = rect.ralign_presub<Align_Outside>(blockMask);
@ -2149,7 +2141,7 @@ namespace
} // namespace } // namespace
GSOffsetNew::PageLooper GSOffsetNew::pageLooperForRect(const GSVector4i& rect) const GSOffset::PageLooper GSOffset::pageLooperForRect(const GSVector4i& rect) const
{ {
// Plan: // Plan:
// - Split texture into tiles on page lines // - Split texture into tiles on page lines
@ -2264,6 +2256,7 @@ GSOffsetNew::PageLooper GSOffsetNew::pageLooperForRect(const GSVector4i& rect) c
return out; return out;
} }
/*
GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm) GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm)
{ {
hash = _bp | (_bw << 14) | (_psm << 20); hash = _bp | (_bw << 14) | (_psm << 20);
@ -2414,3 +2407,4 @@ void* GSOffset::GetPagesAsBits(const GSVector4i& rect, void* pages)
return pages; return pages;
} }
*/

View File

@ -21,38 +21,6 @@
#include "GSBlock.h" #include "GSBlock.h"
#include "GSClut.h" #include "GSClut.h"
class GSOffset : public GSAlignedClass<32>
{
public:
struct alignas(32) Block
{
short row[256]; // yn (n = 0 8 16 ...)
short* col; // blockOffset*
};
struct alignas(32) Pixel
{
int row[4096]; // yn (n = 0 1 2 ...) NOTE: this wraps around above 2048, only transfers should address the upper half (dark cloud 2 inventing)
int* col[8]; // rowOffset*
};
union { uint32 hash; struct { uint32 bp:14, bw:6, psm:6; }; };
Block block;
Pixel pixel;
std::array<uint32*, 256> pages_as_bit; // texture page coverage based on the texture size. Lazy allocated
GSOffset(uint32 bp, uint32 bw, uint32 psm);
virtual ~GSOffset();
enum { EOP = 0xffffffff };
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
void* GetPagesAsBits(const GSVector4i& rect, void* pages);
uint32* GetPagesAsBits(const GIFRegTEX0& TEX0);
};
struct GSPixelOffset struct GSPixelOffset
{ {
// 16 bit offsets (m_vm16[...]) // 16 bit offsets (m_vm16[...])
@ -75,7 +43,8 @@ struct GSPixelOffset4
class GSSwizzleInfo; class GSSwizzleInfo;
class GSOffsetNew { class GSOffset
{
/// Table for storing swizzling of blocks within a page /// Table for storing swizzling of blocks within a page
const GSBlockSwizzleTable* m_blockSwizzle; const GSBlockSwizzleTable* m_blockSwizzle;
/// Table for storing swizzling of pixels within a page (size: uint32[PageHeight][PageWidth]) /// Table for storing swizzling of pixels within a page (size: uint32[PageHeight][PageWidth])
@ -88,12 +57,12 @@ class GSOffsetNew {
uint8 m_blockShiftY; ///< Amount to rshift y value by to get block y offset uint8 m_blockShiftY; ///< Amount to rshift y value by to get block y offset
int m_bp; ///< Offset's base pointer (same measurement as GS) int m_bp; ///< Offset's base pointer (same measurement as GS)
int m_bwPg; ///< Offset's buffer width in pages (not equal to bw in GS for 8 and 4-bit textures) int m_bwPg; ///< Offset's buffer width in pages (not equal to bw in GS for 8 and 4-bit textures)
int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffsetNew algorithms) int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffset algorithms)
public: public:
GSOffsetNew() = default; GSOffset() = default;
constexpr GSOffsetNew(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm); constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm);
/// Help the optimizer by using this method instead of GSLocalMemory::GetOffset when the PSM is known /// Help the optimizer by using this method instead of GSLocalMemory::GetOffset when the PSM is known
constexpr static GSOffsetNew fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm); constexpr static GSOffset fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm);
uint32 bp() const { return m_bp; } uint32 bp() const { return m_bp; }
uint32 bw() const { return m_bwPg << (m_pageShiftX - 6); } uint32 bw() const { return m_bwPg << (m_pageShiftX - 6); }
@ -104,7 +73,7 @@ public:
/// Helper class for efficiently getting the numbers of multiple blocks in a scanning pattern (increment x then y) /// Helper class for efficiently getting the numbers of multiple blocks in a scanning pattern (increment x then y)
class BNHelper class BNHelper
{ {
const GSBlockSwizzleTable* m_blockSwizzle; ///< Block swizzle table from GSOffsetNew const GSBlockSwizzleTable* m_blockSwizzle; ///< Block swizzle table from GSOffset
int m_baseBP; ///< bp for start of current row (to return to the origin x when advancing y) int m_baseBP; ///< bp for start of current row (to return to the origin x when advancing y)
int m_bp; ///< bp for current position int m_bp; ///< bp for current position
int m_baseBlkX; ///< x of origin in blocks (to return to the origin x when advancing y) int m_baseBlkX; ///< x of origin in blocks (to return to the origin x when advancing y)
@ -114,7 +83,7 @@ public:
int m_pageMaskY; ///< mask for y value of block coordinate to get position within page (to detect page crossing) int m_pageMaskY; ///< mask for y value of block coordinate to get position within page (to detect page crossing)
int m_addY; ///< Amount to add to bp to advance one page in y direction int m_addY; ///< Amount to add to bp to advance one page in y direction
public: public:
BNHelper(const GSOffsetNew& off, int x, int y) BNHelper(const GSOffset& off, int x, int y)
{ {
m_blockSwizzle = off.m_blockSwizzle; m_blockSwizzle = off.m_blockSwizzle;
int yAmt = ((y >> (off.m_pageShiftY - 5)) & ~0x1f) * off.m_bwPg; int yAmt = ((y >> (off.m_pageShiftY - 5)) & ~0x1f) * off.m_bwPg;
@ -209,7 +178,7 @@ public:
int m_shift; ///< Amount to lshift page number to get element offset for the start of that page int m_shift; ///< Amount to lshift page number to get element offset for the start of that page
public: public:
PAHelper() = default; PAHelper() = default;
PAHelper(const GSOffsetNew& off, int x, int y) PAHelper(const GSOffset& off, int x, int y)
{ {
m_pixelSwizzle = off.m_pixelSwizzle + ((y & off.m_pageMask.y) << off.m_pageShiftX); m_pixelSwizzle = off.m_pixelSwizzle + ((y & off.m_pageMask.y) << off.m_pageShiftX);
m_pageBase = (off.m_bp >> 5) + (y >> off.m_pageShiftY) * off.m_bwPg; m_pageBase = (off.m_bp >> 5) + (y >> off.m_pageShiftY) * off.m_bwPg;
@ -264,7 +233,7 @@ public:
} }
/// Helper class for looping over the pages in a rect /// Helper class for looping over the pages in a rect
/// Create with GSOffsetNew::pageLooperForRect /// Create with GSOffset::pageLooperForRect
class PageLooper class PageLooper
{ {
int firstRowPgXStart, firstRowPgXEnd; ///< Offset of start/end pages of the first line from x=0 page (only line for textures that don't cross page boundaries) int firstRowPgXStart, firstRowPgXEnd; ///< Offset of start/end pages of the first line from x=0 page (only line for textures that don't cross page boundaries)
@ -274,7 +243,7 @@ public:
int yInc; ///< Amount to add to bp when increasing y by one page int yInc; ///< Amount to add to bp when increasing y by one page
int yCnt; ///< Number of pages the rect covers in the y direction int yCnt; ///< Number of pages the rect covers in the y direction
friend class GSOffsetNew; friend class GSOffset;
public: public:
/// Loop over pages, fn can return `false` to break the loop /// Loop over pages, fn can return `false` to break the loop
@ -333,7 +302,7 @@ public:
class GSSwizzleInfo class GSSwizzleInfo
{ {
friend class GSOffsetNew; friend class GSOffset;
/// Table for storing swizzling of blocks within a page /// Table for storing swizzling of blocks within a page
const GSBlockSwizzleTable* m_blockSwizzle; const GSBlockSwizzleTable* m_blockSwizzle;
/// Table for storing swizzling of pixels within a page /// Table for storing swizzling of pixels within a page
@ -367,31 +336,31 @@ public:
/// Get the block number of the given pixel /// Get the block number of the given pixel
uint32 bn(int x, int y, uint32 bp, uint32 bw) const uint32 bn(int x, int y, uint32 bp, uint32 bw) const
{ {
return GSOffsetNew(*this, bp, bw, 0).bn(x, y); return GSOffset(*this, bp, bw, 0).bn(x, y);
} }
/// Get the address of the given pixel /// Get the address of the given pixel
uint32 pa(int x, int y, uint32 bp, uint32 bw) const uint32 pa(int x, int y, uint32 bp, uint32 bw) const
{ {
return GSOffsetNew(*this, bp, bw, 0).pa(x, y); return GSOffset(*this, bp, bw, 0).pa(x, y);
} }
/// Loop over all the pages in the given rect, calling `fn` on each /// Loop over all the pages in the given rect, calling `fn` on each
template <typename Fn> template <typename Fn>
void loopPages(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const void loopPages(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
{ {
GSOffsetNew(*this, bp, bw, 0).loopPages(rect, std::forward<Fn>(fn)); GSOffset(*this, bp, bw, 0).loopPages(rect, std::forward<Fn>(fn));
} }
/// Loop over all the blocks in the given rect, calling `fn` on each /// Loop over all the blocks in the given rect, calling `fn` on each
template <typename Fn> template <typename Fn>
void loopBlocks(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const void loopBlocks(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
{ {
GSOffsetNew(*this, bp, bw, 0).loopBlocks(rect, std::forward<Fn>(fn)); GSOffset(*this, bp, bw, 0).loopBlocks(rect, std::forward<Fn>(fn));
} }
}; };
constexpr inline GSOffsetNew::GSOffsetNew(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm) constexpr inline GSOffset::GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
: m_blockSwizzle(swz.m_blockSwizzle) : m_blockSwizzle(swz.m_blockSwizzle)
, m_pixelSwizzle(swz.m_pixelSwizzle + ((bp & 0x1f) << (swz.m_pageShiftX + swz.m_pageShiftY))) , m_pixelSwizzle(swz.m_pixelSwizzle + ((bp & 0x1f) << (swz.m_pageShiftX + swz.m_pageShiftY)))
, m_pageMask(swz.m_pageMask), m_blockMask(swz.m_blockMask) , m_pageMask(swz.m_pageMask), m_blockMask(swz.m_blockMask)
@ -417,7 +386,7 @@ public:
typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const; typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const; typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
typedef void (GSLocalMemory::*readTexture)(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); typedef void (GSLocalMemory::*readTexture)(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
struct alignas(128) psm_t struct alignas(128) psm_t
@ -520,7 +489,7 @@ public:
GSLocalMemory(); GSLocalMemory();
virtual ~GSLocalMemory(); virtual ~GSLocalMemory();
GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm); GSOffset GetOffset(uint32 bp, uint32 bw, uint32 psm);
GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0); std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
@ -985,75 +954,35 @@ public:
WriteFrame16(PixelAddress16SZ(x, y, bp, bw), c); WriteFrame16(PixelAddress16SZ(x, y, bp, bw), c);
} }
__forceinline void WritePixel32(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r) void WritePixel32(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
{ {
src -= r.left * sizeof(uint32); off.loopPixels(r, m_vm32, (uint32*)src, pitch, [&](uint32* dst, uint32* src) { *dst = *src; });
for (int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint32* RESTRICT d = &m_vm32[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
for (int x = r.left; x < r.right; x++)
{
d[col[x]] = s[x];
}
}
} }
__forceinline void WritePixel24(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r) void WritePixel24(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
{ {
src -= r.left * sizeof(uint32); off.loopPixels(r, m_vm32, (uint32*)src, pitch,
[&](uint32* dst, uint32* src)
for (int y = r.top; y < r.bottom; y++, src += pitch)
{ {
uint32* RESTRICT s = (uint32*)src; *dst = (*dst & 0xff000000) | (*src & 0x00ffffff);
uint32* RESTRICT d = &m_vm32[off->pixel.row[y]]; });
int* RESTRICT col = off->pixel.col[0];
for (int x = r.left; x < r.right; x++)
{
d[col[x]] = (d[col[x]] & 0xff000000) | (s[x] & 0x00ffffff);
}
}
} }
__forceinline void WritePixel16(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r) void WritePixel16(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
{ {
src -= r.left * sizeof(uint16); off.loopPixels(r, m_vm16, (uint16*)src, pitch, [&](uint16* dst, uint16* src) { *dst = *src; });
for (int y = r.top; y < r.bottom; y++, src += pitch)
{
uint16* RESTRICT s = (uint16*)src;
uint16* RESTRICT d = &m_vm16[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
for (int x = r.left; x < r.right; x++)
{
d[col[x]] = s[x];
}
}
} }
__forceinline void WriteFrame16(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r) void WriteFrame16(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
{ {
src -= r.left * sizeof(uint32); off.loopPixels(r, m_vm16, (uint32*)src, pitch,
[&](uint16* dst, uint32* src)
for (int y = r.top; y < r.bottom; y++, src += pitch)
{ {
uint32* RESTRICT s = (uint32*)src; uint32 rb = *src & 0x00f800f8;
uint16* RESTRICT d = &m_vm16[off->pixel.row[y]]; uint32 ga = *src & 0x8000f800;
int* RESTRICT col = off->pixel.col[0];
for (int x = r.left; x < r.right; x++) *dst = (uint16)((ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3));
{ });
uint32 rb = s[x] & 0x00f800f8;
uint32 ga = s[x] & 0x8000f800;
d[col[x]] = (uint16)((ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3));
}
}
} }
__forceinline uint32 ReadTexel32(uint32 addr, const GIFRegTEXA& TEXA) const __forceinline uint32 ReadTexel32(uint32 addr, const GIFRegTEXA& TEXA) const
@ -1191,17 +1120,17 @@ public:
// * => 32 // * => 32
void ReadTexture32(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture16(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture16(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture8(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture4(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8H(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture8H(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HL(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture4HL(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HH(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture4HH(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; void ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; void ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
@ -1214,11 +1143,11 @@ public:
// pal ? 8 : 32 // pal ? 8 : 32
void ReadTexture8P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8HP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HLP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HHP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
@ -1229,31 +1158,31 @@ public:
// //
template <typename T> template <typename T>
void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); void ReadTexture(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
// //
void SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h); void SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h);
}; };
// fromKnownPSM: builds an offset object from bp, bw and psm, choosing the GSLocalMemory swizzle table by
// switching on psm. Several modes share a table:
//   swizzle32  : PSMCT32, PSMCT24, PSMT8H, PSMT4HL, PSMT4HH
//   swizzle16  : PSMCT16, PSGPU24
//   swizzle32Z : PSMZ32, PSMZ24
//   one table each: PSMCT16S, PSMT8, PSMT4, PSMZ16, PSMZ16S
// NOTE(review): every line of this listing appears to hold two columns of a side-by-side diff; the first
// column spells the type GSOffsetNew and the second GSOffset, otherwise the two columns match.
constexpr inline GSOffsetNew GSOffsetNew::fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm) constexpr inline GSOffset GSOffset::fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm)
{ {
switch (psm) switch (psm)
{ {
case PSM_PSMCT32: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm); case PSM_PSMCT32: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMCT24: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm); case PSM_PSMCT24: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMCT16: return GSOffsetNew(GSLocalMemory::swizzle16, bp, bw, psm); case PSM_PSMCT16: return GSOffset(GSLocalMemory::swizzle16, bp, bw, psm);
case PSM_PSMCT16S: return GSOffsetNew(GSLocalMemory::swizzle16S, bp, bw, psm); case PSM_PSMCT16S: return GSOffset(GSLocalMemory::swizzle16S, bp, bw, psm);
case PSM_PSGPU24: return GSOffsetNew(GSLocalMemory::swizzle16, bp, bw, psm); case PSM_PSGPU24: return GSOffset(GSLocalMemory::swizzle16, bp, bw, psm);
case PSM_PSMT8: return GSOffsetNew(GSLocalMemory::swizzle8, bp, bw, psm); case PSM_PSMT8: return GSOffset(GSLocalMemory::swizzle8, bp, bw, psm);
case PSM_PSMT4: return GSOffsetNew(GSLocalMemory::swizzle4, bp, bw, psm); case PSM_PSMT4: return GSOffset(GSLocalMemory::swizzle4, bp, bw, psm);
case PSM_PSMT8H: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm); case PSM_PSMT8H: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMT4HL: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm); case PSM_PSMT4HL: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMT4HH: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm); case PSM_PSMT4HH: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMZ32: return GSOffsetNew(GSLocalMemory::swizzle32Z, bp, bw, psm); case PSM_PSMZ32: return GSOffset(GSLocalMemory::swizzle32Z, bp, bw, psm);
case PSM_PSMZ24: return GSOffsetNew(GSLocalMemory::swizzle32Z, bp, bw, psm); case PSM_PSMZ24: return GSOffset(GSLocalMemory::swizzle32Z, bp, bw, psm);
case PSM_PSMZ16: return GSOffsetNew(GSLocalMemory::swizzle16Z, bp, bw, psm); case PSM_PSMZ16: return GSOffset(GSLocalMemory::swizzle16Z, bp, bw, psm);
case PSM_PSMZ16S: return GSOffsetNew(GSLocalMemory::swizzle16SZ, bp, bw, psm); case PSM_PSMZ16S: return GSOffset(GSLocalMemory::swizzle16SZ, bp, bw, psm);
} }
// Any psm value not matched by a case above falls through to the swizzle32 table.
return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm); return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
} }

View File

@ -1650,182 +1650,88 @@ void GSState::Move()
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format) // TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
GSOffset* RESTRICT spo = m_mem.GetOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM); GSOffset spo = m_mem.GetOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM);
GSOffset* RESTRICT dpo = m_mem.GetOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM); GSOffset dpo = m_mem.GetOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM);
auto copy = [&](auto&& pxCopyFn)
{
if (xinc > 0)
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
GSOffset::PAHelper s = spo.paMulti(sx, sy);
GSOffset::PAHelper d = dpo.paMulti(dx, dy);
for (int x = 0; x < w; x++)
{
pxCopyFn(d.value(), s.value());
s.incX();
d.incX();
}
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
GSOffset::PAHelper s = spo.paMulti(sx, sy);
GSOffset::PAHelper d = dpo.paMulti(dx, dy);
for (int x = 0; x < w; x++)
{
pxCopyFn(d.value(), s.value());
s.decX();
d.decX();
}
}
}
};
if (spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16) if (spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
{ {
int* RESTRICT scol = &spo->pixel.col[0][sx];
int* RESTRICT dcol = &dpo->pixel.col[0][dx];
if (spsm.trbpp == 32) if (spsm.trbpp == 32)
{ {
if (xinc > 0) copy([&](uint32 doff, uint32 soff)
{ {
for (int y = 0; y < h; y++, sy += yinc, dy += yinc) m_mem.m_vm32[doff] = m_mem.m_vm32[soff];
{ });
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for (int x = 0; x < w; x++)
d[dcol[x]] = s[scol[x]];
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for (int x = 0; x > -w; x--)
d[dcol[x]] = s[scol[x]];
}
}
} }
else if (spsm.trbpp == 24) else if (spsm.trbpp == 24)
{ {
if (xinc > 0) copy([&](uint32 doff, uint32 soff)
{ {
for (int y = 0; y < h; y++, sy += yinc, dy += yinc) uint32& d = m_mem.m_vm32[doff];
{ d = (d & 0xff000000) | (m_mem.m_vm32[soff] & 0x00ffffff);
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]]; });
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for (int x = 0; x < w; x++)
d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff);
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for (int x = 0; x > -w; x--)
d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff);
}
}
} }
else // if(spsm.trbpp == 16) else // if(spsm.trbpp == 16)
{ {
if (xinc > 0) copy([&](uint32 doff, uint32 soff)
{ {
for (int y = 0; y < h; y++, sy += yinc, dy += yinc) m_mem.m_vm16[doff] = m_mem.m_vm16[soff];
{ });
uint16* RESTRICT s = &m_mem.m_vm16[spo->pixel.row[sy]];
uint16* RESTRICT d = &m_mem.m_vm16[dpo->pixel.row[dy]];
for (int x = 0; x < w; x++)
d[dcol[x]] = s[scol[x]];
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint16* RESTRICT s = &m_mem.m_vm16[spo->pixel.row[sy]];
uint16* RESTRICT d = &m_mem.m_vm16[dpo->pixel.row[dy]];
for (int x = 0; x > -w; x--)
d[dcol[x]] = s[scol[x]];
}
}
} }
} }
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8) else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
{ {
if (xinc > 0) copy([&](uint32 doff, uint32 soff)
{ {
for (int y = 0; y < h; y++, sy += yinc, dy += yinc) m_mem.m_vm8[doff] = m_mem.m_vm8[soff];
{ });
uint8* RESTRICT s = &m_mem.m_vm8[spo->pixel.row[sy]];
uint8* RESTRICT d = &m_mem.m_vm8[dpo->pixel.row[dy]];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x < w; x++)
d[dcol[x]] = s[scol[x]];
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint8* RESTRICT s = &m_mem.m_vm8[spo->pixel.row[sy]];
uint8* RESTRICT d = &m_mem.m_vm8[dpo->pixel.row[dy]];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x > -w; x--)
d[dcol[x]] = s[scol[x]];
}
}
} }
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4) else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
{ {
if (xinc > 0) copy([&](uint32 doff, uint32 soff)
{ {
for (int y = 0; y < h; y++, sy += yinc, dy += yinc) m_mem.WritePixel4(doff, m_mem.ReadPixel4(soff));
{ });
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x < w; x++)
m_mem.WritePixel4(dbase + dcol[x], m_mem.ReadPixel4(sbase + scol[x]));
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x > -w; x--)
m_mem.WritePixel4(dbase + dcol[x], m_mem.ReadPixel4(sbase + scol[x]));
}
}
} }
else else
{ {
if (xinc > 0) copy([&](uint32 doff, uint32 soff)
{ {
for (int y = 0; y < h; y++, sy += yinc, dy += yinc) (m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff));
{ });
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x < w; x++)
(m_mem.*dpsm.wpa)(dbase + dcol[x], (m_mem.*spsm.rpa)(sbase + scol[x]));
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x > -w; x--)
(m_mem.*dpsm.wpa)(dbase + dcol[x], (m_mem.*spsm.rpa)(sbase + scol[x]));
}
}
} }
} }

View File

@ -79,7 +79,7 @@ void GSTextureCache11::Read(Target* t, const GSVector4i& r)
{ {
// TODO: block level write // TODO: block level write
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
switch (TEX0.PSM) switch (TEX0.PSM)
{ {
@ -124,7 +124,7 @@ void GSTextureCache11::Read(Source* t, const GSVector4i& r)
if (offscreen->Map(m, &r_offscreen)) if (offscreen->Map(m, &r_offscreen))
{ {
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);

View File

@ -885,11 +885,8 @@ void GSRendererHW::SwSpriteRender()
InvalidateLocalMem(bitbltbuf, GSVector4i(sx, sy, sx + w, sy + h)); InvalidateLocalMem(bitbltbuf, GSVector4i(sx, sy, sx + w, sy + h));
InvalidateVideoMem(bitbltbuf, GSVector4i(dx, dy, dx + w, dy + h)); InvalidateVideoMem(bitbltbuf, GSVector4i(dx, dy, dx + w, dy + h));
GSOffset* RESTRICT spo = texture_mapping_enabled ? m_mem.GetOffset(bitbltbuf.SBP, bitbltbuf.SBW, bitbltbuf.SPSM) : nullptr; GSOffset spo = texture_mapping_enabled ? m_mem.GetOffset(bitbltbuf.SBP, bitbltbuf.SBW, bitbltbuf.SPSM) : GSOffset();
GSOffset* RESTRICT dpo = m_mem.GetOffset(bitbltbuf.DBP, bitbltbuf.DBW, bitbltbuf.DPSM); GSOffset dpo = m_mem.GetOffset(bitbltbuf.DBP, bitbltbuf.DBW, bitbltbuf.DPSM);
const int* RESTRICT scol = texture_mapping_enabled ? &spo->pixel.col[0][sx] : nullptr;
int* RESTRICT dcol = &dpo->pixel.col[0][dx];
const bool alpha_blending_enabled = PRIM->ABE; const bool alpha_blending_enabled = PRIM->ABE;
@ -910,19 +907,27 @@ void GSRendererHW::SwSpriteRender()
for (int y = 0; y < h; y++, ++sy, ++dy) for (int y = 0; y < h; y++, ++sy, ++dy)
{ {
const uint32* RESTRICT s = texture_mapping_enabled ? &m_mem.m_vm32[spo->pixel.row[sy]] : nullptr; GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sx, sy) : GSOffset::PAHelper();
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]]; GSOffset::PAHelper dpa = dpo.paMulti(dx, dy);
ASSERT(w % 2 == 0); ASSERT(w % 2 == 0);
for (int x = 0; x < w; x += 2) for (int x = 0; x < w; x += 2)
{ {
uint32 di = dpa.value();
dpa.incX();
ASSERT(di + 1 == dpa.value()); // Destination pixel pair is adjacent in memory
dpa.incX();
GSVector4i sc; GSVector4i sc;
if (texture_mapping_enabled) if (texture_mapping_enabled)
{ {
uint32 si = spa.value();
spa.incX();
// Read 2 source pixel colors // Read 2 source pixel colors
ASSERT((scol[x] + 1) == scol[x + 1]); // Source pixel pair is adjacent in memory ASSERT((si + 1) == spa.value()); // Source pixel pair is adjacent in memory
sc = GSVector4i::loadl(&s[scol[x]]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr spa.incX();
sc = GSVector4i::loadl(&m_mem.m_vm32[si]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
// Apply TFX // Apply TFX
ASSERT(tex0_tfx == 0 || tex0_tfx == 1); ASSERT(tex0_tfx == 0 || tex0_tfx == 1);
@ -943,8 +948,7 @@ void GSRendererHW::SwSpriteRender()
if (alpha_blending_enabled || fb_mask_enabled) if (alpha_blending_enabled || fb_mask_enabled)
{ {
// Read 2 destination pixel colors // Read 2 destination pixel colors
ASSERT((dcol[x] + 1) == dcol[x + 1]); // Destination pixel pair is adjacent in memory dc0 = GSVector4i::loadl(&m_mem.m_vm32[di]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
dc0 = GSVector4i::loadl(&d[dcol[x]]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
} }
if (alpha_blending_enabled) if (alpha_blending_enabled)
@ -999,8 +1003,7 @@ void GSRendererHW::SwSpriteRender()
// Store 2 pixel colors // Store 2 pixel colors
dc = dc.pu16(GSVector4i::zero()); // 0x0000000000000000AABBGGRRaabbggrr dc = dc.pu16(GSVector4i::zero()); // 0x0000000000000000AABBGGRRaabbggrr
ASSERT((dcol[x] + 1) == dcol[x + 1]); // Destination pixel pair is adjacent in memory GSVector4i::storel(&m_mem.m_vm32[di], dc);
GSVector4i::storel(&d[dcol[x]], dc);
} }
} }
} }
@ -1806,7 +1809,7 @@ void GSRendererHW::OI_GsMemClear()
// Limit it further to a full screen 0 write // Limit it further to a full screen 0 write
if ((m_vertex.next == 2) && m_vt.m_min.c.eq(GSVector4i(0))) if ((m_vertex.next == 2) && m_vt.m_min.c.eq(GSVector4i(0)))
{ {
GSOffset* off = m_context->offset.fb; const GSOffset& off = m_context->offset.fb;
const GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in)); const GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in));
// Limit the hack to a single fullscreen clear. Some games might use several columns to clear a screen // Limit the hack to a single fullscreen clear. Some games might use several columns to clear a screen
// but hopefully it will be enough. // but hopefully it will be enough.
@ -1824,12 +1827,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel32 // Based on WritePixel32
for (int y = r.top; y < r.bottom; y++) for (int y = r.top; y < r.bottom; y++)
{ {
uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]]; GSOffset::PAHelper pa = off.paMulti(r.left, y);
int* RESTRICT col = off->pixel.col[0];
for (int x = r.left; x < r.right; x++) for (; pa.x() < r.right; pa.incX())
{ {
d[col[x]] = 0; // Here the constant color m_mem.m_vm32[pa.value()] = 0; // Here the constant color
} }
} }
} }
@ -1838,12 +1840,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel24 // Based on WritePixel24
for (int y = r.top; y < r.bottom; y++) for (int y = r.top; y < r.bottom; y++)
{ {
uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]]; GSOffset::PAHelper pa = off.paMulti(r.left, y);
int* RESTRICT col = off->pixel.col[0];
for (int x = r.left; x < r.right; x++) for (; pa.x() < r.right; pa.incX())
{ {
d[col[x]] &= 0xff000000; // Clear the color m_mem.m_vm32[pa.value()] &= 0xff000000; // Clear the color
} }
} }
} }
@ -1854,12 +1855,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel16 // Based on WritePixel16
for(int y = r.top; y < r.bottom; y++) for(int y = r.top; y < r.bottom; y++)
{ {
uint32* RESTRICT d = &m_mem.m_vm16[off->pixel.row[y]]; GSOffset::PAHelper pa = off.paMulti(r.left, y);
int* RESTRICT col = off->pixel.col[0];
for(int x = r.left; x < r.right; x++) for(int x = r.left; x < r.right; x++)
{ {
d[col[x]] = 0; // Here the constant color m_mem.m_vm16[pa.value()] = 0; // Here the constant color
} }
} }
#endif #endif

View File

@ -797,14 +797,11 @@ void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp)
// Goal: invalidate data sent to the GPU when the source (GS memory) is modified // Goal: invalidate data sent to the GPU when the source (GS memory) is modified
// Called each time you want to write to the GS memory // Called each time you want to write to the GS memory
void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, bool target) void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& rect, bool target)
{ {
if (!off) uint32 bp = off.bp();
return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549. uint32 bw = off.bw();
uint32 psm = off.psm();
uint32 bp = off->bp;
uint32 bw = off->bw;
uint32 psm = off->psm;
if (!target) if (!target)
{ {
@ -871,18 +868,12 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
} }
} }
GSVector4i r;
uint32* pages = (uint32*)m_temp;
off->GetPages(rect, pages, &r);
bool found = false; bool found = false;
for (const uint32* p = pages; *p != GSOffset::EOP; p++) GSVector4i r = rect.ralign<Align_Outside>((bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs);
{
uint32 page = *p;
off.loopPages(rect, [&](uint32 page)
{
auto& list = m_src.m_map[page]; auto& list = m_src.m_map[page];
for (auto i = list.begin(); i != list.end();) for (auto i = list.begin(); i != list.end();)
{ {
@ -937,7 +928,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
} }
} }
} }
} });
if (!target) if (!target)
return; return;
@ -1045,10 +1036,10 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
// Goal: retrieve the data from the GPU to the GS memory. // Goal: retrieve the data from the GPU to the GS memory.
// Called each time you want to read from the GS memory // Called each time you want to read from the GS memory
void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r) void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r)
{ {
uint32 bp = off->bp; uint32 bp = off.bp();
uint32 psm = off->psm; uint32 psm = off.psm();
//uint32 bw = off->bw; //uint32 bw = off->bw;
// No depth handling please. // No depth handling please.
@ -1777,8 +1768,7 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR
m_p2t = r->m_mem.GetPage2TileMap(m_TEX0); m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
} }
GSOffset* off = m_renderer->m_context->offset.tex; m_pages = m_renderer->m_context->offset.tex.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
m_pages_as_bit = off->GetPagesAsBits(m_TEX0);
} }
} }
@ -1808,23 +1798,23 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
m_complete = true; // lame, but better than nothing m_complete = true; // lame, but better than nothing
} }
const GSOffset* off = m_renderer->m_context->offset.tex; const GSOffset& off = m_renderer->m_context->offset.tex;
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
uint32 blocks = 0; uint32 blocks = 0;
if (m_repeating) if (m_repeating)
{ {
for (int y = r.top; y < r.bottom; y += bs.y) for (int y = r.top; y < r.bottom; y += bs.y, bn.nextBlockY())
{ {
uint32 base = off->block.row[y >> 3u]; for (int x = r.left; x < r.right; bn.nextBlockX(), x += bs.x)
for (int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
{ {
uint32 block = base + off->block.col[x >> 3u]; int i = (bn.blkY() << 7) + bn.blkX();
uint32 block = bn.valueNoWrap();
if (block < MAX_BLOCKS || m_wrap_gs_mem) if (block < MAX_BLOCKS || m_wrap_gs_mem)
{ {
uint32 addr = (i >> 3u) % MAX_BLOCKS; uint32 addr = i % MAX_BLOCKS;
uint32 row = addr >> 5u; uint32 row = addr >> 5u;
uint32 col = 1 << (addr & 31u); uint32 col = 1 << (addr & 31u);
@ -1843,13 +1833,11 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
} }
else else
{ {
for (int y = r.top; y < r.bottom; y += bs.y) for (int y = r.top; y < r.bottom; y += bs.y, bn.nextBlockY())
{ {
uint32 base = off->block.row[y >> 3u]; for (int x = r.left; x < r.right; x += bs.x, bn.nextBlockX())
for (int x = r.left; x < r.right; x += bs.x)
{ {
uint32 block = base + off->block.col[x >> 3u]; uint32 block = bn.valueNoWrap();
if (block < MAX_BLOCKS || m_wrap_gs_mem) if (block < MAX_BLOCKS || m_wrap_gs_mem)
{ {
@ -1951,7 +1939,7 @@ void GSTextureCache::Source::Flush(uint32 count, int layer)
GSLocalMemory& mem = m_renderer->m_mem; GSLocalMemory& mem = m_renderer->m_mem;
const GSOffset* off = m_renderer->m_context->offset.tex; const GSOffset& off = m_renderer->m_context->offset.tex;
GSLocalMemory::readTexture rtx = psm.rtx; GSLocalMemory::readTexture rtx = psm.rtx;
@ -2079,7 +2067,7 @@ void GSTextureCache::Target::Update()
GSTexture* t = m_renderer->m_dev->CreateTexture(w, h); GSTexture* t = m_renderer->m_dev->CreateTexture(w, h);
const GSOffset* off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); GSOffset off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSTexture::GSMap m; GSTexture::GSMap m;
@ -2130,7 +2118,7 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect)
// GSTextureCache::SourceMap // GSTextureCache::SourceMap
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off) void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSOffset& off)
{ {
m_surfaces.insert(s); m_surfaces.insert(s);
@ -2147,26 +2135,10 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset*
} }
// The source pointer will be stored/duplicated in all m_map[array of pages] // The source pointer will be stored/duplicated in all m_map[array of pages]
for (size_t i = 0; i < countof(m_pages); i++) s->m_pages.loopPages([this, s](uint32 page)
{ {
if (uint32 p = s->m_pages_as_bit[i]) s->m_erase_it[page] = m_map[page].InsertFront(s);
{ });
auto* m = &m_map[i << 5];
auto* e = &s->m_erase_it[i << 5];
unsigned long j;
while (_BitScanForward(&j, p))
{
// FIXME: this statement could be optimized to a single ASM instruction (instead of 4)
// Either BTR (AKA bit test and reset). Depends on the previous instruction.
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but require BMI1 (basically a recent CPU)
p ^= 1U << j;
e[j] = m[j].InsertFront(s);
}
}
}
} }
void GSTextureCache::SourceMap::RemoveAll() void GSTextureCache::SourceMap::RemoveAll()
@ -2197,26 +2169,10 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
} }
else else
{ {
for (size_t i = 0; i < countof(m_pages); i++) s->m_pages.loopPages([this, s](uint32 page)
{ {
if (uint32 p = s->m_pages_as_bit[i]) m_map[page].EraseIndex(s->m_erase_it[page]);
{ });
auto* m = &m_map[i << 5];
const auto* e = &s->m_erase_it[i << 5];
unsigned long j;
while (_BitScanForward(&j, p))
{
// FIXME: this statement could be optimized to a single ASM instruction (instead of 4)
// Either BTR (AKA bit test and reset). Depends on the previous instruction.
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but require BMI1 (basically a recent CPU)
p ^= 1U << j;
m[j].EraseIndex(e[j]);
}
}
}
} }
delete s; delete s;

View File

@ -125,7 +125,7 @@ public:
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
// Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase // Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase
std::array<uint16, MAX_PAGES> m_erase_it; std::array<uint16, MAX_PAGES> m_erase_it;
uint32* m_pages_as_bit; GSOffset::PageLooper m_pages;
public: public:
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false); Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false);
@ -189,7 +189,7 @@ public:
memset(m_pages, 0, sizeof(m_pages)); memset(m_pages, 0, sizeof(m_pages));
} }
void Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off); void Add(Source* s, const GIFRegTEX0& TEX0, const GSOffset& off);
void RemoveAll(); void RemoveAll();
void RemovePartial(); void RemovePartial();
void RemoveAt(Source* s); void RemoveAt(Source* s);
@ -249,8 +249,8 @@ public:
void InvalidateVideoMemType(int type, uint32 bp); void InvalidateVideoMemType(int type, uint32 bp);
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt); void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
void InvalidateVideoMem(GSOffset* off, const GSVector4i& r, bool target = true); void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(GSOffset* off, const GSVector4i& r); void InvalidateLocalMem(const GSOffset& off, const GSVector4i& r);
void IncAge(); void IncAge();
bool UserHacks_HalfPixelOffset; bool UserHacks_HalfPixelOffset;

View File

@ -78,7 +78,7 @@ void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
{ {
// TODO: block level write // TODO: block level write
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
switch (TEX0.PSM) switch (TEX0.PSM)
{ {
@ -125,7 +125,7 @@ void GSTextureCacheOGL::Read(Source* t, const GSVector4i& r)
if (offscreen->Map(m, &r_offscreen)) if (offscreen->Map(m, &r_offscreen))
{ {
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);

View File

@ -2811,31 +2811,28 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
if (m != 0xffffffff) if (m != 0xffffffff)
{ {
const int* zbr = m_global.zbr;
const int* zbc = m_global.zbc;
uint32 z = v.t.u32[3]; // (uint32)v.p.z; uint32 z = v.t.u32[3]; // (uint32)v.p.z;
if (m_global.sel.zpsm != 2) if (m_global.sel.zpsm != 2)
{ {
if (m == 0) if (m == 0)
{ {
DrawRectT<uint32, false>(zbr, zbc, r, z, m); DrawRectT<uint32, false>(m_global.zbo, r, z, m);
} }
else else
{ {
DrawRectT<uint32, true>(zbr, zbc, r, z, m); DrawRectT<uint32, true>(m_global.zbo, r, z, m);
} }
} }
else else
{ {
if ((m & 0xffff) == 0) if ((m & 0xffff) == 0)
{ {
DrawRectT<uint16, false>(zbr, zbc, r, z, m); DrawRectT<uint16, false>(m_global.zbo, r, z, m);
} }
else else
{ {
DrawRectT<uint16, true>(zbr, zbc, r, z, m); DrawRectT<uint16, true>(m_global.zbo, r, z, m);
} }
} }
} }
@ -2848,9 +2845,6 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
if (m != 0xffffffff) if (m != 0xffffffff)
{ {
const int* fbr = m_global.fbr;
const int* fbc = m_global.fbc;
uint32 c = (GSVector4i(v.c) >> 7).rgba32(); uint32 c = (GSVector4i(v.c) >> 7).rgba32();
if (m_global.sel.fba) if (m_global.sel.fba)
@ -2862,11 +2856,11 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
{ {
if (m == 0) if (m == 0)
{ {
DrawRectT<uint32, false>(fbr, fbc, r, c, m); DrawRectT<uint32, false>(m_global.fbo, r, c, m);
} }
else else
{ {
DrawRectT<uint32, true>(fbr, fbc, r, c, m); DrawRectT<uint32, true>(m_global.fbo, r, c, m);
} }
} }
else else
@ -2875,18 +2869,18 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
if ((m & 0xffff) == 0) if ((m & 0xffff) == 0)
{ {
DrawRectT<uint16, false>(fbr, fbc, r, c, m); DrawRectT<uint16, false>(m_global.fbo, r, c, m);
} }
else else
{ {
DrawRectT<uint16, true>(fbr, fbc, r, c, m); DrawRectT<uint16, true>(m_global.fbo, r, c, m);
} }
} }
} }
} }
template <class T, bool masked> template <class T, bool masked>
void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m) void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m)
{ {
if (m == 0xffffffff) if (m == 0xffffffff)
return; return;
@ -2921,25 +2915,25 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col,
if (!br.rempty()) if (!br.rempty())
{ {
FillRect<T, masked>(row, col, GSVector4i(r.x, r.y, r.z, br.y), c, m); FillRect<T, masked>(off, GSVector4i(r.x, r.y, r.z, br.y), c, m);
FillRect<T, masked>(row, col, GSVector4i(r.x, br.w, r.z, r.w), c, m); FillRect<T, masked>(off, GSVector4i(r.x, br.w, r.z, r.w), c, m);
if (r.x < br.x || br.z < r.z) if (r.x < br.x || br.z < r.z)
{ {
FillRect<T, masked>(row, col, GSVector4i(r.x, br.y, br.x, br.w), c, m); FillRect<T, masked>(off, GSVector4i(r.x, br.y, br.x, br.w), c, m);
FillRect<T, masked>(row, col, GSVector4i(br.z, br.y, r.z, br.w), c, m); FillRect<T, masked>(off, GSVector4i(br.z, br.y, r.z, br.w), c, m);
} }
FillBlock<T, masked>(row, col, br, color, mask); FillBlock<T, masked>(off, br, color, mask);
} }
else else
{ {
FillRect<T, masked>(row, col, r, c, m); FillRect<T, masked>(off, r, c, m);
} }
} }
template <class T, bool masked> template <class T, bool masked>
void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m) void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m)
{ {
if (r.x >= r.z) if (r.x >= r.z)
return; return;
@ -2948,11 +2942,12 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
for (int y = r.y; y < r.w; y++) for (int y = r.y; y < r.w; y++)
{ {
T* RESTRICT d = &vm[row[y]]; GSOffset::PAHelper pa = off.paMulti(r.x, y);
for (int x = r.x; x < r.z; x++) for (; pa.x() < r.z; pa.incX())
{ {
d[col[x]] = (T)(!masked ? c : (c | (d[col[x]] & m))); T& d = vm[pa.value()];
d = (T)(!masked ? c : (c | (d & m)));
} }
} }
} }
@ -2960,7 +2955,7 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
template <class T, bool masked> template <class T, bool masked>
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m) void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m)
{ {
if (r.x >= r.z) if (r.x >= r.z)
return; return;
@ -2969,11 +2964,9 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
for (int y = r.y; y < r.w; y += 8) for (int y = r.y; y < r.w; y += 8)
{ {
T* RESTRICT d = &vm[row[y]];
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T)) for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{ {
GSVector8i* RESTRICT p = (GSVector8i*)&d[col[x]]; GSVector8i* RESTRICT p = (GSVector8i*)&vm[off.pa(x, y)];
p[0] = !masked ? c : (c | (p[0] & m)); p[0] = !masked ? c : (c | (p[0] & m));
p[1] = !masked ? c : (c | (p[1] & m)); p[1] = !masked ? c : (c | (p[1] & m));
@ -2990,7 +2983,7 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
#else #else
template <class T, bool masked> template <class T, bool masked>
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m) void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
{ {
if (r.x >= r.z) if (r.x >= r.z)
return; return;
@ -2999,11 +2992,9 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
for (int y = r.y; y < r.w; y += 8) for (int y = r.y; y < r.w; y += 8)
{ {
T* RESTRICT d = &vm[row[y]];
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T)) for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{ {
GSVector4i* RESTRICT p = (GSVector4i*)&d[col[x]]; GSVector4i* RESTRICT p = (GSVector4i*)&vm[off.pa(x, y)];
for (int i = 0; i < 16; i += 4) for (int i = 0; i < 16; i += 4)
{ {

View File

@ -38,20 +38,20 @@ protected:
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map; GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
template <class T, bool masked> template <class T, bool masked>
void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m); void DrawRectT(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m);
template <class T, bool masked> template <class T, bool masked>
__forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m); __forceinline void FillRect(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m);
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
template <class T, bool masked> template <class T, bool masked>
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m); __forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m);
#else #else
template <class T, bool masked> template <class T, bool masked>
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m); __forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
#endif #endif

View File

@ -409,17 +409,20 @@ void GSRendererSW::Draw()
// GSScanlineGlobalData& gd = sd->global; // GSScanlineGlobalData& gd = sd->global;
uint32* fb_pages = NULL; GSOffset::PageLooper* fb_pages = NULL;
uint32* zb_pages = NULL; GSOffset::PageLooper* zb_pages = NULL;
GSOffset::PageLooper _fb_pages, _zb_pages;
if (sd->global.sel.fb) if (sd->global.sel.fb)
{ {
fb_pages = m_context->offset.fb->GetPages(r); _fb_pages = m_context->offset.fb.pageLooperForRect(r);
fb_pages = &_fb_pages;
} }
if (sd->global.sel.zb) if (sd->global.sel.zb)
{ {
zb_pages = m_context->offset.zb->GetPages(r); _zb_pages = m_context->offset.zb.pageLooperForRect(r);
zb_pages = &_zb_pages;
} }
// check if there is an overlap between this and previous targets // check if there is an overlap between this and previous targets
@ -438,7 +441,7 @@ void GSRendererSW::Draw()
// addref source and target pages // addref source and target pages
sd->UsePages(fb_pages, m_context->offset.fb->psm, zb_pages, m_context->offset.zb->psm); sd->UsePages(fb_pages, m_context->offset.fb.psm(), zb_pages, m_context->offset.zb.psm());
// //
@ -641,26 +644,26 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
fflush(s_fp); fflush(s_fp);
} }
GSOffset* off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); GSOffset off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
GSOffset::PageLooper pages = off.pageLooperForRect(r);
off->GetPages(r, m_tmp_pages);
// check if the changing pages either used as a texture or a target // check if the changing pages either used as a texture or a target
if (!m_rl->IsSynced()) if (!m_rl->IsSynced())
{ {
for (uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++) pages.loopPagesWithBreak([&](uint32 page)
{ {
if (m_fzb_pages[*p] | m_tex_pages[*p]) if (m_fzb_pages[page] | m_tex_pages[page])
{ {
Sync(6); Sync(6);
break; return false;
} }
} return true;
});
} }
m_tc->InvalidatePages(m_tmp_pages, off->psm); // if texture update runs on a thread and Sync(5) happens then this must come later m_tc->InvalidatePages(pages, off.psm()); // if texture update runs on a thread and Sync(5) happens then this must come later
} }
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
@ -673,77 +676,92 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
if (!m_rl->IsSynced()) if (!m_rl->IsSynced())
{ {
GSOffset* off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM); GSOffset off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
GSOffset::PageLooper pages = off.pageLooperForRect(r);
off->GetPages(r, m_tmp_pages); pages.loopPagesWithBreak([&](uint32 page)
for (uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++)
{ {
if (m_fzb_pages[*p]) if (m_fzb_pages[page])
{ {
Sync(7); Sync(7);
break; return false;
} }
} return true;
});
} }
} }
void GSRendererSW::UsePages(const uint32* pages, const int type) void GSRendererSW::UsePages(const GSOffset::PageLooper& pages, const int type)
{ {
for (const uint32* p = pages; *p != GSOffset::EOP; p++) pages.loopPages([=](uint32 page)
{ {
switch (type) switch (type)
{ {
case 0: case 0:
ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX); ASSERT((m_fzb_pages[page] & 0xFFFF) < USHRT_MAX);
m_fzb_pages[*p] += 1; m_fzb_pages[page] += 1;
break; break;
case 1: case 1:
ASSERT((m_fzb_pages[*p] >> 16) < USHRT_MAX); ASSERT((m_fzb_pages[page] >> 16) < USHRT_MAX);
m_fzb_pages[*p] += 0x10000; m_fzb_pages[page] += 0x10000;
break; break;
case 2: case 2:
ASSERT(m_tex_pages[*p] < USHRT_MAX); ASSERT(m_tex_pages[page] < USHRT_MAX);
m_tex_pages[*p] += 1; m_tex_pages[page] += 1;
break; break;
default: default:
break; break;
} }
} });
} }
void GSRendererSW::ReleasePages(const uint32* pages, const int type) void GSRendererSW::ReleasePages(const GSOffset::PageLooper& pages, const int type)
{ {
for (const uint32* p = pages; *p != GSOffset::EOP; p++) pages.loopPages([=](uint32 page)
{ {
switch (type) switch (type)
{ {
case 0: case 0:
ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0); ASSERT((m_fzb_pages[page] & 0xFFFF) > 0);
m_fzb_pages[*p] -= 1; m_fzb_pages[page] -= 1;
break; break;
case 1: case 1:
ASSERT((m_fzb_pages[*p] >> 16) > 0); ASSERT((m_fzb_pages[page] >> 16) > 0);
m_fzb_pages[*p] -= 0x10000; m_fzb_pages[page] -= 0x10000;
break; break;
case 2: case 2:
ASSERT(m_tex_pages[*p] > 0); ASSERT(m_tex_pages[page] > 0);
m_tex_pages[*p] -= 1; m_tex_pages[page] -= 1;
break; break;
default: default:
break; break;
} }
} });
} }
bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r) bool GSRendererSW::CheckTargetPages(const GSOffset::PageLooper* fb_pages, const GSOffset::PageLooper* zb_pages, const GSVector4i& r)
{ {
bool synced = m_rl->IsSynced(); bool synced = m_rl->IsSynced();
bool fb = fb_pages != NULL; bool fb = fb_pages != NULL;
bool zb = zb_pages != NULL; bool zb = zb_pages != NULL;
GSOffset::PageLooper _fb_pages, _zb_pages;
auto requirePages = [&]
{
if (fb_pages == NULL)
{
_fb_pages = m_context->offset.fb.pageLooperForRect(r);
fb_pages = &_fb_pages;
}
if (zb_pages == NULL)
{
_zb_pages = m_context->offset.zb.pageLooperForRect(r);
zb_pages = &_zb_pages;
}
};
bool res = false; bool res = false;
if (m_fzb != m_context->offset.fzb4) if (m_fzb != m_context->offset.fzb4)
@ -753,17 +771,14 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
m_fzb = m_context->offset.fzb4; m_fzb = m_context->offset.fzb4;
m_fzb_bbox = r; m_fzb_bbox = r;
if (fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r);
if (zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r);
memset(m_fzb_cur_pages, 0, sizeof(m_fzb_cur_pages)); memset(m_fzb_cur_pages, 0, sizeof(m_fzb_cur_pages));
uint32 used = 0; uint32 used = 0;
for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++) requirePages();
{
uint32 i = *p;
fb_pages->loopPages([&](uint32 i)
{
uint32 row = i >> 5; uint32 row = i >> 5;
uint32 col = 1 << (i & 31); uint32 col = 1 << (i & 31);
@ -771,12 +786,10 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
used |= m_fzb_pages[i]; used |= m_fzb_pages[i];
used |= m_tex_pages[i]; used |= m_tex_pages[i];
} });
for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++) zb_pages->loopPages([&](uint32 i)
{ {
uint32 i = *p;
uint32 row = i >> 5; uint32 row = i >> 5;
uint32 col = 1 << (i & 31); uint32 col = 1 << (i & 31);
@ -784,7 +797,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
used |= m_fzb_pages[i]; used |= m_fzb_pages[i];
used |= m_tex_pages[i]; used |= m_tex_pages[i];
} });
if (!synced) if (!synced)
{ {
@ -816,15 +829,12 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
{ {
// drawing area is larger than previous time, check new parts only to avoid false positives (m_fzb_cur_pages guards) // drawing area is larger than previous time, check new parts only to avoid false positives (m_fzb_cur_pages guards)
if (fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r); requirePages();
if (zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r);
uint32 used = 0; uint32 used = 0;
for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++) fb_pages->loopPages([&](uint32 i)
{ {
uint32 i = *p;
uint32 row = i >> 5; uint32 row = i >> 5;
uint32 col = 1 << (i & 31); uint32 col = 1 << (i & 31);
@ -834,12 +844,10 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
used |= m_fzb_pages[i]; used |= m_fzb_pages[i];
} }
} });
for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++) zb_pages->loopPages([&](uint32 i)
{ {
uint32 i = *p;
uint32 row = i >> 5; uint32 row = i >> 5;
uint32 col = 1 << (i & 31); uint32 col = 1 << (i & 31);
@ -849,7 +857,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
used |= m_fzb_pages[i]; used |= m_fzb_pages[i];
} }
} });
if (!synced) if (!synced)
{ {
@ -873,9 +881,9 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
if (fb && !res) if (fb && !res)
{ {
for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++) fb_pages->loopPagesWithBreak([&](uint32 page)
{ {
if (m_fzb_pages[*p] & 0xffff0000) if (m_fzb_pages[page] & 0xffff0000)
{ {
if (LOG) if (LOG)
{ {
@ -885,16 +893,17 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
res = true; res = true;
break; return false;
} }
} return true;
});
} }
if (zb && !res) if (zb && !res)
{ {
for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++) zb_pages->loopPagesWithBreak([&](uint32 page)
{ {
if (m_fzb_pages[*p] & 0x0000ffff) if (m_fzb_pages[page] & 0x0000ffff)
{ {
if (LOG) if (LOG)
{ {
@ -904,16 +913,14 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
res = true; res = true;
break; return false;
} }
} return true;
});
} }
} }
} }
if (!fb && fb_pages != NULL) delete[] fb_pages;
if (!zb && zb_pages != NULL) delete[] zb_pages;
return res; return res;
} }
@ -923,19 +930,22 @@ bool GSRendererSW::CheckSourcePages(SharedData* sd)
{ {
for (size_t i = 0; sd->m_tex[i].t != NULL; i++) for (size_t i = 0; sd->m_tex[i].t != NULL; i++)
{ {
sd->m_tex[i].t->m_offset->GetPages(sd->m_tex[i].r, m_tmp_pages); GSOffset::PageLooper pages = sd->m_tex[i].t->m_offset.pageLooperForRect(sd->m_tex[i].r);
uint32* pages = m_tmp_pages; // sd->m_tex[i].t->m_pages.n; bool ret = false;
pages.loopPagesWithBreak([&](uint32 pages)
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
{ {
// TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2) // TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2)
if (m_fzb_pages[*p]) // currently being drawn to? => sync if (m_fzb_pages[pages]) // currently being drawn to? => sync
{ {
return true; ret = true;
return false;
} }
} return true;
});
if (ret)
return true;
} }
} }
@ -954,10 +964,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
gd.vm = m_mem.m_vm8; gd.vm = m_mem.m_vm8;
gd.fbr = context->offset.fb->pixel.row; gd.fbo = context->offset.fb;
gd.zbr = context->offset.zb->pixel.row; gd.zbo = context->offset.zb;
gd.fbc = context->offset.fb->pixel.col[0];
gd.zbc = context->offset.zb->pixel.col[0];
gd.fzbr = context->offset.fzb4->row; gd.fzbr = context->offset.fzb4->row;
gd.fzbc = context->offset.fzb4->col; gd.fzbc = context->offset.fzb4->col;
@ -1423,8 +1431,6 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GSRendererSW::SharedData::SharedData(GSRendererSW* parent) GSRendererSW::SharedData::SharedData(GSRendererSW* parent)
: m_parent(parent) : m_parent(parent)
, m_fb_pages(NULL)
, m_zb_pages(NULL)
, m_fpsm(0) , m_fpsm(0)
, m_zpsm(0) , m_zpsm(0)
, m_using_pages(false) , m_using_pages(false)
@ -1460,7 +1466,7 @@ GSRendererSW::SharedData::~SharedData()
//static TransactionScope::Lock s_lock; //static TransactionScope::Lock s_lock;
void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm) void GSRendererSW::SharedData::UsePages(const GSOffset::PageLooper* fb_pages, int fpsm, const GSOffset::PageLooper* zb_pages, int zpsm)
{ {
if (m_using_pages) if (m_using_pages)
return; return;
@ -1468,24 +1474,26 @@ void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const
{ {
//TransactionScope scope(s_lock); //TransactionScope scope(s_lock);
if (global.sel.fb && fb_pages != NULL) if (global.sel.fb)
{ {
m_parent->UsePages(fb_pages, 0); m_parent->UsePages(*fb_pages, 0);
} }
if (global.sel.zb && zb_pages != NULL) if (global.sel.zb)
{ {
m_parent->UsePages(zb_pages, 1); m_parent->UsePages(*zb_pages, 1);
} }
for (size_t i = 0; m_tex[i].t != NULL; i++) for (size_t i = 0; m_tex[i].t != NULL; i++)
{ {
m_parent->UsePages(m_tex[i].t->m_pages.n, 2); m_parent->UsePages(m_tex[i].t->m_pages, 2);
} }
} }
m_fb_pages = fb_pages; if (fb_pages)
m_zb_pages = zb_pages; m_fb_pages = *fb_pages;
if (zb_pages)
m_zb_pages = *zb_pages;
m_fpsm = fpsm; m_fpsm = fpsm;
m_zpsm = zpsm; m_zpsm = zpsm;
@ -1512,16 +1520,10 @@ void GSRendererSW::SharedData::ReleasePages()
for (size_t i = 0; m_tex[i].t != NULL; i++) for (size_t i = 0; m_tex[i].t != NULL; i++)
{ {
m_parent->ReleasePages(m_tex[i].t->m_pages.n, 2); m_parent->ReleasePages(m_tex[i].t->m_pages, 2);
} }
} }
delete[] m_fb_pages;
delete[] m_zb_pages;
m_fb_pages = NULL;
m_zb_pages = NULL;
m_using_pages = false; m_using_pages = false;
} }

View File

@ -35,8 +35,8 @@ class GSRendererSW : public GSRenderer
public: public:
GSRendererSW* m_parent; GSRendererSW* m_parent;
const uint32* m_fb_pages; GSOffset::PageLooper m_fb_pages;
const uint32* m_zb_pages; GSOffset::PageLooper m_zb_pages;
int m_fpsm; int m_fpsm;
int m_zpsm; int m_zpsm;
bool m_using_pages; bool m_using_pages;
@ -52,7 +52,7 @@ class GSRendererSW : public GSRenderer
SharedData(GSRendererSW* parent); SharedData(GSRendererSW* parent);
virtual ~SharedData(); virtual ~SharedData();
void UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm); void UsePages(const GSOffset::PageLooper* fb_pages, int fpsm, const GSOffset::PageLooper* zb_pages, int zpsm);
void ReleasePages(); void ReleasePages();
void SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level); void SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level);
@ -76,7 +76,6 @@ protected:
uint32 m_fzb_cur_pages[16]; uint32 m_fzb_cur_pages[16];
std::atomic<uint32> m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved std::atomic<uint32> m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
std::atomic<uint16> m_tex_pages[512]; std::atomic<uint16> m_tex_pages[512];
uint32 m_tmp_pages[512 + 1];
void Reset(); void Reset();
void VSync(int field); void VSync(int field);
@ -90,10 +89,10 @@ protected:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void UsePages(const uint32* pages, const int type); void UsePages(const GSOffset::PageLooper& pages, const int type);
void ReleasePages(const uint32* pages, const int type); void ReleasePages(const GSOffset::PageLooper& pages, const int type);
bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r); bool CheckTargetPages(const GSOffset::PageLooper* fb_pages, const GSOffset::PageLooper* zb_pages, const GSVector4i& r);
bool CheckSourcePages(SharedData* sd); bool CheckSourcePages(SharedData* sd);
bool GetScanlineGlobalData(SharedData* data); bool GetScanlineGlobalData(SharedData* data);

View File

@ -127,10 +127,8 @@ struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a p
uint32* clut; uint32* clut;
GSVector4i* dimx; GSVector4i* dimx;
const int* fbr; GSOffset fbo;
const int* zbr; GSOffset zbo;
const int* fbc;
const int* zbc;
const GSVector2i* fzbr; const GSVector2i* fzbr;
const GSVector2i* fzbc; const GSVector2i* fzbc;

View File

@ -62,21 +62,18 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
m_textures.insert(t); m_textures.insert(t);
for (const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++) t->m_pages.loopPages([&](uint32 page)
{ {
const uint32 page = *p;
t->m_erase_it[page] = m_map[page].InsertFront(t); t->m_erase_it[page] = m_map[page].InsertFront(t);
} });
return t; return t;
} }
void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm) void GSTextureCacheSW::InvalidatePages(const GSOffset::PageLooper& pages, uint32 psm)
{ {
for (const uint32* p = pages; *p != GSOffset::EOP; p++) pages.loopPages([&](uint32 page)
{ {
const uint32 page = *p;
for (Texture* t : m_map[page]) for (Texture* t : m_map[page])
{ {
if (GSUtil::HasSharedBits(psm, t->m_sharedbits)) if (GSUtil::HasSharedBits(psm, t->m_sharedbits))
@ -98,7 +95,7 @@ void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
t->m_complete = false; t->m_complete = false;
} }
} }
} });
} }
void GSTextureCacheSW::RemoveAll() void GSTextureCacheSW::RemoveAll()
@ -124,11 +121,10 @@ void GSTextureCacheSW::IncAge()
{ {
i = m_textures.erase(i); i = m_textures.erase(i);
for (const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++) t->m_pages.loopPages([&](uint32 page)
{ {
const uint32 page = *p;
m_map[page].EraseIndex(t->m_erase_it[page]); m_map[page].EraseIndex(t->m_erase_it[page]);
} });
delete t; delete t;
} }
@ -162,9 +158,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM); m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
m_pages = m_offset.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
memcpy(m_pages.bm, m_offset->GetPagesAsBits(TEX0), sizeof(m_pages.bm));
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
@ -176,8 +170,6 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
GSTextureCacheSW::Texture::~Texture() GSTextureCacheSW::Texture::~Texture()
{ {
delete[] m_pages.n;
if (m_buff) if (m_buff)
{ {
_aligned_free(m_buff); _aligned_free(m_buff);
@ -223,7 +215,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
GSLocalMemory& mem = m_state->m_mem; GSLocalMemory& mem = m_state->m_mem;
const GSOffset* RESTRICT off = m_offset; GSOffset off = m_offset;
uint32 blocks = 0; uint32 blocks = 0;
@ -235,22 +227,20 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
int block_pitch = pitch * bs.y; int block_pitch = pitch * bs.y;
r = r.srl32(3); shift += off.blockShiftX();
int bottom = r.bottom >> off.blockShiftY();
int right = r.right >> off.blockShiftX();
bs.x >>= 3; GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
bs.y >>= 3;
shift += 3;
if (m_repeating) if (m_repeating)
{ {
for (int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch) for (; bn.blkY() < bottom; bn.nextBlockY(), dst += block_pitch)
{ {
uint32 base = off->block.row[y]; for (; bn.blkX() < right; bn.nextBlockX())
for (int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
{ {
uint32 block = (base + off->block.col[x]) % MAX_BLOCKS; int i = (bn.blkY() << 7) + bn.blkX();
uint32 block = bn.value();
uint32 row = i >> 5; uint32 row = i >> 5;
uint32 col = 1 << (i & 31); uint32 col = 1 << (i & 31);
@ -259,7 +249,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
{ {
m_valid[row] |= col; m_valid[row] |= col;
(mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA); (mem.*rtxbP)(block, &dst[bn.blkX() << shift], pitch, m_TEXA);
blocks++; blocks++;
} }
@ -268,13 +258,11 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
} }
else else
{ {
for (int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch) for (; bn.blkY() < bottom; bn.nextBlockY(), dst += block_pitch)
{ {
uint32 base = off->block.row[y]; for (; bn.blkX() < right; bn.nextBlockX())
for (int x = r.left; x < r.right; x += bs.x)
{ {
uint32 block = (base + off->block.col[x]) % MAX_BLOCKS; uint32 block = bn.value();
uint32 row = block >> 5; uint32 row = block >> 5;
uint32 col = 1 << (block & 31); uint32 col = 1 << (block & 31);
@ -283,7 +271,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
{ {
m_valid[row] |= col; m_valid[row] |= col;
(mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA); (mem.*rtxbP)(block, &dst[bn.blkX() << shift], pitch, m_TEXA);
blocks++; blocks++;
} }

View File

@ -25,7 +25,8 @@ public:
{ {
public: public:
GSState* m_state; GSState* m_state;
GSOffset* m_offset; GSOffset m_offset;
GSOffset::PageLooper m_pages;
GIFRegTEX0 m_TEX0; GIFRegTEX0 m_TEX0;
GIFRegTEXA m_TEXA; GIFRegTEXA m_TEXA;
void* m_buff; void* m_buff;
@ -36,7 +37,6 @@ public:
std::vector<GSVector2i>* m_p2t; std::vector<GSVector2i>* m_p2t;
uint32 m_valid[MAX_PAGES]; uint32 m_valid[MAX_PAGES];
std::array<uint16, MAX_PAGES> m_erase_it; std::array<uint16, MAX_PAGES> m_erase_it;
struct { uint32 bm[16]; const uint32* n; } m_pages;
const uint32* RESTRICT m_sharedbits; const uint32* RESTRICT m_sharedbits;
// m_valid // m_valid
@ -61,7 +61,7 @@ public:
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0); Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
void InvalidatePages(const uint32* pages, uint32 psm); void InvalidatePages(const GSOffset::PageLooper& pages, uint32 psm);
void RemoveAll(); void RemoveAll();
void IncAge(); void IncAge();

View File

@ -21,7 +21,7 @@
#include "GSLocalMemory.h" #include "GSLocalMemory.h"
GSLocalMemory::psm_t GSLocalMemory::m_psm[64]; GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm) GSOffset GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
{ {
abort(); abort();
} }