mirror of https://github.com/PCSX2/pcsx2.git
GS: Switch to new non-cached GSOffset
This commit is contained in:
parent
d9defb19f9
commit
8eb50c3517
|
@ -196,16 +196,14 @@ void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& T
|
||||||
template <int n>
|
template <int n>
|
||||||
void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
|
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
|
||||||
|
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||||
uint32* RESTRICT s = &m_mem->m_vm32[off->pixel.row[TEXCLUT.COV]];
|
|
||||||
int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4];
|
|
||||||
|
|
||||||
uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
|
uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
|
||||||
|
|
||||||
for (int i = 0; i < n; i++)
|
for (int i = 0; i < n; pa.incX(), i++)
|
||||||
{
|
{
|
||||||
uint32 c = s[col[i]];
|
uint32 c = m_mem->m_vm32[pa.value()];
|
||||||
|
|
||||||
clut[i] = (uint16)(c & 0xffff);
|
clut[i] = (uint16)(c & 0xffff);
|
||||||
clut[i + 256] = (uint16)(c >> 16);
|
clut[i + 256] = (uint16)(c >> 16);
|
||||||
|
@ -215,32 +213,28 @@ void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCL
|
||||||
template <int n>
|
template <int n>
|
||||||
void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
|
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
|
||||||
|
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||||
uint16* RESTRICT s = &m_mem->m_vm16[off->pixel.row[TEXCLUT.COV]];
|
|
||||||
int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4];
|
|
||||||
|
|
||||||
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
||||||
|
|
||||||
for (int i = 0; i < n; i++)
|
for (int i = 0; i < n; pa.incX(), i++)
|
||||||
{
|
{
|
||||||
clut[i] = s[col[i]];
|
clut[i] = m_mem->m_vm16[pa.value()];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int n>
|
template <int n>
|
||||||
void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
|
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
|
||||||
|
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||||
uint16* RESTRICT s = &m_mem->m_vm16[off->pixel.row[TEXCLUT.COV]];
|
|
||||||
int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4];
|
|
||||||
|
|
||||||
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
||||||
|
|
||||||
for (int i = 0; i < n; i++)
|
for (int i = 0; i < n; pa.incX(), i++)
|
||||||
{
|
{
|
||||||
clut[i] = s[col[i]];
|
clut[i] = m_mem->m_vm16[pa.value()];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,9 +48,9 @@ public:
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
GSOffset* fb;
|
GSOffset fb;
|
||||||
GSOffset* zb;
|
GSOffset zb;
|
||||||
GSOffset* tex;
|
GSOffset tex;
|
||||||
GSPixelOffset* fzb;
|
GSPixelOffset* fzb;
|
||||||
GSPixelOffset4* fzb4;
|
GSPixelOffset4* fzb4;
|
||||||
} offset;
|
} offset;
|
||||||
|
|
|
@ -35,6 +35,28 @@
|
||||||
|
|
||||||
#define FOREACH_BLOCK_END }}
|
#define FOREACH_BLOCK_END }}
|
||||||
|
|
||||||
|
template <typename Fn>
|
||||||
|
static void foreachBlock(const GSOffset& off, GSLocalMemory* mem, const GSVector4i& r, uint8* dst, int dstpitch, int bpp, Fn&& fn)
|
||||||
|
{
|
||||||
|
ASSERT(off.isBlockAligned(r));
|
||||||
|
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
|
||||||
|
int right = r.right >> off.blockShiftX();
|
||||||
|
int bottom = r.bottom >> off.blockShiftY();
|
||||||
|
|
||||||
|
int offset = dstpitch << off.blockShiftY();
|
||||||
|
int xAdd = (1 << off.blockShiftX()) * (bpp / 8);
|
||||||
|
|
||||||
|
for (; bn.blkY() < bottom; bn.nextBlockY(), dst += offset)
|
||||||
|
{
|
||||||
|
for (int x = 0; bn.blkX() < right; bn.nextBlockX(), x += xAdd)
|
||||||
|
{
|
||||||
|
const uint8* src = mem->BlockPtr(bn.value());
|
||||||
|
uint8* read_dst = dst + x;
|
||||||
|
fn(read_dst, src);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
uint32 GSLocalMemory::pageOffset32[32][32][64];
|
uint32 GSLocalMemory::pageOffset32[32][32][64];
|
||||||
|
@ -487,22 +509,9 @@ GSLocalMemory::~GSLocalMemory()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
GSOffset GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
||||||
{
|
{
|
||||||
uint32 hash = bp | (bw << 14) | (psm << 20);
|
return GSOffset(m_psm[psm].info, bp, bw, psm);
|
||||||
|
|
||||||
auto i = m_omap.find(hash);
|
|
||||||
|
|
||||||
if (i != m_omap.end())
|
|
||||||
{
|
|
||||||
return i->second;
|
|
||||||
}
|
|
||||||
|
|
||||||
GSOffset* off = new GSOffset(bp, bw, psm);
|
|
||||||
|
|
||||||
m_omap[hash] = off;
|
|
||||||
|
|
||||||
return off;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
|
GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
|
||||||
|
@ -629,19 +638,18 @@ std::vector<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
|
||||||
int tw = std::max<int>(1 << TEX0.TW, bs.x);
|
int tw = std::max<int>(1 << TEX0.TW, bs.x);
|
||||||
int th = std::max<int>(1 << TEX0.TH, bs.y);
|
int th = std::max<int>(1 << TEX0.TH, bs.y);
|
||||||
|
|
||||||
const GSOffset* off = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
GSOffset off = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||||
|
GSOffset::BNHelper bn = off.bnMulti(0, 0);
|
||||||
|
|
||||||
std::unordered_map<uint32, std::unordered_set<uint32>> tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
|
std::unordered_map<uint32, std::unordered_set<uint32>> tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
|
||||||
|
|
||||||
for (int y = 0; y < th; y += bs.y)
|
for (; bn.blkY() < (th >> off.blockShiftY()); bn.nextBlockY())
|
||||||
{
|
{
|
||||||
uint32 base = off->block.row[y >> 3];
|
for (; bn.blkX() < (tw >> off.blockShiftX()); bn.nextBlockX())
|
||||||
|
|
||||||
for (int x = 0, i = y << 7; x < tw; x += bs.x, i += bs.x)
|
|
||||||
{
|
{
|
||||||
uint32 page = ((base + off->block.col[x >> 3]) >> 5) % MAX_PAGES;
|
uint32 page = (bn.value() >> 5) % MAX_PAGES;
|
||||||
|
|
||||||
tmp[page].insert(i >> 3); // ((y << 7) | x) >> 3
|
tmp[page].insert((bn.blkY() << 7) + bn.blkX());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1703,42 +1711,38 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
|
|
||||||
///////////////////
|
///////////////////
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture32(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock32(src, read_dst, dstpitch);
|
GSBlock::ReadBlock32(src, read_dst, dstpitch);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
if (TEXA.AEM)
|
if (TEXA.AEM)
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 16, 8, 16)
|
foreachBlock(off, this, r, dst, dstpitch, 16, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock16(src, read_dst, dstpitch);
|
GSBlock::ReadBlock16(src, read_dst, dstpitch);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
|
|
||||||
// Convert packed RGB scanline to 32 bits RGBA
|
// Convert packed RGB scanline to 32 bits RGBA
|
||||||
ASSERT(dstpitch >= r.width() * 4);
|
ASSERT(dstpitch >= r.width() * 4);
|
||||||
|
@ -1753,79 +1757,72 @@ void GSLocalMemory::ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVecto
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture16(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
if (TEXA.AEM)
|
if (TEXA.AEM)
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture8(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture8(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const uint32* pal = m_clut;
|
||||||
|
|
||||||
FOREACH_BLOCK_START(r, 16, 16, 32)
|
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const uint64* pal = m_clut;
|
const uint64* pal = m_clut;
|
||||||
|
|
||||||
FOREACH_BLOCK_START(r, 32, 16, 32)
|
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture8H(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture8H(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const uint32* pal = m_clut;
|
||||||
|
|
||||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4HL(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4HL(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const uint32* pal = m_clut;
|
||||||
|
|
||||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4HH(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4HH(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const uint32* pal = m_clut;
|
||||||
|
|
||||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////
|
///////////////////
|
||||||
|
@ -1902,9 +1899,9 @@ void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, con
|
||||||
|
|
||||||
///////////////////
|
///////////////////
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const psm_t& psm = m_psm[off->psm];
|
const psm_t& psm = m_psm[off.psm()];
|
||||||
|
|
||||||
readTexel rt = psm.rt;
|
readTexel rt = psm.rt;
|
||||||
readTexture rtx = psm.rtx;
|
readTexture rtx = psm.rtx;
|
||||||
|
@ -1913,9 +1910,9 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i&
|
||||||
{
|
{
|
||||||
GIFRegTEX0 TEX0;
|
GIFRegTEX0 TEX0;
|
||||||
|
|
||||||
TEX0.TBP0 = off->bp;
|
TEX0.TBP0 = off.bp();
|
||||||
TEX0.TBW = off->bw;
|
TEX0.TBW = off.bw();
|
||||||
TEX0.PSM = off->psm;
|
TEX0.PSM = off.psm();
|
||||||
|
|
||||||
GSVector4i cr = r.ralign<Align_Inside>(psm.bs);
|
GSVector4i cr = r.ralign<Align_Inside>(psm.bs);
|
||||||
|
|
||||||
|
@ -1981,49 +1978,44 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i&
|
||||||
|
|
||||||
// 32/8
|
// 32/8
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture8P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 16, 16, 8)
|
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock8(src, read_dst, dstpitch);
|
GSBlock::ReadBlock8(src, read_dst, dstpitch);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 32, 16, 8)
|
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock4P(src, read_dst, dstpitch);
|
GSBlock::ReadBlock4P(src, read_dst, dstpitch);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture8HP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 8, 8, 8)
|
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock8HP(src, read_dst, dstpitch);
|
GSBlock::ReadBlock8HP(src, read_dst, dstpitch);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4HLP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 8, 8, 8)
|
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock4HLP(src, read_dst, dstpitch);
|
GSBlock::ReadBlock4HLP(src, read_dst, dstpitch);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4HHP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
FOREACH_BLOCK_START(r, 8, 8, 8)
|
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock4HHP(src, read_dst, dstpitch);
|
GSBlock::ReadBlock4HHP(src, read_dst, dstpitch);
|
||||||
}
|
});
|
||||||
FOREACH_BLOCK_END
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -2103,14 +2095,14 @@ void GSLocalMemory::SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
/// Helper for GSOffsetNew::pageLooperForRect
|
/// Helper for GSOffset::pageLooperForRect
|
||||||
struct alignas(16) TextureAligned
|
struct alignas(16) TextureAligned
|
||||||
{
|
{
|
||||||
int ox1, oy1, ox2, oy2; ///< Block-aligned outer rect (smallest rectangle containing the original that is block-aligned)
|
int ox1, oy1, ox2, oy2; ///< Block-aligned outer rect (smallest rectangle containing the original that is block-aligned)
|
||||||
int ix1, iy1, ix2, iy2; ///< Page-aligned inner rect (largest rectangle inside original that is page-aligned)
|
int ix1, iy1, ix2, iy2; ///< Page-aligned inner rect (largest rectangle inside original that is page-aligned)
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Helper for GSOffsetNew::pageLooperForRect
|
/// Helper for GSOffset::pageLooperForRect
|
||||||
TextureAligned align(const GSVector4i& rect, const GSVector2i& blockMask, const GSVector2i& pageMask, int blockShiftX, int blockShiftY)
|
TextureAligned align(const GSVector4i& rect, const GSVector2i& blockMask, const GSVector2i& pageMask, int blockShiftX, int blockShiftY)
|
||||||
{
|
{
|
||||||
GSVector4i outer = rect.ralign_presub<Align_Outside>(blockMask);
|
GSVector4i outer = rect.ralign_presub<Align_Outside>(blockMask);
|
||||||
|
@ -2149,7 +2141,7 @@ namespace
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
GSOffsetNew::PageLooper GSOffsetNew::pageLooperForRect(const GSVector4i& rect) const
|
GSOffset::PageLooper GSOffset::pageLooperForRect(const GSVector4i& rect) const
|
||||||
{
|
{
|
||||||
// Plan:
|
// Plan:
|
||||||
// - Split texture into tiles on page lines
|
// - Split texture into tiles on page lines
|
||||||
|
@ -2264,6 +2256,7 @@ GSOffsetNew::PageLooper GSOffsetNew::pageLooperForRect(const GSVector4i& rect) c
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm)
|
GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm)
|
||||||
{
|
{
|
||||||
hash = _bp | (_bw << 14) | (_psm << 20);
|
hash = _bp | (_bw << 14) | (_psm << 20);
|
||||||
|
@ -2414,3 +2407,4 @@ void* GSOffset::GetPagesAsBits(const GSVector4i& rect, void* pages)
|
||||||
|
|
||||||
return pages;
|
return pages;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
|
@ -21,38 +21,6 @@
|
||||||
#include "GSBlock.h"
|
#include "GSBlock.h"
|
||||||
#include "GSClut.h"
|
#include "GSClut.h"
|
||||||
|
|
||||||
class GSOffset : public GSAlignedClass<32>
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
struct alignas(32) Block
|
|
||||||
{
|
|
||||||
short row[256]; // yn (n = 0 8 16 ...)
|
|
||||||
short* col; // blockOffset*
|
|
||||||
};
|
|
||||||
|
|
||||||
struct alignas(32) Pixel
|
|
||||||
{
|
|
||||||
int row[4096]; // yn (n = 0 1 2 ...) NOTE: this wraps around above 2048, only transfers should address the upper half (dark cloud 2 inventing)
|
|
||||||
int* col[8]; // rowOffset*
|
|
||||||
};
|
|
||||||
|
|
||||||
union { uint32 hash; struct { uint32 bp:14, bw:6, psm:6; }; };
|
|
||||||
|
|
||||||
Block block;
|
|
||||||
Pixel pixel;
|
|
||||||
|
|
||||||
std::array<uint32*, 256> pages_as_bit; // texture page coverage based on the texture size. Lazy allocated
|
|
||||||
|
|
||||||
GSOffset(uint32 bp, uint32 bw, uint32 psm);
|
|
||||||
virtual ~GSOffset();
|
|
||||||
|
|
||||||
enum { EOP = 0xffffffff };
|
|
||||||
|
|
||||||
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
|
|
||||||
void* GetPagesAsBits(const GSVector4i& rect, void* pages);
|
|
||||||
uint32* GetPagesAsBits(const GIFRegTEX0& TEX0);
|
|
||||||
};
|
|
||||||
|
|
||||||
struct GSPixelOffset
|
struct GSPixelOffset
|
||||||
{
|
{
|
||||||
// 16 bit offsets (m_vm16[...])
|
// 16 bit offsets (m_vm16[...])
|
||||||
|
@ -75,7 +43,8 @@ struct GSPixelOffset4
|
||||||
|
|
||||||
class GSSwizzleInfo;
|
class GSSwizzleInfo;
|
||||||
|
|
||||||
class GSOffsetNew {
|
class GSOffset
|
||||||
|
{
|
||||||
/// Table for storing swizzling of blocks within a page
|
/// Table for storing swizzling of blocks within a page
|
||||||
const GSBlockSwizzleTable* m_blockSwizzle;
|
const GSBlockSwizzleTable* m_blockSwizzle;
|
||||||
/// Table for storing swizzling of pixels within a page (size: uint32[PageHeight][PageWidth])
|
/// Table for storing swizzling of pixels within a page (size: uint32[PageHeight][PageWidth])
|
||||||
|
@ -88,12 +57,12 @@ class GSOffsetNew {
|
||||||
uint8 m_blockShiftY; ///< Amount to rshift y value by to get block y offset
|
uint8 m_blockShiftY; ///< Amount to rshift y value by to get block y offset
|
||||||
int m_bp; ///< Offset's base pointer (same measurement as GS)
|
int m_bp; ///< Offset's base pointer (same measurement as GS)
|
||||||
int m_bwPg; ///< Offset's buffer width in pages (not equal to bw in GS for 8 and 4-bit textures)
|
int m_bwPg; ///< Offset's buffer width in pages (not equal to bw in GS for 8 and 4-bit textures)
|
||||||
int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffsetNew algorithms)
|
int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffset algorithms)
|
||||||
public:
|
public:
|
||||||
GSOffsetNew() = default;
|
GSOffset() = default;
|
||||||
constexpr GSOffsetNew(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm);
|
constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm);
|
||||||
/// Help the optimizer by using this method instead of GSLocalMemory::GetOffset when the PSM is known
|
/// Help the optimizer by using this method instead of GSLocalMemory::GetOffset when the PSM is known
|
||||||
constexpr static GSOffsetNew fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm);
|
constexpr static GSOffset fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm);
|
||||||
|
|
||||||
uint32 bp() const { return m_bp; }
|
uint32 bp() const { return m_bp; }
|
||||||
uint32 bw() const { return m_bwPg << (m_pageShiftX - 6); }
|
uint32 bw() const { return m_bwPg << (m_pageShiftX - 6); }
|
||||||
|
@ -104,7 +73,7 @@ public:
|
||||||
/// Helper class for efficiently getting the numbers of multiple blocks in a scanning pattern (increment x then y)
|
/// Helper class for efficiently getting the numbers of multiple blocks in a scanning pattern (increment x then y)
|
||||||
class BNHelper
|
class BNHelper
|
||||||
{
|
{
|
||||||
const GSBlockSwizzleTable* m_blockSwizzle; ///< Block swizzle table from GSOffsetNew
|
const GSBlockSwizzleTable* m_blockSwizzle; ///< Block swizzle table from GSOffset
|
||||||
int m_baseBP; ///< bp for start of current row (to return to the origin x when advancing y)
|
int m_baseBP; ///< bp for start of current row (to return to the origin x when advancing y)
|
||||||
int m_bp; ///< bp for current position
|
int m_bp; ///< bp for current position
|
||||||
int m_baseBlkX; ///< x of origin in blocks (to return to the origin x when advancing y)
|
int m_baseBlkX; ///< x of origin in blocks (to return to the origin x when advancing y)
|
||||||
|
@ -114,7 +83,7 @@ public:
|
||||||
int m_pageMaskY; ///< mask for y value of block coordinate to get position within page (to detect page crossing)
|
int m_pageMaskY; ///< mask for y value of block coordinate to get position within page (to detect page crossing)
|
||||||
int m_addY; ///< Amount to add to bp to advance one page in y direction
|
int m_addY; ///< Amount to add to bp to advance one page in y direction
|
||||||
public:
|
public:
|
||||||
BNHelper(const GSOffsetNew& off, int x, int y)
|
BNHelper(const GSOffset& off, int x, int y)
|
||||||
{
|
{
|
||||||
m_blockSwizzle = off.m_blockSwizzle;
|
m_blockSwizzle = off.m_blockSwizzle;
|
||||||
int yAmt = ((y >> (off.m_pageShiftY - 5)) & ~0x1f) * off.m_bwPg;
|
int yAmt = ((y >> (off.m_pageShiftY - 5)) & ~0x1f) * off.m_bwPg;
|
||||||
|
@ -209,7 +178,7 @@ public:
|
||||||
int m_shift; ///< Amount to lshift page number to get element offset for the start of that page
|
int m_shift; ///< Amount to lshift page number to get element offset for the start of that page
|
||||||
public:
|
public:
|
||||||
PAHelper() = default;
|
PAHelper() = default;
|
||||||
PAHelper(const GSOffsetNew& off, int x, int y)
|
PAHelper(const GSOffset& off, int x, int y)
|
||||||
{
|
{
|
||||||
m_pixelSwizzle = off.m_pixelSwizzle + ((y & off.m_pageMask.y) << off.m_pageShiftX);
|
m_pixelSwizzle = off.m_pixelSwizzle + ((y & off.m_pageMask.y) << off.m_pageShiftX);
|
||||||
m_pageBase = (off.m_bp >> 5) + (y >> off.m_pageShiftY) * off.m_bwPg;
|
m_pageBase = (off.m_bp >> 5) + (y >> off.m_pageShiftY) * off.m_bwPg;
|
||||||
|
@ -264,7 +233,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Helper class for looping over the pages in a rect
|
/// Helper class for looping over the pages in a rect
|
||||||
/// Create with GSOffsetNew::pageLooperForRect
|
/// Create with GSOffset::pageLooperForRect
|
||||||
class PageLooper
|
class PageLooper
|
||||||
{
|
{
|
||||||
int firstRowPgXStart, firstRowPgXEnd; ///< Offset of start/end pages of the first line from x=0 page (only line for textures that don't cross page boundaries)
|
int firstRowPgXStart, firstRowPgXEnd; ///< Offset of start/end pages of the first line from x=0 page (only line for textures that don't cross page boundaries)
|
||||||
|
@ -274,7 +243,7 @@ public:
|
||||||
int yInc; ///< Amount to add to bp when increasing y by one page
|
int yInc; ///< Amount to add to bp when increasing y by one page
|
||||||
int yCnt; ///< Number of pages the rect covers in the y direction
|
int yCnt; ///< Number of pages the rect covers in the y direction
|
||||||
|
|
||||||
friend class GSOffsetNew;
|
friend class GSOffset;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// Loop over pages, fn can return `false` to break the loop
|
/// Loop over pages, fn can return `false` to break the loop
|
||||||
|
@ -333,7 +302,7 @@ public:
|
||||||
|
|
||||||
class GSSwizzleInfo
|
class GSSwizzleInfo
|
||||||
{
|
{
|
||||||
friend class GSOffsetNew;
|
friend class GSOffset;
|
||||||
/// Table for storing swizzling of blocks within a page
|
/// Table for storing swizzling of blocks within a page
|
||||||
const GSBlockSwizzleTable* m_blockSwizzle;
|
const GSBlockSwizzleTable* m_blockSwizzle;
|
||||||
/// Table for storing swizzling of pixels within a page
|
/// Table for storing swizzling of pixels within a page
|
||||||
|
@ -367,31 +336,31 @@ public:
|
||||||
/// Get the block number of the given pixel
|
/// Get the block number of the given pixel
|
||||||
uint32 bn(int x, int y, uint32 bp, uint32 bw) const
|
uint32 bn(int x, int y, uint32 bp, uint32 bw) const
|
||||||
{
|
{
|
||||||
return GSOffsetNew(*this, bp, bw, 0).bn(x, y);
|
return GSOffset(*this, bp, bw, 0).bn(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the address of the given pixel
|
/// Get the address of the given pixel
|
||||||
uint32 pa(int x, int y, uint32 bp, uint32 bw) const
|
uint32 pa(int x, int y, uint32 bp, uint32 bw) const
|
||||||
{
|
{
|
||||||
return GSOffsetNew(*this, bp, bw, 0).pa(x, y);
|
return GSOffset(*this, bp, bw, 0).pa(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Loop over all the pages in the given rect, calling `fn` on each
|
/// Loop over all the pages in the given rect, calling `fn` on each
|
||||||
template <typename Fn>
|
template <typename Fn>
|
||||||
void loopPages(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
|
void loopPages(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
|
||||||
{
|
{
|
||||||
GSOffsetNew(*this, bp, bw, 0).loopPages(rect, std::forward<Fn>(fn));
|
GSOffset(*this, bp, bw, 0).loopPages(rect, std::forward<Fn>(fn));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Loop over all the blocks in the given rect, calling `fn` on each
|
/// Loop over all the blocks in the given rect, calling `fn` on each
|
||||||
template <typename Fn>
|
template <typename Fn>
|
||||||
void loopBlocks(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
|
void loopBlocks(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
|
||||||
{
|
{
|
||||||
GSOffsetNew(*this, bp, bw, 0).loopBlocks(rect, std::forward<Fn>(fn));
|
GSOffset(*this, bp, bw, 0).loopBlocks(rect, std::forward<Fn>(fn));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr inline GSOffsetNew::GSOffsetNew(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
|
constexpr inline GSOffset::GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
|
||||||
: m_blockSwizzle(swz.m_blockSwizzle)
|
: m_blockSwizzle(swz.m_blockSwizzle)
|
||||||
, m_pixelSwizzle(swz.m_pixelSwizzle + ((bp & 0x1f) << (swz.m_pageShiftX + swz.m_pageShiftY)))
|
, m_pixelSwizzle(swz.m_pixelSwizzle + ((bp & 0x1f) << (swz.m_pageShiftX + swz.m_pageShiftY)))
|
||||||
, m_pageMask(swz.m_pageMask), m_blockMask(swz.m_blockMask)
|
, m_pageMask(swz.m_pageMask), m_blockMask(swz.m_blockMask)
|
||||||
|
@ -417,7 +386,7 @@ public:
|
||||||
typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const;
|
typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const;
|
||||||
typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
|
typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
|
||||||
typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
|
typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
|
||||||
typedef void (GSLocalMemory::*readTexture)(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
typedef void (GSLocalMemory::*readTexture)(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||||
|
|
||||||
struct alignas(128) psm_t
|
struct alignas(128) psm_t
|
||||||
|
@ -520,7 +489,7 @@ public:
|
||||||
GSLocalMemory();
|
GSLocalMemory();
|
||||||
virtual ~GSLocalMemory();
|
virtual ~GSLocalMemory();
|
||||||
|
|
||||||
GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm);
|
GSOffset GetOffset(uint32 bp, uint32 bw, uint32 psm);
|
||||||
GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
|
GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
|
||||||
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
|
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
|
||||||
std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
|
std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
|
||||||
|
@ -985,75 +954,35 @@ public:
|
||||||
WriteFrame16(PixelAddress16SZ(x, y, bp, bw), c);
|
WriteFrame16(PixelAddress16SZ(x, y, bp, bw), c);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void WritePixel32(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r)
|
void WritePixel32(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
|
||||||
{
|
{
|
||||||
src -= r.left * sizeof(uint32);
|
off.loopPixels(r, m_vm32, (uint32*)src, pitch, [&](uint32* dst, uint32* src) { *dst = *src; });
|
||||||
|
|
||||||
for (int y = r.top; y < r.bottom; y++, src += pitch)
|
|
||||||
{
|
|
||||||
uint32* RESTRICT s = (uint32*)src;
|
|
||||||
uint32* RESTRICT d = &m_vm32[off->pixel.row[y]];
|
|
||||||
int* RESTRICT col = off->pixel.col[0];
|
|
||||||
|
|
||||||
for (int x = r.left; x < r.right; x++)
|
|
||||||
{
|
|
||||||
d[col[x]] = s[x];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void WritePixel24(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r)
|
void WritePixel24(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
|
||||||
{
|
{
|
||||||
src -= r.left * sizeof(uint32);
|
off.loopPixels(r, m_vm32, (uint32*)src, pitch,
|
||||||
|
[&](uint32* dst, uint32* src)
|
||||||
for (int y = r.top; y < r.bottom; y++, src += pitch)
|
|
||||||
{
|
{
|
||||||
uint32* RESTRICT s = (uint32*)src;
|
*dst = (*dst & 0xff000000) | (*src & 0x00ffffff);
|
||||||
uint32* RESTRICT d = &m_vm32[off->pixel.row[y]];
|
});
|
||||||
int* RESTRICT col = off->pixel.col[0];
|
|
||||||
|
|
||||||
for (int x = r.left; x < r.right; x++)
|
|
||||||
{
|
|
||||||
d[col[x]] = (d[col[x]] & 0xff000000) | (s[x] & 0x00ffffff);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void WritePixel16(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r)
|
void WritePixel16(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
|
||||||
{
|
{
|
||||||
src -= r.left * sizeof(uint16);
|
off.loopPixels(r, m_vm16, (uint16*)src, pitch, [&](uint16* dst, uint16* src) { *dst = *src; });
|
||||||
|
|
||||||
for (int y = r.top; y < r.bottom; y++, src += pitch)
|
|
||||||
{
|
|
||||||
uint16* RESTRICT s = (uint16*)src;
|
|
||||||
uint16* RESTRICT d = &m_vm16[off->pixel.row[y]];
|
|
||||||
int* RESTRICT col = off->pixel.col[0];
|
|
||||||
|
|
||||||
for (int x = r.left; x < r.right; x++)
|
|
||||||
{
|
|
||||||
d[col[x]] = s[x];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void WriteFrame16(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r)
|
void WriteFrame16(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
|
||||||
{
|
{
|
||||||
src -= r.left * sizeof(uint32);
|
off.loopPixels(r, m_vm16, (uint32*)src, pitch,
|
||||||
|
[&](uint16* dst, uint32* src)
|
||||||
for (int y = r.top; y < r.bottom; y++, src += pitch)
|
|
||||||
{
|
{
|
||||||
uint32* RESTRICT s = (uint32*)src;
|
uint32 rb = *src & 0x00f800f8;
|
||||||
uint16* RESTRICT d = &m_vm16[off->pixel.row[y]];
|
uint32 ga = *src & 0x8000f800;
|
||||||
int* RESTRICT col = off->pixel.col[0];
|
|
||||||
|
|
||||||
for (int x = r.left; x < r.right; x++)
|
*dst = (uint16)((ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3));
|
||||||
{
|
});
|
||||||
uint32 rb = s[x] & 0x00f800f8;
|
|
||||||
uint32 ga = s[x] & 0x8000f800;
|
|
||||||
|
|
||||||
d[col[x]] = (uint16)((ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline uint32 ReadTexel32(uint32 addr, const GIFRegTEXA& TEXA) const
|
__forceinline uint32 ReadTexel32(uint32 addr, const GIFRegTEXA& TEXA) const
|
||||||
|
@ -1191,17 +1120,17 @@ public:
|
||||||
|
|
||||||
// * => 32
|
// * => 32
|
||||||
|
|
||||||
void ReadTexture32(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture16(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture16(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture8(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture8(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture4(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture4(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture8H(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture8H(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture4HL(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture4HL(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture4HH(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture4HH(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
|
|
||||||
void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
|
|
||||||
void ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
void ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||||
void ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
void ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||||
|
@ -1214,11 +1143,11 @@ public:
|
||||||
|
|
||||||
// pal ? 8 : 32
|
// pal ? 8 : 32
|
||||||
|
|
||||||
void ReadTexture8P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture4P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture8HP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture4HLP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
void ReadTexture4HHP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
|
|
||||||
void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||||
void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||||
|
@ -1229,31 +1158,31 @@ public:
|
||||||
//
|
//
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
void ReadTexture(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
void SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h);
|
void SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h);
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr inline GSOffsetNew GSOffsetNew::fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm)
|
constexpr inline GSOffset GSOffset::fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm)
|
||||||
{
|
{
|
||||||
switch (psm)
|
switch (psm)
|
||||||
{
|
{
|
||||||
case PSM_PSMCT32: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
|
case PSM_PSMCT32: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
|
||||||
case PSM_PSMCT24: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
|
case PSM_PSMCT24: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
|
||||||
case PSM_PSMCT16: return GSOffsetNew(GSLocalMemory::swizzle16, bp, bw, psm);
|
case PSM_PSMCT16: return GSOffset(GSLocalMemory::swizzle16, bp, bw, psm);
|
||||||
case PSM_PSMCT16S: return GSOffsetNew(GSLocalMemory::swizzle16S, bp, bw, psm);
|
case PSM_PSMCT16S: return GSOffset(GSLocalMemory::swizzle16S, bp, bw, psm);
|
||||||
case PSM_PSGPU24: return GSOffsetNew(GSLocalMemory::swizzle16, bp, bw, psm);
|
case PSM_PSGPU24: return GSOffset(GSLocalMemory::swizzle16, bp, bw, psm);
|
||||||
case PSM_PSMT8: return GSOffsetNew(GSLocalMemory::swizzle8, bp, bw, psm);
|
case PSM_PSMT8: return GSOffset(GSLocalMemory::swizzle8, bp, bw, psm);
|
||||||
case PSM_PSMT4: return GSOffsetNew(GSLocalMemory::swizzle4, bp, bw, psm);
|
case PSM_PSMT4: return GSOffset(GSLocalMemory::swizzle4, bp, bw, psm);
|
||||||
case PSM_PSMT8H: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
|
case PSM_PSMT8H: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
|
||||||
case PSM_PSMT4HL: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
|
case PSM_PSMT4HL: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
|
||||||
case PSM_PSMT4HH: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
|
case PSM_PSMT4HH: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
|
||||||
case PSM_PSMZ32: return GSOffsetNew(GSLocalMemory::swizzle32Z, bp, bw, psm);
|
case PSM_PSMZ32: return GSOffset(GSLocalMemory::swizzle32Z, bp, bw, psm);
|
||||||
case PSM_PSMZ24: return GSOffsetNew(GSLocalMemory::swizzle32Z, bp, bw, psm);
|
case PSM_PSMZ24: return GSOffset(GSLocalMemory::swizzle32Z, bp, bw, psm);
|
||||||
case PSM_PSMZ16: return GSOffsetNew(GSLocalMemory::swizzle16Z, bp, bw, psm);
|
case PSM_PSMZ16: return GSOffset(GSLocalMemory::swizzle16Z, bp, bw, psm);
|
||||||
case PSM_PSMZ16S: return GSOffsetNew(GSLocalMemory::swizzle16SZ, bp, bw, psm);
|
case PSM_PSMZ16S: return GSOffset(GSLocalMemory::swizzle16SZ, bp, bw, psm);
|
||||||
}
|
}
|
||||||
return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
|
return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1650,182 +1650,88 @@ void GSState::Move()
|
||||||
|
|
||||||
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
|
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
|
||||||
|
|
||||||
GSOffset* RESTRICT spo = m_mem.GetOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM);
|
GSOffset spo = m_mem.GetOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM);
|
||||||
GSOffset* RESTRICT dpo = m_mem.GetOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM);
|
GSOffset dpo = m_mem.GetOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM);
|
||||||
|
|
||||||
|
auto copy = [&](auto&& pxCopyFn)
|
||||||
|
{
|
||||||
|
if (xinc > 0)
|
||||||
|
{
|
||||||
|
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
||||||
|
{
|
||||||
|
GSOffset::PAHelper s = spo.paMulti(sx, sy);
|
||||||
|
GSOffset::PAHelper d = dpo.paMulti(dx, dy);
|
||||||
|
|
||||||
|
for (int x = 0; x < w; x++)
|
||||||
|
{
|
||||||
|
pxCopyFn(d.value(), s.value());
|
||||||
|
s.incX();
|
||||||
|
d.incX();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
||||||
|
{
|
||||||
|
GSOffset::PAHelper s = spo.paMulti(sx, sy);
|
||||||
|
GSOffset::PAHelper d = dpo.paMulti(dx, dy);
|
||||||
|
|
||||||
|
for (int x = 0; x < w; x++)
|
||||||
|
{
|
||||||
|
pxCopyFn(d.value(), s.value());
|
||||||
|
s.decX();
|
||||||
|
d.decX();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
if (spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
|
if (spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
|
||||||
{
|
{
|
||||||
int* RESTRICT scol = &spo->pixel.col[0][sx];
|
|
||||||
int* RESTRICT dcol = &dpo->pixel.col[0][dx];
|
|
||||||
|
|
||||||
if (spsm.trbpp == 32)
|
if (spsm.trbpp == 32)
|
||||||
{
|
{
|
||||||
if (xinc > 0)
|
copy([&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
m_mem.m_vm32[doff] = m_mem.m_vm32[soff];
|
||||||
{
|
});
|
||||||
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
|
|
||||||
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
|
|
||||||
|
|
||||||
for (int x = 0; x < w; x++)
|
|
||||||
d[dcol[x]] = s[scol[x]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
|
||||||
{
|
|
||||||
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
|
|
||||||
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
|
|
||||||
|
|
||||||
for (int x = 0; x > -w; x--)
|
|
||||||
d[dcol[x]] = s[scol[x]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else if (spsm.trbpp == 24)
|
else if (spsm.trbpp == 24)
|
||||||
{
|
{
|
||||||
if (xinc > 0)
|
copy([&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
uint32& d = m_mem.m_vm32[doff];
|
||||||
{
|
d = (d & 0xff000000) | (m_mem.m_vm32[soff] & 0x00ffffff);
|
||||||
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
|
});
|
||||||
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
|
|
||||||
|
|
||||||
for (int x = 0; x < w; x++)
|
|
||||||
d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
|
||||||
{
|
|
||||||
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
|
|
||||||
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
|
|
||||||
|
|
||||||
for (int x = 0; x > -w; x--)
|
|
||||||
d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else // if(spsm.trbpp == 16)
|
else // if(spsm.trbpp == 16)
|
||||||
{
|
{
|
||||||
if (xinc > 0)
|
copy([&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
m_mem.m_vm16[doff] = m_mem.m_vm16[soff];
|
||||||
{
|
});
|
||||||
uint16* RESTRICT s = &m_mem.m_vm16[spo->pixel.row[sy]];
|
|
||||||
uint16* RESTRICT d = &m_mem.m_vm16[dpo->pixel.row[dy]];
|
|
||||||
|
|
||||||
for (int x = 0; x < w; x++)
|
|
||||||
d[dcol[x]] = s[scol[x]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
|
||||||
{
|
|
||||||
uint16* RESTRICT s = &m_mem.m_vm16[spo->pixel.row[sy]];
|
|
||||||
uint16* RESTRICT d = &m_mem.m_vm16[dpo->pixel.row[dy]];
|
|
||||||
|
|
||||||
for (int x = 0; x > -w; x--)
|
|
||||||
d[dcol[x]] = s[scol[x]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
|
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
|
||||||
{
|
{
|
||||||
if (xinc > 0)
|
copy([&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
m_mem.m_vm8[doff] = m_mem.m_vm8[soff];
|
||||||
{
|
});
|
||||||
uint8* RESTRICT s = &m_mem.m_vm8[spo->pixel.row[sy]];
|
|
||||||
uint8* RESTRICT d = &m_mem.m_vm8[dpo->pixel.row[dy]];
|
|
||||||
|
|
||||||
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
|
|
||||||
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
|
|
||||||
|
|
||||||
for (int x = 0; x < w; x++)
|
|
||||||
d[dcol[x]] = s[scol[x]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
|
||||||
{
|
|
||||||
uint8* RESTRICT s = &m_mem.m_vm8[spo->pixel.row[sy]];
|
|
||||||
uint8* RESTRICT d = &m_mem.m_vm8[dpo->pixel.row[dy]];
|
|
||||||
|
|
||||||
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
|
|
||||||
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
|
|
||||||
|
|
||||||
for (int x = 0; x > -w; x--)
|
|
||||||
d[dcol[x]] = s[scol[x]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
|
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
|
||||||
{
|
{
|
||||||
if (xinc > 0)
|
copy([&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
m_mem.WritePixel4(doff, m_mem.ReadPixel4(soff));
|
||||||
{
|
});
|
||||||
uint32 sbase = spo->pixel.row[sy];
|
|
||||||
uint32 dbase = dpo->pixel.row[dy];
|
|
||||||
|
|
||||||
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
|
|
||||||
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
|
|
||||||
|
|
||||||
for (int x = 0; x < w; x++)
|
|
||||||
m_mem.WritePixel4(dbase + dcol[x], m_mem.ReadPixel4(sbase + scol[x]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
|
||||||
{
|
|
||||||
uint32 sbase = spo->pixel.row[sy];
|
|
||||||
uint32 dbase = dpo->pixel.row[dy];
|
|
||||||
|
|
||||||
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
|
|
||||||
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
|
|
||||||
|
|
||||||
for (int x = 0; x > -w; x--)
|
|
||||||
m_mem.WritePixel4(dbase + dcol[x], m_mem.ReadPixel4(sbase + scol[x]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (xinc > 0)
|
copy([&](uint32 doff, uint32 soff)
|
||||||
{
|
{
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
(m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff));
|
||||||
{
|
});
|
||||||
uint32 sbase = spo->pixel.row[sy];
|
|
||||||
uint32 dbase = dpo->pixel.row[dy];
|
|
||||||
|
|
||||||
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
|
|
||||||
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
|
|
||||||
|
|
||||||
for (int x = 0; x < w; x++)
|
|
||||||
(m_mem.*dpsm.wpa)(dbase + dcol[x], (m_mem.*spsm.rpa)(sbase + scol[x]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
|
||||||
{
|
|
||||||
uint32 sbase = spo->pixel.row[sy];
|
|
||||||
uint32 dbase = dpo->pixel.row[dy];
|
|
||||||
|
|
||||||
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
|
|
||||||
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
|
|
||||||
|
|
||||||
for (int x = 0; x > -w; x--)
|
|
||||||
(m_mem.*dpsm.wpa)(dbase + dcol[x], (m_mem.*spsm.rpa)(sbase + scol[x]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -79,7 +79,7 @@ void GSTextureCache11::Read(Target* t, const GSVector4i& r)
|
||||||
{
|
{
|
||||||
// TODO: block level write
|
// TODO: block level write
|
||||||
|
|
||||||
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||||
|
|
||||||
switch (TEX0.PSM)
|
switch (TEX0.PSM)
|
||||||
{
|
{
|
||||||
|
@ -124,7 +124,7 @@ void GSTextureCache11::Read(Source* t, const GSVector4i& r)
|
||||||
|
|
||||||
if (offscreen->Map(m, &r_offscreen))
|
if (offscreen->Map(m, &r_offscreen))
|
||||||
{
|
{
|
||||||
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||||
|
|
||||||
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
|
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
|
||||||
|
|
||||||
|
|
|
@ -885,11 +885,8 @@ void GSRendererHW::SwSpriteRender()
|
||||||
InvalidateLocalMem(bitbltbuf, GSVector4i(sx, sy, sx + w, sy + h));
|
InvalidateLocalMem(bitbltbuf, GSVector4i(sx, sy, sx + w, sy + h));
|
||||||
InvalidateVideoMem(bitbltbuf, GSVector4i(dx, dy, dx + w, dy + h));
|
InvalidateVideoMem(bitbltbuf, GSVector4i(dx, dy, dx + w, dy + h));
|
||||||
|
|
||||||
GSOffset* RESTRICT spo = texture_mapping_enabled ? m_mem.GetOffset(bitbltbuf.SBP, bitbltbuf.SBW, bitbltbuf.SPSM) : nullptr;
|
GSOffset spo = texture_mapping_enabled ? m_mem.GetOffset(bitbltbuf.SBP, bitbltbuf.SBW, bitbltbuf.SPSM) : GSOffset();
|
||||||
GSOffset* RESTRICT dpo = m_mem.GetOffset(bitbltbuf.DBP, bitbltbuf.DBW, bitbltbuf.DPSM);
|
GSOffset dpo = m_mem.GetOffset(bitbltbuf.DBP, bitbltbuf.DBW, bitbltbuf.DPSM);
|
||||||
|
|
||||||
const int* RESTRICT scol = texture_mapping_enabled ? &spo->pixel.col[0][sx] : nullptr;
|
|
||||||
int* RESTRICT dcol = &dpo->pixel.col[0][dx];
|
|
||||||
|
|
||||||
const bool alpha_blending_enabled = PRIM->ABE;
|
const bool alpha_blending_enabled = PRIM->ABE;
|
||||||
|
|
||||||
|
@ -910,19 +907,27 @@ void GSRendererHW::SwSpriteRender()
|
||||||
|
|
||||||
for (int y = 0; y < h; y++, ++sy, ++dy)
|
for (int y = 0; y < h; y++, ++sy, ++dy)
|
||||||
{
|
{
|
||||||
const uint32* RESTRICT s = texture_mapping_enabled ? &m_mem.m_vm32[spo->pixel.row[sy]] : nullptr;
|
GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sx, sy) : GSOffset::PAHelper();
|
||||||
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
|
GSOffset::PAHelper dpa = dpo.paMulti(dx, dy);
|
||||||
|
|
||||||
ASSERT(w % 2 == 0);
|
ASSERT(w % 2 == 0);
|
||||||
|
|
||||||
for (int x = 0; x < w; x += 2)
|
for (int x = 0; x < w; x += 2)
|
||||||
{
|
{
|
||||||
|
uint32 di = dpa.value();
|
||||||
|
dpa.incX();
|
||||||
|
ASSERT(di + 1 == dpa.value()); // Destination pixel pair is adjacent in memory
|
||||||
|
dpa.incX();
|
||||||
|
|
||||||
GSVector4i sc;
|
GSVector4i sc;
|
||||||
if (texture_mapping_enabled)
|
if (texture_mapping_enabled)
|
||||||
{
|
{
|
||||||
|
uint32 si = spa.value();
|
||||||
|
spa.incX();
|
||||||
// Read 2 source pixel colors
|
// Read 2 source pixel colors
|
||||||
ASSERT((scol[x] + 1) == scol[x + 1]); // Source pixel pair is adjacent in memory
|
ASSERT((si + 1) == spa.value()); // Source pixel pair is adjacent in memory
|
||||||
sc = GSVector4i::loadl(&s[scol[x]]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
spa.incX();
|
||||||
|
sc = GSVector4i::loadl(&m_mem.m_vm32[si]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
||||||
|
|
||||||
// Apply TFX
|
// Apply TFX
|
||||||
ASSERT(tex0_tfx == 0 || tex0_tfx == 1);
|
ASSERT(tex0_tfx == 0 || tex0_tfx == 1);
|
||||||
|
@ -943,8 +948,7 @@ void GSRendererHW::SwSpriteRender()
|
||||||
if (alpha_blending_enabled || fb_mask_enabled)
|
if (alpha_blending_enabled || fb_mask_enabled)
|
||||||
{
|
{
|
||||||
// Read 2 destination pixel colors
|
// Read 2 destination pixel colors
|
||||||
ASSERT((dcol[x] + 1) == dcol[x + 1]); // Destination pixel pair is adjacent in memory
|
dc0 = GSVector4i::loadl(&m_mem.m_vm32[di]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
||||||
dc0 = GSVector4i::loadl(&d[dcol[x]]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (alpha_blending_enabled)
|
if (alpha_blending_enabled)
|
||||||
|
@ -999,8 +1003,7 @@ void GSRendererHW::SwSpriteRender()
|
||||||
|
|
||||||
// Store 2 pixel colors
|
// Store 2 pixel colors
|
||||||
dc = dc.pu16(GSVector4i::zero()); // 0x0000000000000000AABBGGRRaabbggrr
|
dc = dc.pu16(GSVector4i::zero()); // 0x0000000000000000AABBGGRRaabbggrr
|
||||||
ASSERT((dcol[x] + 1) == dcol[x + 1]); // Destination pixel pair is adjacent in memory
|
GSVector4i::storel(&m_mem.m_vm32[di], dc);
|
||||||
GSVector4i::storel(&d[dcol[x]], dc);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1806,7 +1809,7 @@ void GSRendererHW::OI_GsMemClear()
|
||||||
// Limit it further to a full screen 0 write
|
// Limit it further to a full screen 0 write
|
||||||
if ((m_vertex.next == 2) && m_vt.m_min.c.eq(GSVector4i(0)))
|
if ((m_vertex.next == 2) && m_vt.m_min.c.eq(GSVector4i(0)))
|
||||||
{
|
{
|
||||||
GSOffset* off = m_context->offset.fb;
|
const GSOffset& off = m_context->offset.fb;
|
||||||
const GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in));
|
const GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in));
|
||||||
// Limit the hack to a single fullscreen clear. Some games might use several columns to clear a screen
|
// Limit the hack to a single fullscreen clear. Some games might use several columns to clear a screen
|
||||||
// but hopefully it will be enough.
|
// but hopefully it will be enough.
|
||||||
|
@ -1824,12 +1827,11 @@ void GSRendererHW::OI_GsMemClear()
|
||||||
// Based on WritePixel32
|
// Based on WritePixel32
|
||||||
for (int y = r.top; y < r.bottom; y++)
|
for (int y = r.top; y < r.bottom; y++)
|
||||||
{
|
{
|
||||||
uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]];
|
GSOffset::PAHelper pa = off.paMulti(r.left, y);
|
||||||
int* RESTRICT col = off->pixel.col[0];
|
|
||||||
|
|
||||||
for (int x = r.left; x < r.right; x++)
|
for (; pa.x() < r.right; pa.incX())
|
||||||
{
|
{
|
||||||
d[col[x]] = 0; // Here the constant color
|
m_mem.m_vm32[pa.value()] = 0; // Here the constant color
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1838,12 +1840,11 @@ void GSRendererHW::OI_GsMemClear()
|
||||||
// Based on WritePixel24
|
// Based on WritePixel24
|
||||||
for (int y = r.top; y < r.bottom; y++)
|
for (int y = r.top; y < r.bottom; y++)
|
||||||
{
|
{
|
||||||
uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]];
|
GSOffset::PAHelper pa = off.paMulti(r.left, y);
|
||||||
int* RESTRICT col = off->pixel.col[0];
|
|
||||||
|
|
||||||
for (int x = r.left; x < r.right; x++)
|
for (; pa.x() < r.right; pa.incX())
|
||||||
{
|
{
|
||||||
d[col[x]] &= 0xff000000; // Clear the color
|
m_mem.m_vm32[pa.value()] &= 0xff000000; // Clear the color
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1854,12 +1855,11 @@ void GSRendererHW::OI_GsMemClear()
|
||||||
// Based on WritePixel16
|
// Based on WritePixel16
|
||||||
for(int y = r.top; y < r.bottom; y++)
|
for(int y = r.top; y < r.bottom; y++)
|
||||||
{
|
{
|
||||||
uint32* RESTRICT d = &m_mem.m_vm16[off->pixel.row[y]];
|
GSOffset::PAHelper pa = off.paMulti(r.left, y);
|
||||||
int* RESTRICT col = off->pixel.col[0];
|
|
||||||
|
|
||||||
for(int x = r.left; x < r.right; x++)
|
for(int x = r.left; x < r.right; x++)
|
||||||
{
|
{
|
||||||
d[col[x]] = 0; // Here the constant color
|
m_mem.m_vm16[pa.value()] = 0; // Here the constant color
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -797,14 +797,11 @@ void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp)
|
||||||
|
|
||||||
// Goal: invalidate data sent to the GPU when the source (GS memory) is modified
|
// Goal: invalidate data sent to the GPU when the source (GS memory) is modified
|
||||||
// Called each time you want to write to the GS memory
|
// Called each time you want to write to the GS memory
|
||||||
void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, bool target)
|
void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& rect, bool target)
|
||||||
{
|
{
|
||||||
if (!off)
|
uint32 bp = off.bp();
|
||||||
return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
|
uint32 bw = off.bw();
|
||||||
|
uint32 psm = off.psm();
|
||||||
uint32 bp = off->bp;
|
|
||||||
uint32 bw = off->bw;
|
|
||||||
uint32 psm = off->psm;
|
|
||||||
|
|
||||||
if (!target)
|
if (!target)
|
||||||
{
|
{
|
||||||
|
@ -871,18 +868,12 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GSVector4i r;
|
|
||||||
|
|
||||||
uint32* pages = (uint32*)m_temp;
|
|
||||||
|
|
||||||
off->GetPages(rect, pages, &r);
|
|
||||||
|
|
||||||
bool found = false;
|
bool found = false;
|
||||||
|
|
||||||
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
|
GSVector4i r = rect.ralign<Align_Outside>((bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs);
|
||||||
{
|
|
||||||
uint32 page = *p;
|
|
||||||
|
|
||||||
|
off.loopPages(rect, [&](uint32 page)
|
||||||
|
{
|
||||||
auto& list = m_src.m_map[page];
|
auto& list = m_src.m_map[page];
|
||||||
for (auto i = list.begin(); i != list.end();)
|
for (auto i = list.begin(); i != list.end();)
|
||||||
{
|
{
|
||||||
|
@ -937,7 +928,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
|
|
||||||
if (!target)
|
if (!target)
|
||||||
return;
|
return;
|
||||||
|
@ -1045,10 +1036,10 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
|
||||||
|
|
||||||
// Goal: retrieve the data from the GPU to the GS memory.
|
// Goal: retrieve the data from the GPU to the GS memory.
|
||||||
// Called each time you want to read from the GS memory
|
// Called each time you want to read from the GS memory
|
||||||
void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r)
|
void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r)
|
||||||
{
|
{
|
||||||
uint32 bp = off->bp;
|
uint32 bp = off.bp();
|
||||||
uint32 psm = off->psm;
|
uint32 psm = off.psm();
|
||||||
//uint32 bw = off->bw;
|
//uint32 bw = off->bw;
|
||||||
|
|
||||||
// No depth handling please.
|
// No depth handling please.
|
||||||
|
@ -1777,8 +1768,7 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR
|
||||||
m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
|
m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
|
||||||
}
|
}
|
||||||
|
|
||||||
GSOffset* off = m_renderer->m_context->offset.tex;
|
m_pages = m_renderer->m_context->offset.tex.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
||||||
m_pages_as_bit = off->GetPagesAsBits(m_TEX0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1808,23 +1798,23 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
|
||||||
m_complete = true; // lame, but better than nothing
|
m_complete = true; // lame, but better than nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
const GSOffset* off = m_renderer->m_context->offset.tex;
|
const GSOffset& off = m_renderer->m_context->offset.tex;
|
||||||
|
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
|
||||||
|
|
||||||
uint32 blocks = 0;
|
uint32 blocks = 0;
|
||||||
|
|
||||||
if (m_repeating)
|
if (m_repeating)
|
||||||
{
|
{
|
||||||
for (int y = r.top; y < r.bottom; y += bs.y)
|
for (int y = r.top; y < r.bottom; y += bs.y, bn.nextBlockY())
|
||||||
{
|
{
|
||||||
uint32 base = off->block.row[y >> 3u];
|
for (int x = r.left; x < r.right; bn.nextBlockX(), x += bs.x)
|
||||||
|
|
||||||
for (int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
|
|
||||||
{
|
{
|
||||||
uint32 block = base + off->block.col[x >> 3u];
|
int i = (bn.blkY() << 7) + bn.blkX();
|
||||||
|
uint32 block = bn.valueNoWrap();
|
||||||
|
|
||||||
if (block < MAX_BLOCKS || m_wrap_gs_mem)
|
if (block < MAX_BLOCKS || m_wrap_gs_mem)
|
||||||
{
|
{
|
||||||
uint32 addr = (i >> 3u) % MAX_BLOCKS;
|
uint32 addr = i % MAX_BLOCKS;
|
||||||
|
|
||||||
uint32 row = addr >> 5u;
|
uint32 row = addr >> 5u;
|
||||||
uint32 col = 1 << (addr & 31u);
|
uint32 col = 1 << (addr & 31u);
|
||||||
|
@ -1843,13 +1833,11 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int y = r.top; y < r.bottom; y += bs.y)
|
for (int y = r.top; y < r.bottom; y += bs.y, bn.nextBlockY())
|
||||||
{
|
{
|
||||||
uint32 base = off->block.row[y >> 3u];
|
for (int x = r.left; x < r.right; x += bs.x, bn.nextBlockX())
|
||||||
|
|
||||||
for (int x = r.left; x < r.right; x += bs.x)
|
|
||||||
{
|
{
|
||||||
uint32 block = base + off->block.col[x >> 3u];
|
uint32 block = bn.valueNoWrap();
|
||||||
|
|
||||||
if (block < MAX_BLOCKS || m_wrap_gs_mem)
|
if (block < MAX_BLOCKS || m_wrap_gs_mem)
|
||||||
{
|
{
|
||||||
|
@ -1951,7 +1939,7 @@ void GSTextureCache::Source::Flush(uint32 count, int layer)
|
||||||
|
|
||||||
GSLocalMemory& mem = m_renderer->m_mem;
|
GSLocalMemory& mem = m_renderer->m_mem;
|
||||||
|
|
||||||
const GSOffset* off = m_renderer->m_context->offset.tex;
|
const GSOffset& off = m_renderer->m_context->offset.tex;
|
||||||
|
|
||||||
GSLocalMemory::readTexture rtx = psm.rtx;
|
GSLocalMemory::readTexture rtx = psm.rtx;
|
||||||
|
|
||||||
|
@ -2079,7 +2067,7 @@ void GSTextureCache::Target::Update()
|
||||||
|
|
||||||
GSTexture* t = m_renderer->m_dev->CreateTexture(w, h);
|
GSTexture* t = m_renderer->m_dev->CreateTexture(w, h);
|
||||||
|
|
||||||
const GSOffset* off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
|
GSOffset off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
|
||||||
|
|
||||||
GSTexture::GSMap m;
|
GSTexture::GSMap m;
|
||||||
|
|
||||||
|
@ -2130,7 +2118,7 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect)
|
||||||
|
|
||||||
// GSTextureCache::SourceMap
|
// GSTextureCache::SourceMap
|
||||||
|
|
||||||
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off)
|
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSOffset& off)
|
||||||
{
|
{
|
||||||
m_surfaces.insert(s);
|
m_surfaces.insert(s);
|
||||||
|
|
||||||
|
@ -2147,26 +2135,10 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset*
|
||||||
}
|
}
|
||||||
|
|
||||||
// The source pointer will be stored/duplicated in all m_map[array of pages]
|
// The source pointer will be stored/duplicated in all m_map[array of pages]
|
||||||
for (size_t i = 0; i < countof(m_pages); i++)
|
s->m_pages.loopPages([this, s](uint32 page)
|
||||||
{
|
{
|
||||||
if (uint32 p = s->m_pages_as_bit[i])
|
s->m_erase_it[page] = m_map[page].InsertFront(s);
|
||||||
{
|
});
|
||||||
auto* m = &m_map[i << 5];
|
|
||||||
auto* e = &s->m_erase_it[i << 5];
|
|
||||||
|
|
||||||
unsigned long j;
|
|
||||||
|
|
||||||
while (_BitScanForward(&j, p))
|
|
||||||
{
|
|
||||||
// FIXME: this statement could be optimized to a single ASM instruction (instead of 4)
|
|
||||||
// Either BTR (AKA bit test and reset). Depends on the previous instruction.
|
|
||||||
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but require BMI1 (basically a recent CPU)
|
|
||||||
p ^= 1U << j;
|
|
||||||
|
|
||||||
e[j] = m[j].InsertFront(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSTextureCache::SourceMap::RemoveAll()
|
void GSTextureCache::SourceMap::RemoveAll()
|
||||||
|
@ -2197,26 +2169,10 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < countof(m_pages); i++)
|
s->m_pages.loopPages([this, s](uint32 page)
|
||||||
{
|
{
|
||||||
if (uint32 p = s->m_pages_as_bit[i])
|
m_map[page].EraseIndex(s->m_erase_it[page]);
|
||||||
{
|
});
|
||||||
auto* m = &m_map[i << 5];
|
|
||||||
const auto* e = &s->m_erase_it[i << 5];
|
|
||||||
|
|
||||||
unsigned long j;
|
|
||||||
|
|
||||||
while (_BitScanForward(&j, p))
|
|
||||||
{
|
|
||||||
// FIXME: this statement could be optimized to a single ASM instruction (instead of 4)
|
|
||||||
// Either BTR (AKA bit test and reset). Depends on the previous instruction.
|
|
||||||
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but require BMI1 (basically a recent CPU)
|
|
||||||
p ^= 1U << j;
|
|
||||||
|
|
||||||
m[j].EraseIndex(e[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
delete s;
|
delete s;
|
||||||
|
|
|
@ -125,7 +125,7 @@ public:
|
||||||
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
|
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
|
||||||
// Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase
|
// Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase
|
||||||
std::array<uint16, MAX_PAGES> m_erase_it;
|
std::array<uint16, MAX_PAGES> m_erase_it;
|
||||||
uint32* m_pages_as_bit;
|
GSOffset::PageLooper m_pages;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false);
|
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false);
|
||||||
|
@ -189,7 +189,7 @@ public:
|
||||||
memset(m_pages, 0, sizeof(m_pages));
|
memset(m_pages, 0, sizeof(m_pages));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off);
|
void Add(Source* s, const GIFRegTEX0& TEX0, const GSOffset& off);
|
||||||
void RemoveAll();
|
void RemoveAll();
|
||||||
void RemovePartial();
|
void RemovePartial();
|
||||||
void RemoveAt(Source* s);
|
void RemoveAt(Source* s);
|
||||||
|
@ -249,8 +249,8 @@ public:
|
||||||
|
|
||||||
void InvalidateVideoMemType(int type, uint32 bp);
|
void InvalidateVideoMemType(int type, uint32 bp);
|
||||||
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
|
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
|
||||||
void InvalidateVideoMem(GSOffset* off, const GSVector4i& r, bool target = true);
|
void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);
|
||||||
void InvalidateLocalMem(GSOffset* off, const GSVector4i& r);
|
void InvalidateLocalMem(const GSOffset& off, const GSVector4i& r);
|
||||||
|
|
||||||
void IncAge();
|
void IncAge();
|
||||||
bool UserHacks_HalfPixelOffset;
|
bool UserHacks_HalfPixelOffset;
|
||||||
|
|
|
@ -78,7 +78,7 @@ void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
|
||||||
{
|
{
|
||||||
// TODO: block level write
|
// TODO: block level write
|
||||||
|
|
||||||
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||||
|
|
||||||
switch (TEX0.PSM)
|
switch (TEX0.PSM)
|
||||||
{
|
{
|
||||||
|
@ -125,7 +125,7 @@ void GSTextureCacheOGL::Read(Source* t, const GSVector4i& r)
|
||||||
|
|
||||||
if (offscreen->Map(m, &r_offscreen))
|
if (offscreen->Map(m, &r_offscreen))
|
||||||
{
|
{
|
||||||
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||||
|
|
||||||
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
|
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
|
||||||
|
|
||||||
|
|
|
@ -2811,31 +2811,28 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||||
|
|
||||||
if (m != 0xffffffff)
|
if (m != 0xffffffff)
|
||||||
{
|
{
|
||||||
const int* zbr = m_global.zbr;
|
|
||||||
const int* zbc = m_global.zbc;
|
|
||||||
|
|
||||||
uint32 z = v.t.u32[3]; // (uint32)v.p.z;
|
uint32 z = v.t.u32[3]; // (uint32)v.p.z;
|
||||||
|
|
||||||
if (m_global.sel.zpsm != 2)
|
if (m_global.sel.zpsm != 2)
|
||||||
{
|
{
|
||||||
if (m == 0)
|
if (m == 0)
|
||||||
{
|
{
|
||||||
DrawRectT<uint32, false>(zbr, zbc, r, z, m);
|
DrawRectT<uint32, false>(m_global.zbo, r, z, m);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
DrawRectT<uint32, true>(zbr, zbc, r, z, m);
|
DrawRectT<uint32, true>(m_global.zbo, r, z, m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ((m & 0xffff) == 0)
|
if ((m & 0xffff) == 0)
|
||||||
{
|
{
|
||||||
DrawRectT<uint16, false>(zbr, zbc, r, z, m);
|
DrawRectT<uint16, false>(m_global.zbo, r, z, m);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
DrawRectT<uint16, true>(zbr, zbc, r, z, m);
|
DrawRectT<uint16, true>(m_global.zbo, r, z, m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2848,9 +2845,6 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||||
|
|
||||||
if (m != 0xffffffff)
|
if (m != 0xffffffff)
|
||||||
{
|
{
|
||||||
const int* fbr = m_global.fbr;
|
|
||||||
const int* fbc = m_global.fbc;
|
|
||||||
|
|
||||||
uint32 c = (GSVector4i(v.c) >> 7).rgba32();
|
uint32 c = (GSVector4i(v.c) >> 7).rgba32();
|
||||||
|
|
||||||
if (m_global.sel.fba)
|
if (m_global.sel.fba)
|
||||||
|
@ -2862,11 +2856,11 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||||
{
|
{
|
||||||
if (m == 0)
|
if (m == 0)
|
||||||
{
|
{
|
||||||
DrawRectT<uint32, false>(fbr, fbc, r, c, m);
|
DrawRectT<uint32, false>(m_global.fbo, r, c, m);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
DrawRectT<uint32, true>(fbr, fbc, r, c, m);
|
DrawRectT<uint32, true>(m_global.fbo, r, c, m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -2875,18 +2869,18 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||||
|
|
||||||
if ((m & 0xffff) == 0)
|
if ((m & 0xffff) == 0)
|
||||||
{
|
{
|
||||||
DrawRectT<uint16, false>(fbr, fbc, r, c, m);
|
DrawRectT<uint16, false>(m_global.fbo, r, c, m);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
DrawRectT<uint16, true>(fbr, fbc, r, c, m);
|
DrawRectT<uint16, true>(m_global.fbo, r, c, m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
|
void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m)
|
||||||
{
|
{
|
||||||
if (m == 0xffffffff)
|
if (m == 0xffffffff)
|
||||||
return;
|
return;
|
||||||
|
@ -2921,25 +2915,25 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col,
|
||||||
|
|
||||||
if (!br.rempty())
|
if (!br.rempty())
|
||||||
{
|
{
|
||||||
FillRect<T, masked>(row, col, GSVector4i(r.x, r.y, r.z, br.y), c, m);
|
FillRect<T, masked>(off, GSVector4i(r.x, r.y, r.z, br.y), c, m);
|
||||||
FillRect<T, masked>(row, col, GSVector4i(r.x, br.w, r.z, r.w), c, m);
|
FillRect<T, masked>(off, GSVector4i(r.x, br.w, r.z, r.w), c, m);
|
||||||
|
|
||||||
if (r.x < br.x || br.z < r.z)
|
if (r.x < br.x || br.z < r.z)
|
||||||
{
|
{
|
||||||
FillRect<T, masked>(row, col, GSVector4i(r.x, br.y, br.x, br.w), c, m);
|
FillRect<T, masked>(off, GSVector4i(r.x, br.y, br.x, br.w), c, m);
|
||||||
FillRect<T, masked>(row, col, GSVector4i(br.z, br.y, r.z, br.w), c, m);
|
FillRect<T, masked>(off, GSVector4i(br.z, br.y, r.z, br.w), c, m);
|
||||||
}
|
}
|
||||||
|
|
||||||
FillBlock<T, masked>(row, col, br, color, mask);
|
FillBlock<T, masked>(off, br, color, mask);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
FillRect<T, masked>(row, col, r, c, m);
|
FillRect<T, masked>(off, r, c, m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
|
void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m)
|
||||||
{
|
{
|
||||||
if (r.x >= r.z)
|
if (r.x >= r.z)
|
||||||
return;
|
return;
|
||||||
|
@ -2948,11 +2942,12 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
|
||||||
|
|
||||||
for (int y = r.y; y < r.w; y++)
|
for (int y = r.y; y < r.w; y++)
|
||||||
{
|
{
|
||||||
T* RESTRICT d = &vm[row[y]];
|
GSOffset::PAHelper pa = off.paMulti(r.x, y);
|
||||||
|
|
||||||
for (int x = r.x; x < r.z; x++)
|
for (; pa.x() < r.z; pa.incX())
|
||||||
{
|
{
|
||||||
d[col[x]] = (T)(!masked ? c : (c | (d[col[x]] & m)));
|
T& d = vm[pa.value()];
|
||||||
|
d = (T)(!masked ? c : (c | (d & m)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2960,7 +2955,7 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m)
|
void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m)
|
||||||
{
|
{
|
||||||
if (r.x >= r.z)
|
if (r.x >= r.z)
|
||||||
return;
|
return;
|
||||||
|
@ -2969,11 +2964,9 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
|
||||||
|
|
||||||
for (int y = r.y; y < r.w; y += 8)
|
for (int y = r.y; y < r.w; y += 8)
|
||||||
{
|
{
|
||||||
T* RESTRICT d = &vm[row[y]];
|
|
||||||
|
|
||||||
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
|
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
|
||||||
{
|
{
|
||||||
GSVector8i* RESTRICT p = (GSVector8i*)&d[col[x]];
|
GSVector8i* RESTRICT p = (GSVector8i*)&vm[off.pa(x, y)];
|
||||||
|
|
||||||
p[0] = !masked ? c : (c | (p[0] & m));
|
p[0] = !masked ? c : (c | (p[0] & m));
|
||||||
p[1] = !masked ? c : (c | (p[1] & m));
|
p[1] = !masked ? c : (c | (p[1] & m));
|
||||||
|
@ -2990,7 +2983,7 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
|
||||||
#else
|
#else
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
|
void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
|
||||||
{
|
{
|
||||||
if (r.x >= r.z)
|
if (r.x >= r.z)
|
||||||
return;
|
return;
|
||||||
|
@ -2999,11 +2992,9 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
|
||||||
|
|
||||||
for (int y = r.y; y < r.w; y += 8)
|
for (int y = r.y; y < r.w; y += 8)
|
||||||
{
|
{
|
||||||
T* RESTRICT d = &vm[row[y]];
|
|
||||||
|
|
||||||
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
|
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
|
||||||
{
|
{
|
||||||
GSVector4i* RESTRICT p = (GSVector4i*)&d[col[x]];
|
GSVector4i* RESTRICT p = (GSVector4i*)&vm[off.pa(x, y)];
|
||||||
|
|
||||||
for (int i = 0; i < 16; i += 4)
|
for (int i = 0; i < 16; i += 4)
|
||||||
{
|
{
|
||||||
|
|
|
@ -38,20 +38,20 @@ protected:
|
||||||
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
|
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
|
void DrawRectT(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m);
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
__forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
|
__forceinline void FillRect(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m);
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m);
|
__forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
|
__forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -409,17 +409,20 @@ void GSRendererSW::Draw()
|
||||||
|
|
||||||
// GSScanlineGlobalData& gd = sd->global;
|
// GSScanlineGlobalData& gd = sd->global;
|
||||||
|
|
||||||
uint32* fb_pages = NULL;
|
GSOffset::PageLooper* fb_pages = NULL;
|
||||||
uint32* zb_pages = NULL;
|
GSOffset::PageLooper* zb_pages = NULL;
|
||||||
|
GSOffset::PageLooper _fb_pages, _zb_pages;
|
||||||
|
|
||||||
if (sd->global.sel.fb)
|
if (sd->global.sel.fb)
|
||||||
{
|
{
|
||||||
fb_pages = m_context->offset.fb->GetPages(r);
|
_fb_pages = m_context->offset.fb.pageLooperForRect(r);
|
||||||
|
fb_pages = &_fb_pages;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sd->global.sel.zb)
|
if (sd->global.sel.zb)
|
||||||
{
|
{
|
||||||
zb_pages = m_context->offset.zb->GetPages(r);
|
_zb_pages = m_context->offset.zb.pageLooperForRect(r);
|
||||||
|
zb_pages = &_zb_pages;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if there is an overlap between this and previous targets
|
// check if there is an overlap between this and previous targets
|
||||||
|
@ -438,7 +441,7 @@ void GSRendererSW::Draw()
|
||||||
|
|
||||||
// addref source and target pages
|
// addref source and target pages
|
||||||
|
|
||||||
sd->UsePages(fb_pages, m_context->offset.fb->psm, zb_pages, m_context->offset.zb->psm);
|
sd->UsePages(fb_pages, m_context->offset.fb.psm(), zb_pages, m_context->offset.zb.psm());
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
|
@ -641,26 +644,26 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
||||||
fflush(s_fp);
|
fflush(s_fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
GSOffset* off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
GSOffset off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||||
|
GSOffset::PageLooper pages = off.pageLooperForRect(r);
|
||||||
off->GetPages(r, m_tmp_pages);
|
|
||||||
|
|
||||||
// check if the changing pages either used as a texture or a target
|
// check if the changing pages either used as a texture or a target
|
||||||
|
|
||||||
if (!m_rl->IsSynced())
|
if (!m_rl->IsSynced())
|
||||||
{
|
{
|
||||||
for (uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++)
|
pages.loopPagesWithBreak([&](uint32 page)
|
||||||
{
|
{
|
||||||
if (m_fzb_pages[*p] | m_tex_pages[*p])
|
if (m_fzb_pages[page] | m_tex_pages[page])
|
||||||
{
|
{
|
||||||
Sync(6);
|
Sync(6);
|
||||||
|
|
||||||
break;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
return true;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
m_tc->InvalidatePages(m_tmp_pages, off->psm); // if texture update runs on a thread and Sync(5) happens then this must come later
|
m_tc->InvalidatePages(pages, off.psm()); // if texture update runs on a thread and Sync(5) happens then this must come later
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||||
|
@ -673,77 +676,92 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
||||||
|
|
||||||
if (!m_rl->IsSynced())
|
if (!m_rl->IsSynced())
|
||||||
{
|
{
|
||||||
GSOffset* off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
|
GSOffset off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
|
||||||
|
GSOffset::PageLooper pages = off.pageLooperForRect(r);
|
||||||
|
|
||||||
off->GetPages(r, m_tmp_pages);
|
pages.loopPagesWithBreak([&](uint32 page)
|
||||||
|
|
||||||
for (uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++)
|
|
||||||
{
|
{
|
||||||
if (m_fzb_pages[*p])
|
if (m_fzb_pages[page])
|
||||||
{
|
{
|
||||||
Sync(7);
|
Sync(7);
|
||||||
|
|
||||||
break;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
return true;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRendererSW::UsePages(const uint32* pages, const int type)
|
void GSRendererSW::UsePages(const GSOffset::PageLooper& pages, const int type)
|
||||||
{
|
{
|
||||||
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
|
pages.loopPages([=](uint32 page)
|
||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX);
|
ASSERT((m_fzb_pages[page] & 0xFFFF) < USHRT_MAX);
|
||||||
m_fzb_pages[*p] += 1;
|
m_fzb_pages[page] += 1;
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
ASSERT((m_fzb_pages[*p] >> 16) < USHRT_MAX);
|
ASSERT((m_fzb_pages[page] >> 16) < USHRT_MAX);
|
||||||
m_fzb_pages[*p] += 0x10000;
|
m_fzb_pages[page] += 0x10000;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
ASSERT(m_tex_pages[*p] < USHRT_MAX);
|
ASSERT(m_tex_pages[page] < USHRT_MAX);
|
||||||
m_tex_pages[*p] += 1;
|
m_tex_pages[page] += 1;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRendererSW::ReleasePages(const uint32* pages, const int type)
|
void GSRendererSW::ReleasePages(const GSOffset::PageLooper& pages, const int type)
|
||||||
{
|
{
|
||||||
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
|
pages.loopPages([=](uint32 page)
|
||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0);
|
ASSERT((m_fzb_pages[page] & 0xFFFF) > 0);
|
||||||
m_fzb_pages[*p] -= 1;
|
m_fzb_pages[page] -= 1;
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
ASSERT((m_fzb_pages[*p] >> 16) > 0);
|
ASSERT((m_fzb_pages[page] >> 16) > 0);
|
||||||
m_fzb_pages[*p] -= 0x10000;
|
m_fzb_pages[page] -= 0x10000;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
ASSERT(m_tex_pages[*p] > 0);
|
ASSERT(m_tex_pages[page] > 0);
|
||||||
m_tex_pages[*p] -= 1;
|
m_tex_pages[page] -= 1;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r)
|
bool GSRendererSW::CheckTargetPages(const GSOffset::PageLooper* fb_pages, const GSOffset::PageLooper* zb_pages, const GSVector4i& r)
|
||||||
{
|
{
|
||||||
bool synced = m_rl->IsSynced();
|
bool synced = m_rl->IsSynced();
|
||||||
|
|
||||||
bool fb = fb_pages != NULL;
|
bool fb = fb_pages != NULL;
|
||||||
bool zb = zb_pages != NULL;
|
bool zb = zb_pages != NULL;
|
||||||
|
|
||||||
|
GSOffset::PageLooper _fb_pages, _zb_pages;
|
||||||
|
auto requirePages = [&]
|
||||||
|
{
|
||||||
|
if (fb_pages == NULL)
|
||||||
|
{
|
||||||
|
_fb_pages = m_context->offset.fb.pageLooperForRect(r);
|
||||||
|
fb_pages = &_fb_pages;
|
||||||
|
}
|
||||||
|
if (zb_pages == NULL)
|
||||||
|
{
|
||||||
|
_zb_pages = m_context->offset.zb.pageLooperForRect(r);
|
||||||
|
zb_pages = &_zb_pages;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
bool res = false;
|
bool res = false;
|
||||||
|
|
||||||
if (m_fzb != m_context->offset.fzb4)
|
if (m_fzb != m_context->offset.fzb4)
|
||||||
|
@ -753,17 +771,14 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
||||||
m_fzb = m_context->offset.fzb4;
|
m_fzb = m_context->offset.fzb4;
|
||||||
m_fzb_bbox = r;
|
m_fzb_bbox = r;
|
||||||
|
|
||||||
if (fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r);
|
|
||||||
if (zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r);
|
|
||||||
|
|
||||||
memset(m_fzb_cur_pages, 0, sizeof(m_fzb_cur_pages));
|
memset(m_fzb_cur_pages, 0, sizeof(m_fzb_cur_pages));
|
||||||
|
|
||||||
uint32 used = 0;
|
uint32 used = 0;
|
||||||
|
|
||||||
for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
|
requirePages();
|
||||||
{
|
|
||||||
uint32 i = *p;
|
|
||||||
|
|
||||||
|
fb_pages->loopPages([&](uint32 i)
|
||||||
|
{
|
||||||
uint32 row = i >> 5;
|
uint32 row = i >> 5;
|
||||||
uint32 col = 1 << (i & 31);
|
uint32 col = 1 << (i & 31);
|
||||||
|
|
||||||
|
@ -771,12 +786,10 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
||||||
|
|
||||||
used |= m_fzb_pages[i];
|
used |= m_fzb_pages[i];
|
||||||
used |= m_tex_pages[i];
|
used |= m_tex_pages[i];
|
||||||
}
|
});
|
||||||
|
|
||||||
for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
|
zb_pages->loopPages([&](uint32 i)
|
||||||
{
|
{
|
||||||
uint32 i = *p;
|
|
||||||
|
|
||||||
uint32 row = i >> 5;
|
uint32 row = i >> 5;
|
||||||
uint32 col = 1 << (i & 31);
|
uint32 col = 1 << (i & 31);
|
||||||
|
|
||||||
|
@ -784,7 +797,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
||||||
|
|
||||||
used |= m_fzb_pages[i];
|
used |= m_fzb_pages[i];
|
||||||
used |= m_tex_pages[i];
|
used |= m_tex_pages[i];
|
||||||
}
|
});
|
||||||
|
|
||||||
if (!synced)
|
if (!synced)
|
||||||
{
|
{
|
||||||
|
@ -816,15 +829,12 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
||||||
{
|
{
|
||||||
// drawing area is larger than previous time, check new parts only to avoid false positives (m_fzb_cur_pages guards)
|
// drawing area is larger than previous time, check new parts only to avoid false positives (m_fzb_cur_pages guards)
|
||||||
|
|
||||||
if (fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r);
|
requirePages();
|
||||||
if (zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r);
|
|
||||||
|
|
||||||
uint32 used = 0;
|
uint32 used = 0;
|
||||||
|
|
||||||
for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
|
fb_pages->loopPages([&](uint32 i)
|
||||||
{
|
{
|
||||||
uint32 i = *p;
|
|
||||||
|
|
||||||
uint32 row = i >> 5;
|
uint32 row = i >> 5;
|
||||||
uint32 col = 1 << (i & 31);
|
uint32 col = 1 << (i & 31);
|
||||||
|
|
||||||
|
@ -834,12 +844,10 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
||||||
|
|
||||||
used |= m_fzb_pages[i];
|
used |= m_fzb_pages[i];
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
|
|
||||||
for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
|
zb_pages->loopPages([&](uint32 i)
|
||||||
{
|
{
|
||||||
uint32 i = *p;
|
|
||||||
|
|
||||||
uint32 row = i >> 5;
|
uint32 row = i >> 5;
|
||||||
uint32 col = 1 << (i & 31);
|
uint32 col = 1 << (i & 31);
|
||||||
|
|
||||||
|
@ -849,7 +857,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
||||||
|
|
||||||
used |= m_fzb_pages[i];
|
used |= m_fzb_pages[i];
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
|
|
||||||
if (!synced)
|
if (!synced)
|
||||||
{
|
{
|
||||||
|
@ -873,9 +881,9 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
||||||
|
|
||||||
if (fb && !res)
|
if (fb && !res)
|
||||||
{
|
{
|
||||||
for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
|
fb_pages->loopPagesWithBreak([&](uint32 page)
|
||||||
{
|
{
|
||||||
if (m_fzb_pages[*p] & 0xffff0000)
|
if (m_fzb_pages[page] & 0xffff0000)
|
||||||
{
|
{
|
||||||
if (LOG)
|
if (LOG)
|
||||||
{
|
{
|
||||||
|
@ -885,16 +893,17 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
||||||
|
|
||||||
res = true;
|
res = true;
|
||||||
|
|
||||||
break;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
return true;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zb && !res)
|
if (zb && !res)
|
||||||
{
|
{
|
||||||
for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
|
zb_pages->loopPagesWithBreak([&](uint32 page)
|
||||||
{
|
{
|
||||||
if (m_fzb_pages[*p] & 0x0000ffff)
|
if (m_fzb_pages[page] & 0x0000ffff)
|
||||||
{
|
{
|
||||||
if (LOG)
|
if (LOG)
|
||||||
{
|
{
|
||||||
|
@ -904,16 +913,14 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
||||||
|
|
||||||
res = true;
|
res = true;
|
||||||
|
|
||||||
break;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
return true;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!fb && fb_pages != NULL) delete[] fb_pages;
|
|
||||||
if (!zb && zb_pages != NULL) delete[] zb_pages;
|
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -923,19 +930,22 @@ bool GSRendererSW::CheckSourcePages(SharedData* sd)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; sd->m_tex[i].t != NULL; i++)
|
for (size_t i = 0; sd->m_tex[i].t != NULL; i++)
|
||||||
{
|
{
|
||||||
sd->m_tex[i].t->m_offset->GetPages(sd->m_tex[i].r, m_tmp_pages);
|
GSOffset::PageLooper pages = sd->m_tex[i].t->m_offset.pageLooperForRect(sd->m_tex[i].r);
|
||||||
|
|
||||||
uint32* pages = m_tmp_pages; // sd->m_tex[i].t->m_pages.n;
|
bool ret = false;
|
||||||
|
pages.loopPagesWithBreak([&](uint32 pages)
|
||||||
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
|
|
||||||
{
|
{
|
||||||
// TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2)
|
// TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2)
|
||||||
|
|
||||||
if (m_fzb_pages[*p]) // currently being drawn to? => sync
|
if (m_fzb_pages[pages]) // currently being drawn to? => sync
|
||||||
{
|
{
|
||||||
return true;
|
ret = true;
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
return true;
|
||||||
|
});
|
||||||
|
if (ret)
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -954,10 +964,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
|
|
||||||
gd.vm = m_mem.m_vm8;
|
gd.vm = m_mem.m_vm8;
|
||||||
|
|
||||||
gd.fbr = context->offset.fb->pixel.row;
|
gd.fbo = context->offset.fb;
|
||||||
gd.zbr = context->offset.zb->pixel.row;
|
gd.zbo = context->offset.zb;
|
||||||
gd.fbc = context->offset.fb->pixel.col[0];
|
|
||||||
gd.zbc = context->offset.zb->pixel.col[0];
|
|
||||||
gd.fzbr = context->offset.fzb4->row;
|
gd.fzbr = context->offset.fzb4->row;
|
||||||
gd.fzbc = context->offset.fzb4->col;
|
gd.fzbc = context->offset.fzb4->col;
|
||||||
|
|
||||||
|
@ -1423,8 +1431,6 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
|
|
||||||
GSRendererSW::SharedData::SharedData(GSRendererSW* parent)
|
GSRendererSW::SharedData::SharedData(GSRendererSW* parent)
|
||||||
: m_parent(parent)
|
: m_parent(parent)
|
||||||
, m_fb_pages(NULL)
|
|
||||||
, m_zb_pages(NULL)
|
|
||||||
, m_fpsm(0)
|
, m_fpsm(0)
|
||||||
, m_zpsm(0)
|
, m_zpsm(0)
|
||||||
, m_using_pages(false)
|
, m_using_pages(false)
|
||||||
|
@ -1460,7 +1466,7 @@ GSRendererSW::SharedData::~SharedData()
|
||||||
|
|
||||||
//static TransactionScope::Lock s_lock;
|
//static TransactionScope::Lock s_lock;
|
||||||
|
|
||||||
void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm)
|
void GSRendererSW::SharedData::UsePages(const GSOffset::PageLooper* fb_pages, int fpsm, const GSOffset::PageLooper* zb_pages, int zpsm)
|
||||||
{
|
{
|
||||||
if (m_using_pages)
|
if (m_using_pages)
|
||||||
return;
|
return;
|
||||||
|
@ -1468,24 +1474,26 @@ void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const
|
||||||
{
|
{
|
||||||
//TransactionScope scope(s_lock);
|
//TransactionScope scope(s_lock);
|
||||||
|
|
||||||
if (global.sel.fb && fb_pages != NULL)
|
if (global.sel.fb)
|
||||||
{
|
{
|
||||||
m_parent->UsePages(fb_pages, 0);
|
m_parent->UsePages(*fb_pages, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (global.sel.zb && zb_pages != NULL)
|
if (global.sel.zb)
|
||||||
{
|
{
|
||||||
m_parent->UsePages(zb_pages, 1);
|
m_parent->UsePages(*zb_pages, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; m_tex[i].t != NULL; i++)
|
for (size_t i = 0; m_tex[i].t != NULL; i++)
|
||||||
{
|
{
|
||||||
m_parent->UsePages(m_tex[i].t->m_pages.n, 2);
|
m_parent->UsePages(m_tex[i].t->m_pages, 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
m_fb_pages = fb_pages;
|
if (fb_pages)
|
||||||
m_zb_pages = zb_pages;
|
m_fb_pages = *fb_pages;
|
||||||
|
if (zb_pages)
|
||||||
|
m_zb_pages = *zb_pages;
|
||||||
m_fpsm = fpsm;
|
m_fpsm = fpsm;
|
||||||
m_zpsm = zpsm;
|
m_zpsm = zpsm;
|
||||||
|
|
||||||
|
@ -1512,16 +1520,10 @@ void GSRendererSW::SharedData::ReleasePages()
|
||||||
|
|
||||||
for (size_t i = 0; m_tex[i].t != NULL; i++)
|
for (size_t i = 0; m_tex[i].t != NULL; i++)
|
||||||
{
|
{
|
||||||
m_parent->ReleasePages(m_tex[i].t->m_pages.n, 2);
|
m_parent->ReleasePages(m_tex[i].t->m_pages, 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
delete[] m_fb_pages;
|
|
||||||
delete[] m_zb_pages;
|
|
||||||
|
|
||||||
m_fb_pages = NULL;
|
|
||||||
m_zb_pages = NULL;
|
|
||||||
|
|
||||||
m_using_pages = false;
|
m_using_pages = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -35,8 +35,8 @@ class GSRendererSW : public GSRenderer
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRendererSW* m_parent;
|
GSRendererSW* m_parent;
|
||||||
const uint32* m_fb_pages;
|
GSOffset::PageLooper m_fb_pages;
|
||||||
const uint32* m_zb_pages;
|
GSOffset::PageLooper m_zb_pages;
|
||||||
int m_fpsm;
|
int m_fpsm;
|
||||||
int m_zpsm;
|
int m_zpsm;
|
||||||
bool m_using_pages;
|
bool m_using_pages;
|
||||||
|
@ -52,7 +52,7 @@ class GSRendererSW : public GSRenderer
|
||||||
SharedData(GSRendererSW* parent);
|
SharedData(GSRendererSW* parent);
|
||||||
virtual ~SharedData();
|
virtual ~SharedData();
|
||||||
|
|
||||||
void UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm);
|
void UsePages(const GSOffset::PageLooper* fb_pages, int fpsm, const GSOffset::PageLooper* zb_pages, int zpsm);
|
||||||
void ReleasePages();
|
void ReleasePages();
|
||||||
|
|
||||||
void SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level);
|
void SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level);
|
||||||
|
@ -76,7 +76,6 @@ protected:
|
||||||
uint32 m_fzb_cur_pages[16];
|
uint32 m_fzb_cur_pages[16];
|
||||||
std::atomic<uint32> m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
|
std::atomic<uint32> m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
|
||||||
std::atomic<uint16> m_tex_pages[512];
|
std::atomic<uint16> m_tex_pages[512];
|
||||||
uint32 m_tmp_pages[512 + 1];
|
|
||||||
|
|
||||||
void Reset();
|
void Reset();
|
||||||
void VSync(int field);
|
void VSync(int field);
|
||||||
|
@ -90,10 +89,10 @@ protected:
|
||||||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||||
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
|
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
|
||||||
|
|
||||||
void UsePages(const uint32* pages, const int type);
|
void UsePages(const GSOffset::PageLooper& pages, const int type);
|
||||||
void ReleasePages(const uint32* pages, const int type);
|
void ReleasePages(const GSOffset::PageLooper& pages, const int type);
|
||||||
|
|
||||||
bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r);
|
bool CheckTargetPages(const GSOffset::PageLooper* fb_pages, const GSOffset::PageLooper* zb_pages, const GSVector4i& r);
|
||||||
bool CheckSourcePages(SharedData* sd);
|
bool CheckSourcePages(SharedData* sd);
|
||||||
|
|
||||||
bool GetScanlineGlobalData(SharedData* data);
|
bool GetScanlineGlobalData(SharedData* data);
|
||||||
|
|
|
@ -127,10 +127,8 @@ struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a p
|
||||||
uint32* clut;
|
uint32* clut;
|
||||||
GSVector4i* dimx;
|
GSVector4i* dimx;
|
||||||
|
|
||||||
const int* fbr;
|
GSOffset fbo;
|
||||||
const int* zbr;
|
GSOffset zbo;
|
||||||
const int* fbc;
|
|
||||||
const int* zbc;
|
|
||||||
const GSVector2i* fzbr;
|
const GSVector2i* fzbr;
|
||||||
const GSVector2i* fzbc;
|
const GSVector2i* fzbc;
|
||||||
|
|
||||||
|
|
|
@ -62,21 +62,18 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
|
||||||
|
|
||||||
m_textures.insert(t);
|
m_textures.insert(t);
|
||||||
|
|
||||||
for (const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++)
|
t->m_pages.loopPages([&](uint32 page)
|
||||||
{
|
{
|
||||||
const uint32 page = *p;
|
|
||||||
t->m_erase_it[page] = m_map[page].InsertFront(t);
|
t->m_erase_it[page] = m_map[page].InsertFront(t);
|
||||||
}
|
});
|
||||||
|
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
|
void GSTextureCacheSW::InvalidatePages(const GSOffset::PageLooper& pages, uint32 psm)
|
||||||
{
|
{
|
||||||
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
|
pages.loopPages([&](uint32 page)
|
||||||
{
|
{
|
||||||
const uint32 page = *p;
|
|
||||||
|
|
||||||
for (Texture* t : m_map[page])
|
for (Texture* t : m_map[page])
|
||||||
{
|
{
|
||||||
if (GSUtil::HasSharedBits(psm, t->m_sharedbits))
|
if (GSUtil::HasSharedBits(psm, t->m_sharedbits))
|
||||||
|
@ -98,7 +95,7 @@ void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
|
||||||
t->m_complete = false;
|
t->m_complete = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSTextureCacheSW::RemoveAll()
|
void GSTextureCacheSW::RemoveAll()
|
||||||
|
@ -124,11 +121,10 @@ void GSTextureCacheSW::IncAge()
|
||||||
{
|
{
|
||||||
i = m_textures.erase(i);
|
i = m_textures.erase(i);
|
||||||
|
|
||||||
for (const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++)
|
t->m_pages.loopPages([&](uint32 page)
|
||||||
{
|
{
|
||||||
const uint32 page = *p;
|
|
||||||
m_map[page].EraseIndex(t->m_erase_it[page]);
|
m_map[page].EraseIndex(t->m_erase_it[page]);
|
||||||
}
|
});
|
||||||
|
|
||||||
delete t;
|
delete t;
|
||||||
}
|
}
|
||||||
|
@ -162,9 +158,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
|
||||||
m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);
|
m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);
|
||||||
|
|
||||||
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||||
|
m_pages = m_offset.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
||||||
m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
|
||||||
memcpy(m_pages.bm, m_offset->GetPagesAsBits(TEX0), sizeof(m_pages.bm));
|
|
||||||
|
|
||||||
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
|
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
|
||||||
|
|
||||||
|
@ -176,8 +170,6 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
|
||||||
|
|
||||||
GSTextureCacheSW::Texture::~Texture()
|
GSTextureCacheSW::Texture::~Texture()
|
||||||
{
|
{
|
||||||
delete[] m_pages.n;
|
|
||||||
|
|
||||||
if (m_buff)
|
if (m_buff)
|
||||||
{
|
{
|
||||||
_aligned_free(m_buff);
|
_aligned_free(m_buff);
|
||||||
|
@ -223,7 +215,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
|
|
||||||
GSLocalMemory& mem = m_state->m_mem;
|
GSLocalMemory& mem = m_state->m_mem;
|
||||||
|
|
||||||
const GSOffset* RESTRICT off = m_offset;
|
GSOffset off = m_offset;
|
||||||
|
|
||||||
uint32 blocks = 0;
|
uint32 blocks = 0;
|
||||||
|
|
||||||
|
@ -235,22 +227,20 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
|
|
||||||
int block_pitch = pitch * bs.y;
|
int block_pitch = pitch * bs.y;
|
||||||
|
|
||||||
r = r.srl32(3);
|
shift += off.blockShiftX();
|
||||||
|
int bottom = r.bottom >> off.blockShiftY();
|
||||||
|
int right = r.right >> off.blockShiftX();
|
||||||
|
|
||||||
bs.x >>= 3;
|
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
|
||||||
bs.y >>= 3;
|
|
||||||
|
|
||||||
shift += 3;
|
|
||||||
|
|
||||||
if (m_repeating)
|
if (m_repeating)
|
||||||
{
|
{
|
||||||
for (int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
|
for (; bn.blkY() < bottom; bn.nextBlockY(), dst += block_pitch)
|
||||||
{
|
{
|
||||||
uint32 base = off->block.row[y];
|
for (; bn.blkX() < right; bn.nextBlockX())
|
||||||
|
|
||||||
for (int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
|
|
||||||
{
|
{
|
||||||
uint32 block = (base + off->block.col[x]) % MAX_BLOCKS;
|
int i = (bn.blkY() << 7) + bn.blkX();
|
||||||
|
uint32 block = bn.value();
|
||||||
|
|
||||||
uint32 row = i >> 5;
|
uint32 row = i >> 5;
|
||||||
uint32 col = 1 << (i & 31);
|
uint32 col = 1 << (i & 31);
|
||||||
|
@ -259,7 +249,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
{
|
{
|
||||||
m_valid[row] |= col;
|
m_valid[row] |= col;
|
||||||
|
|
||||||
(mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA);
|
(mem.*rtxbP)(block, &dst[bn.blkX() << shift], pitch, m_TEXA);
|
||||||
|
|
||||||
blocks++;
|
blocks++;
|
||||||
}
|
}
|
||||||
|
@ -268,13 +258,11 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
|
for (; bn.blkY() < bottom; bn.nextBlockY(), dst += block_pitch)
|
||||||
{
|
{
|
||||||
uint32 base = off->block.row[y];
|
for (; bn.blkX() < right; bn.nextBlockX())
|
||||||
|
|
||||||
for (int x = r.left; x < r.right; x += bs.x)
|
|
||||||
{
|
{
|
||||||
uint32 block = (base + off->block.col[x]) % MAX_BLOCKS;
|
uint32 block = bn.value();
|
||||||
|
|
||||||
uint32 row = block >> 5;
|
uint32 row = block >> 5;
|
||||||
uint32 col = 1 << (block & 31);
|
uint32 col = 1 << (block & 31);
|
||||||
|
@ -283,7 +271,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
{
|
{
|
||||||
m_valid[row] |= col;
|
m_valid[row] |= col;
|
||||||
|
|
||||||
(mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA);
|
(mem.*rtxbP)(block, &dst[bn.blkX() << shift], pitch, m_TEXA);
|
||||||
|
|
||||||
blocks++;
|
blocks++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,8 @@ public:
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
GSState* m_state;
|
GSState* m_state;
|
||||||
GSOffset* m_offset;
|
GSOffset m_offset;
|
||||||
|
GSOffset::PageLooper m_pages;
|
||||||
GIFRegTEX0 m_TEX0;
|
GIFRegTEX0 m_TEX0;
|
||||||
GIFRegTEXA m_TEXA;
|
GIFRegTEXA m_TEXA;
|
||||||
void* m_buff;
|
void* m_buff;
|
||||||
|
@ -36,7 +37,6 @@ public:
|
||||||
std::vector<GSVector2i>* m_p2t;
|
std::vector<GSVector2i>* m_p2t;
|
||||||
uint32 m_valid[MAX_PAGES];
|
uint32 m_valid[MAX_PAGES];
|
||||||
std::array<uint16, MAX_PAGES> m_erase_it;
|
std::array<uint16, MAX_PAGES> m_erase_it;
|
||||||
struct { uint32 bm[16]; const uint32* n; } m_pages;
|
|
||||||
const uint32* RESTRICT m_sharedbits;
|
const uint32* RESTRICT m_sharedbits;
|
||||||
|
|
||||||
// m_valid
|
// m_valid
|
||||||
|
@ -61,7 +61,7 @@ public:
|
||||||
|
|
||||||
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
|
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
|
||||||
|
|
||||||
void InvalidatePages(const uint32* pages, uint32 psm);
|
void InvalidatePages(const GSOffset::PageLooper& pages, uint32 psm);
|
||||||
|
|
||||||
void RemoveAll();
|
void RemoveAll();
|
||||||
void IncAge();
|
void IncAge();
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
#include "GSLocalMemory.h"
|
#include "GSLocalMemory.h"
|
||||||
|
|
||||||
GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
|
GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
|
||||||
GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
GSOffset GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
||||||
{
|
{
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue