GSdx: Reworked the software renderer's texture cache a bit. Tracking at page level was of questionable correctness when the base address was not page-aligned, so the cache now keeps track of blocks instead (32x as many entries). This may be slower or faster by a few percent, depending on how the larger number of blocks trades off against the finer resolution.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1142 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-05-07 06:32:10 +00:00
parent e6b9275c13
commit 3202a42578
6 changed files with 554 additions and 413 deletions

View File

@ -128,7 +128,7 @@ void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TE
{
ASSERT(TEX0.CSA == 0);
WriteCLUT_T32_I8_CSM1(&m_mem->m_vm32[m_mem->BlockAddress32(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T32_I8_CSM1((DWORD*)m_mem->BlockPtr32(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
@ -137,31 +137,31 @@ void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TE
GSVector4i dummy; // this just forces stack alignment and enables inlining the next call
WriteCLUT_T32_I4_CSM1(&m_mem->m_vm32[m_mem->BlockAddress32(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T32_I4_CSM1((DWORD*)m_mem->BlockPtr32(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT16_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
ASSERT(TEX0.CSA < 16);
WriteCLUT_T16_I8_CSM1(&m_mem->m_vm16[m_mem->BlockAddress16(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T16_I8_CSM1((WORD*)m_mem->BlockPtr16(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT16_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
ASSERT(TEX0.CSA < 32);
WriteCLUT_T16_I4_CSM1(&m_mem->m_vm16[m_mem->BlockAddress16(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T16_I4_CSM1((WORD*)m_mem->BlockPtr16(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
WriteCLUT_T16_I8_CSM1(&m_mem->m_vm16[m_mem->BlockAddress16S(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T16_I8_CSM1((WORD*)m_mem->BlockPtr16S(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
WriteCLUT_T16_I4_CSM1(&m_mem->m_vm16[m_mem->BlockAddress16S(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T16_I4_CSM1((WORD*)m_mem->BlockPtr16S(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
}
template<int n> void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)

View File

@ -61,6 +61,15 @@ int GSLocalMemory::rowOffset16SZ[2048];
int GSLocalMemory::rowOffset8[2][2048];
int GSLocalMemory::rowOffset4[2][2048];
int GSLocalMemory::blockOffset32[256];
int GSLocalMemory::blockOffset32Z[256];
int GSLocalMemory::blockOffset16[256];
int GSLocalMemory::blockOffset16S[256];
int GSLocalMemory::blockOffset16Z[256];
int GSLocalMemory::blockOffset16SZ[256];
int GSLocalMemory::blockOffset8[256];
int GSLocalMemory::blockOffset4[256];
//
DWORD GSLocalMemory::m_xtbl[1024];
@ -138,22 +147,60 @@ GSLocalMemory::GSLocalMemory()
for(int x = 0; x < countof(rowOffset8[0]); x++)
{
rowOffset8[0][x] = (int)PixelAddress8(x, 0, 0, 32) - (int)PixelAddress8(0, 0, 0, 32),
rowOffset8[0][x] = (int)PixelAddress8(x, 0, 0, 32) - (int)PixelAddress8(0, 0, 0, 32);
rowOffset8[1][x] = (int)PixelAddress8(x, 2, 0, 32) - (int)PixelAddress8(0, 2, 0, 32);
}
for(int x = 0; x < countof(rowOffset4[0]); x++)
{
rowOffset4[0][x] = (int)PixelAddress4(x, 0, 0, 32) - (int)PixelAddress4(0, 0, 0, 32),
rowOffset4[0][x] = (int)PixelAddress4(x, 0, 0, 32) - (int)PixelAddress4(0, 0, 0, 32);
rowOffset4[1][x] = (int)PixelAddress4(x, 2, 0, 32) - (int)PixelAddress4(0, 2, 0, 32);
}
for(int x = 0; x < countof(blockOffset32); x++)
{
blockOffset32[x] = (int)BlockNumber32(x << 3, 0, 0, 32) - (int)BlockNumber32(0, 0, 0, 32);
}
for(int x = 0; x < countof(blockOffset32Z); x++)
{
blockOffset32Z[x] = (int)BlockNumber32Z(x << 3, 0, 0, 32) - (int)BlockNumber32Z(0, 0, 0, 32);
}
for(int x = 0; x < countof(blockOffset16); x++)
{
blockOffset16[x] = (int)BlockNumber16(x << 3, 0, 0, 32) - (int)BlockNumber16(0, 0, 0, 32);
}
for(int x = 0; x < countof(blockOffset16S); x++)
{
blockOffset16S[x] = (int)BlockNumber16S(x << 3, 0, 0, 32) - (int)BlockNumber16S(0, 0, 0, 32);
}
for(int x = 0; x < countof(blockOffset16Z); x++)
{
blockOffset16Z[x] = (int)BlockNumber16Z(x << 3, 0, 0, 32) - (int)BlockNumber16Z(0, 0, 0, 32);
}
for(int x = 0; x < countof(blockOffset16SZ); x++)
{
blockOffset16SZ[x] = (int)BlockNumber16SZ(x << 3, 0, 0, 32) - (int)BlockNumber16SZ(0, 0, 0, 32);
}
for(int x = 0; x < countof(blockOffset8); x++)
{
blockOffset8[x] = (int)BlockNumber8(x << 3, 0, 0, 32) - (int)BlockNumber8(0, 0, 0, 32);
}
for(int x = 0; x < countof(blockOffset4); x++)
{
blockOffset4[x] = (int)BlockNumber4(x << 3, 0, 0, 32) - (int)BlockNumber4(0, 0, 0, 32);
}
for(int i = 0; i < countof(m_psm); i++)
{
m_psm[i].pa = &GSLocalMemory::PixelAddress32;
m_psm[i].ba = &GSLocalMemory::BlockAddress32;
m_psm[i].pga = &GSLocalMemory::PageAddress32;
m_psm[i].pgn = &GSLocalMemory::PageNumber32;
m_psm[i].bn = &GSLocalMemory::BlockNumber32;
m_psm[i].rp = &GSLocalMemory::ReadPixel32;
m_psm[i].rpa = &GSLocalMemory::ReadPixel32;
m_psm[i].wp = &GSLocalMemory::WritePixel32;
@ -167,11 +214,14 @@ GSLocalMemory::GSLocalMemory()
m_psm[i].rtx = &GSLocalMemory::ReadTexture32;
m_psm[i].rtxNP = &GSLocalMemory::ReadTexture32;
m_psm[i].rtxP = &GSLocalMemory::ReadTexture32;
m_psm[i].rtxb = &GSLocalMemory::ReadTextureBlock32;
m_psm[i].rtxbP = &GSLocalMemory::ReadTextureBlock32;
m_psm[i].bpp = m_psm[i].trbpp = 32;
m_psm[i].pal = 0;
m_psm[i].bs = CSize(8, 8);
m_psm[i].pgs = CSize(64, 32);
for(int j = 0; j < 8; j++) m_psm[i].rowOffset[j] = rowOffset32;
m_psm[i].blockOffset = blockOffset32;
}
m_psm[PSM_PSMCT16].pa = &GSLocalMemory::PixelAddress16;
@ -183,28 +233,14 @@ GSLocalMemory::GSLocalMemory()
m_psm[PSM_PSMZ16].pa = &GSLocalMemory::PixelAddress16Z;
m_psm[PSM_PSMZ16S].pa = &GSLocalMemory::PixelAddress16SZ;
m_psm[PSM_PSMCT16].ba = &GSLocalMemory::BlockAddress16;
m_psm[PSM_PSMCT16S].ba = &GSLocalMemory::BlockAddress16S;
m_psm[PSM_PSMT8].ba = &GSLocalMemory::BlockAddress8;
m_psm[PSM_PSMT4].ba = &GSLocalMemory::BlockAddress4;
m_psm[PSM_PSMZ32].ba = &GSLocalMemory::BlockAddress32Z;
m_psm[PSM_PSMZ24].ba = &GSLocalMemory::BlockAddress32Z;
m_psm[PSM_PSMZ16].ba = &GSLocalMemory::BlockAddress16Z;
m_psm[PSM_PSMZ16S].ba = &GSLocalMemory::BlockAddress16SZ;
m_psm[PSM_PSMCT16].pga = &GSLocalMemory::PageAddress16;
m_psm[PSM_PSMCT16S].pga = &GSLocalMemory::PageAddress16;
m_psm[PSM_PSMZ16].pga = &GSLocalMemory::PageAddress16;
m_psm[PSM_PSMZ16S].pga = &GSLocalMemory::PageAddress16;
m_psm[PSM_PSMT8].pga = &GSLocalMemory::PageAddress8;
m_psm[PSM_PSMT4].pga = &GSLocalMemory::PageAddress4;
m_psm[PSM_PSMCT16].pgn = &GSLocalMemory::PageNumber16;
m_psm[PSM_PSMCT16S].pgn = &GSLocalMemory::PageNumber16;
m_psm[PSM_PSMZ16].pgn = &GSLocalMemory::PageNumber16;
m_psm[PSM_PSMZ16S].pgn = &GSLocalMemory::PageNumber16;
m_psm[PSM_PSMT8].pgn = &GSLocalMemory::PageNumber8;
m_psm[PSM_PSMT4].pgn = &GSLocalMemory::PageNumber4;
m_psm[PSM_PSMCT16].bn = &GSLocalMemory::BlockNumber16;
m_psm[PSM_PSMCT16S].bn = &GSLocalMemory::BlockNumber16S;
m_psm[PSM_PSMT8].bn = &GSLocalMemory::BlockNumber8;
m_psm[PSM_PSMT4].bn = &GSLocalMemory::BlockNumber4;
m_psm[PSM_PSMZ32].bn = &GSLocalMemory::BlockNumber32Z;
m_psm[PSM_PSMZ24].bn = &GSLocalMemory::BlockNumber32Z;
m_psm[PSM_PSMZ16].bn = &GSLocalMemory::BlockNumber16Z;
m_psm[PSM_PSMZ16S].bn = &GSLocalMemory::BlockNumber16SZ;
m_psm[PSM_PSMCT24].rp = &GSLocalMemory::ReadPixel24;
m_psm[PSM_PSMCT16].rp = &GSLocalMemory::ReadPixel16;
@ -342,11 +378,44 @@ GSLocalMemory::GSLocalMemory()
m_psm[PSM_PSMZ16].rtxNP = &GSLocalMemory::ReadTexture16ZNP;
m_psm[PSM_PSMZ16S].rtxNP = &GSLocalMemory::ReadTexture16SZNP;
m_psm[PSM_PSMCT24].rtxP = &GSLocalMemory::ReadTexture24;
m_psm[PSM_PSMCT16].rtxP = &GSLocalMemory::ReadTexture16;
m_psm[PSM_PSMCT16S].rtxP = &GSLocalMemory::ReadTexture16S;
m_psm[PSM_PSMT8].rtxP = &GSLocalMemory::ReadTexture8P;
m_psm[PSM_PSMT4].rtxP = &GSLocalMemory::ReadTexture4P;
m_psm[PSM_PSMT8H].rtxP = &GSLocalMemory::ReadTexture8HP;
m_psm[PSM_PSMT4HL].rtxP = &GSLocalMemory::ReadTexture4HLP;
m_psm[PSM_PSMT4HH].rtxP = &GSLocalMemory::ReadTexture4HHP;
m_psm[PSM_PSMZ32].rtxP = &GSLocalMemory::ReadTexture32Z;
m_psm[PSM_PSMZ24].rtxP = &GSLocalMemory::ReadTexture24Z;
m_psm[PSM_PSMZ16].rtxP = &GSLocalMemory::ReadTexture16Z;
m_psm[PSM_PSMZ16S].rtxP = &GSLocalMemory::ReadTexture16SZ;
m_psm[PSM_PSMCT24].rtxb = &GSLocalMemory::ReadTextureBlock24;
m_psm[PSM_PSMCT16].rtxb = &GSLocalMemory::ReadTextureBlock16;
m_psm[PSM_PSMCT16S].rtxb = &GSLocalMemory::ReadTextureBlock16S;
m_psm[PSM_PSMT8].rtxb = &GSLocalMemory::ReadTextureBlock8;
m_psm[PSM_PSMT4].rtxb = &GSLocalMemory::ReadTextureBlock4;
m_psm[PSM_PSMT8H].rtxb = &GSLocalMemory::ReadTextureBlock8H;
m_psm[PSM_PSMT4HL].rtxb = &GSLocalMemory::ReadTextureBlock4HL;
m_psm[PSM_PSMT4HH].rtxb = &GSLocalMemory::ReadTextureBlock4HH;
m_psm[PSM_PSMZ32].rtxb = &GSLocalMemory::ReadTextureBlock32Z;
m_psm[PSM_PSMZ24].rtxb = &GSLocalMemory::ReadTextureBlock24Z;
m_psm[PSM_PSMZ16].rtxb = &GSLocalMemory::ReadTextureBlock16Z;
m_psm[PSM_PSMZ16S].rtxb = &GSLocalMemory::ReadTextureBlock16SZ;
m_psm[PSM_PSMCT24].rtxbP = &GSLocalMemory::ReadTextureBlock24;
m_psm[PSM_PSMCT16].rtxbP = &GSLocalMemory::ReadTextureBlock16;
m_psm[PSM_PSMCT16S].rtxbP = &GSLocalMemory::ReadTextureBlock16S;
m_psm[PSM_PSMT8].rtxbP = &GSLocalMemory::ReadTextureBlock8P;
m_psm[PSM_PSMT4].rtxbP = &GSLocalMemory::ReadTextureBlock4P;
m_psm[PSM_PSMT8H].rtxbP = &GSLocalMemory::ReadTextureBlock8HP;
m_psm[PSM_PSMT4HL].rtxbP = &GSLocalMemory::ReadTextureBlock4HLP;
m_psm[PSM_PSMT4HH].rtxbP = &GSLocalMemory::ReadTextureBlock4HHP;
m_psm[PSM_PSMZ32].rtxbP = &GSLocalMemory::ReadTextureBlock32Z;
m_psm[PSM_PSMZ24].rtxbP = &GSLocalMemory::ReadTextureBlock24Z;
m_psm[PSM_PSMZ16].rtxbP = &GSLocalMemory::ReadTextureBlock16Z;
m_psm[PSM_PSMZ16S].rtxbP = &GSLocalMemory::ReadTextureBlock16SZ;
m_psm[PSM_PSMT8].pal = m_psm[PSM_PSMT8H].pal = 256;
m_psm[PSM_PSMT4].pal = m_psm[PSM_PSMT4HL].pal = m_psm[PSM_PSMT4HH].pal = 16;
@ -381,6 +450,15 @@ GSLocalMemory::GSLocalMemory()
for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ24].rowOffset[i] = rowOffset32Z;
for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ16].rowOffset[i] = rowOffset16Z;
for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ16S].rowOffset[i] = rowOffset16SZ;
m_psm[PSM_PSMCT16].blockOffset = blockOffset16;
m_psm[PSM_PSMCT16S].blockOffset = blockOffset16S;
m_psm[PSM_PSMT8].blockOffset = blockOffset8;
m_psm[PSM_PSMT4].blockOffset = blockOffset4;
m_psm[PSM_PSMZ32].blockOffset = blockOffset32Z;
m_psm[PSM_PSMZ24].blockOffset = blockOffset32Z;
m_psm[PSM_PSMZ16].blockOffset = blockOffset16Z;
m_psm[PSM_PSMZ16S].blockOffset = blockOffset16SZ;
}
GSLocalMemory::~GSLocalMemory()
@ -505,20 +583,18 @@ bool GSLocalMemory::FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp,
const psm_t& tbl = m_psm[psm];
writePixel wp = tbl.wp;
pixelAddress ba = tbl.ba;
pixelAddress bn = tbl.bn;
int w = tbl.bs.cx;
int h = tbl.bs.cy;
int bpp = tbl.bpp;
int shift = 0;
switch(bpp)
{
case 32: shift = 0; break;
case 16: shift = 1; c = (c & 0xffff) * 0x00010001; break;
case 8: shift = 2; c = (c & 0xff) * 0x01010101; break;
case 4: shift = 3; c = (c & 0xf) * 0x11111111; break;
case 32: break;
case 16: c = (c & 0xffff) * 0x00010001; break;
case 8: c = (c & 0xff) * 0x01010101; break;
case 4: c = (c & 0xf) * 0x11111111; break;
}
CRect clip;
@ -574,7 +650,7 @@ bool GSLocalMemory::FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp,
{
for(int x = clip.left; x < clip.right; x += w)
{
GSVector4i* p = (GSVector4i*)&m_vm8[ba(x, y, bp, bw) << 2 >> shift];
GSVector4i* p = (GSVector4i*)&m_vm8[bn(x, y, bp, bw) << 8];
for(int i = 0; i < 16; i += 4)
{
@ -594,7 +670,7 @@ bool GSLocalMemory::FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp,
{
for(int x = clip.left; x < clip.right; x += w)
{
DWORD* p = &m_vm32[ba(x, y, bp, bw)];
DWORD* p = (DWORD*)&m_vm8[bn(x, y, bp, bw) << 8];
for(int i = 0; i < 64; i += 4)
{
@ -618,7 +694,7 @@ bool GSLocalMemory::FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp,
{
for(int x = clip.left; x < clip.right; x += w)
{
GSVector4i* p = (GSVector4i*)&m_vm8[ba(x, y, bp, bw) << 2 >> shift];
GSVector4i* p = (GSVector4i*)&m_vm8[bn(x, y, bp, bw) << 8];
for(int i = 0; i < 16; i += 4)
{
@ -636,7 +712,7 @@ bool GSLocalMemory::FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp,
{
for(int x = clip.left; x < clip.right; x += w)
{
DWORD* p = (DWORD*)&m_vm8[ba(x, y, bp, bw) << 2 >> shift];
DWORD* p = (DWORD*)&m_vm8[bn(x, y, bp, bw) << 8];
for(int i = 0; i < 64; i += 4)
{
@ -670,14 +746,14 @@ void GSLocalMemory::WriteImageColumn(int l, int r, int y, int h, BYTE* src, int
{
switch(psm)
{
case PSM_PSMCT32: WriteColumn32<aligned, 0xffffffff>(y, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], &src[x * 4], srcpitch); break;
case PSM_PSMCT16: WriteColumn16<aligned>(y, (BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], &src[x * 2], srcpitch); break;
case PSM_PSMCT16S: WriteColumn16<aligned>(y, (BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], &src[x * 2], srcpitch); break;
case PSM_PSMT8: WriteColumn8<aligned>(y, (BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], &src[x], srcpitch); break;
case PSM_PSMT4: WriteColumn4<aligned>(y, (BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], &src[x >> 1], srcpitch); break;
case PSM_PSMZ32: WriteColumn32<aligned, 0xffffffff>(y, (BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], &src[x * 4], srcpitch); break;
case PSM_PSMZ16: WriteColumn16<aligned>(y, (BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], &src[x * 2], srcpitch); break;
case PSM_PSMZ16S: WriteColumn16<aligned>(y, (BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], &src[x * 2], srcpitch); break;
case PSM_PSMCT32: WriteColumn32<aligned, 0xffffffff>(y, BlockPtr32(x, y, bp, bw), &src[x * 4], srcpitch); break;
case PSM_PSMCT16: WriteColumn16<aligned>(y, BlockPtr16(x, y, bp, bw), &src[x * 2], srcpitch); break;
case PSM_PSMCT16S: WriteColumn16<aligned>(y, BlockPtr16S(x, y, bp, bw), &src[x * 2], srcpitch); break;
case PSM_PSMT8: WriteColumn8<aligned>(y, BlockPtr8(x, y, bp, bw), &src[x], srcpitch); break;
case PSM_PSMT4: WriteColumn4<aligned>(y, BlockPtr4(x, y, bp, bw), &src[x >> 1], srcpitch); break;
case PSM_PSMZ32: WriteColumn32<aligned, 0xffffffff>(y, BlockPtr32Z(x, y, bp, bw), &src[x * 4], srcpitch); break;
case PSM_PSMZ16: WriteColumn16<aligned>(y, BlockPtr16Z(x, y, bp, bw), &src[x * 2], srcpitch); break;
case PSM_PSMZ16S: WriteColumn16<aligned>(y, BlockPtr16SZ(x, y, bp, bw), &src[x * 2], srcpitch); break;
// TODO
default: __assume(0);
}
@ -697,14 +773,14 @@ void GSLocalMemory::WriteImageBlock(int l, int r, int y, int h, BYTE* src, int s
{
switch(psm)
{
case PSM_PSMCT32: WriteBlock32<aligned, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], &src[x * 4], srcpitch); break;
case PSM_PSMCT16: WriteBlock16<aligned>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], &src[x * 2], srcpitch); break;
case PSM_PSMCT16S: WriteBlock16<aligned>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], &src[x * 2], srcpitch); break;
case PSM_PSMT8: WriteBlock8<aligned>((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], &src[x], srcpitch); break;
case PSM_PSMT4: WriteBlock4<aligned>((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], &src[x >> 1], srcpitch); break;
case PSM_PSMZ32: WriteBlock32<aligned, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], &src[x * 4], srcpitch); break;
case PSM_PSMZ16: WriteBlock16<aligned>((BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], &src[x * 2], srcpitch); break;
case PSM_PSMZ16S: WriteBlock16<aligned>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], &src[x * 2], srcpitch); break;
case PSM_PSMCT32: WriteBlock32<aligned, 0xffffffff>(BlockPtr32(x, y, bp, bw), &src[x * 4], srcpitch); break;
case PSM_PSMCT16: WriteBlock16<aligned>(BlockPtr16(x, y, bp, bw), &src[x * 2], srcpitch); break;
case PSM_PSMCT16S: WriteBlock16<aligned>(BlockPtr16S(x, y, bp, bw), &src[x * 2], srcpitch); break;
case PSM_PSMT8: WriteBlock8<aligned>(BlockPtr8(x, y, bp, bw), &src[x], srcpitch); break;
case PSM_PSMT4: WriteBlock4<aligned>(BlockPtr4(x, y, bp, bw), &src[x >> 1], srcpitch); break;
case PSM_PSMZ32: WriteBlock32<aligned, 0xffffffff>(BlockPtr32Z(x, y, bp, bw), &src[x * 4], srcpitch); break;
case PSM_PSMZ16: WriteBlock16<aligned>(BlockPtr16Z(x, y, bp, bw), &src[x * 2], srcpitch); break;
case PSM_PSMZ16S: WriteBlock16<aligned>(BlockPtr16SZ(x, y, bp, bw), &src[x * 2], srcpitch); break;
// TODO
default: __assume(0);
}
@ -763,14 +839,14 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, BYTE* src, i
switch(psm)
{
case PSM_PSMCT32: dst = (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]; break;
case PSM_PSMCT16: dst = (BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)]; break;
case PSM_PSMCT16S: dst = (BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)]; break;
case PSM_PSMT8: dst = (BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)]; break;
case PSM_PSMT4: dst = (BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1]; break;
case PSM_PSMZ32: dst = (BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)]; break;
case PSM_PSMZ16: dst = (BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)]; break;
case PSM_PSMZ16S: dst = (BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)]; break;
case PSM_PSMCT32: dst = BlockPtr32(x, y, bp, bw); break;
case PSM_PSMCT16: dst = BlockPtr16(x, y, bp, bw); break;
case PSM_PSMCT16S: dst = BlockPtr16S(x, y, bp, bw); break;
case PSM_PSMT8: dst = BlockPtr8(x, y, bp, bw); break;
case PSM_PSMT4: dst = BlockPtr4(x, y, bp, bw); break;
case PSM_PSMZ32: dst = BlockPtr32Z(x, y, bp, bw); break;
case PSM_PSMZ16: dst = BlockPtr16Z(x, y, bp, bw); break;
case PSM_PSMZ16S: dst = BlockPtr16SZ(x, y, bp, bw); break;
// TODO
default: __assume(0);
}
@ -844,14 +920,14 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, BYTE* src, i
switch(psm)
{
case PSM_PSMCT32: dst = (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]; break;
case PSM_PSMCT16: dst = (BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)]; break;
case PSM_PSMCT16S: dst = (BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)]; break;
case PSM_PSMT8: dst = (BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)]; break;
case PSM_PSMT4: dst = (BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1]; break;
case PSM_PSMZ32: dst = (BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)]; break;
case PSM_PSMZ16: dst = (BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)]; break;
case PSM_PSMZ16S: dst = (BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)]; break;
case PSM_PSMCT32: dst = BlockPtr32(x, y, bp, bw); break;
case PSM_PSMCT16: dst = BlockPtr16(x, y, bp, bw); break;
case PSM_PSMCT16S: dst = BlockPtr16S(x, y, bp, bw); break;
case PSM_PSMT8: dst = BlockPtr8(x, y, bp, bw); break;
case PSM_PSMT4: dst = BlockPtr4(x, y, bp, bw); break;
case PSM_PSMZ32: dst = BlockPtr32Z(x, y, bp, bw); break;
case PSM_PSMZ16: dst = BlockPtr16Z(x, y, bp, bw); break;
case PSM_PSMZ16S: dst = BlockPtr16SZ(x, y, bp, bw); break;
// TODO
default: __assume(0);
}
@ -1026,7 +1102,7 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < tw; x += 8)
{
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, BlockPtr32(x, y, bp, bw));
}
}
@ -1060,7 +1136,7 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < tw; x += 8)
{
UnpackAndWriteBlock8H(src + (x - tx), srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
UnpackAndWriteBlock8H(src + (x - tx), srcpitch, BlockPtr32(x, y, bp, bw));
}
}
@ -1094,7 +1170,7 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 8)
{
UnpackAndWriteBlock4HL(src + (x - tx) / 2, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
UnpackAndWriteBlock4HL(src + (x - tx) / 2, srcpitch, BlockPtr32(x, y, bp, bw));
}
}
@ -1128,7 +1204,7 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 8)
{
UnpackAndWriteBlock4HH(src + (x - tx) / 2, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
UnpackAndWriteBlock4HH(src + (x - tx) / 2, srcpitch, BlockPtr32(x, y, bp, bw));
}
}
@ -1161,7 +1237,7 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 8)
{
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, (BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)]);
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, BlockPtr32Z(x, y, bp, bw));
}
}
@ -1525,10 +1601,9 @@ void GSLocalMemory::ReadTexture32(const CRect& r, BYTE* dst, int dstpitch, const
{
FOREACH_BLOCK_START(8, 8, 32)
{
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
ReadBlock32<true>(BlockPtr32(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
@ -1537,7 +1612,7 @@ void GSLocalMemory::ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, const
{
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock24<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, TEXA);
ReadAndExpandBlock24<true>(BlockPtr32(x, y, bp, bw), dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
@ -1545,7 +1620,7 @@ void GSLocalMemory::ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, const
{
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock24<false>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, TEXA);
ReadAndExpandBlock24<false>(BlockPtr32(x, y, bp, bw), dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
@ -1557,7 +1632,7 @@ void GSLocalMemory::ReadTexture16(const CRect& r, BYTE* dst, int dstpitch, const
FOREACH_BLOCK_START(16, 8, 32)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ReadBlock16<true>(BlockPtr16(x, y, bp, bw), (BYTE*)block, sizeof(block) / 8);
ExpandBlock16(block, dst, dstpitch, TEXA);
}
@ -1570,7 +1645,7 @@ void GSLocalMemory::ReadTexture16S(const CRect& r, BYTE* dst, int dstpitch, cons
FOREACH_BLOCK_START(16, 8, 32)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ReadBlock16<true>(BlockPtr16S(x, y, bp, bw), (BYTE*)block, sizeof(block) / 8);
ExpandBlock16(block, dst, dstpitch, TEXA);
}
@ -1583,7 +1658,7 @@ void GSLocalMemory::ReadTexture8(const CRect& r, BYTE* dst, int dstpitch, const
FOREACH_BLOCK_START(16, 16, 32)
{
ReadAndExpandBlock8_32((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], dst, dstpitch, pal);
ReadAndExpandBlock8_32(BlockPtr8(x, y, bp, bw), dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@ -1594,7 +1669,7 @@ void GSLocalMemory::ReadTexture4(const CRect& r, BYTE* dst, int dstpitch, const
FOREACH_BLOCK_START(32, 16, 32)
{
ReadAndExpandBlock4_32((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], dst, dstpitch, pal);
ReadAndExpandBlock4_32(BlockPtr4(x, y, bp, bw), dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@ -1605,7 +1680,7 @@ void GSLocalMemory::ReadTexture8H(const CRect& r, BYTE* dst, int dstpitch, const
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock8H_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
ReadAndExpandBlock8H_32(BlockPtr32(x, y, bp, bw), dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@ -1616,7 +1691,7 @@ void GSLocalMemory::ReadTexture4HL(const CRect& r, BYTE* dst, int dstpitch, cons
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock4HL_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
ReadAndExpandBlock4HL_32(BlockPtr32(x, y, bp, bw), dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@ -1627,7 +1702,7 @@ void GSLocalMemory::ReadTexture4HH(const CRect& r, BYTE* dst, int dstpitch, cons
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock4HH_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
ReadAndExpandBlock4HH_32(BlockPtr32(x, y, bp, bw), dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@ -1636,7 +1711,7 @@ void GSLocalMemory::ReadTexture32Z(const CRect& r, BYTE* dst, int dstpitch, cons
{
FOREACH_BLOCK_START(8, 8, 32)
{
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], dst, dstpitch);
ReadBlock32<true>(BlockPtr32Z(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -1647,7 +1722,7 @@ void GSLocalMemory::ReadTexture24Z(const CRect& r, BYTE* dst, int dstpitch, cons
{
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock24<true>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], dst, dstpitch, TEXA);
ReadAndExpandBlock24<true>(BlockPtr32Z(x, y, bp, bw), dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
@ -1655,7 +1730,7 @@ void GSLocalMemory::ReadTexture24Z(const CRect& r, BYTE* dst, int dstpitch, cons
{
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock24<false>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], dst, dstpitch, TEXA);
ReadAndExpandBlock24<false>(BlockPtr32Z(x, y, bp, bw), dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
@ -1667,7 +1742,7 @@ void GSLocalMemory::ReadTexture16Z(const CRect& r, BYTE* dst, int dstpitch, cons
FOREACH_BLOCK_START(16, 8, 32)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ReadBlock16<true>(BlockPtr16Z(x, y, bp, bw), (BYTE*)block, sizeof(block) / 8);
ExpandBlock16(block, dst, dstpitch, TEXA);
}
@ -1680,7 +1755,7 @@ void GSLocalMemory::ReadTexture16SZ(const CRect& r, BYTE* dst, int dstpitch, con
FOREACH_BLOCK_START(16, 8, 32)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ReadBlock16<true>(BlockPtr16SZ(x, y, bp, bw), (BYTE*)block, sizeof(block) / 8);
ExpandBlock16(block, dst, dstpitch, TEXA);
}
@ -1689,6 +1764,113 @@ void GSLocalMemory::ReadTexture16SZ(const CRect& r, BYTE* dst, int dstpitch, con
///////////////////
// Copies the block located by BlockPtr(bp) into dst through ReadBlock32.
// dst and dstpitch are forwarded unchanged; TEXA is not used here.
void GSLocalMemory::ReadTextureBlock32(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	const BYTE* src = BlockPtr(bp);

	ReadBlock32<true>(src, dst, dstpitch);
}
// Reads the block located by BlockPtr(bp) into dst through ReadAndExpandBlock24.
// TEXA.AEM selects the template variant: <true> when it is set, <false> otherwise.
// TEXA itself is also forwarded to the helper.
void GSLocalMemory::ReadTextureBlock24(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	if(!TEXA.AEM)
	{
		ReadAndExpandBlock24<false>(BlockPtr(bp), dst, dstpitch, TEXA);
	}
	else
	{
		ReadAndExpandBlock24<true>(BlockPtr(bp), dst, dstpitch, TEXA);
	}
}
// Reads the block located by BlockPtr(bp) in two steps: ReadBlock16 fills a
// 16-byte aligned 16x8 WORD scratch buffer, then ExpandBlock16 writes the
// result to dst using dstpitch and TEXA.
void GSLocalMemory::ReadTextureBlock16(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	__declspec(align(16)) WORD tmp[16 * 8];

	const BYTE* src = BlockPtr(bp);

	// sizeof(tmp) / 8 is the byte size of one 16-WORD row of the scratch buffer
	ReadBlock16<true>(src, (BYTE*)tmp, sizeof(tmp) / 8);

	ExpandBlock16(tmp, dst, dstpitch, TEXA);
}
// Reads the block located by BlockPtr(bp) in two steps: ReadBlock16 fills a
// 16-byte aligned 16x8 WORD scratch buffer, then ExpandBlock16 writes the
// result to dst using dstpitch and TEXA.
// The body performs the same calls as ReadTextureBlock16.
void GSLocalMemory::ReadTextureBlock16S(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	__declspec(align(16)) WORD tmp[16 * 8];

	const BYTE* src = BlockPtr(bp);

	// sizeof(tmp) / 8 is the byte size of one 16-WORD row of the scratch buffer
	ReadBlock16<true>(src, (BYTE*)tmp, sizeof(tmp) / 8);

	ExpandBlock16(tmp, dst, dstpitch, TEXA);
}
// Reads the block located by BlockPtr(bp) into dst through
// ReadAndExpandBlock8_32, passing m_clut (viewed as const DWORD*) as the
// lookup table. TEXA is not used here.
void GSLocalMemory::ReadTextureBlock8(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	const DWORD* clut = m_clut;

	ReadAndExpandBlock8_32(BlockPtr(bp), dst, dstpitch, clut);
}
// Reads the block located by BlockPtr(bp) into dst through
// ReadAndExpandBlock4_32, passing m_clut (viewed as const UINT64*) as the
// lookup table. TEXA is not used here.
void GSLocalMemory::ReadTextureBlock4(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	const UINT64* clut = m_clut;

	ReadAndExpandBlock4_32(BlockPtr(bp), dst, dstpitch, clut);
}
// Reads the block located by BlockPtr(bp) into dst through
// ReadAndExpandBlock8H_32, passing m_clut (viewed as const DWORD*) as the
// lookup table. TEXA is not used here.
void GSLocalMemory::ReadTextureBlock8H(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	const DWORD* clut = m_clut;

	const BYTE* src = BlockPtr(bp);

	ReadAndExpandBlock8H_32(src, dst, dstpitch, clut);
}
// Reads the block located by BlockPtr(bp) into dst through
// ReadAndExpandBlock4HL_32, passing m_clut (viewed as const DWORD*) as the
// lookup table. TEXA is not used here.
void GSLocalMemory::ReadTextureBlock4HL(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	const DWORD* clut = m_clut;

	const BYTE* src = BlockPtr(bp);

	ReadAndExpandBlock4HL_32(src, dst, dstpitch, clut);
}
// Reads the block located by BlockPtr(bp) into dst through
// ReadAndExpandBlock4HH_32, passing m_clut (viewed as const DWORD*) as the
// lookup table. TEXA is not used here.
void GSLocalMemory::ReadTextureBlock4HH(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	const DWORD* clut = m_clut;

	const BYTE* src = BlockPtr(bp);

	ReadAndExpandBlock4HH_32(src, dst, dstpitch, clut);
}
// Copies the block located by BlockPtr(bp) into dst through ReadBlock32.
// The body performs the same call as ReadTextureBlock32; TEXA is not used here.
void GSLocalMemory::ReadTextureBlock32Z(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	const BYTE* src = BlockPtr(bp);

	ReadBlock32<true>(src, dst, dstpitch);
}
// Reads the block located by BlockPtr(bp) into dst through ReadAndExpandBlock24.
// TEXA.AEM selects the template variant: <true> when it is set, <false> otherwise.
// The body performs the same calls as ReadTextureBlock24.
void GSLocalMemory::ReadTextureBlock24Z(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	if(!TEXA.AEM)
	{
		ReadAndExpandBlock24<false>(BlockPtr(bp), dst, dstpitch, TEXA);
	}
	else
	{
		ReadAndExpandBlock24<true>(BlockPtr(bp), dst, dstpitch, TEXA);
	}
}
// Reads the block located by BlockPtr(bp) in two steps: ReadBlock16 fills a
// 16-byte aligned 16x8 WORD scratch buffer, then ExpandBlock16 writes the
// result to dst using dstpitch and TEXA.
// The body performs the same calls as ReadTextureBlock16.
void GSLocalMemory::ReadTextureBlock16Z(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	__declspec(align(16)) WORD tmp[16 * 8];

	const BYTE* src = BlockPtr(bp);

	// sizeof(tmp) / 8 is the byte size of one 16-WORD row of the scratch buffer
	ReadBlock16<true>(src, (BYTE*)tmp, sizeof(tmp) / 8);

	ExpandBlock16(tmp, dst, dstpitch, TEXA);
}
// Reads the block located by BlockPtr(bp) in two steps: ReadBlock16 fills a
// 16-byte aligned 16x8 WORD scratch buffer, then ExpandBlock16 writes the
// result to dst using dstpitch and TEXA.
// The body performs the same calls as ReadTextureBlock16.
void GSLocalMemory::ReadTextureBlock16SZ(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	__declspec(align(16)) WORD tmp[16 * 8];

	const BYTE* src = BlockPtr(bp);

	// sizeof(tmp) / 8 is the byte size of one 16-WORD row of the scratch buffer
	ReadBlock16<true>(src, (BYTE*)tmp, sizeof(tmp) / 8);

	ExpandBlock16(tmp, dst, dstpitch, TEXA);
}
///////////////////
void GSLocalMemory::ReadTexture(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP)
{
readTexture rtx = m_psm[TEX0.PSM].rtx;
@ -1731,7 +1913,7 @@ void GSLocalMemory::ReadTexture16NP(const CRect& r, BYTE* dst, int dstpitch, con
{
FOREACH_BLOCK_START(16, 8, 16)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], dst, dstpitch);
ReadBlock16<true>(BlockPtr16(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -1740,7 +1922,7 @@ void GSLocalMemory::ReadTexture16SNP(const CRect& r, BYTE* dst, int dstpitch, co
{
FOREACH_BLOCK_START(16, 8, 16)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], dst, dstpitch);
ReadBlock16<true>(BlockPtr16S(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -1753,7 +1935,7 @@ void GSLocalMemory::ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, cons
{
FOREACH_BLOCK_START(16, 16, 32)
{
ReadAndExpandBlock8_32((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], dst, dstpitch, pal);
ReadAndExpandBlock8_32(BlockPtr8(x, y, bp, bw), dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@ -1765,7 +1947,7 @@ void GSLocalMemory::ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, cons
FOREACH_BLOCK_START(16, 16, 16)
{
ReadBlock8<true>(&m_vm8[BlockAddress8(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 16);
ReadBlock8<true>(BlockPtr8(x, y, bp, bw), (BYTE*)block, sizeof(block) / 16);
ExpandBlock8_16(block, dst, dstpitch, pal);
}
@ -1781,7 +1963,7 @@ void GSLocalMemory::ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, cons
{
FOREACH_BLOCK_START(32, 16, 32)
{
ReadAndExpandBlock4_32(&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], dst, dstpitch, pal);
ReadAndExpandBlock4_32(BlockPtr4(x, y, bp, bw), dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@ -1793,7 +1975,7 @@ void GSLocalMemory::ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, cons
FOREACH_BLOCK_START(32, 16, 16)
{
ReadBlock4<true>(&m_vm8[BlockAddress4(x, y, bp, bw)>>1], (BYTE*)block, sizeof(block) / 16);
ReadBlock4<true>(BlockPtr4(x, y, bp, bw), (BYTE*)block, sizeof(block) / 16);
ExpandBlock4_16(block, dst, dstpitch, pal);
}
@ -1809,7 +1991,7 @@ void GSLocalMemory::ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, con
{
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock8H_32((const BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
ReadAndExpandBlock8H_32(BlockPtr32(x, y, bp, bw), dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@ -1821,7 +2003,7 @@ void GSLocalMemory::ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, con
FOREACH_BLOCK_START(8, 8, 16)
{
ReadBlock32<true>((const BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ReadBlock32<true>(BlockPtr32(x, y, bp, bw), (BYTE*)block, sizeof(block) / 8);
ExpandBlock8H_16(block, dst, dstpitch, pal);
}
@ -1837,7 +2019,7 @@ void GSLocalMemory::ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, co
{
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock4HL_32((const BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
ReadAndExpandBlock4HL_32(BlockPtr32(x, y, bp, bw), dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@ -1849,7 +2031,7 @@ void GSLocalMemory::ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, co
FOREACH_BLOCK_START(8, 8, 16)
{
ReadBlock32<true>((const BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ReadBlock32<true>(BlockPtr32(x, y, bp, bw), (BYTE*)block, sizeof(block) / 8);
ExpandBlock4HL_16(block, dst, dstpitch, pal);
}
@ -1865,7 +2047,7 @@ void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, co
{
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock4HH_32((const BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
ReadAndExpandBlock4HH_32(BlockPtr32(x, y, bp, bw), dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@ -1877,7 +2059,7 @@ void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, co
FOREACH_BLOCK_START(8, 8, 16)
{
ReadBlock32<true>((const BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ReadBlock32<true>(BlockPtr32(x, y, bp, bw), (BYTE*)block, sizeof(block) / 8);
ExpandBlock4HH_16(block, dst, dstpitch, pal);
}
@ -1889,7 +2071,7 @@ void GSLocalMemory::ReadTexture16ZNP(const CRect& r, BYTE* dst, int dstpitch, co
{
FOREACH_BLOCK_START(16, 8, 16)
{
ReadBlock16<true>((const BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], dst, dstpitch);
ReadBlock16<true>(BlockPtr16Z(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -1898,7 +2080,7 @@ void GSLocalMemory::ReadTexture16SZNP(const CRect& r, BYTE* dst, int dstpitch, c
{
FOREACH_BLOCK_START(16, 8, 16)
{
ReadBlock16<true>((const BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], dst, dstpitch);
ReadBlock16<true>(BlockPtr16SZ(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -1996,7 +2178,7 @@ void GSLocalMemory::ReadTexture8P(const CRect& r, BYTE* dst, int dstpitch, const
{
FOREACH_BLOCK_START(16, 16, 8)
{
ReadBlock8<true>(&m_vm8[BlockAddress8(x, y, bp, bw)], dst, dstpitch);
ReadBlock8<true>(BlockPtr8(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -2005,7 +2187,7 @@ void GSLocalMemory::ReadTexture4P(const CRect& r, BYTE* dst, int dstpitch, const
{
FOREACH_BLOCK_START(32, 16, 8)
{
ReadBlock4P(&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], dst, dstpitch);
ReadBlock4P(BlockPtr4(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -2014,7 +2196,7 @@ void GSLocalMemory::ReadTexture8HP(const CRect& r, BYTE* dst, int dstpitch, cons
{
FOREACH_BLOCK_START(8, 8, 8)
{
ReadBlock8HP((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
ReadBlock8HP(BlockPtr32(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -2023,7 +2205,7 @@ void GSLocalMemory::ReadTexture4HLP(const CRect& r, BYTE* dst, int dstpitch, con
{
FOREACH_BLOCK_START(8, 8, 8)
{
ReadBlock4HLP((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
ReadBlock4HLP(BlockPtr32(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
@ -2032,13 +2214,40 @@ void GSLocalMemory::ReadTexture4HHP(const CRect& r, BYTE* dst, int dstpitch, con
{
FOREACH_BLOCK_START(8, 8, 8)
{
ReadBlock4HHP((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
ReadBlock4HHP(BlockPtr32(x, y, bp, bw), dst, dstpitch);
}
FOREACH_BLOCK_END
}
//
// Copies the single block numbered bp into dst (row stride dstpitch) through
// ReadBlock8<true>. TEXA is accepted to match the readTextureBlock signature
// but is not used here.
void GSLocalMemory::ReadTextureBlock8P(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	BYTE* src = BlockPtr(bp);

	ReadBlock8<true>(src, dst, dstpitch);
}
// Copies the single block numbered bp into dst (row stride dstpitch) through
// ReadBlock4P. TEXA is accepted to match the readTextureBlock signature but is
// not used here.
void GSLocalMemory::ReadTextureBlock4P(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	BYTE* src = BlockPtr(bp);

	ReadBlock4P(src, dst, dstpitch);
}
// Copies the single block numbered bp into dst (row stride dstpitch) through
// ReadBlock8HP. TEXA is accepted to match the readTextureBlock signature but
// is not used here.
void GSLocalMemory::ReadTextureBlock8HP(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	BYTE* src = BlockPtr(bp);

	ReadBlock8HP(src, dst, dstpitch);
}
// Copies the single block numbered bp into dst (row stride dstpitch) through
// ReadBlock4HLP. TEXA is accepted to match the readTextureBlock signature but
// is not used here.
void GSLocalMemory::ReadTextureBlock4HLP(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	BYTE* src = BlockPtr(bp);

	ReadBlock4HLP(src, dst, dstpitch);
}
// Copies the single block numbered bp into dst (row stride dstpitch) through
// ReadBlock4HHP. TEXA is accepted to match the readTextureBlock signature but
// is not used here.
void GSLocalMemory::ReadTextureBlock4HHP(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	BYTE* src = BlockPtr(bp);

	ReadBlock4HHP(src, dst, dstpitch);
}
//
template<typename T>
void GSLocalMemory::ReadTexture(CRect r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, readTexel rt, readTexture rtx)
{

View File

@ -44,12 +44,13 @@ public:
typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, BYTE* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
typedef void (GSLocalMemory::*readTexture)(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*readTextureBlock)(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
typedef union
{
struct
{
pixelAddress pa, ba, pga, pgn;
pixelAddress pa, bn;
readPixel rp;
readPixelAddr rpa;
writePixel wp;
@ -60,11 +61,16 @@ public:
writeImage wi;
readImage ri;
readTexture rtx, rtxNP, rtxP;
DWORD bpp, pal, trbpp;
readTextureBlock rtxb, rtxbP;
WORD bpp, trbpp;
DWORD pal;
CSize bs, pgs;
int* rowOffset[8];
int* blockOffset;
};
BYTE dummy[128];
} psm_t;
static psm_t m_psm[64];
@ -110,6 +116,15 @@ protected:
static int rowOffset8[2][2048];
static int rowOffset4[2][2048];
// One 256-entry offset table per pixel storage format.
// NOTE(review): presumably these are what psm_t::blockOffset points at, which the
// software texture cache indexes with (x >> 3) and adds to a row's base block
// number - confirm against the table setup in the .cpp.
static int blockOffset32[256];
static int blockOffset32Z[256];
static int blockOffset16[256];
static int blockOffset16S[256];
static int blockOffset16Z[256];
static int blockOffset16SZ[256];
static int blockOffset8[256];
static int blockOffset4[256];
__forceinline static DWORD Expand24To32(DWORD c, const GIFRegTEXA& TEXA)
{
return (((!TEXA.AEM | (c & 0xffffff)) ? TEXA.TA0 : 0) << 24) | (c & 0xffffff);
@ -138,180 +153,135 @@ public:
// address
// NOTE(review): this span is a rendered diff without +/- markers. Each removed
// PageNumber*/PageAddress* line is shown directly above the BlockNumber* line
// that replaces it; only one line of each pair belongs in the compiled header.
//
// BlockNumberXX(x, y, bp, bw) yields the number of the block that holds pixel
// (x, y) of a buffer starting at block bp. Every variant adds three terms to bp:
//   - a page-row term scaled by the buffer width bw,
//   - a page-column term that is a multiple of 32 (the "& ~0x1f" masks, i.e.
//     32 blocks per page),
//   - the block's index inside its page, looked up in the format's blockTable.
static DWORD PageNumber32(int x, int y, DWORD bp, DWORD bw)
// 32-bit: page rows every 32 pixels, page columns every 64 pixels, 8x8-pixel blocks.
static DWORD BlockNumber32(int x, int y, DWORD bp, DWORD bw)
{
return (bp >> 5) + (y >> 5) * bw + (x >> 6);
return bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable32[(y >> 3) & 3][(x >> 3) & 7];
}
static DWORD PageNumber16(int x, int y, DWORD bp, DWORD bw)
// 16-bit: page rows and columns every 64 pixels, 16x8-pixel blocks.
static DWORD BlockNumber16(int x, int y, DWORD bp, DWORD bw)
{
return (bp >> 5) + (y >> 6) * bw + (x >> 6);
return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16[(y >> 3) & 7][(x >> 4) & 3];
}
static DWORD PageNumber8(int x, int y, DWORD bp, DWORD bw)
// 16S: same page/block geometry as BlockNumber16, different in-page block table.
static DWORD BlockNumber16S(int x, int y, DWORD bp, DWORD bw)
{
return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16S[(y >> 3) & 7][(x >> 4) & 3];
}
// 8-bit: page rows every 64 pixels, page columns every 128 pixels, 16x16-pixel
// blocks. The row term uses bw >> 1, hence the assertion that bw is even.
static DWORD BlockNumber8(int x, int y, DWORD bp, DWORD bw)
{
ASSERT((bw & 1) == 0);
return (bp >> 5) + (y >> 6) * (bw >> 1) + (x >> 7);
return bp + ((y >> 1) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f) + blockTable8[(y >> 4) & 3][(x >> 4) & 7];
}
static DWORD PageNumber4(int x, int y, DWORD bp, DWORD bw)
// 4-bit: page rows and columns every 128 pixels, 32x16-pixel blocks.
// The row term uses bw >> 1, hence the assertion that bw is even.
static DWORD BlockNumber4(int x, int y, DWORD bp, DWORD bw)
{
ASSERT((bw & 1) == 0);
return (bp >> 5) + (y >> 7) * (bw >> 1) + (x >> 7);
return bp + ((y >> 2) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f) + blockTable4[(y >> 4) & 7][(x >> 5) & 3];
}
static DWORD PageAddress32(int x, int y, DWORD bp, DWORD bw)
// 32Z: same page/block geometry as BlockNumber32, using blockTable32Z.
static DWORD BlockNumber32Z(int x, int y, DWORD bp, DWORD bw)
{
return PageNumber32(x, y, bp, bw) << 11;
return bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable32Z[(y >> 3) & 3][(x >> 3) & 7];
}
static DWORD PageAddress16(int x, int y, DWORD bp, DWORD bw)
// 16Z: same page/block geometry as BlockNumber16, using blockTable16Z.
static DWORD BlockNumber16Z(int x, int y, DWORD bp, DWORD bw)
{
return PageNumber16(x, y, bp, bw) << 12;
return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16Z[(y >> 3) & 7][(x >> 4) & 3];
}
static DWORD PageAddress8(int x, int y, DWORD bp, DWORD bw)
// 16SZ: same page/block geometry as BlockNumber16, using blockTable16SZ.
static DWORD BlockNumber16SZ(int x, int y, DWORD bp, DWORD bw)
{
return PageNumber8(x, y, bp, bw) << 13;
return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16SZ[(y >> 3) & 7][(x >> 4) & 3];
}
// NOTE(review): this span is a rendered diff without +/- markers. Each removed
// PageAddress4/BlockAddress* header and body is shown interleaved with the
// BlockPtr* member that replaces it; only the BlockPtr* lines are the new code.
static DWORD PageAddress4(int x, int y, DWORD bp, DWORD bw)
// Returns a byte pointer to block number bp inside m_vm8. Blocks are 256 bytes
// apart (bp << 8) and bp is asserted to be below 16384.
BYTE* BlockPtr(DWORD bp) const
{
return PageNumber4(x, y, bp, bw) << 14;
ASSERT(bp < 16384);
return &m_vm8[bp << 8];
}
// The BlockPtrXX members below resolve pixel (x, y) to a block number with the
// matching BlockNumberXX and return the byte pointer to that block.
// NOTE(review): unlike BlockPtr, none of them asserts that the resulting block
// number is below 16384 - confirm callers keep (x, y, bp, bw) in range.
static DWORD BlockAddress32(int x, int y, DWORD bp, DWORD bw)
BYTE* BlockPtr32(int x, int y, DWORD bp, DWORD bw) const
{
DWORD page = bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f);
DWORD block = blockTable32[(y >> 3) & 3][(x >> 3) & 7];
return (page + block) << 6;
return &m_vm8[BlockNumber32(x, y, bp, bw) << 8];
}
static DWORD BlockAddress16(int x, int y, DWORD bp, DWORD bw)
BYTE* BlockPtr16(int x, int y, DWORD bp, DWORD bw) const
{
DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f);
DWORD block = blockTable16[(y >> 3) & 7][(x >> 4) & 3];
return (page + block) << 7;
return &m_vm8[BlockNumber16(x, y, bp, bw) << 8];
}
static DWORD BlockAddress16S(int x, int y, DWORD bp, DWORD bw)
BYTE* BlockPtr16S(int x, int y, DWORD bp, DWORD bw) const
{
DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f);
DWORD block = blockTable16S[(y >> 3) & 7][(x >> 4) & 3];
return (page + block) << 7;
return &m_vm8[BlockNumber16S(x, y, bp, bw) << 8];
}
static DWORD BlockAddress8(int x, int y, DWORD bp, DWORD bw)
BYTE* BlockPtr8(int x, int y, DWORD bp, DWORD bw) const
{
ASSERT((bw & 1) == 0);
DWORD page = bp + ((y >> 1) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f);
DWORD block = blockTable8[(y >> 4) & 3][(x >> 4) & 7];
return (page + block) << 8;
return &m_vm8[BlockNumber8(x, y, bp, bw) << 8];
}
static DWORD BlockAddress4(int x, int y, DWORD bp, DWORD bw)
BYTE* BlockPtr4(int x, int y, DWORD bp, DWORD bw) const
{
ASSERT((bw & 1) == 0);
DWORD page = bp + ((y >> 2) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f);
DWORD block = blockTable4[(y >> 4) & 7][(x >> 5) & 3];
return (page + block) << 9;
return &m_vm8[BlockNumber4(x, y, bp, bw) << 8];
}
static DWORD BlockAddress32Z(int x, int y, DWORD bp, DWORD bw)
BYTE* BlockPtr32Z(int x, int y, DWORD bp, DWORD bw) const
{
DWORD page = bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f);
DWORD block = blockTable32Z[(y >> 3) & 3][(x >> 3) & 7];
return (page + block) << 6;
return &m_vm8[BlockNumber32Z(x, y, bp, bw) << 8];
}
static DWORD BlockAddress16Z(int x, int y, DWORD bp, DWORD bw)
BYTE* BlockPtr16Z(int x, int y, DWORD bp, DWORD bw) const
{
DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f);
DWORD block = blockTable16Z[(y >> 3) & 7][(x >> 4) & 3];
return (page + block) << 7;
return &m_vm8[BlockNumber16Z(x, y, bp, bw) << 8];
}
static DWORD BlockAddress16SZ(int x, int y, DWORD bp, DWORD bw)
BYTE* BlockPtr16SZ(int x, int y, DWORD bp, DWORD bw) const
{
DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f);
DWORD block = blockTable16SZ[(y >> 3) & 7][(x >> 4) & 3];
return (page + block) << 7;
return &m_vm8[BlockNumber16SZ(x, y, bp, bw) << 8];
}
// Address of pixel (x, y) in a 32-bit buffer based at block bp with width bw.
// The block is located by BlockNumber32; each block holds 64 entries (<< 6)
// and columnTable32 supplies the position inside the block.
static DWORD PixelAddressOrg32(int x, int y, DWORD bp, DWORD bw)
{
	DWORD word = (BlockNumber32(x, y, bp, bw) << 6) + columnTable32[y & 7][x & 7];

	// Debug-only range check on the resulting address.
	ASSERT(word < 1024*1024);

	return word;
}
// Address of pixel (x, y) in a 16-bit buffer based at block bp with width bw.
// The block is located by BlockNumber16; each block holds 128 entries (<< 7)
// and columnTable16 supplies the position inside the block.
static DWORD PixelAddressOrg16(int x, int y, DWORD bp, DWORD bw)
{
	DWORD word = (BlockNumber16(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15];

	// Debug-only range check on the resulting address.
	ASSERT(word < 1024*1024*2);

	return word;
}
// Address of pixel (x, y) in a 16S buffer based at block bp with width bw.
// The block is located by BlockNumber16S; each block holds 128 entries (<< 7)
// and columnTable16 supplies the position inside the block.
static DWORD PixelAddressOrg16S(int x, int y, DWORD bp, DWORD bw)
{
	DWORD word = (BlockNumber16S(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15];

	// Debug-only range check on the resulting address.
	ASSERT(word < 1024*1024*2);

	return word;
}
// Address of pixel (x, y) in an 8-bit buffer based at block bp with width bw.
// The block is located by BlockNumber8; each block holds 256 entries (<< 8)
// and columnTable8 supplies the position inside the block.
static DWORD PixelAddressOrg8(int x, int y, DWORD bp, DWORD bw)
{
	// The block computation halves bw, so an odd width would be truncated.
	ASSERT((bw & 1) == 0);

	DWORD word = (BlockNumber8(x, y, bp, bw) << 8) + columnTable8[y & 15][x & 15];

	// Debug-only range check on the resulting address.
	ASSERT(word < 1024*1024*4);

	return word;
}
// Address of pixel (x, y) in a 4-bit buffer based at block bp with width bw.
// The block is located by BlockNumber4; each block holds 512 entries (<< 9)
// and columnTable4 supplies the position inside the block.
static DWORD PixelAddressOrg4(int x, int y, DWORD bp, DWORD bw)
{
	// The block computation halves bw, so an odd width would be truncated.
	ASSERT((bw & 1) == 0);

	DWORD word = (BlockNumber4(x, y, bp, bw) << 9) + columnTable4[y & 15][x & 31];

	// Debug-only range check on the resulting address.
	ASSERT(word < 1024*1024*8);

	return word;
}
// Address of pixel (x, y) in a 32Z buffer based at block bp with width bw.
// The block is located by BlockNumber32Z; each block holds 64 entries (<< 6)
// and columnTable32 supplies the position inside the block.
static DWORD PixelAddressOrg32Z(int x, int y, DWORD bp, DWORD bw)
{
	DWORD word = (BlockNumber32Z(x, y, bp, bw) << 6) + columnTable32[y & 7][x & 7];

	// Debug-only range check on the resulting address.
	ASSERT(word < 1024*1024);

	return word;
}
// Address of pixel (x, y) in a 16Z buffer based at block bp with width bw.
// The block is located by BlockNumber16Z; each block holds 128 entries (<< 7)
// and columnTable16 supplies the position inside the block.
static DWORD PixelAddressOrg16Z(int x, int y, DWORD bp, DWORD bw)
{
	DWORD word = (BlockNumber16Z(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15];

	// Debug-only range check on the resulting address.
	ASSERT(word < 1024*1024*2);

	return word;
}
// Address of pixel (x, y) in a 16SZ buffer based at block bp with width bw.
// The block is located by BlockNumber16SZ; each block holds 128 entries (<< 7)
// and columnTable16 supplies the position inside the block.
static DWORD PixelAddressOrg16SZ(int x, int y, DWORD bp, DWORD bw)
{
	DWORD word = (BlockNumber16SZ(x, y, bp, bw) << 7) + columnTable16[y & 7][x & 15];

	// Debug-only range check on the resulting address.
	ASSERT(word < 1024*1024*2);

	return word;
}
static __forceinline DWORD PixelAddress32(int x, int y, DWORD bp, DWORD bw)
@ -952,7 +922,7 @@ public:
void ReadImageX(int& tx, int& ty, BYTE* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
//
// * => 32
void ReadTexture32(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
@ -971,7 +941,21 @@ public:
void ReadTexture(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
void ReadTextureNC(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
// 32/16
void ReadTextureBlock32(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock24(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock16(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock16S(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock8(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock8H(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4HL(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4HH(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock32Z(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock24Z(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock16Z(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock16SZ(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
// * => 32/16
void ReadTexture16NP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16SNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
@ -986,7 +970,7 @@ public:
void ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
void ReadTextureNPNC(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
// 32/8
// pal ? 8 : 32
void ReadTexture8P(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4P(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
@ -994,6 +978,12 @@ public:
void ReadTexture4HLP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HHP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock8P(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4P(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock8HP(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4HLP(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4HHP(DWORD bp, BYTE* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
//
static DWORD m_xtbl[1024], m_ytbl[1024];

View File

@ -654,28 +654,29 @@ protected:
if(fst)
{
GSVector4i uv = GSVector4i(m_vtrace.m_min.t.xyxy(m_vtrace.m_max.t)).sra32(16);
/*
int tw = context->TEX0.TW;
int th = context->TEX0.TH;
GSVector4i u = uv & GSVector4i::xffffffff().srl32(32 - tw);
GSVector4i v = uv & GSVector4i::xffffffff().srl32(32 - th);
GSVector4i u, v;
GSVector4i uu = uv.sra32(tw);
GSVector4i vv = uv.sra32(th);
int mask;
if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT)
{
int tw = context->TEX0.TW;
int th = context->TEX0.TH;
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
v = uv & GSVector4i::xffffffff().srl32(32 - th);
GSVector4i uu = uv.sra32(tw);
GSVector4i vv = uv.sra32(th);
mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
}
int mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
*/
switch(wms)
{
case CLAMP_REPEAT:
/*
if(mask & 0x000f)
{
if(vr.x < u.x) vr.x = u.x;
if(vr.z > u.z + 1) vr.z = u.z + 1;
}
*/
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
@ -691,13 +692,7 @@ protected:
switch(wmt)
{
case CLAMP_REPEAT:
/*
if(mask & 0xf000)
{
if(vr.y < v.y) vr.y = v.y;
if(vr.w > v.w + 1) vr.w = v.w + 1;
}
*/
if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:

View File

@ -22,18 +22,13 @@
#include "StdAfx.h"
#include "GSTextureCacheSW.h"
// static FILE* m_log = NULL;
// Stores the GSState pointer in m_state; no other setup is performed here.
GSTextureCacheSW::GSTextureCacheSW(GSState* state)
: m_state(state)
{
// Disabled debug logging, kept for reference:
// m_log = _tfopen(_T("c:\\log.txt"), _T("w"));
}
// Tears the cache down by delegating to RemoveAll().
GSTextureCacheSW::~GSTextureCacheSW()
{
// Disabled debug logging, kept for reference:
// fclose(m_log);
RemoveAll();
}
@ -43,20 +38,16 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
const CAtlList<GSTexturePage*>& t2p = m_p2t[TEX0.TBP0 >> 5];
// fprintf(m_log, "lu %05x %d %d (%d) ", TEX0.TBP0, TEX0.TBW, TEX0.PSM, t2p.GetCount());
// if(r) fprintf(m_log, "(%d %d %d %d) ", r->left, r->top, r->right, r->bottom);
const CAtlMap<GSTexture*, bool>& map = m_map[TEX0.TBP0 >> 5];
GSTexture* t = NULL;
POSITION pos = t2p.GetHeadPosition();
POSITION pos = map.GetStartPosition();
while(pos)
{
GSTexture* t2 = t2p.GetNext(pos)->t;
GSTexture* t2 = map.GetNextKey(pos);
// if(t2->m_TEX0.TBP0 != TEX0.TBP0 || t2->m_TEX0.TBW != TEX0.TBW || t2->m_TEX0.PSM != TEX0.PSM || t2->m_TEX0.TW != TEX0.TW || t2->m_TEX0.TH != TEX0.TH)
if(((t2->m_TEX0.ai32[0] ^ TEX0.ai32[0]) | ((t2->m_TEX0.ai32[1] ^ TEX0.ai32[1]) & 3)) != 0)
{
@ -68,8 +59,6 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
continue;
}
// fprintf(m_log, "cache hit\n");
t = t2;
t->m_age = 0;
@ -79,8 +68,6 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
if(t == NULL)
{
// fprintf(m_log, "cache miss\n");
t = new GSTexture(m_state);
t->m_pos = m_textures.AddTail(t);
@ -91,37 +78,38 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
DWORD bp = TEX0.TBP0;
DWORD bw = TEX0.TBW;
for(int j = 0, y = 0; y < th; j++, y += psm.pgs.cy)
CSize s = (bp & 31) == 0 ? psm.pgs : psm.bs;
for(int y = 0; y < th; y += s.cy)
{
DWORD page = psm.pgn(0, y, bp, bw);
DWORD base = psm.bn(0, y, bp, bw);
for(int i = 0, x = 0; x < tw && page < MAX_PAGES; i++, x += psm.pgs.cx, page++)
for(int x = 0; x < tw; x += s.cx)
{
GSTexturePage* p = new GSTexturePage();
p->t = t;
p->row = j;
p->col = i;
DWORD page = (base + psm.blockOffset[x >> 3]) >> 5;
GSTexturePageEntry* p2te = new GSTexturePageEntry();
if(page >= MAX_PAGES)
{
continue;
}
p2te->p2t = &m_p2t[page];
p2te->pos = m_p2t[page].AddHead(p);
t->m_p2te.AddTail(p2te);
t->m_maxpages++;
m_map[page].SetAt(t, true);
}
}
}
// Update() failed (memory allocation may fail if the game is too hungry):
// drop the texture from the cache and report the miss to the caller.
if(!t->Update(TEX0, TEXA, r))
{
	// Unlink t from the texture list and from every per-page map before it is
	// destroyed, so no container is left holding a dangling pointer.
	m_textures.RemoveAt(t->m_pos);

	for(int i = 0; i < MAX_PAGES; i++)
	{
		m_map[i].RemoveKey(t);
	}

	// "%%" prints a literal percent sign; a lone '%' followed by '\n' is an
	// invalid conversion specification.
	printf("!@#$%%\n");

	// Delete exactly once, after all references have been removed.
	delete t;

	return NULL;
}
@ -142,18 +130,7 @@ void GSTextureCacheSW::RemoveAll()
for(int i = 0; i < MAX_PAGES; i++)
{
CAtlList<GSTexturePage*>& t2p = m_p2t[i];
ASSERT(t2p.IsEmpty());
POSITION pos = t2p.GetHeadPosition();
while(pos)
{
delete t2p.GetNext(pos);
}
t2p.RemoveAll();
m_map[i].RemoveAll();
}
}
@ -167,61 +144,60 @@ void GSTextureCacheSW::IncAge()
GSTexture* t = m_textures.GetNext(pos);
if(++t->m_age > 3)
if(++t->m_age > 30)
{
m_textures.RemoveAt(cur);
for(int i = 0; i < MAX_PAGES; i++)
{
m_map[i].RemoveKey(t);
}
delete t;
}
}
}
void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r)
void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& rect)
{
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[BITBLTBUF.DPSM];
CRect r2;
r2.left = r.left & ~(psm.pgs.cx - 1);
r2.top = r.top & ~(psm.pgs.cy - 1);
r2.right = (r.right + (psm.pgs.cx - 1)) & ~(psm.pgs.cx - 1);
r2.bottom = (r.bottom + (psm.pgs.cy - 1)) & ~(psm.pgs.cy - 1);
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
// fprintf(m_log, "ivm %05x %d %d (%d %d %d %d)\n", bp, bw, BITBLTBUF.DPSM, r2.left, r2.top, r2.right, r2.bottom);
CSize s = (bp & 31) == 0 ? psm.pgs : psm.bs;
for(int y = r2.top; y < r2.bottom; y += psm.pgs.cy)
CRect r;
r.left = rect.left & ~(s.cx - 1);
r.top = rect.top & ~(s.cy - 1);
r.right = (rect.right + (s.cx - 1)) & ~(s.cx - 1);
r.bottom = (rect.bottom + (s.cy - 1)) & ~(s.cy - 1);
for(int y = r.top; y < r.bottom; y += s.cy)
{
DWORD page = psm.pgn(r2.left, y, bp, bw);
DWORD base = psm.bn(0, y, bp, bw);
for(int x = r2.left; x < r2.right && page < MAX_PAGES; x += psm.pgs.cx, page++)
for(int x = r.left; x < r.right; x += s.cx)
{
const CAtlList<GSTexturePage*>& t2p = m_p2t[page];
DWORD page = (base + psm.blockOffset[x >> 3]) >> 5;
POSITION pos = t2p.GetHeadPosition();
if(page >= MAX_PAGES)
{
continue;
}
const CAtlMap<GSTexture*, bool>& map = m_map[page];
POSITION pos = map.GetStartPosition();
while(pos)
{
GSTexturePage* p = t2p.GetNext(pos);
GSTexture* t = map.GetNextKey(pos);
DWORD flag = 1 << p->col;
t->m_valid[page] = 0;
if((p->t->m_valid[p->row] & flag) == 0)
{
continue;
}
if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, p->t->m_TEX0.PSM))
{
p->t->m_valid[p->row] &= ~flag;
p->t->m_pages--;
// fprintf(m_log, "ivm hit %05x %d %d (%d %d) (%d)", p->t->m_TEX0.TBP0, p->t->m_TEX0.TBW, p->t->m_TEX0.PSM, p->row, p->col, p->t->m_pages);
// if(p->t->m_pages == 0) fprintf(m_log, " *");
// fprintf(m_log, "\n");
}
t->m_complete = false;
}
}
}
@ -233,10 +209,9 @@ GSTextureCacheSW::GSTexture::GSTexture(GSState* state)
: m_state(state)
, m_buff(NULL)
, m_tw(0)
, m_maxpages(0)
, m_pages(0)
, m_pos(NULL)
, m_age(0)
, m_pos(NULL)
, m_complete(false)
{
memset(m_valid, 0, sizeof(m_valid));
}
@ -247,30 +222,11 @@ GSTextureCacheSW::GSTexture::~GSTexture()
{
_aligned_free(m_buff);
}
POSITION pos = m_p2te.GetHeadPosition();
while(pos)
{
GSTexturePageEntry* p2te = m_p2te.GetNext(pos);
GSTexturePage* p = p2te->p2t->GetAt(p2te->pos);
ASSERT(p->t == this);
delete p;
p2te->p2t->RemoveAt(p2te->pos);
delete p2te;
}
m_p2te.RemoveAll();
}
bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r)
bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* rect)
{
if(m_pages == m_maxpages)
if(m_complete)
{
return true;
}
@ -282,16 +238,16 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
DWORD bp = TEX0.TBP0;
DWORD bw = TEX0.TBW;
if(tw < psm.bs.cx) tw = psm.bs.cx;
if(th < psm.bs.cy) th = psm.bs.cy;
CSize s = psm.bs;
int tw = max(1 << TEX0.TW, s.cx);
int th = max(1 << TEX0.TH, s.cy);
if(m_buff == NULL)
{
// fprintf(m_log, "up new (%d %d)\n", tw, th);
m_buff = _aligned_malloc(tw * th * sizeof(DWORD), 16);
if(m_buff == NULL)
@ -302,75 +258,84 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
m_tw = max(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
}
CRect r2;
CRect r(0, 0, tw, th);
if(r)
if(rect)
{
r2.left = r->left & ~(psm.pgs.cx - 1);
r2.top = r->top & ~(psm.pgs.cy - 1);
r2.right = (r->right + (psm.pgs.cx - 1)) & ~(psm.pgs.cx - 1);
r2.bottom = (r->bottom + (psm.pgs.cy - 1)) & ~(psm.pgs.cy - 1);
r.left = rect->left & ~(s.cx - 1);
r.top = rect->top & ~(s.cy - 1);
r.right = (rect->right + (s.cx - 1)) & ~(s.cx - 1);
r.bottom = (rect->bottom + (s.cy - 1)) & ~(s.cy - 1);
}
// TODO
if(r.left == 0 && r.top == 0 && r.right == tw && r.bottom == th)
{
m_complete = true; // lame, but better than nothing
}
GSLocalMemory::readTexture rt = psm.pal > 0 ? psm.rtxP : psm.rtx;
GSLocalMemory::readTextureBlock rtxb = psm.rtxbP;
int bytes = psm.pal > 0 ? 1 : 4;
BYTE* dst = (BYTE*)m_buff;
DWORD pitch = (1 << m_tw) * bytes;
DWORD mask = pitch - 1;
for(int j = 0, y = 0; y < th; j++, y += psm.pgs.cy, dst += pitch * psm.pgs.cy)
BYTE* dst = (BYTE*)m_buff + pitch * r.top;
DWORD blocks = 0;
for(int y = r.top, o = pitch * s.cy; y < r.bottom; y += s.cy, dst += o)
{
if(m_valid[j] == mask)
DWORD base = psm.bn(0, y, bp, bw);
for(int x = r.left; x < r.right; x += s.cx)
{
continue;
}
DWORD block = base + psm.blockOffset[x >> 3];
if(r)
{
if(y < r2.top) continue;
if(y >= r2.bottom) break;
}
DWORD page = psm.pgn(0, y, TEX0.TBP0, TEX0.TBW);
for(int i = 0, x = 0; x < tw && page < MAX_PAGES; i++, x += psm.pgs.cx, page++)
{
if(r)
{
if(x < r2.left) continue;
if(x >= r2.right) break;
}
DWORD flag = 1 << i;
if(m_valid[j] & flag)
if(block >= MAX_BLOCKS)
{
continue;
}
m_valid[j] |= flag;
m_pages++;
DWORD row = block >> 5;
DWORD col = 1 << (block & 31);
ASSERT(m_pages <= m_maxpages);
if(m_valid[row] & col)
{
continue;
}
CRect r;
r.left = x;
r.top = y;
r.right = min(x + psm.pgs.cx, tw);
r.bottom = min(y + psm.pgs.cy, th);
// unfortunatelly a block may be part of the same texture multiple times at different places (when (1 << tw) > (tbw << 6), ex. 1024 > 640),
// so just can't set the block's bit to valid in one pass, even if 99.9% of the games don't address the repeated part at the right side
// fprintf(m_log, "up fetch (%d %d) (%d %d %d %d)\n", j, i, r.left, r.top, r.right, r.bottom);
// m_valid[row] |= col;
(mem.*rt)(r, &dst[x * bytes], pitch, TEX0, TEXA);
(mem.*rtxb)(block, &dst[x * bytes], pitch, TEXA);
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * bytes);
blocks++;
}
}
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, s.cx * s.cy * bytes * blocks);
for(int y = r.top; y < r.bottom; y += s.cy)
{
DWORD base = psm.bn(0, y, bp, bw);
for(int x = r.left; x < r.right; x += s.cx)
{
DWORD block = base + psm.blockOffset[x >> 3];
if(block >= MAX_BLOCKS)
{
continue;
}
DWORD row = block >> 5;
DWORD col = 1 << (block & 31);
m_valid[row] |= col;
}
}
return true;
}
}

View File

@ -24,27 +24,11 @@
#include "GSRenderer.h"
#define MAX_PAGES 512
#define MAX_BLOCKS 16384
class GSTextureCacheSW
{
public:
class GSTexture;
class GSTexturePage;
class GSTexturePage
{
public:
GSTexture* t;
DWORD row, col;
};
class GSTexturePageEntry
{
public:
CAtlList<GSTexturePage*>* p2t;
POSITION pos;
};
class GSTexture
{
public:
@ -53,12 +37,10 @@ public:
GIFRegTEXA m_TEXA;
void* m_buff;
DWORD m_tw;
DWORD m_valid[32];
DWORD m_maxpages;
DWORD m_pages;
CAtlList<GSTexturePageEntry*> m_p2te;
POSITION m_pos;
DWORD m_valid[MAX_PAGES]; // each DWORD bits map to the 32 blocks of that page
DWORD m_age;
POSITION m_pos;
bool m_complete;
explicit GSTexture(GSState* state);
virtual ~GSTexture();
@ -69,7 +51,7 @@ public:
protected:
GSState* m_state;
CAtlList<GSTexture*> m_textures;
CAtlList<GSTexturePage*> m_p2t[MAX_PAGES];
CAtlMap<GSTexture*, bool> m_map[MAX_PAGES];
public:
GSTextureCacheSW(GSState* state);