zzogl-pg: Pull a check out of a for loop in Mem.cpp. Break CheckChangeInClut in two. Turn a bit of repeated code in ZeroGS::CRangeManager::Insert into a function. Work on creating a new tex0 struct.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2860 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2010-04-15 22:03:23 +00:00
parent a79f87439f
commit a91d499f17
4 changed files with 275 additions and 149 deletions

View File

@ -415,6 +415,66 @@ typedef struct {
u8 cld;
} tex0Info;
// Bitfield view of the 64-bit TEX0 register.
//
// The same eight bytes can be accessed as named fields, as one u64, as two u32
// words (_u32[0] = lower word, _u32[1] = upper word as used by the constructors),
// as four u16 or as eight u8.
//
// NOTE(review): this relies on the anonymous-struct extension, on reading a union
// member other than the one last written, and on the compiler allocating
// bitfields starting at the least significant bit -- confirm for every compiler
// this is built with.
union tex_0_info
{
    struct
    {
        u64 tbp0 : 14; // texture buffer base pointer (word address / 64)
        u64 tbw  : 6;  // texture buffer width (texels / 64)
        u64 psm  : 6;  // pixel storage format
        u64 tw   : 4;  // texture width exponent (width = 2^tw)
        u64 th   : 4;  // texture height exponent (height = 2^th)
        u64 tcc  : 1;  // texture color component (0 = RGB, 1 = RGBA)
        u64 tfx  : 2;  // texture function
        u64 cbp  : 14; // clut buffer base pointer
        u64 cpsm : 4;  // clut pixel storage format
        u64 csm  : 1;  // clut storage mode
        u64 csa  : 5;  // clut entry offset bits
        u64 cld  : 3;  // clut load control
    };

    u64 _u64;
    u32 _u32[2];
    u16 _u16[4];
    u8 _u8[8];

    // Build from the complete 64-bit register value.
    tex_0_info(u64 data) { _u64 = data; }

    // Build from the lower word only; the upper word is cleared.
    tex_0_info(u32 data) { _u32[0] = data; _u32[1] = 0; }

    // Build from the lower word (data0) and the upper word (data1).
    tex_0_info(u32 data0, u32 data1) { _u32[0] = data0; _u32[1] = data1; }

    // Texture buffer width in texels: tbw * 64, with a stored 0 reported as 64
    // so the result is never 0.
    u32 tbw_mult() const
    {
        if (tbw == 0) return 64;

        return (tbw << 6);
    }

    // Pixel storage format with the invalid value 9 mapped to 1.
    u32 psm_fix() const
    {
        if (psm == 9) return 1;

        return psm;
    }

    // Texture width in texels (2^tw), capped at 1024.
    u32 tw_exp() const
    {
        if (tw > 10) return (1 << 10);

        return (1 << tw);
    }

    // Texture height in texels (2^th), capped at 1024.
    u32 th_exp() const
    {
        if (th > 10) return (1 << 10);

        return (1 << th);
    }

    // Clut pixel storage format with the lowest bit masked off.
    u32 cpsm_fix() const
    {
        return cpsm & 0xe;
    }

    // Clut entry offset: only the low 4 bits are kept when cpsm < 2,
    // otherwise all 5 bits are kept.
    u32 csa_fix() const
    {
        if (cpsm < 2) return (csa & 0xf);

        return (csa & 0x1f);
    }
};
#define TEX_MODULATE 0
#define TEX_DECAL 1
#define TEX_HIGHLIGHT 2
@ -607,21 +667,24 @@ inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) {
// Tex0Info (TEXD_x registers) bits, lower word
// The register is really 64-bit, but we use 2 32bit ones to represent it
// Obtain tbp0 -- Texture Buffer Base Pointer (Word Address/64) -- from data. Bits 0-13.
inline int
ZZOglGet_tbp0_TexBits(u32 data) {
static __forceinline int ZZOglGet_tbp0_TexBits(u32 data)
{
//return tex_0_info(data).tbp0;
return (data ) & 0x3fff;
}
// Obtain tbw -- Texture Buffer Width (Texels/64) -- from data, do not multiply to 64. Bits 14-19
// ( data & 0xfc000 ) >> 14
inline int
ZZOglGet_tbw_TexBits(u32 data) {
static __forceinline int ZZOglGet_tbw_TexBits(u32 data)
{
//return tex_0_info(data).tbw;
return (data >> 14) & 0x3f;
}
// Obtain tbw -- Texture Buffer Width (Texels) -- from data, do multiply to 64, never return 0.
inline int
ZZOglGet_tbw_TexBitsMult(u32 data) {
static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data)
{
//return tex_0_info(data).tbw_mult();
int result = ZZOglGet_tbw_TexBits(data);
if (result == 0)
return 64;
@ -631,94 +694,106 @@ ZZOglGet_tbw_TexBitsMult(u32 data) {
// Obtain psm -- Pixel Storage Format -- from data. Bits 20-25.
// (data & 0x3f00000) >> 20
inline int
ZZOglGet_psm_TexBits(u32 data) {
static __forceinline int ZZOglGet_psm_TexBits(u32 data)
{
//return tex_0_info(data).psm;
return ((data >> 20) & 0x3f);
}
// Obtain psm -- Pixel Storage Format -- from data. Bits 20-25. Fix incorrect psm == 9
inline int
ZZOglGet_psm_TexBitsFix(u32 data) {
static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data)
{
//return tex_0_info(data).psm_fix();
int result = ZZOglGet_psm_TexBits(data) ;
// printf ("result %d\n", result);
if ( result == 9 )
result = 1;
if ( result == 9 ) result = 1;
return result;
}
// Obtain tw -- Texture Width (Width = 2^TW) -- from data. Bits 26-29
// (data & 0x3c000000)>>26
inline u16
ZZOglGet_tw_TexBits(u32 data) {
static __forceinline u16 ZZOglGet_tw_TexBits(u32 data)
{
//return tex_0_info(data).tw;
return ((data >> 26) & 0xf);
}
// Obtain tw -- Texture Width (Width = TW) -- from data. Width could never be more than 1024.
inline u16
ZZOglGet_tw_TexBitsExp(u32 data) {
static __forceinline u16 ZZOglGet_tw_TexBitsExp(u32 data)
{
//return tex_0_info(data).tw_exp();
u16 result = ZZOglGet_tw_TexBits(data);
if (result > 10)
result = 10;
if (result > 10) result = 10;
return (1<<result);
}
// TH set at the border of upper and higher words.
// Obtain th -- Texture Height (Height = 2^TH) -- from data. Bits 30-31 lower, 0-1 higher
// (dataLO & 0xc0000000) >> 30 + (dataHI & 0x3) * 0x4
inline u16
ZZOglGet_th_TexBits(u32 dataLO, u32 dataHI) {
static __forceinline u16 ZZOglGet_th_TexBits(u32 dataLO, u32 dataHI)
{
//return tex_0_info(dataLO, dataHI).th;
return (((dataLO >> 30) & 0x3) | ((dataHI & 0x3) << 2));
}
// Obtain th -- Texture Height (Height = 2^TH) -- from data. Height could never be more than 1024.
inline u16
ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI) {
static __forceinline u16 ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI)
{
//return tex_0_info(dataLO, dataHI).th_exp();
u16 result = ZZOglGet_th_TexBits(dataLO, dataHI);
if (result > 10)
result = 10;
if (result > 10) result = 10;
return (1<<result);
}
// Tex0Info bits, higher word.
// Obtain tcc -- Tecture Color Component 0=RGB, 1=RGBA + use Alpha from TEXA reg when not in PSM -- from data. Bit 3
// Obtain tcc -- Texture Color Component 0=RGB, 1=RGBA + use Alpha from TEXA reg when not in PSM -- from data. Bit 2
// (data & 0x4)>>2
inline u8
ZZOglGet_tcc_TexBits(u32 data) {
static __forceinline u8 ZZOglGet_tcc_TexBits(u32 data)
{
//return tex_0_info(0, data).tcc;
return ((data >> 2) & 0x1);
}
// Obtain tfx -- Texture Function (0=modulate, 1=decal, 2=hilight, 3=hilight2) -- from data. Bits 3-4
// (data & 0x18)>>3
inline u8
ZZOglGet_tfx_TexBits(u32 data) {
static __forceinline u8 ZZOglGet_tfx_TexBits(u32 data)
{
//return tex_0_info(0, data).tfx;
return ((data >> 3) & 0x3);
}
// Obtain cbp from data -- Clut Buffer Base Pointer (Address/256) -- Bits 5-18
// (data & 0x7ffe0)>>5
inline int
ZZOglGet_cbp_TexBits(u32 data) {
static __forceinline int ZZOglGet_cbp_TexBits(u32 data)
{
//return tex_0_info(0, data).cbp;
return ((data >> 5) & 0x3fff);
}
// Obtain cpsm from data -- Clut pixel Storage Format -- Bits 19-22. 22nd is at no use.
// (data & 0x700000)>>19
inline u8
ZZOglGet_cpsm_TexBits(u32 data) {
// 0000 - psmct32; 0010 - psmct16; 1010 - psmct16s.
static __forceinline u8 ZZOglGet_cpsm_TexBits(u32 data)
{
//return (tex_0_info(0, data).cpsm & 0xe);
return ((data >> 19) & 0xe);
}
// Obtain csm -- Clut Storage Mode -- from data. Bit 23
// (data & 0x800000)>>23
inline u8
ZZOglGet_csm_TexBits(u32 data) {
// csm is the clut storage mode. 0 for CSM1, 1 for CSM2.
static __forceinline u8 ZZOglGet_csm_TexBits(u32 data)
{
//return tex_0_info(0, data).csm;
return ((data >> 23) & 0x1);
}
// Obtain csa -- -- from data. Bits 24-28
// (data & 0x1f000000)>>24
inline u8
ZZOglGet_csa_TexBits(u32 data) {
static __forceinline u8 ZZOglGet_csa_TexBits(u32 data)
{
//return tex_0_info(0, data).csa_fix();
if ((data & 0x700000) == 0 ) // it is cpsm < 2 check
return ((data >> 24) & 0xf);
else
@ -727,8 +802,9 @@ ZZOglGet_csa_TexBits(u32 data) {
// Obtain cld -- -- from data. Bits 29-31
// (data & 0xe0000000)>>29
inline u8
ZZOglGet_cld_TexBits(u32 data) {
static __forceinline u8 ZZOglGet_cld_TexBits(u32 data)
{
//return tex_0_info(0, data).cld;
return ((data >> 29) & 0x7);
}

View File

@ -64,6 +64,7 @@ static __forceinline const T* AlignOnBlockBoundry(TransferData data, TransferFun
{
/* don't transfer */
/*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/
//ERROR_LOG("bad texture: testwidth = %d; data.widthlimit = %d\n", testwidth, data.widthlimit);
gs.imageTransfer = -1;
}
bCanAlign = false;
@ -111,14 +112,14 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf
/* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */
bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, data.transfersize) & 0xf) == 0;
/* transfer aligning to blocks */
for(; tempY < alignedPt.y && nSize >= area; tempY += data.blockheight, nSize -= area)
{
if ( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL)))
swizzle = (fun.Swizzle);
else
swizzle = (fun.Swizzle_u);
//Transfer aligning to blocks.
for(; tempY < alignedPt.y && nSize >= area; tempY += data.blockheight, nSize -= area)
{
for(int tempj = gs.trxpos.dx; tempj < alignedPt.x; tempj += data.blockwidth, pbuf += TransPitch(data.blockwidth, data.transfersize)/TSize)
{
u8 *temp = pstart + fun.gp(tempj, tempY, gs.dstbuf.bw) * data.blockbits/8;
@ -126,7 +127,7 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf
}
/* transfer the rest */
if( alignedPt.x < gs.imageEndX )
if (alignedPt.x < gs.imageEndX)
{
pbuf = TransmitHostLocalX<T>(data, fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf);
if (pbuf == NULL) return NULL;

View File

@ -1576,7 +1576,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T*
switch(psm)
{
case PSMT8:
for(int i = 0; i < height; ++i)
for(u32 i = 0; i < height; ++i)
{
for(int j = 0; j < GPU_TEXWIDTH/2; ++j)
{
@ -1595,7 +1595,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T*
break;
case PSMT4:
for(int i = 0; i < height; ++i)
for(u32 i = 0; i < height; ++i)
{
for(int j = 0; j < GPU_TEXWIDTH; ++j)
{
@ -1615,7 +1615,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T*
break;
case PSMT8H:
for(int i = 0; i < height; ++i)
for(u32 i = 0; i < height; ++i)
{
for(int j = 0; j < GPU_TEXWIDTH/8; ++j)
{
@ -1634,7 +1634,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T*
break;
case PSMT4HH:
for(int i = 0; i < height; ++i)
for(u32 i = 0; i < height; ++i)
{
for(int j = 0; j < GPU_TEXWIDTH/8; ++j)
{
@ -1653,7 +1653,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T*
break;
case PSMT4HL:
for(int i = 0; i < height; ++i)
for(u32 i = 0; i < height; ++i)
{
for(int j = 0; j < GPU_TEXWIDTH/8; ++j)
{
@ -1883,11 +1883,11 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
ZeroGS::CMemoryTarget* it = MemoryTarget_SearchExistTarget (start, end, nClutOffset, clutsize, tex0, forcevalidate);
if (it != NULL) return it;
// couldn't find so create
// couldn't find, so create.
CMemoryTarget* targ;
u32 fmt = GL_UNSIGNED_BYTE;
// if ((PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1) || tex0.psm == PSMCT16 || tex0.psm == PSMCT16S) {
if (PSMT_ISHALF_STORAGE(tex0))
{
fmt = GL_UNSIGNED_SHORT_1_5_5_5_REV;
@ -2299,141 +2299,160 @@ u32 ZeroGS::CBitwiseTextureMngr::GetTexInt(u32 bitvalue, u32 ptexDoNotDelete)
return ptex;
}
// Debug-only invariant check on the range list: every entry must end strictly
// before the following entry starts. Compiles to an empty function unless
// _DEBUG is defined.
//
// NOTE(review): this is a free function, yet it reads `ranges` unqualified, the
// same name CRangeManager::Insert uses. If `ranges` is a CRangeManager member,
// this will stop compiling as soon as _DEBUG is defined -- confirm where
// `ranges` is declared.
static __forceinline void RangeSanityCheck()
{
#ifdef _DEBUG
    // Compare each entry with its predecessor; nothing to do for 0 or 1 entries.
    const int count = (int)ranges.size();

    for (int next = 1; next < count; ++next)
    {
        assert(ranges[next - 1].end < ranges[next].start);
    }
#endif
}
void ZeroGS::CRangeManager::Insert(int start, int end)
{
FUNCLOG
int imin = 0, imax = (int)ranges.size(), imid;
#ifdef _DEBUG
// sanity check
for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start );
#endif
RangeSanityCheck();
switch( ranges.size() ) {
switch(ranges.size())
{
case 0:
ranges.push_back(RANGE(start, end));
return;
case 1:
if( end < ranges.front().start ) {
if (end < ranges.front().start)
{
ranges.insert(ranges.begin(), RANGE(start, end));
}
else if( start > ranges.front().end ) {
else if (start > ranges.front().end)
{
ranges.push_back(RANGE(start, end));
}
else {
if( start < ranges.front().start ) ranges.front().start = start;
if( end > ranges.front().end ) ranges.front().end = end;
else
{
if (start < ranges.front().start) ranges.front().start = start;
if (end > ranges.front().end) ranges.front().end = end;
}
return;
}
// find where start is
while(imin < imax) {
while(imin < imax)
{
imid = (imin+imax)>>1;
assert( imid < (int)ranges.size() );
if( ranges[imid].end >= start && (imid == 0 || ranges[imid-1].end < start) ) {
if ((ranges[imid].end >= start) && ((imid == 0) || (ranges[imid-1].end < start)))
{
imin = imid;
break;
}
else if( ranges[imid].start > start ) imax = imid;
else imin = imid+1;
else if (ranges[imid].start > start)
{
imax = imid;
}
else
{
imin = imid + 1;
}
}
int startindex = imin;
if( startindex >= (int)ranges.size() ) {
if (startindex >= (int)ranges.size())
{
// non intersecting
assert( start > ranges.back().end );
ranges.push_back(RANGE(start, end));
return;
}
if( startindex == 0 && end < ranges.front().start ) {
if (startindex == 0 && end < ranges.front().start)
{
ranges.insert(ranges.begin(), RANGE(start, end));
#ifdef _DEBUG
// sanity check
for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start );
#endif
RangeSanityCheck();
return;
}
imin = 0; imax = (int)ranges.size();
imin = 0;
imax = (int)ranges.size();
// find where end is
while(imin < imax) {
imid = (imin+imax)>>1;
while(imin < imax)
{
imid = (imin + imax) >> 1;
assert( imid < (int)ranges.size() );
assert(imid < (int)ranges.size());
if( ranges[imid].end <= end && (imid == ranges.size()-1 || ranges[imid+1].start > end ) ) {
if ((ranges[imid].end <= end) && ((imid == ranges.size() - 1) || (ranges[imid+1].start > end)))
{
imin = imid;
break;
}
else if( ranges[imid].start >= end ) imax = imid;
else imin = imid+1;
else if (ranges[imid].start >= end)
{
imax = imid;
}
else
{
imin = imid + 1;
}
}
int endindex = imin;
if( startindex > endindex ) {
if (startindex > endindex)
{
// create a new range
ranges.insert(ranges.begin()+startindex, RANGE(start, end));
#ifdef _DEBUG
// sanity check
for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start );
#endif
ranges.insert(ranges.begin() + startindex, RANGE(start, end));
RangeSanityCheck();
return;
}
if( endindex >= (int)ranges.size()-1 ) {
if (endindex >= (int)ranges.size() - 1)
{
// pop until startindex is reached
int lastend = ranges.back().end;
int numpop = (int)ranges.size() - startindex - 1;
while(numpop-- > 0 ) ranges.pop_back();
while(numpop-- > 0 )
{
ranges.pop_back();
}
assert( start <= ranges.back().end );
if( start < ranges.back().start ) ranges.back().start = start;
if( lastend > ranges.back().end ) ranges.back().end = lastend;
if( end > ranges.back().end ) ranges.back().end = end;
#ifdef _DEBUG
// sanity check
for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start );
#endif
if (start < ranges.back().start) ranges.back().start = start;
if (lastend > ranges.back().end) ranges.back().end = lastend;
if (end > ranges.back().end) ranges.back().end = end;
RangeSanityCheck();
return;
}
if( endindex == 0 ) {
if( endindex == 0 )
{
assert( end >= ranges.front().start );
if( start < ranges.front().start ) ranges.front().start = start;
if( end > ranges.front().end ) ranges.front().end = end;
#ifdef _DEBUG
// sanity check
for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start );
#endif
if (start < ranges.front().start) ranges.front().start = start;
if (end > ranges.front().end) ranges.front().end = end;
RangeSanityCheck();
}
// somewhere in the middle
if( ranges[startindex].start < start ) start = ranges[startindex].start;
if (ranges[startindex].start < start) start = ranges[startindex].start;
if( startindex < endindex ) {
if (startindex < endindex)
{
ranges.erase(ranges.begin() + startindex, ranges.begin() + endindex );
}
if( start < ranges[startindex].start ) ranges[startindex].start = start;
if( end > ranges[startindex].end ) ranges[startindex].end = end;
if (start < ranges[startindex].start) ranges[startindex].start = start;
if (end > ranges[startindex].end) ranges[startindex].end = end;
#ifdef _DEBUG
// sanity check
for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start );
#endif
RangeSanityCheck();
}
namespace ZeroGS {

View File

@ -820,37 +820,35 @@ void ZeroGS::ExtWrite()
// Caches //
////////////
bool ZeroGS::CheckChangeInClut(u32 highdword, u32 psm)
// case 0: return false;
// case 1: break;
// case 2: m_CBP[0] = TEX0.CBP; break;
// case 3: m_CBP[1] = TEX0.CBP; break;
// case 4: if(m_CBP[0] == TEX0.CBP) return false; m_CBP[0] = TEX0.CBP; break;
// case 5: if(m_CBP[1] == TEX0.CBP) return false; m_CBP[1] = TEX0.CBP; break;
// case 6: ASSERT(0); return false; // ffx2 menu
// case 7: ASSERT(0); return false;
// default: __assume(0);
bool IsDirty(u32 highdword, u32 psm, int cld, int cbp)
{
FUNCLOG
int cld = ZZOglGet_cld_TexBits(highdword);
int cbp = ZZOglGet_cbp_TexBits(highdword);
// processing the CLUT after tex0/2 are written
switch(cld) {
case 0: return false;
case 1: break; // Seems to rarely not be 1.
// not sure about changing cbp[0,1]
case 4: return gs.cbp[0] != cbp;
case 5: return gs.cbp[1] != cbp;
// default: load
default: break;
}
int cpsm = ZZOglGet_cpsm_TexBits(highdword);
int csm = ZZOglGet_csm_TexBits(highdword);
if( cpsm > 1 || csm )
if (cpsm > 1 || csm)
{
ERROR_LOG("16 bit clut not supported.\n");
// don't support 16bit for now
return true;
}
int csa = ZZOglGet_csa_TexBits(highdword);
int entries = PSMT_IS8CLUT(psm) ? 256 : 16;
u64* src = (u64*)(g_pbyGSMemory + cbp*256);
u64* dst = (u64*)(g_pbyGSClut+64*csa);
u64* src = (u64*)(g_pbyGSMemory + cbp * 256);
u64* dst = (u64*)(g_pbyGSClut + 64 * csa);
bool bRet = false;
@ -976,34 +974,66 @@ Return:
".att_syntax\n" : "=m"(bRet) : "c"(dst), "d"(src), "b"(entries) : "eax", "memory");
#endif // _WIN32
return bRet;
}
return bRet;
// cld state:
// 000 - clut data is not loaded; data in the temp buffer is stored
// 001 - clut data is always loaded.
// 010 - clut data is always loaded; cbp0 = cbp.
// 011 - clut data is always loaded; cbp1 = cbp.
// 100 - cbp0 is compared with cbp. if different, clut data is loaded.
// 101 - cbp1 is compared with cbp. if different, clut data is loaded.
// GSdx sets cbp0 & cbp1 when checking for clut changes. ZeroGS sets them in texClutWrite.
// Front-end for the clut change test, driven by the upper TEX0 word.
//
// Extracts cld and cbp from highdword, logs them, and then:
//   - cld == 0: returns false.
//   - cld == 4: returns false when gs.cbp[0] already equals cbp.
//   - cld == 5: returns false when gs.cbp[1] already equals cbp.
//   - anything else (including 4/5 with a different cbp): returns the result
//     of IsDirty(highdword, psm, cld, cbp).
//
// gs.cbp[] is only read here, never written.
bool ZeroGS::CheckChangeInClut(u32 highdword, u32 psm)
{
    FUNCLOG

    const int cld = ZZOglGet_cld_TexBits(highdword);
    const int cbp = ZZOglGet_cbp_TexBits(highdword);

    // processing the CLUT after tex0/2 are written
    ERROR_LOG("high == 0x%x; cld == %d\n", highdword, cld);

    // Nothing is loaded in this mode.
    if (cld == 0) return false;

    // Compare-before-load modes: an unchanged base pointer means no change.
    if (cld == 4 && gs.cbp[0] == cbp) return false;
    if (cld == 5 && gs.cbp[1] == cbp) return false;

    // Every remaining case goes on to the full check.
    return IsDirty(highdword, psm, cld, cbp);
}
void ZeroGS::texClutWrite(int ctx)
{
FUNCLOG
s_bTexFlush = 0;
if( g_bIsLost )
return;
if (g_bIsLost) return;
tex0Info& tex0 = vb[ctx].tex0;
assert( PSMT_ISCLUT(tex0.psm) );
// processing the CLUT after tex0/2 are written
switch(tex0.cld) {
switch(tex0.cld)
{
case 0: return;
case 1: break; // tex0.cld is usually 1.
case 2: gs.cbp[0] = tex0.cbp; break;
case 3: gs.cbp[1] = tex0.cbp; break;
// not sure about changing cbp[0,1]
case 4:
if( gs.cbp[0] == tex0.cbp )
return;
if (gs.cbp[0] == tex0.cbp) return;
gs.cbp[0] = tex0.cbp;
break;
case 5:
if( gs.cbp[1] == tex0.cbp )
return;
if (gs.cbp[1] == tex0.cbp) return;
gs.cbp[1] = tex0.cbp;
break;
default: //DEBUG_LOG("cld isn't 0-5!");