diff --git a/plugins/zzogl-pg/opengl/GS.h b/plugins/zzogl-pg/opengl/GS.h index f02e96a629..242d7bf434 100644 --- a/plugins/zzogl-pg/opengl/GS.h +++ b/plugins/zzogl-pg/opengl/GS.h @@ -415,6 +415,66 @@ typedef struct { u8 cld; } tex0Info; +union tex_0_info +{ + struct + { + u64 tbp0 : 14; + u64 tbw : 6; + u64 psm : 6; + u64 tw : 4; + u64 th : 4; + u64 tcc : 1; + u64 tfx : 2; + u64 cbp : 14; + u64 cpsm : 4; + u64 csm : 1; + u64 csa : 5; + u64 cld : 3; + }; + u64 _u64; + u32 _u32[2]; + u16 _u16[4]; + u8 _u8[8]; + tex_0_info(u64 data) { _u64 = data; } + tex_0_info(u32 data) { _u32[0] = data; _u32[1] = 0; } + tex_0_info(u32 data0, u32 data1) { _u32[0] = data0; _u32[1] = data1; } + u32 tbw_mult() + { + if (tbw == 0) + return 64; + else + return (tbw << 6); + } + u32 psm_fix() + { + // printf ("psm %d\n", psm); + if ( psm == 9 ) return 1; + return psm; + } + u32 tw_exp() + { + if (tw > 10) return (1<<10); + return (1< 10) return (1<<10); + return (1<> 14 -inline int -ZZOglGet_tbw_TexBits(u32 data) { +static __forceinline int ZZOglGet_tbw_TexBits(u32 data) +{ + //return tex_0_info(data).tbw; return (data >> 14) & 0x3f; } // Obtain tbw -- Texture Buffer Width (Texels) -- from data, do multiply to 64, never return 0. -inline int -ZZOglGet_tbw_TexBitsMult(u32 data) { +static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data) +{ + //return text_0_info(data).tbw_mult(); int result = ZZOglGet_tbw_TexBits(data); if (result == 0) return 64; @@ -631,94 +694,106 @@ ZZOglGet_tbw_TexBitsMult(u32 data) { // Obtain psm -- Pixel Storage Format -- from data. Bits 20-25. // (data & 0x3f00000) >> 20 -inline int -ZZOglGet_psm_TexBits(u32 data) { +static __forceinline int ZZOglGet_psm_TexBits(u32 data) +{ + //return tex_0_info(data).psm; return ((data >> 20) & 0x3f); } // Obtain psm -- Pixel Storage Format -- from data. Bits 20-25. Fix incorrect psm == 9 -inline int -ZZOglGet_psm_TexBitsFix(u32 data) { +static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data) +{ + //return tex_0_info(data).psm_fix(); int result = ZZOglGet_psm_TexBits(data) ; // printf ("result %d\n", result); - if ( result == 9 ) - result = 1; + if ( result == 9 ) result = 1; return result; } // Obtain tw -- Texture Width (Width = 2^TW) -- from data. Bits 26-29 // (data & 0x3c000000)>>26 -inline u16 -ZZOglGet_tw_TexBits(u32 data) { +static __forceinline u16 ZZOglGet_tw_TexBits(u32 data) +{ + //return tex_0_info(data).tw; return ((data >> 26) & 0xf); } // Obtain tw -- Texture Width (Width = TW) -- from data. Width could newer be more than 1024. -inline u16 -ZZOglGet_tw_TexBitsExp(u32 data) { +static __forceinline u16 ZZOglGet_tw_TexBitsExp(u32 data) +{ + //return tex_0_info(data).tw_exp(); u16 result = ZZOglGet_tw_TexBits(data); - if (result > 10) - result = 10; + if (result > 10) result = 10; return (1<> 30 + (dataHI & 0x3) * 0x4 -inline u16 -ZZOglGet_th_TexBits(u32 dataLO, u32 dataHI) { +static __forceinline u16 ZZOglGet_th_TexBits(u32 dataLO, u32 dataHI) +{ + //return tex_0_info(dataLO, dataHI).th; return (((dataLO >> 30) & 0x3) | ((dataHI & 0x3) << 2)); } // Obtain th --Texture Height (Height = 2^TH) -- from data. Height could newer be more than 1024. -inline u16 -ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI) { +static __forceinline u16 ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI) +{ + //return tex_0_info(dataLO, dataHI).th_exp(); u16 result = ZZOglGet_th_TexBits(dataLO, dataHI); - if (result > 10) - result = 10; + if (result > 10) result = 10; return (1<>2 -inline u8 -ZZOglGet_tcc_TexBits(u32 data) { +static __forceinline u8 ZZOglGet_tcc_TexBits(u32 data) +{ + //return tex_0_info(0, data).tcc; return ((data >> 2) & 0x1); } // Obtain tfx -- Texture Function (0=modulate, 1=decal, 2=hilight, 3=hilight2) -- from data. Bit 4-5 // (data & 0x18)>>3 -inline u8 -ZZOglGet_tfx_TexBits(u32 data) { +static __forceinline u8 ZZOglGet_tfx_TexBits(u32 data) +{ + //return tex_0_info(0, data).tfx; return ((data >> 3) & 0x3); } // Obtain cbp from data -- Clut Buffer Base Pointer (Address/256) -- Bits 5-18 // (data & 0x7ffe0)>>5 -inline int -ZZOglGet_cbp_TexBits(u32 data) { +static __forceinline int ZZOglGet_cbp_TexBits(u32 data) +{ + //return tex_0_info(0, data).cbp; return ((data >> 5) & 0x3fff); } // Obtain cpsm from data -- Clut pixel Storage Format -- Bits 19-22. 22nd is at no use. // (data & 0x700000)>>19 -inline u8 -ZZOglGet_cpsm_TexBits(u32 data) { +// 0000 - psmct32; 0010 - psmct16; 1010 - psmct16s. +static __forceinline u8 ZZOglGet_cpsm_TexBits(u32 data) +{ + //return (tex_0_info(0, data).cpsm & 0xe); return ((data >> 19) & 0xe); } // Obtain csm -- I don't know what is it -- from data. Bit 23 // (data & 0x800000)>>23 -inline u8 -ZZOglGet_csm_TexBits(u32 data) { +// csm is the clut storage mode. 0 for CSM1, 1 for CSM2. +static __forceinline u8 ZZOglGet_csm_TexBits(u32 data) +{ + //return tex_0_info(0, data).csm; return ((data >> 23) & 0x1); } // Obtain csa -- -- from data. Bits 24-28 // (data & 0x1f000000)>>24 -inline u8 -ZZOglGet_csa_TexBits(u32 data) { +static __forceinline u8 ZZOglGet_csa_TexBits(u32 data) +{ + //return tex_0_info(0, data).csa_fix(); + if ((data & 0x700000) == 0 ) // it is cpsm < 2 check return ((data >> 24) & 0xf); else @@ -727,8 +802,9 @@ ZZOglGet_csa_TexBits(u32 data) { // Obtain cld -- -- from data. Bits 29-31 // (data & 0xe0000000)>>29 -inline u8 -ZZOglGet_cld_TexBits(u32 data) { +static __forceinline u8 ZZOglGet_cld_TexBits(u32 data) +{ + //return tex_0_info(0, data).cld; return ((data >> 29) & 0x7); } diff --git a/plugins/zzogl-pg/opengl/Mem.cpp b/plugins/zzogl-pg/opengl/Mem.cpp index 0dd2d81df9..55ad989da5 100644 --- a/plugins/zzogl-pg/opengl/Mem.cpp +++ b/plugins/zzogl-pg/opengl/Mem.cpp @@ -52,7 +52,7 @@ static __forceinline const T* AlignOnBlockBoundry(TransferData data, TransferFun { bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) && (alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx)); - + if ((gs.imageEndX - gs.trxpos.dx) % data.widthlimit) { /* hack */ @@ -64,6 +64,7 @@ static __forceinline const T* AlignOnBlockBoundry(TransferData data, TransferFun { /* don't transfer */ /*DEBUG_LOG("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/ + //ERROR_LOG("bad texture: testwidth = %d; data.widthlimit = %d\n", testwidth, data.widthlimit); gs.imageTransfer = -1; } bCanAlign = false; @@ -111,14 +112,14 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf /* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */ bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, data.transfersize) & 0xf) == 0; - /* transfer aligning to blocks */ + if ( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL))) + swizzle = (fun.Swizzle); + else + swizzle = (fun.Swizzle_u); + + //Transfer aligning to blocks. for(; tempY < alignedPt.y && nSize >= area; tempY += data.blockheight, nSize -= area) { - if ( bAligned || ((DSTPSM==PSMCT24) || (DSTPSM==PSMT8H) || (DSTPSM==PSMT4HH) || (DSTPSM==PSMT4HL))) - swizzle = (fun.Swizzle); - else - swizzle = (fun.Swizzle_u); - for(int tempj = gs.trxpos.dx; tempj < alignedPt.x; tempj += data.blockwidth, pbuf += TransPitch(data.blockwidth, data.transfersize)/TSize) { u8 *temp = pstart + fun.gp(tempj, tempY, gs.dstbuf.bw) * data.blockbits/8; @@ -126,7 +127,7 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf } /* transfer the rest */ - if( alignedPt.x < gs.imageEndX ) + if (alignedPt.x < gs.imageEndX) { pbuf = TransmitHostLocalX(data, fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf); if (pbuf == NULL) return NULL; @@ -139,7 +140,7 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf } tempX = gs.trxpos.dx; - } + } return pbuf; } @@ -151,7 +152,7 @@ static __forceinline int FinishTransfer(TransferData data, int nLeftOver) gs.imageTransfer = -1; /*int start, end; ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw); - ZeroGS::g_MemTargs.ClearRange(start, end);*/ + ZeroGS::g_MemTargs.ClearRange(start, end);*/ } else { diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index 53b5f26d0b..06acef34d8 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -1576,7 +1576,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T* switch(psm) { case PSMT8: - for(int i = 0; i < height; ++i) + for(u32 i = 0; i < height; ++i) { for(int j = 0; j < GPU_TEXWIDTH/2; ++j) { @@ -1595,7 +1595,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T* break; case PSMT4: - for(int i = 0; i < height; ++i) + for(u32 i = 0; i < height; ++i) { for(int j = 0; j < GPU_TEXWIDTH; ++j) { @@ -1615,7 +1615,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T* break; case PSMT8H: - for(int i = 0; i < height; ++i) + for(u32 i = 0; i < height; ++i) { for(int j = 0; j < GPU_TEXWIDTH/8; ++j) { @@ -1634,7 +1634,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T* break; case PSMT4HH: - for(int i = 0; i < height; ++i) + for(u32 i = 0; i < height; ++i) { for(int j = 0; j < GPU_TEXWIDTH/8; ++j) { @@ -1653,7 +1653,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T* break; case PSMT4HL: - for(int i = 0; i < height; ++i) + for(u32 i = 0; i < height; ++i) { for(int j = 0; j < GPU_TEXWIDTH/8; ++j) { @@ -1883,11 +1883,11 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info ZeroGS::CMemoryTarget* it = MemoryTarget_SearchExistTarget (start, end, nClutOffset, clutsize, tex0, forcevalidate); if (it != NULL) return it; - // couldn't find so create + // couldn't find, so create. CMemoryTarget* targ; u32 fmt = GL_UNSIGNED_BYTE; -// if ((PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1) || tex0.psm == PSMCT16 || tex0.psm == PSMCT16S) { + if (PSMT_ISHALF_STORAGE(tex0)) { fmt = GL_UNSIGNED_SHORT_1_5_5_5_REV; @@ -2299,141 +2299,160 @@ u32 ZeroGS::CBitwiseTextureMngr::GetTexInt(u32 bitvalue, u32 ptexDoNotDelete) return ptex; } +static __forceinline void RangeSanityCheck() +{ +#ifdef _DEBUG + // sanity check + for(int i = 0; i < (int)ranges.size()-1; ++i) + { + assert( ranges[i].end < ranges[i+1].start ); + } +#endif +} + void ZeroGS::CRangeManager::Insert(int start, int end) { FUNCLOG int imin = 0, imax = (int)ranges.size(), imid; -#ifdef _DEBUG - // sanity check - for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start ); -#endif - - switch( ranges.size() ) { + RangeSanityCheck(); + + switch(ranges.size()) + { case 0: ranges.push_back(RANGE(start, end)); return; case 1: - if( end < ranges.front().start ) { + if (end < ranges.front().start) + { ranges.insert(ranges.begin(), RANGE(start, end)); } - else if( start > ranges.front().end ) { + else if (start > ranges.front().end) + { ranges.push_back(RANGE(start, end)); } - else { - if( start < ranges.front().start ) ranges.front().start = start; - if( end > ranges.front().end ) ranges.front().end = end; + else + { + if (start < ranges.front().start) ranges.front().start = start; + if (end > ranges.front().end) ranges.front().end = end; } return; } // find where start is - while(imin < imax) { + while(imin < imax) + { imid = (imin+imax)>>1; assert( imid < (int)ranges.size() ); - if( ranges[imid].end >= start && (imid == 0 || ranges[imid-1].end < start) ) { + if ((ranges[imid].end >= start) && ((imid == 0) || (ranges[imid-1].end < start))) + { imin = imid; break; } - else if( ranges[imid].start > start ) imax = imid; - else imin = imid+1; + else if (ranges[imid].start > start) + { + imax = imid; + } + else + { + imin = imid + 1; + } } int startindex = imin; - if( startindex >= (int)ranges.size() ) { + if (startindex >= (int)ranges.size()) + { // non intersecting assert( start > ranges.back().end ); ranges.push_back(RANGE(start, end)); return; } - if( startindex == 0 && end < ranges.front().start ) { + if (startindex == 0 && end < ranges.front().start) + { ranges.insert(ranges.begin(), RANGE(start, end)); - -#ifdef _DEBUG - // sanity check - for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start ); -#endif + RangeSanityCheck(); return; } - imin = 0; imax = (int)ranges.size(); + imin = 0; + imax = (int)ranges.size(); // find where end is - while(imin < imax) { - imid = (imin+imax)>>1; + while(imin < imax) + { + imid = (imin + imax) >> 1; - assert( imid < (int)ranges.size() ); + assert(imid < (int)ranges.size()); - if( ranges[imid].end <= end && (imid == ranges.size()-1 || ranges[imid+1].start > end ) ) { + if ((ranges[imid].end <= end) && ((imid == ranges.size() - 1) || (ranges[imid+1].start > end))) + { imin = imid; break; } - else if( ranges[imid].start >= end ) imax = imid; - else imin = imid+1; + else if (ranges[imid].start >= end) + { + imax = imid; + } + else + { + imin = imid + 1; + } } int endindex = imin; - if( startindex > endindex ) { + if (startindex > endindex) + { // create a new range - ranges.insert(ranges.begin()+startindex, RANGE(start, end)); - -#ifdef _DEBUG - // sanity check - for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start ); -#endif + ranges.insert(ranges.begin() + startindex, RANGE(start, end)); + RangeSanityCheck(); return; } - if( endindex >= (int)ranges.size()-1 ) { + if (endindex >= (int)ranges.size() - 1) + { // pop until startindex is reached int lastend = ranges.back().end; int numpop = (int)ranges.size() - startindex - 1; - while(numpop-- > 0 ) ranges.pop_back(); + + while(numpop-- > 0 ) + { + ranges.pop_back(); + } assert( start <= ranges.back().end ); - if( start < ranges.back().start ) ranges.back().start = start; - if( lastend > ranges.back().end ) ranges.back().end = lastend; - if( end > ranges.back().end ) ranges.back().end = end; - -#ifdef _DEBUG - // sanity check - for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start ); -#endif - + if (start < ranges.back().start) ranges.back().start = start; + if (lastend > ranges.back().end) ranges.back().end = lastend; + if (end > ranges.back().end) ranges.back().end = end; + RangeSanityCheck(); return; } - if( endindex == 0 ) { + if( endindex == 0 ) + { assert( end >= ranges.front().start ); - if( start < ranges.front().start ) ranges.front().start = start; - if( end > ranges.front().end ) ranges.front().end = end; - -#ifdef _DEBUG - // sanity check - for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start ); -#endif + if (start < ranges.front().start) ranges.front().start = start; + if (end > ranges.front().end) ranges.front().end = end; + RangeSanityCheck(); } // somewhere in the middle - if( ranges[startindex].start < start ) start = ranges[startindex].start; + if (ranges[startindex].start < start) start = ranges[startindex].start; - if( startindex < endindex ) { + if (startindex < endindex) + { ranges.erase(ranges.begin() + startindex, ranges.begin() + endindex ); } - if( start < ranges[startindex].start ) ranges[startindex].start = start; - if( end > ranges[startindex].end ) ranges[startindex].end = end; - -#ifdef _DEBUG - // sanity check - for(int i = 0; i < (int)ranges.size()-1; ++i) assert( ranges[i].end < ranges[i+1].start ); -#endif + if (start < ranges[startindex].start) ranges[startindex].start = start; + if (end > ranges[startindex].end) ranges[startindex].end = end; + + RangeSanityCheck(); } namespace ZeroGS { diff --git a/plugins/zzogl-pg/opengl/zerogs.cpp b/plugins/zzogl-pg/opengl/zerogs.cpp index 2a19b6dc29..0a32e52c8f 100644 --- a/plugins/zzogl-pg/opengl/zerogs.cpp +++ b/plugins/zzogl-pg/opengl/zerogs.cpp @@ -820,37 +820,35 @@ void ZeroGS::ExtWrite() // Caches // //////////// -bool ZeroGS::CheckChangeInClut(u32 highdword, u32 psm) -{ - FUNCLOG - int cld = ZZOglGet_cld_TexBits(highdword); - int cbp = ZZOglGet_cbp_TexBits(highdword); + +// case 0: return false; +// case 1: break; +// case 2: m_CBP[0] = TEX0.CBP; break; +// case 3: m_CBP[1] = TEX0.CBP; break; +// case 4: if(m_CBP[0] == TEX0.CBP) return false; m_CBP[0] = TEX0.CBP; break; +// case 5: if(m_CBP[1] == TEX0.CBP) return false; m_CBP[1] = TEX0.CBP; break; +// case 6: ASSERT(0); return false; // ffx2 menu +// case 7: ASSERT(0); return false; +// default: __assume(0); - // processing the CLUT after tex0/2 are written - switch(cld) { - case 0: return false; - case 1: break; // Seems to rarely not be 1. - // note sure about changing cbp[0,1] - case 4: return gs.cbp[0] != cbp; - case 5: return gs.cbp[1] != cbp; - - // default: load - default: break; - } - +bool IsDirty(u32 highdword, u32 psm, int cld, int cbp) +{ int cpsm = ZZOglGet_cpsm_TexBits(highdword); int csm = ZZOglGet_csm_TexBits(highdword); - if( cpsm > 1 || csm ) + if (cpsm > 1 || csm) + { + ERROR_LOG("16 bit clut not supported.\n"); // don't support 16bit for now return true; + } int csa = ZZOglGet_csa_TexBits(highdword); int entries = PSMT_IS8CLUT(psm) ? 256 : 16; - u64* src = (u64*)(g_pbyGSMemory + cbp*256); - u64* dst = (u64*)(g_pbyGSClut+64*csa); + u64* src = (u64*)(g_pbyGSMemory + cbp * 256); + u64* dst = (u64*)(g_pbyGSClut + 64 * csa); bool bRet = false; @@ -976,34 +974,66 @@ Return: ".att_syntax\n" : "=m"(bRet) : "c"(dst), "d"(src), "b"(entries) : "eax", "memory"); #endif // _WIN32 +return bRet; +} - return bRet; +// cld state: +// 000 - clut data is not loaded; data in the temp buffer is stored +// 001 - clut data is always loaded. +// 010 - clut data is always loaded; cbp0 = cbp. +// 011 - clut data is always loadedl cbp1 = cbp. +// 100 - cbp0 is compared with cbp. if different, clut data is loaded. +// 101 - cbp1 is compared with cbp. if different, clut data is loaded. + +// GSdx sets cbp0 & cbp1 when checking for clut changes. ZeroGS sets them in texClutWrite. +bool ZeroGS::CheckChangeInClut(u32 highdword, u32 psm) +{ + FUNCLOG + int cld = ZZOglGet_cld_TexBits(highdword); + int cbp = ZZOglGet_cbp_TexBits(highdword); + + // processing the CLUT after tex0/2 are written + ERROR_LOG("high == 0x%x; cld == %d\n", highdword, cld); + switch(cld) { + case 0: return false; + case 1: break; + case 2: break; + case 3: break; + case 4: if (gs.cbp[0] == cbp) return false; break; + case 5: if (gs.cbp[1] == cbp) return false; break; + + //case 4: return gs.cbp[0] != cbp; + //case 5: return gs.cbp[1] != cbp; + + // default: load + default: break; + } + + return IsDirty(highdword, psm, cld, cbp); } void ZeroGS::texClutWrite(int ctx) { FUNCLOG s_bTexFlush = 0; - if( g_bIsLost ) - return; + if (g_bIsLost) return; tex0Info& tex0 = vb[ctx].tex0; assert( PSMT_ISCLUT(tex0.psm) ); + // processing the CLUT after tex0/2 are written - switch(tex0.cld) { + switch(tex0.cld) + { case 0: return; case 1: break; // tex0.cld is usually 1. case 2: gs.cbp[0] = tex0.cbp; break; case 3: gs.cbp[1] = tex0.cbp; break; - // not sure about changing cbp[0,1] case 4: - if( gs.cbp[0] == tex0.cbp ) - return; + if (gs.cbp[0] == tex0.cbp) return; gs.cbp[0] = tex0.cbp; break; case 5: - if( gs.cbp[1] == tex0.cbp ) - return; + if (gs.cbp[1] == tex0.cbp) return; gs.cbp[1] = tex0.cbp; break; default: //DEBUG_LOG("cld isn't 0-5!");