From cfb4ba8b2bd4803795fe867a8e4e65af7bdfa5fc Mon Sep 17 00:00:00 2001 From: arcum42 Date: Mon, 20 Sep 2010 10:09:46 +0000 Subject: [PATCH] GregMiscellaneous: zzogl-pg: Apply Zeydlitz's changes from r237 of zzogl. Improves code readability, and gives a slight speedup. git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3810 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/GS.h | 2 ++ plugins/zzogl-pg/opengl/ZZoglFlush.cpp | 4 +-- plugins/zzogl-pg/opengl/ZZoglShoots.cpp | 8 ++--- plugins/zzogl-pg/opengl/targets.cpp | 45 +++++++++++++------------ plugins/zzogl-pg/opengl/zerogs.h | 34 ++++++++++++++++--- 5 files changed, 62 insertions(+), 31 deletions(-) diff --git a/plugins/zzogl-pg/opengl/GS.h b/plugins/zzogl-pg/opengl/GS.h index 23fb55613d..1614ccbf74 100644 --- a/plugins/zzogl-pg/opengl/GS.h +++ b/plugins/zzogl-pg/opengl/GS.h @@ -205,6 +205,8 @@ inline bool PSMT_HAS_SHARED_BITS (int fpsm, int tpsm) { return (SUM == 0x15 || SUM == 0x1D || SUM == 0x2C || SUM == 0x30); } +// If a clut is in 32-bit color, its size is 4 bytes, and 16-bit clut has a 2 byte size. +inline int CLUT_PIXEL_SIZE(int cpsm) {return ((cpsm <= 1) ? 
4 : 2); } //----------------------- Data from registers ----------------------- diff --git a/plugins/zzogl-pg/opengl/ZZoglFlush.cpp b/plugins/zzogl-pg/opengl/ZZoglFlush.cpp index a457cebd8a..587cc619cb 100644 --- a/plugins/zzogl-pg/opengl/ZZoglFlush.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglFlush.cpp @@ -2296,8 +2296,8 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset"); // get hardware texture dims - //int texheight = (pmemtarg->realheight+pmemtarg->widthmult-1)/pmemtarg->widthmult; - int texwidth = GPU_TEXWIDTH * pmemtarg->widthmult * pmemtarg->channels; + //int texheight = pmemtarg->texH; + int texwidth = pmemtarg->texW; v.y = 1.0f; v.x = (fpageint - (float)pmemtarg->realy / (float)pmemtarg->widthmult + 0.5f);//*v.y; diff --git a/plugins/zzogl-pg/opengl/ZZoglShoots.cpp b/plugins/zzogl-pg/opengl/ZZoglShoots.cpp index 3e10705205..6dfd0f5418 100644 --- a/plugins/zzogl-pg/opengl/ZZoglShoots.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglShoots.cpp @@ -392,16 +392,16 @@ ZeroGS::SaveTex(tex0Info* ptex, int usevid) assert(pmemtarg != NULL); glBindTexture(GL_TEXTURE_RECTANGLE_NV, pmemtarg->ptex->tex); - srcdata.resize(pmemtarg->realheight * GPU_TEXWIDTH * pmemtarg->widthmult * 4 * 8); // max of 8 cannels + srcdata.resize(4 * pmemtarg->texW * pmemtarg->texH); glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, pmemtarg->fmt, &srcdata[0]); - u32 offset = pmemtarg->realy * 4 * GPU_TEXWIDTH; + u32 offset = MemorySize(pmemtarg->realy); if (ptex->psm == PSMT8) - offset *= PSMT_IS32BIT(ptex->cpsm) ? 4 : 2; + offset *= CLUT_PIXEL_SIZE(ptex->cpsm); else if (ptex->psm == PSMT4) - offset *= PSMT_IS32BIT(ptex->cpsm) ? 
8 : 4; + offset *= CLUT_PIXEL_SIZE(ptex->cpsm) * 2; psrc = &srcdata[0] - offset; } diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index 573e685125..4f72a1d73c 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -1730,8 +1730,8 @@ bool ZeroGS::CMemoryTarget::ValidateTex(const tex0Info& tex0, int starttex, int // lock and compare assert(ptex != NULL && ptex->memptr != NULL); - int result = memcmp_mmx(ptex->memptr + (checkstarty - realy) * 4 * GPU_TEXWIDTH, g_pbyGSMemory + checkstarty * 4 * GPU_TEXWIDTH, (checkendy - checkstarty) * 4 * GPU_TEXWIDTH); - + int result = memcmp_mmx(ptex->memptr + MemorySize(checkstarty-realy), MemoryAddress(checkstarty), MemorySize(checkendy-checkstarty)); + if (result == 0) { clearmaxy = 0; @@ -2054,7 +2054,7 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::MemoryTarget_ClearedTargetsSea while (itbest != listClearedTargets.end()) { - if ((height <= itbest->realheight) && (itbest->fmt == fmt) && (itbest->widthmult == widthmult) && (itbest->channels == channels)) + if ((height == itbest->realheight) && (itbest->fmt == fmt) && (itbest->widthmult == widthmult) && (itbest->channels == channels)) { // check channels if (PIXELS_PER_WORD(itbest->psm) == channels) break; @@ -2107,8 +2107,14 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info int widthmult = 1, channels = 1; - // If our texture is too big and could not be placed in 1 GPU texture. Pretty rare. - if ((g_MaxTexHeight < 4096) && (end - start > g_MaxTexHeight)) widthmult = 2; + // If our texture is too big and could not be placed in 1 GPU texture. Pretty rare in modern cards. + if ((g_MaxTexHeight < 4096) && (end - start > g_MaxTexHeight)) + { + // In this rare case we made a texture of half height and place it on the screen. 
+ ZZLog::Debug_Log("Making a half height texture (start - end == 0x%x)", (end-start)); + widthmult = 2; + } + channels = PIXELS_PER_WORD(tex0.psm); targ = MemoryTarget_ClearedTargetsSearch(fmt, widthmult, channels, end - start); @@ -2180,6 +2186,8 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info targ->cpsm = tex0.cpsm; targ->widthmult = widthmult; targ->channels = channels; + targ->texH = (targ->realheight + widthmult - 1)/widthmult; + targ->texW = GPU_TEXWIDTH * widthmult * channels; // alloc the mem targ->ptex = new CMemoryTarget::TEXTURE(); @@ -2187,30 +2195,27 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info } #if defined(ZEROGS_DEVBUILD) - g_TransferredToGPU += GPU_TEXWIDTH * channels * 4 * targ->height; + g_TransferredToGPU += MemorySize(channels * targ->height); #endif - const int texH = (targ->realheight + widthmult - 1) / widthmult; - const int texW = GPU_TEXWIDTH * channels * widthmult; - // fill with data if (targ->ptex->memptr == NULL) { - targ->ptex->memptr = (u8*)_aligned_malloc(4 * GPU_TEXWIDTH * targ->realheight, 16); + targ->ptex->memptr = (u8*)_aligned_malloc(MemorySize(targ->realheight), 16); assert(targ->ptex->ref > 0); } - memcpy_amd(targ->ptex->memptr, g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy, 4 * GPU_TEXWIDTH * targ->height); + memcpy_amd(targ->ptex->memptr, MemoryAddress(targ->realy), MemorySize(targ->height)); __aligned16 u8* ptexdata = NULL; bool has_data = false; if (PSMT_ISCLUT(tex0.psm)) { - ptexdata = (u8*)_aligned_malloc(((tex0.cpsm <= 1) ? 
4 : 2) * texW * texH, 16); + ptexdata = (u8*)_aligned_malloc(CLUT_PIXEL_SIZE(tex0.cpsm) * targ->texH * targ->texW, 16); has_data = true; - u8* psrc = (u8*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy); + u8* psrc = (u8*)(MemoryAddress(targ->realy)); if (PSMT_IS32BIT(tex0.cpsm)) { @@ -2231,12 +2236,12 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info { if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ) { - ptexdata = (u8*)_aligned_malloc(4 * texW * texH, 16); + ptexdata = (u8*)_aligned_malloc(4 * targ->texH * targ->texW, 16); has_data = true; // needs to be 8 bit, use xmm for unpacking u16* dst = (u16*)ptexdata; - u16* src = (u16*)(g_pbyGSMemory + 4 * GPU_TEXWIDTH * targ->realy); + u16* src = (u16*)(MemoryAddress(targ->realy)); #if defined(ZEROGS_SSE2) assert(((u32)(uptr)dst) % 16 == 0); @@ -2276,11 +2281,9 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info glBindTexture(GL_TEXTURE_RECTANGLE_NV, targ->ptex->tex); if (fmt == GL_UNSIGNED_BYTE) - TextureRect(GL_RGBA, texW, texH, GL_RGBA, fmt, ptexdata); + TextureRect(GL_RGBA, targ->texW, targ->texH, GL_RGBA, fmt, ptexdata); else - TextureRect(GL_RGB5_A1, texW, texH, GL_RGBA, fmt, ptexdata); - - int realheight = targ->realheight; + TextureRect(GL_RGB5_A1, targ->texW, targ->texH, GL_RGBA, fmt, ptexdata); while (glGetError() != GL_NO_ERROR) { @@ -2293,7 +2296,7 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info { if (listTargets.size() == 0) { - ZZLog::Error_Log("Failed to create %dx%x texture.", GPU_TEXWIDTH*channels*widthmult, (realheight + widthmult - 1) / widthmult); + ZZLog::Error_Log("Failed to create %dx%x texture.", targ->texW, targ->texH); channels = 1; if (has_data) _aligned_free(ptexdata); return NULL; @@ -2302,7 +2305,7 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info DestroyOldest(); } - TextureRect(GL_RGBA, texW, texH, GL_RGBA, fmt, ptexdata); + TextureRect(GL_RGBA, 
targ->texW, targ->texH, GL_RGBA, fmt, ptexdata); } setRectWrap(GL_CLAMP); diff --git a/plugins/zzogl-pg/opengl/zerogs.h b/plugins/zzogl-pg/opengl/zerogs.h index c9365593be..a56720f760 100644 --- a/plugins/zzogl-pg/opengl/zerogs.h +++ b/plugins/zzogl-pg/opengl/zerogs.h @@ -231,6 +231,8 @@ class CMemoryTarget clearminy = r.clearminy; clearmaxy = r.clearmaxy; widthmult = r.widthmult; + texH = r.texH; + texW = r.texW; channels = r.channels; validatecount = r.validatecount; fmt = r.fmt; @@ -261,14 +263,20 @@ class CMemoryTarget int starty, height; // assert(starty >= realy) int realy, realheight; // this is never touched once allocated + // realy is the start of the data in the 4M data block (start); realheight is its size (end-start). + u32 usedstamp; u8 psm, cpsm; // texture and clut format. For psm, only 16bit/32bit differentiation matters u32 fmt; - int widthmult; - int channels; - int clearminy, clearmaxy; // when maxy > 0, need to check for clearing + int widthmult; // Either 1 or 2. + int channels; // The number of pixels per PSM format word. channels == PIXELS_PER_WORD(psm) + // This is the real drawing size in pixels of the texture in renderbuffer. + int texW; // GPU_TEXWIDTH * widthmult * channels; + int texH; // (realheight + widthmult - 1)/widthmult == realheight or [(realheight+1)/2] + + int clearminy, clearmaxy; // when maxy > 0, need to check for clearing int validatecount; // count how many times has been validated, if too many, destroy @@ -514,7 +522,25 @@ inline void CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx) tex0->cld = ZZOglGet_cld_TexBits(Data); ZeroGS::texClutWrite(ictx); + }; + +// The size in bytes of x rows (of texture). +inline int MemorySize(int x) +{ + return 4 * GPU_TEXWIDTH * x; } -}; +// Return the address in memory of the data block for row x. +inline u8* MemoryAddress(int x) +{ + return g_pbyGSMemory + MemorySize(x); +} + +template <u32 mult> +inline u8* _MemoryAddress(int x) +{ + return g_pbyGSMemory + mult * x; +} + +}; #endif