diff --git a/plugins/zzogl-pg/opengl/Linux/Linux.cpp b/plugins/zzogl-pg/opengl/Linux/Linux.cpp index df1e87f63c..cddd200b23 100644 --- a/plugins/zzogl-pg/opengl/Linux/Linux.cpp +++ b/plugins/zzogl-pg/opengl/Linux/Linux.cpp @@ -234,7 +234,8 @@ void DisplayAdvancedDialog() dialog = gtk_dialog_new(); gtk_window_set_title(GTK_WINDOW(dialog), "ZZOgl PG Advanced Config"); - gtk_window_set_default_size(GTK_WINDOW(dialog), 600, 600); + // A good value for the height would be 1000 instead of 800, but I'm afraid that some people still use small screens... + gtk_window_set_default_size(GTK_WINDOW(dialog), 600, 800); gtk_window_set_modal(GTK_WINDOW(dialog), true); advanced_box = gtk_vbox_new(false, 5); diff --git a/plugins/zzogl-pg/opengl/ZZoglCreate.cpp b/plugins/zzogl-pg/opengl/ZZoglCreate.cpp index 2107c41b4b..279986e93f 100644 --- a/plugins/zzogl-pg/opengl/ZZoglCreate.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglCreate.cpp @@ -188,7 +188,7 @@ inline bool CreateImportantCheck() if (!IsGLExt("GL_EXT_framebuffer_object")) { - ZZLog::Error_Log("*********\nZZogl: ERROR: Need GL_EXT_framebufer_object for multiple render targets\nZZogl: *********"); + ZZLog::Error_Log("*********\nZZogl: ERROR: Need GL_EXT_framebuffer_object for multiple render targets\nZZogl: *********"); bSuccess = false; } diff --git a/plugins/zzogl-pg/opengl/ZZoglFlush.cpp b/plugins/zzogl-pg/opengl/ZZoglFlush.cpp index 731f0fcadc..0c7b888a5f 100644 --- a/plugins/zzogl-pg/opengl/ZZoglFlush.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglFlush.cpp @@ -26,6 +26,7 @@ #include "targets.h" #include "ZZoglFlushHack.h" #include "ZZoglShaders.h" +#include "ZZClut.h" #include //------------------ Defines @@ -337,14 +338,9 @@ inline void VisualBufferMessage(int context) curvb.tex0.th, curvb.tex0.tcc, curvb.tex0.tfx, curvb.tex0.cbp, curvb.tex0.cpsm, curvb.tex0.csm, curvb.tex0.csa, curvb.tex0.cld); char* Name; -// if (g_bSaveTex) { -// if (g_bSaveTex == 1) Name = NamedSaveTex(&curvb.tex0, 1); -// else -// Name = NamedSaveTex(&curvb.tex0, 
0); ZZLog::Error_Log("TGA name '%s'.", Name); free(Name); -// } ZZLog::Debug_Log("buffer %ld.\n", BufferNumber); #endif } @@ -730,57 +726,19 @@ inline void FlushDecodeClut(VB& curvb, GLuint& ptexclut) if (ptexclut != 0) { - - int nClutOffset = 0, clutsize; + int clutsize; int entries = PSMT_IS8CLUT(curvb.tex0.psm) ? 256 : 16; if (curvb.tex0.csm && curvb.tex0.csa) ZZLog::Debug_Log("ERROR, csm1."); - if (PSMT_IS32BIT(curvb.tex0.cpsm)) // 32 bit - { - nClutOffset = 64 * curvb.tex0.csa; + if (PSMT_IS32BIT(curvb.tex0.cpsm)) { clutsize = min(entries, 256 - curvb.tex0.csa * 16) * 4; - } - else - { - nClutOffset = 64 * (curvb.tex0.csa & 15) + (curvb.tex0.csa >= 16 ? 2 : 0); + ClutBuffer_to_Array((u32*)&data[0], curvb.tex0.csa, clutsize); + } else { clutsize = min(entries, 512 - curvb.tex0.csa * 16) * 2; - } - - if (PSMT_IS32BIT(curvb.tex0.cpsm)) // 32 bit - { - memcpy_amd(&data[0], g_pbyGSClut + nClutOffset, clutsize); - } - else - { - u16* pClutBuffer = (u16*)(g_pbyGSClut + nClutOffset); - u16* pclut = (u16*) & data[0]; - int left = ((u32)nClutOffset & 2) ? 0 : ((nClutOffset & 0x3ff) / 2) + clutsize - 512; - - if (left > 0) clutsize -= left; - - while (clutsize > 0) - { - pclut[0] = pClutBuffer[0]; - pclut++; - pClutBuffer += 2; - clutsize -= 2; - } - - if (left > 0) - { - pClutBuffer = (u16*)(g_pbyGSClut + 2); - - while (left > 0) - { - pclut[0] = pClutBuffer[0]; - left -= 2; - pClutBuffer += 2; - pclut++; - } - } - } + ClutBuffer_to_Array((u16*)&data[0], curvb.tex0.csa, clutsize); + } GLenum tempType = PSMT_ISHALF_STORAGE(curvb.tex0) ? 
GL_UNSIGNED_SHORT_5_5_5_1 : GL_UNSIGNED_BYTE; Texture2D(4, 256, 1, GL_RGBA, tempType, &data[0]); @@ -987,6 +945,7 @@ inline FRAGMENTSHADER* FlushMadeNewTarget(VB& curvb, int exactcolor, int context // save the texture if (g_bSaveTex) { + // FIXME: I suspect one of g_bSaveTex test variable is wrong if (g_bSaveTex == 1) { SaveTex(&curvb.tex0, 1); diff --git a/plugins/zzogl-pg/opengl/ZZoglShoots.cpp b/plugins/zzogl-pg/opengl/ZZoglShoots.cpp index 7455f0fd6e..eb174e9ef0 100644 --- a/plugins/zzogl-pg/opengl/ZZoglShoots.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglShoots.cpp @@ -395,6 +395,8 @@ SaveTex(tex0Info* ptex, int usevid) glBindTexture(GL_TEXTURE_RECTANGLE_NV, pmemtarg->ptex->tex); srcdata.resize(4 * pmemtarg->texW * pmemtarg->texH); + // FIXME strangely this function call seem to crash pcsx2 on atelier of iris 1 + // Note: fmt is GL_UNSIGNED_SHORT_1_5_5_5_REV glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, pmemtarg->fmt, &srcdata[0]); u32 offset = MemorySize(pmemtarg->realy); @@ -613,6 +615,9 @@ SaveTex(tex0Info* ptex, int usevid) snprintf(Name, TGA_FILE_NAME_MAX_LENGTH, "Tex.%d.tga", TexNumber); SaveTGA(Name, ptex->tw, ptex->th, &data[0]); + + TexNumber++; + if (TexNumber > MAX_NUMBER_SAVED_TGA) TexNumber = 0; } @@ -621,13 +626,10 @@ SaveTex(tex0Info* ptex, int usevid) char* NamedSaveTex(tex0Info* ptex, int usevid) { SaveTex(ptex, usevid); + char* Name = (char*)malloc(TGA_FILE_NAME_MAX_LENGTH); snprintf(Name, TGA_FILE_NAME_MAX_LENGTH, "Tex.%d.tga", TexNumber); - TexNumber++; - - if (TexNumber > MAX_NUMBER_SAVED_TGA) TexNumber = 0; - return Name; } diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index b956b4cea5..8b134a72ef 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -475,6 +475,9 @@ void CRenderTarget::Update(int context, CRenderTarget* pdepth) texframe.tw = fbw; texframe.th = fbh; texframe.psm = psm; + // FIXME some field are not initialized... 
+ // in particular the clut related one + assert(!PSMT_ISCLUT(psm)); // write color and zero out stencil buf, always 0 context! // force bilinear if using AA @@ -966,6 +969,9 @@ void CDepthTarget::Update(int context, CRenderTarget* prndr) texframe.tw = fbw; texframe.th = fbh; texframe.psm = psm; + // FIXME some field are not initialized... + // in particular the clut related one + assert(!PSMT_ISCLUT(psm)); DisableAllgl(); @@ -2017,96 +2023,93 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc assert(targ->clutsize > 0); } - else - { - if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ) - { - ptexdata = (u8*)_aligned_malloc(4 * targ->texH * targ->texW, 16); - has_data = true; + else if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ) + { + ptexdata = (u8*)_aligned_malloc(4 * targ->texH * targ->texW, 16); + has_data = true; - // needs to be 8 bit, use xmm for unpacking - u16* dst = (u16*)ptexdata; - u16* src = (u16*)(MemoryAddress(targ->realy)); + // needs to be 8 bit, use xmm for unpacking + u16* dst = (u16*)ptexdata; + u16* src = (u16*)(MemoryAddress(targ->realy)); #ifdef ZEROGS_SSE2 - assert(((u32)(uptr)dst) % 16 == 0); - // FIXME Uncomment to test intrinsic versions (instead of asm) - // perf improvement vs asm: - // 1/ gcc updates both pointer with 1 addition - // 2/ Bypass the cache for the store + assert(((u32)(uptr)dst) % 16 == 0); + // FIXME Uncomment to test intrinsic versions (instead of asm) + // perf improvement vs asm: + // 1/ gcc updates both pointer with 1 addition + // 2/ Bypass the cache for the store #define NEW_INTRINSIC_VERSION #ifdef NEW_INTRINSIC_VERSION - __m128i zero_128 = _mm_setzero_si128(); - // NOTE: future performance improvement - // SSE4.1 support uncacheable load 128bits. Maybe it can - // avoid some cache pollution - // NOTE2: I create multiple _n variable to mimic the previous ASM behavior - // but I'm not sure there are real gains. 
- for (int i = targ->height * GPU_TEXWIDTH/16 ; i > 0 ; --i) - { - // Convert 16 bits pixels to 32bits (zero extended) - // Batch 64 bytes (32 pixels) at once. - __m128i pixels_1 = _mm_load_si128((__m128i*)src); - __m128i pixels_2 = _mm_load_si128((__m128i*)(src+8)); - __m128i pixels_3 = _mm_load_si128((__m128i*)(src+16)); - __m128i pixels_4 = _mm_load_si128((__m128i*)(src+24)); + __m128i zero_128 = _mm_setzero_si128(); + // NOTE: future performance improvement + // SSE4.1 support uncacheable load 128bits. Maybe it can + // avoid some cache pollution + // NOTE2: I create multiple _n variable to mimic the previous ASM behavior + // but I'm not sure there are real gains. + for (int i = targ->height * GPU_TEXWIDTH/16 ; i > 0 ; --i) + { + // Convert 16 bits pixels to 32bits (zero extended) + // Batch 64 bytes (32 pixels) at once. + __m128i pixels_1 = _mm_load_si128((__m128i*)src); + __m128i pixels_2 = _mm_load_si128((__m128i*)(src+8)); + __m128i pixels_3 = _mm_load_si128((__m128i*)(src+16)); + __m128i pixels_4 = _mm_load_si128((__m128i*)(src+24)); - __m128i pix_low_1 = _mm_unpacklo_epi16(pixels_1, zero_128); - __m128i pix_high_1 = _mm_unpackhi_epi16(pixels_1, zero_128); - __m128i pix_low_2 = _mm_unpacklo_epi16(pixels_2, zero_128); - __m128i pix_high_2 = _mm_unpackhi_epi16(pixels_2, zero_128); + __m128i pix_low_1 = _mm_unpacklo_epi16(pixels_1, zero_128); + __m128i pix_high_1 = _mm_unpackhi_epi16(pixels_1, zero_128); + __m128i pix_low_2 = _mm_unpacklo_epi16(pixels_2, zero_128); + __m128i pix_high_2 = _mm_unpackhi_epi16(pixels_2, zero_128); - // Note: bypass cache - _mm_stream_si128((__m128i*)dst, pix_low_1); - _mm_stream_si128((__m128i*)(dst+8), pix_high_1); - _mm_stream_si128((__m128i*)(dst+16), pix_low_2); - _mm_stream_si128((__m128i*)(dst+24), pix_high_2); + // Note: bypass cache + _mm_stream_si128((__m128i*)dst, pix_low_1); + _mm_stream_si128((__m128i*)(dst+8), pix_high_1); + _mm_stream_si128((__m128i*)(dst+16), pix_low_2); + _mm_stream_si128((__m128i*)(dst+24), 
pix_high_2); - __m128i pix_low_3 = _mm_unpacklo_epi16(pixels_3, zero_128); - __m128i pix_high_3 = _mm_unpackhi_epi16(pixels_3, zero_128); - __m128i pix_low_4 = _mm_unpacklo_epi16(pixels_4, zero_128); - __m128i pix_high_4 = _mm_unpackhi_epi16(pixels_4, zero_128); + __m128i pix_low_3 = _mm_unpacklo_epi16(pixels_3, zero_128); + __m128i pix_high_3 = _mm_unpackhi_epi16(pixels_3, zero_128); + __m128i pix_low_4 = _mm_unpacklo_epi16(pixels_4, zero_128); + __m128i pix_high_4 = _mm_unpackhi_epi16(pixels_4, zero_128); - // Note: bypass cache - _mm_stream_si128((__m128i*)(dst+32), pix_low_3); - _mm_stream_si128((__m128i*)(dst+40), pix_high_3); - _mm_stream_si128((__m128i*)(dst+48), pix_low_4); - _mm_stream_si128((__m128i*)(dst+56), pix_high_4); + // Note: bypass cache + _mm_stream_si128((__m128i*)(dst+32), pix_low_3); + _mm_stream_si128((__m128i*)(dst+40), pix_high_3); + _mm_stream_si128((__m128i*)(dst+48), pix_low_4); + _mm_stream_si128((__m128i*)(dst+56), pix_high_4); - src += 32; - dst += 64; - } - // It is advise to use a fence instruction after non temporal move (mm_stream) instruction... - // store fence insures that previous store are finish before execute new one. - _mm_sfence(); + src += 32; + dst += 64; + } + // It is advise to use a fence instruction after non temporal move (mm_stream) instruction... + // store fence insures that previous store are finish before execute new one. 
+ _mm_sfence(); #else - SSE2_UnswizzleZ16Target(dst, src, targ->height * GPU_TEXWIDTH / 16); + SSE2_UnswizzleZ16Target(dst, src, targ->height * GPU_TEXWIDTH / 16); #endif #else // ZEROGS_SSE2 - for (int i = 0; i < targ->height; ++i) - { - for (int j = 0; j < GPU_TEXWIDTH; ++j) - { - dst[0] = src[0]; - dst[1] = 0; - dst[2] = src[1]; - dst[3] = 0; - dst += 4; - src += 2; - } - } + for (int i = 0; i < targ->height; ++i) + { + for (int j = 0; j < GPU_TEXWIDTH; ++j) + { + dst[0] = src[0]; + dst[1] = 0; + dst[2] = src[1]; + dst[3] = 0; + dst += 4; + src += 2; + } + } #endif // ZEROGS_SSE2 - } - else - { - ptexdata = targ->ptex->memptr; - // We really don't want to deallocate memptr. As a reminder... - has_data = false; - } - } + } + else + { + ptexdata = targ->ptex->memptr; + // We really don't want to deallocate memptr. As a reminder... + has_data = false; + } // create the texture GL_REPORT_ERRORD();