* increase a little the hack window (better for screenshot, not too big for small screen)
* Use generic clut function in FlushDecodeClut
* Various clean and comment


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4113 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut@gmail.com 2010-12-20 19:57:50 +00:00
parent 39780dcc10
commit 6a96e46920
5 changed files with 93 additions and 128 deletions

View File

@ -234,7 +234,8 @@ void DisplayAdvancedDialog()
dialog = gtk_dialog_new();
gtk_window_set_title(GTK_WINDOW(dialog), "ZZOgl PG Advanced Config");
gtk_window_set_default_size(GTK_WINDOW(dialog), 600, 600);
// A good value for the heigh will be 1000 instead of 800 but I'm afraid that some people still uses small screen...
gtk_window_set_default_size(GTK_WINDOW(dialog), 600, 800);
gtk_window_set_modal(GTK_WINDOW(dialog), true);
advanced_box = gtk_vbox_new(false, 5);

View File

@ -188,7 +188,7 @@ inline bool CreateImportantCheck()
if (!IsGLExt("GL_EXT_framebuffer_object"))
{
ZZLog::Error_Log("*********\nZZogl: ERROR: Need GL_EXT_framebufer_object for multiple render targets\nZZogl: *********");
ZZLog::Error_Log("*********\nZZogl: ERROR: Need GL_EXT_framebuffer_object for multiple render targets\nZZogl: *********");
bSuccess = false;
}

View File

@ -26,6 +26,7 @@
#include "targets.h"
#include "ZZoglFlushHack.h"
#include "ZZoglShaders.h"
#include "ZZClut.h"
#include <math.h>
//------------------ Defines
@ -337,14 +338,9 @@ inline void VisualBufferMessage(int context)
curvb.tex0.th, curvb.tex0.tcc, curvb.tex0.tfx, curvb.tex0.cbp,
curvb.tex0.cpsm, curvb.tex0.csm, curvb.tex0.csa, curvb.tex0.cld);
char* Name;
// if (g_bSaveTex) {
// if (g_bSaveTex == 1)
Name = NamedSaveTex(&curvb.tex0, 1);
// else
// Name = NamedSaveTex(&curvb.tex0, 0);
ZZLog::Error_Log("TGA name '%s'.", Name);
free(Name);
// }
ZZLog::Debug_Log("buffer %ld.\n", BufferNumber);
#endif
}
@ -730,57 +726,19 @@ inline void FlushDecodeClut(VB& curvb, GLuint& ptexclut)
if (ptexclut != 0)
{
int nClutOffset = 0, clutsize;
int clutsize;
int entries = PSMT_IS8CLUT(curvb.tex0.psm) ? 256 : 16;
if (curvb.tex0.csm && curvb.tex0.csa)
ZZLog::Debug_Log("ERROR, csm1.");
if (PSMT_IS32BIT(curvb.tex0.cpsm)) // 32 bit
{
nClutOffset = 64 * curvb.tex0.csa;
if (PSMT_IS32BIT(curvb.tex0.cpsm)) {
clutsize = min(entries, 256 - curvb.tex0.csa * 16) * 4;
}
else
{
nClutOffset = 64 * (curvb.tex0.csa & 15) + (curvb.tex0.csa >= 16 ? 2 : 0);
ClutBuffer_to_Array<u32>((u32*)&data[0], curvb.tex0.csa, clutsize);
} else {
clutsize = min(entries, 512 - curvb.tex0.csa * 16) * 2;
}
if (PSMT_IS32BIT(curvb.tex0.cpsm)) // 32 bit
{
memcpy_amd(&data[0], g_pbyGSClut + nClutOffset, clutsize);
}
else
{
u16* pClutBuffer = (u16*)(g_pbyGSClut + nClutOffset);
u16* pclut = (u16*) & data[0];
int left = ((u32)nClutOffset & 2) ? 0 : ((nClutOffset & 0x3ff) / 2) + clutsize - 512;
if (left > 0) clutsize -= left;
while (clutsize > 0)
{
pclut[0] = pClutBuffer[0];
pclut++;
pClutBuffer += 2;
clutsize -= 2;
}
if (left > 0)
{
pClutBuffer = (u16*)(g_pbyGSClut + 2);
while (left > 0)
{
pclut[0] = pClutBuffer[0];
left -= 2;
pClutBuffer += 2;
pclut++;
}
}
}
ClutBuffer_to_Array<u16>((u16*)&data[0], curvb.tex0.csa, clutsize);
}
GLenum tempType = PSMT_ISHALF_STORAGE(curvb.tex0) ? GL_UNSIGNED_SHORT_5_5_5_1 : GL_UNSIGNED_BYTE;
Texture2D(4, 256, 1, GL_RGBA, tempType, &data[0]);
@ -987,6 +945,7 @@ inline FRAGMENTSHADER* FlushMadeNewTarget(VB& curvb, int exactcolor, int context
// save the texture
if (g_bSaveTex)
{
// FIXME: I suspect one of g_bSaveTex test variable is wrong
if (g_bSaveTex == 1)
{
SaveTex(&curvb.tex0, 1);

View File

@ -395,6 +395,8 @@ SaveTex(tex0Info* ptex, int usevid)
glBindTexture(GL_TEXTURE_RECTANGLE_NV, pmemtarg->ptex->tex);
srcdata.resize(4 * pmemtarg->texW * pmemtarg->texH);
// FIXME strangely this function call seem to crash pcsx2 on atelier of iris 1
// Note: fmt is GL_UNSIGNED_SHORT_1_5_5_5_REV
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, pmemtarg->fmt, &srcdata[0]);
u32 offset = MemorySize(pmemtarg->realy);
@ -613,6 +615,9 @@ SaveTex(tex0Info* ptex, int usevid)
snprintf(Name, TGA_FILE_NAME_MAX_LENGTH, "Tex.%d.tga", TexNumber);
SaveTGA(Name, ptex->tw, ptex->th, &data[0]);
TexNumber++;
if (TexNumber > MAX_NUMBER_SAVED_TGA) TexNumber = 0;
}
@ -621,13 +626,10 @@ SaveTex(tex0Info* ptex, int usevid)
char* NamedSaveTex(tex0Info* ptex, int usevid)
{
SaveTex(ptex, usevid);
char* Name = (char*)malloc(TGA_FILE_NAME_MAX_LENGTH);
snprintf(Name, TGA_FILE_NAME_MAX_LENGTH, "Tex.%d.tga", TexNumber);
TexNumber++;
if (TexNumber > MAX_NUMBER_SAVED_TGA) TexNumber = 0;
return Name;
}

View File

@ -475,6 +475,9 @@ void CRenderTarget::Update(int context, CRenderTarget* pdepth)
texframe.tw = fbw;
texframe.th = fbh;
texframe.psm = psm;
// FIXME some field are not initialized...
// in particular the clut related one
assert(!PSMT_ISCLUT(psm));
// write color and zero out stencil buf, always 0 context!
// force bilinear if using AA
@ -966,6 +969,9 @@ void CDepthTarget::Update(int context, CRenderTarget* prndr)
texframe.tw = fbw;
texframe.th = fbh;
texframe.psm = psm;
// FIXME some field are not initialized...
// in particular the clut related one
assert(!PSMT_ISCLUT(psm));
DisableAllgl();
@ -2017,96 +2023,93 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
assert(targ->clutsize > 0);
}
else
{
if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ)
{
ptexdata = (u8*)_aligned_malloc(4 * targ->texH * targ->texW, 16);
has_data = true;
else if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ)
{
ptexdata = (u8*)_aligned_malloc(4 * targ->texH * targ->texW, 16);
has_data = true;
// needs to be 8 bit, use xmm for unpacking
u16* dst = (u16*)ptexdata;
u16* src = (u16*)(MemoryAddress(targ->realy));
// needs to be 8 bit, use xmm for unpacking
u16* dst = (u16*)ptexdata;
u16* src = (u16*)(MemoryAddress(targ->realy));
#ifdef ZEROGS_SSE2
assert(((u32)(uptr)dst) % 16 == 0);
// FIXME Uncomment to test intrinsic versions (instead of asm)
// perf improvement vs asm:
// 1/ gcc updates both pointer with 1 addition
// 2/ Bypass the cache for the store
assert(((u32)(uptr)dst) % 16 == 0);
// FIXME Uncomment to test intrinsic versions (instead of asm)
// perf improvement vs asm:
// 1/ gcc updates both pointer with 1 addition
// 2/ Bypass the cache for the store
#define NEW_INTRINSIC_VERSION
#ifdef NEW_INTRINSIC_VERSION
__m128i zero_128 = _mm_setzero_si128();
// NOTE: future performance improvement
// SSE4.1 support uncacheable load 128bits. Maybe it can
// avoid some cache pollution
// NOTE2: I create multiple _n variable to mimic the previous ASM behavior
// but I'm not sure there are real gains.
for (int i = targ->height * GPU_TEXWIDTH/16 ; i > 0 ; --i)
{
// Convert 16 bits pixels to 32bits (zero extended)
// Batch 64 bytes (32 pixels) at once.
__m128i pixels_1 = _mm_load_si128((__m128i*)src);
__m128i pixels_2 = _mm_load_si128((__m128i*)(src+8));
__m128i pixels_3 = _mm_load_si128((__m128i*)(src+16));
__m128i pixels_4 = _mm_load_si128((__m128i*)(src+24));
__m128i zero_128 = _mm_setzero_si128();
// NOTE: future performance improvement
// SSE4.1 support uncacheable load 128bits. Maybe it can
// avoid some cache pollution
// NOTE2: I create multiple _n variable to mimic the previous ASM behavior
// but I'm not sure there are real gains.
for (int i = targ->height * GPU_TEXWIDTH/16 ; i > 0 ; --i)
{
// Convert 16 bits pixels to 32bits (zero extended)
// Batch 64 bytes (32 pixels) at once.
__m128i pixels_1 = _mm_load_si128((__m128i*)src);
__m128i pixels_2 = _mm_load_si128((__m128i*)(src+8));
__m128i pixels_3 = _mm_load_si128((__m128i*)(src+16));
__m128i pixels_4 = _mm_load_si128((__m128i*)(src+24));
__m128i pix_low_1 = _mm_unpacklo_epi16(pixels_1, zero_128);
__m128i pix_high_1 = _mm_unpackhi_epi16(pixels_1, zero_128);
__m128i pix_low_2 = _mm_unpacklo_epi16(pixels_2, zero_128);
__m128i pix_high_2 = _mm_unpackhi_epi16(pixels_2, zero_128);
__m128i pix_low_1 = _mm_unpacklo_epi16(pixels_1, zero_128);
__m128i pix_high_1 = _mm_unpackhi_epi16(pixels_1, zero_128);
__m128i pix_low_2 = _mm_unpacklo_epi16(pixels_2, zero_128);
__m128i pix_high_2 = _mm_unpackhi_epi16(pixels_2, zero_128);
// Note: bypass cache
_mm_stream_si128((__m128i*)dst, pix_low_1);
_mm_stream_si128((__m128i*)(dst+8), pix_high_1);
_mm_stream_si128((__m128i*)(dst+16), pix_low_2);
_mm_stream_si128((__m128i*)(dst+24), pix_high_2);
// Note: bypass cache
_mm_stream_si128((__m128i*)dst, pix_low_1);
_mm_stream_si128((__m128i*)(dst+8), pix_high_1);
_mm_stream_si128((__m128i*)(dst+16), pix_low_2);
_mm_stream_si128((__m128i*)(dst+24), pix_high_2);
__m128i pix_low_3 = _mm_unpacklo_epi16(pixels_3, zero_128);
__m128i pix_high_3 = _mm_unpackhi_epi16(pixels_3, zero_128);
__m128i pix_low_4 = _mm_unpacklo_epi16(pixels_4, zero_128);
__m128i pix_high_4 = _mm_unpackhi_epi16(pixels_4, zero_128);
__m128i pix_low_3 = _mm_unpacklo_epi16(pixels_3, zero_128);
__m128i pix_high_3 = _mm_unpackhi_epi16(pixels_3, zero_128);
__m128i pix_low_4 = _mm_unpacklo_epi16(pixels_4, zero_128);
__m128i pix_high_4 = _mm_unpackhi_epi16(pixels_4, zero_128);
// Note: bypass cache
_mm_stream_si128((__m128i*)(dst+32), pix_low_3);
_mm_stream_si128((__m128i*)(dst+40), pix_high_3);
_mm_stream_si128((__m128i*)(dst+48), pix_low_4);
_mm_stream_si128((__m128i*)(dst+56), pix_high_4);
// Note: bypass cache
_mm_stream_si128((__m128i*)(dst+32), pix_low_3);
_mm_stream_si128((__m128i*)(dst+40), pix_high_3);
_mm_stream_si128((__m128i*)(dst+48), pix_low_4);
_mm_stream_si128((__m128i*)(dst+56), pix_high_4);
src += 32;
dst += 64;
}
// It is advise to use a fence instruction after non temporal move (mm_stream) instruction...
// store fence insures that previous store are finish before execute new one.
_mm_sfence();
src += 32;
dst += 64;
}
// It is advise to use a fence instruction after non temporal move (mm_stream) instruction...
// store fence insures that previous store are finish before execute new one.
_mm_sfence();
#else
SSE2_UnswizzleZ16Target(dst, src, targ->height * GPU_TEXWIDTH / 16);
SSE2_UnswizzleZ16Target(dst, src, targ->height * GPU_TEXWIDTH / 16);
#endif
#else // ZEROGS_SSE2
for (int i = 0; i < targ->height; ++i)
{
for (int j = 0; j < GPU_TEXWIDTH; ++j)
{
dst[0] = src[0];
dst[1] = 0;
dst[2] = src[1];
dst[3] = 0;
dst += 4;
src += 2;
}
}
for (int i = 0; i < targ->height; ++i)
{
for (int j = 0; j < GPU_TEXWIDTH; ++j)
{
dst[0] = src[0];
dst[1] = 0;
dst[2] = src[1];
dst[3] = 0;
dst += 4;
src += 2;
}
}
#endif // ZEROGS_SSE2
}
else
{
ptexdata = targ->ptex->memptr;
// We really don't want to deallocate memptr. As a reminder...
has_data = false;
}
}
}
else
{
ptexdata = targ->ptex->memptr;
// We really don't want to deallocate memptr. As a reminder...
has_data = false;
}
// create the texture
GL_REPORT_ERRORD();