mirror of https://github.com/PCSX2/pcsx2.git
zzogl:
* Slightly increase the size of the hack window (better for screenshots, not too big for small screens) * Use the generic clut function in FlushDecodeClut * Various cleanups and comments git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4113 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
39780dcc10
commit
6a96e46920
|
@ -234,7 +234,8 @@ void DisplayAdvancedDialog()
|
||||||
|
|
||||||
dialog = gtk_dialog_new();
|
dialog = gtk_dialog_new();
|
||||||
gtk_window_set_title(GTK_WINDOW(dialog), "ZZOgl PG Advanced Config");
|
gtk_window_set_title(GTK_WINDOW(dialog), "ZZOgl PG Advanced Config");
|
||||||
gtk_window_set_default_size(GTK_WINDOW(dialog), 600, 600);
|
// A good value for the height would be 1000 instead of 800, but I'm afraid that some people still use small screens...
|
||||||
|
gtk_window_set_default_size(GTK_WINDOW(dialog), 600, 800);
|
||||||
gtk_window_set_modal(GTK_WINDOW(dialog), true);
|
gtk_window_set_modal(GTK_WINDOW(dialog), true);
|
||||||
|
|
||||||
advanced_box = gtk_vbox_new(false, 5);
|
advanced_box = gtk_vbox_new(false, 5);
|
||||||
|
|
|
@ -188,7 +188,7 @@ inline bool CreateImportantCheck()
|
||||||
|
|
||||||
if (!IsGLExt("GL_EXT_framebuffer_object"))
|
if (!IsGLExt("GL_EXT_framebuffer_object"))
|
||||||
{
|
{
|
||||||
ZZLog::Error_Log("*********\nZZogl: ERROR: Need GL_EXT_framebufer_object for multiple render targets\nZZogl: *********");
|
ZZLog::Error_Log("*********\nZZogl: ERROR: Need GL_EXT_framebuffer_object for multiple render targets\nZZogl: *********");
|
||||||
bSuccess = false;
|
bSuccess = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
#include "targets.h"
|
#include "targets.h"
|
||||||
#include "ZZoglFlushHack.h"
|
#include "ZZoglFlushHack.h"
|
||||||
#include "ZZoglShaders.h"
|
#include "ZZoglShaders.h"
|
||||||
|
#include "ZZClut.h"
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
//------------------ Defines
|
//------------------ Defines
|
||||||
|
@ -337,14 +338,9 @@ inline void VisualBufferMessage(int context)
|
||||||
curvb.tex0.th, curvb.tex0.tcc, curvb.tex0.tfx, curvb.tex0.cbp,
|
curvb.tex0.th, curvb.tex0.tcc, curvb.tex0.tfx, curvb.tex0.cbp,
|
||||||
curvb.tex0.cpsm, curvb.tex0.csm, curvb.tex0.csa, curvb.tex0.cld);
|
curvb.tex0.cpsm, curvb.tex0.csm, curvb.tex0.csa, curvb.tex0.cld);
|
||||||
char* Name;
|
char* Name;
|
||||||
// if (g_bSaveTex) {
|
|
||||||
// if (g_bSaveTex == 1)
|
|
||||||
Name = NamedSaveTex(&curvb.tex0, 1);
|
Name = NamedSaveTex(&curvb.tex0, 1);
|
||||||
// else
|
|
||||||
// Name = NamedSaveTex(&curvb.tex0, 0);
|
|
||||||
ZZLog::Error_Log("TGA name '%s'.", Name);
|
ZZLog::Error_Log("TGA name '%s'.", Name);
|
||||||
free(Name);
|
free(Name);
|
||||||
// }
|
|
||||||
ZZLog::Debug_Log("buffer %ld.\n", BufferNumber);
|
ZZLog::Debug_Log("buffer %ld.\n", BufferNumber);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -730,57 +726,19 @@ inline void FlushDecodeClut(VB& curvb, GLuint& ptexclut)
|
||||||
|
|
||||||
if (ptexclut != 0)
|
if (ptexclut != 0)
|
||||||
{
|
{
|
||||||
|
int clutsize;
|
||||||
int nClutOffset = 0, clutsize;
|
|
||||||
int entries = PSMT_IS8CLUT(curvb.tex0.psm) ? 256 : 16;
|
int entries = PSMT_IS8CLUT(curvb.tex0.psm) ? 256 : 16;
|
||||||
|
|
||||||
if (curvb.tex0.csm && curvb.tex0.csa)
|
if (curvb.tex0.csm && curvb.tex0.csa)
|
||||||
ZZLog::Debug_Log("ERROR, csm1.");
|
ZZLog::Debug_Log("ERROR, csm1.");
|
||||||
|
|
||||||
if (PSMT_IS32BIT(curvb.tex0.cpsm)) // 32 bit
|
if (PSMT_IS32BIT(curvb.tex0.cpsm)) {
|
||||||
{
|
|
||||||
nClutOffset = 64 * curvb.tex0.csa;
|
|
||||||
clutsize = min(entries, 256 - curvb.tex0.csa * 16) * 4;
|
clutsize = min(entries, 256 - curvb.tex0.csa * 16) * 4;
|
||||||
}
|
ClutBuffer_to_Array<u32>((u32*)&data[0], curvb.tex0.csa, clutsize);
|
||||||
else
|
} else {
|
||||||
{
|
|
||||||
nClutOffset = 64 * (curvb.tex0.csa & 15) + (curvb.tex0.csa >= 16 ? 2 : 0);
|
|
||||||
clutsize = min(entries, 512 - curvb.tex0.csa * 16) * 2;
|
clutsize = min(entries, 512 - curvb.tex0.csa * 16) * 2;
|
||||||
}
|
ClutBuffer_to_Array<u16>((u16*)&data[0], curvb.tex0.csa, clutsize);
|
||||||
|
}
|
||||||
if (PSMT_IS32BIT(curvb.tex0.cpsm)) // 32 bit
|
|
||||||
{
|
|
||||||
memcpy_amd(&data[0], g_pbyGSClut + nClutOffset, clutsize);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
u16* pClutBuffer = (u16*)(g_pbyGSClut + nClutOffset);
|
|
||||||
u16* pclut = (u16*) & data[0];
|
|
||||||
int left = ((u32)nClutOffset & 2) ? 0 : ((nClutOffset & 0x3ff) / 2) + clutsize - 512;
|
|
||||||
|
|
||||||
if (left > 0) clutsize -= left;
|
|
||||||
|
|
||||||
while (clutsize > 0)
|
|
||||||
{
|
|
||||||
pclut[0] = pClutBuffer[0];
|
|
||||||
pclut++;
|
|
||||||
pClutBuffer += 2;
|
|
||||||
clutsize -= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (left > 0)
|
|
||||||
{
|
|
||||||
pClutBuffer = (u16*)(g_pbyGSClut + 2);
|
|
||||||
|
|
||||||
while (left > 0)
|
|
||||||
{
|
|
||||||
pclut[0] = pClutBuffer[0];
|
|
||||||
left -= 2;
|
|
||||||
pClutBuffer += 2;
|
|
||||||
pclut++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
GLenum tempType = PSMT_ISHALF_STORAGE(curvb.tex0) ? GL_UNSIGNED_SHORT_5_5_5_1 : GL_UNSIGNED_BYTE;
|
GLenum tempType = PSMT_ISHALF_STORAGE(curvb.tex0) ? GL_UNSIGNED_SHORT_5_5_5_1 : GL_UNSIGNED_BYTE;
|
||||||
Texture2D(4, 256, 1, GL_RGBA, tempType, &data[0]);
|
Texture2D(4, 256, 1, GL_RGBA, tempType, &data[0]);
|
||||||
|
@ -987,6 +945,7 @@ inline FRAGMENTSHADER* FlushMadeNewTarget(VB& curvb, int exactcolor, int context
|
||||||
// save the texture
|
// save the texture
|
||||||
if (g_bSaveTex)
|
if (g_bSaveTex)
|
||||||
{
|
{
|
||||||
|
// FIXME: I suspect one of the g_bSaveTex tests is wrong
|
||||||
if (g_bSaveTex == 1)
|
if (g_bSaveTex == 1)
|
||||||
{
|
{
|
||||||
SaveTex(&curvb.tex0, 1);
|
SaveTex(&curvb.tex0, 1);
|
||||||
|
|
|
@ -395,6 +395,8 @@ SaveTex(tex0Info* ptex, int usevid)
|
||||||
glBindTexture(GL_TEXTURE_RECTANGLE_NV, pmemtarg->ptex->tex);
|
glBindTexture(GL_TEXTURE_RECTANGLE_NV, pmemtarg->ptex->tex);
|
||||||
srcdata.resize(4 * pmemtarg->texW * pmemtarg->texH);
|
srcdata.resize(4 * pmemtarg->texW * pmemtarg->texH);
|
||||||
|
|
||||||
|
// FIXME: strangely, this function call seems to crash pcsx2 on atelier of iris 1
|
||||||
|
// Note: fmt is GL_UNSIGNED_SHORT_1_5_5_5_REV
|
||||||
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, pmemtarg->fmt, &srcdata[0]);
|
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, pmemtarg->fmt, &srcdata[0]);
|
||||||
|
|
||||||
u32 offset = MemorySize(pmemtarg->realy);
|
u32 offset = MemorySize(pmemtarg->realy);
|
||||||
|
@ -613,6 +615,9 @@ SaveTex(tex0Info* ptex, int usevid)
|
||||||
|
|
||||||
snprintf(Name, TGA_FILE_NAME_MAX_LENGTH, "Tex.%d.tga", TexNumber);
|
snprintf(Name, TGA_FILE_NAME_MAX_LENGTH, "Tex.%d.tga", TexNumber);
|
||||||
SaveTGA(Name, ptex->tw, ptex->th, &data[0]);
|
SaveTGA(Name, ptex->tw, ptex->th, &data[0]);
|
||||||
|
|
||||||
|
TexNumber++;
|
||||||
|
if (TexNumber > MAX_NUMBER_SAVED_TGA) TexNumber = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -621,13 +626,10 @@ SaveTex(tex0Info* ptex, int usevid)
|
||||||
char* NamedSaveTex(tex0Info* ptex, int usevid)
|
char* NamedSaveTex(tex0Info* ptex, int usevid)
|
||||||
{
|
{
|
||||||
SaveTex(ptex, usevid);
|
SaveTex(ptex, usevid);
|
||||||
|
|
||||||
char* Name = (char*)malloc(TGA_FILE_NAME_MAX_LENGTH);
|
char* Name = (char*)malloc(TGA_FILE_NAME_MAX_LENGTH);
|
||||||
snprintf(Name, TGA_FILE_NAME_MAX_LENGTH, "Tex.%d.tga", TexNumber);
|
snprintf(Name, TGA_FILE_NAME_MAX_LENGTH, "Tex.%d.tga", TexNumber);
|
||||||
|
|
||||||
TexNumber++;
|
|
||||||
|
|
||||||
if (TexNumber > MAX_NUMBER_SAVED_TGA) TexNumber = 0;
|
|
||||||
|
|
||||||
return Name;
|
return Name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -475,6 +475,9 @@ void CRenderTarget::Update(int context, CRenderTarget* pdepth)
|
||||||
texframe.tw = fbw;
|
texframe.tw = fbw;
|
||||||
texframe.th = fbh;
|
texframe.th = fbh;
|
||||||
texframe.psm = psm;
|
texframe.psm = psm;
|
||||||
|
// FIXME: some fields are not initialized...
|
||||||
|
// in particular the clut-related ones
|
||||||
|
assert(!PSMT_ISCLUT(psm));
|
||||||
|
|
||||||
// write color and zero out stencil buf, always 0 context!
|
// write color and zero out stencil buf, always 0 context!
|
||||||
// force bilinear if using AA
|
// force bilinear if using AA
|
||||||
|
@ -966,6 +969,9 @@ void CDepthTarget::Update(int context, CRenderTarget* prndr)
|
||||||
texframe.tw = fbw;
|
texframe.tw = fbw;
|
||||||
texframe.th = fbh;
|
texframe.th = fbh;
|
||||||
texframe.psm = psm;
|
texframe.psm = psm;
|
||||||
|
// FIXME: some fields are not initialized...
|
||||||
|
// in particular the clut-related ones
|
||||||
|
assert(!PSMT_ISCLUT(psm));
|
||||||
|
|
||||||
DisableAllgl();
|
DisableAllgl();
|
||||||
|
|
||||||
|
@ -2017,96 +2023,93 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
|
||||||
|
|
||||||
assert(targ->clutsize > 0);
|
assert(targ->clutsize > 0);
|
||||||
}
|
}
|
||||||
else
|
else if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ)
|
||||||
{
|
{
|
||||||
if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ)
|
ptexdata = (u8*)_aligned_malloc(4 * targ->texH * targ->texW, 16);
|
||||||
{
|
has_data = true;
|
||||||
ptexdata = (u8*)_aligned_malloc(4 * targ->texH * targ->texW, 16);
|
|
||||||
has_data = true;
|
|
||||||
|
|
||||||
// needs to be 8 bit, use xmm for unpacking
|
// needs to be 8 bit, use xmm for unpacking
|
||||||
u16* dst = (u16*)ptexdata;
|
u16* dst = (u16*)ptexdata;
|
||||||
u16* src = (u16*)(MemoryAddress(targ->realy));
|
u16* src = (u16*)(MemoryAddress(targ->realy));
|
||||||
|
|
||||||
#ifdef ZEROGS_SSE2
|
#ifdef ZEROGS_SSE2
|
||||||
assert(((u32)(uptr)dst) % 16 == 0);
|
assert(((u32)(uptr)dst) % 16 == 0);
|
||||||
// FIXME Uncomment to test intrinsic versions (instead of asm)
|
// FIXME Uncomment to test intrinsic versions (instead of asm)
|
||||||
// perf improvement vs asm:
|
// perf improvement vs asm:
|
||||||
// 1/ gcc updates both pointer with 1 addition
|
// 1/ gcc updates both pointer with 1 addition
|
||||||
// 2/ Bypass the cache for the store
|
// 2/ Bypass the cache for the store
|
||||||
#define NEW_INTRINSIC_VERSION
|
#define NEW_INTRINSIC_VERSION
|
||||||
#ifdef NEW_INTRINSIC_VERSION
|
#ifdef NEW_INTRINSIC_VERSION
|
||||||
|
|
||||||
__m128i zero_128 = _mm_setzero_si128();
|
__m128i zero_128 = _mm_setzero_si128();
|
||||||
// NOTE: future performance improvement
|
// NOTE: future performance improvement
|
||||||
// SSE4.1 support uncacheable load 128bits. Maybe it can
|
// SSE4.1 support uncacheable load 128bits. Maybe it can
|
||||||
// avoid some cache pollution
|
// avoid some cache pollution
|
||||||
// NOTE2: I create multiple _n variable to mimic the previous ASM behavior
|
// NOTE2: I create multiple _n variable to mimic the previous ASM behavior
|
||||||
// but I'm not sure there are real gains.
|
// but I'm not sure there are real gains.
|
||||||
for (int i = targ->height * GPU_TEXWIDTH/16 ; i > 0 ; --i)
|
for (int i = targ->height * GPU_TEXWIDTH/16 ; i > 0 ; --i)
|
||||||
{
|
{
|
||||||
// Convert 16 bits pixels to 32bits (zero extended)
|
// Convert 16 bits pixels to 32bits (zero extended)
|
||||||
// Batch 64 bytes (32 pixels) at once.
|
// Batch 64 bytes (32 pixels) at once.
|
||||||
__m128i pixels_1 = _mm_load_si128((__m128i*)src);
|
__m128i pixels_1 = _mm_load_si128((__m128i*)src);
|
||||||
__m128i pixels_2 = _mm_load_si128((__m128i*)(src+8));
|
__m128i pixels_2 = _mm_load_si128((__m128i*)(src+8));
|
||||||
__m128i pixels_3 = _mm_load_si128((__m128i*)(src+16));
|
__m128i pixels_3 = _mm_load_si128((__m128i*)(src+16));
|
||||||
__m128i pixels_4 = _mm_load_si128((__m128i*)(src+24));
|
__m128i pixels_4 = _mm_load_si128((__m128i*)(src+24));
|
||||||
|
|
||||||
__m128i pix_low_1 = _mm_unpacklo_epi16(pixels_1, zero_128);
|
__m128i pix_low_1 = _mm_unpacklo_epi16(pixels_1, zero_128);
|
||||||
__m128i pix_high_1 = _mm_unpackhi_epi16(pixels_1, zero_128);
|
__m128i pix_high_1 = _mm_unpackhi_epi16(pixels_1, zero_128);
|
||||||
__m128i pix_low_2 = _mm_unpacklo_epi16(pixels_2, zero_128);
|
__m128i pix_low_2 = _mm_unpacklo_epi16(pixels_2, zero_128);
|
||||||
__m128i pix_high_2 = _mm_unpackhi_epi16(pixels_2, zero_128);
|
__m128i pix_high_2 = _mm_unpackhi_epi16(pixels_2, zero_128);
|
||||||
|
|
||||||
// Note: bypass cache
|
// Note: bypass cache
|
||||||
_mm_stream_si128((__m128i*)dst, pix_low_1);
|
_mm_stream_si128((__m128i*)dst, pix_low_1);
|
||||||
_mm_stream_si128((__m128i*)(dst+8), pix_high_1);
|
_mm_stream_si128((__m128i*)(dst+8), pix_high_1);
|
||||||
_mm_stream_si128((__m128i*)(dst+16), pix_low_2);
|
_mm_stream_si128((__m128i*)(dst+16), pix_low_2);
|
||||||
_mm_stream_si128((__m128i*)(dst+24), pix_high_2);
|
_mm_stream_si128((__m128i*)(dst+24), pix_high_2);
|
||||||
|
|
||||||
__m128i pix_low_3 = _mm_unpacklo_epi16(pixels_3, zero_128);
|
__m128i pix_low_3 = _mm_unpacklo_epi16(pixels_3, zero_128);
|
||||||
__m128i pix_high_3 = _mm_unpackhi_epi16(pixels_3, zero_128);
|
__m128i pix_high_3 = _mm_unpackhi_epi16(pixels_3, zero_128);
|
||||||
__m128i pix_low_4 = _mm_unpacklo_epi16(pixels_4, zero_128);
|
__m128i pix_low_4 = _mm_unpacklo_epi16(pixels_4, zero_128);
|
||||||
__m128i pix_high_4 = _mm_unpackhi_epi16(pixels_4, zero_128);
|
__m128i pix_high_4 = _mm_unpackhi_epi16(pixels_4, zero_128);
|
||||||
|
|
||||||
// Note: bypass cache
|
// Note: bypass cache
|
||||||
_mm_stream_si128((__m128i*)(dst+32), pix_low_3);
|
_mm_stream_si128((__m128i*)(dst+32), pix_low_3);
|
||||||
_mm_stream_si128((__m128i*)(dst+40), pix_high_3);
|
_mm_stream_si128((__m128i*)(dst+40), pix_high_3);
|
||||||
_mm_stream_si128((__m128i*)(dst+48), pix_low_4);
|
_mm_stream_si128((__m128i*)(dst+48), pix_low_4);
|
||||||
_mm_stream_si128((__m128i*)(dst+56), pix_high_4);
|
_mm_stream_si128((__m128i*)(dst+56), pix_high_4);
|
||||||
|
|
||||||
src += 32;
|
src += 32;
|
||||||
dst += 64;
|
dst += 64;
|
||||||
}
|
}
|
||||||
// It is advised to use a fence instruction after non-temporal move (mm_stream) instructions...
|
// It is advised to use a fence instruction after non-temporal move (mm_stream) instructions...
|
||||||
// The store fence ensures that previous stores are finished before executing new ones.
|
// The store fence ensures that previous stores are finished before executing new ones.
|
||||||
_mm_sfence();
|
_mm_sfence();
|
||||||
#else
|
#else
|
||||||
SSE2_UnswizzleZ16Target(dst, src, targ->height * GPU_TEXWIDTH / 16);
|
SSE2_UnswizzleZ16Target(dst, src, targ->height * GPU_TEXWIDTH / 16);
|
||||||
#endif
|
#endif
|
||||||
#else // ZEROGS_SSE2
|
#else // ZEROGS_SSE2
|
||||||
|
|
||||||
for (int i = 0; i < targ->height; ++i)
|
for (int i = 0; i < targ->height; ++i)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < GPU_TEXWIDTH; ++j)
|
for (int j = 0; j < GPU_TEXWIDTH; ++j)
|
||||||
{
|
{
|
||||||
dst[0] = src[0];
|
dst[0] = src[0];
|
||||||
dst[1] = 0;
|
dst[1] = 0;
|
||||||
dst[2] = src[1];
|
dst[2] = src[1];
|
||||||
dst[3] = 0;
|
dst[3] = 0;
|
||||||
dst += 4;
|
dst += 4;
|
||||||
src += 2;
|
src += 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // ZEROGS_SSE2
|
#endif // ZEROGS_SSE2
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ptexdata = targ->ptex->memptr;
|
ptexdata = targ->ptex->memptr;
|
||||||
// We really don't want to deallocate memptr. As a reminder...
|
// We really don't want to deallocate memptr. As a reminder...
|
||||||
has_data = false;
|
has_data = false;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// create the texture
|
// create the texture
|
||||||
GL_REPORT_ERRORD();
|
GL_REPORT_ERRORD();
|
||||||
|
|
Loading…
Reference in New Issue