gsdx-tc: improve support of TEXA register

* Greatly reduce the number of CLUT reads (by a factor of about 10)
* Avoid fetching a texture with the wrong TEXA from the cache.
* Fix "jump depends on uninitialized variable" Valgrind warning.

Fix #748

I tried my best to avoid any breakage of DX, but please test it too.
This commit is contained in:
Gregory Hainaut 2015-08-08 15:15:51 +02:00
parent 78569ee833
commit cee0fde940
4 changed files with 42 additions and 28 deletions

View File

@ -396,6 +396,7 @@ void GSRendererHW::Draw()
return; return;
} }
// FIXME: Could be removed on openGL
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{ {
m_mem.m_clut.Read32(context->TEX0, env.TEXA); m_mem.m_clut.Read32(context->TEX0, env.TEXA);

View File

@ -22,9 +22,13 @@
#include "stdafx.h" #include "stdafx.h"
#include "GSTextureCache.h" #include "GSTextureCache.h"
bool s_IS_OPENGL = false;
GSTextureCache::GSTextureCache(GSRenderer* r) GSTextureCache::GSTextureCache(GSRenderer* r)
: m_renderer(r) : m_renderer(r)
{ {
s_IS_OPENGL = (theApp.GetConfig("Renderer", 12) == 12);
m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0; m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0;
UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0); UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
@ -72,12 +76,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm; //const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
GIFRegTEXA plainTEXA; // Until DX is fixed
if (s_IS_OPENGL) {
if(psm.pal > 0)
m_renderer->m_mem.m_clut.Read32(TEX0, TEXA);
} else {
GIFRegTEXA plainTEXA;
plainTEXA.AEM = 1; plainTEXA.AEM = 1;
plainTEXA.TA0 = 0; plainTEXA.TA0 = 0;
plainTEXA.TA1 = 0x80; plainTEXA.TA1 = 0x80;
m_renderer->m_mem.m_clut.Read32(TEX0, plainTEXA); m_renderer->m_mem.m_clut.Read32(TEX0, plainTEXA);
}
const uint32* clut = m_renderer->m_mem.m_clut; const uint32* clut = m_renderer->m_mem.m_clut;
@ -85,26 +95,27 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
list<Source*>& m = m_src.m_map[TEX0.TBP0 >> 5]; list<Source*>& m = m_src.m_map[TEX0.TBP0 >> 5];
for(list<Source*>::iterator i = m.begin(); i != m.end(); i++) for(list<Source*>::iterator i = m.begin(); i != m.end(); i++)
{ {
Source* s = *i; Source* s = *i;
if(((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH if (((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
{
continue; continue;
}
// Special check for palette texture (psm.pal > 0) // Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check
// if (!s->m_target) {
// if m_paltex is enabled // We request a palette texture (psm.pal). If the texture was
// 1/ s->m_palette must always be defined // converted by the CPU (s->m_palette == NULL), we need to ensure
// 2/ Clut is useless (will be uploaded again at the end of the function) // palette content is the same.
// // Note: content of the palette will be uploaded at the end of the function
// if m_paltex is disabled if (psm.pal > 0 && s->m_palette == NULL && !GSVector4i::compare64(clut, s->m_clut, psm.pal * sizeof(clut[0])))
// 1/ Clut must match if m_palette is NULL continue;
if(s->m_palette == NULL && psm.pal > 0 && !GSVector4i::compare64(clut, s->m_clut, psm.pal * sizeof(clut[0])))
{ // We request a 24/16 bit RGBA texture. Alpha expansion was done by
continue; // the CPU. We need to check that TEXA is identical
if (psm.pal == 0 && psm.fmt > 0 && s->m_TEXA.u64 != TEXA.u64)
continue;
} }
m.splice(m.begin(), m, i); m.splice(m.begin(), m, i);
@ -147,7 +158,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM; uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) { if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) {
if (!IsOpenGL() && (psm == PSM_PSMT8)) { if (!s_IS_OPENGL && (psm == PSM_PSMT8)) {
// OpenGL can convert the texture directly in the GPU. Not sure we want to keep this // OpenGL can convert the texture directly in the GPU. Not sure we want to keep this
// code for DX. It fixes effect but it is slow (MGS3) // code for DX. It fixes effect but it is slow (MGS3)
@ -324,7 +335,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
// //
// From a performance point of view, it might cost a little on big upscaling // From a performance point of view, it might cost a little on big upscaling
// but normally few RT are miss so it must remain reasonable. // but normally few RT are miss so it must remain reasonable.
if (IsOpenGL()) { if (s_IS_OPENGL) {
switch (type) { switch (type) {
case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break; case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break;
case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break; case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break;
@ -863,7 +874,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// TODO: clean up this mess // TODO: clean up this mess
int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY; int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY;
bool is_8bits = TEX0.PSM == PSM_PSMT8 && IsOpenGL(); bool is_8bits = TEX0.PSM == PSM_PSMT8 && s_IS_OPENGL;
if (is_8bits) { if (is_8bits) {
GL_INS("Reading RT as a packed-indexed 8 bits format"); GL_INS("Reading RT as a packed-indexed 8 bits format");
@ -1417,9 +1428,14 @@ void GSTextureCache::Source::Flush(uint32 count)
GIFRegTEXA plainTEXA; GIFRegTEXA plainTEXA;
plainTEXA.AEM = 1; // Until DX is fixed
plainTEXA.TA0 = 0; if (s_IS_OPENGL) {
plainTEXA.TA1 = 0x80; plainTEXA = m_TEXA;
} else {
plainTEXA.AEM = 1;
plainTEXA.TA0 = 0;
plainTEXA.TA1 = 0x80;
}
if(m_palette) if(m_palette)
{ {

View File

@ -129,7 +129,6 @@ protected:
#endif #endif
virtual bool CanConvertDepth() { return m_can_convert_depth; } virtual bool CanConvertDepth() { return m_can_convert_depth; }
virtual bool IsOpenGL() { return false; }
public: public:
GSTextureCache(GSRenderer* r); GSTextureCache(GSRenderer* r);

View File

@ -32,8 +32,6 @@ protected:
void Read(Target* t, const GSVector4i& r); void Read(Target* t, const GSVector4i& r);
virtual bool IsOpenGL() { return true; }
public: public:
GSTextureCacheOGL(GSRenderer* r); GSTextureCacheOGL(GSRenderer* r);
}; };