From 9ee090a36eccb9143a08cbb01c0cb5914e28fe91 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 21 Apr 2016 19:08:41 +0200 Subject: [PATCH 1/7] gsdx tc: add a shareable texture flag Plan is to bypass shader conversion/rescaling/copy for depth texture --- plugins/GSdx/GSTextureCache.cpp | 16 ++++++++++++---- plugins/GSdx/GSTextureCache.h | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 7d25fcebfe..377f5e1e5f 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -944,8 +944,12 @@ void GSTextureCache::IncAge() Source* s = *j; - if(++s->m_age > maxage) - { + if(s->m_shared_texture) { + // Shared textures are temporary only added in the hash set but not in the texture + // cache list therefore you can't use RemoveAt + m_src.m_surfaces.erase(s); + delete s; + } else if(++s->m_age > maxage) { m_src.RemoveAt(s); } } @@ -1339,7 +1343,7 @@ void GSTextureCache::PrintMemoryUsage() uint32 dss = 0; for(hash_set::iterator i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end(); i++) { Source* s = *i; - if (s) { + if (s && !s->m_shared_texture) { if (s->m_target) tex_rt += s->m_texture->GetMemUsage(); else @@ -1370,13 +1374,17 @@ GSTextureCache::Surface::Surface(GSRenderer* r, uint8* temp) , m_age(0) , m_temp(temp) , m_32_bits_fmt(false) + , m_shared_texture(false) { m_TEX0.TBP0 = 0x3fff; } GSTextureCache::Surface::~Surface() { - m_renderer->m_dev->Recycle(m_texture); + // Shared textures are pointers copy. Therefore no allocation + // to recycle. 
+ if (!m_shared_texture) + m_renderer->m_dev->Recycle(m_texture); } void GSTextureCache::Surface::Update() diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 0d1438481c..babcf8f944 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -41,6 +41,7 @@ public: int m_age; uint8* m_temp; bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture + bool m_shared_texture; public: Surface(GSRenderer* r, uint8* temp); From 583de1bf0b8bef75c48c0a02d8a7f634b2596264 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 21 Apr 2016 19:13:47 +0200 Subject: [PATCH 2/7] gsdx tc: add a dedicated function to lookup a depth source The hypothesis is that game will use a depth (aka Z32/Z24/Z16/Z16S) format when sampling depth texture as color. Technically one could use a standard color format but block/pixel order won't be the same. (otherwise I'm screwed) => Hypothesis invalid on GoW. They just do a scrambled rendering... Lookup info: * The first searched list is the depth pool as we search a depth texture. * 2nd one is the render target pool (if a depth was converted to a render target already) To avoid any CPU overhead, the source will be a pointer to the real texture * Conversion (if float texture) will be done on the fly by the shader (GPU). * Relative rescaling won't be supported. 
Texture must be fetched with integral coordinate --- plugins/GSdx/GSTextureCache.cpp | 78 +++++++++++++++++++++++++++++++++ plugins/GSdx/GSTextureCache.h | 2 + 2 files changed, 80 insertions(+) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 377f5e1e5f..a70169effa 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -85,6 +85,75 @@ void GSTextureCache::RemoveAll() } } +GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r) +{ + if (!CanConvertDepth()) return NULL; + + if(GSLocalMemory::m_psm[TEX0.PSM].pal > 0) + m_renderer->m_mem.m_clut.Read32(TEX0, TEXA); + + Source* src = NULL; + Target* dst = NULL; + + // Check only current frame, I guess it is only used as a postprocessing effect + uint32 bp = TEX0.TBP0; + uint32 psm = TEX0.PSM; + for(auto t : m_dst[DepthStencil]) { + if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + { + ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth); + dst = t; + break; + } + } + + if (!dst) { + // Retry on the render target (Silent Hill 4) + for(auto t : m_dst[RenderTarget]) { + if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + { + ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth); + dst = t; + break; + } + } + } + + if (dst) { + GL_CACHE("TC depth: dst %s hit: %d (0x%x, F:0x%x)", to_string(dst->m_type), + dst->m_texture ? dst->m_texture->GetID() : 0, + TEX0.TBP0, TEX0.PSM); + + // Create a shared texture source + src = new Source(m_renderer, TEX0, TEXA, m_temp); + src->m_texture = dst->m_texture; + src->m_shared_texture = true; + src->m_target = true; // So renderer can check if a conversion is required + src->m_32_bits_fmt = dst->m_32_bits_fmt; + + // Insert the texture in the hash set to keep track of it. But don't bother with + // texture cache list. 
It means that a new Source is created everytime we need it. + // If it is too expensive, one could cut memory allocation in Source constructor for this + // use case. + + m_src.m_surfaces.insert(src); + } else { + GL_CACHE("TC depth: ERROR miss (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM); + // Possible ? In this case we could call LookupSource + // Or just put a basic texture + // src->m_texture = m_renderer->m_dev->CreateTexture(tw, th); + // In all cases rendering will be broken + // + // Note: might worth to check previous frame + // Note: otherwise return NULL and skip the draw + + //ASSERT(0); + return LookupSource(TEX0, TEXA, r); + } + + return src; +} + GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r) { const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; @@ -207,6 +276,14 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con } } + // Pure depth texture format will be fetched by LookupDepthSource. + // However guess what, some games (GoW) read the depth as a standard + // color format (instead of a depth format). All pixels are scrambled + // (because color and depth don't have same location). They don't care + // pixel will be several draw calls later. + // + // Sigh... They don't help us. + if (dst == NULL && CanConvertDepth()) { // Let's try a trick to avoid to use wrongly a depth buffer // Unfortunately, I don't have any Arc the Lad testcase @@ -217,6 +294,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { + GL_INS("TC: Warning depth format read as color format. 
Pixels will be scrambled"); dst = t; break; } diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index babcf8f944..3aed4a3449 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -142,6 +142,8 @@ public: void RemovePartial(); Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r); + Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r); + Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used); Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h); From fda511a949621f2764ffed1a974921905259009b Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 23 Apr 2016 12:06:10 +0200 Subject: [PATCH 3/7] gsdx glsl: extend hw shader to sample depth texture Will use integral coordinate to avoid any rescaling. Bilinear interpolation isn't supported. I don't think it is allowed to filter a depth texture anyway. --- plugins/GSdx/GSDeviceOGL.cpp | 1 + plugins/GSdx/GSDeviceOGL.h | 3 +- plugins/GSdx/res/glsl/tfx_fs.glsl | 99 ++++++++++++++++++++++++++++++- plugins/GSdx/res/glsl_source.h | 99 ++++++++++++++++++++++++++++++- 4 files changed, 197 insertions(+), 5 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 03fad05fdf..0e9c921561 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -777,6 +777,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) + format("#define PS_WMT %d\n", sel.wmt) + format("#define PS_TEX_FMT %d\n", sel.tex_fmt) + format("#define PS_DFMT %d\n", sel.dfmt) + + format("#define PS_DEPTH_FMT %d\n", sel.depth_fmt) + format("#define PS_AEM %d\n", sel.aem) + format("#define PS_TFX %d\n", sel.tfx) + format("#define PS_TCC %d\n", sel.tcc) diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 1c2fb06949..962b4c282e 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -247,6 +247,7 @@ class 
GSDeviceOGL final : public GSDevice // Format uint32 tex_fmt:4; uint32 dfmt:2; + uint32 depth_fmt:2; // Alpha extension/Correction uint32 aem:1; uint32 fba:1; @@ -270,7 +271,7 @@ class GSDeviceOGL final : public GSDevice uint32 write_rg:1; uint32 fbmask:1; - uint32 _free1:2; + //uint32 _free1:0; // *** Word 2 // Blend and Colclip diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 0c4a203b98..b91d8038e9 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -175,6 +175,94 @@ mat4 sample_4p(vec4 u) return c; } +////////////////////////////////////////////////////////////////////// +// Depth sampling +////////////////////////////////////////////////////////////////////// +vec4 fetch_c(ivec2 uv) +{ + return texelFetch(TextureSampler, ivec2(uv), 0); +} + +ivec2 clamp_wrap_uv_depth(ivec2 uv) +{ + ivec2 uv_out = uv; + + // Keep the full precision + // It allow to multiply the ScalingFactor before the 1/16 coeff + ivec4 mask = ivec4(MskFix) << 4; + +#if PS_WMS == PS_WMT + +#if PS_WMS == 2 + uv_out = clamp(uv, mask.xy, mask.zw); +#elif PS_WMS == 3 + uv_out = (uv & mask.xy) | mask.zw; +#endif + +#else // PS_WMS != PS_WMT + +#if PS_WMS == 2 + uv_out.x = clamp(uv.x, mask.x, mask.z); +#elif PS_WMS == 3 + uv_out.x = (uv.x & mask.x) | mask.z; +#endif + +#if PS_WMT == 2 + uv_out.y = clamp(uv.y, mask.y, mask.w); +#elif PS_WMT == 3 + uv_out.y = (uv.y & mask.y) | mask.w; +#endif + +#endif + + return uv_out; +} + +vec4 sample_depth(vec2 st) +{ + vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(ScalingFactor.xy) * vec2(1.0f/16.0f); + ivec2 uv = ivec2(uv_f); + + vec4 t; +#if PS_DEPTH_FMT == 1 + // Based on ps_main11 of convert + + // Convert a GL_FLOAT32 depth texture into a RGBA color texture + const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f)); + const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0); + + vec4 res = fract(vec4(fetch_c(uv).r) * bitSh); + + t = (res - res.xxyz * 
bitMsk) * 256.0f; + +#elif PS_DEPTH_FMT == 2 + // Based on ps_main12 of convert + + // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture + const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f)); + const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1); + uvec4 color = uvec4(vec4(fetch_c(uv).r) * bitSh) & bitMsk; + + t = vec4(color) * vec4(8.0f, 8.0f, 8.0f, 128.0f); + +#elif PS_DEPTH_FMT == 3 + // Convert a RGBA/RGB5A1 color texture into a RGBA/RGB5A1 color texture + t = fetch_c(uv) * 255.0f; + +#endif + + // warning t ranges from 0 to 255 +#if (PS_AEM_FMT == FMT_24) + t.a = ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f; +#elif (PS_AEM_FMT == FMT_16) + t.a = t.a >= 128.0f ? 255.0f * TA.y : ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f; +#endif + + + return t; +} +////////////////////////////////////////////////////////////////////// + vec4 sample_color(vec2 st) { #if (PS_TCOFFSETHACK == 1) @@ -328,10 +416,17 @@ vec4 ps_color() { //FIXME: maybe we can set gl_Position.w = q in VS #if (PS_FST == 0) - vec4 T = sample_color(PSin.t_float.xy / vec2(PSin.t_float.w)); + vec2 st = PSin.t_float.xy / vec2(PSin.t_float.w); #else // Note xy are normalized coordinate - vec4 T = sample_color(PSin.t_int.xy); + vec2 st = PSin.t_int.xy; +#endif + +#if (PS_DEPTH_FMT > 0) + // Integral coordinate + vec4 T = sample_depth(PSin.t_int.zw); +#else + vec4 T = sample_color(st); #endif #if PS_IIP == 1 diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 47d8426a91..f977403632 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1019,6 +1019,94 @@ static const char* const tfx_fs_all_glsl = " return c;\n" "}\n" "\n" + "//////////////////////////////////////////////////////////////////////\n" + "// Depth sampling\n" + "//////////////////////////////////////////////////////////////////////\n" + "vec4 fetch_c(ivec2 uv)\n" + "{\n" + " return texelFetch(TextureSampler, 
ivec2(uv), 0);\n" + "}\n" + "\n" + "ivec2 clamp_wrap_uv_depth(ivec2 uv)\n" + "{\n" + " ivec2 uv_out = uv;\n" + "\n" + " // Keep the full precision\n" + " // It allow to multiply the ScalingFactor before the 1/16 coeff\n" + " ivec4 mask = ivec4(MskFix) << 4;\n" + "\n" + "#if PS_WMS == PS_WMT\n" + "\n" + "#if PS_WMS == 2\n" + " uv_out = clamp(uv, mask.xy, mask.zw);\n" + "#elif PS_WMS == 3\n" + " uv_out = (uv & mask.xy) | mask.zw;\n" + "#endif\n" + "\n" + "#else // PS_WMS != PS_WMT\n" + "\n" + "#if PS_WMS == 2\n" + " uv_out.x = clamp(uv.x, mask.x, mask.z);\n" + "#elif PS_WMS == 3\n" + " uv_out.x = (uv.x & mask.x) | mask.z;\n" + "#endif\n" + "\n" + "#if PS_WMT == 2\n" + " uv_out.y = clamp(uv.y, mask.y, mask.w);\n" + "#elif PS_WMT == 3\n" + " uv_out.y = (uv.y & mask.y) | mask.w;\n" + "#endif\n" + "\n" + "#endif\n" + "\n" + " return uv_out;\n" + "}\n" + "\n" + "vec4 sample_depth(vec2 st)\n" + "{\n" + " vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(ScalingFactor.xy) * vec2(1.0f/16.0f);\n" + " ivec2 uv = ivec2(uv_f);\n" + "\n" + " vec4 t;\n" + "#if PS_DEPTH_FMT == 1\n" + " // Based on ps_main11 of convert\n" + "\n" + " // Convert a GL_FLOAT32 depth texture into a RGBA color texture\n" + " const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));\n" + " const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);\n" + "\n" + " vec4 res = fract(vec4(fetch_c(uv).r) * bitSh);\n" + "\n" + " t = (res - res.xxyz * bitMsk) * 256.0f;\n" + "\n" + "#elif PS_DEPTH_FMT == 2\n" + " // Based on ps_main12 of convert\n" + "\n" + " // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture\n" + " const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));\n" + " const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);\n" + " uvec4 color = uvec4(vec4(fetch_c(uv).r) * bitSh) & bitMsk;\n" + "\n" + " t = vec4(color) * vec4(8.0f, 8.0f, 8.0f, 128.0f);\n" + "\n" + "#elif PS_DEPTH_FMT == 3\n" + " // Convert a RGBA/RGB5A1 color texture into a 
RGBA/RGB5A1 color texture\n" + " t = fetch_c(uv) * 255.0f;\n" + "\n" + "#endif\n" + "\n" + " // warning t ranges from 0 to 255\n" + "#if (PS_AEM_FMT == FMT_24)\n" + " t.a = ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f;\n" + "#elif (PS_AEM_FMT == FMT_16)\n" + " t.a = t.a >= 128.0f ? 255.0f * TA.y : ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f;\n" + "#endif\n" + "\n" + "\n" + " return t;\n" + "}\n" + "//////////////////////////////////////////////////////////////////////\n" + "\n" "vec4 sample_color(vec2 st)\n" "{\n" "#if (PS_TCOFFSETHACK == 1)\n" @@ -1172,10 +1260,17 @@ static const char* const tfx_fs_all_glsl = "{\n" " //FIXME: maybe we can set gl_Position.w = q in VS\n" "#if (PS_FST == 0)\n" - " vec4 T = sample_color(PSin.t_float.xy / vec2(PSin.t_float.w));\n" + " vec2 st = PSin.t_float.xy / vec2(PSin.t_float.w);\n" "#else\n" " // Note xy are normalized coordinate\n" - " vec4 T = sample_color(PSin.t_int.xy);\n" + " vec2 st = PSin.t_int.xy;\n" + "#endif\n" + "\n" + "#if (PS_DEPTH_FMT > 0)\n" + " // Integral coordinate\n" + " vec4 T = sample_depth(PSin.t_int.zw);\n" + "#else\n" + " vec4 T = sample_color(st);\n" "#endif\n" "\n" "#if PS_IIP == 1\n" From de38963904140259ca1b398234e04931f545bbf8 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 23 Apr 2016 12:06:58 +0200 Subject: [PATCH 4/7] gsdx ogl: plug the new depth sampling in the renderer Note: When source format is depth, integral texture coordinate must be used => depth_fmt 1/2/3 --- plugins/GSdx/GSRendererHW.cpp | 2 +- plugins/GSdx/GSRendererOGL.cpp | 23 ++++++++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index 8ae13d321e..5a1afec1e4 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -415,7 +415,7 @@ void GSRendererHW::Draw() GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear()); - tex = m_tc->LookupSource(context->TEX0, env.TEXA, 
r); + tex = tex_psm.depth ? m_tc->LookupDepthSource(context->TEX0, env.TEXA, r) : m_tc->LookupSource(context->TEX0, env.TEXA, r); if(!tex) { GL_POP(); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 6863007e6e..1b575065a6 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -876,13 +876,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; bool bilinear = m_filter == 2 ? m_vt.IsLinear() : m_filter != 0; - bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && m_context->CLAMP.WMS < 2 && m_context->CLAMP.WMT < 2; + bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && m_context->CLAMP.WMS < 2 && m_context->CLAMP.WMT < 2 && !psm.depth; // Don't force extra filtering on sprite (it creates various upscaling issue) bilinear &= !((m_vt.m_primclass == GS_SPRITE_CLASS) && m_userhacks_round_sprite_offset && !m_vt.IsLinear()); ps_sel.wms = m_context->CLAMP.WMS; ps_sel.wmt = m_context->CLAMP.WMT; + // Depth + bilinear filtering isn't done yet (And I'm not sure we need it anyway but a game will prove me wrong) + ASSERT(!(psm.depth && m_vt.IsLinear())); + // Performance note: // 1/ Don't set 0 as it is the default value // 2/ Only keep aem when it is useful (avoid useless shader permutation) @@ -892,6 +895,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_sel.aem = m_env.TEXA.AEM; ASSERT(tex->m_target); + // Require a float conversion if the texure is a depth otherwise uses Integral scaling + if (psm.depth) { + ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 
3 : 1; + } + // Shuffle is a 16 bits format, so aem is always required GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); ta /= 255.0f; @@ -899,8 +907,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_cb.TA_Af.x = ta.x; ps_cb.TA_Af.y = ta.y; - // FIXME: it is likely a bad idea to do the bilinear interpolation here - // bilinear &= m_vt.IsLinear(); + // The purpose of texture shuffle is to move color channel. Extra interpolation is likely a bad idea. + bilinear &= m_vt.IsLinear(); } else if (tex->m_target) { // Use an old target. AEM and index aren't resolved it must be done @@ -935,6 +943,15 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour bilinear &= m_vt.IsLinear(); } + // Depth format + if (psm.depth) { + // Require a float conversion if the texure is a depth otherwise uses Integral scaling + ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 3 : + (psm.bpp == 16) ? 2 : 1; + // Don't force interpolation on depth format + bilinear &= m_vt.IsLinear(); + } + } else if (tex->m_palette) { // Use a standard 8 bits texture. 
AEM is already done on the CLUT // Therefore you only need to set the index From 1960d51e602a3ff6ab7e14c2d66a2c346e1867d7 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 24 Apr 2016 15:32:58 +0200 Subject: [PATCH 5/7] gsdx tc: properly support 16 bits depth conversion --- plugins/GSdx/GSTextureCache.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index a70169effa..6d36daebe3 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -392,14 +392,16 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int dst = CreateTarget(TEX0, w, h, type); dst->m_32_bits_fmt = t->m_32_bits_fmt; + int shader; + bool fmt_16_bits = (GSLocalMemory::m_psm[TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 16); if (type == DepthStencil) { - GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); - int shader = ShaderConvert_RGBA8_TO_FLOAT32 + GSLocalMemory::m_psm[TEX0.PSM].fmt; - m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, shader, false); + GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x was F:0x%x)", w, h, bp, TEX0.PSM, t->m_TEX0.PSM); + shader = (fmt_16_bits) ? ShaderConvert_RGB5A1_TO_FLOAT16 : ShaderConvert_RGBA8_TO_FLOAT32 + GSLocalMemory::m_psm[TEX0.PSM].fmt; } else { - GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); - m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, ShaderConvert_FLOAT32_TO_RGBA8, false); + GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x was F:0x%x)", w, h, bp, TEX0.PSM, t->m_TEX0.PSM); + shader = (fmt_16_bits) ? 
ShaderConvert_FLOAT16_TO_RGB5A1 : ShaderConvert_FLOAT32_TO_RGBA8; } + m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, shader, false); break; } From ad08701cb1867703580f13ebed94974cef2471a9 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 24 Apr 2016 20:30:54 +0200 Subject: [PATCH 6/7] gsdx tc: trick the texture cache to use a depth format for depth texture This way we avoid the rescaling of the depth buffer and texture allocation. --- plugins/GSdx/GSRendererOGL.cpp | 4 +++- plugins/GSdx/GSTextureCache.cpp | 10 ++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 1b575065a6..b994f3ac6a 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -873,7 +873,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour if (tex) { - const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; + // Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth. + //const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; + const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; bool bilinear = m_filter == 2 ? 
m_vt.IsLinear() : m_filter != 0; bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && m_context->CLAMP.WMS < 2 && m_context->CLAMP.WMT < 2 && !psm.depth; diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 6d36daebe3..a48d779310 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -295,8 +295,14 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled"); - dst = t; - break; + //dst = t; + //break; + // Let's fetch a depth format texture. Rational, it will avoid the texture allocation and the + // rescaling of the current function. + GIFRegTEX0 depth_TEX0; + depth_TEX0.u32[0] = TEX0.u32[0] | (0x30u << 20u); + depth_TEX0.u32[1] = TEX0.u32[1]; + return LookupDepthSource(depth_TEX0, TEXA, r); } } } From 49d175b67712f50d4d5b82b2f5c0cc10ad6932fb Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 24 Apr 2016 22:30:56 +0200 Subject: [PATCH 7/7] gsdx tc: allow to create a dummy Source without tons of memory allocation It makes shared texture virtually free from the CPU PoV. 
--- plugins/GSdx/GSTextureCache.cpp | 34 ++++++++++++++++++++++----------- plugins/GSdx/GSTextureCache.h | 2 +- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index a48d779310..f1fd6d0506 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -125,7 +125,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0 TEX0.TBP0, TEX0.PSM); // Create a shared texture source - src = new Source(m_renderer, TEX0, TEXA, m_temp); + src = new Source(m_renderer, TEX0, TEXA, m_temp, true); src->m_texture = dst->m_texture; src->m_shared_texture = true; src->m_target = true; // So renderer can check if a conversion is required @@ -1480,7 +1480,7 @@ void GSTextureCache::Surface::Update() // GSTextureCache::Source -GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp) +GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container) : Surface(r, temp) , m_palette(NULL) , m_initpalette(true) @@ -1492,20 +1492,32 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR m_TEX0 = TEX0; m_TEXA = TEXA; - memset(m_valid, 0, sizeof(m_valid)); + if (dummy_container) { + // Dummy container only contain a m_texture that is a pointer to another source. 
- m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 32); + m_write.rect = NULL; + m_write.count = 0; - memset(m_clut, 0, 256*sizeof(uint32)); + m_clut = NULL; - m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32); - m_write.count = 0; + m_repeating = false; - m_repeating = m_TEX0.IsRepeating(); + } else { + memset(m_valid, 0, sizeof(m_valid)); - if(m_repeating) - { - m_p2t = r->m_mem.GetPage2TileMap(m_TEX0); + m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 32); + + memset(m_clut, 0, 256*sizeof(uint32)); + + m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32); + m_write.count = 0; + + m_repeating = m_TEX0.IsRepeating(); + + if(m_repeating) + { + m_p2t = r->m_mem.GetPage2TileMap(m_TEX0); + } } } diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 3aed4a3449..70a494ceba 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -69,7 +69,7 @@ public: vector* m_p2t; public: - Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp); + Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false); virtual ~Source(); virtual void Update(const GSVector4i& rect);