From 6121677aa1064269db333f4d5db0b3ead65270c1 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Mon, 29 Jun 2015 19:17:46 +0200 Subject: [PATCH] gsdx-tc: use a single shader pass to convert texture in 8 bits format It might save a couple of fps Add a define to test the perf if we keep only the blue channel. It breaks the code in Prince Of Persia, which uses the Red/Green channels... Maybe the speed hack :( Or find a way to replace all the ifs with a lookup table Note: it is only supported on OpenGL currently --- plugins/GSdx/GSDevice.h | 8 +++++ plugins/GSdx/GSDeviceOGL.cpp | 8 +++++ plugins/GSdx/GSDeviceOGL.h | 1 + plugins/GSdx/GSTextureCache.cpp | 52 ++++++++++++------------------ plugins/GSdx/res/glsl/convert.glsl | 31 +++++++++++++++--- plugins/GSdx/res/glsl_source.h | 31 +++++++++++++++--- 6 files changed, 92 insertions(+), 39 deletions(-) diff --git a/plugins/GSdx/GSDevice.h b/plugins/GSdx/GSDevice.h index 0a5b629bec..8fde98d93f 100644 --- a/plugins/GSdx/GSDevice.h +++ b/plugins/GSdx/GSDevice.h @@ -28,6 +28,14 @@ #pragma pack(push, 1) +class ConvertConstantBuffer +{ +public: + GSVector4i ScalingFactor; + + ConvertConstantBuffer() {memset(this, 0, sizeof(*this));} +}; + class MergeConstantBuffer { public: diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index c89a81b1f9..cd8e075084 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -39,6 +39,7 @@ static const uint32 g_merge_cb_index = 10; static const uint32 g_interlace_cb_index = 11; static const uint32 g_shadeboost_cb_index = 12; static const uint32 g_fx_cb_index = 14; +static const uint32 g_convert_index = 15; bool GSDeviceOGL::m_debug_gl_call = false; int GSDeviceOGL::s_n = 0; @@ -103,6 +104,7 @@ GSDeviceOGL::~GSDeviceOGL() delete m_convert.dss; delete m_convert.dss_write; delete m_convert.bs; + delete m_convert.cb; // Clean m_fxaa delete m_fxaa.cb; @@ -242,6 +244,12 @@ bool GSDeviceOGL::Create(GSWnd* wnd) // 
**************************************************************** // convert // **************************************************************** + m_convert.cb = new GSUniformBufferOGL(g_convert_index, sizeof(ConvertConstantBuffer)); + // Upload once and forget about it + ConvertConstantBuffer cb; + cb.ScalingFactor = GSVector4i(theApp.GetConfig("nativeres", 0) ? 1 : theApp.GetConfig("upscale_multiplier", 2)); + m_convert.cb->upload(&cb); + m_convert.vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, convert_glsl); for(size_t i = 0; i < countof(m_convert.ps); i++) m_convert.ps[i] = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, convert_glsl); diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index fd95d6a9cd..46611072e3 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -504,6 +504,7 @@ class GSDeviceOGL : public GSDevice GSDepthStencilOGL* dss; GSDepthStencilOGL* dss_write; GSBlendStateOGL* bs; + GSUniformBufferOGL* cb; } m_convert; struct { diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 8adad2b21f..2892364aba 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -820,8 +820,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // Shader 11 convert depth to color // Shader 14 convert 32 bits color to 8 bits color int shader = dst->m_type != RenderTarget ? 
11 : 0; + bool is_8bits = TEX0.PSM == PSM_PSMT8 && IsOpenGL(); - if (TEX0.PSM == PSM_PSMT8) { + if (is_8bits) { GL_INS("Reading RT as a packed-indexed 8 bits format"); shader = 14; // ask a conversion to 8 bits format } @@ -847,29 +848,15 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } - // Unscale 8 bits textures, quality won't be nice but format is really awful - // Code won't be compatible with MSAA but it is a DX issue - if (TEX0.PSM == PSM_PSMT8) { - GSVector2 old_scale = dst->m_texture->GetScale(); - - if (old_scale != GSVector2(1.0f, 1.0f)) { - GSVector2i size = dst->m_texture->GetSize(); - tmp = dst->m_texture; - - dst->m_texture = m_renderer->m_dev->CreateRenderTarget(size.x, size.y, false); - - GSVector4 sRect(0.0, 0.0, old_scale.x, old_scale.y); - GSVector4 dRect(0.0, 0.0, size.x, size.y); - m_renderer->m_dev->StretchRect(tmp, sRect, dst->m_texture, dRect, 0, false); - - dst->m_texture->SetScale(GSVector2(1.0f, 1.0f)); - } - } - // do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) int w = (int)(dst->m_texture->GetScale().x * tw); int h = (int)(dst->m_texture->GetScale().y * th); + if (is_8bits) { + // Unscale 8 bits textures, quality won't be nice but format is really awful + w = tw; + h = th; + } GSVector2i dstsize = dst->m_texture->GetSize(); @@ -955,18 +942,21 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0) // At 2x it will become 0.5/128 * 256 = 1 (pixel 1) - if(w > dstsize.x) - { - scale.x = (float)dstsize.x / tw; - dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x; - w = dstsize.x; - } + if (!is_8bits) { + // 8 bits handling is special due to unscaling. 
It is better to not execute this code + if (w > dstsize.x) + { + scale.x = (float)dstsize.x / tw; + dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x; + w = dstsize.x; + } - if(h > dstsize.y) - { - scale.y = (float)dstsize.y / th; - dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y; - h = dstsize.y; + if (h > dstsize.y) + { + scale.y = (float)dstsize.y / th; + dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y; + h = dstsize.y; + } } GSVector4 sRect(0, 0, w, h); diff --git a/plugins/GSdx/res/glsl/convert.glsl b/plugins/GSdx/res/glsl/convert.glsl index fce5d19b97..5e5b6841c1 100644 --- a/plugins/GSdx/res/glsl/convert.glsl +++ b/plugins/GSdx/res/glsl/convert.glsl @@ -70,6 +70,11 @@ layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler; layout(binding = 0) uniform sampler2D TextureSampler; #endif +layout(std140, binding = 15) uniform cb15 +{ + ivec4 ScalingFactor; +}; + vec4 sample_c() { return texture(TextureSampler, PSin_t ); @@ -199,6 +204,15 @@ void ps_main13() #ifdef ps_main14 void ps_main14() { + + // Potential speed optimization. There is a high probability that + // game only want to extract a single channel (blue). It will allow + // to remove most of the conditional operation and yield a +2/3 fps + // boost on MGS3 + // + // Hypothesis wrong in Prince of Persia ... Seriously WTF ! 
+//#define ONLY_BLUE; + // Convert a RGBA texture into a 8 bits packed texture // Input column: 8x2 RGBA pixels // 0: 8 RGBA @@ -208,7 +222,6 @@ void ps_main14() // 1: 8 R | 8 B // 2: 8 G | 8 A // 3: 8 G | 8 A - float c; uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u); @@ -218,15 +231,20 @@ void ps_main14() int txN = tb.x | (int(gl_FragCoord.x) & 7); int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7); + txN *= ScalingFactor.x; + txH *= ScalingFactor.x; + ty *= ScalingFactor.y; + + // TODO investigate texture gather vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0); vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0); - // Potential speed optimization. There is a high probability that - // game only want to extract a single channel (blue). It will allow - // to remove the sel.x condition check if ((sel.y & 4u) == 0u) { // Column 0 and 2 +#ifdef ONLY_BLUE + c = cN.b; +#else if ((sel.y & 3u) < 2u) { // first 2 lines of the col if (sel.x < 8u) @@ -239,7 +257,11 @@ void ps_main14() else c = cH.a; } +#endif } else { +#ifdef ONLY_BLUE + c = cH.b; +#else // Column 1 and 3 if ((sel.y & 3u) < 2u) { // first 2 lines of the col @@ -253,6 +275,7 @@ void ps_main14() else c = cN.a; } +#endif } diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 6789ad857e..aac59407a5 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -95,6 +95,11 @@ static const char* convert_glsl = "layout(binding = 0) uniform sampler2D TextureSampler;\n" "#endif\n" "\n" + "layout(std140, binding = 15) uniform cb15\n" + "{\n" + " ivec4 ScalingFactor;\n" + "};\n" + "\n" "vec4 sample_c()\n" "{\n" " return texture(TextureSampler, PSin_t );\n" @@ -224,6 +229,15 @@ static const char* convert_glsl = "#ifdef ps_main14\n" "void ps_main14()\n" "{\n" + "\n" + " // Potential speed optimization. There is a high probability that\n" + " // game only want to extract a single channel (blue). 
It will allow\n" + " // to remove most of the conditional operation and yield a +2/3 fps\n" + " // boost on MGS3\n" + " //\n" + " // Hypothesis wrong in Prince of Persia ... Seriously WTF !\n" + "//#define ONLY_BLUE;\n" + "\n" " // Convert a RGBA texture into a 8 bits packed texture\n" " // Input column: 8x2 RGBA pixels\n" " // 0: 8 RGBA\n" @@ -233,7 +247,6 @@ static const char* convert_glsl = " // 1: 8 R | 8 B\n" " // 2: 8 G | 8 A\n" " // 3: 8 G | 8 A\n" - "\n" " float c;\n" "\n" " uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n" @@ -243,15 +256,20 @@ static const char* convert_glsl = " int txN = tb.x | (int(gl_FragCoord.x) & 7);\n" " int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n" "\n" + " txN *= ScalingFactor.x;\n" + " txH *= ScalingFactor.x;\n" + " ty *= ScalingFactor.y;\n" + "\n" + " // TODO investigate texture gather\n" " vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n" " vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n" "\n" - " // Potential speed optimization. There is a high probability that\n" - " // game only want to extract a single channel (blue). It will allow\n" - " // to remove the sel.x condition check\n" "\n" " if ((sel.y & 4u) == 0u) {\n" " // Column 0 and 2\n" + "#ifdef ONLY_BLUE\n" + " c = cN.b;\n" + "#else\n" " if ((sel.y & 3u) < 2u) {\n" " // first 2 lines of the col\n" " if (sel.x < 8u)\n" @@ -264,7 +282,11 @@ static const char* convert_glsl = " else\n" " c = cH.a;\n" " }\n" + "#endif\n" " } else {\n" + "#ifdef ONLY_BLUE\n" + " c = cH.b;\n" + "#else\n" " // Column 1 and 3\n" " if ((sel.y & 3u) < 2u) {\n" " // first 2 lines of the col\n" @@ -278,6 +300,7 @@ static const char* convert_glsl = " else\n" " c = cN.a;\n" " }\n" + "#endif\n" " }\n" "\n" "\n"