gsdx-tc: use a single shader pass to convert textures to the 8-bit format

It might save a couple of fps

Add a define to test the performance when only the blue channel is kept. It
breaks the code in Prince Of Persia, which uses the Red/Green channels... Maybe
make it a speed hack :( Or find a way to replace all the ifs with a lookup table

Note: it is only supported on OpenGL currently
This commit is contained in:
Gregory Hainaut 2015-06-29 19:17:46 +02:00
parent 2ecca529d1
commit 6121677aa1
6 changed files with 92 additions and 39 deletions

View File

@ -28,6 +28,14 @@
#pragma pack(push, 1) #pragma pack(push, 1)
// CPU-side contents of the constant buffer used by the convert shaders.
// ScalingFactor is declared as an ivec4 in the cb15 uniform block of convert.glsl.
class ConvertConstantBuffer
{
public:
	// Clear every byte of the object so the buffer starts from a known state;
	// the creator assigns ScalingFactor before uploading it.
	ConvertConstantBuffer()
	{
		memset(this, 0, sizeof(ConvertConstantBuffer));
	}

	GSVector4i ScalingFactor;
};
class MergeConstantBuffer class MergeConstantBuffer
{ {
public: public:

View File

@ -39,6 +39,7 @@ static const uint32 g_merge_cb_index = 10;
static const uint32 g_interlace_cb_index = 11; static const uint32 g_interlace_cb_index = 11;
static const uint32 g_shadeboost_cb_index = 12; static const uint32 g_shadeboost_cb_index = 12;
static const uint32 g_fx_cb_index = 14; static const uint32 g_fx_cb_index = 14;
static const uint32 g_convert_index = 15;
bool GSDeviceOGL::m_debug_gl_call = false; bool GSDeviceOGL::m_debug_gl_call = false;
int GSDeviceOGL::s_n = 0; int GSDeviceOGL::s_n = 0;
@ -103,6 +104,7 @@ GSDeviceOGL::~GSDeviceOGL()
delete m_convert.dss; delete m_convert.dss;
delete m_convert.dss_write; delete m_convert.dss_write;
delete m_convert.bs; delete m_convert.bs;
delete m_convert.cb;
// Clean m_fxaa // Clean m_fxaa
delete m_fxaa.cb; delete m_fxaa.cb;
@ -242,6 +244,12 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
// **************************************************************** // ****************************************************************
// convert // convert
// **************************************************************** // ****************************************************************
m_convert.cb = new GSUniformBufferOGL(g_convert_index, sizeof(ConvertConstantBuffer));
// Upload once and forget about it
ConvertConstantBuffer cb;
cb.ScalingFactor = GSVector4i(theApp.GetConfig("nativeres", 0) ? 1 : theApp.GetConfig("upscale_multiplier", 2));
m_convert.cb->upload(&cb);
m_convert.vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, convert_glsl); m_convert.vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, convert_glsl);
for(size_t i = 0; i < countof(m_convert.ps); i++) for(size_t i = 0; i < countof(m_convert.ps); i++)
m_convert.ps[i] = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, convert_glsl); m_convert.ps[i] = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, convert_glsl);

View File

@ -504,6 +504,7 @@ class GSDeviceOGL : public GSDevice
GSDepthStencilOGL* dss; GSDepthStencilOGL* dss;
GSDepthStencilOGL* dss_write; GSDepthStencilOGL* dss_write;
GSBlendStateOGL* bs; GSBlendStateOGL* bs;
GSUniformBufferOGL* cb;
} m_convert; } m_convert;
struct { struct {

View File

@ -820,8 +820,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// Shader 11 convert depth to color // Shader 11 convert depth to color
// Shader 14 convert 32 bits color to 8 bits color // Shader 14 convert 32 bits color to 8 bits color
int shader = dst->m_type != RenderTarget ? 11 : 0; int shader = dst->m_type != RenderTarget ? 11 : 0;
bool is_8bits = TEX0.PSM == PSM_PSMT8 && IsOpenGL();
if (TEX0.PSM == PSM_PSMT8) { if (is_8bits) {
GL_INS("Reading RT as a packed-indexed 8 bits format"); GL_INS("Reading RT as a packed-indexed 8 bits format");
shader = 14; // ask a conversion to 8 bits format shader = 14; // ask a conversion to 8 bits format
} }
@ -847,29 +848,15 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
} }
// Unscale 8 bits textures, quality won't be nice but format is really awful
// Code won't be compatible with MSAA but it is a DX issue
if (TEX0.PSM == PSM_PSMT8) {
GSVector2 old_scale = dst->m_texture->GetScale();
if (old_scale != GSVector2(1.0f, 1.0f)) {
GSVector2i size = dst->m_texture->GetSize();
tmp = dst->m_texture;
dst->m_texture = m_renderer->m_dev->CreateRenderTarget(size.x, size.y, false);
GSVector4 sRect(0.0, 0.0, old_scale.x, old_scale.y);
GSVector4 dRect(0.0, 0.0, size.x, size.y);
m_renderer->m_dev->StretchRect(tmp, sRect, dst->m_texture, dRect, 0, false);
dst->m_texture->SetScale(GSVector2(1.0f, 1.0f));
}
}
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) // do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
int w = (int)(dst->m_texture->GetScale().x * tw); int w = (int)(dst->m_texture->GetScale().x * tw);
int h = (int)(dst->m_texture->GetScale().y * th); int h = (int)(dst->m_texture->GetScale().y * th);
if (is_8bits) {
// Unscale 8 bits textures, quality won't be nice but format is really awful
w = tw;
h = th;
}
GSVector2i dstsize = dst->m_texture->GetSize(); GSVector2i dstsize = dst->m_texture->GetSize();
@ -955,18 +942,21 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0) // FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0)
// At 2x it will become 0.5/128 * 256 = 1 (pixel 1) // At 2x it will become 0.5/128 * 256 = 1 (pixel 1)
if(w > dstsize.x) if (!is_8bits) {
{ // 8 bits handling is special due to unscaling. It is better to not execute this code
scale.x = (float)dstsize.x / tw; if (w > dstsize.x)
dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x; {
w = dstsize.x; scale.x = (float)dstsize.x / tw;
} dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x;
w = dstsize.x;
}
if(h > dstsize.y) if (h > dstsize.y)
{ {
scale.y = (float)dstsize.y / th; scale.y = (float)dstsize.y / th;
dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y; dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y;
h = dstsize.y; h = dstsize.y;
}
} }
GSVector4 sRect(0, 0, w, h); GSVector4 sRect(0, 0, w, h);

View File

@ -70,6 +70,11 @@ layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
layout(binding = 0) uniform sampler2D TextureSampler; layout(binding = 0) uniform sampler2D TextureSampler;
#endif #endif
// Uniform block at binding point 15 (std140 layout).
// ps_main14 multiplies its texel fetch coordinates by ScalingFactor.x / .y.
layout(std140, binding = 15) uniform cb15
{
ivec4 ScalingFactor;
};
vec4 sample_c() vec4 sample_c()
{ {
return texture(TextureSampler, PSin_t ); return texture(TextureSampler, PSin_t );
@ -199,6 +204,15 @@ void ps_main13()
#ifdef ps_main14 #ifdef ps_main14
void ps_main14() void ps_main14()
{ {
// Potential speed optimization. There is a high probability that
// game only want to extract a single channel (blue). It will allow
// to remove most of the conditional operation and yield a +2/3 fps
// boost on MGS3
//
// Hypothesis wrong in Prince of Persia ... Seriously WTF !
//#define ONLY_BLUE;
// Convert a RGBA texture into a 8 bits packed texture // Convert a RGBA texture into a 8 bits packed texture
// Input column: 8x2 RGBA pixels // Input column: 8x2 RGBA pixels
// 0: 8 RGBA // 0: 8 RGBA
@ -208,7 +222,6 @@ void ps_main14()
// 1: 8 R | 8 B // 1: 8 R | 8 B
// 2: 8 G | 8 A // 2: 8 G | 8 A
// 3: 8 G | 8 A // 3: 8 G | 8 A
float c; float c;
uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u); uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);
@ -218,15 +231,20 @@ void ps_main14()
int txN = tb.x | (int(gl_FragCoord.x) & 7); int txN = tb.x | (int(gl_FragCoord.x) & 7);
int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7); int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);
txN *= ScalingFactor.x;
txH *= ScalingFactor.x;
ty *= ScalingFactor.y;
// TODO investigate texture gather
vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0); vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);
vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0); vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);
// Potential speed optimization. There is a high probability that
// game only want to extract a single channel (blue). It will allow
// to remove the sel.x condition check
if ((sel.y & 4u) == 0u) { if ((sel.y & 4u) == 0u) {
// Column 0 and 2 // Column 0 and 2
#ifdef ONLY_BLUE
c = cN.b;
#else
if ((sel.y & 3u) < 2u) { if ((sel.y & 3u) < 2u) {
// first 2 lines of the col // first 2 lines of the col
if (sel.x < 8u) if (sel.x < 8u)
@ -239,7 +257,11 @@ void ps_main14()
else else
c = cH.a; c = cH.a;
} }
#endif
} else { } else {
#ifdef ONLY_BLUE
c = cH.b;
#else
// Column 1 and 3 // Column 1 and 3
if ((sel.y & 3u) < 2u) { if ((sel.y & 3u) < 2u) {
// first 2 lines of the col // first 2 lines of the col
@ -253,6 +275,7 @@ void ps_main14()
else else
c = cN.a; c = cN.a;
} }
#endif
} }

View File

@ -95,6 +95,11 @@ static const char* convert_glsl =
"layout(binding = 0) uniform sampler2D TextureSampler;\n" "layout(binding = 0) uniform sampler2D TextureSampler;\n"
"#endif\n" "#endif\n"
"\n" "\n"
"layout(std140, binding = 15) uniform cb15\n"
"{\n"
" ivec4 ScalingFactor;\n"
"};\n"
"\n"
"vec4 sample_c()\n" "vec4 sample_c()\n"
"{\n" "{\n"
" return texture(TextureSampler, PSin_t );\n" " return texture(TextureSampler, PSin_t );\n"
@ -224,6 +229,15 @@ static const char* convert_glsl =
"#ifdef ps_main14\n" "#ifdef ps_main14\n"
"void ps_main14()\n" "void ps_main14()\n"
"{\n" "{\n"
"\n"
" // Potential speed optimization. There is a high probability that\n"
" // game only want to extract a single channel (blue). It will allow\n"
" // to remove most of the conditional operation and yield a +2/3 fps\n"
" // boost on MGS3\n"
" //\n"
" // Hypothesis wrong in Prince of Persia ... Seriously WTF !\n"
"//#define ONLY_BLUE;\n"
"\n"
" // Convert a RGBA texture into a 8 bits packed texture\n" " // Convert a RGBA texture into a 8 bits packed texture\n"
" // Input column: 8x2 RGBA pixels\n" " // Input column: 8x2 RGBA pixels\n"
" // 0: 8 RGBA\n" " // 0: 8 RGBA\n"
@ -233,7 +247,6 @@ static const char* convert_glsl =
" // 1: 8 R | 8 B\n" " // 1: 8 R | 8 B\n"
" // 2: 8 G | 8 A\n" " // 2: 8 G | 8 A\n"
" // 3: 8 G | 8 A\n" " // 3: 8 G | 8 A\n"
"\n"
" float c;\n" " float c;\n"
"\n" "\n"
" uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n" " uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n"
@ -243,15 +256,20 @@ static const char* convert_glsl =
" int txN = tb.x | (int(gl_FragCoord.x) & 7);\n" " int txN = tb.x | (int(gl_FragCoord.x) & 7);\n"
" int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n" " int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n"
"\n" "\n"
" txN *= ScalingFactor.x;\n"
" txH *= ScalingFactor.x;\n"
" ty *= ScalingFactor.y;\n"
"\n"
" // TODO investigate texture gather\n"
" vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n" " vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n"
" vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n" " vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n"
"\n" "\n"
" // Potential speed optimization. There is a high probability that\n"
" // game only want to extract a single channel (blue). It will allow\n"
" // to remove the sel.x condition check\n"
"\n" "\n"
" if ((sel.y & 4u) == 0u) {\n" " if ((sel.y & 4u) == 0u) {\n"
" // Column 0 and 2\n" " // Column 0 and 2\n"
"#ifdef ONLY_BLUE\n"
" c = cN.b;\n"
"#else\n"
" if ((sel.y & 3u) < 2u) {\n" " if ((sel.y & 3u) < 2u) {\n"
" // first 2 lines of the col\n" " // first 2 lines of the col\n"
" if (sel.x < 8u)\n" " if (sel.x < 8u)\n"
@ -264,7 +282,11 @@ static const char* convert_glsl =
" else\n" " else\n"
" c = cH.a;\n" " c = cH.a;\n"
" }\n" " }\n"
"#endif\n"
" } else {\n" " } else {\n"
"#ifdef ONLY_BLUE\n"
" c = cH.b;\n"
"#else\n"
" // Column 1 and 3\n" " // Column 1 and 3\n"
" if ((sel.y & 3u) < 2u) {\n" " if ((sel.y & 3u) < 2u) {\n"
" // first 2 lines of the col\n" " // first 2 lines of the col\n"
@ -278,6 +300,7 @@ static const char* convert_glsl =
" else\n" " else\n"
" c = cN.a;\n" " c = cN.a;\n"
" }\n" " }\n"
"#endif\n"
" }\n" " }\n"
"\n" "\n"
"\n" "\n"