mirror of https://github.com/PCSX2/pcsx2.git
gsdx-tc: use a single shader pass to convert a texture to the 8-bit format
It might save a couple of fps. Add a define to test the performance if we keep only the blue channel. It breaks the code in Prince of Persia, which uses the red/green channels... Maybe as a speed hack :( Or find a way to replace all the ifs with a lookup table. Note: it is only supported on OpenGL currently.
This commit is contained in:
parent
2ecca529d1
commit
6121677aa1
|
@ -28,6 +28,14 @@
|
||||||
|
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
|
|
||||||
|
class ConvertConstantBuffer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
GSVector4i ScalingFactor;
|
||||||
|
|
||||||
|
ConvertConstantBuffer() {memset(this, 0, sizeof(*this));}
|
||||||
|
};
|
||||||
|
|
||||||
class MergeConstantBuffer
|
class MergeConstantBuffer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -39,6 +39,7 @@ static const uint32 g_merge_cb_index = 10;
|
||||||
static const uint32 g_interlace_cb_index = 11;
|
static const uint32 g_interlace_cb_index = 11;
|
||||||
static const uint32 g_shadeboost_cb_index = 12;
|
static const uint32 g_shadeboost_cb_index = 12;
|
||||||
static const uint32 g_fx_cb_index = 14;
|
static const uint32 g_fx_cb_index = 14;
|
||||||
|
static const uint32 g_convert_index = 15;
|
||||||
|
|
||||||
bool GSDeviceOGL::m_debug_gl_call = false;
|
bool GSDeviceOGL::m_debug_gl_call = false;
|
||||||
int GSDeviceOGL::s_n = 0;
|
int GSDeviceOGL::s_n = 0;
|
||||||
|
@ -103,6 +104,7 @@ GSDeviceOGL::~GSDeviceOGL()
|
||||||
delete m_convert.dss;
|
delete m_convert.dss;
|
||||||
delete m_convert.dss_write;
|
delete m_convert.dss_write;
|
||||||
delete m_convert.bs;
|
delete m_convert.bs;
|
||||||
|
delete m_convert.cb;
|
||||||
|
|
||||||
// Clean m_fxaa
|
// Clean m_fxaa
|
||||||
delete m_fxaa.cb;
|
delete m_fxaa.cb;
|
||||||
|
@ -242,6 +244,12 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
|
||||||
// ****************************************************************
|
// ****************************************************************
|
||||||
// convert
|
// convert
|
||||||
// ****************************************************************
|
// ****************************************************************
|
||||||
|
m_convert.cb = new GSUniformBufferOGL(g_convert_index, sizeof(ConvertConstantBuffer));
|
||||||
|
// Upload once and forget about it
|
||||||
|
ConvertConstantBuffer cb;
|
||||||
|
cb.ScalingFactor = GSVector4i(theApp.GetConfig("nativeres", 0) ? 1 : theApp.GetConfig("upscale_multiplier", 2));
|
||||||
|
m_convert.cb->upload(&cb);
|
||||||
|
|
||||||
m_convert.vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, convert_glsl);
|
m_convert.vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, convert_glsl);
|
||||||
for(size_t i = 0; i < countof(m_convert.ps); i++)
|
for(size_t i = 0; i < countof(m_convert.ps); i++)
|
||||||
m_convert.ps[i] = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, convert_glsl);
|
m_convert.ps[i] = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, convert_glsl);
|
||||||
|
|
|
@ -504,6 +504,7 @@ class GSDeviceOGL : public GSDevice
|
||||||
GSDepthStencilOGL* dss;
|
GSDepthStencilOGL* dss;
|
||||||
GSDepthStencilOGL* dss_write;
|
GSDepthStencilOGL* dss_write;
|
||||||
GSBlendStateOGL* bs;
|
GSBlendStateOGL* bs;
|
||||||
|
GSUniformBufferOGL* cb;
|
||||||
} m_convert;
|
} m_convert;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
|
|
@ -820,8 +820,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
||||||
// Shader 11 convert depth to color
|
// Shader 11 convert depth to color
|
||||||
// Shader 14 convert 32 bits color to 8 bits color
|
// Shader 14 convert 32 bits color to 8 bits color
|
||||||
int shader = dst->m_type != RenderTarget ? 11 : 0;
|
int shader = dst->m_type != RenderTarget ? 11 : 0;
|
||||||
|
bool is_8bits = TEX0.PSM == PSM_PSMT8 && IsOpenGL();
|
||||||
|
|
||||||
if (TEX0.PSM == PSM_PSMT8) {
|
if (is_8bits) {
|
||||||
GL_INS("Reading RT as a packed-indexed 8 bits format");
|
GL_INS("Reading RT as a packed-indexed 8 bits format");
|
||||||
shader = 14; // ask a conversion to 8 bits format
|
shader = 14; // ask a conversion to 8 bits format
|
||||||
}
|
}
|
||||||
|
@ -847,29 +848,15 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Unscale 8 bits textures, quality won't be nice but format is really awful
|
|
||||||
// Code won't be compatible with MSAA but it is a DX issue
|
|
||||||
if (TEX0.PSM == PSM_PSMT8) {
|
|
||||||
GSVector2 old_scale = dst->m_texture->GetScale();
|
|
||||||
|
|
||||||
if (old_scale != GSVector2(1.0f, 1.0f)) {
|
|
||||||
GSVector2i size = dst->m_texture->GetSize();
|
|
||||||
tmp = dst->m_texture;
|
|
||||||
|
|
||||||
dst->m_texture = m_renderer->m_dev->CreateRenderTarget(size.x, size.y, false);
|
|
||||||
|
|
||||||
GSVector4 sRect(0.0, 0.0, old_scale.x, old_scale.y);
|
|
||||||
GSVector4 dRect(0.0, 0.0, size.x, size.y);
|
|
||||||
m_renderer->m_dev->StretchRect(tmp, sRect, dst->m_texture, dRect, 0, false);
|
|
||||||
|
|
||||||
dst->m_texture->SetScale(GSVector2(1.0f, 1.0f));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
|
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
|
||||||
|
|
||||||
int w = (int)(dst->m_texture->GetScale().x * tw);
|
int w = (int)(dst->m_texture->GetScale().x * tw);
|
||||||
int h = (int)(dst->m_texture->GetScale().y * th);
|
int h = (int)(dst->m_texture->GetScale().y * th);
|
||||||
|
if (is_8bits) {
|
||||||
|
// Unscale 8 bits textures, quality won't be nice but format is really awful
|
||||||
|
w = tw;
|
||||||
|
h = th;
|
||||||
|
}
|
||||||
|
|
||||||
GSVector2i dstsize = dst->m_texture->GetSize();
|
GSVector2i dstsize = dst->m_texture->GetSize();
|
||||||
|
|
||||||
|
@ -955,18 +942,21 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
||||||
// FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0)
|
// FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0)
|
||||||
// At 2x it will become 0.5/128 * 256 = 1 (pixel 1)
|
// At 2x it will become 0.5/128 * 256 = 1 (pixel 1)
|
||||||
|
|
||||||
if(w > dstsize.x)
|
if (!is_8bits) {
|
||||||
{
|
// 8 bits handling is special due to unscaling. It is better to not execute this code
|
||||||
scale.x = (float)dstsize.x / tw;
|
if (w > dstsize.x)
|
||||||
dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x;
|
{
|
||||||
w = dstsize.x;
|
scale.x = (float)dstsize.x / tw;
|
||||||
}
|
dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x;
|
||||||
|
w = dstsize.x;
|
||||||
|
}
|
||||||
|
|
||||||
if(h > dstsize.y)
|
if (h > dstsize.y)
|
||||||
{
|
{
|
||||||
scale.y = (float)dstsize.y / th;
|
scale.y = (float)dstsize.y / th;
|
||||||
dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y;
|
dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y;
|
||||||
h = dstsize.y;
|
h = dstsize.y;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GSVector4 sRect(0, 0, w, h);
|
GSVector4 sRect(0, 0, w, h);
|
||||||
|
|
|
@ -70,6 +70,11 @@ layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
|
||||||
layout(binding = 0) uniform sampler2D TextureSampler;
|
layout(binding = 0) uniform sampler2D TextureSampler;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
layout(std140, binding = 15) uniform cb15
|
||||||
|
{
|
||||||
|
ivec4 ScalingFactor;
|
||||||
|
};
|
||||||
|
|
||||||
vec4 sample_c()
|
vec4 sample_c()
|
||||||
{
|
{
|
||||||
return texture(TextureSampler, PSin_t );
|
return texture(TextureSampler, PSin_t );
|
||||||
|
@ -199,6 +204,15 @@ void ps_main13()
|
||||||
#ifdef ps_main14
|
#ifdef ps_main14
|
||||||
void ps_main14()
|
void ps_main14()
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// Potential speed optimization. There is a high probability that
|
||||||
|
// game only want to extract a single channel (blue). It will allow
|
||||||
|
// to remove most of the conditional operation and yield a +2/3 fps
|
||||||
|
// boost on MGS3
|
||||||
|
//
|
||||||
|
// Hypothesis wrong in Prince of Persia ... Seriously WTF !
|
||||||
|
//#define ONLY_BLUE;
|
||||||
|
|
||||||
// Convert a RGBA texture into a 8 bits packed texture
|
// Convert a RGBA texture into a 8 bits packed texture
|
||||||
// Input column: 8x2 RGBA pixels
|
// Input column: 8x2 RGBA pixels
|
||||||
// 0: 8 RGBA
|
// 0: 8 RGBA
|
||||||
|
@ -208,7 +222,6 @@ void ps_main14()
|
||||||
// 1: 8 R | 8 B
|
// 1: 8 R | 8 B
|
||||||
// 2: 8 G | 8 A
|
// 2: 8 G | 8 A
|
||||||
// 3: 8 G | 8 A
|
// 3: 8 G | 8 A
|
||||||
|
|
||||||
float c;
|
float c;
|
||||||
|
|
||||||
uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);
|
uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);
|
||||||
|
@ -218,15 +231,20 @@ void ps_main14()
|
||||||
int txN = tb.x | (int(gl_FragCoord.x) & 7);
|
int txN = tb.x | (int(gl_FragCoord.x) & 7);
|
||||||
int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);
|
int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);
|
||||||
|
|
||||||
|
txN *= ScalingFactor.x;
|
||||||
|
txH *= ScalingFactor.x;
|
||||||
|
ty *= ScalingFactor.y;
|
||||||
|
|
||||||
|
// TODO investigate texture gather
|
||||||
vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);
|
vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);
|
||||||
vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);
|
vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);
|
||||||
|
|
||||||
// Potential speed optimization. There is a high probability that
|
|
||||||
// game only want to extract a single channel (blue). It will allow
|
|
||||||
// to remove the sel.x condition check
|
|
||||||
|
|
||||||
if ((sel.y & 4u) == 0u) {
|
if ((sel.y & 4u) == 0u) {
|
||||||
// Column 0 and 2
|
// Column 0 and 2
|
||||||
|
#ifdef ONLY_BLUE
|
||||||
|
c = cN.b;
|
||||||
|
#else
|
||||||
if ((sel.y & 3u) < 2u) {
|
if ((sel.y & 3u) < 2u) {
|
||||||
// first 2 lines of the col
|
// first 2 lines of the col
|
||||||
if (sel.x < 8u)
|
if (sel.x < 8u)
|
||||||
|
@ -239,7 +257,11 @@ void ps_main14()
|
||||||
else
|
else
|
||||||
c = cH.a;
|
c = cH.a;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
|
#ifdef ONLY_BLUE
|
||||||
|
c = cH.b;
|
||||||
|
#else
|
||||||
// Column 1 and 3
|
// Column 1 and 3
|
||||||
if ((sel.y & 3u) < 2u) {
|
if ((sel.y & 3u) < 2u) {
|
||||||
// first 2 lines of the col
|
// first 2 lines of the col
|
||||||
|
@ -253,6 +275,7 @@ void ps_main14()
|
||||||
else
|
else
|
||||||
c = cN.a;
|
c = cN.a;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -95,6 +95,11 @@ static const char* convert_glsl =
|
||||||
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
|
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
"layout(std140, binding = 15) uniform cb15\n"
|
||||||
|
"{\n"
|
||||||
|
" ivec4 ScalingFactor;\n"
|
||||||
|
"};\n"
|
||||||
|
"\n"
|
||||||
"vec4 sample_c()\n"
|
"vec4 sample_c()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" return texture(TextureSampler, PSin_t );\n"
|
" return texture(TextureSampler, PSin_t );\n"
|
||||||
|
@ -224,6 +229,15 @@ static const char* convert_glsl =
|
||||||
"#ifdef ps_main14\n"
|
"#ifdef ps_main14\n"
|
||||||
"void ps_main14()\n"
|
"void ps_main14()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
|
"\n"
|
||||||
|
" // Potential speed optimization. There is a high probability that\n"
|
||||||
|
" // game only want to extract a single channel (blue). It will allow\n"
|
||||||
|
" // to remove most of the conditional operation and yield a +2/3 fps\n"
|
||||||
|
" // boost on MGS3\n"
|
||||||
|
" //\n"
|
||||||
|
" // Hypothesis wrong in Prince of Persia ... Seriously WTF !\n"
|
||||||
|
"//#define ONLY_BLUE;\n"
|
||||||
|
"\n"
|
||||||
" // Convert a RGBA texture into a 8 bits packed texture\n"
|
" // Convert a RGBA texture into a 8 bits packed texture\n"
|
||||||
" // Input column: 8x2 RGBA pixels\n"
|
" // Input column: 8x2 RGBA pixels\n"
|
||||||
" // 0: 8 RGBA\n"
|
" // 0: 8 RGBA\n"
|
||||||
|
@ -233,7 +247,6 @@ static const char* convert_glsl =
|
||||||
" // 1: 8 R | 8 B\n"
|
" // 1: 8 R | 8 B\n"
|
||||||
" // 2: 8 G | 8 A\n"
|
" // 2: 8 G | 8 A\n"
|
||||||
" // 3: 8 G | 8 A\n"
|
" // 3: 8 G | 8 A\n"
|
||||||
"\n"
|
|
||||||
" float c;\n"
|
" float c;\n"
|
||||||
"\n"
|
"\n"
|
||||||
" uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n"
|
" uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n"
|
||||||
|
@ -243,15 +256,20 @@ static const char* convert_glsl =
|
||||||
" int txN = tb.x | (int(gl_FragCoord.x) & 7);\n"
|
" int txN = tb.x | (int(gl_FragCoord.x) & 7);\n"
|
||||||
" int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n"
|
" int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
" txN *= ScalingFactor.x;\n"
|
||||||
|
" txH *= ScalingFactor.x;\n"
|
||||||
|
" ty *= ScalingFactor.y;\n"
|
||||||
|
"\n"
|
||||||
|
" // TODO investigate texture gather\n"
|
||||||
" vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n"
|
" vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n"
|
||||||
" vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n"
|
" vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n"
|
||||||
"\n"
|
"\n"
|
||||||
" // Potential speed optimization. There is a high probability that\n"
|
|
||||||
" // game only want to extract a single channel (blue). It will allow\n"
|
|
||||||
" // to remove the sel.x condition check\n"
|
|
||||||
"\n"
|
"\n"
|
||||||
" if ((sel.y & 4u) == 0u) {\n"
|
" if ((sel.y & 4u) == 0u) {\n"
|
||||||
" // Column 0 and 2\n"
|
" // Column 0 and 2\n"
|
||||||
|
"#ifdef ONLY_BLUE\n"
|
||||||
|
" c = cN.b;\n"
|
||||||
|
"#else\n"
|
||||||
" if ((sel.y & 3u) < 2u) {\n"
|
" if ((sel.y & 3u) < 2u) {\n"
|
||||||
" // first 2 lines of the col\n"
|
" // first 2 lines of the col\n"
|
||||||
" if (sel.x < 8u)\n"
|
" if (sel.x < 8u)\n"
|
||||||
|
@ -264,7 +282,11 @@ static const char* convert_glsl =
|
||||||
" else\n"
|
" else\n"
|
||||||
" c = cH.a;\n"
|
" c = cH.a;\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
|
"#endif\n"
|
||||||
" } else {\n"
|
" } else {\n"
|
||||||
|
"#ifdef ONLY_BLUE\n"
|
||||||
|
" c = cH.b;\n"
|
||||||
|
"#else\n"
|
||||||
" // Column 1 and 3\n"
|
" // Column 1 and 3\n"
|
||||||
" if ((sel.y & 3u) < 2u) {\n"
|
" if ((sel.y & 3u) < 2u) {\n"
|
||||||
" // first 2 lines of the col\n"
|
" // first 2 lines of the col\n"
|
||||||
|
@ -278,6 +300,7 @@ static const char* convert_glsl =
|
||||||
" else\n"
|
" else\n"
|
||||||
" c = cN.a;\n"
|
" c = cN.a;\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
|
"#endif\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
"\n"
|
"\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
|
Loading…
Reference in New Issue