mirror of https://github.com/PCSX2/pcsx2.git
gsdx-tc: use a single shader pass to convert textures to the 8-bit format
It might save a couple of fps. Add a define to test the performance if we keep only the blue channel. It breaks the code in Prince of Persia, which uses the red/green channels... Maybe make it a speed hack :( Or find a way to replace all the `if`s with a lookup table. Note: it is only supported on OpenGL currently.
This commit is contained in:
parent
2ecca529d1
commit
6121677aa1
|
@ -28,6 +28,14 @@
|
|||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
// CPU-side constant buffer for the texture "convert" shaders.
// Declared inside a #pragma pack(push, 1) region, so no padding is inserted
// between members.
class ConvertConstantBuffer
|
||||
{
|
||||
public:
|
||||
// Integer scaling factor made available to the convert shaders.
GSVector4i ScalingFactor;
|
||||
|
||||
// Zero-fill the whole object so every member starts from a known value.
ConvertConstantBuffer() {memset(this, 0, sizeof(*this));}
|
||||
};
|
||||
|
||||
class MergeConstantBuffer
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -39,6 +39,7 @@ static const uint32 g_merge_cb_index = 10;
|
|||
static const uint32 g_interlace_cb_index = 11;
|
||||
static const uint32 g_shadeboost_cb_index = 12;
|
||||
static const uint32 g_fx_cb_index = 14;
|
||||
static const uint32 g_convert_index = 15;
|
||||
|
||||
bool GSDeviceOGL::m_debug_gl_call = false;
|
||||
int GSDeviceOGL::s_n = 0;
|
||||
|
@ -103,6 +104,7 @@ GSDeviceOGL::~GSDeviceOGL()
|
|||
delete m_convert.dss;
|
||||
delete m_convert.dss_write;
|
||||
delete m_convert.bs;
|
||||
delete m_convert.cb;
|
||||
|
||||
// Clean m_fxaa
|
||||
delete m_fxaa.cb;
|
||||
|
@ -242,6 +244,12 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
|
|||
// ****************************************************************
|
||||
// convert
|
||||
// ****************************************************************
|
||||
m_convert.cb = new GSUniformBufferOGL(g_convert_index, sizeof(ConvertConstantBuffer));
|
||||
// Upload once and forget about it
|
||||
ConvertConstantBuffer cb;
|
||||
cb.ScalingFactor = GSVector4i(theApp.GetConfig("nativeres", 0) ? 1 : theApp.GetConfig("upscale_multiplier", 2));
|
||||
m_convert.cb->upload(&cb);
|
||||
|
||||
m_convert.vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, convert_glsl);
|
||||
for(size_t i = 0; i < countof(m_convert.ps); i++)
|
||||
m_convert.ps[i] = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, convert_glsl);
|
||||
|
|
|
@ -504,6 +504,7 @@ class GSDeviceOGL : public GSDevice
|
|||
GSDepthStencilOGL* dss;
|
||||
GSDepthStencilOGL* dss_write;
|
||||
GSBlendStateOGL* bs;
|
||||
GSUniformBufferOGL* cb;
|
||||
} m_convert;
|
||||
|
||||
struct {
|
||||
|
|
|
@ -820,8 +820,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
|||
// Shader 11 convert depth to color
|
||||
// Shader 14 convert 32 bits color to 8 bits color
|
||||
int shader = dst->m_type != RenderTarget ? 11 : 0;
|
||||
bool is_8bits = TEX0.PSM == PSM_PSMT8 && IsOpenGL();
|
||||
|
||||
if (TEX0.PSM == PSM_PSMT8) {
|
||||
if (is_8bits) {
|
||||
GL_INS("Reading RT as a packed-indexed 8 bits format");
|
||||
shader = 14; // ask a conversion to 8 bits format
|
||||
}
|
||||
|
@ -847,29 +848,15 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
|||
}
|
||||
|
||||
|
||||
// Unscale 8 bits textures, quality won't be nice but format is really awful
|
||||
// Code won't be compatible with MSAA but it is a DX issue
|
||||
if (TEX0.PSM == PSM_PSMT8) {
|
||||
GSVector2 old_scale = dst->m_texture->GetScale();
|
||||
|
||||
if (old_scale != GSVector2(1.0f, 1.0f)) {
|
||||
GSVector2i size = dst->m_texture->GetSize();
|
||||
tmp = dst->m_texture;
|
||||
|
||||
dst->m_texture = m_renderer->m_dev->CreateRenderTarget(size.x, size.y, false);
|
||||
|
||||
GSVector4 sRect(0.0, 0.0, old_scale.x, old_scale.y);
|
||||
GSVector4 dRect(0.0, 0.0, size.x, size.y);
|
||||
m_renderer->m_dev->StretchRect(tmp, sRect, dst->m_texture, dRect, 0, false);
|
||||
|
||||
dst->m_texture->SetScale(GSVector2(1.0f, 1.0f));
|
||||
}
|
||||
}
|
||||
|
||||
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
|
||||
|
||||
int w = (int)(dst->m_texture->GetScale().x * tw);
|
||||
int h = (int)(dst->m_texture->GetScale().y * th);
|
||||
if (is_8bits) {
|
||||
// Unscale 8 bits textures, quality won't be nice but format is really awful
|
||||
w = tw;
|
||||
h = th;
|
||||
}
|
||||
|
||||
GSVector2i dstsize = dst->m_texture->GetSize();
|
||||
|
||||
|
@ -955,18 +942,21 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
|||
// FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0)
|
||||
// At 2x it will become 0.5/128 * 256 = 1 (pixel 1)
|
||||
|
||||
if(w > dstsize.x)
|
||||
{
|
||||
scale.x = (float)dstsize.x / tw;
|
||||
dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x;
|
||||
w = dstsize.x;
|
||||
}
|
||||
if (!is_8bits) {
|
||||
// 8 bits handling is special due to unscaling. It is better to not execute this code
|
||||
if (w > dstsize.x)
|
||||
{
|
||||
scale.x = (float)dstsize.x / tw;
|
||||
dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x;
|
||||
w = dstsize.x;
|
||||
}
|
||||
|
||||
if(h > dstsize.y)
|
||||
{
|
||||
scale.y = (float)dstsize.y / th;
|
||||
dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y;
|
||||
h = dstsize.y;
|
||||
if (h > dstsize.y)
|
||||
{
|
||||
scale.y = (float)dstsize.y / th;
|
||||
dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y;
|
||||
h = dstsize.y;
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4 sRect(0, 0, w, h);
|
||||
|
|
|
@ -70,6 +70,11 @@ layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
|
|||
layout(binding = 0) uniform sampler2D TextureSampler;
|
||||
#endif
|
||||
|
||||
layout(std140, binding = 15) uniform cb15
|
||||
{
|
||||
ivec4 ScalingFactor;
|
||||
};
|
||||
|
||||
vec4 sample_c()
|
||||
{
|
||||
return texture(TextureSampler, PSin_t );
|
||||
|
@ -199,6 +204,15 @@ void ps_main13()
|
|||
#ifdef ps_main14
|
||||
void ps_main14()
|
||||
{
|
||||
|
||||
// Potential speed optimization. There is a high probability that
|
||||
// game only want to extract a single channel (blue). It will allow
|
||||
// to remove most of the conditional operation and yield a +2/3 fps
|
||||
// boost on MGS3
|
||||
//
|
||||
// Hypothesis wrong in Prince of Persia ... Seriously WTF !
|
||||
//#define ONLY_BLUE;
|
||||
|
||||
// Convert a RGBA texture into a 8 bits packed texture
|
||||
// Input column: 8x2 RGBA pixels
|
||||
// 0: 8 RGBA
|
||||
|
@ -208,7 +222,6 @@ void ps_main14()
|
|||
// 1: 8 R | 8 B
|
||||
// 2: 8 G | 8 A
|
||||
// 3: 8 G | 8 A
|
||||
|
||||
float c;
|
||||
|
||||
uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);
|
||||
|
@ -218,15 +231,20 @@ void ps_main14()
|
|||
int txN = tb.x | (int(gl_FragCoord.x) & 7);
|
||||
int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);
|
||||
|
||||
txN *= ScalingFactor.x;
|
||||
txH *= ScalingFactor.x;
|
||||
ty *= ScalingFactor.y;
|
||||
|
||||
// TODO investigate texture gather
|
||||
vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);
|
||||
vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);
|
||||
|
||||
// Potential speed optimization. There is a high probability that
|
||||
// game only want to extract a single channel (blue). It will allow
|
||||
// to remove the sel.x condition check
|
||||
|
||||
if ((sel.y & 4u) == 0u) {
|
||||
// Column 0 and 2
|
||||
#ifdef ONLY_BLUE
|
||||
c = cN.b;
|
||||
#else
|
||||
if ((sel.y & 3u) < 2u) {
|
||||
// first 2 lines of the col
|
||||
if (sel.x < 8u)
|
||||
|
@ -239,7 +257,11 @@ void ps_main14()
|
|||
else
|
||||
c = cH.a;
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
#ifdef ONLY_BLUE
|
||||
c = cH.b;
|
||||
#else
|
||||
// Column 1 and 3
|
||||
if ((sel.y & 3u) < 2u) {
|
||||
// first 2 lines of the col
|
||||
|
@ -253,6 +275,7 @@ void ps_main14()
|
|||
else
|
||||
c = cN.a;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -95,6 +95,11 @@ static const char* convert_glsl =
|
|||
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"layout(std140, binding = 15) uniform cb15\n"
|
||||
"{\n"
|
||||
" ivec4 ScalingFactor;\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"vec4 sample_c()\n"
|
||||
"{\n"
|
||||
" return texture(TextureSampler, PSin_t );\n"
|
||||
|
@ -224,6 +229,15 @@ static const char* convert_glsl =
|
|||
"#ifdef ps_main14\n"
|
||||
"void ps_main14()\n"
|
||||
"{\n"
|
||||
"\n"
|
||||
" // Potential speed optimization. There is a high probability that\n"
|
||||
" // game only want to extract a single channel (blue). It will allow\n"
|
||||
" // to remove most of the conditional operation and yield a +2/3 fps\n"
|
||||
" // boost on MGS3\n"
|
||||
" //\n"
|
||||
" // Hypothesis wrong in Prince of Persia ... Seriously WTF !\n"
|
||||
"//#define ONLY_BLUE;\n"
|
||||
"\n"
|
||||
" // Convert a RGBA texture into a 8 bits packed texture\n"
|
||||
" // Input column: 8x2 RGBA pixels\n"
|
||||
" // 0: 8 RGBA\n"
|
||||
|
@ -233,7 +247,6 @@ static const char* convert_glsl =
|
|||
" // 1: 8 R | 8 B\n"
|
||||
" // 2: 8 G | 8 A\n"
|
||||
" // 3: 8 G | 8 A\n"
|
||||
"\n"
|
||||
" float c;\n"
|
||||
"\n"
|
||||
" uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n"
|
||||
|
@ -243,15 +256,20 @@ static const char* convert_glsl =
|
|||
" int txN = tb.x | (int(gl_FragCoord.x) & 7);\n"
|
||||
" int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n"
|
||||
"\n"
|
||||
" txN *= ScalingFactor.x;\n"
|
||||
" txH *= ScalingFactor.x;\n"
|
||||
" ty *= ScalingFactor.y;\n"
|
||||
"\n"
|
||||
" // TODO investigate texture gather\n"
|
||||
" vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n"
|
||||
" vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n"
|
||||
"\n"
|
||||
" // Potential speed optimization. There is a high probability that\n"
|
||||
" // game only want to extract a single channel (blue). It will allow\n"
|
||||
" // to remove the sel.x condition check\n"
|
||||
"\n"
|
||||
" if ((sel.y & 4u) == 0u) {\n"
|
||||
" // Column 0 and 2\n"
|
||||
"#ifdef ONLY_BLUE\n"
|
||||
" c = cN.b;\n"
|
||||
"#else\n"
|
||||
" if ((sel.y & 3u) < 2u) {\n"
|
||||
" // first 2 lines of the col\n"
|
||||
" if (sel.x < 8u)\n"
|
||||
|
@ -264,7 +282,11 @@ static const char* convert_glsl =
|
|||
" else\n"
|
||||
" c = cH.a;\n"
|
||||
" }\n"
|
||||
"#endif\n"
|
||||
" } else {\n"
|
||||
"#ifdef ONLY_BLUE\n"
|
||||
" c = cH.b;\n"
|
||||
"#else\n"
|
||||
" // Column 1 and 3\n"
|
||||
" if ((sel.y & 3u) < 2u) {\n"
|
||||
" // first 2 lines of the col\n"
|
||||
|
@ -278,6 +300,7 @@ static const char* convert_glsl =
|
|||
" else\n"
|
||||
" c = cN.a;\n"
|
||||
" }\n"
|
||||
"#endif\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"\n"
|
||||
|
|
Loading…
Reference in New Issue