glsl: round texture and fragment color as the SW renderer does

The GS uses integer values and performs integer operations too.

This commit truncates the sampled texture, the interpolated fragment color,
and the product of the two.

It negatively impacts performance by about 3/4% (GPU), but it fixes rendering in
Suikoden and potentially some other games too.
This commit is contained in:
Gregory Hainaut 2015-07-17 20:57:32 +02:00
parent 5f247a6e16
commit 36e83b42ce
3 changed files with 29 additions and 35 deletions

View File

@ -226,7 +226,7 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
{ {
{2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(0) } , {2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(0) } ,
{2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(16) } , {2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(16) } ,
{4 , GL_UNSIGNED_BYTE , GL_TRUE , sizeof(GSVertex) , (const GLvoid*)(8) } , {4 , GL_UNSIGNED_BYTE , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(8) } ,
{1 , GL_FLOAT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(12) } , {1 , GL_FLOAT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(12) } ,
{2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(16) } , {2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(16) } ,
{1 , GL_UNSIGNED_INT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(20) } , {1 , GL_UNSIGNED_INT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(20) } ,

View File

@ -13,6 +13,11 @@
// TEX_COORD_DEBUG output the uv coordinate as color. It is useful // TEX_COORD_DEBUG output the uv coordinate as color. It is useful
// to detect bad sampling due to upscaling // to detect bad sampling due to upscaling
//#define TEX_COORD_DEBUG //#define TEX_COORD_DEBUG
// Just copy directly the texture coordinate
#ifdef TEX_COORD_DEBUG
#define PS_TFX 1
#define PS_TCC 1
#endif
// Not sure we have same issue on opengl. Doesn't work anyway on ATI card // Not sure we have same issue on opengl. Doesn't work anyway on ATI card
// And I say this as an ATI user. // And I say this as an ATI user.
@ -274,16 +279,13 @@ vec4 sample_color(vec2 st, float q)
t = c[0]; t = c[0];
#endif #endif
return t; return trunc(t * 255.0f);
} }
vec4 tfx(vec4 t, vec4 c) vec4 tfx(vec4 t, vec4 c)
{ {
vec4 c_out; vec4 c_out;
// Note: It will be possible to precompute the factor 255/128 in the VS/GS vec4 FxT = trunc(trunc(c) * t / 128.0f);
// However, I didn't see real speedup and it might make the code more difficult
// to support proper rounding
vec4 FxT = c * t * 255.0f / 128.0f;
#if (PS_TFX == 0) #if (PS_TFX == 0)
c_out = FxT; c_out = FxT;
@ -303,7 +305,10 @@ vec4 tfx(vec4 t, vec4 c)
c_out.a = c.a; c_out.a = c.a;
#endif #endif
return c_out; // Normalize the value
c_out /= 255.0f;
return clamp(c_out, vec4(0.0f), vec4(1.0f));
} }
void atst(vec4 c) void atst(vec4 c)
@ -340,7 +345,7 @@ void colclip(inout vec4 c)
#if (PS_COLCLIP == 2) #if (PS_COLCLIP == 2)
c.rgb = 256.0f/255.0f - c.rgb; c.rgb = 256.0f/255.0f - c.rgb;
#endif #endif
#if (PS_COLCLIP > 0) #if (PS_COLCLIP == 1 || PS_COLCLIP == 2)
bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f)); bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f));
c.rgb *= vec3(factor); c.rgb *= vec3(factor);
#endif #endif
@ -357,25 +362,17 @@ vec4 ps_color()
{ {
vec4 t = sample_color(PSin_t.xy, PSin_t.w); vec4 t = sample_color(PSin_t.xy, PSin_t.w);
vec4 zero = vec4(0.0f, 0.0f, 0.0f, 0.0f);
vec4 one = vec4(1.0f, 1.0f, 1.0f, 1.0f);
#ifdef TEX_COORD_DEBUG
vec4 c = clamp(t, zero, one);
#else
#if PS_IIP == 1 #if PS_IIP == 1
vec4 c = clamp(tfx(t, PSin_c), zero, one); vec4 c = tfx(t, PSin_c);
#else #else
vec4 c = clamp(tfx(t, PSin_fc), zero, one); vec4 c = tfx(t, PSin_fc);
#endif
#endif #endif
atst(c); atst(c);
fog(c, PSin_t.z); fog(c, PSin_t.z);
#if (PS_COLCLIP < 3)
colclip(c); colclip(c);
#endif
#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes #if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes
c.rgb = vec3(1.0f, 1.0f, 1.0f); c.rgb = vec3(1.0f, 1.0f, 1.0f);

View File

@ -874,6 +874,11 @@ static const char* tfx_fs_all_glsl =
"// TEX_COORD_DEBUG output the uv coordinate as color. It is useful\n" "// TEX_COORD_DEBUG output the uv coordinate as color. It is useful\n"
"// to detect bad sampling due to upscaling\n" "// to detect bad sampling due to upscaling\n"
"//#define TEX_COORD_DEBUG\n" "//#define TEX_COORD_DEBUG\n"
"// Just copy directly the texture coordinate\n"
"#ifdef TEX_COORD_DEBUG\n"
"#define PS_TFX 1\n"
"#define PS_TCC 1\n"
"#endif\n"
"\n" "\n"
"// Not sure we have same issue on opengl. Doesn't work anyway on ATI card\n" "// Not sure we have same issue on opengl. Doesn't work anyway on ATI card\n"
"// And I say this as an ATI user.\n" "// And I say this as an ATI user.\n"
@ -1135,16 +1140,13 @@ static const char* tfx_fs_all_glsl =
" t = c[0];\n" " t = c[0];\n"
"#endif\n" "#endif\n"
"\n" "\n"
" return t;\n" " return trunc(t * 255.0f);\n"
"}\n" "}\n"
"\n" "\n"
"vec4 tfx(vec4 t, vec4 c)\n" "vec4 tfx(vec4 t, vec4 c)\n"
"{\n" "{\n"
" vec4 c_out;\n" " vec4 c_out;\n"
" // Note: It will be possible to precompute the factor 255/128 in the VS/GS\n" " vec4 FxT = trunc(trunc(c) * t / 128.0f);\n"
" // However, I didn't see real speedup and it might make the code more difficult\n"
" // to support proper rounding\n"
" vec4 FxT = c * t * 255.0f / 128.0f;\n"
"\n" "\n"
"#if (PS_TFX == 0)\n" "#if (PS_TFX == 0)\n"
" c_out = FxT;\n" " c_out = FxT;\n"
@ -1164,7 +1166,10 @@ static const char* tfx_fs_all_glsl =
" c_out.a = c.a;\n" " c_out.a = c.a;\n"
"#endif\n" "#endif\n"
"\n" "\n"
" return c_out;\n" " // Normalize the value\n"
" c_out /= 255.0f;\n"
"\n"
" return clamp(c_out, vec4(0.0f), vec4(1.0f));\n"
"}\n" "}\n"
"\n" "\n"
"void atst(vec4 c)\n" "void atst(vec4 c)\n"
@ -1201,7 +1206,7 @@ static const char* tfx_fs_all_glsl =
"#if (PS_COLCLIP == 2)\n" "#if (PS_COLCLIP == 2)\n"
" c.rgb = 256.0f/255.0f - c.rgb;\n" " c.rgb = 256.0f/255.0f - c.rgb;\n"
"#endif\n" "#endif\n"
"#if (PS_COLCLIP > 0)\n" "#if (PS_COLCLIP == 1 || PS_COLCLIP == 2)\n"
" bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f));\n" " bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f));\n"
" c.rgb *= vec3(factor);\n" " c.rgb *= vec3(factor);\n"
"#endif\n" "#endif\n"
@ -1218,25 +1223,17 @@ static const char* tfx_fs_all_glsl =
"{\n" "{\n"
" vec4 t = sample_color(PSin_t.xy, PSin_t.w);\n" " vec4 t = sample_color(PSin_t.xy, PSin_t.w);\n"
"\n" "\n"
" vec4 zero = vec4(0.0f, 0.0f, 0.0f, 0.0f);\n"
" vec4 one = vec4(1.0f, 1.0f, 1.0f, 1.0f);\n"
"#ifdef TEX_COORD_DEBUG\n"
" vec4 c = clamp(t, zero, one);\n"
"#else\n"
"#if PS_IIP == 1\n" "#if PS_IIP == 1\n"
" vec4 c = clamp(tfx(t, PSin_c), zero, one);\n" " vec4 c = tfx(t, PSin_c);\n"
"#else\n" "#else\n"
" vec4 c = clamp(tfx(t, PSin_fc), zero, one);\n" " vec4 c = tfx(t, PSin_fc);\n"
"#endif\n"
"#endif\n" "#endif\n"
"\n" "\n"
" atst(c);\n" " atst(c);\n"
"\n" "\n"
" fog(c, PSin_t.z);\n" " fog(c, PSin_t.z);\n"
"\n" "\n"
"#if (PS_COLCLIP < 3)\n"
" colclip(c);\n" " colclip(c);\n"
"#endif\n"
"\n" "\n"
"#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n" "#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n"
" c.rgb = vec3(1.0f, 1.0f, 1.0f);\n" " c.rgb = vec3(1.0f, 1.0f, 1.0f);\n"