glsl: round texture and fragment color like the SW renderer

The GS uses integer values and performs integer operations too. This commit truncates the sampled texture, the interpolated fragment color, and the product of the two. It negatively impacts performance by about 3/4% (GPU), but it fixes rendering in Suikoden and potentially some other games too.
2015-07-17 20:57:32 +02:00 · 2015-07-17 20:57:32 +02:00 · 36e83b42ce
parent 5f247a6e16
commit 36e83b42ce
3 changed files with 29 additions and 35 deletions
--- a/plugins/GSdx/GSDeviceOGL.cpp
+++ b/plugins/GSdx/GSDeviceOGL.cpp
@ -226,7 +226,7 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
 	{
 		{2 , GL_FLOAT          , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(0) }  ,
 		{2 , GL_FLOAT          , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(16) } ,
-		{4 , GL_UNSIGNED_BYTE  , GL_TRUE  , sizeof(GSVertex)    , (const GLvoid*)(8) }  ,
+		{4 , GL_UNSIGNED_BYTE  , GL_FALSE , sizeof(GSVertex)    , (const GLvoid*)(8) }  ,
 		{1 , GL_FLOAT          , GL_FALSE , sizeof(GSVertex)    , (const GLvoid*)(12) } ,
 		{2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex)    , (const GLvoid*)(16) } ,
 		{1 , GL_UNSIGNED_INT   , GL_FALSE , sizeof(GSVertex)    , (const GLvoid*)(20) } ,
--- a/plugins/GSdx/res/glsl/tfx_fs.glsl
+++ b/plugins/GSdx/res/glsl/tfx_fs.glsl
@ -13,6 +13,11 @@
 // TEX_COORD_DEBUG output the uv coordinate as color. It is useful
 // to detect bad sampling due to upscaling
 //#define TEX_COORD_DEBUG
+// Just copy directly the texture coordinate
+#ifdef TEX_COORD_DEBUG
+#define PS_TFX 1
+#define PS_TCC 1
+#endif

 // Not sure we have same issue on opengl. Doesn't work anyway on ATI card
 // And I say this as an ATI user.
@ -274,16 +279,13 @@ vec4 sample_color(vec2 st, float q)
 	t = c[0];
 #endif

-	return t;
+	return trunc(t * 255.0f);
 }

 vec4 tfx(vec4 t, vec4 c)
 {
 	vec4 c_out;
-	// Note: It will be possible to precompute the factor 255/128 in the VS/GS
-	// However, I didn't see real speedup and it might make the code more difficult
-	// to support proper rounding
-	vec4 FxT = c * t * 255.0f / 128.0f;
+	vec4 FxT = trunc(trunc(c) * t / 128.0f);

 #if (PS_TFX == 0)
 	c_out = FxT;
@ -303,7 +305,10 @@ vec4 tfx(vec4 t, vec4 c)
    c_out.a = c.a;
 #endif

-	return c_out;
+	// Normalize the value
+	c_out /= 255.0f;
+
+	return clamp(c_out, vec4(0.0f), vec4(1.0f));
 }

 void atst(vec4 c)
@ -340,7 +345,7 @@ void colclip(inout vec4 c)
 #if (PS_COLCLIP == 2)
 	c.rgb = 256.0f/255.0f - c.rgb;
 #endif
-#if (PS_COLCLIP > 0)
+#if (PS_COLCLIP == 1 || PS_COLCLIP == 2)
 	bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f));
 	c.rgb *= vec3(factor);
 #endif
@ -357,25 +362,17 @@ vec4 ps_color()
 {
 	vec4 t = sample_color(PSin_t.xy, PSin_t.w);

-	vec4 zero = vec4(0.0f, 0.0f, 0.0f, 0.0f);
-	vec4 one = vec4(1.0f, 1.0f, 1.0f, 1.0f);
-#ifdef TEX_COORD_DEBUG
-	vec4 c = clamp(t, zero, one);
-#else
 #if PS_IIP == 1
-	vec4 c = clamp(tfx(t, PSin_c), zero, one);
+	vec4 c = tfx(t, PSin_c);
 #else
-	vec4 c = clamp(tfx(t, PSin_fc), zero, one);
-#endif
+	vec4 c = tfx(t, PSin_fc);
 #endif

 	atst(c);

 	fog(c, PSin_t.z);

-#if (PS_COLCLIP < 3)
 	colclip(c);
-#endif

 #if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes
 	c.rgb = vec3(1.0f, 1.0f, 1.0f);
--- a/plugins/GSdx/res/glsl_source.h
+++ b/plugins/GSdx/res/glsl_source.h
@ -874,6 +874,11 @@ static const char* tfx_fs_all_glsl =
 	"// TEX_COORD_DEBUG output the uv coordinate as color. It is useful\n"
 	"// to detect bad sampling due to upscaling\n"
 	"//#define TEX_COORD_DEBUG\n"
+	"// Just copy directly the texture coordinate\n"
+	"#ifdef TEX_COORD_DEBUG\n"
+	"#define PS_TFX 1\n"
+	"#define PS_TCC 1\n"
+	"#endif\n"
 	"\n"
 	"// Not sure we have same issue on opengl. Doesn't work anyway on ATI card\n"
 	"// And I say this as an ATI user.\n"
@ -1135,16 +1140,13 @@ static const char* tfx_fs_all_glsl =
 	"	t = c[0];\n"
 	"#endif\n"
 	"\n"
-	"	return t;\n"
+	"	return trunc(t * 255.0f);\n"
 	"}\n"
 	"\n"
 	"vec4 tfx(vec4 t, vec4 c)\n"
 	"{\n"
 	"	vec4 c_out;\n"
-	"	// Note: It will be possible to precompute the factor 255/128 in the VS/GS\n"
-	"	// However, I didn't see real speedup and it might make the code more difficult\n"
-	"	// to support proper rounding\n"
-	"	vec4 FxT = c * t * 255.0f / 128.0f;\n"
+	"	vec4 FxT = trunc(trunc(c) * t / 128.0f);\n"
 	"\n"
 	"#if (PS_TFX == 0)\n"
 	"	c_out = FxT;\n"
@ -1164,7 +1166,10 @@ static const char* tfx_fs_all_glsl =
 	"    c_out.a = c.a;\n"
 	"#endif\n"
 	"\n"
-	"	return c_out;\n"
+	"	// Normalize the value\n"
+	"	c_out /= 255.0f;\n"
+	"\n"
+	"	return clamp(c_out, vec4(0.0f), vec4(1.0f));\n"
 	"}\n"
 	"\n"
 	"void atst(vec4 c)\n"
@ -1201,7 +1206,7 @@ static const char* tfx_fs_all_glsl =
 	"#if (PS_COLCLIP == 2)\n"
 	"	c.rgb = 256.0f/255.0f - c.rgb;\n"
 	"#endif\n"
-	"#if (PS_COLCLIP > 0)\n"
+	"#if (PS_COLCLIP == 1 || PS_COLCLIP == 2)\n"
 	"	bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f));\n"
 	"	c.rgb *= vec3(factor);\n"
 	"#endif\n"
@ -1218,25 +1223,17 @@ static const char* tfx_fs_all_glsl =
 	"{\n"
 	"	vec4 t = sample_color(PSin_t.xy, PSin_t.w);\n"
 	"\n"
-	"	vec4 zero = vec4(0.0f, 0.0f, 0.0f, 0.0f);\n"
-	"	vec4 one = vec4(1.0f, 1.0f, 1.0f, 1.0f);\n"
-	"#ifdef TEX_COORD_DEBUG\n"
-	"	vec4 c = clamp(t, zero, one);\n"
-	"#else\n"
 	"#if PS_IIP == 1\n"
-	"	vec4 c = clamp(tfx(t, PSin_c), zero, one);\n"
+	"	vec4 c = tfx(t, PSin_c);\n"
 	"#else\n"
-	"	vec4 c = clamp(tfx(t, PSin_fc), zero, one);\n"
-	"#endif\n"
+	"	vec4 c = tfx(t, PSin_fc);\n"
 	"#endif\n"
 	"\n"
 	"	atst(c);\n"
 	"\n"
 	"	fog(c, PSin_t.z);\n"
 	"\n"
-	"#if (PS_COLCLIP < 3)\n"
 	"	colclip(c);\n"
-	"#endif\n"
 	"\n"
 	"#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n"
 	"	c.rgb = vec3(1.0f, 1.0f, 1.0f);\n"