glsl: correct the alternate implementation of ps_main1

Still not yet enabled by default. Potentially it can be optimized with the dot product, but special care needs to be taken to ensure float accuracy. Bonus: it could work on old GPUs (aka DX9).
2015-05-24 13:10:12 +02:00 · 2015-05-24 13:10:12 +02:00 · a70c3bf5de
parent c82fd94c6c
commit a70c3bf5de
2 changed files with 28 additions and 6 deletions
--- a/plugins/GSdx/res/glsl/convert.glsl
+++ b/plugins/GSdx/res/glsl/convert.glsl
@ -114,13 +114,23 @@ void ps_main1()
 	// A1-BGR5

 #if 0
+	// Note: dot is a good idea from pseudo. However we must be careful about float accuraccy.
+	// Here a global idea example:
+	//
+	// SV_Target1 = dot(round(sample_c() * vec4(31.f, 31.f, 31.f, 1.f)), vec4(1.f, 32.f, 1024.f, 32768.f));
+	//
+
 	// For me this code is more accurate but it will require some tests

-    vec4 c = sample_c() * 255.0f + 0.5f; // Denormalize value
+    vec4 c = sample_c() * 255.0f + 0.5f; // Denormalize value to avoid float precision issue

-    highp uvec4 i = uvec4(c * vec4(1/32.0f, 4.0f, 64.0f, 512.0f)); // Shift value
+	// shift Red: -3
+	// shift Green: -3 + 5
+	// shift Blue: -3 + 10
+	// shift Alpha: -7 + 15
+    highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value

-    SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));
+    SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));

 #else
 	// Old code which is likely wrong.
@ -134,6 +144,7 @@ void ps_main1()
    SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));
 #endif

+
 }
 #endif

--- a/plugins/GSdx/res/glsl_source.h
+++ b/plugins/GSdx/res/glsl_source.h
@ -139,13 +139,23 @@ static const char* convert_glsl =
 	"	// A1-BGR5\n"
 	"\n"
 	"#if 0\n"
+	"	// Note: dot is a good idea from pseudo. However we must be careful about float accuraccy.\n"
+	"	// Here a global idea example:\n"
+	"	//\n"
+	"	// SV_Target1 = dot(round(sample_c() * vec4(31.f, 31.f, 31.f, 1.f)), vec4(1.f, 32.f, 1024.f, 32768.f));\n"
+	"	//\n"
+	"\n"
 	"	// For me this code is more accurate but it will require some tests\n"
 	"\n"
-	"    vec4 c = sample_c() * 255.0f + 0.5f; // Denormalize value\n"
+	"    vec4 c = sample_c() * 255.0f + 0.5f; // Denormalize value to avoid float precision issue\n"
 	"\n"
-	"    highp uvec4 i = uvec4(c * vec4(1/32.0f, 4.0f, 64.0f, 512.0f)); // Shift value\n"
+	"	// shift Red: -3\n"
+	"	// shift Green: -3 + 5\n"
+	"	// shift Blue: -3 + 10\n"
+	"	// shift Alpha: -7 + 15\n"
+	"    highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value\n"
 	"\n"
-	"    SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n"
+	"    SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));\n"
 	"\n"
 	"#else\n"
 	"	// Old code which is likely wrong.\n"
@ -159,6 +169,7 @@ static const char* convert_glsl =
 	"    SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n"
 	"#endif\n"
 	"\n"
+	"\n"
 	"}\n"
 	"#endif\n"
 	"\n"