From d4b1d9abe51a4dca59ca950f6070c9f52e7a1baf Mon Sep 17 00:00:00 2001
From: refractionpcsx2 <refraction@gmail.com>
Date: Fri, 11 Feb 2022 21:01:27 +0000
Subject: [PATCH] GS-hw: Increase 32->16bit conversion accuracy

Improves Dog's Life (no longer goes completely black right away, but still has decal problems).
Vastly improves texture quality in Spider-Man 3 when Framebuffer Conversion is used to fix the textures.
---
 bin/resources/shaders/dx11/convert.fx     |  9 ++---
 bin/resources/shaders/opengl/convert.glsl | 41 ++---------------------
 bin/resources/shaders/vulkan/convert.glsl |  9 ++---
 3 files changed, 9 insertions(+), 50 deletions(-)

diff --git a/bin/resources/shaders/dx11/convert.fx b/bin/resources/shaders/dx11/convert.fx
index 1f53695041..977fb0a532 100644
--- a/bin/resources/shaders/dx11/convert.fx
+++ b/bin/resources/shaders/dx11/convert.fx
@@ -114,15 +114,12 @@ float4 ps_scanlines(PS_INPUT input, int i)
 	return sample_c(input.t) * saturate(mask[i] + 0.5f);
 }
 
+// Need to be careful with precision here; it can break games like Spider-Man 3 and Dog's Life.
 uint ps_convert_rgba8_16bits(PS_INPUT input) : SV_Target0
 {
-	float4 c = sample_c(input.t);
+	uint4 i = sample_c(input.t) * float4(255.5f, 255.5f, 255.5f, 255.5f);
 
-	c.a *= 256.0f / 127; // hm, 0.5 won't give us 1.0 if we just multiply with 2
-
-	uint4 i = c * float4(0x001f, 0x03e0, 0x7c00, 0x8000);
-
-	return (i.x & 0x001f) | (i.y & 0x03e0) | (i.z & 0x7c00) | (i.w & 0x8000);	
+	return ((i.x & 0x00F8u) >> 3) | ((i.y & 0x00F8u) << 2) | ((i.z & 0x00f8u) << 7) | ((i.w & 0x80u) << 8);
 }
 
 PS_OUTPUT ps_datm1(PS_INPUT input)
diff --git a/bin/resources/shaders/opengl/convert.glsl b/bin/resources/shaders/opengl/convert.glsl
index 5e5efba1a9..d17448e347 100644
--- a/bin/resources/shaders/opengl/convert.glsl
+++ b/bin/resources/shaders/opengl/convert.glsl
@@ -74,47 +74,12 @@ void ps_depth_copy()
 #endif
 
 #ifdef ps_convert_rgba8_16bits
+// Need to be careful with precision here; it can break games like Spider-Man 3 and Dog's Life.
 void ps_convert_rgba8_16bits()
 {
-    // Input Color is RGBA8
-
-    // We want to output a pixel on the PSMCT16* format
-    // A1-BGR5
-
-#if 0
-    // Note: dot is a good idea from pseudo. However we must be careful about float accuraccy.
-    // Here a global idea example:
-    //
-    // SV_Target1 = dot(round(sample_c() * vec4(31.f, 31.f, 31.f, 1.f)), vec4(1.f, 32.f, 1024.f, 32768.f));
-    //
-
-    // For me this code is more accurate but it will require some tests
-
-    vec4 c = sample_c() * 255.0f + 0.5f; // Denormalize value to avoid float precision issue
-
-    // shift Red: -3
-    // shift Green: -3 + 5
-    // shift Blue: -3 + 10
-    // shift Alpha: -7 + 15
-    highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value
-
-    // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
-    SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));
-
-#else
-    // Old code which is likely wrong.
-
-    vec4 c = sample_c();
-
-    c.a *= 256.0f / 127.0f; // hm, 0.5 won't give us 1.0 if we just multiply with 2
-
-    highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));
-
-    // bit field operation requires GL4 HW.
-    SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));
-#endif
-
+    highp uvec4 i = uvec4(sample_c() * vec4(255.5f, 255.5f, 255.5f, 255.5f));
 
+    SV_Target1 = ((i.x & 0x00F8u) >> 3) | ((i.y & 0x00F8u) << 2) | ((i.z & 0x00f8u) << 7) | ((i.w & 0x80u) << 8);
 }
 #endif
 
diff --git a/bin/resources/shaders/vulkan/convert.glsl b/bin/resources/shaders/vulkan/convert.glsl
index bc8cd99342..4c65e70824 100644
--- a/bin/resources/shaders/vulkan/convert.glsl
+++ b/bin/resources/shaders/vulkan/convert.glsl
@@ -87,15 +87,12 @@ void ps_filter_transparency()
 #endif
 
 #ifdef ps_convert_rgba8_16bits
+// Need to be careful with precision here; it can break games like Spider-Man 3 and Dog's Life.
 void ps_convert_rgba8_16bits()
 {
-	vec4 c = sample_c(v_tex);
+	highp uvec4 i = uvec4(sample_c(v_tex) * vec4(255.5f, 255.5f, 255.5f, 255.5f));
 
-	c.a *= 256.0f / 127; // hm, 0.5 won't give us 1.0 if we just multiply with 2
-
-	uvec4 i = uvec4(c * vec4(0x001f, 0x03e0, 0x7c00, 0x8000));
-
-	o_col0 = (i.x & 0x001fu) | (i.y & 0x03e0u) | (i.z & 0x7c00u) | (i.w & 0x8000u);	
+	o_col0 = ((i.x & 0x00F8u) >> 3) | ((i.y & 0x00F8u) << 2) | ((i.z & 0x00f8u) << 7) | ((i.w & 0x80u) << 8);
 }
 #endif