GS/HW: Use integers for depth conversion shaders

Fixes z-fighting in reflections in DBZ BT3, maybe others?
2022-02-12 14:50:35 +10:00 · 2022-02-12 14:50:35 +10:00 · dfe4bc199f
parent 1630404621
commit dfe4bc199f
6 changed files with 48 additions and 116 deletions
--- a/bin/resources/shaders/dx11/convert.fx
+++ b/bin/resources/shaders/dx11/convert.fx
@ -218,12 +218,8 @@ PS_OUTPUT ps_convert_float32_rgba8(PS_INPUT input)
 	PS_OUTPUT output;

 	// Convert a FLOAT32 depth texture into a RGBA color texture
-	const float4 bitSh = float4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));
-	const float4 bitMsk = float4(0.0, 1.0 / 256.0, 1.0 / 256.0, 1.0 / 256.0);
-
-	float4 res = frac(float4(sample_c(input.t).rrrr) * bitSh);
-
-	output.c = (res - res.xxyz * bitMsk) * 256.0f / 255.0f;
+	uint d = uint(sample_c(input.t).r * exp2(32.0f));
+	output.c = float4(uint4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24))) / 255.0f;

 	return output;
 }
@ -233,21 +229,16 @@ PS_OUTPUT ps_convert_float16_rgb5a1(PS_INPUT input)
 	PS_OUTPUT output;

 	// Convert a FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
-	const float4 bitSh = float4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));
-	const uint4 bitMsk = uint4(0x1F, 0x1F, 0x1F, 0x1);
-	uint4 color = uint4(float4(sample_c(input.t).rrrr) * bitSh) & bitMsk;
-
-	output.c = float4(color) / float4(32.0f, 32.0f, 32.0f, 1.0f);
+	uint d = uint(sample_c(input.t).r * exp2(32.0f));
+	output.c = float4(uint4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) / float4(32.0f, 32.0f, 32.0f, 1.0f);

 	return output;
 }
 float ps_convert_rgba8_float32(PS_INPUT input) : SV_Depth
 {
 	// Convert a RRGBA texture into a float depth texture
-	// FIXME: I'm afraid of the accuracy
-	const float4 bitSh = float4(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f), exp2(-8.0f)) * (float4)255.0;
-
-	return dot(sample_c(input.t), bitSh);
+	uint4 c = uint4(sample_c(input.t) * 255.0f + 0.5f); // +0.5 makes the truncating uint cast round to nearest
+	return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); // R = lowest byte ... A = highest byte, then scaled by 2^-32
 }

 float ps_convert_rgba8_float24(PS_INPUT input) : SV_Depth
@ -255,9 +246,8 @@ float ps_convert_rgba8_float24(PS_INPUT input) : SV_Depth
 	// Same as above but without the alpha channel (24 bits Z)

 	// Convert a RRGBA texture into a float depth texture
-	const float3 bitSh = float3(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f)) * (float3)255.0;
-
-	return dot(sample_c(input.t).rgb, bitSh);
+	uint3 c = uint3(sample_c(input.t).rgb * 255.0f + 0.5f);
+	return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f);
 }

 float ps_convert_rgba8_float16(PS_INPUT input) : SV_Depth
@ -265,21 +255,15 @@ float ps_convert_rgba8_float16(PS_INPUT input) : SV_Depth
 	// Same as above but without the A/B channels (16 bits Z)

 	// Convert a RRGBA texture into a float depth texture
-	// FIXME: I'm afraid of the accuracy
-	const float2 bitSh = float2(exp2(-32.0f), exp2(-24.0f)) * (float2)255.0;
-
-	return dot(sample_c(input.t).rg, bitSh);
+	uint2 c = uint2(sample_c(input.t).rg * 255.0f + 0.5f);
+	return float(c.r | (c.g << 8)) * exp2(-32.0f);
 }

 float ps_convert_rgb5a1_float16(PS_INPUT input) : SV_Depth
 {
 	// Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z
-	// FIXME: I'm afraid of the accuracy
-	const float4 bitSh = float4(exp2(-32.0f), exp2(-27.0f), exp2(-22.0f), exp2(-17.0f));
-	// Trunc color to drop useless lsb
-	float4 color = trunc(sample_c(input.t) * (float4)255.0 / float4(8.0f, 8.0f, 8.0f, 128.0f));
-
-	return dot(float4(color), bitSh);
+	uint4 c = uint4(sample_c(input.t) * 255.0f + 0.5f); // +0.5 makes the truncating uint cast round to nearest
+	return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); // top 5 bits of R/G/B land in bits 0-4/5-9/10-14, top bit of A in bit 15
 }

 PS_OUTPUT ps_convert_rgba_8i(PS_INPUT input)
--- a/bin/resources/shaders/dx11/tfx.fx
+++ b/bin/resources/shaders/dx11/tfx.fx
@ -393,23 +393,16 @@ float4 sample_depth(float2 st, float2 pos)
 		// Based on ps_main11 of convert

 		// Convert a FLOAT32 depth texture into a RGBA color texture
-		const float4 bitSh = float4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));
-		const float4 bitMsk = float4(0.0, 1.0f / 256.0f, 1.0f / 256.0f, 1.0f / 256.0f);
-
-		float4 res = frac((float4)fetch_c(uv).r * bitSh);
-
-		t = (res - res.xxyz * bitMsk) * 256.0f;
+		uint d = uint(fetch_c(uv).r * exp2(32.0f));
+		t = float4(uint4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24)));
 	}
 	else if (PS_DEPTH_FMT == 2)
 	{
 		// Based on ps_main12 of convert

 		// Convert a FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
-		const float4 bitSh = float4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));
-		const uint4 bitMsk = uint4(0x1F, 0x1F, 0x1F, 0x1);
-		uint4 color = (uint4)((float4)fetch_c(uv).r * bitSh) & bitMsk;
-
-		t = (float4)color * float4(8.0f, 8.0f, 8.0f, 128.0f);
+		uint d = uint(fetch_c(uv).r * exp2(32.0f));
+		t = float4(uint4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) * float4(8.0f, 8.0f, 8.0f, 128.0f); // keep the x8 / x128 scale of the replaced float path: 5-bit RGB -> 0..248, 1-bit A -> 0 or 128
 	}
 	else if (PS_DEPTH_FMT == 3)
 	{
--- a/bin/resources/shaders/opengl/convert.glsl
+++ b/bin/resources/shaders/opengl/convert.glsl
@ -95,12 +95,8 @@ void ps_convert_float32_32bits()
 void ps_convert_float32_rgba8()
 {
    // Convert a GL_FLOAT32 depth texture into a RGBA color texture
-    const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));
-    const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);
-
-    vec4 res = fract(vec4(sample_c().r) * bitSh);
-
-    SV_Target0 = (res - res.xxyz * bitMsk) * 256.0f/255.0f;
+    uint d = uint(sample_c().r * exp2(32.0f)); // depth scaled by 2^32, fractional part dropped by the uint cast
+    SV_Target0 = vec4(uvec4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24))) / vec4(255.0); // bytes 0..3 of d go to R,G,B,A; /255 maps each byte to 0..1
 }
 #endif

@ -108,11 +104,8 @@ void ps_convert_float32_rgba8()
 void ps_convert_float16_rgb5a1()
 {
    // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
-    const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));
-    const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);
-    uvec4 color = uvec4(vec4(sample_c().r) * bitSh) & bitMsk;
-
-    SV_Target0 = vec4(color) / vec4(32.0f, 32.0f, 32.0f, 1.0f);
+    uint d = uint(sample_c().r * exp2(32.0f)); // depth scaled by 2^32, fractional part dropped by the uint cast
+    SV_Target0 = vec4(uvec4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) / vec4(32.0f, 32.0f, 32.0f, 1.0f); // bits 0-4/5-9/10-14 -> R/G/B (each /32), bit 15 -> A
 }
 #endif

@ -120,9 +113,8 @@ void ps_convert_float16_rgb5a1()
 void ps_convert_rgba8_float32()
 {
    // Convert a RRGBA texture into a float depth texture
-    // FIXME: I'm afraid of the accuracy
-    const vec4 bitSh = vec4(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f), exp2(-8.0f)) * vec4(255.0);
-    gl_FragDepth = dot(sample_c(), bitSh);
+    uvec4 c = uvec4(sample_c() * vec4(255.0f) + vec4(0.5f)); // +0.5 makes the truncating uint cast round to nearest
+    gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); // R = lowest byte ... A = highest byte, then scaled by 2^-32
 }
 #endif

@ -132,9 +124,8 @@ void ps_convert_rgba8_float24()
    // Same as above but without the alpha channel (24 bits Z)

    // Convert a RRGBA texture into a float depth texture
-    // FIXME: I'm afraid of the accuracy
-    const vec3 bitSh = vec3(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f)) * vec3(255.0);
-    gl_FragDepth = dot(sample_c().rgb, bitSh);
+    uvec3 c = uvec3(sample_c().rgb * vec3(255.0f) + vec3(0.5f));
+    gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f);
 }
 #endif

@ -144,9 +135,8 @@ void ps_convert_rgba8_float16()
    // Same as above but without the A/B channels (16 bits Z)

    // Convert a RRGBA texture into a float depth texture
-    // FIXME: I'm afraid of the accuracy
-    const vec2 bitSh = vec2(exp2(-32.0f), exp2(-24.0f)) * vec2(255.0);
-    gl_FragDepth = dot(sample_c().rg, bitSh);
+    uvec2 c = uvec2(sample_c().rg * vec2(255.0f) + vec2(0.5f));
+    gl_FragDepth = float(c.r | (c.g << 8)) * exp2(-32.0f);
 }
 #endif

@ -154,11 +144,8 @@ void ps_convert_rgba8_float16()
 void ps_convert_rgb5a1_float16()
 {
    // Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z
-    // FIXME: I'm afraid of the accuracy
-    const vec4 bitSh = vec4(exp2(-32.0f), exp2(-27.0f), exp2(-22.0f), exp2(-17.0f));
-    // Trunc color to drop useless lsb
-    vec4 color = trunc(sample_c() * vec4(255.0f) / vec4(8.0f, 8.0f, 8.0f, 128.0f));
-    gl_FragDepth = dot(vec4(color), bitSh);
+    uvec4 c = uvec4(sample_c() * vec4(255.0f) + vec4(0.5f)); // +0.5 makes the truncating uint cast round to nearest
+    gl_FragDepth = float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); // top 5 bits of R/G/B land in bits 0-4/5-9/10-14, top bit of A in bit 15
 }
 #endif

--- a/bin/resources/shaders/opengl/tfx_fs.glsl
+++ b/bin/resources/shaders/opengl/tfx_fs.glsl
@ -327,24 +327,15 @@ vec4 sample_depth(vec2 st)

 #elif PS_DEPTH_FMT == 1
    // Based on ps_main11 of convert
-
    // Convert a GL_FLOAT32 depth texture into a RGBA color texture
-    const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));
-    const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);
-
-    vec4 res = fract(vec4(fetch_c(uv).r) * bitSh);
-
-    t = (res - res.xxyz * bitMsk) * 256.0f;
+    uint d = uint(fetch_c(uv).r * exp2(32.0f));
+    t = vec4(uvec4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24)));

 #elif PS_DEPTH_FMT == 2
    // Based on ps_main12 of convert
-
    // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
-    const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));
-    const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);
-    uvec4 color = uvec4(vec4(fetch_c(uv).r) * bitSh) & bitMsk;
-
-    t = vec4(color) * vec4(8.0f, 8.0f, 8.0f, 128.0f);
+    uint d = uint(fetch_c(uv).r * exp2(32.0f));
+    t = vec4(uvec4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) * vec4(8.0f, 8.0f, 8.0f, 128.0f);

 #elif PS_DEPTH_FMT == 3
    // Convert a RGBA/RGB5A1 color texture into a RGBA/RGB5A1 color texture
--- a/bin/resources/shaders/vulkan/convert.glsl
+++ b/bin/resources/shaders/vulkan/convert.glsl
@ -180,12 +180,8 @@ void ps_convert_float32_32bits()
 void ps_convert_float32_rgba8()
 {
 	// Convert a vec32 depth texture into a RGBA color texture
-	const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));
-	const vec4 bitMsk = vec4(0.0, 1.0 / 256.0, 1.0 / 256.0, 1.0 / 256.0);
-
-	vec4 res = fract(vec4(sample_c(v_tex).rrrr) * bitSh);
-
-	o_col0 = (res - res.xxyz * bitMsk) * 256.0f / 255.0f;
+	uint d = uint(sample_c(v_tex).r * exp2(32.0f)); // depth scaled by 2^32, fractional part dropped by the uint cast
+	o_col0 = vec4(uvec4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24))) / vec4(255.0); // bytes 0..3 of d go to R,G,B,A; /255 maps each byte to 0..1
 }
 #endif

@ -193,11 +189,8 @@ void ps_convert_float32_rgba8()
 void ps_convert_float16_rgb5a1()
 {
 	// Convert a vec32 (only 16 lsb) depth into a RGB5A1 color texture
-	const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));
-	const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);
-	uvec4 color = uvec4(vec4(sample_c(v_tex).rrrr) * bitSh) & bitMsk;
-
-	o_col0 = vec4(color) / vec4(32.0f, 32.0f, 32.0f, 1.0f);
+	uint d = uint(sample_c(v_tex).r * exp2(32.0f)); // depth scaled by 2^32, fractional part dropped by the uint cast
+	o_col0 = vec4(uvec4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) / vec4(32.0f, 32.0f, 32.0f, 1.0f); // bits 0-4/5-9/10-14 -> R/G/B (each /32), bit 15 -> A
 }
 #endif

@ -205,10 +198,8 @@ void ps_convert_float16_rgb5a1()
 void ps_convert_rgba8_float32()
 {
 	// Convert a RRGBA texture into a float depth texture
-	// FIXME: I'm afraid of the accuracy
-	const vec4 bitSh = vec4(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f), exp2(-8.0f)) * vec4(255.0);
-
-	gl_FragDepth = dot(sample_c(v_tex), bitSh);
+	uvec4 c = uvec4(sample_c(v_tex) * vec4(255.0f) + vec4(0.5f)); // +0.5 makes the truncating uint cast round to nearest
+	gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); // R = lowest byte ... A = highest byte, then scaled by 2^-32
 }
 #endif

@ -218,9 +209,8 @@ void ps_convert_rgba8_float24()
 	// Same as above but without the alpha channel (24 bits Z)

 	// Convert a RRGBA texture into a float depth texture
-	const vec3 bitSh = vec3(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f)) * vec3(255.0);
-
-	gl_FragDepth = dot(sample_c(v_tex).rgb, bitSh);
+	uvec3 c = uvec3(sample_c(v_tex).rgb * vec3(255.0f) + vec3(0.5f));
+	gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f);
 }
 #endif

@ -230,10 +220,8 @@ void ps_convert_rgba8_float16()
 	// Same as above but without the A/B channels (16 bits Z)

 	// Convert a RRGBA texture into a float depth texture
-	// FIXME: I'm afraid of the accuracy
-	const vec2 bitSh = vec2(exp2(-32.0f), exp2(-24.0f)) * vec2(255.0);
-
-	gl_FragDepth = dot(sample_c(v_tex).rg, bitSh);
+	uvec2 c = uvec2(sample_c(v_tex).rg * vec2(255.0f) + vec2(0.5f));
+	gl_FragDepth = float(c.r | (c.g << 8)) * exp2(-32.0f);
 }
 #endif

@ -241,12 +229,8 @@ void ps_convert_rgba8_float16()
 void ps_convert_rgb5a1_float16()
 {
 	// Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z
-	// FIXME: I'm afraid of the accuracy
-	const vec4 bitSh = vec4(exp2(-32.0f), exp2(-27.0f), exp2(-22.0f), exp2(-17.0f));
-	// Trunc color to drop useless lsb
-	vec4 color = trunc(sample_c(v_tex) * vec4(255.0f) / vec4(8.0f, 8.0f, 8.0f, 128.0f));
-
-	gl_FragDepth = dot(vec4(color), bitSh);
+	uvec4 c = uvec4(sample_c(v_tex) * vec4(255.0f) + vec4(0.5f)); // +0.5 makes the truncating uint cast round to nearest
+	gl_FragDepth = float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); // top 5 bits of R/G/B land in bits 0-4/5-9/10-14, top bit of A in bit 15
 }
 #endif

--- a/bin/resources/shaders/vulkan/tfx.glsl
+++ b/bin/resources/shaders/vulkan/tfx.glsl
@ -653,23 +653,16 @@ vec4 sample_depth(vec2 st, ivec2 pos)
 		// Based on ps_main11 of convert

 		// Convert a vec32 depth texture into a RGBA color texture
-		const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));
-		const vec4 bitMsk = vec4(0.0, 1.0f / 256.0f, 1.0f / 256.0f, 1.0f / 256.0f);
-
-		vec4 res = fract(vec4(fetch_c(uv).r) * bitSh);
-
-		t = (res - res.xxyz * bitMsk) * 256.0f;
+		uint d = uint(fetch_c(uv).r * exp2(32.0f));
+		t = vec4(uvec4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24)));
 	}
 	#elif (PS_DEPTH_FMT == 2)
 	{
 		// Based on ps_main12 of convert

 		// Convert a vec32 (only 16 lsb) depth into a RGB5A1 color texture
-		const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));
-		const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);
-		uvec4 color = uvec4(vec4(fetch_c(uv).r) * bitSh) & bitMsk;
-
-		t = vec4(color) * vec4(8.0f, 8.0f, 8.0f, 128.0f);
+		uint d = uint(fetch_c(uv).r * exp2(32.0f));
+		t = vec4(uvec4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) * vec4(8.0f, 8.0f, 8.0f, 128.0f);
 	}
 	#elif (PS_DEPTH_FMT == 3)
 	{