From f97a642bc9012b2ebde2dd23b17c40b710e01c1a Mon Sep 17 00:00:00 2001
From: patrickvl <pvanlogchem@gmail.com>
Date: Sun, 22 Dec 2019 15:47:46 +0100
Subject: [PATCH] Fixed XDK Fog sample over-saturation by literally saturating
 all COLOR-semantic outputs (oD0, oD1, oB0 and oB1)!

Also updated the ARL bias, applied the frexp intrinsic to our LOGP function and updated LIT according to https://github.com/google/swiftshader/blob/6e3a387c01598240a7a8722ab8cbd1015ec1cd0a/src/Shader/ShaderCore.cpp#L1168
---
 .../Direct3D9/CxbxVertexShaderTemplate.hlsl   | 38 +++++++++----------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl
index ab510d6ca..19173ae0a 100644
--- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl
+++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl
@@ -61,8 +61,8 @@ float4 c(int register_number)
 // some titles produce values just below the threshold of the next integer.
 // We can add a small bias to make sure it's bumped over the threshold
 // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader)
-#define BIAS 0.0001
-// TODO : Use 0.001 like xqemu?
+#define BIAS 0.001
+// NOTE : Was 0.0001; raised to 0.001 to match xqemu
 
 // 2.14.1.11  Vertex Program Floating Point Requirements
 // The floor operations used by the ARL and EXP instructions must
@@ -196,10 +196,9 @@ float4 _logp(float src)
 			dest.z = 1.#INF;
 		} else {
 #endif
-			float exponent = floor(log2(src)); // TODO : x_floor
-			float mantissa = 1 / exp2(exponent);
-			float z = log2(src); // TODO : exponent + log2(mantissa); // TODO : Or log2(t)?
-			// TODO : float exponent = frexp(src + BIAS, /*out*/mantissa);
+			float exponent;
+			float mantissa = frexp(src/* + BIAS*/, /*out*/exponent);
+			float z = log2(src);
 			dest.x = exponent;
 			dest.y = mantissa;
 			dest.z = z;
@@ -228,8 +227,7 @@ float4 _lit(float4 src0)
 	float4 dest;
 	dest.x = 1;
 	dest.y = max(0, diffuse);
-	dest.z = diffuse > 0 ? exp2(specPower * log(blinn)) : 0;
-	// TODO : Use dest.z = (diffuse > 0) && (blinn > 0) ? pow(blinn, specPower) : 0;
+	dest.z = (diffuse > 0) && (blinn > 0) ? pow(blinn, specPower) : 0;
 	dest.w = 1;
 
 	return dest;
@@ -283,19 +281,19 @@ VS_OUTPUT main(const VS_INPUT xIn)
 	r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 0);
 	#define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox
 
-	// Input registers
+	// Input registers
 	float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15;
-
+
 	// View 4 packed overrides as an array of 16 floats
 	float vOverride[16] = (float[16])vOverridePacked;
 
 	// Initialize input registers from the vertex buffer
-	// Or use an override value set with SetVertexData4f
-	#define init_v(i) v##i = lerp(xIn.v[i], vOverride[i], vOverrideValue[i]);
-	// Note : unroll manually instead of for-loop, because of the ## concatenation
-	init_v( 0); init_v( 1); init_v( 2); init_v( 3);
-	init_v( 4); init_v( 5); init_v( 6); init_v( 7);
-	init_v( 8); init_v( 9); init_v(10); init_v(11);
+	// Or use an override value set with SetVertexData4f
+	#define init_v(i) v##i = lerp(xIn.v[i], vOverride[i], vOverrideValue[i]);
+	// Note : unroll manually instead of for-loop, because of the ## concatenation
+	init_v( 0); init_v( 1); init_v( 2); init_v( 3);
+	init_v( 4); init_v( 5); init_v( 6); init_v( 7);
+	init_v( 8); init_v( 9); init_v(10); init_v(11);
 	init_v(12); init_v(13); init_v(14); init_v(15);
 
 	// Xbox shader program)DELIMITER", /* This terminates the header raw string" // */
@@ -306,12 +304,12 @@ R"DELIMITER(
 	VS_OUTPUT xOut;
 
 	xOut.oPos = reverseScreenspaceTransform(oPos);
-	xOut.oD0 = oD0;
-	xOut.oD1 = oD1;
+	xOut.oD0 = saturate(oD0);
+	xOut.oD1 = saturate(oD1);
 	xOut.oFog = oFog.x;
 	xOut.oPts = oPts.x;
-	xOut.oB0 = oB0;
-	xOut.oB1 = oB1;
+	xOut.oB0 = saturate(oB0);
+	xOut.oB1 = saturate(oB1);
 	xOut.oT0 = oT0;
 	xOut.oT1 = oT1;
 	xOut.oT2 = oT2;