From f97a642bc9012b2ebde2dd23b17c40b710e01c1a Mon Sep 17 00:00:00 2001 From: patrickvl Date: Sun, 22 Dec 2019 15:47:46 +0100 Subject: [PATCH] Fixed XDK Fog sample over-saturation by literally saturating all COLOR-semantic outputs (oD0, oD1, oB0 and oB1)! Also updated the ARL bias, applied the frexp intrinsic to our LOGP function and updated LIT according to https://github.com/google/swiftshader/blob/6e3a387c01598240a7a8722ab8cbd1015ec1cd0a/src/Shader/ShaderCore.cpp#L1168 --- .../Direct3D9/CxbxVertexShaderTemplate.hlsl | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index ab510d6ca..19173ae0a 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -61,8 +61,8 @@ float4 c(int register_number) // some titles produce values just below the threshold of the next integer. // We can add a small bias to make sure it's bumped over the threshold // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) -#define BIAS 0.0001 -// TODO : Use 0.001 like xqemu? +#define BIAS 0.001 +// NOTE : Was 0.0001, unlike xqemu // 2.14.1.11 Vertex Program Floating Point Requirements // The floor operations used by the ARL and EXP instructions must @@ -196,10 +196,9 @@ float4 _logp(float src) dest.z = 1.#INF; } else { #endif - float exponent = floor(log2(src)); // TODO : x_floor - float mantissa = 1 / exp2(exponent); - float z = log2(src); // TODO : exponent + log2(mantissa); // TODO : Or log2(t)? - // TODO : float exponent = frexp(src + BIAS, /*out*/mantissa); + float exponent; + float mantissa = frexp(src/* + BIAS*/, /*out*/exponent); + float z = log2(src); dest.x = exponent; dest.y = mantissa; dest.z = z; @@ -228,8 +227,7 @@ float4 _lit(float4 src0) float4 dest; dest.x = 1; dest.y = max(0, diffuse); - dest.z = diffuse > 0 ? exp2(specPower * log(blinn)) : 0; - // TODO : Use dest.z = (diffuse > 0) && (blinn > 0) ? pow(blinn, specPower) : 0; + dest.z = (diffuse > 0) && (blinn > 0) ? pow(blinn, specPower) : 0; dest.w = 1; return dest; @@ -283,19 +281,19 @@ VS_OUTPUT main(const VS_INPUT xIn) r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 0); #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox - // Input registers + // Input registers float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; - + // View 4 packed overrides as an array of 16 floats float vOverride[16] = (float[16])vOverridePacked; // Initialize input registers from the vertex buffer - // Or use an override value set with SetVertexData4f - #define init_v(i) v##i = lerp(xIn.v[i], vOverride[i], vOverrideValue[i]); - // Note : unroll manually instead of for-loop, because of the ## concatenation - init_v( 0); init_v( 1); init_v( 2); init_v( 3); - init_v( 4); init_v( 5); init_v( 6); init_v( 7); - init_v( 8); init_v( 9); init_v(10); init_v(11); + // Or use an override value set with SetVertexData4f + #define init_v(i) v##i = lerp(xIn.v[i], vOverride[i], vOverrideValue[i]); + // Note : unroll manually instead of for-loop, because of the ## concatenation + init_v( 0); init_v( 1); init_v( 2); init_v( 3); + init_v( 4); init_v( 5); init_v( 6); init_v( 7); + init_v( 8); init_v( 9); init_v(10); init_v(11); init_v(12); init_v(13); init_v(14); init_v(15); // Xbox shader program)DELIMITER", /* This terminates the header raw string" // */ @@ -306,12 +304,12 @@ R"DELIMITER( VS_OUTPUT xOut; xOut.oPos = reverseScreenspaceTransform(oPos); - xOut.oD0 = oD0; - xOut.oD1 = oD1; + xOut.oD0 = saturate(oD0); + xOut.oD1 = saturate(oD1); xOut.oFog = oFog.x; xOut.oPts = oPts.x; - xOut.oB0 = oB0; - xOut.oB1 = oB1; + xOut.oB0 = saturate(oB0); + xOut.oB1 = saturate(oB1); xOut.oT0 = oT0; xOut.oT1 = oT1; xOut.oT2 = oT2;