Renamed the vertex shader HLSL template file and prepared it for future extensions that follow the specifications more closely (no functional changes intended for now)

2019-12-15 14:41:47 +01:00 · 2019-12-15 14:41:47 +01:00 · 5fe0a16cc6
parent cc594ca8d7
commit 5fe0a16cc6
4 changed files with 89 additions and 42 deletions
--- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl
+++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl
@ -57,6 +57,23 @@ float4 c(int register_number)
    return C[register_number];
 }

+// Due to rounding differences with the Xbox (and increased precision on PC?)
+// some titles produce values just below the threshold of the next integer.
+// We can add a small bias to make sure it's bumped over the threshold
+// Test Case: Azurik (divides indexes by 755, then scales them back in the vertex shader)
+#define BIAS 0.0001
+// TODO : Use 0.001 like xqemu?
+
+// 2.14.1.11  Vertex Program Floating Point Requirements
+// The floor operations used by the ARL and EXP instructions must
+// operate identically.  Specifically, the EXP instruction's floor(t.x)
+// intermediate result must exactly match the integer stored in the
+// address register by the ARL instruction.
+float x_floor(float src)
+{
+	return floor(src + BIAS);
+}
+
 // http://xboxdevwiki.net/NV2A/Vertex_Shader
 // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program.txt
 // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program1_1.txt
@ -65,11 +82,7 @@ float4 c(int register_number)

 // 2.14.1.10.1  ARL: Address Register Load
 // The address register should be floored
-// Due to rounding differences with the Xbox (and increased precision on PC?)
-// some titles produce values just below the threshold of the next integer.
-// We can add a small bias to make sure it's bumped over the threshold
-// Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader)
-#define x_arl(dest, mask, src0) dest.mask = floor(_tof4(src0).x + 0.0001).mask
+#define x_arl(dest, mask, src0) dest.mask = x_floor(_tof4(src0).x).mask

 // 2.14.1.10.2  MOV: Move
 #define x_mov(dest, mask, src0) dest.mask = (_tof4(src0)).mask
@ -132,39 +145,73 @@ float _dph(float4 src0, float4 src1)
 // Xbox ILU Functions

 // 2.14.1.10.6  RCP: Reciprocal
-#define x_rcp(dest, mask, src0) dest.mask = _ssss(1 / _scalar(src0)).mask
-// TODO : #define x_rcp(dest, mask, src0) dest.mask = (_scalar(src0) == 0) ? 1.#INF : (1 / _scalar(src0))
+#define x_rcp(dest, mask, src0) dest.mask = _ssss(_rcp(_scalar(src0))).mask
+float _rcp(float src)
+{
+#if 0 // TODO : Enable
+	if (src == 1) return 1;
+	if (src == 0) return 1.#INF;
+#endif
+	return 1/ src;
+}

 // 2.14.1.10.7  RSQ: Reciprocal Square Root
-#define x_rsq(dest, mask, src0) dest.mask = _ssss(rsqrt(abs(_scalar(src0)))).mask
+#define x_rsq(dest, mask, src0) dest.mask = _ssss(_rsq(_scalar(src0))).mask
+float _rsq(float src)
+{
+	float a = abs(src);
+#if 0 // TODO : Enable
+	if (a == 1) return 1;
+	if (a == 0) return 1.#INF;
+#endif
+	return rsqrt(a);
+}

 // 2.14.1.10.15  EXP: Exponential Base 2
 #define x_expp(dest, mask, src0) dest.mask = _expp(_scalar(src0)).mask
-float4 _expp(float input)
+float4 _expp(float src)
 {
-    float base = floor(input);
+    float floor_src = x_floor(src);

-    float4 dest;
-    dest.x = exp2(base);
-    dest.y = input - base; // Was : frac(input)
-    dest.z = exp2(input);
+    float4 dest;
+    dest.x = exp2(floor_src);
+    dest.y = src - floor_src;
+    dest.z = exp2(src);
    dest.w = 1;

-	return dest;
+	return dest;
 }

 // 2.14.1.10.16  LOG: Logarithm Base 2
 #define x_logp(dest, mask, src0) dest.mask = _logp(_scalar(src0)).mask
-float4 _logp(float input)
-{
-	float exponent = floor(log2(input));
-
+float4 _logp(float src)
+{
    float4 dest;
-    dest.x = exponent;
-    dest.y = 1 / exp2(exponent); // mantissa
-    dest.z = log2(input);
-    dest.w = 1;
-    
+#if 0 // TODO : Enable
+	float t = abs(src);
+	if (t != 0) {
+		if (t == 1.#INF) {
+			dest.x = 1.#INF;
+			dest.y = 1;
+			dest.z = 1.#INF;
+		} else {
+#endif
+			float exponent = floor(log2(src)); // TODO : x_floor
+			float mantissa = 1 / exp2(exponent);
+			float z = log2(src); // TODO : exponent + log2(mantissa); // TODO : Or log2(t)?
+			// TODO : float exponent = frexp(src + BIAS, /*out*/mantissa);
+			dest.x = exponent;
+			dest.y = mantissa;
+			dest.z = z;
+#if 0
+		}
+	} else {
+		dest.x = -1.#INF;
+		dest.y = 1;
+		dest.z = -1.#INF;
+	}
+#endif
+    dest.w = 1;    
 	return dest;
 }

@ -190,10 +237,10 @@ float4 _lit(float4 src0)

 // 2.14.1.10.19  RCC: Reciprocal Clamped
 #define x_rcc(dest, mask, src0) dest.mask = _ssss(_rcc(_scalar(src0))).mask
-float _rcc(float input)
+float _rcc(float src)
 {
 	// Calculate the reciprocal
-	float r = 1 / input;
+	float r = 1 / src;

 	// Clamp
 	return (r >= 0)
--- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp
+++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp
@ -3811,10 +3811,10 @@ void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4])

 	// Default scale and offset.
 	// Multisample state will affect these
-	float xScale = 1;
-	float yScale = 1;
-	float xOffset = 0.5;
-	float yOffset = 0.5;
+	float xScale = 1.0f;
+	float yScale = 1.0f;
+	float xOffset = 0.5f;
+	float yOffset = 0.5f;

 	// MULTISAMPLE options have offset of 0
 	// Various sample sizes have various x and y scales
@ -3824,21 +3824,21 @@ void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4])
 		case XTL::X_D3DMULTISAMPLE_2_SAMPLES_MULTISAMPLE_QUINCUNX:
 		case XTL::X_D3DMULTISAMPLE_4_SAMPLES_MULTISAMPLE_LINEAR:
 		case XTL::X_D3DMULTISAMPLE_4_SAMPLES_MULTISAMPLE_GAUSSIAN:
-			xOffset = yOffset = 0;
+			xOffset = yOffset = 0.0f;
 			break;
 		case XTL::X_D3DMULTISAMPLE_2_SAMPLES_SUPERSAMPLE_HORIZONTAL_LINEAR:
-			xScale = 2;
+			xScale = 2.0f;
 			break;
 		case XTL::X_D3DMULTISAMPLE_2_SAMPLES_SUPERSAMPLE_VERTICAL_LINEAR:
-			yScale = 2;
+			yScale = 2.0f;
 			break;
 		case XTL::X_D3DMULTISAMPLE_4_SAMPLES_SUPERSAMPLE_LINEAR:
 		case XTL::X_D3DMULTISAMPLE_4_SAMPLES_SUPERSAMPLE_GAUSSIAN:
-			xScale = yScale = 2;
+			xScale = yScale = 2.0f;
 			break;
 		case XTL::X_D3DMULTISAMPLE_9_SAMPLES_MULTISAMPLE_GAUSSIAN:
 			xScale = yScale = 1.5f;
-			xOffset = yOffset = 0;
+			xOffset = yOffset = 0.0f;
 			break;
 		case XTL::X_D3DMULTISAMPLE_9_SAMPLES_SUPERSAMPLE_GAUSSIAN:
 			xScale = yScale = 3.0f;
@ -3856,14 +3856,14 @@ void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4])
 	// Pre-transformed 2d geometry is in the same space as the 3d geometry...?

 	// Offset with a host correction
-	vOffset[0] = xOffset + (0.5 * ViewPort.Width / g_RenderScaleFactor);
-	vOffset[1] = yOffset + (0.5 * ViewPort.Height / g_RenderScaleFactor);
-	vOffset[2] = 0; //offsetZ;
+	vOffset[0] = xOffset + (0.5f * (float)ViewPort.Width / (float)g_RenderScaleFactor);
+	vOffset[1] = yOffset + (0.5f * (float)ViewPort.Height / (float)g_RenderScaleFactor);
+	vOffset[2] = 0.0f; //offsetZ;
 	vOffset[3] = 0.0f;

 	// Scale with a host correction
-	vScale[0] = xScale * (1.0f / (2.0f * g_RenderScaleFactor));
-	vScale[1] = yScale * (1.0f / (-2.0f * g_RenderScaleFactor));
+	vScale[0] = xScale * (1.0f / ( 2.0f * (float)g_RenderScaleFactor));
+	vScale[1] = yScale * (1.0f / (-2.0f * (float)g_RenderScaleFactor));
 	vScale[2] = scaleZ; // ?
 	vScale[3] = 1.0f; // ?
 }
--- a/src/core/hle/D3D8/XbVertexShader.cpp
+++ b/src/core/hle/D3D8/XbVertexShader.cpp
@ -1788,7 +1788,7 @@ extern HRESULT EmuRecompileVshFunction

 	if (SUCCEEDED(hRet)) {
 		static std::string hlsl_template =
-			#include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string
+			#include "core\hle\D3D8\Direct3D9\CxbxVertexShaderTemplate.hlsl" // Note : This included .hlsl defines a raw string
 			;

 		auto hlsl_stream = std::stringstream();
--- a/src/core/hle/D3D8/XbVertexShader.h
+++ b/src/core/hle/D3D8/XbVertexShader.h
@ -57,7 +57,7 @@ CxbxVertexShaderStreamElement;

 typedef struct _CxbxVertexShaderStreamInfo
 {
-	BOOL  NeedPatch;       // This is to know whether it's data which must be patched
+	bool  NeedPatch;       // This is to know whether it's data which must be patched
 	BOOL DeclPosition;
 	WORD HostVertexStride;
 	DWORD NumberOfVertexElements;        // Number of the stream data types