diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 581044e2a..fad1323a0 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -26,67 +26,35 @@ extern uniform float4 c[192] : register(c0); // Functions for MAC ('Multiply And Accumulate') opcodes -float4 x_mov(float4 src0) -{ - return src0; -} +#define x_mov(dest, src0) dest = src0 -float4 x_mul(float4 src0, float4 src1) -{ - return src0 * src1; -} +#define x_mul(dest, src0, src1) dest = src0 * src1 -float4 x_add(float4 src0, float4 src1) -{ - return src0 + src1; -} +#define x_add(dest, src0, src1) dest = src0 + src1 -float4 x_dst(float4 src0, float4 src1) -{ - return dst(src0, src1); -} +#define x_dst(dest, src0, src1) dest = dst(src0, src1) // equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } -float4 x_min(float4 src0, float4 src1) -{ - return min(src0, src1); -} +#define x_min(dest, src0, src1) dest = min(src0, src1) -float4 x_max(float4 src0, float4 src1) -{ - return max(src0, src1); -} +#define x_max(dest, src0, src1) dest = max(src0, src1) -float4 x_mad(float4 src0, float4 src1, float4 src2) -{ - return (src0 * src1) + src2; -} +#define x_mad(dest, src0, src1, src2) dest = (src0 * src1) + src2 -int x_arl(float src0) -{ - // The address register should be floored - // Due to rounding differences with the Xbox (and increased precision on PC?) - // some titles produce values just below the threshold of the next integer. 
- // We can add a small bias to make sure it's bumped over the threshold - // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) - return floor(src0 + 0.0001); -} +// The address register should be floored +// Due to rounding differences with the Xbox (and increased precision on PC?) +// some titles produce values just below the threshold of the next integer. +// We can add a small bias to make sure it's bumped over the threshold +// Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) +#define x_arl(dest, src0) dest = floor(src0 + 0.0001) -float x_dp3(float4 src0, float4 src1) -{ - return dot(src0.xyz, src1.xyz); -} +#define x_dp3(dest, src0, src1) dest = dot((float3)src0, (float3)src1) -float x_dph(float4 src0, float4 src1) -{ - return x_dp3(src0, src1) + src1.w; -} +#define x_dph(dest, src0, src1) dest = dot((float3)src0, (float3)src1) + src1.w -float x_dp4(float4 src0, float4 src1) -{ - return dot(src0, src1); -} +#define x_dp4(dest, src0, src1) dest = dot(src0, src1) -float4 x_sge(float4 src0, float4 src1) +#define x_sge(dest, src0, src1) dest = _sge(src0, src1) +float4 _sge(float4 src0, float4 src1) { float4 dest; dest.x = (src0.x >= src1.x) ? 1 : 0; @@ -96,7 +64,8 @@ float4 x_sge(float4 src0, float4 src1) return dest; } -float4 x_slt(float4 src0, float4 src1) +#define x_slt(dest, src0, src1) dest = _slt(src0, src1) +float4 _slt(float4 src0, float4 src1) { float4 dest; dest.x = (src0.x < src1.x) ? 1 : 0; @@ -108,17 +77,13 @@ float4 x_slt(float4 src0, float4 src1) // Xbox ILU Functions -float scalar_component(float4 src0) -{ - return src0.w; // use w component by default -} +#define scalar_component(src0) src0.x -float x_rcp(float4 src0) -{ - return 1 / scalar_component(src0); -} +#define x_rcp(dest, src0) dest = 1 / scalar_component(src0) +// TODO : #define x_rcp(dest, src0) dest = (scalar_component(src0) == 0) ? 
1.#INF : (1 / scalar_component(src0)) -float x_rcc(float4 src0) +#define x_rcc(dest, src0) dest = _rcc(src0) +float _rcc(float4 src0) { float input = scalar_component(src0); @@ -131,42 +96,54 @@ float x_rcc(float4 src0) : clamp(r, -1.84467e+019f, -5.42101e-020f); } -float x_rsq(float4 src0) +#define x_rsq(dest, src0) dest = rsqrt(abs(scalar_component(src0))) + +#define x_expp(dest, src0) dest = _expp(src0) +float4 _expp(float4 src0) { - return rsqrt(scalar_component(src0)); + float input = scalar_component(src0); + float base = floor(input); + + float4 dest; + dest.x = exp2(base); + dest.y = input - base; // Was : frac(input) + dest.z = exp2(input); + dest.w = 1; + + return dest; } -float4 x_exp(float4 src0) +#define x_logp(dest, src0) dest = _logp(src0) +float4 _logp(float4 src0) { - float input = scalar_component(src0); - float x = exp2(floor(input)); - float fractional = frac(input); - float power = exp2(input); - return float4(x, fractional, power, 1); -} - -float4 x_log(float4 src0) -{ - float input = scalar_component(src0); + float input = abs(scalar_component(src0)); float exponent = floor(log2(input)); - float mantissa = 1 / exp2(exponent); - float logResult = log2(input); - return float4(exponent, mantissa, logResult, 1); + + float4 dest; + dest.x = exponent; + dest.y = input / exp2(exponent); // mantissa, in [1, 2) + dest.z = log2(input); // logResult + dest.w = 1; + + return dest; } - -float4 x_lit(float4 src0) + +#define x_lit(dest, src) dest = _lit(src) +float4 _lit(float4 src0) { - const float epsilon = 1.0f / 256.0f; + const float epsilon = 1.0f / 256.0f; + float diffuse = src0.x; float blinn = src0.y; float specPower = clamp(src0.w, -(128 - epsilon), (128 - epsilon)); float4 dest; dest.x = 1; - dest.y = max(diffuse, 0); - dest.z = diffuse > 0 ? pow(2, specPower * log(blinn)) : 0; + dest.y = max(0, diffuse); + dest.z = diffuse > 0 ? pow(2, specPower * log2(blinn)) : 0; // TODO : Use exp2(#) instead of pow(2, #) ? 
+ // TODO : Use dest.z = (diffuse > 0) && (blinn > 0) ? pow(blinn, specPower) : 0; dest.w = 1; - + return dest; } diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index bc0237bdc..fe6faf75b 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -155,10 +155,10 @@ VSH_OUTPUT_TYPE; typedef enum _VSH_ARGUMENT_TYPE { PARAM_UNKNOWN = 0, - PARAM_R, // Temporary registers + PARAM_R, // Temporary (scRatch) registers PARAM_V, // Vertex registers PARAM_C, // Constant registers, set by SetVertexShaderConstant - PARAM_O + PARAM_O // = 0?? } VSH_ARGUMENT_TYPE; @@ -246,10 +246,10 @@ typedef struct _VSH_OUTPUT int16_t OutputAddress; // MAC output R register boolean MACRMask[4]; - boolean MACRAddress; + int16_t MACRAddress; // ILU output R register boolean ILURMask[4]; - boolean ILURAddress; + int16_t ILURAddress; } VSH_OUTPUT; @@ -262,7 +262,8 @@ typedef struct _VSH_SHADER_INSTRUCTION VSH_PARAMETER A; VSH_PARAMETER B; VSH_PARAMETER C; - boolean a0x; + boolean a0x; + boolean Final; } VSH_SHADER_INSTRUCTION; @@ -378,7 +379,7 @@ static const VSH_FIELDMAPPING g_FieldMapping[] = // Final instruction { FLD_FINAL, 3, 0, 1 } }; - + static const VSH_OPCODE_PARAMS g_OpCodeParams_ILU[] = { // ILU OP MAC OP ParamA ParamB ParamC @@ -516,7 +517,7 @@ static VSH_OPCODE_PARAMS* VshGetOpCodeParams(VSH_ILU ILU, static void VshParseInstruction(uint32_t *pShaderToken, VSH_SHADER_INSTRUCTION *pInstruction) { - // First get the instruction(s). + // First get the instruction(s). 
pInstruction->ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU); pInstruction->MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC); @@ -589,14 +590,14 @@ static void VshParseInstruction(uint32_t *pShaderToken, pInstruction->C.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_C_SWZ_W); // Get output // Output register - pInstruction->Output.OutputType = (VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB); + pInstruction->Output.OutputType = (VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB); switch(pInstruction->Output.OutputType) { case OUTPUT_C: pInstruction->Output.OutputAddress = ConvertCRegister(VshGetField(pShaderToken, FLD_OUT_ADDRESS)); break; case OUTPUT_O: - pInstruction->Output.OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS) & 0xF; + pInstruction->Output.OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS) & 0xF; break; } pInstruction->Output.OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX); @@ -617,7 +618,8 @@ static void VshParseInstruction(uint32_t *pShaderToken, pInstruction->Output.ILURMask[3] = VshGetField(pShaderToken, FLD_OUT_ILU_MASK_W); pInstruction->Output.ILURAddress = VshGetField(pShaderToken, FLD_OUT_R); // Finally, get a0.x indirect constant addressing - pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X); + pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X); + pInstruction->Final = VshGetField(pShaderToken, FLD_FINAL); } // Print functions @@ -802,30 +804,6 @@ static VSH_INTERMEDIATE_FORMAT *VshNewIntermediate(VSH_XBOX_SHADER *pShader) return &pShader->Intermediate[pShader->IntermediateCount++]; } -static void VshInsertIntermediate(VSH_XBOX_SHADER *pShader, - VSH_INTERMEDIATE_FORMAT *pIntermediate, - uint16_t Pos) -{ - VshVerifyBufferBounds(pShader); - - for (int i = pShader->IntermediateCount; i >= Pos; i--) - { - pShader->Intermediate[i + 1] = pShader->Intermediate[i]; - } - pShader->Intermediate[Pos] = *pIntermediate; - pShader->IntermediateCount++; -} - -static void 
VshDeleteIntermediate(VSH_XBOX_SHADER *pShader, - uint16_t Pos) -{ - for (int i = Pos; i < (pShader->IntermediateCount - 1); i++) - { - pShader->Intermediate[i] = pShader->Intermediate[i + 1]; - } - pShader->IntermediateCount--; -} - static boolean VshAddInstructionMAC_R(VSH_SHADER_INSTRUCTION *pInstruction, VSH_XBOX_SHADER *pShader, boolean IsCombined) @@ -1834,7 +1812,7 @@ D3DVERTEXELEMENT *EmuRecompileVshDeclaration return pHostVertexElements; } -extern std::string BuildShader(VSH_XBOX_SHADER* pShader); +extern void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader); std::string DebugPrependLineNumbers(std::string shaderString) { std::stringstream shader(shaderString); @@ -1901,12 +1879,18 @@ extern HRESULT EmuRecompileVshFunction } if(SUCCEEDED(hRet)) { + static std::string hlsl_template = + #include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string + ; + + auto hlsl_stream = std::stringstream(); + for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) { VSH_SHADER_INSTRUCTION Inst; VshParseInstruction((uint32_t*)pToken, &Inst); VshConvertToIntermediate(&Inst, pShader); - EOI = (boolean)VshGetField((uint32_t*)pToken, FLD_FINAL); + EOI = Inst.Final; } // The size of the shader is @@ -1919,20 +1903,17 @@ extern HRESULT EmuRecompileVshFunction return D3D_OK; } - static std::string hlslTemplate = - #include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string - ; - - auto hlslTest = BuildShader(pShader); - hlslTest = std::regex_replace(hlslTemplate, std::regex("// "), hlslTest); + BuildShader(hlsl_stream, pShader); + std::string hlsl_str = hlsl_stream.str(); + hlsl_str = std::regex_replace(hlsl_template, std::regex("// "), hlsl_str); DbgVshPrintf("--- HLSL conversion ---\n"); - DbgVshPrintf(DebugPrependLineNumbers(hlslTest).c_str()); + DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str()); 
DbgVshPrintf("-----------------------\n"); hRet = D3DCompile( - hlslTest.c_str(), - hlslTest.length(), + hlsl_str.c_str(), + hlsl_str.length(), nullptr, // pSourceName nullptr, // pDefines nullptr, // pInclude // TODO precompile x_* HLSL functions? @@ -2095,14 +2076,10 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) if (!(dest.Mask[0] && dest.Mask[1] && dest.Mask[2] && dest.Mask[3])) { hlsl << "."; - unsigned vector_size = 0; - if (dest.Mask[0]) { hlsl << "x"; vector_size++; } - if (dest.Mask[1]) { hlsl << "y"; vector_size++; } - if (dest.Mask[2]) { hlsl << "z"; vector_size++; } - if (dest.Mask[3]) { hlsl << "w"; vector_size++; } - hlsl << " = (float" << vector_size << ")"; - } else { - hlsl << " = "; + if (dest.Mask[0]) hlsl << "x"; + if (dest.Mask[1]) hlsl << "y"; + if (dest.Mask[2]) hlsl << "z"; + if (dest.Mask[3]) hlsl << "w"; } } @@ -2161,7 +2138,7 @@ void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) } } -std::string BuildShader(VSH_XBOX_SHADER* pShader) +void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) { // HLSL strings for all MAC opcodes, indexed with VSH_MAC static std::string VSH_MAC_HLSL[] = { @@ -2178,7 +2155,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) /*MAC_MAX:*/"x_max", /*MAC_SLT:*/"x_slt", /*MAC_SGE:*/"x_sge", - /*MAC_ARL:*/"x_arl", // Note : For this MAC_ARL case, ToHlsl would always replace 'dest' with 'a', so we optimized this upfront + /*MAC_ARL:*/"x_arl", "", "" // VSH_MAC 2 final values of the 4 bits are undefined/unknown TODO : Investigate their effect (if any) and emulate that as well }; @@ -2190,13 +2167,11 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) /*ILU_RCP:*/"x_rcp", /*ILU_RCC:*/"x_rcc", /*ILU_RSQ:*/"x_rsq", - /*ILU_EXP:*/"x_exp", - /*ILU_LOG:*/"x_log", + /*ILU_EXP:*/"x_expp", + /*ILU_LOG:*/"x_logp", /*ILU_LIT:*/"x_lit" // = 7 - all values of the 3 bits are used }; - auto hlsl = std::stringstream(); - for (int i = 0; i < pShader->IntermediateCount; i++) { 
VSH_INTERMEDIATE_FORMAT& xboxInstruction = pShader->Intermediate[i]; @@ -2212,20 +2187,15 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) } if (!str.empty()) { - hlsl << "\n "; + hlsl << "\n " << str << "("; // opcode OutputHlsl(hlsl, xboxInstruction.Output); - hlsl << str; // opcode - str = "("; - for (int i = 0; i < 3; i++) { // TODO remove magic number + for (int i = 0; i < 3; i++) { if (xboxInstruction.Parameters[i].Active) { - hlsl << str; // separator + hlsl << ", "; ParameterHlsl(hlsl, xboxInstruction.Parameters[i]); - str = ", "; } } hlsl << ");"; } } - - return hlsl.str(); }