From 62015b1499c093df38ed7b615cddf4e1909c1ac8 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Fri, 29 Nov 2019 22:54:07 +1300 Subject: [PATCH 01/77] initial --- CMakeLists.txt | 2 + .../hle/D3D8/Direct3D9/HlslVertexShader.cpp | 225 ++++++++++++++++++ .../hle/D3D8/Direct3D9/HlslVertexShader.h | 5 + src/core/hle/D3D8/Direct3D9/Xb.hlsl | 53 +++++ src/core/hle/D3D8/XbVertexShader.cpp | 215 +++++++++++++++++ 5 files changed, 500 insertions(+) create mode 100644 src/core/hle/D3D8/Direct3D9/HlslVertexShader.cpp create mode 100644 src/core/hle/D3D8/Direct3D9/HlslVertexShader.h create mode 100644 src/core/hle/D3D8/Direct3D9/Xb.hlsl diff --git a/CMakeLists.txt b/CMakeLists.txt index 778e66096..8bd8f5211 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,6 +129,7 @@ file (GLOB CXBXR_HEADER_EMU "${CXBXR_ROOT_DIR}/src/common/util/gloffscreen/gloffscreen.h" "${CXBXR_ROOT_DIR}/src/common/XADPCM.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.h" + #"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/HlslVertexShader.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/WalkIndexBuffer.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/ResourceTracker.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/XbConvert.h" @@ -262,6 +263,7 @@ file (GLOB CXBXR_SOURCE_EMU "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/RenderStates.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/TextureStates.cpp" + #"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/HlslVertexShader.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/WalkIndexBuffer.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/ResourceTracker.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/XbConvert.cpp" diff --git a/src/core/hle/D3D8/Direct3D9/HlslVertexShader.cpp b/src/core/hle/D3D8/Direct3D9/HlslVertexShader.cpp new file mode 100644 index 000000000..4d3a64bbc --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/HlslVertexShader.cpp @@ -0,0 +1,225 @@ + +//#define LOG_PREFIX CXBXR_MODULE::VTXSH 
+//#include +// +////#include "HlslVertexShader.h" +// +////#include "XbD3D8Types.h" // For X_D3DVSDE_* +//#include +//#include +////#include +//#include +////#include + +//#pragma once +//#include "core\hle\D3D8\XbVertexShader.cpp" + +// HLSL outputs +std::array outputs = +{ + "oPos", + "oT0", + "oT1", + "oT2", + "oT3", + "oT4", + "oD0", + "oD1", + "oB0", + "oB1", + "oFog", + "oPts" +}; + +std::string ToHlsl(VSH_IMD_OUTPUT& dest) { + auto hlsl = std::stringstream(); + switch (dest.Type) + { + case IMD_OUTPUT_O: + hlsl << "out." << outputs[dest.Address]; + break; + case IMD_OUTPUT_A0X: + hlsl << "a"; + break; + case IMD_OUTPUT_C: + hlsl << "c[" << dest.Address << "]"; //todo we can output to constants...? + break; + case IMD_OUTPUT_R: + hlsl << "r[" << dest.Address << "]"; + break; + default: + break; + } + + return hlsl.str(); +} + +std::string ToHlsl(VSH_IMD_PARAMETER& parameter) +{ + auto hlsl = std::stringstream(); + + hlsl << (parameter.Parameter.Neg ? "-" : "") << VshGetRegisterName(parameter.Parameter.ParameterType); + + if (parameter.Parameter.ParameterType == PARAM_C && parameter.IndexesWithA0_X) + { + // Only display the offset if it's not 0. + parameter.Parameter.Address + ? 
hlsl << "[a+" << parameter.Parameter.Address << "]" + : hlsl << "[a]"; + } + else + { + hlsl << parameter.Parameter.Address; + } + + // Only bother printing the swizzle if it is not .xyzw + if (!(parameter.Parameter.Swizzle[0] == SWIZZLE_X && + parameter.Parameter.Swizzle[1] == SWIZZLE_Y && + parameter.Parameter.Swizzle[2] == SWIZZLE_Z && + parameter.Parameter.Swizzle[3] == SWIZZLE_W)) + { + hlsl << "."; + for (int i = 0; i < 4; i++) + { + char Swizzle = '?'; + switch (parameter.Parameter.Swizzle[i]) + { + case SWIZZLE_X: + Swizzle = 'x'; + break; + case SWIZZLE_Y: + Swizzle = 'y'; + break; + case SWIZZLE_Z: + Swizzle = 'z'; + break; + case SWIZZLE_W: + Swizzle = 'w'; + break; + } + hlsl << Swizzle; + } + } + + return hlsl.str(); +} + +std::string ToHlsl(std::string pattern, VSH_INTERMEDIATE_FORMAT& instruction) { + auto static dest = std::regex("dest"); + const std::regex src[] = { std::regex("src0"), std::regex("src1"), std::regex("src2") }; + + // TODO use simple string replace + // Warn if we didn't replace anything etc. 
+ // Replace dest + auto hlsl = std::regex_replace(pattern, std::regex("dest"), ToHlsl(instruction.Output)); + + int srcNum = 0; + for (int i = 0; i < 3; i++) { // TODO remove magic number + if (instruction.Parameters[i].Active) { + hlsl = std::regex_replace(hlsl, std::regex(src[srcNum]), ToHlsl(instruction.Parameters[i])); + srcNum += 1; + } + } + + return hlsl; +} + +std::string BuildShader(VSH_XBOX_SHADER* pShader) { + + auto hlsl = std::stringstream(); + + + for (int i = 0; i < pShader->IntermediateCount; i++) { + + VSH_INTERMEDIATE_FORMAT xboxInstruction = pShader->Intermediate[i]; + + if (xboxInstruction.InstructionType == IMD_MAC) + { + switch (xboxInstruction.MAC) + { + case MAC_NOP: + break; + case MAC_MOV: + hlsl << ToHlsl("dest = src0", xboxInstruction); + break; + case MAC_MUL: + hlsl << ToHlsl("dest = src0 * src1", xboxInstruction); + break; + case MAC_ADD: + hlsl << ToHlsl("dest = src0 + src1", xboxInstruction); + break; + case MAC_MAD: + hlsl << ToHlsl("dest = mad(src0, src1, src2)", xboxInstruction); + break; + case MAC_DP3: + hlsl << ToHlsl("dest = dot(src0, src1)", xboxInstruction); + break; + case MAC_DPH: + hlsl << ToHlsl("dest = dot(src0, src1) + src1.w", xboxInstruction); + break; + case MAC_DP4: + hlsl << ToHlsl("dest = dot(src0, src1)", xboxInstruction); + break; + case MAC_DST: + hlsl << ToHlsl("dest = x_dst(src0, src1)", xboxInstruction); + break; + case MAC_MIN: + hlsl << ToHlsl("dest = min(src0, src1)", xboxInstruction); + break; + case MAC_MAX: + hlsl << ToHlsl("dest = max(src0, src1)", xboxInstruction); + break; + case MAC_SLT: + hlsl << ToHlsl("dest = x_slt(src0, src1)", xboxInstruction); + break; + case MAC_SGE: + hlsl << ToHlsl("dest = x_sge(src0, src1)", xboxInstruction); + break; + case MAC_ARL: + hlsl << ToHlsl("a = src0", xboxInstruction); + break; + default: + EmuLog(LOG_LEVEL::WARNING, "TODO message"); + } + } + else if (xboxInstruction.InstructionType == IMD_ILU) + { + switch (xboxInstruction.ILU) + { + case ILU_NOP: + break; 
+ case ILU_MOV: + hlsl << ToHlsl("dest = src0", xboxInstruction); + break; + case ILU_RCP: + hlsl << ToHlsl("// rcp TODO", xboxInstruction); + break; + case ILU_RCC: + hlsl << ToHlsl("// rcc TODO", xboxInstruction); + break; + case ILU_RSQ: + hlsl << ToHlsl("// rsq TODO", xboxInstruction); + break; + case ILU_EXP: + hlsl << ToHlsl("// exp TODO", xboxInstruction); + break; + case ILU_LOG: + hlsl << ToHlsl("// log TODO", xboxInstruction); + break; + case ILU_LIT: + hlsl << ToHlsl("// lit TODO", xboxInstruction); + break; + default: + EmuLog(LOG_LEVEL::WARNING, "TODO message"); + } + } + else + { + EmuLog(LOG_LEVEL::WARNING, "TODO message"); + } + + // Finish the line + hlsl << "\n"; + } + return hlsl.str(); +} diff --git a/src/core/hle/D3D8/Direct3D9/HlslVertexShader.h b/src/core/hle/D3D8/Direct3D9/HlslVertexShader.h new file mode 100644 index 000000000..860dcdba2 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/HlslVertexShader.h @@ -0,0 +1,5 @@ +// +//#pragma once +//#include "core\hle\D3D8\XbVertexShader.h" +//#include +//std::string BuildShader(VSH_XBOX_SHADER* pShader); diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl new file mode 100644 index 000000000..62a46b35e --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -0,0 +1,53 @@ +struct VS_INPUT +{ + float4 v[16] : TEXCOORD; +}; + +struct VS_OUTPUT +{ + float4 oPos : POSITION; + float4 oD0 : COLOR0; // Colour 0 + float4 oD1 : COLOR1; // Colour 1 + float oFog : FOG; + float oPts : PSIZE; + float4 oB0 : TEXCOORD4; // Backface Colour 0 + float4 oB1 : TEXCOORD5; // Backface Colour 1 + float4 oT0 : TEXCOORD0; // Texture Coord 0 + float4 oT1 : TEXCOORD1; // Texture Coord 0 + float4 oT2 : TEXCOORD2; // Texture Coord 0 + float4 oT3 : TEXCOORD3; // Texture Coord 0 +}; + +extern float4 c[192]; // Constant registers +static float4 r[16]; // Temporary registers +static int a; // address register + +VS_OUTPUT defaultOut() { + VS_OUTPUT o; + + // Initialize default values + o.oPos 
= float4(0, 0, 0, 1); + o.oD0 = float4(0, 0, 0, 1); + o.oD1 = float4(0, 0, 0, 1); + o.oFog = 0; + o.oPts = 0; + o.oB0 = float4(0, 0, 0, 1); + o.oB1 = float4(0, 0, 0, 1); + o.oT0 = float4(0, 0, 0, 1); + o.oT1 = float4(0, 0, 0, 1); + o.oT2 = float4(0, 0, 0, 1); + o.oT3 = float4(0, 0, 0, 1); + + return o; +} + +VS_OUTPUT main(const VS_INPUT xIn) +{ + VS_OUTPUT xOut = defaultOut(); + + // Insert Xbox shader here + + + + return xOut; +} diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 5a5c90940..565e85af1 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -2670,6 +2670,8 @@ std::string VshPostProcess(std::string shader) { return VshPostProcess_TruncateMovA(shader); } +extern std::string BuildShader(VSH_XBOX_SHADER* pShader); + // recompile xbox vertex shader function extern HRESULT EmuRecompileVshFunction ( @@ -2762,6 +2764,10 @@ extern HRESULT EmuRecompileVshFunction DbgVshPrintf("%s", finalHostShader.c_str()); DbgVshPrintf("-----------------------\n"); + DbgVshPrintf("-- HLSL conversion ---\n"); + DbgVshPrintf("%s", BuildShader(pShader)); + DbgVshPrintf("-----------------------\n"); + // HACK: Azurik. Prevent Direct3D from trying to assemble this. if(finalHostShader == "vs.2.x\n") { @@ -2918,3 +2924,212 @@ void CxbxImpl_SelectVertexShaderDirect LOG_UNIMPLEMENTED(); } +// HLSL outputs + +std::string ToHlsl(VSH_IMD_OUTPUT& dest) { + auto hlsl = std::stringstream(); + switch (dest.Type) + { + case IMD_OUTPUT_O: + hlsl << "xOut." << OReg_Name[dest.Address]; + break; + case IMD_OUTPUT_A0X: + hlsl << "a"; + break; + case IMD_OUTPUT_C: + hlsl << "c[" << dest.Address << "]"; //todo we can output to constants...? + break; + case IMD_OUTPUT_R: + hlsl << "r[" << dest.Address << "]"; + break; + default: + break; + } + + return hlsl.str(); +} + +std::string ToHlsl(VSH_IMD_PARAMETER& parameter) +{ + auto hlsl = std::stringstream(); + + hlsl << (parameter.Parameter.Neg ? 
"-" : ""); + + if (parameter.Parameter.ParameterType == PARAM_V) + hlsl << "xIn."; + + hlsl << VshGetRegisterName(parameter.Parameter.ParameterType); + + if (parameter.Parameter.ParameterType == PARAM_C && parameter.IndexesWithA0_X) + { + // Only display the offset if it's not 0. + parameter.Parameter.Address + ? hlsl << "[a+" << parameter.Parameter.Address << "]" + : hlsl << "[a]"; + } + else + { + hlsl << "[" << parameter.Parameter.Address << "]"; + } + + // Only bother printing the swizzle if it is not .xyzw + if (!(parameter.Parameter.Swizzle[0] == SWIZZLE_X && + parameter.Parameter.Swizzle[1] == SWIZZLE_Y && + parameter.Parameter.Swizzle[2] == SWIZZLE_Z && + parameter.Parameter.Swizzle[3] == SWIZZLE_W)) + { + hlsl << "."; + + // Find the last difference, so we don't write repeated trailing swizzles + // "var.x" instead of "var.xxxx" + auto lastDiffIndex = 0; + for (int i = 1; i < 4; i++) { + if (parameter.Parameter.Swizzle[i] != parameter.Parameter.Swizzle[i-1]) + lastDiffIndex = i; + } + + for (int i = 0; i <= lastDiffIndex; i++) + { + char Swizzle = '?'; + switch (parameter.Parameter.Swizzle[i]) + { + case SWIZZLE_X: + Swizzle = 'x'; + break; + case SWIZZLE_Y: + Swizzle = 'y'; + break; + case SWIZZLE_Z: + Swizzle = 'z'; + break; + case SWIZZLE_W: + Swizzle = 'w'; + break; + } + hlsl << Swizzle; + } + } + + return hlsl.str(); +} + +std::string ToHlsl(std::string pattern, VSH_INTERMEDIATE_FORMAT& instruction) { + auto static regDest = std::regex("dest"); + const std::regex regSrc[] = { std::regex("src0"), std::regex("src1"), std::regex("src2") }; + + // TODO use simple string replace + // Warn if we didn't replace anything etc. 
+ // Replace dest + auto hlsl = std::regex_replace(pattern, regDest, ToHlsl(instruction.Output)); + + int srcNum = 0; + for (int i = 0; i < 3; i++) { // TODO remove magic number + if (instruction.Parameters[i].Active) { + hlsl = std::regex_replace(hlsl, regSrc[srcNum], ToHlsl(instruction.Parameters[i])); + srcNum += 1; + } + } + + return hlsl; +} + +std::string BuildShader(VSH_XBOX_SHADER* pShader) { + + auto hlsl = std::stringstream(); + + + for (int i = 0; i < pShader->IntermediateCount; i++) { + + VSH_INTERMEDIATE_FORMAT xboxInstruction = pShader->Intermediate[i]; + + if (xboxInstruction.InstructionType == IMD_MAC) + { + switch (xboxInstruction.MAC) + { + case MAC_NOP: + break; + case MAC_MOV: + hlsl << ToHlsl("dest = src0", xboxInstruction); + break; + case MAC_MUL: + hlsl << ToHlsl("dest = src0 * src1", xboxInstruction); + break; + case MAC_ADD: + hlsl << ToHlsl("dest = src0 + src1", xboxInstruction); + break; + case MAC_MAD: + hlsl << ToHlsl("dest = mad(src0, src1, src2)", xboxInstruction); + break; + case MAC_DP3: + hlsl << ToHlsl("dest = dot(src0, src1)", xboxInstruction); + break; + case MAC_DPH: + hlsl << ToHlsl("dest = dot(src0, src1) + src1.w", xboxInstruction); + break; + case MAC_DP4: + hlsl << ToHlsl("dest = dot(src0, src1)", xboxInstruction); + break; + case MAC_DST: + hlsl << ToHlsl("dest = x_dst(src0, src1)", xboxInstruction); + break; + case MAC_MIN: + hlsl << ToHlsl("dest = min(src0, src1)", xboxInstruction); + break; + case MAC_MAX: + hlsl << ToHlsl("dest = max(src0, src1)", xboxInstruction); + break; + case MAC_SLT: + hlsl << ToHlsl("dest = x_slt(src0, src1)", xboxInstruction); + break; + case MAC_SGE: + hlsl << ToHlsl("dest = x_sge(src0, src1)", xboxInstruction); + break; + case MAC_ARL: + hlsl << ToHlsl("a = src0", xboxInstruction); + break; + default: + EmuLog(LOG_LEVEL::WARNING, "TODO message"); + } + } + else if (xboxInstruction.InstructionType == IMD_ILU) + { + switch (xboxInstruction.ILU) + { + case ILU_NOP: + break; + case ILU_MOV: + 
hlsl << ToHlsl("dest = src0", xboxInstruction); + break; + case ILU_RCP: + hlsl << ToHlsl("// rcp TODO", xboxInstruction); + break; + case ILU_RCC: + hlsl << ToHlsl("// rcc TODO", xboxInstruction); + break; + case ILU_RSQ: + hlsl << ToHlsl("// rsq TODO", xboxInstruction); + break; + case ILU_EXP: + hlsl << ToHlsl("// exp TODO", xboxInstruction); + break; + case ILU_LOG: + hlsl << ToHlsl("// log TODO", xboxInstruction); + break; + case ILU_LIT: + hlsl << ToHlsl("// lit TODO", xboxInstruction); + break; + default: + EmuLog(LOG_LEVEL::WARNING, "TODO message"); + } + } + else + { + EmuLog(LOG_LEVEL::WARNING, "TODO message"); + } + + // Finish the line + hlsl << ";\n"; + } + return hlsl.str(); +} + From da6fc5f35ce0f7cbe384e4a0f01576ee0171b00a Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 30 Nov 2019 00:39:50 +1300 Subject: [PATCH 02/77] TMP more stuff --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 75 ++++++++++++++++++++-------- src/core/hle/D3D8/XbVertexShader.cpp | 67 ++++++++++++++++--------- 2 files changed, 97 insertions(+), 45 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 62a46b35e..e041e061f 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -18,36 +18,67 @@ struct VS_OUTPUT float4 oT3 : TEXCOORD3; // Texture Coord 0 }; -extern float4 c[192]; // Constant registers -static float4 r[16]; // Temporary registers +extern float4 hostConstants[192]; // Constant registers +float4 c(int index); + static int a; // address register -VS_OUTPUT defaultOut() { - VS_OUTPUT o; - - // Initialize default values - o.oPos = float4(0, 0, 0, 1); - o.oD0 = float4(0, 0, 0, 1); - o.oD1 = float4(0, 0, 0, 1); - o.oFog = 0; - o.oPts = 0; - o.oB0 = float4(0, 0, 0, 1); - o.oB1 = float4(0, 0, 0, 1); - o.oT0 = float4(0, 0, 0, 1); - o.oT1 = float4(0, 0, 0, 1); - o.oT2 = float4(0, 0, 0, 1); - o.oT3 = float4(0, 0, 0, 1); - - return o; -} - VS_OUTPUT main(const VS_INPUT xIn) { - VS_OUTPUT 
xOut = defaultOut(); + // Input registers + float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; + + // Temporary variables + float4 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; + r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 1); // TODO correct? + + // Output variables + float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3; + oPos = oD0 = oD1 = oB0 = oB1 = oT0 = oT1 = oT2 = oT3 = float4(0, 0, 0, 1); // TODO correct? + float oFog = 0, oPts = 0; + + // Initialize input variables + v0 = xIn.v[0]; + v1 = xIn.v[1]; + v2 = xIn.v[2]; + v3 = xIn.v[3]; + v4 = xIn.v[4]; + v5 = xIn.v[5]; + v6 = xIn.v[6]; + v7 = xIn.v[7]; + v8 = xIn.v[8]; + v9 = xIn.v[9]; + v10 = xIn.v[10]; + v11 = xIn.v[11]; + v12 = xIn.v[12]; + v13 = xIn.v[13]; + v14 = xIn.v[14]; + v15 = xIn.v[15]; // Insert Xbox shader here + // Copy variables to output struct + VS_OUTPUT xOut; + + xOut.oPos = oPos; + xOut.oD0 = oD0; + xOut.oD1 = oD1; + xOut.oFog = oFog; + xOut.oPts = oPts; + xOut.oB0 = oB0; + xOut.oB1 = oB1; + xOut.oT0 = oT0; + xOut.oT1 = oT1; + xOut.oT2 = oT2; + xOut.oT3 = oT3; + return xOut; } + +// Account for Xbox's negative constant indexes +// Map Xbox [-96, 95] to Host [0, 191] +float4 c(int index) { + return hostConstants[index + 96]; +} diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 565e85af1..fe28fa876 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -2754,6 +2754,11 @@ extern HRESULT EmuRecompileVshFunction DbgVshPrintf("%s", pXboxShaderDisassembly.str().c_str()); DbgVshPrintf("-----------------------\n"); + + DbgVshPrintf("-- HLSL conversion 1 ---\n"); + DbgVshPrintf("%s", BuildShader(pShader)); + DbgVshPrintf("-----------------------\n"); + VshConvertShader(pShader, bNoReservedConstants); VshWriteShader(pShader, pHostShaderDisassembly, pRecompiledDeclaration, TRUE); @@ -2764,7 +2769,7 @@ extern HRESULT EmuRecompileVshFunction 
DbgVshPrintf("%s", finalHostShader.c_str()); DbgVshPrintf("-----------------------\n"); - DbgVshPrintf("-- HLSL conversion ---\n"); + DbgVshPrintf("-- HLSL conversion 2 ---\n"); DbgVshPrintf("%s", BuildShader(pShader)); DbgVshPrintf("-----------------------\n"); @@ -2931,7 +2936,7 @@ std::string ToHlsl(VSH_IMD_OUTPUT& dest) { switch (dest.Type) { case IMD_OUTPUT_O: - hlsl << "xOut." << OReg_Name[dest.Address]; + hlsl << OReg_Name[dest.Address]; break; case IMD_OUTPUT_A0X: hlsl << "a"; @@ -2940,43 +2945,59 @@ std::string ToHlsl(VSH_IMD_OUTPUT& dest) { hlsl << "c[" << dest.Address << "]"; //todo we can output to constants...? break; case IMD_OUTPUT_R: - hlsl << "r[" << dest.Address << "]"; + hlsl << "r" << dest.Address; break; default: break; } + // If we're not writing all channels, write the mask + if (!(dest.Mask[0] && dest.Mask[1] && dest.Mask[2] && dest.Mask[3])) + { + hlsl << "." << (dest.Mask[0] ? "x" : "") + << (dest.Mask[1] ? "y" : "") + << (dest.Mask[2] ? "z" : "") + << (dest.Mask[3] ? "w" : ""); + } + return hlsl.str(); } -std::string ToHlsl(VSH_IMD_PARAMETER& parameter) +std::string ToHlsl(VSH_IMD_PARAMETER& paramMeta) { auto hlsl = std::stringstream(); - hlsl << (parameter.Parameter.Neg ? "-" : ""); + auto param = paramMeta.Parameter; - if (parameter.Parameter.ParameterType == PARAM_V) - hlsl << "xIn."; + hlsl << (param.Neg ? "-" : ""); - hlsl << VshGetRegisterName(parameter.Parameter.ParameterType); + if (param.ParameterType == PARAM_C){ + hlsl << "c"; - if (parameter.Parameter.ParameterType == PARAM_C && parameter.IndexesWithA0_X) - { + // We'll use the c() function instead of direct indexing // Only display the offset if it's not 0. - parameter.Parameter.Address - ? hlsl << "[a+" << parameter.Parameter.Address << "]" - : hlsl << "[a]"; + if (paramMeta.IndexesWithA0_X) { + param.Address + ? 
hlsl << "(a+" << param.Address << ")" + : hlsl << "(a)"; + } + else { + hlsl << "(" << param.Address << ")"; + } } - else - { - hlsl << "[" << parameter.Parameter.Address << "]"; + else if (param.ParameterType == PARAM_R && param.Address == 12) { + // Replace r12 with oPos + hlsl << "oPos"; + } + else { + hlsl << VshGetRegisterName(param.ParameterType) << param.Address; } // Only bother printing the swizzle if it is not .xyzw - if (!(parameter.Parameter.Swizzle[0] == SWIZZLE_X && - parameter.Parameter.Swizzle[1] == SWIZZLE_Y && - parameter.Parameter.Swizzle[2] == SWIZZLE_Z && - parameter.Parameter.Swizzle[3] == SWIZZLE_W)) + if (!(param.Swizzle[0] == SWIZZLE_X && + param.Swizzle[1] == SWIZZLE_Y && + param.Swizzle[2] == SWIZZLE_Z && + param.Swizzle[3] == SWIZZLE_W )) { hlsl << "."; @@ -2984,14 +3005,14 @@ std::string ToHlsl(VSH_IMD_PARAMETER& parameter) // "var.x" instead of "var.xxxx" auto lastDiffIndex = 0; for (int i = 1; i < 4; i++) { - if (parameter.Parameter.Swizzle[i] != parameter.Parameter.Swizzle[i-1]) + if (param.Swizzle[i] != param.Swizzle[i-1]) lastDiffIndex = i; } for (int i = 0; i <= lastDiffIndex; i++) { char Swizzle = '?'; - switch (parameter.Parameter.Swizzle[i]) + switch (param.Swizzle[i]) { case SWIZZLE_X: Swizzle = 'x'; @@ -3085,7 +3106,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { hlsl << ToHlsl("dest = x_sge(src0, src1)", xboxInstruction); break; case MAC_ARL: - hlsl << ToHlsl("a = src0", xboxInstruction); + hlsl << ToHlsl("a = floor(src0)", xboxInstruction); break; default: EmuLog(LOG_LEVEL::WARNING, "TODO message"); From 389e50a57003e9cbc76404c8530abbe2a84703fe Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 30 Nov 2019 15:07:13 +1300 Subject: [PATCH 03/77] more hlsl --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 45 +++++++++++++++++++++++++++- src/core/hle/D3D8/XbVertexShader.cpp | 12 ++++---- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl 
index e041e061f..e9e852021 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -21,7 +21,50 @@ struct VS_OUTPUT extern float4 hostConstants[192]; // Constant registers float4 c(int index); -static int a; // address register +static int a; // Xbox index register + +int toXboxIndex(src0) { + // The address register should be floored + // Due to rounding differences with the Xbox (and increased precision on PC?) + // some titles produce values just below the threshold of the next integer. + // We can add a small bias to make sure it's bumped over the threshold + // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) + return floor(src0) + 0.00000001; // TODO test +} + +float x_dph(float4 src0, float4 src1) { + return dot(src0, src1) + src1.w; +} + +float4 x_sge(float4 a, float4 b) { + float4 dest; + dest.x = (src0.x >= src1.x) ? 1.0f : 0.0f; + dest.y = (src0.y >= src1.y) ? 1.0f : 0.0f; + dest.z = (src0.z >= src1.z) ? 1.0f : 0.0f; + dest.w = (src0.w >= src1.w) ? 1.0f : 0.0f; + return dest; +} + +float4 x_sge(float4 a, float4 src1) { + float4 dest; + dest.x = (src0.x < src1.x) ? 1.0f : 0.0f; + dest.y = (src0.y < src1.y) ? 1.0f : 0.0f; + dest.z = (src0.z < src1.z) ? 1.0f : 0.0f; + dest.w = (src0.w < src1.w) ? 1.0f : 0.0f; + return dest; +} + +// Clamped reciprocal +float x_rcc(float src0) { + + // Calculate the reciprocal + float r = 1.0f / src0; + + // Clamp + return (r > 0) + ? 
clamp(r, 5.42101e-020, 1.84467e+019) + : clamp(r, -5.42101e-020, -1.84467e+019); +} VS_OUTPUT main(const VS_INPUT xIn) { diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index fe28fa876..1fd7ecdca 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -3106,7 +3106,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { hlsl << ToHlsl("dest = x_sge(src0, src1)", xboxInstruction); break; case MAC_ARL: - hlsl << ToHlsl("a = floor(src0)", xboxInstruction); + hlsl << ToHlsl("a = toXboxIndex(src0)", xboxInstruction); break; default: EmuLog(LOG_LEVEL::WARNING, "TODO message"); @@ -3122,19 +3122,19 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { hlsl << ToHlsl("dest = src0", xboxInstruction); break; case ILU_RCP: - hlsl << ToHlsl("// rcp TODO", xboxInstruction); + hlsl << ToHlsl("dest = rcp(src0)", xboxInstruction); break; case ILU_RCC: - hlsl << ToHlsl("// rcc TODO", xboxInstruction); + hlsl << ToHlsl("dest = rcc(src0)", xboxInstruction); break; case ILU_RSQ: - hlsl << ToHlsl("// rsq TODO", xboxInstruction); + hlsl << ToHlsl("dest = rsqrt(src0)", xboxInstruction); break; case ILU_EXP: - hlsl << ToHlsl("// exp TODO", xboxInstruction); + hlsl << ToHlsl("dest = x_exp(src0)", xboxInstruction); break; case ILU_LOG: - hlsl << ToHlsl("// log TODO", xboxInstruction); + hlsl << ToHlsl("dest = x_log(src0)", xboxInstruction); break; case ILU_LIT: hlsl << ToHlsl("// lit TODO", xboxInstruction); From 21712cb4de852b14c51a21d8c524058db5740b42 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 30 Nov 2019 21:03:37 +1300 Subject: [PATCH 04/77] Implement remaining xbox functions --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 23 +++++++++++++++++++---- src/core/hle/D3D8/XbVertexShader.cpp | 4 +++- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index e9e852021..154c9309a 100644 --- 
a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -23,7 +23,7 @@ float4 c(int index); static int a; // Xbox index register -int toXboxIndex(src0) { +int toXboxIndex(float src0) { // The address register should be floored // Due to rounding differences with the Xbox (and increased precision on PC?) // some titles produce values just below the threshold of the next integer. @@ -36,7 +36,7 @@ float x_dph(float4 src0, float4 src1) { return dot(src0, src1) + src1.w; } -float4 x_sge(float4 a, float4 b) { +float4 x_sge(float4 src0, float4 src1) { float4 dest; dest.x = (src0.x >= src1.x) ? 1.0f : 0.0f; dest.y = (src0.y >= src1.y) ? 1.0f : 0.0f; @@ -45,7 +45,7 @@ float4 x_sge(float4 a, float4 b) { return dest; } -float4 x_sge(float4 a, float4 src1) { +float4 x_sle(float4 src0, float4 src1) { float4 dest; dest.x = (src0.x < src1.x) ? 1.0f : 0.0f; dest.y = (src0.y < src1.y) ? 1.0f : 0.0f; @@ -66,6 +66,21 @@ float x_rcc(float src0) { : clamp(r, -5.42101e-020, -1.84467e+019); } +float4 x_lit(float4 src0) { + const float epsilon = 1.0 / 256.0; + float diffuse = src0.x; + float blinn = src0.y; + float specPower = clamp(src0.w, -(128 - epsilon), (128 - epsilon)); + + float4 dest; + dest.x = 1; + dest.y = max(diffuse, 0); + dest.z = diffuse > 0 ? 
pow(2, specPower * log(blinn)) : 0; + dest.w = 1; + + return dest; +} + VS_OUTPUT main(const VS_INPUT xIn) { // Input registers @@ -100,7 +115,7 @@ VS_OUTPUT main(const VS_INPUT xIn) // Insert Xbox shader here - + // // Copy variables to output struct VS_OUTPUT xOut; diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 1fd7ecdca..307a63f6a 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -3068,6 +3068,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { switch (xboxInstruction.MAC) { case MAC_NOP: + hlsl << "// NOP"; break; case MAC_MOV: hlsl << ToHlsl("dest = src0", xboxInstruction); @@ -3117,6 +3118,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { switch (xboxInstruction.ILU) { case ILU_NOP: + hlsl << "// NOP"; break; case ILU_MOV: hlsl << ToHlsl("dest = src0", xboxInstruction); @@ -3137,7 +3139,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { hlsl << ToHlsl("dest = x_log(src0)", xboxInstruction); break; case ILU_LIT: - hlsl << ToHlsl("// lit TODO", xboxInstruction); + hlsl << ToHlsl("dest = x_lit(src0)", xboxInstruction); break; default: EmuLog(LOG_LEVEL::WARNING, "TODO message"); From c6242b798c1f07493a55d15c2e73bc14ac205caa Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 30 Nov 2019 21:04:05 +1300 Subject: [PATCH 05/77] remove junk files --- .../hle/D3D8/Direct3D9/HlslVertexShader.cpp | 225 ------------------ .../hle/D3D8/Direct3D9/HlslVertexShader.h | 5 - 2 files changed, 230 deletions(-) delete mode 100644 src/core/hle/D3D8/Direct3D9/HlslVertexShader.cpp delete mode 100644 src/core/hle/D3D8/Direct3D9/HlslVertexShader.h diff --git a/src/core/hle/D3D8/Direct3D9/HlslVertexShader.cpp b/src/core/hle/D3D8/Direct3D9/HlslVertexShader.cpp deleted file mode 100644 index 4d3a64bbc..000000000 --- a/src/core/hle/D3D8/Direct3D9/HlslVertexShader.cpp +++ /dev/null @@ -1,225 +0,0 @@ - -//#define LOG_PREFIX CXBXR_MODULE::VTXSH -//#include -// -////#include 
"HlslVertexShader.h" -// -////#include "XbD3D8Types.h" // For X_D3DVSDE_* -//#include -//#include -////#include -//#include -////#include - -//#pragma once -//#include "core\hle\D3D8\XbVertexShader.cpp" - -// HLSL outputs -std::array outputs = -{ - "oPos", - "oT0", - "oT1", - "oT2", - "oT3", - "oT4", - "oD0", - "oD1", - "oB0", - "oB1", - "oFog", - "oPts" -}; - -std::string ToHlsl(VSH_IMD_OUTPUT& dest) { - auto hlsl = std::stringstream(); - switch (dest.Type) - { - case IMD_OUTPUT_O: - hlsl << "out." << outputs[dest.Address]; - break; - case IMD_OUTPUT_A0X: - hlsl << "a"; - break; - case IMD_OUTPUT_C: - hlsl << "c[" << dest.Address << "]"; //todo we can output to constants...? - break; - case IMD_OUTPUT_R: - hlsl << "r[" << dest.Address << "]"; - break; - default: - break; - } - - return hlsl.str(); -} - -std::string ToHlsl(VSH_IMD_PARAMETER& parameter) -{ - auto hlsl = std::stringstream(); - - hlsl << (parameter.Parameter.Neg ? "-" : "") << VshGetRegisterName(parameter.Parameter.ParameterType); - - if (parameter.Parameter.ParameterType == PARAM_C && parameter.IndexesWithA0_X) - { - // Only display the offset if it's not 0. - parameter.Parameter.Address - ? 
hlsl << "[a+" << parameter.Parameter.Address << "]" - : hlsl << "[a]"; - } - else - { - hlsl << parameter.Parameter.Address; - } - - // Only bother printing the swizzle if it is not .xyzw - if (!(parameter.Parameter.Swizzle[0] == SWIZZLE_X && - parameter.Parameter.Swizzle[1] == SWIZZLE_Y && - parameter.Parameter.Swizzle[2] == SWIZZLE_Z && - parameter.Parameter.Swizzle[3] == SWIZZLE_W)) - { - hlsl << "."; - for (int i = 0; i < 4; i++) - { - char Swizzle = '?'; - switch (parameter.Parameter.Swizzle[i]) - { - case SWIZZLE_X: - Swizzle = 'x'; - break; - case SWIZZLE_Y: - Swizzle = 'y'; - break; - case SWIZZLE_Z: - Swizzle = 'z'; - break; - case SWIZZLE_W: - Swizzle = 'w'; - break; - } - hlsl << Swizzle; - } - } - - return hlsl.str(); -} - -std::string ToHlsl(std::string pattern, VSH_INTERMEDIATE_FORMAT& instruction) { - auto static dest = std::regex("dest"); - const std::regex src[] = { std::regex("src0"), std::regex("src1"), std::regex("src2") }; - - // TODO use simple string replace - // Warn if we didn't replace anything etc. 
- // Replace dest - auto hlsl = std::regex_replace(pattern, std::regex("dest"), ToHlsl(instruction.Output)); - - int srcNum = 0; - for (int i = 0; i < 3; i++) { // TODO remove magic number - if (instruction.Parameters[i].Active) { - hlsl = std::regex_replace(hlsl, std::regex(src[srcNum]), ToHlsl(instruction.Parameters[i])); - srcNum += 1; - } - } - - return hlsl; -} - -std::string BuildShader(VSH_XBOX_SHADER* pShader) { - - auto hlsl = std::stringstream(); - - - for (int i = 0; i < pShader->IntermediateCount; i++) { - - VSH_INTERMEDIATE_FORMAT xboxInstruction = pShader->Intermediate[i]; - - if (xboxInstruction.InstructionType == IMD_MAC) - { - switch (xboxInstruction.MAC) - { - case MAC_NOP: - break; - case MAC_MOV: - hlsl << ToHlsl("dest = src0", xboxInstruction); - break; - case MAC_MUL: - hlsl << ToHlsl("dest = src0 * src1", xboxInstruction); - break; - case MAC_ADD: - hlsl << ToHlsl("dest = src0 + src1", xboxInstruction); - break; - case MAC_MAD: - hlsl << ToHlsl("dest = mad(src0, src1, src2)", xboxInstruction); - break; - case MAC_DP3: - hlsl << ToHlsl("dest = dot(src0, src1)", xboxInstruction); - break; - case MAC_DPH: - hlsl << ToHlsl("dest = dot(src0, src1) + src1.w", xboxInstruction); - break; - case MAC_DP4: - hlsl << ToHlsl("dest = dot(src0, src1)", xboxInstruction); - break; - case MAC_DST: - hlsl << ToHlsl("dest = x_dst(src0, src1)", xboxInstruction); - break; - case MAC_MIN: - hlsl << ToHlsl("dest = min(src0, src1)", xboxInstruction); - break; - case MAC_MAX: - hlsl << ToHlsl("dest = max(src0, src1)", xboxInstruction); - break; - case MAC_SLT: - hlsl << ToHlsl("dest = x_slt(src0, src1)", xboxInstruction); - break; - case MAC_SGE: - hlsl << ToHlsl("dest = x_sge(src0, src1)", xboxInstruction); - break; - case MAC_ARL: - hlsl << ToHlsl("a = src0", xboxInstruction); - break; - default: - EmuLog(LOG_LEVEL::WARNING, "TODO message"); - } - } - else if (xboxInstruction.InstructionType == IMD_ILU) - { - switch (xboxInstruction.ILU) - { - case ILU_NOP: - break; 
- case ILU_MOV: - hlsl << ToHlsl("dest = src0", xboxInstruction); - break; - case ILU_RCP: - hlsl << ToHlsl("// rcp TODO", xboxInstruction); - break; - case ILU_RCC: - hlsl << ToHlsl("// rcc TODO", xboxInstruction); - break; - case ILU_RSQ: - hlsl << ToHlsl("// rsq TODO", xboxInstruction); - break; - case ILU_EXP: - hlsl << ToHlsl("// exp TODO", xboxInstruction); - break; - case ILU_LOG: - hlsl << ToHlsl("// log TODO", xboxInstruction); - break; - case ILU_LIT: - hlsl << ToHlsl("// lit TODO", xboxInstruction); - break; - default: - EmuLog(LOG_LEVEL::WARNING, "TODO message"); - } - } - else - { - EmuLog(LOG_LEVEL::WARNING, "TODO message"); - } - - // Finish the line - hlsl << "\n"; - } - return hlsl.str(); -} diff --git a/src/core/hle/D3D8/Direct3D9/HlslVertexShader.h b/src/core/hle/D3D8/Direct3D9/HlslVertexShader.h deleted file mode 100644 index 860dcdba2..000000000 --- a/src/core/hle/D3D8/Direct3D9/HlslVertexShader.h +++ /dev/null @@ -1,5 +0,0 @@ -// -//#pragma once -//#include "core\hle\D3D8\XbVertexShader.h" -//#include -//std::string BuildShader(VSH_XBOX_SHADER* pShader); From 3cd2f733324e862b828f59d3aeaf3afe42db6788 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 30 Nov 2019 21:25:21 +1300 Subject: [PATCH 06/77] Apply scaling hack --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 154c9309a..d090e0d61 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -117,6 +117,10 @@ VS_OUTPUT main(const VS_INPUT xIn) // + // TODO fix scaling + // Apply scaling hack from existing code + oPos = oPos * c(-38) + c(-37); + // Copy variables to output struct VS_OUTPUT xOut; From b24ea2309c06e463c99f3e2f69376367a21da055 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 30 Nov 2019 22:08:42 +1300 Subject: [PATCH 07/77] Fix HLSL debug logging --- src/core/hle/D3D8/XbVertexShader.cpp | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 307a63f6a..c9cafc8d8 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -2756,7 +2756,7 @@ extern HRESULT EmuRecompileVshFunction DbgVshPrintf("-- HLSL conversion 1 ---\n"); - DbgVshPrintf("%s", BuildShader(pShader)); + DbgVshPrintf(BuildShader(pShader).c_str()); DbgVshPrintf("-----------------------\n"); VshConvertShader(pShader, bNoReservedConstants); @@ -2770,7 +2770,7 @@ extern HRESULT EmuRecompileVshFunction DbgVshPrintf("-----------------------\n"); DbgVshPrintf("-- HLSL conversion 2 ---\n"); - DbgVshPrintf("%s", BuildShader(pShader)); + DbgVshPrintf(BuildShader(pShader).c_str()); DbgVshPrintf("-----------------------\n"); // HACK: Azurik. Prevent Direct3D from trying to assemble this. From 504b8ddba633125755634183c0df942f7fc15104 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 30 Nov 2019 23:46:56 +1300 Subject: [PATCH 08/77] Fix swizzle behaviour --- src/core/hle/D3D8/XbVertexShader.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index c9cafc8d8..cdb769349 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -2993,7 +2993,8 @@ std::string ToHlsl(VSH_IMD_PARAMETER& paramMeta) hlsl << VshGetRegisterName(param.ParameterType) << param.Address; } - // Only bother printing the swizzle if it is not .xyzw + // Write the swizzle if we need to + // Only bother printing the swizzle if it is not the default .xyzw if (!(param.Swizzle[0] == SWIZZLE_X && param.Swizzle[1] == SWIZZLE_Y && param.Swizzle[2] == SWIZZLE_Z && @@ -3001,15 +3002,23 @@ std::string ToHlsl(VSH_IMD_PARAMETER& paramMeta) { hlsl << "."; - // Find the last difference, so we don't write repeated trailing swizzles - // "var.x" instead of "var.xxxx" - auto 
lastDiffIndex = 0; - for (int i = 1; i < 4; i++) { - if (param.Swizzle[i] != param.Swizzle[i-1]) - lastDiffIndex = i; + // We'll try to simplify swizzles if we can + int swizzles; + + // If all swizzles are the same, we only need to write one out + if (param.Swizzle[0] == param.Swizzle[1] && + param.Swizzle[0] == param.Swizzle[2] && + param.Swizzle[0] == param.Swizzle[3]) { + swizzles = 1; + } + else { + // We need to use the full swizzle + // Note we can't always remove trailing repeats, like in VS asm + // As it may change the type from float4, to float3 or float2 + swizzles = 4; } - for (int i = 0; i <= lastDiffIndex; i++) + for (int i = 0; i < swizzles; i++) { char Swizzle = '?'; switch (param.Swizzle[i]) From 780cb6c5782008ae3c754cf089f12d318e900332 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 30 Nov 2019 23:47:38 +1300 Subject: [PATCH 09/77] fix rcc typo --- src/core/hle/D3D8/XbVertexShader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index cdb769349..a5dc49da5 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -3136,7 +3136,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { hlsl << ToHlsl("dest = rcp(src0)", xboxInstruction); break; case ILU_RCC: - hlsl << ToHlsl("dest = rcc(src0)", xboxInstruction); + hlsl << ToHlsl("dest = x_rcc(src0)", xboxInstruction); break; case ILU_RSQ: hlsl << ToHlsl("dest = rsqrt(src0)", xboxInstruction); From 650174ea83372497c1b356b5c3c56a08ca802833 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sun, 1 Dec 2019 12:18:46 +1300 Subject: [PATCH 10/77] - Reverse screenspace transform properly - Move Xbox variable 'a' inside main - Fix rcc reversed clamp --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 43 +++++++++++++++++++---------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 
d090e0d61..f12947ef1 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -18,10 +18,14 @@ struct VS_OUTPUT float4 oT3 : TEXCOORD3; // Texture Coord 0 }; -extern float4 hostConstants[192]; // Constant registers -float4 c(int index); +// Constant registers +extern float4 hostConstants[192]; -static int a; // Xbox index register +// Map Xbox [-96, 95] to Host [0, 191] +// Account for Xbox's negative constant indexes +float4 c(int index) { + return hostConstants[index + 96]; +} int toXboxIndex(float src0) { // The address register should be floored @@ -63,7 +67,7 @@ float x_rcc(float src0) { // Clamp return (r > 0) ? clamp(r, 5.42101e-020, 1.84467e+019) - : clamp(r, -5.42101e-020, -1.84467e+019); + : clamp(r, -1.84467e+019, -5.42101e-020); } float4 x_lit(float4 src0) { @@ -81,6 +85,22 @@ float4 x_lit(float4 src0) { return dest; } +float4 reverseScreenspaceTransform(float4 oPos) { + // On Xbox, oPos should contain the vertex position in screenspace + // Conventionally, each Xbox Vertex Shader includes instructions like this + // mul oPos.xyz, r12, c-38 + // +rcc r1.x, r12.w + // mad oPos.xyz, r12, r1.x, c-37 + // where c-37 and c-38 are reserved transform values + + // Lets hope c-37 and c-38 contain the conventional values + oPos.xyz -= c(-37); // reverse offset + oPos.xyz *= oPos.w; // reverse perspective divide + oPos.xyz /= c(-38); // reverse scale + + return oPos; +} + VS_OUTPUT main(const VS_INPUT xIn) { // Input registers @@ -90,6 +110,9 @@ VS_OUTPUT main(const VS_INPUT xIn) float4 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 1); // TODO correct? + // Xbox index register + int a; + // Output variables float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3; oPos = oD0 = oD1 = oB0 = oB1 = oT0 = oT1 = oT2 = oT3 = float4(0, 0, 0, 1); // TODO correct? 
@@ -117,14 +140,10 @@ VS_OUTPUT main(const VS_INPUT xIn) // - // TODO fix scaling - // Apply scaling hack from existing code - oPos = oPos * c(-38) + c(-37); - // Copy variables to output struct VS_OUTPUT xOut; - xOut.oPos = oPos; + xOut.oPos = reverseScreenspaceTransform(oPos); xOut.oD0 = oD0; xOut.oD1 = oD1; xOut.oFog = oFog; @@ -138,9 +157,3 @@ VS_OUTPUT main(const VS_INPUT xIn) return xOut; } - -// Account for Xbox's negative constant indexes -// Map Xbox [-96, 95] to Host [0, 191] -float4 c(int index) { - return hostConstants[index + 96]; -} From ef3b5d721735d4aecf26a35df3203179039ba217 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sun, 1 Dec 2019 12:30:06 +1300 Subject: [PATCH 11/77] Fix bias typo --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index f12947ef1..c5d69ff2f 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -33,8 +33,7 @@ int toXboxIndex(float src0) { // some titles produce values just below the threshold of the next integer. 
// We can add a small bias to make sure it's bumped over the threshold // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) - return floor(src0) + 0.00000001; // TODO test -} + return floor(src0 + 0.0001); // TODO test float x_dph(float4 src0, float4 src1) { return dot(src0, src1) + src1.w; From c3fbc46b7cbb33d0a7089c063830005f3ef23f2a Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sun, 1 Dec 2019 12:39:20 +1300 Subject: [PATCH 12/77] c() bounds check --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index c5d69ff2f..60fa8acc7 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -24,6 +24,10 @@ extern float4 hostConstants[192]; // Map Xbox [-96, 95] to Host [0, 191] // Account for Xbox's negative constant indexes float4 c(int index) { + // Out-of-range reads return 0 + if (index < -96 || index > 95) + return float4(0, 0, 0, 0); + return hostConstants[index + 96]; } From 65f6ff349781a08819ad82a2c7553cc43b1666ea Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sun, 1 Dec 2019 17:55:07 +1300 Subject: [PATCH 13/77] fix missing bracket --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 60fa8acc7..16f0f4212 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -38,6 +38,7 @@ int toXboxIndex(float src0) { // We can add a small bias to make sure it's bumped over the threshold // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) return floor(src0 + 0.0001); // TODO test +} float x_dph(float4 src0, float4 src1) { return dot(src0, src1) + src1.w; From 551516a52a0176fd64bf0727d351570d319b2180 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sun, 1 Dec 2019 20:33:54 +1300 Subject: [PATCH 
14/77] fix typo --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 16f0f4212..0b16ec9b1 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -53,7 +53,7 @@ float4 x_sge(float4 src0, float4 src1) { return dest; } -float4 x_sle(float4 src0, float4 src1) { +float4 x_slt(float4 src0, float4 src1) { float4 dest; dest.x = (src0.x < src1.x) ? 1.0f : 0.0f; dest.y = (src0.y < src1.y) ? 1.0f : 0.0f; From 2102bffee22d9a8619830f0a8592d9f6cbd4e4d4 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Mon, 2 Dec 2019 21:09:11 +1300 Subject: [PATCH 15/77] Ensure dp3 and dph use float3 dot products --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 0b16ec9b1..64b276554 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -40,8 +40,16 @@ int toXboxIndex(float src0) { return floor(src0 + 0.0001); // TODO test } +float x_dp4(float4 src0, float4 src1) { + return dot(src0, src1); +} + +float x_dp3(float3 src0, float3 src1) { + return dot(src0, src1); +} + float x_dph(float4 src0, float4 src1) { - return dot(src0, src1) + src1.w; + return x_dp3(src0, src1) + src1.w; } float4 x_sge(float4 src0, float4 src1) { From 620c98da809955e8ac71ac8710500120d38903a5 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Mon, 2 Dec 2019 22:37:50 +1300 Subject: [PATCH 16/77] Interpret r12 as oPos --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 64b276554..8471f72db 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -1,3 +1,5 @@ +#define r12 oPos // oPos and r12 are two ways of accessing the 
same register on Xbox + struct VS_INPUT { float4 v[16] : TEXCOORD; From 8382adfaabe39981de7794546a53f05452100cd7 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Mon, 2 Dec 2019 22:47:21 +1300 Subject: [PATCH 17/77] fixup dp3, dph --- src/core/hle/D3D8/XbVertexShader.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index a5dc49da5..231ceaf2b 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -3089,16 +3089,16 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { hlsl << ToHlsl("dest = src0 + src1", xboxInstruction); break; case MAC_MAD: - hlsl << ToHlsl("dest = mad(src0, src1, src2)", xboxInstruction); + hlsl << ToHlsl("dest = src0 * src1 + src2", xboxInstruction); break; case MAC_DP3: - hlsl << ToHlsl("dest = dot(src0, src1)", xboxInstruction); + hlsl << ToHlsl("dest = x_dp3(src0, src1)", xboxInstruction); break; case MAC_DPH: - hlsl << ToHlsl("dest = dot(src0, src1) + src1.w", xboxInstruction); + hlsl << ToHlsl("dest = x_dph(src0, src1)", xboxInstruction); break; case MAC_DP4: - hlsl << ToHlsl("dest = dot(src0, src1)", xboxInstruction); + hlsl << ToHlsl("dest = x_dp4(src0, src1)", xboxInstruction); break; case MAC_DST: hlsl << ToHlsl("dest = x_dst(src0, src1)", xboxInstruction); From fd1555535eee498bb6ff9ed61fa7f470ea3cffe0 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Mon, 2 Dec 2019 22:47:59 +1300 Subject: [PATCH 18/77] HACK Get HLSL compiling --- projects/cxbx/CMakeLists.txt | 1 + src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 54 +++++++++---------- src/core/hle/D3D8/XbVertexShader.cpp | 65 +++++++++++++---------- src/core/hle/D3D8/XbVertexShader.h | 4 +- 4 files changed, 69 insertions(+), 55 deletions(-) diff --git a/projects/cxbx/CMakeLists.txt b/projects/cxbx/CMakeLists.txt index d80e95868..d269042b6 100644 --- a/projects/cxbx/CMakeLists.txt +++ b/projects/cxbx/CMakeLists.txt @@ -160,6 +160,7 @@ endif() 
set(WINS_LIB legacy_stdio_definitions d3d9 + d3dcompiler dinput8 dxguid odbc32 diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 7722a4436..453c1ab43 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -4033,7 +4033,7 @@ HRESULT WINAPI XTL::EMUPATCH(D3DDevice_CreateVertexShader) g_pD3DDevice->SetVertexDeclaration(pCxbxVertexShader->pHostVertexDeclaration); DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexDeclaration"); - LPD3DXBUFFER pRecompiledBuffer = nullptr; + ID3DBlob *pRecompiledBuffer = nullptr; DWORD XboxFunctionSize = 0; DWORD *pRecompiledFunction = nullptr; if (SUCCEEDED(hRet) && pFunction) @@ -4074,36 +4074,36 @@ HRESULT WINAPI XTL::EMUPATCH(D3DDevice_CreateVertexShader) } //* Fallback to dummy shader. - if (FAILED(hRet)) - { - static const char dummy[] = - "vs.1.1\n" - "dcl_position v0\n" - "dp4 oPos.x, v0, c96\n" - "dp4 oPos.y, v0, c97\n" - "dp4 oPos.z, v0, c98\n" - "dp4 oPos.w, v0, c99\n"; + //if (FAILED(hRet)) + //{ + // static const char dummy[] = + // "vs.1.1\n" + // "dcl_position v0\n" + // "dp4 oPos.x, v0, c96\n" + // "dp4 oPos.y, v0, c97\n" + // "dp4 oPos.z, v0, c98\n" + // "dp4 oPos.w, v0, c99\n"; - EmuLog(LOG_LEVEL::WARNING, "Trying fallback:\n%s", dummy); + // EmuLog(LOG_LEVEL::WARNING, "Trying fallback:\n%s", dummy); - hRet = D3DXAssembleShader( - dummy, - strlen(dummy), - /*pDefines=*/nullptr, - /*pInclude=*/nullptr, - /*Flags=*/0, // Was D3DXASM_SKIPVALIDATION - /*ppCompiledShader=*/&pRecompiledBuffer, - /*ppCompilationErrors*/nullptr); + // hRet = D3DXAssembleShader( + // dummy, + // strlen(dummy), + // /*pDefines=*/nullptr, + // /*pInclude=*/nullptr, + // /*Flags=*/0, // Was D3DXASM_SKIPVALIDATION + // /*ppCompiledShader=*/&pRecompiledBuffer, + // /*ppCompilationErrors*/nullptr); - DEBUG_D3DRESULT(hRet, "D3DXAssembleShader"); + // DEBUG_D3DRESULT(hRet, "D3DXAssembleShader"); - hRet = g_pD3DDevice->CreateVertexShader - ( - 
(DWORD*)pRecompiledBuffer->GetBufferPointer(), - &pHostVertexShader - ); - DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateVertexShader(fallback)"); - } + // hRet = g_pD3DDevice->CreateVertexShader + // ( + // (DWORD*)pRecompiledBuffer->GetBufferPointer(), + // &pHostVertexShader + // ); + // DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateVertexShader(fallback)"); + //} if (pRecompiledBuffer != nullptr) { diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 231ceaf2b..83c3939c5 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -2663,6 +2663,9 @@ std::string VshPostProcess_TruncateMovA(std::string shader) { return std::regex_replace(shader, movA, truncate); } +#include +#include + // Post process the shader as a string std::string VshPostProcess(std::string shader) { shader = VshPostProcess_Expp(shader); @@ -2680,14 +2683,14 @@ extern HRESULT EmuRecompileVshFunction D3DVERTEXELEMENT *pRecompiledDeclaration, bool *pbUseDeclarationOnly, DWORD *pXboxFunctionSize, - LPD3DXBUFFER *ppRecompiledShader + ID3DBlob **ppRecompiledShader ) { XTL::X_VSH_SHADER_HEADER *pXboxVertexShaderHeader = (XTL::X_VSH_SHADER_HEADER*)pXboxFunction; - DWORD *pToken; + DWORD *pToken; boolean EOI = false; - VSH_XBOX_SHADER *pShader = (VSH_XBOX_SHADER*)calloc(1, sizeof(VSH_XBOX_SHADER)); - LPD3DXBUFFER pErrors = nullptr; + VSH_XBOX_SHADER *pShader = (VSH_XBOX_SHADER*)calloc(1, sizeof(VSH_XBOX_SHADER)); + ID3DBlob *pErrors; HRESULT hRet = 0; // TODO: support this situation.. 
@@ -2749,19 +2752,29 @@ extern HRESULT EmuRecompileVshFunction std::stringstream& pXboxShaderDisassembly = std::stringstream(); std::stringstream& pHostShaderDisassembly = std::stringstream(); + //static std::ifstream t("Xb.hlsl"); + static std::ifstream t("C:\\Users\\OEM\\Desktop\\repos\\Cxbx-Reloaded\\src\\core\\hle\\D3D8\\Direct3D9\\Xb.hlsl"); + static std::string hlslTemplate((std::istreambuf_iterator(t)), + std::istreambuf_iterator()); + DbgVshPrintf("-- Before conversion --\n"); VshWriteShader(pShader, pXboxShaderDisassembly, pRecompiledDeclaration, FALSE); DbgVshPrintf("%s", pXboxShaderDisassembly.str().c_str()); DbgVshPrintf("-----------------------\n"); + auto hlslTest = BuildShader(pShader); + hlslTest = std::regex_replace(hlslTemplate, std::regex("// "), hlslTest); DbgVshPrintf("-- HLSL conversion 1 ---\n"); - DbgVshPrintf(BuildShader(pShader).c_str()); + DbgVshPrintf(hlslTest.c_str()); DbgVshPrintf("-----------------------\n"); VshConvertShader(pShader, bNoReservedConstants); VshWriteShader(pShader, pHostShaderDisassembly, pRecompiledDeclaration, TRUE); + //auto hlslTest = BuildShader(pShader); + //hlslTest = std::regex_replace(hlslTemplate, std::regex("// "), hlslTest); + // Post process the final shader auto finalHostShader = VshPostProcess(pHostShaderDisassembly.str()); @@ -2778,43 +2791,41 @@ extern HRESULT EmuRecompileVshFunction { EmuLog(LOG_LEVEL::WARNING, "Replacing empty vertex shader with fallback"); - static const char dummy[] = + finalHostShader = std::string( "vs.2.x\n" "dcl_position v0\n" "dp4 oPos.x, v0, c96\n" "dp4 oPos.y, v0, c97\n" "dp4 oPos.z, v0, c98\n" - "dp4 oPos.w, v0, c99\n"; - - hRet = D3DXAssembleShader( - dummy, - strlen(dummy), - /*pDefines=*/nullptr, - /*pInclude=*/nullptr, - /*Flags=*/0, // Was D3DXASM_SKIPVALIDATION, - /*ppCompiledShader=*/ppRecompiledShader, - /*ppCompilationErrors*/nullptr); + "dp4 oPos.w, v0, c99\n" + ); } else { - hRet = D3DXAssembleShader( - finalHostShader.c_str(), - finalHostShader.length(), - 
/*pDefines=*/nullptr, - /*pInclude=*/nullptr, - /*Flags=*/0, // Was D3DXASM_SKIPVALIDATION, - /*ppCompiledShader=*/ppRecompiledShader, - /*ppCompilationErrors*/&pErrors); + hRet = D3DCompile( + hlslTest.c_str(), + hlslTest.length(), + nullptr, // pSourceName + nullptr, // pDefines + nullptr, // pInclude // TODO precompile x_* HLSL functions? + "main", // shader entry poiint + "vs_3_0", // shader profile + 0, // flags1 + 0, // flags2 + ppRecompiledShader, // out + &pErrors // ppErrorMsgs out + ); } if (FAILED(hRet)) { EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); - EmuLog(LOG_LEVEL::WARNING, "%s", pErrors->GetBufferPointer()); + EmuLog(LOG_LEVEL::WARNING, "%s", (char*)(pErrors)->GetBufferPointer()); + LOG_TEST_CASE((char *)pErrors->GetBufferPointer()); } - if( pErrors ) - pErrors->Release(); + if (pErrors) + (pErrors)->Release(); } free(pShader); diff --git a/src/core/hle/D3D8/XbVertexShader.h b/src/core/hle/D3D8/XbVertexShader.h index 88997a1b9..36b963a49 100644 --- a/src/core/hle/D3D8/XbVertexShader.h +++ b/src/core/hle/D3D8/XbVertexShader.h @@ -25,6 +25,8 @@ #ifndef XBVERTEXSHADER_H #define XBVERTEXSHADER_H +#include + #include "core\hle\D3D8\XbD3D8Types.h" // for X_VSH_MAX_ATTRIBUTES // Host vertex shader counts @@ -112,7 +114,7 @@ extern HRESULT EmuRecompileVshFunction D3DVERTEXELEMENT *pRecompiledDeclaration, bool *pbUseDeclarationOnly, DWORD *pXboxFunctionSize, - LPD3DXBUFFER *ppRecompiledShader + ID3DBlob **ppRecompiledShader ); extern void FreeVertexDynamicPatch(CxbxVertexShader *pVertexShader); From 19f0affd215e97c57fd96afbfa617a4ef7ec912a Mon Sep 17 00:00:00 2001 From: Luke Usher Date: Fri, 26 Jul 2019 14:15:48 +0100 Subject: [PATCH 19/77] Negate Screen Space Transforms instead of deleting from shader --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 141 +++++++++++++++------- src/core/hle/D3D8/XbD3D8Types.h | 18 ++- src/core/hle/D3D8/XbVertexShader.cpp | 8 +- 3 files changed, 116 insertions(+), 51 deletions(-) diff --git 
a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 453c1ab43..8a8af1349 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -3742,20 +3742,88 @@ void ValidateRenderTargetDimensions(DWORD HostRenderTarget_Width, DWORD HostRend } } +float GetZScaleForSurface(XTL::X_D3DSurface* pSurface) +{ + // If no surface was present, fallback to 1 + if (pSurface == xbnullptr) { + return 1; + } + + auto format = GetXboxPixelContainerFormat(pSurface); + switch (format) { + case XTL::X_D3DFMT_D16: + case XTL::X_D3DFMT_LIN_D16: + return 65535.0f; + + case XTL::X_D3DFMT_D24S8: + case XTL::X_D3DFMT_LIN_D24S8: + return 16777215.0f; + + case XTL::X_D3DFMT_F16: + case XTL::X_D3DFMT_LIN_F16: + return 511.9375f; + + case XTL::X_D3DFMT_F24S8: + case XTL::X_D3DFMT_LIN_F24S8: + // 24bit floating point is close to precision maximum, so a lower value is used + // We can't use a double here since the vertex shader is only at float precision + return 1.0e30f; + } + + // Default to 1 if unknown depth format + LOG_TEST_CASE("GetZScaleForSurface: Unknown Xbox Depth Format"); + return 1; +} + +void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4]) +{ + // Store viewport offset and scale in constant registers + // used in shaders to transform back from screen space (Xbox Shader Output) to Clip space (Host Shader Output) + D3DVIEWPORT ViewPort; + g_pD3DDevice->GetViewport(&ViewPort); + + // Calculate Width/Height scale & offset + float scaleWidth = (2.0f / ViewPort.Width) * g_RenderScaleFactor; + float scaleHeight = (2.0f / ViewPort.Height) * g_RenderScaleFactor; + float offsetWidth = scaleWidth; + float offsetHeight = scaleHeight; + + // Calculate Z scale & offset + float zScale = GetZScaleForSurface(g_pXbox_DepthStencil); + float scaleZ = zScale * (ViewPort.MaxZ - ViewPort.MinZ); + float offsetZ = zScale * ViewPort.MinZ; + + vOffset[0] = offsetWidth + ViewPort.X; + vOffset[1] = offsetHeight 
+ ViewPort.Y; + vOffset[2] = offsetZ; + vOffset[3] = 0.0f; + + vScale[0] = scaleWidth; + vScale[1] = scaleHeight; + vScale[2] = scaleZ; + vScale[3] = 0.0f; +} + void UpdateViewPortOffsetAndScaleConstants() { + float vOffset[4], vScale[4]; + GetViewPortOffsetAndScale(vOffset, vScale); + float vScaleReversed[4] = { 1.0f / (double)vScale[0], 1.0f / (double)vScale[1], 1.0f / (double)vScale[2], 0 }; + + g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_VIEWPORT_SCALE_MIRROR_INVERTED, vScaleReversed, 1); + g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_VIEWPORT_OFFSET_MIRROR, vOffset, 1); + + // Set 0 and 1 constant, used to compare and transform W when required + float ZeroOne[] = { 0, 1, 0, 0 }; + g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_VIEWPORT_SCALE_ZERO_ONE, ZeroOne, 1); + // Store viewport offset and scale in constant registers 58 (c-38) and // 59 (c-37) used for screen space transformation. + // We only do this if X_D3DSCM_NORESERVEDCONSTANTS is not set, since enabling this flag frees up these registers for shader used if (g_Xbox_VertexShaderConstantMode != X_D3DSCM_NORESERVEDCONSTANTS) { - D3DVIEWPORT ViewPort; - g_pD3DDevice->GetViewport(&ViewPort); - - float vScale[] = { (2.0f / ViewPort.Width) * g_RenderScaleFactor, (-2.0f / ViewPort.Height) * g_RenderScaleFactor, 0.0f, 0.0f }; - static float vOffset[] = { -1.0f, 1.0f, 0.0f, 1.0f }; - - g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_RESERVED_CONSTANT1_CORRECTED, vScale, 1); - g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_RESERVED_CONSTANT2_CORRECTED, vOffset, 1); + g_pD3DDevice->SetVertexShaderConstantF(X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION, vScale, 1); + g_pD3DDevice->SetVertexShaderConstantF(X_D3DSCM_RESERVED_CONSTANT_OFFSET + X_D3DSCM_CORRECTION, vOffset, 1); } } @@ -3892,37 +3960,20 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_GetViewportOffsetAndScale) // Test case : TMNT(R)2 // Test case : TMNT(R)3 -#if 0 - float fScaleX = 1.0f; - float fScaleY = 1.0f; - float fScaleZ = 1.0f; - float 
fOffsetX = 0.5 + 1.0/32; - float fOffsetY = 0.5 + 1.0/32; - X_D3DVIEWPORT8 Viewport; + float vOffset[4], vScale[4]; + GetViewPortOffsetAndScale(vOffset, vScale); - EMUPATCH(D3DDevice_GetViewport)(&Viewport); + pOffset->x = vOffset[0]; + pOffset->y = vOffset[1]; + pOffset->z = vOffset[2]; + pOffset->w = vOffset[3]; - pScale->x = (float)Viewport.Width * 0.5f * fScaleX; - pScale->y = (float)Viewport.Height * -0.5f * fScaleY; - pScale->z = (Viewport.MaxZ - Viewport.MinZ) * fScaleZ; - pScale->w = 0; - - pOffset->x = (float)Viewport.Width * fScaleX * 0.5f + (float)Viewport.X * fScaleX + fOffsetX; - pOffset->y = (float)Viewport.Height * fScaleY * 0.5f + (float)Viewport.Y * fScaleY + fOffsetY; - pOffset->z = Viewport.MinZ * fScaleZ; - pOffset->w = 0; -#else - pScale->x = 1.0f; - pScale->y = 1.0f; - pScale->z = 1.0f; - pScale->w = 1.0f; - - pOffset->x = 0.0f; - pOffset->y = 0.0f; - pOffset->z = 0.0f; - pOffset->w = 0.0f; -#endif + pScale->x = vScale[0]; + pScale->y = vScale[1]; + pScale->z = vScale[2]; + pScale->w = vScale[3]; } + // LTCG specific D3DDevice_SetShaderConstantMode function... 
// This uses a custom calling convention where parameter is passed in EAX VOID __stdcall XTL::EMUPATCH(D3DDevice_SetShaderConstantMode_0) @@ -4223,7 +4274,7 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SetVertexShaderConstant) // Xbox vertex shader constants range from -96 to 95 // The host does not support negative, so we adjust to 0..191 - Register += X_D3DVS_CONSTREG_BIAS; + Register += X_D3DSCM_CORRECTION; HRESULT hRet; hRet = g_pD3DDevice->SetVertexShaderConstantF( @@ -4254,7 +4305,7 @@ VOID __fastcall XTL::EMUPATCH(D3DDevice_SetVertexShaderConstant1) // The XDK uses a macro to automatically adjust to 0..191 range // but D3DDevice_SetVertexShaderConstant expects -96..95 range // so we adjust before forwarding - EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DVS_CONSTREG_BIAS, pConstantData, 1); + EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DSCM_CORRECTION, pConstantData, 1); } // ****************************************************************** @@ -4271,7 +4322,7 @@ VOID __fastcall XTL::EMUPATCH(D3DDevice_SetVertexShaderConstant1Fast) // The XDK uses a macro to automatically adjust to 0..191 range // but D3DDevice_SetVertexShaderConstant expects -96..95 range // so we adjust before forwarding - EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DVS_CONSTREG_BIAS, pConstantData, 1); + EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DSCM_CORRECTION, pConstantData, 1); } // ****************************************************************** @@ -4288,7 +4339,7 @@ VOID __fastcall XTL::EMUPATCH(D3DDevice_SetVertexShaderConstant4) // The XDK uses a macro to automatically adjust to 0..191 range // but D3DDevice_SetVertexShaderConstant expects -96..95 range // so we adjust before forwarding - EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DVS_CONSTREG_BIAS, pConstantData, 4); + EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DSCM_CORRECTION, pConstantData, 4); } // 
****************************************************************** @@ -4306,7 +4357,7 @@ VOID __fastcall XTL::EMUPATCH(D3DDevice_SetVertexShaderConstantNotInline) // The XDK uses a macro to automatically adjust to 0..191 range // but D3DDevice_SetVertexShaderConstant expects -96..95 range // so we adjust before forwarding - EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DVS_CONSTREG_BIAS, pConstantData, ConstantCount / 4); + EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DSCM_CORRECTION, pConstantData, ConstantCount / 4); } // ****************************************************************** @@ -4324,7 +4375,7 @@ VOID __fastcall XTL::EMUPATCH(D3DDevice_SetVertexShaderConstantNotInlineFast) // The XDK uses a macro to automatically adjust to 0..191 range // but D3DDevice_SetVertexShaderConstant expects -96..95 range // so we adjust before forwarding - EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DVS_CONSTREG_BIAS, pConstantData, ConstantCount / 4); + EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DSCM_CORRECTION, pConstantData, ConstantCount / 4); } // LTCG specific D3DDevice_SetTexture function... 
@@ -7028,7 +7079,7 @@ void CxbxUpdateNativeD3DResources() auto nv2a = g_NV2A->GetDeviceState(); for(int i = 0; i < X_D3DVS_CONSTREG_COUNT; i++) { // Skip vOffset and vScale constants, we don't want our values to be overwritten by accident - if (i == X_D3DVS_RESERVED_CONSTANT1_CORRECTED || i == X_D3DVS_RESERVED_CONSTANT2_CORRECTED) { + if (i == X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED || i == X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED) { continue; } @@ -7721,6 +7772,8 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SetRenderTarget) DWORD XboxRenderTarget_Height = GetPixelContainerHeight(g_pXbox_RenderTarget); ValidateRenderTargetDimensions(HostRenderTarget_Width, HostRenderTarget_Height, XboxRenderTarget_Width, XboxRenderTarget_Height); } + + UpdateViewPortOffsetAndScaleConstants(); } // LTCG specific D3DDevice_SetPalette function... @@ -7967,7 +8020,7 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_GetVertexShaderConstant) // Xbox vertex shader constants range from -96 to 95 // The host does not support negative, so we adjust to 0..191 - Register += X_D3DVS_CONSTREG_BIAS; + Register += X_D3DSCM_CORRECTION; HRESULT hRet = g_pD3DDevice->GetVertexShaderConstantF ( diff --git a/src/core/hle/D3D8/XbD3D8Types.h b/src/core/hle/D3D8/XbD3D8Types.h index e730c91f3..a217d11f2 100644 --- a/src/core/hle/D3D8/XbD3D8Types.h +++ b/src/core/hle/D3D8/XbD3D8Types.h @@ -983,14 +983,26 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE; #define X_D3DSCM_192CONSTANTSANDFIXEDPIPELINE 0x02 // Unsupported? 
#define X_D3DSCM_NORESERVEDCONSTANTS 0x10 // Do not reserve constant -38 and -37 -// Xbox vertex shader constants -#define X_D3DVS_CONSTREG_BIAS 96 // Add 96 to arrive at the range 0..191 (instead of -96..95) -#define X_D3DVS_CONSTREG_COUNT 192 +#define X_D3DSCM_RESERVED_CONSTANT_SCALE -38 // Becomes 58 after correction, contains Scale v +#define X_D3DSCM_RESERVED_CONSTANT_OFFSET -37 // Becomes 59 after correction, contains Offset + +#define X_D3DSCM_CORRECTION 96 // Add 96 to arrive at the range 0..191 (instead of 96..95) +#define X_D3DVS_CONSTREG_COUNT 192 #define X_D3DVS_RESERVED_CONSTANT1 -38 // Becomes 58 after correction, contains Scale v #define X_D3DVS_RESERVED_CONSTANT2 -37 // Becomes 59 after correction, contains Offset #define X_D3DVS_RESERVED_CONSTANT1_CORRECTED (X_D3DVS_RESERVED_CONSTANT1 + X_D3DVS_CONSTREG_BIAS) #define X_D3DVS_RESERVED_CONSTANT2_CORRECTED (X_D3DVS_RESERVED_CONSTANT2 + X_D3DVS_CONSTREG_BIAS) +// Special Registers, used to pass additional information to the shaders +#define X_D3DVS_CONSTREG_VERTEXDATA4F_BASE (X_D3DVS_CONSTREG_COUNT + 1) +#define X_D3DVS_CONSTREG_VERTEXDATA4F_END (X_D3DVS_CONSTREG_VERTEXDATA4F_BASE + 14) +#define X_D3DVS_VIEWPORT_SCALE_MIRROR_INVERTED (X_D3DVS_CONSTREG_VERTEXDATA4F_END + 1) +#define X_D3DVS_VIEWPORT_OFFSET_MIRROR (X_D3DVS_VIEWPORT_SCALE_MIRROR_INVERTED + 1) +#define X_D3DVS_VIEWPORT_SCALE_ZERO_ONE (X_D3DVS_VIEWPORT_OFFSET_MIRROR + 1) + +#define X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION) +#define X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_OFFSET + X_D3DSCM_CORRECTION) + // Xbox vertex declaration token bit masks #define X_D3DVSD_MASK_TESSUV 0x10000000 #define X_D3DVSD_MASK_SKIP 0x10000000 // Skips (normally) dwords diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 83c3939c5..85b0e7679 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp 
@@ -1474,7 +1474,7 @@ static void VshRemoveScreenSpaceInstructions(VSH_XBOX_SHADER *pShader) MulIntermediate.Parameters[1].Active = TRUE; MulIntermediate.Parameters[1].IndexesWithA0_X = FALSE; MulIntermediate.Parameters[1].Parameter.ParameterType = PARAM_C; - MulIntermediate.Parameters[1].Parameter.Address = ConvertCRegister(X_D3DVS_RESERVED_CONSTANT1_CORRECTED); + MulIntermediate.Parameters[1].Parameter.Address = ConvertCRegister(X_D3DSCM_RESERVED_CONSTANT_SCALE); MulIntermediate.Parameters[1].Parameter.Neg = FALSE; VshSetSwizzle(&MulIntermediate.Parameters[1], SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); MulIntermediate.Parameters[2].Active = FALSE; @@ -1487,7 +1487,7 @@ static void VshRemoveScreenSpaceInstructions(VSH_XBOX_SHADER *pShader) AddIntermediate.Output.Address = OREG_OPOS; AddIntermediate.Parameters[0].Parameter.ParameterType = PARAM_R; AddIntermediate.Parameters[0].Parameter.Address = X_VSH_TEMP_SCRATCH; - AddIntermediate.Parameters[1].Parameter.Address = ConvertCRegister(X_D3DVS_RESERVED_CONSTANT2_CORRECTED); + AddIntermediate.Parameters[1].Parameter.Address = ConvertCRegister(X_D3DSCM_RESERVED_CONSTANT_OFFSET); VshInsertIntermediate(pShader, &AddIntermediate, ++i); } } @@ -1603,7 +1603,7 @@ static boolean VshConvertShader(VSH_XBOX_SHADER *pShader, if (pIntermediate->Parameters[j].Parameter.ParameterType == PARAM_C) { //if(pIntermediate->Parameters[j].Parameter.Address < 0) - pIntermediate->Parameters[j].Parameter.Address += X_D3DVS_CONSTREG_BIAS; + pIntermediate->Parameters[j].Parameter.Address += X_D3DSCM_CORRECTION; } if (pIntermediate->Parameters[j].Parameter.ParameterType == PARAM_V) { @@ -1624,7 +1624,7 @@ static boolean VshConvertShader(VSH_XBOX_SHADER *pShader, if(pIntermediate->Output.Type == IMD_OUTPUT_C) { //if(pIntermediate->Output.Address < 0) - pIntermediate->Output.Address += X_D3DVS_CONSTREG_BIAS; + pIntermediate->Output.Address += X_D3DSCM_CORRECTION; } From 5d21af8a0e49485fa7afdb2260c199b0b4410223 Mon Sep 17 00:00:00 2001 From: 
Anthony Miles Date: Tue, 3 Dec 2019 22:38:44 +1300 Subject: [PATCH 20/77] Implement more HLSL functions --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 45 +++++++++++++++++++++++++++- src/core/hle/D3D8/XbVertexShader.cpp | 16 +++++----- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 8471f72db..074aff892 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -33,7 +33,7 @@ float4 c(int index) { return hostConstants[index + 96]; } -int toXboxIndex(float src0) { +int x_arl(float src0) { // The address register should be floored // Due to rounding differences with the Xbox (and increased precision on PC?) // some titles produce values just below the threshold of the next integer. @@ -42,6 +42,49 @@ int toXboxIndex(float src0) { return floor(src0 + 0.0001); // TODO test } +// TODO order functions +float4 x_mov(float4 src0) { + return src0; +} + +float4 x_add(float4 src0, float4 src1) { + return src0 + src1; +} + +float4 x_mul(float4 src0, float4 src1) { + return src0 * src1; +} + +float4 x_mad(float4 src0, float4 src1, float4 src3) { + return src0 * src1 + src3; +} + +float4 x_dst(float4 src0, float4 src1) { + return dst(src0, src1); +} + +float4 x_min(float4 src0, float4 src1) { + return src0 * src1; +} + +float4 x_max(float4 src0, float4 src1) { + return max(src0, src1); +} + +float4 x_exp(float src0) { + float x = pow(2, floor(src0)); + float fractional = frac(src0); + float power = pow(2, src0); + return float4(x, fractional, power, 1); +} + +float4 x_log(float src0) { + float exponent = floor(log(src0)); + float mantissa = 1 / pow(2, exponent); + float logResult = log(src0); + return float4(exponent, mantissa, logResult, 1); +} + float x_dp4(float4 src0, float4 src1) { return dot(src0, src1); } diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 85b0e7679..354c6c948 100644 --- 
a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -2821,7 +2821,7 @@ extern HRESULT EmuRecompileVshFunction { EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); EmuLog(LOG_LEVEL::WARNING, "%s", (char*)(pErrors)->GetBufferPointer()); - LOG_TEST_CASE((char *)pErrors->GetBufferPointer()); + //LOG_TEST_CASE((char *)pErrors->GetBufferPointer()); } if (pErrors) @@ -3091,16 +3091,16 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { hlsl << "// NOP"; break; case MAC_MOV: - hlsl << ToHlsl("dest = src0", xboxInstruction); + hlsl << ToHlsl("dest = x_mov(src0)", xboxInstruction); break; case MAC_MUL: - hlsl << ToHlsl("dest = src0 * src1", xboxInstruction); + hlsl << ToHlsl("dest = x_mul(src0, src1)", xboxInstruction); break; case MAC_ADD: - hlsl << ToHlsl("dest = src0 + src1", xboxInstruction); + hlsl << ToHlsl("dest = x_add(src0, src1)", xboxInstruction); break; case MAC_MAD: - hlsl << ToHlsl("dest = src0 * src1 + src2", xboxInstruction); + hlsl << ToHlsl("dest = x_mad(src0, src1, src2)", xboxInstruction); break; case MAC_DP3: hlsl << ToHlsl("dest = x_dp3(src0, src1)", xboxInstruction); @@ -3115,10 +3115,10 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { hlsl << ToHlsl("dest = x_dst(src0, src1)", xboxInstruction); break; case MAC_MIN: - hlsl << ToHlsl("dest = min(src0, src1)", xboxInstruction); + hlsl << ToHlsl("dest = x_min(src0, src1)", xboxInstruction); break; case MAC_MAX: - hlsl << ToHlsl("dest = max(src0, src1)", xboxInstruction); + hlsl << ToHlsl("dest = x_max(src0, src1)", xboxInstruction); break; case MAC_SLT: hlsl << ToHlsl("dest = x_slt(src0, src1)", xboxInstruction); @@ -3127,7 +3127,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) { hlsl << ToHlsl("dest = x_sge(src0, src1)", xboxInstruction); break; case MAC_ARL: - hlsl << ToHlsl("a = toXboxIndex(src0)", xboxInstruction); + hlsl << ToHlsl("a = x_arl(src0)", xboxInstruction); break; default: EmuLog(LOG_LEVEL::WARNING, "TODO message"); From 
a258d5d7593eb374c78ccf8386ecf382b4122a20 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Tue, 3 Dec 2019 22:39:10 +1300 Subject: [PATCH 21/77] file location hack --- src/core/hle/D3D8/XbVertexShader.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 354c6c948..7dd8eac02 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -2753,9 +2753,12 @@ extern HRESULT EmuRecompileVshFunction std::stringstream& pHostShaderDisassembly = std::stringstream(); //static std::ifstream t("Xb.hlsl"); - static std::ifstream t("C:\\Users\\OEM\\Desktop\\repos\\Cxbx-Reloaded\\src\\core\\hle\\D3D8\\Direct3D9\\Xb.hlsl"); - static std::string hlslTemplate((std::istreambuf_iterator(t)), + static std::ifstream t("..\\..\\..\\..\\src\\core\\hle\\D3D8\\Direct3D9\\Xb.hlsl"); + const static std::string hlslTemplate((std::istreambuf_iterator(t)), std::istreambuf_iterator()); + if (t.is_open()) { + t.close(); + } DbgVshPrintf("-- Before conversion --\n"); VshWriteShader(pShader, pXboxShaderDisassembly, pRecompiledDeclaration, FALSE); From e372a80a8db2df366a6f6ad5f6a38a1d1d847a18 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Tue, 3 Dec 2019 23:11:42 +1300 Subject: [PATCH 22/77] Workaround bad masks on single component outputs --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 074aff892..2167f8b6f 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -173,7 +173,9 @@ VS_OUTPUT main(const VS_INPUT xIn) // Output variables float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3; oPos = oD0 = oD1 = oB0 = oB1 = oT0 = oT1 = oT2 = oT3 = float4(0, 0, 0, 1); // TODO correct? - float oFog = 0, oPts = 0; + // Single component outputs + // x is write-only on Xbox. 
Use float4 as some games use incorrect masks + float4 oFog = 0, oPts = 0; // Initialize input variables v0 = xIn.v[0]; From c1ba5334552b9dbc3b1c7bf06014d2fc3dc1b0ee Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Tue, 3 Dec 2019 11:36:10 +0100 Subject: [PATCH 23/77] Include the hlsl code as a raw string Make all Xbox VSh opcodes consistent (using an x_ function or macro) Replace switch statements with table lookups, making the code much more compact --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 22 ++- src/core/hle/D3D8/XbVertexShader.cpp | 216 +++++++++------------------ 2 files changed, 88 insertions(+), 150 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 2167f8b6f..ebaaae08e 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -1,3 +1,6 @@ +// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : +R"DELIMITER( + #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox struct VS_INPUT @@ -33,6 +36,21 @@ float4 c(int index) { return hostConstants[index + 96]; } +// Generic macro's +//#define x_mov(src0) src0 + +// Macro's for MAC ('Multiply And Accumulate') opcodes +//#define x_mul(src0, src1) src0 * src1 +//#define x_add(src0, src1) src0 + src1 +//#define x_min(src0, src1) min(src0, src1) +//#define x_max(src0, src1) max(src0, src1) +//#define x_mad(src0, src1, src2) src0 * src1 + src2 + +// Macro's for ILU ('Inverse Logic Unit') opcodes +#define x_rcp(src0) rcp(src0) +#define x_rsq(src0) rsqrt(src0) + +// Xbox functions int x_arl(float src0) { // The address register should be floored // Due to rounding differences with the Xbox (and increased precision on PC?) @@ -197,7 +215,7 @@ VS_OUTPUT main(const VS_INPUT xIn) // Insert Xbox shader here - // + // // !!MUST CORRESPOND WITH THE REGEX IN EmuRecompileVshFunction!! 
// Copy variables to output struct VS_OUTPUT xOut; @@ -216,3 +234,5 @@ VS_OUTPUT main(const VS_INPUT xIn) return xOut; } + +// )DELIMITER" /* This terminates the raw string" // */ diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 7dd8eac02..054165f0b 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -912,7 +912,7 @@ static void VshWriteShader(VSH_XBOX_SHADER *pShader, std::stringstream moveConstantsToTemporaries; pDisassembly << "; Input usage declarations --\n"; - for(int i = 0; i < RegVIsUsedByShader.size(); i++){ + for(size_t i = 0; i < RegVIsUsedByShader.size(); i++){ if (RegVIsUsedByShader[i]) { if (!RegVIsPresentInDeclaration[i]) { // Log test case and skip @@ -2752,13 +2752,9 @@ extern HRESULT EmuRecompileVshFunction std::stringstream& pXboxShaderDisassembly = std::stringstream(); std::stringstream& pHostShaderDisassembly = std::stringstream(); - //static std::ifstream t("Xb.hlsl"); - static std::ifstream t("..\\..\\..\\..\\src\\core\\hle\\D3D8\\Direct3D9\\Xb.hlsl"); - const static std::string hlslTemplate((std::istreambuf_iterator(t)), - std::istreambuf_iterator()); - if (t.is_open()) { - t.close(); - } + static std::string hlslTemplate = + #include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string + ; DbgVshPrintf("-- Before conversion --\n"); VshWriteShader(pShader, pXboxShaderDisassembly, pRecompiledDeclaration, FALSE); @@ -2980,30 +2976,25 @@ std::string ToHlsl(VSH_IMD_OUTPUT& dest) { std::string ToHlsl(VSH_IMD_PARAMETER& paramMeta) { auto hlsl = std::stringstream(); - auto param = paramMeta.Parameter; - hlsl << (param.Neg ? "-" : ""); - - if (param.ParameterType == PARAM_C){ - hlsl << "c"; + if (param.Neg) { + hlsl << "-"; + } + if (param.ParameterType == PARAM_C) { // We'll use the c() function instead of direct indexing - // Only display the offset if it's not 0. if (paramMeta.IndexesWithA0_X) { - param.Address - ? 
hlsl << "(a+" << param.Address << ")" - : hlsl << "(a)"; + // Only display the offset if it's not 0. + if (param.Address != 0) { + hlsl << "c(a+" << param.Address << ")"; + } else { + hlsl << "c(a)"; + } + } else { + hlsl << "c(" << param.Address << ")"; } - else { - hlsl << "(" << param.Address << ")"; - } - } - else if (param.ParameterType == PARAM_R && param.Address == 12) { - // Replace r12 with oPos - hlsl << "oPos"; - } - else { + } else { hlsl << VshGetRegisterName(param.ParameterType) << param.Address; } @@ -3014,43 +3005,22 @@ std::string ToHlsl(VSH_IMD_PARAMETER& paramMeta) param.Swizzle[2] == SWIZZLE_Z && param.Swizzle[3] == SWIZZLE_W )) { - hlsl << "."; - // We'll try to simplify swizzles if we can - int swizzles; - // If all swizzles are the same, we only need to write one out - if (param.Swizzle[0] == param.Swizzle[1] && - param.Swizzle[0] == param.Swizzle[2] && - param.Swizzle[0] == param.Swizzle[3]) { - swizzles = 1; - } - else { - // We need to use the full swizzle - // Note we can't always remove trailing repeats, like in VS asm - // As it may change the type from float4, to float3 or float2 + unsigned swizzles = 1; + + // Otherwise, we need to use the full swizzle + if (param.Swizzle[0] != param.Swizzle[1] || + param.Swizzle[0] != param.Swizzle[2] || + param.Swizzle[0] != param.Swizzle[3]) { + // Note, we can't remove trailing repeats, like in VS asm, + // as it may change the type from float4 to float3, float2 or float1! 
swizzles = 4; } - for (int i = 0; i < swizzles; i++) - { - char Swizzle = '?'; - switch (param.Swizzle[i]) - { - case SWIZZLE_X: - Swizzle = 'x'; - break; - case SWIZZLE_Y: - Swizzle = 'y'; - break; - case SWIZZLE_Z: - Swizzle = 'z'; - break; - case SWIZZLE_W: - Swizzle = 'w'; - break; - } - hlsl << Swizzle; + hlsl << "."; + for (unsigned i = 0; i < swizzles; i++) { + hlsl << "xyzw"[param.Swizzle[i]]; } } @@ -3079,103 +3049,51 @@ std::string ToHlsl(std::string pattern, VSH_INTERMEDIATE_FORMAT& instruction) { std::string BuildShader(VSH_XBOX_SHADER* pShader) { + // HLSL strings for all MAC opcodes, indexed with VSH_MAC + static std::string VSH_MAC_HLSL[] = { + /*MAC_NOP:*/"// MAC_NOP\n", + /*MAC_MOV:*/"dest = x_mov(src0);\n", + /*MAC_MUL:*/"dest = x_mul(src0, src1);\n", + /*MAC_ADD:*/"dest = x_add(src0, src1);\n", + /*MAC_MAD:*/"dest = x_mad(src0, src1, src2);\n", + /*MAC_DP3:*/"dest = x_dp3(src0, src1);\n", + /*MAC_DPH:*/"dest = x_dph(src0, src1);\n", + /*MAC_DP4:*/"dest = x_dp4(src0, src1);\n", + /*MAC_DST:*/"dest = x_dst(src0, src1);\n", + /*MAC_MIN:*/"dest = x_min(src0, src1);\n", + /*MAC_MAX:*/"dest = x_max(src0, src1);\n", + /*MAC_SLT:*/"dest = x_slt(src0, src1);\n", + /*MAC_SGE:*/"dest = x_sge(src0, src1);\n", + /*MAC_ARL:*/"a = x_arl(src0);\n", // Note : For this MAC_ARL case, ToHlsl would always replace 'dest' with 'a', so we optimized this upfront + "// ??? VSH_MAC 14 ???;\n", + "// ??? 
VSH_MAC 15 ???;\n" // VSH_MAC 2 final values of the 4 bits are undefined/unknown TODO : Investigate their effect (if any) and emulate that as well + }; + + // HLSL strings for all ILU opcodes, indexed with VSH_ILU + static std::string VSH_ILU_HLSL[] = { + /*ILU_NOP:*/"// ILU_NOP\n", + /*ILU_MOV:*/"dest = x_mov(src0);\n", + /*ILU_RCP:*/"dest = x_rcp(src0);\n", + /*ILU_RCC:*/"dest = x_rcc(src0);\n", + /*ILU_RSQ:*/"dest = x_rsq(src0);\n", + /*ILU_EXP:*/"dest = x_exp(src0);\n", + /*ILU_LOG:*/"dest = x_log(src0);\n", + /*ILU_LIT:*/"dest = x_lit(src0);\n" // = 7 - all values of the 3 bits are used + }; + auto hlsl = std::stringstream(); - for (int i = 0; i < pShader->IntermediateCount; i++) { + VSH_INTERMEDIATE_FORMAT& xboxInstruction = pShader->Intermediate[i]; - VSH_INTERMEDIATE_FORMAT xboxInstruction = pShader->Intermediate[i]; - - if (xboxInstruction.InstructionType == IMD_MAC) - { - switch (xboxInstruction.MAC) - { - case MAC_NOP: - hlsl << "// NOP"; - break; - case MAC_MOV: - hlsl << ToHlsl("dest = x_mov(src0)", xboxInstruction); - break; - case MAC_MUL: - hlsl << ToHlsl("dest = x_mul(src0, src1)", xboxInstruction); - break; - case MAC_ADD: - hlsl << ToHlsl("dest = x_add(src0, src1)", xboxInstruction); - break; - case MAC_MAD: - hlsl << ToHlsl("dest = x_mad(src0, src1, src2)", xboxInstruction); - break; - case MAC_DP3: - hlsl << ToHlsl("dest = x_dp3(src0, src1)", xboxInstruction); - break; - case MAC_DPH: - hlsl << ToHlsl("dest = x_dph(src0, src1)", xboxInstruction); - break; - case MAC_DP4: - hlsl << ToHlsl("dest = x_dp4(src0, src1)", xboxInstruction); - break; - case MAC_DST: - hlsl << ToHlsl("dest = x_dst(src0, src1)", xboxInstruction); - break; - case MAC_MIN: - hlsl << ToHlsl("dest = x_min(src0, src1)", xboxInstruction); - break; - case MAC_MAX: - hlsl << ToHlsl("dest = x_max(src0, src1)", xboxInstruction); - break; - case MAC_SLT: - hlsl << ToHlsl("dest = x_slt(src0, src1)", xboxInstruction); - break; - case MAC_SGE: - hlsl << ToHlsl("dest = x_sge(src0, 
src1)", xboxInstruction); - break; - case MAC_ARL: - hlsl << ToHlsl("a = x_arl(src0)", xboxInstruction); - break; - default: - EmuLog(LOG_LEVEL::WARNING, "TODO message"); - } - } - else if (xboxInstruction.InstructionType == IMD_ILU) - { - switch (xboxInstruction.ILU) - { - case ILU_NOP: - hlsl << "// NOP"; - break; - case ILU_MOV: - hlsl << ToHlsl("dest = src0", xboxInstruction); - break; - case ILU_RCP: - hlsl << ToHlsl("dest = rcp(src0)", xboxInstruction); - break; - case ILU_RCC: - hlsl << ToHlsl("dest = x_rcc(src0)", xboxInstruction); - break; - case ILU_RSQ: - hlsl << ToHlsl("dest = rsqrt(src0)", xboxInstruction); - break; - case ILU_EXP: - hlsl << ToHlsl("dest = x_exp(src0)", xboxInstruction); - break; - case ILU_LOG: - hlsl << ToHlsl("dest = x_log(src0)", xboxInstruction); - break; - case ILU_LIT: - hlsl << ToHlsl("dest = x_lit(src0)", xboxInstruction); - break; - default: - EmuLog(LOG_LEVEL::WARNING, "TODO message"); - } - } - else - { + if (xboxInstruction.InstructionType == IMD_MAC) { + hlsl << ToHlsl(VSH_MAC_HLSL[xboxInstruction.MAC], xboxInstruction); + } else if (xboxInstruction.InstructionType == IMD_ILU) { + hlsl << ToHlsl(VSH_ILU_HLSL[xboxInstruction.ILU], xboxInstruction); + } else { EmuLog(LOG_LEVEL::WARNING, "TODO message"); } - - // Finish the line - hlsl << ";\n"; } + return hlsl.str(); } - From a9d3b5cdde5f2f7b42e967b1fdb56a890e053bd1 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Tue, 3 Dec 2019 11:54:07 +0100 Subject: [PATCH 24/77] Better hlsl consistency --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index ebaaae08e..4c7c325f5 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -1,8 +1,6 @@ // This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : R"DELIMITER( -#define r12 oPos // oPos and r12 are two ways of accessing the same 
register on Xbox - struct VS_INPUT { float4 v[16] : TEXCOORD; @@ -184,6 +182,7 @@ VS_OUTPUT main(const VS_INPUT xIn) // Temporary variables float4 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 1); // TODO correct? + #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox // Xbox index register int a; @@ -191,9 +190,11 @@ VS_OUTPUT main(const VS_INPUT xIn) // Output variables float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3; oPos = oD0 = oD1 = oB0 = oB1 = oT0 = oT1 = oT2 = oT3 = float4(0, 0, 0, 1); // TODO correct? + // Single component outputs // x is write-only on Xbox. Use float4 as some games use incorrect masks - float4 oFog = 0, oPts = 0; + float4 oFog, oPts; + oFog = oPts = 0; // Initialize input variables v0 = xIn.v[0]; From eaa7bbca41510842891e889f7bef6db9bb90d82e Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Wed, 4 Dec 2019 19:25:27 +1300 Subject: [PATCH 25/77] At debug log level, log shader warnings even if the shader compiled --- src/core/hle/D3D8/XbVertexShader.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 054165f0b..2b8691c75 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -2819,12 +2819,15 @@ extern HRESULT EmuRecompileVshFunction if (FAILED(hRet)) { EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); - EmuLog(LOG_LEVEL::WARNING, "%s", (char*)(pErrors)->GetBufferPointer()); - //LOG_TEST_CASE((char *)pErrors->GetBufferPointer()); } - if (pErrors) + if (pErrors) { + // Determine the log level + auto hlslErrorLogLevel = FAILED(hRet) ? 
LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG; + // Log HLSL compiler errors + EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors)->GetBufferPointer()); (pErrors)->Release(); + } } free(pShader); From b61bfd32afcecf54638e08062b1f21bcd148df17 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Wed, 4 Dec 2019 19:27:46 +1300 Subject: [PATCH 26/77] - Use MAC defines, remove functions - Fix truncation compiler warning in x_dp3 --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 61 ++++++++--------------------- 1 file changed, 17 insertions(+), 44 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 4c7c325f5..3d7621165 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -34,17 +34,18 @@ float4 c(int index) { return hostConstants[index + 96]; } -// Generic macro's -//#define x_mov(src0) src0 +// Generic macros +#define x_mov(src0) src0 -// Macro's for MAC ('Multiply And Accumulate') opcodes -//#define x_mul(src0, src1) src0 * src1 -//#define x_add(src0, src1) src0 + src1 -//#define x_min(src0, src1) min(src0, src1) -//#define x_max(src0, src1) max(src0, src1) -//#define x_mad(src0, src1, src2) src0 * src1 + src2 +// Macros for MAC ('Multiply And Accumulate') opcodes +#define x_mul(src0, src1) src0 * src1 +#define x_add(src0, src1) src0 + src1 +#define x_dst(src0, src1) dst(src0, src1) +#define x_min(src0, src1) min(src0, src1) +#define x_max(src0, src1) max(src0, src1) +#define x_mad(src0, src1, src2) src0 * src1 + src2 -// Macro's for ILU ('Inverse Logic Unit') opcodes +// Macros for ILU ('Inverse Logic Unit') opcodes #define x_rcp(src0) rcp(src0) #define x_rsq(src0) rsqrt(src0) @@ -55,36 +56,7 @@ int x_arl(float src0) { // some titles produce values just below the threshold of the next integer. 
// We can add a small bias to make sure it's bumped over the threshold // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) - return floor(src0 + 0.0001); // TODO test -} - -// TODO order functions -float4 x_mov(float4 src0) { - return src0; -} - -float4 x_add(float4 src0, float4 src1) { - return src0 + src1; -} - -float4 x_mul(float4 src0, float4 src1) { - return src0 * src1; -} - -float4 x_mad(float4 src0, float4 src1, float4 src3) { - return src0 * src1 + src3; -} - -float4 x_dst(float4 src0, float4 src1) { - return dst(src0, src1); -} - -float4 x_min(float4 src0, float4 src1) { - return src0 * src1; -} - -float4 x_max(float4 src0, float4 src1) { - return max(src0, src1); + return floor(src0 + 0.0001); } float4 x_exp(float src0) { @@ -101,16 +73,17 @@ float4 x_log(float src0) { return float4(exponent, mantissa, logResult, 1); } -float x_dp4(float4 src0, float4 src1) { - return dot(src0, src1); -} -float x_dp3(float3 src0, float3 src1) { - return dot(src0, src1); +float x_dp3(float4 src0, float4 src1) { + return dot(src0.xyz, src1.xyz); } float x_dph(float4 src0, float4 src1) { return x_dp3(src0, src1) + src1.w; +} + +float x_dp4(float4 src0, float4 src1) { + return dot(src0, src1); } float4 x_sge(float4 src0, float4 src1) { From 05f1f90603f40aecafb940908e671309a9fbc3ae Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Wed, 4 Dec 2019 21:33:01 +1300 Subject: [PATCH 27/77] fixup texcoord comments --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 3d7621165..40a26436b 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -16,9 +16,9 @@ struct VS_OUTPUT float4 oB0 : TEXCOORD4; // Backface Colour 0 float4 oB1 : TEXCOORD5; // Backface Colour 1 float4 oT0 : TEXCOORD0; // Texture Coord 0 - float4 oT1 : TEXCOORD1; // Texture Coord 0 - float4 oT2 : TEXCOORD2; 
// Texture Coord 0 - float4 oT3 : TEXCOORD3; // Texture Coord 0 + float4 oT1 : TEXCOORD1; // Texture Coord 1 + float4 oT2 : TEXCOORD2; // Texture Coord 2 + float4 oT3 : TEXCOORD3; // Texture Coord 3 }; // Constant registers From 59c8ee936fff15aca1914754904984819b5b8f74 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Wed, 4 Dec 2019 22:20:13 +1300 Subject: [PATCH 28/77] Add line numbers to shader debug --- src/core/hle/D3D8/XbVertexShader.cpp | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 2b8691c75..6ad8d2ca2 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -2675,6 +2675,20 @@ std::string VshPostProcess(std::string shader) { extern std::string BuildShader(VSH_XBOX_SHADER* pShader); +std::string DebugPrependLineNumbers(std::string shaderString) { + std::stringstream shader(shaderString); + auto debugShader = std::stringstream(); + + int i = 1; + for (std::string line; std::getline(shader, line); ) { + auto lineNumber = std::to_string(i++); + auto paddedLine = line.insert(0, 3 - lineNumber.size(), ' '); + debugShader << "/* " << lineNumber << " */ " << line << "\n"; + } + + return debugShader.str(); +} + // recompile xbox vertex shader function extern HRESULT EmuRecompileVshFunction ( @@ -2765,7 +2779,7 @@ extern HRESULT EmuRecompileVshFunction hlslTest = std::regex_replace(hlslTemplate, std::regex("// "), hlslTest); DbgVshPrintf("-- HLSL conversion 1 ---\n"); - DbgVshPrintf(hlslTest.c_str()); + DbgVshPrintf(DebugPrependLineNumbers(hlslTest).c_str()); DbgVshPrintf("-----------------------\n"); VshConvertShader(pShader, bNoReservedConstants); @@ -2777,13 +2791,13 @@ extern HRESULT EmuRecompileVshFunction // Post process the final shader auto finalHostShader = VshPostProcess(pHostShaderDisassembly.str()); - DbgVshPrintf("-- After conversion ---\n"); - DbgVshPrintf("%s", 
finalHostShader.c_str()); - DbgVshPrintf("-----------------------\n"); + //DbgVshPrintf("-- After conversion ---\n"); + //DbgVshPrintf("%s", finalHostShader.c_str()); + //DbgVshPrintf("-----------------------\n"); - DbgVshPrintf("-- HLSL conversion 2 ---\n"); - DbgVshPrintf(BuildShader(pShader).c_str()); - DbgVshPrintf("-----------------------\n"); + //DbgVshPrintf("-- HLSL conversion 2 ---\n"); + //DbgVshPrintf(BuildShader(pShader).c_str()); + //DbgVshPrintf("-----------------------\n"); // HACK: Azurik. Prevent Direct3D from trying to assemble this. if(finalHostShader == "vs.2.x\n") From bd55f17f0d3c25c393fb5924fbfd17ccf4ca0f3c Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Wed, 4 Dec 2019 22:50:10 +1300 Subject: [PATCH 29/77] - Move ILU ops together - Some ops use the .w component by default. Ensure they will use w instead of x, if the default xyzw swizzle was used --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 53 +++++++++++++++++------------ 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 40a26436b..774c5afdc 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -45,11 +45,7 @@ float4 c(int index) { #define x_max(src0, src1) max(src0, src1) #define x_mad(src0, src1, src2) src0 * src1 + src2 -// Macros for ILU ('Inverse Logic Unit') opcodes -#define x_rcp(src0) rcp(src0) -#define x_rsq(src0) rsqrt(src0) - -// Xbox functions +// Xbox MAC functions int x_arl(float src0) { // The address register should be floored // Due to rounding differences with the Xbox (and increased precision on PC?) 
@@ -59,20 +55,6 @@ int x_arl(float src0) { return floor(src0 + 0.0001); } -float4 x_exp(float src0) { - float x = pow(2, floor(src0)); - float fractional = frac(src0); - float power = pow(2, src0); - return float4(x, fractional, power, 1); -} - -float4 x_log(float src0) { - float exponent = floor(log(src0)); - float mantissa = 1 / pow(2, exponent); - float logResult = log(src0); - return float4(exponent, mantissa, logResult, 1); -} - float x_dp3(float4 src0, float4 src1) { return dot(src0.xyz, src1.xyz); @@ -102,18 +84,45 @@ float4 x_slt(float4 src0, float4 src1) { dest.z = (src0.z < src1.z) ? 1.0f : 0.0f; dest.w = (src0.w < src1.w) ? 1.0f : 0.0f; return dest; -} +} + +// Xbox ILU Functions + +float x_rcp(float4 src0) { + return 1 / src0.w; // use w component by default +} // Clamped reciprocal -float x_rcc(float src0) { +float x_rcc(float4 src0) { + float input = src0.w; // use w component by default // Calculate the reciprocal - float r = 1.0f / src0; + float r = 1.0f / input; // Clamp return (r > 0) ? 
clamp(r, 5.42101e-020, 1.84467e+019) : clamp(r, -1.84467e+019, -5.42101e-020); +} + +float x_rsq(float4 src0) { + return rsqrt(src0.w); // use w component by default +} + +float4 x_exp(float4 src0) { + float input = src0.w; // use w component by default + float x = pow(2, floor(input)); + float fractional = frac(input); + float power = pow(2, input); + return float4(x, fractional, power, 1); +} + +float4 x_log(float4 src0) { + float input = src0.w; // use w component by default + float exponent = floor(log(input)); + float mantissa = 1 / pow(2, exponent); + float logResult = log(input); + return float4(exponent, mantissa, logResult, 1); } float4 x_lit(float4 src0) { From 200b7c493e4dcfdb5fdc7e9e7d4dd6b48d45328f Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Wed, 4 Dec 2019 23:50:43 +1300 Subject: [PATCH 30/77] Use log2, exp2 --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 774c5afdc..91a19c1eb 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -111,17 +111,17 @@ float x_rsq(float4 src0) { float4 x_exp(float4 src0) { float input = src0.w; // use w component by default - float x = pow(2, floor(input)); + float x = exp2(floor(input)); float fractional = frac(input); - float power = pow(2, input); + float power = exp2(input); return float4(x, fractional, power, 1); } float4 x_log(float4 src0) { float input = src0.w; // use w component by default - float exponent = floor(log(input)); - float mantissa = 1 / pow(2, exponent); - float logResult = log(input); + float exponent = floor(log2(input)); + float mantissa = 1 / exp2(exponent); + float logResult = log2(input); return float4(exponent, mantissa, logResult, 1); } From c039562e7fad8b8393d3d6774cc8ce21901d83bb Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Thu, 5 Dec 2019 18:56:31 +0100 Subject: [PATCH 31/77] Reformat HLSL code 
Prefer HLSL functions over #defines Avoid implicit conversion warnings in HLSL template code Move scalar W selection towards HLSL helper function Apply scalar component fixup after instead of before conversion to HLSL --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 168 +++++++++++++++++---------- src/core/hle/D3D8/XbVertexShader.cpp | 36 +++--- 2 files changed, 117 insertions(+), 87 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 91a19c1eb..d82903ad2 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -26,7 +26,8 @@ extern float4 hostConstants[192]; // Map Xbox [-96, 95] to Host [0, 191] // Account for Xbox's negative constant indexes -float4 c(int index) { +float4 c(int index) +{ // Out-of-range reads return 0 if (index < -96 || index > 95) return float4(0, 0, 0, 0); @@ -34,19 +35,45 @@ float4 c(int index) { return hostConstants[index + 96]; } -// Generic macros -#define x_mov(src0) src0 +// Functions for MAC ('Multiply And Accumulate') opcodes -// Macros for MAC ('Multiply And Accumulate') opcodes -#define x_mul(src0, src1) src0 * src1 -#define x_add(src0, src1) src0 + src1 -#define x_dst(src0, src1) dst(src0, src1) -#define x_min(src0, src1) min(src0, src1) -#define x_max(src0, src1) max(src0, src1) -#define x_mad(src0, src1, src2) src0 * src1 + src2 +float4 x_mov(float4 src0) +{ + return src0; +} -// Xbox MAC functions -int x_arl(float src0) { +float4 x_mul(float4 src0, float4 src1) +{ + return src0 * src1; +} + +float4 x_add(float4 src0, float4 src1) +{ + return src0 + src1; +} + +float4 x_dst(float4 src0, float4 src1) +{ + return dst(src0, src1); +} + +float4 x_min(float4 src0, float4 src1) +{ + return min(src0, src1); +} + +float4 x_max(float4 src0, float4 src1) +{ + return max(src0, src1); +} + +float4 x_mad(float4 src0, float4 src1, float4 src2) +{ + return (src0 * src1) + src2; +} + +int x_arl(float src0) +{ // The address register should be floored // Due to 
rounding differences with the Xbox (and increased precision on PC?) // some titles produce values just below the threshold of the next integer. @@ -55,78 +82,92 @@ int x_arl(float src0) { return floor(src0 + 0.0001); } - -float x_dp3(float4 src0, float4 src1) { +float x_dp3(float4 src0, float4 src1) +{ return dot(src0.xyz, src1.xyz); } -float x_dph(float4 src0, float4 src1) { +float x_dph(float4 src0, float4 src1) +{ return x_dp3(src0, src1) + src1.w; -} - -float x_dp4(float4 src0, float4 src1) { +} + +float x_dp4(float4 src0, float4 src1) +{ return dot(src0, src1); } -float4 x_sge(float4 src0, float4 src1) { +float4 x_sge(float4 src0, float4 src1) +{ float4 dest; - dest.x = (src0.x >= src1.x) ? 1.0f : 0.0f; - dest.y = (src0.y >= src1.y) ? 1.0f : 0.0f; - dest.z = (src0.z >= src1.z) ? 1.0f : 0.0f; - dest.w = (src0.w >= src1.w) ? 1.0f : 0.0f; + dest.x = (src0.x >= src1.x) ? 1 : 0; + dest.y = (src0.y >= src1.y) ? 1 : 0; + dest.z = (src0.z >= src1.z) ? 1 : 0; + dest.w = (src0.w >= src1.w) ? 1 : 0; return dest; } -float4 x_slt(float4 src0, float4 src1) { +float4 x_slt(float4 src0, float4 src1) +{ float4 dest; - dest.x = (src0.x < src1.x) ? 1.0f : 0.0f; - dest.y = (src0.y < src1.y) ? 1.0f : 0.0f; - dest.z = (src0.z < src1.z) ? 1.0f : 0.0f; - dest.w = (src0.w < src1.w) ? 1.0f : 0.0f; + dest.x = (src0.x < src1.x) ? 1 : 0; + dest.y = (src0.y < src1.y) ? 1 : 0; + dest.z = (src0.z < src1.z) ? 1 : 0; + dest.w = (src0.w < src1.w) ? 
1 : 0; return dest; -} - -// Xbox ILU Functions - -float x_rcp(float4 src0) { - return 1 / src0.w; // use w component by default -} +} -// Clamped reciprocal -float x_rcc(float4 src0) { - float input = src0.w; // use w component by default +// Xbox ILU Functions + +float scalar_component(float4 src0) +{ + return src0.w; // use w component by default +} + +float x_rcp(float4 src0) +{ + return 1 / scalar_component(src0); +} + +float x_rcc(float4 src0) +{ + float input = scalar_component(src0); // Calculate the reciprocal - float r = 1.0f / input; + float r = 1 / input; // Clamp return (r > 0) - ? clamp(r, 5.42101e-020, 1.84467e+019) - : clamp(r, -1.84467e+019, -5.42101e-020); -} - -float x_rsq(float4 src0) { - return rsqrt(src0.w); // use w component by default -} - -float4 x_exp(float4 src0) { - float input = src0.w; // use w component by default + ? clamp(r, 5.42101e-020f, 1.84467e+019f) + : clamp(r, -1.84467e+019f, -5.42101e-020f); +} + +float x_rsq(float4 src0) +{ + return rsqrt(scalar_component(src0)); +} + +float4 x_exp(float4 src0) +{ + float input = scalar_component(src0); float x = exp2(floor(input)); float fractional = frac(input); float power = exp2(input); return float4(x, fractional, power, 1); } -float4 x_log(float4 src0) { - float input = src0.w; // use w component by default +float4 x_log(float4 src0) +{ + float input = scalar_component(src0); float exponent = floor(log2(input)); float mantissa = 1 / exp2(exponent); float logResult = log2(input); return float4(exponent, mantissa, logResult, 1); } -float4 x_lit(float4 src0) { - const float epsilon = 1.0 / 256.0; +float4 x_lit(float4 src0) +{ + const float epsilon = 1.0f / 256.0f; float diffuse = src0.x; float blinn = src0.y; float specPower = clamp(src0.w, -(128 - epsilon), (128 - epsilon)); @@ -140,7 +181,8 @@ float4 x_lit(float4 src0) { return dest; } -float4 reverseScreenspaceTransform(float4 oPos) { +float4 reverseScreenspaceTransform(float4 oPos) +{ // On Xbox, oPos should contain the vertex 
position in screenspace // Conventionally, each Xbox Vertex Shader includes instructions like this // mul oPos.xyz, r12, c-38 @@ -149,9 +191,9 @@ float4 reverseScreenspaceTransform(float4 oPos) { // where c-37 and c-38 are reserved transform values // Lets hope c-37 and c-38 contain the conventional values - oPos.xyz -= c(-37); // reverse offset + oPos.xyz -= (float3)c(-37); // reverse offset oPos.xyz *= oPos.w; // reverse perspective divide - oPos.xyz /= c(-38); // reverse scale + oPos.xyz /= (float3)c(-38); // reverse scale return oPos; } @@ -161,21 +203,19 @@ VS_OUTPUT main(const VS_INPUT xIn) // Input registers float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; - // Temporary variables + // Temporary registers float4 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 1); // TODO correct? - #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox + #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox - // Xbox index register - int a; + // Address (index) register + int a0_x; // Output variables float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3; - oPos = oD0 = oD1 = oB0 = oB1 = oT0 = oT1 = oT2 = oT3 = float4(0, 0, 0, 1); // TODO correct? - + oPos = oD0 = oD1 = oB0 = oB1 = oT0 = oT1 = oT2 = oT3 = float4(0, 0, 0, 1); // Pre-initialize w component of outputs to 1 // Single component outputs - // x is write-only on Xbox. Use float4 as some games use incorrect masks - float4 oFog, oPts; + float4 oFog, oPts; // x is write-only on Xbox. 
Use float4 as some games use incorrect masks oFog = oPts = 0; // Initialize input variables diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 6ad8d2ca2..a12ccf875 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -1178,11 +1178,6 @@ static boolean VshAddInstructionMAC_ARL(VSH_SHADER_INSTRUCTION *pInstruction, return TRUE; } -// Dxbx addition : Scalar instructions reading from W should read from X instead -static boolean DxbxFixupScalarParameter(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader, - VSH_PARAMETER *pParameter); - static boolean VshAddInstructionILU_R(VSH_SHADER_INSTRUCTION *pInstruction, VSH_XBOX_SHADER *pShader, boolean IsCombined) @@ -1260,9 +1255,6 @@ static void VshConvertToIntermediate(VSH_SHADER_INSTRUCTION *pInstruction, // +ILU boolean IsCombined = FALSE; - // Dxbx note : Scalar instructions read from C, but use X instead of W, fix that : - DxbxFixupScalarParameter(pInstruction, pShader, &pInstruction->C); - if(VshAddInstructionMAC_R(pInstruction, pShader, IsCombined)) { if(HasMACO(pInstruction) || @@ -1333,16 +1325,11 @@ static inline void VshSetOutputMask(VSH_IMD_OUTPUT* pOutput, } // Dxbx addition : Scalar instructions reading from W should read from X instead -static boolean DxbxFixupScalarParameter(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader, - VSH_PARAMETER *pParameter) +static void DxbxFixupScalarParameter(VSH_INTERMEDIATE_FORMAT *pInstruction, VSH_IMD_PARAMETER *pParameter) { - boolean Result; - // The DirectX vertex shader language specifies that the exp, log, rcc, rcp, and rsq instructions // all operate on the "w" component of the input. But the microcode versions of these instructions // actually operate on the "x" component of the input. 
- Result = false; // Test if this is a scalar instruction : if (pInstruction->ILU == ILU_RCP || @@ -1351,10 +1338,10 @@ static boolean DxbxFixupScalarParameter(VSH_SHADER_INSTRUCTION *pInstruction, pInstruction->ILU == ILU_LOG) { // Test if this parameter reads all components, including W (TODO : Or should we fixup any W reading swizzle?) : - if ((pParameter->Swizzle[0] == SWIZZLE_X) - && (pParameter->Swizzle[1] == SWIZZLE_Y) - && (pParameter->Swizzle[2] == SWIZZLE_Z) - && (pParameter->Swizzle[3] == SWIZZLE_W)) + if ((pParameter->Parameter.Swizzle[0] == SWIZZLE_X) + && (pParameter->Parameter.Swizzle[1] == SWIZZLE_Y) + && (pParameter->Parameter.Swizzle[2] == SWIZZLE_Z) + && (pParameter->Parameter.Swizzle[3] == SWIZZLE_W)) { // Change the read from W into a read from X (this fixes the XDK VolumeLight sample) : VshSetSwizzle(pParameter, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); @@ -1362,8 +1349,6 @@ static boolean DxbxFixupScalarParameter(VSH_SHADER_INSTRUCTION *pInstruction, Result = true; } } - - return Result; } /* @@ -1536,6 +1521,9 @@ static boolean VshConvertShader(VSH_XBOX_SHADER *pShader, // Combining not supported in vs.1.1 pIntermediate->IsCombined = FALSE; + // Dxbx note : Scalar instructions read from C, but use X instead of W, fix that : + DxbxFixupScalarParameter(pIntermediate, &(pIntermediate->Parameters[0])); + if(pIntermediate->Output.Type == IMD_OUTPUT_O && (pIntermediate->Output.Address == OREG_OPTS || pIntermediate->Output.Address == OREG_OFOG)) { // The PC shader assembler doesn't like masks on scalar registers @@ -2841,7 +2829,9 @@ extern HRESULT EmuRecompileVshFunction // Log HLSL compiler errors EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors)->GetBufferPointer()); (pErrors)->Release(); - } + } + + // TODO : If compiling hlsl failed, fall back on assembling finalHostShader? } free(pShader); @@ -3004,9 +2994,9 @@ std::string ToHlsl(VSH_IMD_PARAMETER& paramMeta) if (paramMeta.IndexesWithA0_X) { // Only display the offset if it's not 0. 
if (param.Address != 0) { - hlsl << "c(a+" << param.Address << ")"; + hlsl << "c(a0_x+" << param.Address << ")"; } else { - hlsl << "c(a)"; + hlsl << "c(a0_x)"; } } else { hlsl << "c(" << param.Address << ")"; From 46fbfad52dc76784aabf690f8162ff12c0ddcd2c Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Fri, 6 Dec 2019 19:03:54 +0100 Subject: [PATCH 32/77] Remove assembly version and replace regex with dedicated streaming --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 35 +- src/core/hle/D3D8/XbVertexShader.cpp | 1113 +++----------------------- 2 files changed, 130 insertions(+), 1018 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index d82903ad2..581044e2a 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -1,6 +1,6 @@ -// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : -R"DELIMITER( - +// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : +R"DELIMITER( +// Xbox HLSL vertex shader (template populated at runtime) struct VS_INPUT { float4 v[16] : TEXCOORD; @@ -21,19 +21,8 @@ struct VS_OUTPUT float4 oT3 : TEXCOORD3; // Texture Coord 3 }; -// Constant registers -extern float4 hostConstants[192]; - -// Map Xbox [-96, 95] to Host [0, 191] -// Account for Xbox's negative constant indexes -float4 c(int index) -{ - // Out-of-range reads return 0 - if (index < -96 || index > 95) - return float4(0, 0, 0, 0); - - return hostConstants[index + 96]; -} +// Xbox constant registers +extern uniform float4 c[192] : register(c0); // Functions for MAC ('Multiply And Accumulate') opcodes @@ -188,12 +177,12 @@ float4 reverseScreenspaceTransform(float4 oPos) // mul oPos.xyz, r12, c-38 // +rcc r1.x, r12.w // mad oPos.xyz, r12, r1.x, c-37 - // where c-37 and c-38 are reserved transform values + // where c-37 and c-38 are reserved transform values // Lets hope c-37 and c-38 contain the conventional values - oPos.xyz -= 
(float3)c(-37); // reverse offset + oPos.xyz -= (float3)c[-37 + 96]; // reverse offset oPos.xyz *= oPos.w; // reverse perspective divide - oPos.xyz /= (float3)c(-38); // reverse scale + oPos.xyz /= (float3)c[-38 + 96]; // reverse scale return oPos; } @@ -236,10 +225,8 @@ VS_OUTPUT main(const VS_INPUT xIn) v14 = xIn.v[14]; v15 = xIn.v[15]; - // Insert Xbox shader here - - // // !!MUST CORRESPOND WITH THE REGEX IN EmuRecompileVshFunction!! - + // Xbox shader program +// // Copy variables to output struct VS_OUTPUT xOut; @@ -258,4 +245,4 @@ VS_OUTPUT main(const VS_INPUT xIn) return xOut; } -// )DELIMITER" /* This terminates the raw string" // */ +// End of vertex shader )DELIMITER" /* This terminates the raw string" // */ diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index a12ccf875..bc0237bdc 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -41,16 +41,6 @@ #include #include -//#define CXBX_USE_VS30 // Separate the port to Vertex Shader model 3.0 from the port to Direct3D9 -#ifdef CXBX_USE_VS30 - #define VSH_MAX_INSTRUCTION_COUNT VSH_VS30_MAX_INSTRUCTION_COUNT // == 512 -#else - #define VSH_MAX_INSTRUCTION_COUNT VSH_VS2X_MAX_INSTRUCTION_COUNT // == 256 -#endif - -// Internal Vertex Shader version (mustn't conflict with any VERSION_XVS*) -#define VERSION_CXBX 0x7863 // 'cx' Cxbx vertex shader, not an official value, used in VshConvertShader() and VshWriteShader() - #define DbgVshPrintf \ LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) \ if(g_bPrintfOn) printf @@ -68,8 +58,6 @@ typedef enum _VSH_SWIZZLE } VSH_SWIZZLE; -typedef struct DxbxSwizzles { VSH_SWIZZLE s[4]; } DxbxSwizzles; - typedef DWORD DxbxMask, *PDxbxMask; @@ -423,38 +411,6 @@ static const VSH_OPCODE_PARAMS g_OpCodeParams_MAC[] = { /*ILU_NOP, MAC_ARL, */ TRUE, FALSE, FALSE } }; -static const char* MAC_OpCode[] = -{ - "nop", - "mov", - "mul", - "add", - "mad", - "dp3", - "dph", - "dp4", - "dst", - "min", - "max", - "slt", - "sge", 
- "mova", // really "arl" Dxbx note : Alias for 'mov a0.x' - "???", - "???" -}; - -static const char* ILU_OpCode[] = -{ - "nop", - "mov", - "rcp", - "rcc", - "rsq", - "expp", // The Xbox EXPP instruction behaves like vs_1_1 - "log", - "lit" -}; - static const char* OReg_Name[] = { "oPos", @@ -475,8 +431,7 @@ static const char* OReg_Name[] = "a0.x" }; -std::array RegVIsPresentInDeclaration; -std::array RegVIsUsedByShader; +// TODO : Reinstate and use : std::array RegVIsPresentInDeclaration; /* TODO : map non-FVF Xbox vertex shader handle to CxbxVertexShader (a struct containing a host Xbox vertex shader handle and the original members) std::unordered_map g_CxbxVertexShaders; @@ -683,85 +638,6 @@ static char *VshGetRegisterName(VSH_PARAMETER_TYPE ParameterType) } } -static void VshWriteOutputMask(boolean *OutputMask, - std::stringstream& pDisassembly) -{ - if(OutputMask[0] && OutputMask[1] && OutputMask[2] && OutputMask[3]) - { - // All components are there, no need to print the mask - return; - } - pDisassembly << "." << (OutputMask[0] ? "x" : "") - << (OutputMask[1] ? "y" : "") - << (OutputMask[2] ? "z" : "") - << (OutputMask[3] ? "w" : ""); -} - -static void VshWriteParameter(VSH_IMD_PARAMETER *pParameter, - std::stringstream& pDisassembly) -{ - pDisassembly << ", " << (pParameter->Parameter.Neg ? "-" : "") << VshGetRegisterName(pParameter->Parameter.ParameterType); - if(pParameter->Parameter.ParameterType == PARAM_C && pParameter->IndexesWithA0_X) - { - // Only display the offset if it's not 0. 
- if(pParameter->Parameter.Address) - { - pDisassembly << "[a0.x+" << pParameter->Parameter.Address << "]"; - } - else - { - pDisassembly << "[a0.x]"; - } - } - else - { - pDisassembly << pParameter->Parameter.Address; - } - // Only bother printing the swizzle if it is not .xyzw - if(!(pParameter->Parameter.Swizzle[0] == SWIZZLE_X && - pParameter->Parameter.Swizzle[1] == SWIZZLE_Y && - pParameter->Parameter.Swizzle[2] == SWIZZLE_Z && - pParameter->Parameter.Swizzle[3] == SWIZZLE_W)) - { - int i; - - pDisassembly << "."; - for (i = 0; i < 4; i++) - { - int j; - char Swizzle = '?'; - switch(pParameter->Parameter.Swizzle[i]) - { - case SWIZZLE_X: - Swizzle = 'x'; - break; - case SWIZZLE_Y: - Swizzle = 'y'; - break; - case SWIZZLE_Z: - Swizzle = 'z'; - break; - case SWIZZLE_W: - Swizzle = 'w'; - break; - } - pDisassembly << Swizzle; - for (j = i; j < 4; j++) - { - if(pParameter->Parameter.Swizzle[i] != pParameter->Parameter.Swizzle[j]) - { - break; - } - } - if(j == 4) - { - break; - } - } - } -} - - char* XboxVertexRegisterAsString(DWORD VertexRegister) { switch (VertexRegister) @@ -870,151 +746,6 @@ D3DDECLUSAGE Xb2PCRegisterType extern D3DCAPS g_D3DCaps; -enum { - X_VSH_TEMPORARY_REGISTER_COUNT = 12, // For Xbox temporary registers r0 to r11, mapped one-on-one to host - X_VSH_TEMP_OPOS = 12, // Used as intermediate storage for oPos (which Xbox can read through r12) - // X_VSH_TEMP_OFOG, // Enable once we treat oFog similar to oPos - // X_VSH_TEMP_OPTS, // Enable once we treat oPts similar to oPos - X_VSH_TEMP_SCRATCH = 13, // Used as intermediate storage in Xbox-to-host opcode conversion - X_VSH_TEMP_VERTEXREGBASE = 14 // Used for (1 up to 16) SetVertexData4f constants -}; - -static void VshWriteShader(VSH_XBOX_SHADER *pShader, - std::stringstream& pDisassembly, - D3DVERTEXELEMENT *pRecompiled, - boolean Truncate) -{ - switch(pShader->ShaderHeader.Version) - { - case VERSION_CXBX: -#ifdef CXBX_USE_VS30 - pDisassembly << "vs.3.0\n"; -#else - pDisassembly << 
"vs.2.x\n"; -#endif - break; - case VERSION_XVS: - pDisassembly << "xvs.1.1\n"; - break; - case VERSION_XVSS: - pDisassembly << "xvss.1.1\n"; - break; - case VERSION_XVSW: - pDisassembly << "xvsw.1.1\n"; - break; - default: - break; - } - - // Ensure extra temporary registers are assigned at the beginning, as stand-ins for undeclared v registers - // Abusing the truncate flag, which implies we're writing the final host shader - if (Truncate) { - std::stringstream moveConstantsToTemporaries; - - pDisassembly << "; Input usage declarations --\n"; - for(size_t i = 0; i < RegVIsUsedByShader.size(); i++){ - if (RegVIsUsedByShader[i]) { - if (!RegVIsPresentInDeclaration[i]) { - // Log test case and skip - // Any registers hitting this critera were already replaced with constant/temporary reads - // To correctly use the values given in SetVertexData4f. - // We need to move these constant values to temporaries so they can be used as input alongside other constants! - // We count down from the highest available on the host because Xbox titles don't use values that high, and we read from c192 (one above maximum Xbox c191 constant) and up - moveConstantsToTemporaries << "mov r" << (X_VSH_TEMP_VERTEXREGBASE + i) << ", c" << (CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + i) << "\n"; - // test-case : Blade II (before menu's) - // test-case : Namco Museum 50th Anniversary (at boot) - // test-case : Pac-Man World 2 (at boot) - // test-case : The Simpsons Road Rage (leaving menu's, before entering in-game) - // test-case : The SpongeBob SquarePants Movie (before menu's) - LOG_TEST_CASE("Shader uses undeclared Vertex Input Registers"); - continue; - } - - // dcl_texcoord can be useds for any user-defined data - // We need this because there is no reliable way to detect the real usage - // Xbox has no concept of 'usage types', it only requires a list of attribute register numbers. 
- // So we treat them all as 'user-defined' - pDisassembly << "dcl_texcoord" << i << " v" << i << "\n"; - } - } - - pDisassembly << moveConstantsToTemporaries.str(); - } - - for (int i = 0; i < pShader->IntermediateCount && (i < VSH_MAX_INSTRUCTION_COUNT || !Truncate); i++) - { - VSH_INTERMEDIATE_FORMAT *pIntermediate = &pShader->Intermediate[i]; - - if(i == VSH_MAX_INSTRUCTION_COUNT) - { - pDisassembly << "; -- Passing the truncation limit --\n"; - } - - // Writing combining sign if neccessary - if(pIntermediate->IsCombined) - { - pDisassembly << "+"; - } - - // Print the op code - if(pIntermediate->InstructionType == IMD_MAC) - { - // Dxbx addition : Safeguard against incorrect MAC opcodes : - if (pIntermediate->MAC > MAC_ARL) - pDisassembly << "??? "; - else - pDisassembly << MAC_OpCode[pIntermediate->MAC] << " "; - } - else // IMD_ILU - { - // Dxbx addition : Safeguard against incorrect ILU opcodes : - if (pIntermediate->ILU > ILU_LIT) - pDisassembly << "??? "; - else - pDisassembly << ILU_OpCode[pIntermediate->ILU] << " "; - } - - // Print the output parameter - if(pIntermediate->Output.Type == IMD_OUTPUT_A0X) - { - pDisassembly << "a0.x"; - } - else - { - switch(pIntermediate->Output.Type) - { - case IMD_OUTPUT_C: - pDisassembly << "c" << pIntermediate->Output.Address; - break; - case IMD_OUTPUT_R: - pDisassembly << "r" << pIntermediate->Output.Address; - break; - case IMD_OUTPUT_O: - // Dxbx addition : Safeguard against incorrect VSH_OREG_NAME values : - if ((int)pIntermediate->Output.Address > OREG_A0X) - ; // don't add anything - else - pDisassembly << OReg_Name[pIntermediate->Output.Address]; - break; - default: - CxbxKrnlCleanup("Invalid output register in vertex shader!"); - break; - } - VshWriteOutputMask(pIntermediate->Output.Mask, pDisassembly); - } - // Print the parameters - for (int p = 0; p < 3; p++) - { - VSH_IMD_PARAMETER *pParameter = &pIntermediate->Parameters[p]; - if(pParameter->Active) - { - VshWriteParameter(pParameter, pDisassembly); - } 
- } - pDisassembly << "\n"; - } -} - static void VshAddParameter(VSH_PARAMETER *pParameter, boolean a0x, VSH_IMD_PARAMETER *pIntermediateParameter) @@ -1312,423 +1043,6 @@ static inline void VshSetSwizzle(VSH_IMD_PARAMETER *pParameter, VshSetSwizzle(&pParameter->Parameter, x, y, z, w); } -static inline void VshSetOutputMask(VSH_IMD_OUTPUT* pOutput, - boolean MaskX, - boolean MaskY, - boolean MaskZ, - boolean MaskW) -{ - pOutput->Mask[0] = MaskX; - pOutput->Mask[1] = MaskY; - pOutput->Mask[2] = MaskZ; - pOutput->Mask[3] = MaskW; -} - -// Dxbx addition : Scalar instructions reading from W should read from X instead -static void DxbxFixupScalarParameter(VSH_INTERMEDIATE_FORMAT *pInstruction, VSH_IMD_PARAMETER *pParameter) -{ - // The DirectX vertex shader language specifies that the exp, log, rcc, rcp, and rsq instructions - // all operate on the "w" component of the input. But the microcode versions of these instructions - // actually operate on the "x" component of the input. - - // Test if this is a scalar instruction : - if (pInstruction->ILU == ILU_RCP || - pInstruction->ILU == ILU_RCC || - pInstruction->ILU == ILU_RSQ || - pInstruction->ILU == ILU_LOG) - { - // Test if this parameter reads all components, including W (TODO : Or should we fixup any W reading swizzle?) 
: - if ((pParameter->Parameter.Swizzle[0] == SWIZZLE_X) - && (pParameter->Parameter.Swizzle[1] == SWIZZLE_Y) - && (pParameter->Parameter.Swizzle[2] == SWIZZLE_Z) - && (pParameter->Parameter.Swizzle[3] == SWIZZLE_W)) - { - // Change the read from W into a read from X (this fixes the XDK VolumeLight sample) : - VshSetSwizzle(pParameter, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); - DbgVshPrintf("Dxbx fixup on scalar instruction applied; Changed read of uninitialized W into a read of X!\n"); - Result = true; - } - } -} - -/* - mul oPos.xyz, r12, c-38 - +rcc r1.x, r12.w - - mad oPos.xyz, r12, r1.x, c-37 -*/ -static void VshRemoveScreenSpaceInstructions(VSH_XBOX_SHADER *pShader) -{ - int16_t PosC38 = -1; - int deleted = 0; - - for (int i = 0; i < pShader->IntermediateCount; i++) - { - VSH_INTERMEDIATE_FORMAT* pIntermediate = &pShader->Intermediate[i]; - - for (int k = 0; k < 3; k++) - { - if(pIntermediate->Parameters[k].Active) - { - if(pIntermediate->Parameters[k].Parameter.ParameterType == PARAM_C && - !pIntermediate->Parameters[k].IndexesWithA0_X) - { - if(pIntermediate->Parameters[k].Parameter.Address == -37) - { - // Found c-37, remove the instruction - if(k == 2 && - pIntermediate->Parameters[1].Active && - pIntermediate->Parameters[1].Parameter.ParameterType == PARAM_R) - { - DbgVshPrintf("PosC38 = %d i = %d\n", PosC38, i); - for (int j = (i-1); j >= 0; j--) - { - VSH_INTERMEDIATE_FORMAT* pIntermediate1W = &pShader->Intermediate[j]; - // Time to start searching for +rcc r#.x, r12.w - if(pIntermediate1W->InstructionType == IMD_ILU && - pIntermediate1W->ILU == ILU_RCC && - pIntermediate1W->Output.Type == IMD_OUTPUT_R && - pIntermediate1W->Output.Address == - pIntermediate->Parameters[1].Parameter.Address) - { - DbgVshPrintf("Deleted +rcc r1.x, r12.w\n"); - VshDeleteIntermediate(pShader, j); - deleted++; - i--; - //j--; - break; - } - } - } - VshDeleteIntermediate(pShader, i); - deleted++; - i--; - DbgVshPrintf("Deleted mad oPos.xyz, r12, r1.x, c-37\n"); - break; - 
} - else if(pIntermediate->Parameters[k].Parameter.Address == -38) - { - VshDeleteIntermediate(pShader, i); - PosC38 = i; - deleted++; - i--; - DbgVshPrintf("Deleted mul oPos.xyz, r12, c-38\n"); - } - } - } - } - } - - // If we couldn't find the generic screen space transformation we're - // assuming that the shader writes direct screen coordinates that must be - // normalized. This hack will fail if (a) the shader uses custom screen - // space transformation, (b) reads r13 or r12 after we have written to - // them, or (c) doesn't reserve c-38 and c-37 for scale and offset. - if(deleted != 3) - { - EmuLog(LOG_LEVEL::WARNING, "Applying screen space vertex shader patching hack!"); - for (int i = 0; i < pShader->IntermediateCount; i++) - { - VSH_INTERMEDIATE_FORMAT* pIntermediate = &pShader->Intermediate[i]; - - // Find instructions outputting to oPos. - if( pIntermediate->Output.Type == IMD_OUTPUT_O && - pIntermediate->Output.Address == OREG_OPOS) - { - // Redirect output to r12. - pIntermediate->Output.Type = IMD_OUTPUT_R; - pIntermediate->Output.Address = X_VSH_TEMP_OPOS; - - // Scale r12 to r13. 
(mul r13.[mask], r12, c58) - VSH_INTERMEDIATE_FORMAT MulIntermediate; - MulIntermediate.IsCombined = FALSE; - MulIntermediate.InstructionType = IMD_MAC; - MulIntermediate.MAC = MAC_MUL; - MulIntermediate.Output.Type = IMD_OUTPUT_R; - MulIntermediate.Output.Address = X_VSH_TEMP_SCRATCH; - MulIntermediate.Output.Mask[0] = pIntermediate->Output.Mask[0]; - MulIntermediate.Output.Mask[1] = pIntermediate->Output.Mask[1]; - MulIntermediate.Output.Mask[2] = pIntermediate->Output.Mask[2]; - MulIntermediate.Output.Mask[3] = pIntermediate->Output.Mask[3]; - MulIntermediate.Parameters[0].Active = TRUE; - MulIntermediate.Parameters[0].IndexesWithA0_X = FALSE; - MulIntermediate.Parameters[0].Parameter.ParameterType = PARAM_R; - MulIntermediate.Parameters[0].Parameter.Address = X_VSH_TEMP_OPOS; - MulIntermediate.Parameters[0].Parameter.Neg = FALSE; - VshSetSwizzle(&MulIntermediate.Parameters[0], SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); - MulIntermediate.Parameters[1].Active = TRUE; - MulIntermediate.Parameters[1].IndexesWithA0_X = FALSE; - MulIntermediate.Parameters[1].Parameter.ParameterType = PARAM_C; - MulIntermediate.Parameters[1].Parameter.Address = ConvertCRegister(X_D3DSCM_RESERVED_CONSTANT_SCALE); - MulIntermediate.Parameters[1].Parameter.Neg = FALSE; - VshSetSwizzle(&MulIntermediate.Parameters[1], SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); - MulIntermediate.Parameters[2].Active = FALSE; - VshInsertIntermediate(pShader, &MulIntermediate, ++i); - - // Add offset with r13 to oPos (add oPos.[mask], r13, c59) - VSH_INTERMEDIATE_FORMAT AddIntermediate = MulIntermediate; - AddIntermediate.MAC = MAC_ADD; - AddIntermediate.Output.Type = IMD_OUTPUT_O; - AddIntermediate.Output.Address = OREG_OPOS; - AddIntermediate.Parameters[0].Parameter.ParameterType = PARAM_R; - AddIntermediate.Parameters[0].Parameter.Address = X_VSH_TEMP_SCRATCH; - AddIntermediate.Parameters[1].Parameter.Address = ConvertCRegister(X_D3DSCM_RESERVED_CONSTANT_OFFSET); - VshInsertIntermediate(pShader, 
&AddIntermediate, ++i); - } - } - } -} - -static void VshRemoveUnsupportedObRegisters(VSH_XBOX_SHADER *pShader) -{ - int deleted = 0; - - for (int i = 0; i < pShader->IntermediateCount; i++) { - VSH_INTERMEDIATE_FORMAT* pIntermediate = &pShader->Intermediate[i]; - - if (pIntermediate->Output.Type == IMD_OUTPUT_O && (pIntermediate->Output.Address == OREG_OB0 || pIntermediate->Output.Address == OREG_OB1)) { - DbgVshPrintf("Deleted unsupported write to %s\n", OReg_Name[pIntermediate->Output.Address]); - VshDeleteIntermediate(pShader, i); - i--; - } - } -} - -// Converts the intermediate format vertex shader to DirectX 8/9 format -static boolean VshConvertShader(VSH_XBOX_SHADER *pShader, - boolean bNoReservedConstants -) -{ - // TODO: What about state shaders and such? - - pShader->ShaderHeader.Version = VERSION_CXBX; - - // Search for the screen space instructions, and remove them - if(!bNoReservedConstants) - { - VshRemoveScreenSpaceInstructions(pShader); - } - - // Windows does not support back-facing colours, so we remove them from the shaders - // Test Case: Panzer Dragoon Orta - VshRemoveUnsupportedObRegisters(pShader); - - // TODO: Add routine for compacting r register usage so that at least one is freed (two if dph and r12) - - for (int i = 0; i < pShader->IntermediateCount; i++) - { - VSH_INTERMEDIATE_FORMAT* pIntermediate = &pShader->Intermediate[i]; - // Combining not supported in vs.1.1 - pIntermediate->IsCombined = FALSE; - - // Dxbx note : Scalar instructions read from C, but use X instead of W, fix that : - DxbxFixupScalarParameter(pIntermediate, &(pIntermediate->Parameters[0])); - - if(pIntermediate->Output.Type == IMD_OUTPUT_O && (pIntermediate->Output.Address == OREG_OPTS || pIntermediate->Output.Address == OREG_OFOG)) - { - // The PC shader assembler doesn't like masks on scalar registers - VshSetOutputMask(&pIntermediate->Output, TRUE, TRUE, TRUE, TRUE); - - // Fix when mad or mov to a scaler input does not use a replicate swizzle - // MAD Test 
case: Panzer Dragoon Orta - // MOV Test case: DOA3, Mechassault (Const) - // MUL Test case: Amped - // TODO Previously we applied this fix for specified instructions - // When should we not apply the correction? - if (true) - { - // Clear all but the first swizzle for each parameter - // TODO: Is this sufficient? Perhaps we need to be smart about which swizzle to select - for (int param = 0; param < 3; param++) { - pIntermediate->Parameters[param].Parameter.Swizzle[1] = pIntermediate->Parameters[param].Parameter.Swizzle[0]; - pIntermediate->Parameters[param].Parameter.Swizzle[2] = pIntermediate->Parameters[param].Parameter.Swizzle[0]; - pIntermediate->Parameters[param].Parameter.Swizzle[3] = pIntermediate->Parameters[param].Parameter.Swizzle[0]; - } - } - } - - if(pIntermediate->InstructionType == IMD_ILU && pIntermediate->ILU == ILU_RCC) - { - // Convert rcc to rcp - pIntermediate->ILU = ILU_RCP; - } - - auto sw = pIntermediate->Parameters[0].Parameter.Swizzle; - bool singleSwizzle = sw[0] == sw[1] && sw[1] == sw[2] && sw[2] == sw[3]; - - if (!singleSwizzle) - { - // Fix when RSQ reads from unitialized components - if (pIntermediate->InstructionType == IMD_ILU && pIntermediate->ILU == ILU_RSQ) { - int swizzle = (pIntermediate->Output.Mask[0]) | (pIntermediate->Output.Mask[1] << 1) | (pIntermediate->Output.Mask[2] << 2) | (pIntermediate->Output.Mask[3] << 3); - switch (swizzle) - { - case 1: - VshSetSwizzle(&pIntermediate->Parameters[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); - break; - case 2: - VshSetSwizzle(&pIntermediate->Parameters[0], SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y); - break; - case 4: - VshSetSwizzle(&pIntermediate->Parameters[0], SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z); - break; - case 8: - VshSetSwizzle(&pIntermediate->Parameters[0], SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); - break; - case 15: - default: - LOG_TEST_CASE("rsq instruction with invalid swizzle"); - break; - } - } - } - - for (int j = 0; j < 3; j++) - { - 
//if(pIntermediate->Parameters[j].Active) - { - // Make constant registers range from 0 to 191 instead of -96 to 95 - if (pIntermediate->Parameters[j].Parameter.ParameterType == PARAM_C) - { - //if(pIntermediate->Parameters[j].Parameter.Address < 0) - pIntermediate->Parameters[j].Parameter.Address += X_D3DSCM_CORRECTION; - } - - if (pIntermediate->Parameters[j].Parameter.ParameterType == PARAM_V) { - RegVIsUsedByShader[pIntermediate->Parameters[j].Parameter.Address] = TRUE; - - if (!RegVIsPresentInDeclaration[pIntermediate->Parameters[j].Parameter.Address]) { - // This vertex register was not declared and therefore is not present within the Vertex Data object - // We read from temporary registers instead, that are set based on constants, in-turn, set by SetVertexData4f - // We count down from the highest available on the host because Xbox titles don't use values that high, and we read from c192 (one above maximum Xbox c191 constant) and up - pIntermediate->Parameters[j].Parameter.ParameterType = PARAM_R; - pIntermediate->Parameters[j].Parameter.Address += X_VSH_TEMP_VERTEXREGBASE; - } - } - } - } - - // Make constant registers range from 0 to 191 instead of -96 to 95 - if(pIntermediate->Output.Type == IMD_OUTPUT_C) - { - //if(pIntermediate->Output.Address < 0) - pIntermediate->Output.Address += X_D3DSCM_CORRECTION; - } - - - - if(pIntermediate->InstructionType == IMD_MAC && pIntermediate->MAC == MAC_DPH) - { - // 2010/01/12 - revel8n - attempt to alleviate conversion issues relate to the dph instruction - - // Replace dph with dp3 and add - if(pIntermediate->Output.Type != IMD_OUTPUT_R) - { - // TODO: Complete dph support - EmuLog(LOG_LEVEL::WARNING, "Can't simulate dph for other than output r registers (yet)"); - - VSH_INTERMEDIATE_FORMAT TmpIntermediate = *pIntermediate; - - // modify the instructions - pIntermediate->MAC = MAC_DP3; - pIntermediate->Output.Type = IMD_OUTPUT_R; - pIntermediate->Output.Address = X_VSH_TEMP_SCRATCH; - 
VshSetOutputMask(&pIntermediate->Output, TRUE, TRUE, TRUE, TRUE); - - TmpIntermediate.MAC = MAC_ADD; - TmpIntermediate.Parameters[0].IndexesWithA0_X = FALSE; - TmpIntermediate.Parameters[0].Parameter.ParameterType = PARAM_R; - TmpIntermediate.Parameters[0].Parameter.Address = X_VSH_TEMP_SCRATCH; - TmpIntermediate.Parameters[0].Parameter.Neg = FALSE; - VshSetSwizzle(&TmpIntermediate.Parameters[1], SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); - // Is this output register a scalar - if (TmpIntermediate.Output.Type == IMD_OUTPUT_O) { - if ((TmpIntermediate.Output.Address == OREG_OFOG) || (TmpIntermediate.Output.Address == OREG_OPTS)) { - // This fixes test case "Namco Museum 50th Anniversary" - // The PC shader assembler doesn't like masks on scalar registers - VshSetOutputMask(&TmpIntermediate.Output, TRUE, TRUE, TRUE, TRUE); - // Make the first source parameter use the w swizzle too - VshSetSwizzle(&TmpIntermediate.Parameters[0], SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); - } - } - - VshInsertIntermediate(pShader, &TmpIntermediate, i + 1); - } - else - { - VSH_INTERMEDIATE_FORMAT TmpIntermediate = *pIntermediate; - pIntermediate->MAC = MAC_DP3; - TmpIntermediate.MAC = MAC_ADD; - TmpIntermediate.Parameters[0].IndexesWithA0_X = FALSE; - TmpIntermediate.Parameters[0].Parameter.ParameterType = PARAM_R; - TmpIntermediate.Parameters[0].Parameter.Address = TmpIntermediate.Output.Address; - TmpIntermediate.Parameters[0].Parameter.Neg = FALSE; - - int swizzle = (TmpIntermediate.Output.Mask[0]) | (TmpIntermediate.Output.Mask[1] << 1) | (TmpIntermediate.Output.Mask[2] << 2) | (TmpIntermediate.Output.Mask[3] << 3); - switch (swizzle) - { - case 1: - VshSetSwizzle(&TmpIntermediate.Parameters[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); - break; - case 2: - VshSetSwizzle(&TmpIntermediate.Parameters[0], SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y); - break; - case 4: - VshSetSwizzle(&TmpIntermediate.Parameters[0], SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z); - break; - case 8: 
- VshSetSwizzle(&TmpIntermediate.Parameters[0], SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); - break; - case 15: - default: - VshSetSwizzle(&TmpIntermediate.Parameters[0], SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); - break; - } - //VshSetSwizzle(&TmpIntermediate.Parameters[0], SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); - VshSetSwizzle(&TmpIntermediate.Parameters[1], SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); - //VshSetOutputMask(&TmpIntermediate.Output, FALSE, FALSE, FALSE, TRUE); - VshInsertIntermediate(pShader, &TmpIntermediate, i + 1); - } - i++; - } - } - - // Replace all writes to oPos with writes to r12. - // On Xbox, oPos is read/write, essentially a 13th temporary register - // In DX9 and vs_2_x, oPos is write-only, so we'll use r12 in its place - // And at the end of the shader, write r12 to oPos - for (int i = 0; i < pShader->IntermediateCount; i++) { - VSH_INTERMEDIATE_FORMAT* pIntermediate = &pShader->Intermediate[i]; - if (pIntermediate->Output.Type == IMD_OUTPUT_O && pIntermediate->Output.Address == OREG_OPOS) { - pIntermediate->Output.Type = IMD_OUTPUT_R; - pIntermediate->Output.Address = X_VSH_TEMP_OPOS; - } - } - - // We append one additional instruction to mov oPos, r12 - // TODO : *IF* r12 is not read after the final write to oPos, - // it'd be more efficient to not-replace this oPos write by r12, - // so that we don't have to do the following : - VSH_INTERMEDIATE_FORMAT MovIntermediate = {0}; - MovIntermediate.MAC = MAC_MOV; - MovIntermediate.Output.Type = IMD_OUTPUT_O; - MovIntermediate.Output.Address = OREG_OPOS; - MovIntermediate.Output.Mask[0] = true; - MovIntermediate.Output.Mask[1] = true; - MovIntermediate.Output.Mask[2] = true; - MovIntermediate.Output.Mask[3] = true; - MovIntermediate.Parameters[0].Active = true; - MovIntermediate.Parameters[0].Parameter.ParameterType = PARAM_R; - MovIntermediate.Parameters[0].Parameter.Address = X_VSH_TEMP_OPOS; - MovIntermediate.Parameters[0].Parameter.Swizzle[0] = SWIZZLE_X; - 
MovIntermediate.Parameters[0].Parameter.Swizzle[1] = SWIZZLE_Y; - MovIntermediate.Parameters[0].Parameter.Swizzle[2] = SWIZZLE_Z; - MovIntermediate.Parameters[0].Parameter.Swizzle[3] = SWIZZLE_W; - VshInsertIntermediate(pShader, &MovIntermediate, pShader->IntermediateCount); - - return TRUE; -} - // **************************************************************************** // * Vertex shader declaration recompiler // **************************************************************************** @@ -2126,7 +1440,7 @@ private: } // Add this register to the list of declared registers - RegVIsPresentInDeclaration[VertexRegister] = true; + // TODO : Reinstate and use : RegVIsPresentInDeclaration[VertexRegister] = true; DWORD XboxVertexElementDataType = (*pXboxToken & X_D3DVSD_DATATYPEMASK) >> X_D3DVSD_DATATYPESHIFT; WORD XboxVertexElementByteSize = 0; @@ -2453,7 +1767,7 @@ public: IsFixedFunction = bIsFixedFunction; - RegVIsPresentInDeclaration.fill(false); + // TODO : Reinstate and use : RegVIsPresentInDeclaration.fill(false); // First of all some info: // We have to figure out which flags are set and then @@ -2520,147 +1834,6 @@ D3DVERTEXELEMENT *EmuRecompileVshDeclaration return pHostVertexElements; } -std::string UsingScratch(std::string input) { - return std::regex_replace(input, std::regex("tmp"), "r" + std::to_string(X_VSH_TEMP_SCRATCH)); -} - -// Xbox expp seems to behave the as vs_1_1 -std::string VshPostProcess_Expp(std::string shader) { - // Find usages of exp with each swizzle - // If there's no swizzle, we should still match so we do the calculation - // for all components - const auto xbox_expp_x = std::regex("expp (\\w\\d\\d?)(\\.x)?, (.+)$"); - const auto xbox_expp_y = std::regex("expp (\\w\\d\\d?)(\\.y)?, (.+)$"); - const auto xbox_expp_z = std::regex("expp (\\w\\d\\d?)(\\.z)?, (.+)$"); - const auto xbox_expp_w = std::regex("expp (\\w\\d\\d?)(\\.w)?, (.+)$"); - - // We operate on a scalar so the input should have a swizzle? 
- - if (std::regex_search(shader, xbox_expp_x)) - LOG_TEST_CASE("Title uses the x component result of expp"); - if (std::regex_search(shader, xbox_expp_w)) - LOG_TEST_CASE("Title uses the w component result of expp"); - - // dest.x = 2 ^ floor(x) - // Test Case: ??? - static auto host_expp_x = UsingScratch( - "; patch expp: dest.x = 2 ^ floor(x)\n" - "frc tmp.x, $3\n" - "add tmp.x, $1$2, -tmp.x\n" - "exp $1.x, tmp.x"); - shader = std::regex_replace(shader, xbox_expp_x, host_expp_x); - - // dest.y = x - floor(x) - // Test Case: Tony Hawk Pro Skater 2X - const auto host_expp_y = - "; patch expp: dest.y = x - floor(x)\n" - "frc $1.y, $3"; - shader = std::regex_replace(shader, xbox_expp_y, host_expp_y); - - // dest.z = approximate 2 ^ x - // Test Case: Mechassault - const auto host_expp_z = - "; patch expp: dest.z = 2 ^ x\n" - "exp $1.z, $3"; - shader = std::regex_replace(shader, xbox_expp_z, host_expp_z); - - // dest.w = 1 - // Test Case: ??? - // TODO do a constant read here - const auto host_expp_w = UsingScratch( - "; patch expp: dest.w = 1\n" - "sub tmp.x, tmp.x, tmp.x\n" // Get 0 - "exp $1.w, tmp.x"); // 2 ^ 0 = 1 - shader = std::regex_replace(shader, xbox_expp_w, host_expp_w); - - return shader; -} - -std::string VshPostProcess_Log(std::string shader) { - const auto xbox_log_x = std::regex("log (\\w\\d\\d?)(\\.x)?, (.+)$"); - const auto xbox_log_y = std::regex("log (\\w\\d\\d?)(\\.y)?, (.+)$"); - const auto xbox_log_z = std::regex("log (\\w\\d\\d?)(\\.z)?, (.+)$"); - const auto xbox_log_w = std::regex("log (\\w\\d\\d?)(\\.w)?, (.+)$"); - - if (std::regex_search(shader, xbox_log_x)) - LOG_TEST_CASE("Title uses the x component result of log"); - if (std::regex_search(shader, xbox_log_y)) - LOG_TEST_CASE("Title uses the y component result of log"); - if (std::regex_search(shader, xbox_log_w)) - LOG_TEST_CASE("Title uses the w component result of log"); - - // exponent and mantissa functions such that - // x = mantissa(x) * 2 ^ exponent(x) - - // dest.x = 
exponent(x) - // Test Case: ??? - // floor(log(x)) - static auto host_log_x = UsingScratch( - "; patch log: dest.x = exponent(x)\n" - "log tmp.x, $3\n" - "frc $tmp.y, $tmp.x\n" - "sub $1.x, tmp.x, tmp.y"); - shader = std::regex_replace(shader, xbox_log_x, host_log_x); - - // dest.y = mantissa(x) - // Test Case: ??? - // x / 2 ^ exponent(x) - static auto host_log_y = UsingScratch( - "; patch log: dest.y = mantissa(x)\n" - "log tmp.x, $3\n" - "frc $tmp.y, $tmp.x\n" - "sub tmp.x, tmp.x, tmp.y\n" // tmp.x = exponent(x) = floor(log(x)) - "exp tmp.x, tmp.x\n" - "rcp tmp.x, tmp.x\n" // tmp.x = 1 / (2 ^ exponent(x)) - "mul $1.y, $3, tmp.x"); - shader = std::regex_replace(shader, xbox_log_y, host_log_y); - - // dest.z = log(x) - // Test Case: Mechassault (part of the mech glows depending on heat level) - static auto host_log_z = - "; patch log: dest.z = log(x)\n" - "log $1.z, $3"; - shader = std::regex_replace(shader, xbox_log_z, host_log_z); - - // dest.w = 1 - // Test Case: ??? - // TODO do a constant read here - const auto host_log_w = UsingScratch( - "; patch log: dest.w = 1\n" - "sub tmp.x, tmp.x, tmp.x\n" // Get 0 - "exp $1.w, tmp.x"); // 2 ^ 0 = 1 - shader = std::regex_replace(shader, xbox_log_w, host_log_w); - - return shader; -} - -// On Xbox, the special indexing register, a0.x, is truncated -// But on vs_2_x and up, it's rounded to the closest integer -// So we have to truncate it ourselves -// Test Case: Buffy the Vampire Slayer -std::string VshPostProcess_TruncateMovA(std::string shader) { - // find usages of mova - const auto movA = std::regex("mova a0\\.x, (.*)$"); - // The equivalent of floor() with a temp register - // and use the floored value - static auto truncate = UsingScratch( - "; patch mova: a = floor(x)\n" - "frc tmp, $1\n" - "add tmp, $1, -tmp\n" - "mova a0.x, tmp"); - return std::regex_replace(shader, movA, truncate); -} - -#include -#include - -// Post process the shader as a string -std::string VshPostProcess(std::string shader) { - shader = 
VshPostProcess_Expp(shader); - shader = VshPostProcess_Log(shader); - return VshPostProcess_TruncateMovA(shader); -} - extern std::string BuildShader(VSH_XBOX_SHADER* pShader); std::string DebugPrependLineNumbers(std::string shaderString) { @@ -2670,8 +1843,8 @@ std::string DebugPrependLineNumbers(std::string shaderString) { int i = 1; for (std::string line; std::getline(shader, line); ) { auto lineNumber = std::to_string(i++); - auto paddedLine = line.insert(0, 3 - lineNumber.size(), ' '); - debugShader << "/* " << lineNumber << " */ " << line << "\n"; + auto paddedLineNumber = lineNumber.insert(0, 3 - lineNumber.size(), ' '); + debugShader << "/* " << paddedLineNumber << " */ " << line << "\n"; } return debugShader.str(); @@ -2692,7 +1865,7 @@ extern HRESULT EmuRecompileVshFunction DWORD *pToken; boolean EOI = false; VSH_XBOX_SHADER *pShader = (VSH_XBOX_SHADER*)calloc(1, sizeof(VSH_XBOX_SHADER)); - ID3DBlob *pErrors; + ID3DBlob *pErrors = nullptr; HRESULT hRet = 0; // TODO: support this situation.. 
@@ -2703,15 +1876,14 @@ extern HRESULT EmuRecompileVshFunction *pbUseDeclarationOnly = 0; *pXboxFunctionSize = 0; *ppRecompiledShader = nullptr; - - if(!pShader) - { + + if(!pShader) { EmuLog(LOG_LEVEL::WARNING, "Couldn't allocate memory for vertex shader conversion buffer"); return E_OUTOFMEMORY; - } + } + pShader->ShaderHeader = *pXboxVertexShaderHeader; - switch(pXboxVertexShaderHeader->Version) - { + switch(pXboxVertexShaderHeader->Version) { case VERSION_XVS: break; case VERSION_XVSS: @@ -2728,12 +1900,8 @@ extern HRESULT EmuRecompileVshFunction break; } - if(SUCCEEDED(hRet)) - { - RegVIsUsedByShader.fill(false); - - for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) - { + if(SUCCEEDED(hRet)) { + for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) { VSH_SHADER_INSTRUCTION Inst; VshParseInstruction((uint32_t*)pToken, &Inst); @@ -2751,75 +1919,31 @@ extern HRESULT EmuRecompileVshFunction return D3D_OK; } - std::stringstream& pXboxShaderDisassembly = std::stringstream(); - std::stringstream& pHostShaderDisassembly = std::stringstream(); - static std::string hlslTemplate = #include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string ; - DbgVshPrintf("-- Before conversion --\n"); - VshWriteShader(pShader, pXboxShaderDisassembly, pRecompiledDeclaration, FALSE); - DbgVshPrintf("%s", pXboxShaderDisassembly.str().c_str()); - DbgVshPrintf("-----------------------\n"); - auto hlslTest = BuildShader(pShader); hlslTest = std::regex_replace(hlslTemplate, std::regex("// "), hlslTest); - DbgVshPrintf("-- HLSL conversion 1 ---\n"); + DbgVshPrintf("--- HLSL conversion ---\n"); DbgVshPrintf(DebugPrependLineNumbers(hlslTest).c_str()); DbgVshPrintf("-----------------------\n"); - - VshConvertShader(pShader, bNoReservedConstants); - VshWriteShader(pShader, pHostShaderDisassembly, pRecompiledDeclaration, 
TRUE); - - //auto hlslTest = BuildShader(pShader); - //hlslTest = std::regex_replace(hlslTemplate, std::regex("// "), hlslTest); - - // Post process the final shader - auto finalHostShader = VshPostProcess(pHostShaderDisassembly.str()); - - //DbgVshPrintf("-- After conversion ---\n"); - //DbgVshPrintf("%s", finalHostShader.c_str()); - //DbgVshPrintf("-----------------------\n"); - - //DbgVshPrintf("-- HLSL conversion 2 ---\n"); - //DbgVshPrintf(BuildShader(pShader).c_str()); - //DbgVshPrintf("-----------------------\n"); - - // HACK: Azurik. Prevent Direct3D from trying to assemble this. - if(finalHostShader == "vs.2.x\n") - { - EmuLog(LOG_LEVEL::WARNING, "Replacing empty vertex shader with fallback"); - - finalHostShader = std::string( - "vs.2.x\n" - "dcl_position v0\n" - "dp4 oPos.x, v0, c96\n" - "dp4 oPos.y, v0, c97\n" - "dp4 oPos.z, v0, c98\n" - "dp4 oPos.w, v0, c99\n" - ); - } - else - { - hRet = D3DCompile( - hlslTest.c_str(), - hlslTest.length(), - nullptr, // pSourceName - nullptr, // pDefines - nullptr, // pInclude // TODO precompile x_* HLSL functions? - "main", // shader entry poiint - "vs_3_0", // shader profile - 0, // flags1 - 0, // flags2 - ppRecompiledShader, // out - &pErrors // ppErrorMsgs out - ); - } - - if (FAILED(hRet)) - { + + hRet = D3DCompile( + hlslTest.c_str(), + hlslTest.length(), + nullptr, // pSourceName + nullptr, // pDefines + nullptr, // pInclude // TODO precompile x_* HLSL functions? + "main", // shader entry poiint + "vs_3_0", // shader profile + 0, // flags1 + 0, // flags2 + ppRecompiledShader, // out + &pErrors // ppErrorMsgs out + ); + if (FAILED(hRet)) { EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); } @@ -2827,11 +1951,9 @@ extern HRESULT EmuRecompileVshFunction // Determine the log level auto hlslErrorLogLevel = FAILED(hRet) ? 
LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG; // Log HLSL compiler errors - EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors)->GetBufferPointer()); - (pErrors)->Release(); + EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); + pErrors->Release(); } - - // TODO : If compiling hlsl failed, fall back on assembling finalHostShader? } free(pShader); @@ -2948,61 +2070,68 @@ void CxbxImpl_SelectVertexShaderDirect // HLSL outputs -std::string ToHlsl(VSH_IMD_OUTPUT& dest) { - auto hlsl = std::stringstream(); - switch (dest.Type) - { - case IMD_OUTPUT_O: - hlsl << OReg_Name[dest.Address]; - break; - case IMD_OUTPUT_A0X: - hlsl << "a"; - break; +void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) +{ + switch (dest.Type) { case IMD_OUTPUT_C: - hlsl << "c[" << dest.Address << "]"; //todo we can output to constants...? + hlsl << "c[" << dest.Address << "]"; break; case IMD_OUTPUT_R: - hlsl << "r" << dest.Address; + hlsl << "r" << dest.Address; break; - default: + case IMD_OUTPUT_O: + assert(dest.Address < OREG_A0X); + hlsl << OReg_Name[dest.Address]; + break; + case IMD_OUTPUT_A0X: + hlsl << "a0_x"; // Is this actually a valid output? + break; + default: + assert(false); break; } // If we're not writing all channels, write the mask if (!(dest.Mask[0] && dest.Mask[1] && dest.Mask[2] && dest.Mask[3])) - { - hlsl << "." << (dest.Mask[0] ? "x" : "") - << (dest.Mask[1] ? "y" : "") - << (dest.Mask[2] ? "z" : "") - << (dest.Mask[3] ? 
"w" : ""); + { + hlsl << "."; + unsigned vector_size = 0; + if (dest.Mask[0]) { hlsl << "x"; vector_size++; } + if (dest.Mask[1]) { hlsl << "y"; vector_size++; } + if (dest.Mask[2]) { hlsl << "z"; vector_size++; } + if (dest.Mask[3]) { hlsl << "w"; vector_size++; } + hlsl << " = (float" << vector_size << ")"; + } else { + hlsl << " = "; } - - return hlsl.str(); } -std::string ToHlsl(VSH_IMD_PARAMETER& paramMeta) +void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) { - auto hlsl = std::stringstream(); auto param = paramMeta.Parameter; if (param.Neg) { hlsl << "-"; } - - if (param.ParameterType == PARAM_C) { - // We'll use the c() function instead of direct indexing + + int register_number = param.Address; + if (param.ParameterType == PARAM_C) { + // Map Xbox [-96, 95] to Host [0, 191] + // Account for Xbox's negative constant indexes + register_number += 96; if (paramMeta.IndexesWithA0_X) { // Only display the offset if it's not 0. - if (param.Address != 0) { - hlsl << "c(a0_x+" << param.Address << ")"; - } else { - hlsl << "c(a0_x)"; + if (register_number != 0) { + hlsl << "c[a0_x+" << register_number << "]"; } + else { + hlsl << "c[a0_x]"; + } } else { - hlsl << "c(" << param.Address << ")"; + hlsl << "c[" << register_number << "]"; } } else { - hlsl << VshGetRegisterName(param.ParameterType) << param.Address; + hlsl << VshGetRegisterName(param.ParameterType) << register_number; } // Write the swizzle if we need to @@ -3030,75 +2159,71 @@ std::string ToHlsl(VSH_IMD_PARAMETER& paramMeta) hlsl << "xyzw"[param.Swizzle[i]]; } } - - return hlsl.str(); } -std::string ToHlsl(std::string pattern, VSH_INTERMEDIATE_FORMAT& instruction) { - auto static regDest = std::regex("dest"); - const std::regex regSrc[] = { std::regex("src0"), std::regex("src1"), std::regex("src2") }; - - // TODO use simple string replace - // Warn if we didn't replace anything etc. 
- // Replace dest - auto hlsl = std::regex_replace(pattern, regDest, ToHlsl(instruction.Output)); - - int srcNum = 0; - for (int i = 0; i < 3; i++) { // TODO remove magic number - if (instruction.Parameters[i].Active) { - hlsl = std::regex_replace(hlsl, regSrc[srcNum], ToHlsl(instruction.Parameters[i])); - srcNum += 1; - } - } - - return hlsl; -} - -std::string BuildShader(VSH_XBOX_SHADER* pShader) { - +std::string BuildShader(VSH_XBOX_SHADER* pShader) +{ // HLSL strings for all MAC opcodes, indexed with VSH_MAC static std::string VSH_MAC_HLSL[] = { - /*MAC_NOP:*/"// MAC_NOP\n", - /*MAC_MOV:*/"dest = x_mov(src0);\n", - /*MAC_MUL:*/"dest = x_mul(src0, src1);\n", - /*MAC_ADD:*/"dest = x_add(src0, src1);\n", - /*MAC_MAD:*/"dest = x_mad(src0, src1, src2);\n", - /*MAC_DP3:*/"dest = x_dp3(src0, src1);\n", - /*MAC_DPH:*/"dest = x_dph(src0, src1);\n", - /*MAC_DP4:*/"dest = x_dp4(src0, src1);\n", - /*MAC_DST:*/"dest = x_dst(src0, src1);\n", - /*MAC_MIN:*/"dest = x_min(src0, src1);\n", - /*MAC_MAX:*/"dest = x_max(src0, src1);\n", - /*MAC_SLT:*/"dest = x_slt(src0, src1);\n", - /*MAC_SGE:*/"dest = x_sge(src0, src1);\n", - /*MAC_ARL:*/"a = x_arl(src0);\n", // Note : For this MAC_ARL case, ToHlsl would always replace 'dest' with 'a', so we optimized this upfront - "// ??? VSH_MAC 14 ???;\n", - "// ??? 
VSH_MAC 15 ???;\n" // VSH_MAC 2 final values of the 4 bits are undefined/unknown TODO : Investigate their effect (if any) and emulate that as well + /*MAC_NOP:*/"", + /*MAC_MOV:*/"x_mov", + /*MAC_MUL:*/"x_mul", + /*MAC_ADD:*/"x_add", + /*MAC_MAD:*/"x_mad", + /*MAC_DP3:*/"x_dp3", + /*MAC_DPH:*/"x_dph", + /*MAC_DP4:*/"x_dp4", + /*MAC_DST:*/"x_dst", + /*MAC_MIN:*/"x_min", + /*MAC_MAX:*/"x_max", + /*MAC_SLT:*/"x_slt", + /*MAC_SGE:*/"x_sge", + /*MAC_ARL:*/"x_arl", // Note : For this MAC_ARL case, ToHlsl would always replace 'dest' with 'a', so we optimized this upfront + "", + "" // VSH_MAC 2 final values of the 4 bits are undefined/unknown TODO : Investigate their effect (if any) and emulate that as well }; // HLSL strings for all ILU opcodes, indexed with VSH_ILU static std::string VSH_ILU_HLSL[] = { - /*ILU_NOP:*/"// ILU_NOP\n", - /*ILU_MOV:*/"dest = x_mov(src0);\n", - /*ILU_RCP:*/"dest = x_rcp(src0);\n", - /*ILU_RCC:*/"dest = x_rcc(src0);\n", - /*ILU_RSQ:*/"dest = x_rsq(src0);\n", - /*ILU_EXP:*/"dest = x_exp(src0);\n", - /*ILU_LOG:*/"dest = x_log(src0);\n", - /*ILU_LIT:*/"dest = x_lit(src0);\n" // = 7 - all values of the 3 bits are used + /*ILU_NOP:*/"", + /*ILU_MOV:*/"x_mov", + /*ILU_RCP:*/"x_rcp", + /*ILU_RCC:*/"x_rcc", + /*ILU_RSQ:*/"x_rsq", + /*ILU_EXP:*/"x_exp", + /*ILU_LOG:*/"x_log", + /*ILU_LIT:*/"x_lit" // = 7 - all values of the 3 bits are used }; auto hlsl = std::stringstream(); for (int i = 0; i < pShader->IntermediateCount; i++) { VSH_INTERMEDIATE_FORMAT& xboxInstruction = pShader->Intermediate[i]; - - if (xboxInstruction.InstructionType == IMD_MAC) { - hlsl << ToHlsl(VSH_MAC_HLSL[xboxInstruction.MAC], xboxInstruction); + + std::string str = ""; + if (xboxInstruction.InstructionType == IMD_MAC) { + if (xboxInstruction.MAC > MAC_NOP && xboxInstruction.MAC <= MAC_ARL) { + str = VSH_MAC_HLSL[xboxInstruction.MAC]; + } } else if (xboxInstruction.InstructionType == IMD_ILU) { - hlsl << ToHlsl(VSH_ILU_HLSL[xboxInstruction.ILU], xboxInstruction); - } else { - 
EmuLog(LOG_LEVEL::WARNING, "TODO message"); + if (xboxInstruction.ILU > ILU_NOP) { + str = VSH_ILU_HLSL[xboxInstruction.ILU]; + } + } + + if (!str.empty()) { + hlsl << "\n "; + OutputHlsl(hlsl, xboxInstruction.Output); + hlsl << str; // opcode + str = "("; + for (int i = 0; i < 3; i++) { // TODO remove magic number + if (xboxInstruction.Parameters[i].Active) { + hlsl << str; // separator + ParameterHlsl(hlsl, xboxInstruction.Parameters[i]); + str = ", "; + } + } + hlsl << ");"; } } From 127e51302e481455fda156aeb7e63fdc8cfa9aeb Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Mon, 9 Dec 2019 15:33:57 +0100 Subject: [PATCH 33/77] Wrap (or replace) HLSL functions with defines, so that destination swizzles work as expected without too much syntax deviation. Also adjusted a few hlshl functions to be more accurate --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 143 +++++++++++---------------- src/core/hle/D3D8/XbVertexShader.cpp | 104 +++++++------------ 2 files changed, 97 insertions(+), 150 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 581044e2a..fad1323a0 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -1,5 +1,5 @@ // This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : -R"DELIMITER( +//R"DELIMITER( // Xbox HLSL vertex shader (template populated at runtime) struct VS_INPUT { @@ -26,67 +26,35 @@ extern uniform float4 c[192] : register(c0); // Functions for MAC ('Multiply And Accumulate') opcodes -float4 x_mov(float4 src0) -{ - return src0; -} +#define x_mov(dest, src0) dest = src0 -float4 x_mul(float4 src0, float4 src1) -{ - return src0 * src1; -} +#define x_mul(dest, src0, src1) dest = src0 * src1 -float4 x_add(float4 src0, float4 src1) -{ - return src0 + src1; -} +#define x_add(dest, src0, src1) dest = src0 + src1 -float4 x_dst(float4 src0, float4 src1) -{ - return dst(src0, src1); -} +#define x_dst(dest, src0, src1) dest = dst(src0, 
src1) // equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } -float4 x_min(float4 src0, float4 src1) -{ - return min(src0, src1); -} +#define x_min(dest, src0, src1) dest = min(src0, src1) -float4 x_max(float4 src0, float4 src1) -{ - return max(src0, src1); -} +#define x_max(dest, src0, src1) dest = max(src0, src1) -float4 x_mad(float4 src0, float4 src1, float4 src2) -{ - return (src0 * src1) + src2; -} +#define x_mad(dest, src0, src1, src2) dest = (src0 * src1) + src2 -int x_arl(float src0) -{ - // The address register should be floored - // Due to rounding differences with the Xbox (and increased precision on PC?) - // some titles produce values just below the threshold of the next integer. - // We can add a small bias to make sure it's bumped over the threshold - // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) - return floor(src0 + 0.0001); -} +// The address register should be floored +// Due to rounding differences with the Xbox (and increased precision on PC?) +// some titles produce values just below the threshold of the next integer. +// We can add a small bias to make sure it's bumped over the threshold +// Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) +#define x_arl(dest, src0) dest = floor(src0 + 0.0001) -float x_dp3(float4 src0, float4 src1) -{ - return dot(src0.xyz, src1.xyz); -} +#define x_dp3(dest, src0, src1) dest = dot((float3)src0, (float3)src1) -float x_dph(float4 src0, float4 src1) -{ - return x_dp3(src0, src1) + src1.w; -} +#define x_dph(dest, src0, src1) x_dp3(src0, src1) + src1.w -float x_dp4(float4 src0, float4 src1) -{ - return dot(src0, src1); -} +#define x_dp4(dest, src0, src1) dest = dot(src0, src1) -float4 x_sge(float4 src0, float4 src1) +#define x_sge(dest, src0) dest = _sge(src0) +float4 _sge(float4 src0, float4 src1) { float4 dest; dest.x = (src0.x >= src1.x) ? 
1 : 0; @@ -96,7 +64,8 @@ float4 x_sge(float4 src0, float4 src1) return dest; } -float4 x_slt(float4 src0, float4 src1) +#define x_slt(dest, src0) dest = _slt(src0) +float4 _slt(float4 src0, float4 src1) { float4 dest; dest.x = (src0.x < src1.x) ? 1 : 0; @@ -108,17 +77,13 @@ float4 x_slt(float4 src0, float4 src1) // Xbox ILU Functions -float scalar_component(float4 src0) -{ - return src0.w; // use w component by default -} +#define scalar_component(src0) src0.x -float x_rcp(float4 src0) -{ - return 1 / scalar_component(src0); -} +#define x_rcp(dest, src0) dest = 1 / scalar_component(src0) +// TODO : #define x_rcp(dest, src0) dest = (scalar_component(src0) == 0) ? 1.#INF : (1 / scalar_component(src0)) -float x_rcc(float4 src0) +#define x_rcc(dest, src0) dest = _rcc(src0) +float _rcc(float4 src0) { float input = scalar_component(src0); @@ -131,42 +96,54 @@ float x_rcc(float4 src0) : clamp(r, -1.84467e+019f, -5.42101e-020f); } -float x_rsq(float4 src0) +#define x_rsq(dest, src0) dest = rsqrt(abs(scalar_component(src0))) + +#define x_expp(dest, src0) dest = x_expp(src0) +float4 _expp(float4 src0) { - return rsqrt(scalar_component(src0)); + float input = scalar_component(src0); + float base = floor(input); + + float4 dest; + dest.x = exp2(base); + dest.y = input - base; // Was : frac(input) + dest.z = exp2(input); + dest.w = 1; + + return dest; } -float4 x_exp(float4 src0) +#define x_logp(dest, src0) dest = _logp(src0) +float4 _logp(float4 src0) { - float input = scalar_component(src0); - float x = exp2(floor(input)); - float fractional = frac(input); - float power = exp2(input); - return float4(x, fractional, power, 1); -} - -float4 x_log(float4 src0) -{ - float input = scalar_component(src0); + float input = abs(scalar_component(src0)); float exponent = floor(log2(input)); - float mantissa = 1 / exp2(exponent); - float logResult = log2(input); - return float4(exponent, mantissa, logResult, 1); + + float4 dest; + dest.x = exponent; + dest.y = 1 / exp2(exponent); // 
mantissa + dest.z = exponent + log2(input); // logResult + dest.w = 1; + + return dest; } - -float4 x_lit(float4 src0) + +#define x_lit(dest, src) dest = _lit(src) +float4 _lit(float4 src0) { - const float epsilon = 1.0f / 256.0f; + const float epsilon = 1.0f / 256.0f; + float diffuse = src0.x; float blinn = src0.y; float specPower = clamp(src0.w, -(128 - epsilon), (128 - epsilon)); float4 dest; dest.x = 1; - dest.y = max(diffuse, 0); - dest.z = diffuse > 0 ? pow(2, specPower * log(blinn)) : 0; + dest.y = max(0, diffuse); + dest.z = diffuse > 0 ? pow(2, specPower * log(blinn)) : 0; // TODO : Use exp2(#) instead of pow(2, #) ? + // TODO : Use dest.z = (diffuse > 0) && (blinn > 0) ? pow(blinn, specPower) : 0; dest.w = 1; - + return dest; } diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index bc0237bdc..fe6faf75b 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -155,10 +155,10 @@ VSH_OUTPUT_TYPE; typedef enum _VSH_ARGUMENT_TYPE { PARAM_UNKNOWN = 0, - PARAM_R, // Temporary registers + PARAM_R, // Temporary (scRatch) registers PARAM_V, // Vertex registers PARAM_C, // Constant registers, set by SetVertexShaderConstant - PARAM_O + PARAM_O // = 0?? 
} VSH_ARGUMENT_TYPE; @@ -246,10 +246,10 @@ typedef struct _VSH_OUTPUT int16_t OutputAddress; // MAC output R register boolean MACRMask[4]; - boolean MACRAddress; + int16_t MACRAddress; // ILU output R register boolean ILURMask[4]; - boolean ILURAddress; + int16_t ILURAddress; } VSH_OUTPUT; @@ -262,7 +262,8 @@ typedef struct _VSH_SHADER_INSTRUCTION VSH_PARAMETER A; VSH_PARAMETER B; VSH_PARAMETER C; - boolean a0x; + boolean a0x; + boolean Final; } VSH_SHADER_INSTRUCTION; @@ -378,7 +379,7 @@ static const VSH_FIELDMAPPING g_FieldMapping[] = // Final instruction { FLD_FINAL, 3, 0, 1 } }; - + static const VSH_OPCODE_PARAMS g_OpCodeParams_ILU[] = { // ILU OP MAC OP ParamA ParamB ParamC @@ -516,7 +517,7 @@ static VSH_OPCODE_PARAMS* VshGetOpCodeParams(VSH_ILU ILU, static void VshParseInstruction(uint32_t *pShaderToken, VSH_SHADER_INSTRUCTION *pInstruction) { - // First get the instruction(s). + // First get the instruction(s). pInstruction->ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU); pInstruction->MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC); @@ -589,14 +590,14 @@ static void VshParseInstruction(uint32_t *pShaderToken, pInstruction->C.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_C_SWZ_W); // Get output // Output register - pInstruction->Output.OutputType = (VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB); + pInstruction->Output.OutputType = (VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB); switch(pInstruction->Output.OutputType) { case OUTPUT_C: pInstruction->Output.OutputAddress = ConvertCRegister(VshGetField(pShaderToken, FLD_OUT_ADDRESS)); break; case OUTPUT_O: - pInstruction->Output.OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS) & 0xF; + pInstruction->Output.OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS) & 0xF; break; } pInstruction->Output.OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX); @@ -617,7 +618,8 @@ static void VshParseInstruction(uint32_t *pShaderToken, 
pInstruction->Output.ILURMask[3] = VshGetField(pShaderToken, FLD_OUT_ILU_MASK_W); pInstruction->Output.ILURAddress = VshGetField(pShaderToken, FLD_OUT_R); // Finally, get a0.x indirect constant addressing - pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X); + pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X); + pInstruction->Final = VshGetField(pShaderToken, FLD_FINAL); } // Print functions @@ -802,30 +804,6 @@ static VSH_INTERMEDIATE_FORMAT *VshNewIntermediate(VSH_XBOX_SHADER *pShader) return &pShader->Intermediate[pShader->IntermediateCount++]; } -static void VshInsertIntermediate(VSH_XBOX_SHADER *pShader, - VSH_INTERMEDIATE_FORMAT *pIntermediate, - uint16_t Pos) -{ - VshVerifyBufferBounds(pShader); - - for (int i = pShader->IntermediateCount; i >= Pos; i--) - { - pShader->Intermediate[i + 1] = pShader->Intermediate[i]; - } - pShader->Intermediate[Pos] = *pIntermediate; - pShader->IntermediateCount++; -} - -static void VshDeleteIntermediate(VSH_XBOX_SHADER *pShader, - uint16_t Pos) -{ - for (int i = Pos; i < (pShader->IntermediateCount - 1); i++) - { - pShader->Intermediate[i] = pShader->Intermediate[i + 1]; - } - pShader->IntermediateCount--; -} - static boolean VshAddInstructionMAC_R(VSH_SHADER_INSTRUCTION *pInstruction, VSH_XBOX_SHADER *pShader, boolean IsCombined) @@ -1834,7 +1812,7 @@ D3DVERTEXELEMENT *EmuRecompileVshDeclaration return pHostVertexElements; } -extern std::string BuildShader(VSH_XBOX_SHADER* pShader); +extern void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader); std::string DebugPrependLineNumbers(std::string shaderString) { std::stringstream shader(shaderString); @@ -1901,12 +1879,18 @@ extern HRESULT EmuRecompileVshFunction } if(SUCCEEDED(hRet)) { + static std::string hlsl_template = + #include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string + ; + + auto hlsl_stream = std::stringstream(); + for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; 
pToken += X_VSH_INSTRUCTION_SIZE) { VSH_SHADER_INSTRUCTION Inst; VshParseInstruction((uint32_t*)pToken, &Inst); VshConvertToIntermediate(&Inst, pShader); - EOI = (boolean)VshGetField((uint32_t*)pToken, FLD_FINAL); + EOI = Inst.Final; } // The size of the shader is @@ -1919,20 +1903,17 @@ extern HRESULT EmuRecompileVshFunction return D3D_OK; } - static std::string hlslTemplate = - #include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string - ; - - auto hlslTest = BuildShader(pShader); - hlslTest = std::regex_replace(hlslTemplate, std::regex("// "), hlslTest); + BuildShader(hlsl_stream, pShader); + std::string hlsl_str = hlsl_stream.str(); + hlsl_str = std::regex_replace(hlsl_template, std::regex("// "), hlsl_str); DbgVshPrintf("--- HLSL conversion ---\n"); - DbgVshPrintf(DebugPrependLineNumbers(hlslTest).c_str()); + DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str()); DbgVshPrintf("-----------------------\n"); hRet = D3DCompile( - hlslTest.c_str(), - hlslTest.length(), + hlsl_str.c_str(), + hlsl_str.length(), nullptr, // pSourceName nullptr, // pDefines nullptr, // pInclude // TODO precompile x_* HLSL functions? 
@@ -2095,14 +2076,10 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) if (!(dest.Mask[0] && dest.Mask[1] && dest.Mask[2] && dest.Mask[3])) { hlsl << "."; - unsigned vector_size = 0; - if (dest.Mask[0]) { hlsl << "x"; vector_size++; } - if (dest.Mask[1]) { hlsl << "y"; vector_size++; } - if (dest.Mask[2]) { hlsl << "z"; vector_size++; } - if (dest.Mask[3]) { hlsl << "w"; vector_size++; } - hlsl << " = (float" << vector_size << ")"; - } else { - hlsl << " = "; + if (dest.Mask[0]) hlsl << "x"; + if (dest.Mask[1]) hlsl << "y"; + if (dest.Mask[2]) hlsl << "z"; + if (dest.Mask[3]) hlsl << "w"; } } @@ -2161,7 +2138,7 @@ void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) } } -std::string BuildShader(VSH_XBOX_SHADER* pShader) +void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) { // HLSL strings for all MAC opcodes, indexed with VSH_MAC static std::string VSH_MAC_HLSL[] = { @@ -2178,7 +2155,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) /*MAC_MAX:*/"x_max", /*MAC_SLT:*/"x_slt", /*MAC_SGE:*/"x_sge", - /*MAC_ARL:*/"x_arl", // Note : For this MAC_ARL case, ToHlsl would always replace 'dest' with 'a', so we optimized this upfront + /*MAC_ARL:*/"x_arl", "", "" // VSH_MAC 2 final values of the 4 bits are undefined/unknown TODO : Investigate their effect (if any) and emulate that as well }; @@ -2190,13 +2167,11 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) /*ILU_RCP:*/"x_rcp", /*ILU_RCC:*/"x_rcc", /*ILU_RSQ:*/"x_rsq", - /*ILU_EXP:*/"x_exp", - /*ILU_LOG:*/"x_log", + /*ILU_EXP:*/"x_expp", + /*ILU_LOG:*/"x_logp", /*ILU_LIT:*/"x_lit" // = 7 - all values of the 3 bits are used }; - auto hlsl = std::stringstream(); - for (int i = 0; i < pShader->IntermediateCount; i++) { VSH_INTERMEDIATE_FORMAT& xboxInstruction = pShader->Intermediate[i]; @@ -2212,20 +2187,15 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader) } if (!str.empty()) { - hlsl << "\n "; + hlsl << "\n " << str << "("; // opcode OutputHlsl(hlsl, 
xboxInstruction.Output); - hlsl << str; // opcode - str = "("; - for (int i = 0; i < 3; i++) { // TODO remove magic number + for (int i = 0; i < 3; i++) { if (xboxInstruction.Parameters[i].Active) { - hlsl << str; // separator + hlsl << ", "; ParameterHlsl(hlsl, xboxInstruction.Parameters[i]); - str = ", "; } } hlsl << ");"; } } - - return hlsl.str(); } From 5b2ff4e2789a1709847fdef92e52e63db85bc53f Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Mon, 9 Dec 2019 15:48:13 +0100 Subject: [PATCH 34/77] Simpler determination of used parameters per opcode --- src/core/hle/D3D8/XbVertexShader.cpp | 68 ++-------------------------- 1 file changed, 4 insertions(+), 64 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index fe6faf75b..7025fb58e 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -216,18 +216,6 @@ typedef enum _VSH_MAC } VSH_MAC; -typedef struct _VSH_OPCODE_PARAMS -{ - // Dxbx Note : Since we split up g_OpCodeParams into g_OpCodeParams_ILU and g_OpCodeParams_MAC - // the following two members aren't needed anymore : - // VSH_ILU ILU; - // VSH_MAC MAC; - boolean A; - boolean B; - boolean C; -} -VSH_OPCODE_PARAMS; - typedef struct _VSH_PARAMETER { VSH_PARAMETER_TYPE ParameterType; // Parameter type, R, V or C @@ -380,38 +368,6 @@ static const VSH_FIELDMAPPING g_FieldMapping[] = { FLD_FINAL, 3, 0, 1 } }; -static const VSH_OPCODE_PARAMS g_OpCodeParams_ILU[] = -{ - // ILU OP MAC OP ParamA ParamB ParamC - { /*ILU_NOP, MAC_NOP, */ FALSE, FALSE, FALSE }, // Dxbx note : Unused - { /*ILU_MOV, MAC_NOP, */ FALSE, FALSE, TRUE }, - { /*ILU_RCP, MAC_NOP, */ FALSE, FALSE, TRUE }, - { /*ILU_RCC, MAC_NOP, */ FALSE, FALSE, TRUE }, - { /*ILU_RSQ, MAC_NOP, */ FALSE, FALSE, TRUE }, - { /*ILU_EXP, MAC_NOP, */ FALSE, FALSE, TRUE }, - { /*ILU_LOG, MAC_NOP, */ FALSE, FALSE, TRUE }, - { /*ILU_LIT, MAC_NOP, */ FALSE, FALSE, TRUE }, -}; - -static const VSH_OPCODE_PARAMS g_OpCodeParams_MAC[] = -{ 
- // ILU OP MAC OP ParamA ParamB ParamC - { /*ILU_NOP, MAC_NOP, */ FALSE, FALSE, FALSE }, // Dxbx note : Unused - { /*ILU_NOP, MAC_MOV, */ TRUE, FALSE, FALSE }, - { /*ILU_NOP, MAC_MUL, */ TRUE, TRUE, FALSE }, - { /*ILU_NOP, MAC_ADD, */ TRUE, FALSE, TRUE }, - { /*ILU_NOP, MAC_MAD, */ TRUE, TRUE, TRUE }, - { /*ILU_NOP, MAC_DP3, */ TRUE, TRUE, FALSE }, - { /*ILU_NOP, MAC_DPH, */ TRUE, TRUE, FALSE }, - { /*ILU_NOP, MAC_DP4, */ TRUE, TRUE, FALSE }, - { /*ILU_NOP, MAC_DST, */ TRUE, TRUE, FALSE }, - { /*ILU_NOP, MAC_MIN, */ TRUE, TRUE, FALSE }, - { /*ILU_NOP, MAC_MAX, */ TRUE, TRUE, FALSE }, - { /*ILU_NOP, MAC_SLT, */ TRUE, TRUE, FALSE }, - { /*ILU_NOP, MAC_SGE, */ TRUE, TRUE, FALSE }, - { /*ILU_NOP, MAC_ARL, */ TRUE, FALSE, FALSE } -}; - static const char* OReg_Name[] = { "oPos", @@ -502,18 +458,6 @@ uint8_t VshGetField(uint32_t *pShaderToken, g_FieldMapping[FieldName].BitLength)); } -static VSH_OPCODE_PARAMS* VshGetOpCodeParams(VSH_ILU ILU, - VSH_MAC MAC) -{ - if (ILU >= ILU_MOV && ILU <= ILU_LIT) - return (VSH_OPCODE_PARAMS*)&g_OpCodeParams_ILU[ILU]; - else - if (MAC >= MAC_MOV && MAC <= MAC_ARL) - return (VSH_OPCODE_PARAMS*)&g_OpCodeParams_MAC[MAC]; - else - return nullptr; -} - static void VshParseInstruction(uint32_t *pShaderToken, VSH_SHADER_INSTRUCTION *pInstruction) { @@ -763,24 +707,20 @@ static void VshAddParameters(VSH_SHADER_INSTRUCTION *pInstruction, VSH_IMD_PARAMETER *pParameters) { uint8_t ParamCount = 0; - VSH_OPCODE_PARAMS* pParams = VshGetOpCodeParams(ILU, MAC); - - // param A - if(pParams->A) + + if(MAC >= MAC_MOV) { VshAddParameter(&pInstruction->A, pInstruction->a0x, &pParameters[ParamCount]); ParamCount++; } - // param B - if(pParams->B) + if((MAC == MAC_MUL) || ((MAC >= MAC_MAD) && (MAC <= MAC_SGE))) { VshAddParameter(&pInstruction->B, pInstruction->a0x, &pParameters[ParamCount]); ParamCount++; } - // param C - if(pParams->C) + if((ILU >= ILU_MOV) || (MAC == MAC_ADD) || (MAC == MAC_MAD)) { VshAddParameter(&pInstruction->C, pInstruction->a0x, 
&pParameters[ParamCount]); ParamCount++; From 339af7b500b60d1d7f2fb3bd04cdee28f6092f56 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Tue, 10 Dec 2019 12:04:24 +0100 Subject: [PATCH 35/77] Pass mask as argument into the opcode defines. Also fixed a few HLSL issues that crept in the previous commit --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 46 ++++++++++++++-------------- src/core/hle/D3D8/XbVertexShader.cpp | 22 ++++++------- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index fad1323a0..461295521 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -1,5 +1,5 @@ // This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : -//R"DELIMITER( +R"DELIMITER( // Xbox HLSL vertex shader (template populated at runtime) struct VS_INPUT { @@ -26,34 +26,34 @@ extern uniform float4 c[192] : register(c0); // Functions for MAC ('Multiply And Accumulate') opcodes -#define x_mov(dest, src0) dest = src0 +#define x_mov(dest, mask, src0) dest.mask = src0 -#define x_mul(dest, src0, src1) dest = src0 * src1 +#define x_mul(dest, mask, src0, src1) dest.mask = src0 * src1 -#define x_add(dest, src0, src1) dest = src0 + src1 +#define x_add(dest, mask, src0, src1) dest.mask = src0 + src1 -#define x_dst(dest, src0, src1) dest = dst(src0, src1) // equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } +#define x_dst(dest, mask, src0, src1) dest.mask = dst(src0, src1) // equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } -#define x_min(dest, src0, src1) dest = min(src0, src1) +#define x_min(dest, mask, src0, src1) dest.mask = min(src0, src1) -#define x_max(dest, src0, src1) dest = max(src0, src1) +#define x_max(dest, mask, src0, src1) dest.mask = max(src0, src1) -#define x_mad(dest, src0, src1, src2) dest = (src0 * src1) + src2 +#define x_mad(dest, mask, src0, src1, src2) 
dest.mask = (src0 * src1) + src2 // The address register should be floored // Due to rounding differences with the Xbox (and increased precision on PC?) // some titles produce values just below the threshold of the next integer. // We can add a small bias to make sure it's bumped over the threshold // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) -#define x_arl(dest, src0) dest = floor(src0 + 0.0001) +#define x_arl(dest, mask, src0) dest.mask = floor(src0 + 0.0001) -#define x_dp3(dest, src0, src1) dest = dot((float3)src0, (float3)src1) +#define x_dp3(dest, mask, src0, src1) dest.mask = dot((float3)src0, (float3)src1) -#define x_dph(dest, src0, src1) x_dp3(src0, src1) + src1.w +#define x_dph(dest, mask, src0, src1) dest.mask = dot((float3)src0, (float3)src1) + src1.w -#define x_dp4(dest, src0, src1) dest = dot(src0, src1) +#define x_dp4(dest, mask, src0, src1) dest.mask = dot(src0, src1) -#define x_sge(dest, src0) dest = _sge(src0) +#define x_sge(dest, mask, src0, src1) dest.mask = _sge(src0, src1).mask float4 _sge(float4 src0, float4 src1) { float4 dest; @@ -64,7 +64,7 @@ float4 _sge(float4 src0, float4 src1) return dest; } -#define x_slt(dest, src0) dest = _slt(src0) +#define x_slt(dest, mask, src0, src1) dest.mask = _slt(src0, src1).mask float4 _slt(float4 src0, float4 src1) { float4 dest; @@ -79,10 +79,10 @@ float4 _slt(float4 src0, float4 src1) #define scalar_component(src0) src0.x -#define x_rcp(dest, src0) dest = 1 / scalar_component(src0) -// TODO : #define x_rcp(dest, src0) dest = (scalar_component(src0) == 0) ? 1.#INF : (1 / scalar_component(src0)) +#define x_rcp(dest, mask, src0) dest.mask = 1 / scalar_component(src0) +// TODO : #define x_rcp(dest, mask, src0) dest.mask = (scalar_component(src0) == 0) ? 
1.#INF : (1 / scalar_component(src0)) -#define x_rcc(dest, src0) dest = _rcc(src0) +#define x_rcc(dest, mask, src0) dest.mask = _rcc(src0).mask float _rcc(float4 src0) { float input = scalar_component(src0); @@ -96,9 +96,9 @@ float _rcc(float4 src0) : clamp(r, -1.84467e+019f, -5.42101e-020f); } -#define x_rsq(dest, src0) dest = rsqrt(abs(scalar_component(src0))) +#define x_rsq(dest, mask, src0) dest.mask = rsqrt(abs(scalar_component(src0))) -#define x_expp(dest, src0) dest = x_expp(src0) +#define x_expp(dest, mask, src0) dest.mask = _expp(src0).mask float4 _expp(float4 src0) { float input = scalar_component(src0); @@ -113,7 +113,7 @@ float4 _expp(float4 src0) return dest; } -#define x_logp(dest, src0) dest = _logp(src0) +#define x_logp(dest, mask, src0) dest.mask = _logp(src0).mask float4 _logp(float4 src0) { float input = abs(scalar_component(src0)); @@ -128,7 +128,7 @@ float4 _logp(float4 src0) return dest; } -#define x_lit(dest, src) dest = _lit(src) +#define x_lit(dest, mask, src) dest.mask = _lit(src).mask float4 _lit(float4 src0) { const float epsilon = 1.0f / 256.0f; @@ -140,7 +140,7 @@ float4 _lit(float4 src0) float4 dest; dest.x = 1; dest.y = max(0, diffuse); - dest.z = diffuse > 0 ? pow(2, specPower * log(blinn)) : 0; // TODO : Use exp2(#) instead of pow(2, #) ? + dest.z = diffuse > 0 ? exp2(specPower * log(blinn)) : 0; // TODO : Use dest.z = (diffuse > 0) && (blinn > 0) ? 
pow(blinn, specPower) : 0; dest.w = 1; @@ -175,7 +175,7 @@ VS_OUTPUT main(const VS_INPUT xIn) #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox // Address (index) register - int a0_x; + int1 a0; // Output variables float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3; diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 7025fb58e..079587756 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -2005,22 +2005,20 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) hlsl << OReg_Name[dest.Address]; break; case IMD_OUTPUT_A0X: - hlsl << "a0_x"; // Is this actually a valid output? + hlsl << "a0"; // Is this actually a valid output? break; default: assert(false); break; } - // If we're not writing all channels, write the mask - if (!(dest.Mask[0] && dest.Mask[1] && dest.Mask[2] && dest.Mask[3])) - { - hlsl << "."; - if (dest.Mask[0]) hlsl << "x"; - if (dest.Mask[1]) hlsl << "y"; - if (dest.Mask[2]) hlsl << "z"; - if (dest.Mask[3]) hlsl << "w"; - } + // Write the mask as a separate argument to the opcode defines + // (No space, so that "dest,mask, ..." looks close to "dest.mask, ...") + hlsl << ","; + if (dest.Mask[0]) hlsl << "x"; + if (dest.Mask[1]) hlsl << "y"; + if (dest.Mask[2]) hlsl << "z"; + if (dest.Mask[3]) hlsl << "w"; } void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) @@ -2039,10 +2037,10 @@ void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) if (paramMeta.IndexesWithA0_X) { // Only display the offset if it's not 0. 
if (register_number != 0) { - hlsl << "c[a0_x+" << register_number << "]"; + hlsl << "c[a0.x+" << register_number << "]"; } else { - hlsl << "c[a0_x]"; + hlsl << "c[a0.x]"; } } else { hlsl << "c[" << register_number << "]"; From 07ceaf96d5d479b5a3f1aed3379d9c79933807bd Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Tue, 10 Dec 2019 18:20:19 +0100 Subject: [PATCH 36/77] Avoid all implicit typecast warnings, by casting all HSLS calculations to a float4, and apply the output mask to that as well --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 35 +++++++++++++++-------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 461295521..ed0c89152 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -26,32 +26,32 @@ extern uniform float4 c[192] : register(c0); // Functions for MAC ('Multiply And Accumulate') opcodes -#define x_mov(dest, mask, src0) dest.mask = src0 +#define x_mov(dest, mask, src0) dest.mask = ((float4)src0).mask -#define x_mul(dest, mask, src0, src1) dest.mask = src0 * src1 +#define x_mul(dest, mask, src0, src1) dest.mask = ((float4)(src0 * src1)).mask -#define x_add(dest, mask, src0, src1) dest.mask = src0 + src1 +#define x_add(dest, mask, src0, src1) dest.mask = ((float4)(src0 + src1)).mask -#define x_dst(dest, mask, src0, src1) dest.mask = dst(src0, src1) // equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } +#define x_dst(dest, mask, src0, src1) dest.mask = dst(src0, src1).mask /* equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } */ -#define x_min(dest, mask, src0, src1) dest.mask = min(src0, src1) +#define x_min(dest, mask, src0, src1) dest.mask = min(src0, src1).mask -#define x_max(dest, mask, src0, src1) dest.mask = max(src0, src1) +#define x_max(dest, mask, src0, src1) dest.mask = max(src0, src1).mask -#define x_mad(dest, mask, src0, src1, src2) dest.mask = 
(src0 * src1) + src2 +#define x_mad(dest, mask, src0, src1, src2) dest.mask = ((float4)((src0 * src1) + src2)).mask // The address register should be floored // Due to rounding differences with the Xbox (and increased precision on PC?) // some titles produce values just below the threshold of the next integer. // We can add a small bias to make sure it's bumped over the threshold // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) -#define x_arl(dest, mask, src0) dest.mask = floor(src0 + 0.0001) - -#define x_dp3(dest, mask, src0, src1) dest.mask = dot((float3)src0, (float3)src1) - -#define x_dph(dest, mask, src0, src1) dest.mask = dot((float3)src0, (float3)src1) + src1.w - -#define x_dp4(dest, mask, src0, src1) dest.mask = dot(src0, src1) +#define x_arl(dest, mask, src0) dest.mask = floor(src0 + 0.0001).mask + +#define x_dp3(dest, mask, src0, src1) dest.mask = dot((float3)src0, (float3)src1) /* NO mask! */ + +#define x_dph(dest, mask, src0, src1) dest.mask = dot((float3)src0, (float3)src1) + src1.w /* NO mask! */ + +#define x_dp4(dest, mask, src0, src1) dest.mask = dot(src0, src1) /* NO mask! */ #define x_sge(dest, mask, src0, src1) dest.mask = _sge(src0, src1).mask float4 _sge(float4 src0, float4 src1) @@ -96,7 +96,7 @@ float _rcc(float4 src0) : clamp(r, -1.84467e+019f, -5.42101e-020f); } -#define x_rsq(dest, mask, src0) dest.mask = rsqrt(abs(scalar_component(src0))) +#define x_rsq(dest, mask, src0) dest.mask = rsqrt(abs(scalar_component(src0))) /* NO mask! 
*/ #define x_expp(dest, mask, src0) dest.mask = _expp(src0).mask float4 _expp(float4 src0) @@ -204,14 +204,15 @@ VS_OUTPUT main(const VS_INPUT xIn) // Xbox shader program // + // Copy variables to output struct VS_OUTPUT xOut; xOut.oPos = reverseScreenspaceTransform(oPos); xOut.oD0 = oD0; xOut.oD1 = oD1; - xOut.oFog = oFog; - xOut.oPts = oPts; + xOut.oFog = oFog.x; + xOut.oPts = oPts.x; xOut.oB0 = oB0; xOut.oB1 = oB1; xOut.oT0 = oT0; From 8ed16b9f5ad2f739cef36a39e81668e78ca8a911 Mon Sep 17 00:00:00 2001 From: patrickvl Date: Wed, 11 Dec 2019 12:09:29 +0100 Subject: [PATCH 37/77] Reorder vertex shader decoding code --- src/core/hle/D3D8/XbVertexShader.cpp | 869 +++++++++++++-------------- 1 file changed, 415 insertions(+), 454 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 079587756..57e6eb315 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -294,16 +294,6 @@ typedef struct _VSH_INTERMEDIATE_FORMAT } VSH_INTERMEDIATE_FORMAT; -// Used for xvu spec definition -typedef struct _VSH_FIELDMAPPING -{ - VSH_FIELD_NAME FieldName; - uint8_t SubToken; - uint8_t StartBit; - uint8_t BitLength; -} -VSH_FIELDMAPPING; - typedef struct _VSH_XBOX_SHADER { XTL::X_VSH_SHADER_HEADER ShaderHeader; @@ -312,82 +302,6 @@ typedef struct _VSH_XBOX_SHADER } VSH_XBOX_SHADER; -// Local constants -static const VSH_FIELDMAPPING g_FieldMapping[] = -{ - // Field Name DWORD BitPos BitSize - { FLD_ILU, 1, 25, 3 }, - { FLD_MAC, 1, 21, 4 }, - { FLD_CONST, 1, 13, 8 }, - { FLD_V, 1, 9, 4 }, - // Input A - { FLD_A_NEG, 1, 8, 1 }, - { FLD_A_SWZ_X, 1, 6, 2 }, - { FLD_A_SWZ_Y, 1, 4, 2 }, - { FLD_A_SWZ_Z, 1, 2, 2 }, - { FLD_A_SWZ_W, 1, 0, 2 }, - { FLD_A_R, 2, 28, 4 }, - { FLD_A_MUX, 2, 26, 2 }, - // Input B - { FLD_B_NEG, 2, 25, 1 }, - { FLD_B_SWZ_X, 2, 23, 2 }, - { FLD_B_SWZ_Y, 2, 21, 2 }, - { FLD_B_SWZ_Z, 2, 19, 2 }, - { FLD_B_SWZ_W, 2, 17, 2 }, - { FLD_B_R, 2, 13, 4 }, - { FLD_B_MUX, 2, 11, 2 }, - // 
Input C - { FLD_C_NEG, 2, 10, 1 }, - { FLD_C_SWZ_X, 2, 8, 2 }, - { FLD_C_SWZ_Y, 2, 6, 2 }, - { FLD_C_SWZ_Z, 2, 4, 2 }, - { FLD_C_SWZ_W, 2, 2, 2 }, - { FLD_C_R_HIGH, 2, 0, 2 }, - { FLD_C_R_LOW, 3, 30, 2 }, - { FLD_C_MUX, 3, 28, 2 }, - // Output - { FLD_OUT_MAC_MASK_X, 3, 27, 1 }, - { FLD_OUT_MAC_MASK_Y, 3, 26, 1 }, - { FLD_OUT_MAC_MASK_Z, 3, 25, 1 }, - { FLD_OUT_MAC_MASK_W, 3, 24, 1 }, - { FLD_OUT_R, 3, 20, 4 }, - { FLD_OUT_ILU_MASK_X, 3, 19, 1 }, - { FLD_OUT_ILU_MASK_Y, 3, 18, 1 }, - { FLD_OUT_ILU_MASK_Z, 3, 17, 1 }, - { FLD_OUT_ILU_MASK_W, 3, 16, 1 }, - { FLD_OUT_O_MASK_X, 3, 15, 1 }, - { FLD_OUT_O_MASK_Y, 3, 14, 1 }, - { FLD_OUT_O_MASK_Z, 3, 13, 1 }, - { FLD_OUT_O_MASK_W, 3, 12, 1 }, - { FLD_OUT_ORB, 3, 11, 1 }, - { FLD_OUT_ADDRESS, 3, 3, 8 }, - { FLD_OUT_MUX, 3, 2, 1 }, - // Relative addressing - { FLD_A0X, 3, 1, 1 }, - // Final instruction - { FLD_FINAL, 3, 0, 1 } -}; - -static const char* OReg_Name[] = -{ - "oPos", - "???", - "???", - "oD0", - "oD1", - "oFog", - "oPts", - "oB0", - "oB1", - "oT0", - "oT1", - "oT2", - "oT3", - "???", - "???", - "a0.x" -}; - // TODO : Reinstate and use : std::array RegVIsPresentInDeclaration; /* TODO : map non-FVF Xbox vertex shader handle to CxbxVertexShader (a struct containing a host Xbox vertex shader handle and the original members) @@ -398,44 +312,8 @@ void CxbxUpdateVertexShader(DWORD XboxVertexShaderHandle) CxbxVertexShader &VertexShader = g_CxbxVertexShaders[XboxVertexShaderHandle]; }*/ -static inline int IsInUse(const boolean *pMask) -{ - return (pMask[0] || pMask[1] || pMask[2] || pMask[3]); -} - -static inline boolean HasMACR(VSH_SHADER_INSTRUCTION *pInstruction) -{ - return IsInUse(pInstruction->Output.MACRMask) && pInstruction->MAC != MAC_NOP; -} - -static inline boolean HasMACO(VSH_SHADER_INSTRUCTION *pInstruction) -{ - return IsInUse(pInstruction->Output.OutputMask) && - pInstruction->Output.OutputMux == OMUX_MAC && - pInstruction->MAC != MAC_NOP; -} - -static inline boolean HasMACARL(VSH_SHADER_INSTRUCTION 
*pInstruction) -{ - return /*!IsInUse(pInstruction->Output.OutputMask) && - pInstruction->Output.OutputMux == OMUX_MAC &&*/ - pInstruction->MAC == MAC_ARL; -} - -static inline boolean HasILUR(VSH_SHADER_INSTRUCTION *pInstruction) -{ - return IsInUse(pInstruction->Output.ILURMask) && pInstruction->ILU != ILU_NOP; -} - -static inline boolean HasILUO(VSH_SHADER_INSTRUCTION *pInstruction) -{ - return IsInUse(pInstruction->Output.OutputMask) && - pInstruction->Output.OutputMux == OMUX_ILU && - pInstruction->ILU != ILU_NOP; -} - // Retrieves a number of bits in the instruction token -static inline int VshGetFromToken(uint32_t *pShaderToken, +static inline uint32_t VshGetFromToken(uint32_t *pShaderToken, uint8_t SubToken, uint8_t StartBit, uint8_t BitLength) @@ -443,21 +321,80 @@ static inline int VshGetFromToken(uint32_t *pShaderToken, return (pShaderToken[SubToken] >> StartBit) & ~(0xFFFFFFFF << BitLength); } +static uint8_t VshGetField(uint32_t *pShaderToken, + VSH_FIELD_NAME FieldName) +{ + // Used for xvu spec definition + static const struct { + uint8_t SubToken; + uint8_t StartBit; + uint8_t BitLength; + } FieldMapping[/*VSH_FIELD_NAME*/] = { + // SubToken BitPos BitSize + { 1, 25, 3 }, // FLD_ILU, + { 1, 21, 4 }, // FLD_MAC, + { 1, 13, 8 }, // FLD_CONST, + { 1, 9, 4 }, // FLD_V, + // Input A + { 1, 8, 1 }, // FLD_A_NEG, + { 1, 6, 2 }, // FLD_A_SWZ_X, + { 1, 4, 2 }, // FLD_A_SWZ_Y, + { 1, 2, 2 }, // FLD_A_SWZ_Z, + { 1, 0, 2 }, // FLD_A_SWZ_W, + { 2, 28, 4 }, // FLD_A_R, + { 2, 26, 2 }, // FLD_A_MUX, + // Input B + { 2, 25, 1 }, // FLD_B_NEG, + { 2, 23, 2 }, // FLD_B_SWZ_X, + { 2, 21, 2 }, // FLD_B_SWZ_Y, + { 2, 19, 2 }, // FLD_B_SWZ_Z, + { 2, 17, 2 }, // FLD_B_SWZ_W, + { 2, 13, 4 }, // FLD_B_R, + { 2, 11, 2 }, // FLD_B_MUX, + // Input C + { 2, 10, 1 }, // FLD_C_NEG, + { 2, 8, 2 }, // FLD_C_SWZ_X, + { 2, 6, 2 }, // FLD_C_SWZ_Y, + { 2, 4, 2 }, // FLD_C_SWZ_Z, + { 2, 2, 2 }, // FLD_C_SWZ_W, + { 2, 0, 2 }, // FLD_C_R_HIGH, + { 3, 30, 2 }, // FLD_C_R_LOW, + { 3, 28, 2 
}, // FLD_C_MUX, + // Output + { 3, 27, 1 }, // FLD_OUT_MAC_MASK_X, + { 3, 26, 1 }, // FLD_OUT_MAC_MASK_Y, + { 3, 25, 1 }, // FLD_OUT_MAC_MASK_Z, + { 3, 24, 1 }, // FLD_OUT_MAC_MASK_W, + { 3, 20, 4 }, // FLD_OUT_R, + { 3, 19, 1 }, // FLD_OUT_ILU_MASK_X, + { 3, 18, 1 }, // FLD_OUT_ILU_MASK_Y, + { 3, 17, 1 }, // FLD_OUT_ILU_MASK_Z, + { 3, 16, 1 }, // FLD_OUT_ILU_MASK_W, + { 3, 15, 1 }, // FLD_OUT_O_MASK_X, + { 3, 14, 1 }, // FLD_OUT_O_MASK_Y, + { 3, 13, 1 }, // FLD_OUT_O_MASK_Z, + { 3, 12, 1 }, // FLD_OUT_O_MASK_W, + { 3, 11, 1 }, // FLD_OUT_ORB, + { 3, 3, 8 }, // FLD_OUT_ADDRESS, + { 3, 2, 1 }, // FLD_OUT_MUX, + // Relative addressing + { 3, 1, 1 }, // FLD_A0X, + // Final instruction + { 3, 0, 1 } // FLD_FINAL, + }; + + return (uint8_t)(VshGetFromToken(pShaderToken, + FieldMapping[FieldName].SubToken, + FieldMapping[FieldName].StartBit, + FieldMapping[FieldName].BitLength)); +} + // Converts the C register address to disassembly format static inline int16_t ConvertCRegister(const int16_t CReg) { return ((((CReg >> 5) & 7) - 3) * 32) + (CReg & 31); } -uint8_t VshGetField(uint32_t *pShaderToken, - VSH_FIELD_NAME FieldName) -{ - return (uint8_t)(VshGetFromToken(pShaderToken, - g_FieldMapping[FieldName].SubToken, - g_FieldMapping[FieldName].StartBit, - g_FieldMapping[FieldName].BitLength)); -} - static void VshParseInstruction(uint32_t *pShaderToken, VSH_SHADER_INSTRUCTION *pInstruction) { @@ -566,65 +503,286 @@ static void VshParseInstruction(uint32_t *pShaderToken, pInstruction->Final = VshGetField(pShaderToken, FLD_FINAL); } -// Print functions -static char *VshGetRegisterName(VSH_PARAMETER_TYPE ParameterType) +static inline int VshIsMaskInUse(const boolean* pMask) { - switch(ParameterType) + return (pMask[0] || pMask[1] || pMask[2] || pMask[3]); +} + +static inline boolean VshInstrWritesToMAC_R(VSH_SHADER_INSTRUCTION* pInstruction) +{ + return VshIsMaskInUse(pInstruction->Output.MACRMask) + && pInstruction->MAC != MAC_NOP; +} + +static inline boolean 
VshInstrWritesToMAC_O(VSH_SHADER_INSTRUCTION* pInstruction) +{ + return VshIsMaskInUse(pInstruction->Output.OutputMask) + && pInstruction->Output.OutputMux == OMUX_MAC + && pInstruction->MAC != MAC_NOP; +} + +static inline boolean VshInstrWritesToMAC_ARL(VSH_SHADER_INSTRUCTION* pInstruction) +{ + return /*!VshIsMaskInUse(pInstruction->Output.OutputMask) && + pInstruction->Output.OutputMux == OMUX_MAC &&*/ + pInstruction->MAC == MAC_ARL; +} + +static inline boolean VshInstrWritesToILU_R(VSH_SHADER_INSTRUCTION* pInstruction) +{ + return VshIsMaskInUse(pInstruction->Output.ILURMask) + && pInstruction->ILU != ILU_NOP; +} + +static inline boolean VshInstrWritesToILU_O(VSH_SHADER_INSTRUCTION* pInstruction) +{ + return VshIsMaskInUse(pInstruction->Output.OutputMask) + && pInstruction->Output.OutputMux == OMUX_ILU + && pInstruction->ILU != ILU_NOP; +} + +static void VshAddParameter(VSH_PARAMETER *pParameter, + boolean a0x, + VSH_IMD_PARAMETER *pIntermediateParameter) +{ + pIntermediateParameter->Parameter = *pParameter; + pIntermediateParameter->Active = TRUE; + pIntermediateParameter->IndexesWithA0_X = a0x; +} + +static void VshAddParameters(VSH_SHADER_INSTRUCTION *pInstruction, + VSH_ILU ILU, + VSH_MAC MAC, + VSH_IMD_PARAMETER *pParameters) +{ + uint8_t ParamCount = 0; + + if(MAC >= MAC_MOV) { - case PARAM_R: - return "r"; - case PARAM_V: - return "v"; - case PARAM_C: - return "c"; - case PARAM_O: - return "oPos"; - default: - return "?"; + VshAddParameter(&pInstruction->A, pInstruction->a0x, &pParameters[ParamCount]); + ParamCount++; + } + + if((MAC == MAC_MUL) || ((MAC >= MAC_MAD) && (MAC <= MAC_SGE))) + { + VshAddParameter(&pInstruction->B, pInstruction->a0x, &pParameters[ParamCount]); + ParamCount++; + } + + if((ILU >= ILU_MOV) || (MAC == MAC_ADD) || (MAC == MAC_MAD)) + { + VshAddParameter(&pInstruction->C, pInstruction->a0x, &pParameters[ParamCount]); + ParamCount++; } } -char* XboxVertexRegisterAsString(DWORD VertexRegister) +static void 
VshVerifyBufferBounds(VSH_XBOX_SHADER *pShader) { - switch (VertexRegister) - { - case XTL::X_D3DVSDE_VERTEX: // -1 - return "D3DVSDE_VERTEX /* xbox ext. */"; - case XTL::X_D3DVSDE_POSITION: // 0 - return "D3DVSDE_POSITION"; - case XTL::X_D3DVSDE_BLENDWEIGHT: // 1 - return "D3DVSDE_BLENDWEIGHT"; - case XTL::X_D3DVSDE_NORMAL: // 2 - return "D3DVSDE_NORMAL"; - case XTL::X_D3DVSDE_DIFFUSE: // 3 - return "D3DVSDE_DIFFUSE"; - case XTL::X_D3DVSDE_SPECULAR: // 4 - return "D3DVSDE_SPECULAR"; - case XTL::X_D3DVSDE_FOG: // 5 - return "D3DVSDE_FOG"; - case XTL::X_D3DVSDE_POINTSIZE: // 6 - return "D3DVDSE_POINTSIZE"; - case XTL::X_D3DVSDE_BACKDIFFUSE: // 7 - return "D3DVSDE_BACKDIFFUSE /* xbox ext. */"; - case XTL::X_D3DVSDE_BACKSPECULAR: // 8 - return "D3DVSDE_BACKSPECULAR /* xbox ext. */"; - case XTL::X_D3DVSDE_TEXCOORD0: // 9 - return "D3DVSDE_TEXCOORD0"; - case XTL::X_D3DVSDE_TEXCOORD1: // 10 - return "D3DVSDE_TEXCOORD1"; - case XTL::X_D3DVSDE_TEXCOORD2: // 11 - return "D3DVSDE_TEXCOORD2"; - case XTL::X_D3DVSDE_TEXCOORD3: // 12 - return "D3DVSDE_TEXCOORD3"; - case 13: - return "13 /* unknown register */"; - case 14: - return "14 /* unknown register */"; - case 15: - return "15 /* unknown register */"; - default: - return "16 /* or higher, unknown register */"; - } + if(pShader->IntermediateCount >= VSH_MAX_INTERMEDIATE_COUNT) + { + CxbxKrnlCleanup("Shader exceeds conversion buffer!"); + } +} + +static VSH_INTERMEDIATE_FORMAT *VshNewIntermediate(VSH_XBOX_SHADER *pShader) +{ + VshVerifyBufferBounds(pShader); + + ZeroMemory(&pShader->Intermediate[pShader->IntermediateCount], sizeof(VSH_INTERMEDIATE_FORMAT)); + + return &pShader->Intermediate[pShader->IntermediateCount++]; +} + +static boolean VshAddInstructionMAC_R(VSH_SHADER_INSTRUCTION *pInstruction, + VSH_XBOX_SHADER *pShader, + boolean IsCombined) +{ + VSH_INTERMEDIATE_FORMAT *pIntermediate; + + if(!VshInstrWritesToMAC_R(pInstruction)) { + return FALSE; + } + + pIntermediate = VshNewIntermediate(pShader); + 
pIntermediate->IsCombined = IsCombined; + + // Opcode + pIntermediate->InstructionType = IMD_MAC; + pIntermediate->MAC = pInstruction->MAC; + + // Output param + pIntermediate->Output.Type = IMD_OUTPUT_R; + pIntermediate->Output.Address = pInstruction->Output.MACRAddress; + memcpy(pIntermediate->Output.Mask, pInstruction->Output.MACRMask, sizeof(boolean) * 4); + + // Other parameters + VshAddParameters(pInstruction, ILU_NOP, pInstruction->MAC, pIntermediate->Parameters); + + return TRUE; +} + +static boolean VshAddInstructionMAC_O(VSH_SHADER_INSTRUCTION* pInstruction, + VSH_XBOX_SHADER *pShader, + boolean IsCombined) +{ + VSH_INTERMEDIATE_FORMAT *pIntermediate; + + if(!VshInstrWritesToMAC_O(pInstruction)) { + return FALSE; + } + + pIntermediate = VshNewIntermediate(pShader); + pIntermediate->IsCombined = IsCombined; + + // Opcode + pIntermediate->InstructionType = IMD_MAC; + pIntermediate->MAC = pInstruction->MAC; + + // Output param + pIntermediate->Output.Type = pInstruction->Output.OutputType == OUTPUT_C ? 
IMD_OUTPUT_C : IMD_OUTPUT_O; + pIntermediate->Output.Address = pInstruction->Output.OutputAddress; + memcpy(pIntermediate->Output.Mask, pInstruction->Output.OutputMask, sizeof(boolean) * 4); + + // Other parameters + VshAddParameters(pInstruction, ILU_NOP, pInstruction->MAC, pIntermediate->Parameters); + + return TRUE; +} + +static boolean VshAddInstructionMAC_ARL(VSH_SHADER_INSTRUCTION *pInstruction, + VSH_XBOX_SHADER *pShader, + boolean IsCombined) +{ + VSH_INTERMEDIATE_FORMAT *pIntermediate; + + if(!VshInstrWritesToMAC_ARL(pInstruction)) { + return FALSE; + } + + pIntermediate = VshNewIntermediate(pShader); + pIntermediate->IsCombined = IsCombined; + + // Opcode + pIntermediate->InstructionType = IMD_MAC; + pIntermediate->MAC = pInstruction->MAC; + + // Output param + pIntermediate->Output.Type = IMD_OUTPUT_A0X; + pIntermediate->Output.Address = pInstruction->Output.OutputAddress; + + // Other parameters + VshAddParameters(pInstruction, ILU_NOP, pInstruction->MAC, pIntermediate->Parameters); + + return TRUE; +} + +static boolean VshAddInstructionILU_R(VSH_SHADER_INSTRUCTION *pInstruction, + VSH_XBOX_SHADER *pShader, + boolean IsCombined) +{ + VSH_INTERMEDIATE_FORMAT *pIntermediate; + + if(!VshInstrWritesToILU_R(pInstruction)) { + return FALSE; + } + + pIntermediate = VshNewIntermediate(pShader); + pIntermediate->IsCombined = IsCombined; + + // Opcode + pIntermediate->InstructionType = IMD_ILU; + pIntermediate->ILU = pInstruction->ILU; + + // Output param + pIntermediate->Output.Type = IMD_OUTPUT_R; + // If this is a combined instruction, only r1 is allowed (R address should not be used) + pIntermediate->Output.Address = IsCombined ? 
1 : pInstruction->Output.ILURAddress; + memcpy(pIntermediate->Output.Mask, pInstruction->Output.ILURMask, sizeof(boolean) * 4); + + // Other parameters + VshAddParameters(pInstruction, pInstruction->ILU, MAC_NOP, pIntermediate->Parameters); + + return TRUE; +} + +static boolean VshAddInstructionILU_O(VSH_SHADER_INSTRUCTION *pInstruction, + VSH_XBOX_SHADER *pShader, + boolean IsCombined) +{ + VSH_INTERMEDIATE_FORMAT *pIntermediate; + + if(!VshInstrWritesToILU_O(pInstruction)) { + return FALSE; + } + + pIntermediate = VshNewIntermediate(pShader); + pIntermediate->IsCombined = IsCombined; + + // Opcode + pIntermediate->InstructionType = IMD_ILU; + pIntermediate->ILU = pInstruction->ILU; + + // Output param + pIntermediate->Output.Type = pInstruction->Output.OutputType == OUTPUT_C ? IMD_OUTPUT_C : IMD_OUTPUT_O; + pIntermediate->Output.Address = pInstruction->Output.OutputAddress; + memcpy(pIntermediate->Output.Mask, pInstruction->Output.OutputMask, sizeof(boolean) * 4); + + // Other parameters + VshAddParameters(pInstruction, pInstruction->ILU, MAC_NOP, pIntermediate->Parameters); + + return TRUE; +} + +static void VshConvertToIntermediate(VSH_SHADER_INSTRUCTION *pInstruction, + VSH_XBOX_SHADER *pShader) +{ + // Five types of instructions: + // MAC + // + // ILU + // + // MAC + // +ILU + // + // MAC + // +MAC + // +ILU + // + // MAC + // +ILU + // +ILU + boolean IsCombined = FALSE; + + if (VshAddInstructionMAC_R(pInstruction, pShader, IsCombined)) { + if (VshInstrWritesToMAC_O(pInstruction) || + VshInstrWritesToILU_R(pInstruction) || + VshInstrWritesToILU_O(pInstruction)) { + IsCombined = TRUE; + } + } + + if (VshAddInstructionMAC_O(pInstruction, pShader, IsCombined)) { + if (VshInstrWritesToILU_R(pInstruction) || + VshInstrWritesToILU_O(pInstruction)) { + IsCombined = TRUE; + } + } + + // Special case, arl (mov a0.x, ...) 
+ if (VshAddInstructionMAC_ARL(pInstruction, pShader, IsCombined)) { + if (VshInstrWritesToILU_R(pInstruction) || + VshInstrWritesToILU_O(pInstruction)) { + IsCombined = TRUE; + } + } + + if (VshAddInstructionILU_R(pInstruction, pShader, IsCombined)) { + if (VshInstrWritesToILU_O(pInstruction)) { + IsCombined = TRUE; + } + } + + (void)VshAddInstructionILU_O(pInstruction, pShader, IsCombined); } #define D3DDECLUSAGE_UNSUPPORTED ((D3DDECLUSAGE)-1) @@ -690,281 +848,55 @@ D3DDECLUSAGE Xb2PCRegisterType return PCRegisterType; } -extern D3DCAPS g_D3DCaps; - -static void VshAddParameter(VSH_PARAMETER *pParameter, - boolean a0x, - VSH_IMD_PARAMETER *pIntermediateParameter) +char* XboxVertexRegisterAsString(DWORD VertexRegister) { - pIntermediateParameter->Parameter = *pParameter; - pIntermediateParameter->Active = TRUE; - pIntermediateParameter->IndexesWithA0_X = a0x; -} - -static void VshAddParameters(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_ILU ILU, - VSH_MAC MAC, - VSH_IMD_PARAMETER *pParameters) -{ - uint8_t ParamCount = 0; - - if(MAC >= MAC_MOV) - { - VshAddParameter(&pInstruction->A, pInstruction->a0x, &pParameters[ParamCount]); - ParamCount++; - } - - if((MAC == MAC_MUL) || ((MAC >= MAC_MAD) && (MAC <= MAC_SGE))) - { - VshAddParameter(&pInstruction->B, pInstruction->a0x, &pParameters[ParamCount]); - ParamCount++; - } - - if((ILU >= ILU_MOV) || (MAC == MAC_ADD) || (MAC == MAC_MAD)) - { - VshAddParameter(&pInstruction->C, pInstruction->a0x, &pParameters[ParamCount]); - ParamCount++; - } -} - -static void VshVerifyBufferBounds(VSH_XBOX_SHADER *pShader) -{ - if(pShader->IntermediateCount >= VSH_MAX_INTERMEDIATE_COUNT) - { - CxbxKrnlCleanup("Shader exceeds conversion buffer!"); - } -} - -static VSH_INTERMEDIATE_FORMAT *VshNewIntermediate(VSH_XBOX_SHADER *pShader) -{ - VshVerifyBufferBounds(pShader); - - ZeroMemory(&pShader->Intermediate[pShader->IntermediateCount], sizeof(VSH_INTERMEDIATE_FORMAT)); - - return &pShader->Intermediate[pShader->IntermediateCount++]; -} - 
-static boolean VshAddInstructionMAC_R(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader, - boolean IsCombined) -{ - VSH_INTERMEDIATE_FORMAT *pIntermediate; - if(!HasMACR(pInstruction)) - { - return FALSE; - } - - pIntermediate = VshNewIntermediate(pShader); - pIntermediate->IsCombined = IsCombined; - - // Opcode - pIntermediate->InstructionType = IMD_MAC; - pIntermediate->MAC = pInstruction->MAC; - - // Output param - pIntermediate->Output.Type = IMD_OUTPUT_R; - pIntermediate->Output.Address = pInstruction->Output.MACRAddress; - memcpy(pIntermediate->Output.Mask, pInstruction->Output.MACRMask, sizeof(boolean) * 4); - - // Other parameters - VshAddParameters(pInstruction, ILU_NOP, pInstruction->MAC, pIntermediate->Parameters); - - return TRUE; -} - -static boolean VshAddInstructionMAC_O(VSH_SHADER_INSTRUCTION* pInstruction, - VSH_XBOX_SHADER *pShader, - boolean IsCombined) -{ - VSH_INTERMEDIATE_FORMAT *pIntermediate; - if(!HasMACO(pInstruction)) - { - return FALSE; - } - - pIntermediate = VshNewIntermediate(pShader); - pIntermediate->IsCombined = IsCombined; - - // Opcode - pIntermediate->InstructionType = IMD_MAC; - pIntermediate->MAC = pInstruction->MAC; - - // Output param - pIntermediate->Output.Type = pInstruction->Output.OutputType == OUTPUT_C ? 
IMD_OUTPUT_C : IMD_OUTPUT_O; - pIntermediate->Output.Address = pInstruction->Output.OutputAddress; - memcpy(pIntermediate->Output.Mask, pInstruction->Output.OutputMask, sizeof(boolean) * 4); - - // Other parameters - VshAddParameters(pInstruction, ILU_NOP, pInstruction->MAC, pIntermediate->Parameters); - - return TRUE; -} - -static boolean VshAddInstructionMAC_ARL(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader, - boolean IsCombined) -{ - VSH_INTERMEDIATE_FORMAT *pIntermediate; - if(!HasMACARL(pInstruction)) - { - return FALSE; - } - - pIntermediate = VshNewIntermediate(pShader); - pIntermediate->IsCombined = IsCombined; - - // Opcode - pIntermediate->InstructionType = IMD_MAC; - pIntermediate->MAC = pInstruction->MAC; - - // Output param - pIntermediate->Output.Type = IMD_OUTPUT_A0X; - pIntermediate->Output.Address = pInstruction->Output.OutputAddress; - - // Other parameters - VshAddParameters(pInstruction, ILU_NOP, pInstruction->MAC, pIntermediate->Parameters); - - return TRUE; -} - -static boolean VshAddInstructionILU_R(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader, - boolean IsCombined) -{ - VSH_INTERMEDIATE_FORMAT *pIntermediate; - if(!HasILUR(pInstruction)) - { - return FALSE; - } - - pIntermediate = VshNewIntermediate(pShader); - pIntermediate->IsCombined = IsCombined; - - // Opcode - pIntermediate->InstructionType = IMD_ILU; - pIntermediate->ILU = pInstruction->ILU; - - // Output param - pIntermediate->Output.Type = IMD_OUTPUT_R; - // If this is a combined instruction, only r1 is allowed (R address should not be used) - pIntermediate->Output.Address = IsCombined ? 
1 : pInstruction->Output.ILURAddress; - memcpy(pIntermediate->Output.Mask, pInstruction->Output.ILURMask, sizeof(boolean) * 4); - - // Other parameters - VshAddParameters(pInstruction, pInstruction->ILU, MAC_NOP, pIntermediate->Parameters); - - return TRUE; -} - -static boolean VshAddInstructionILU_O(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader, - boolean IsCombined) -{ - VSH_INTERMEDIATE_FORMAT *pIntermediate; - if(!HasILUO(pInstruction)) - { - return FALSE; - } - - pIntermediate = VshNewIntermediate(pShader); - pIntermediate->IsCombined = IsCombined; - - // Opcode - pIntermediate->InstructionType = IMD_ILU; - pIntermediate->ILU = pInstruction->ILU; - - // Output param - pIntermediate->Output.Type = pInstruction->Output.OutputType == OUTPUT_C ? IMD_OUTPUT_C : IMD_OUTPUT_O; - pIntermediate->Output.Address = pInstruction->Output.OutputAddress; - memcpy(pIntermediate->Output.Mask, pInstruction->Output.OutputMask, sizeof(boolean) * 4); - - // Other parameters - VshAddParameters(pInstruction, pInstruction->ILU, MAC_NOP, pIntermediate->Parameters); - - return TRUE; -} - -static void VshConvertToIntermediate(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader) -{ - // Five types of instructions: - // MAC - // - // ILU - // - // MAC - // +ILU - // - // MAC - // +MAC - // +ILU - // - // MAC - // +ILU - // +ILU - boolean IsCombined = FALSE; - - if(VshAddInstructionMAC_R(pInstruction, pShader, IsCombined)) - { - if(HasMACO(pInstruction) || - HasILUR(pInstruction) || - HasILUO(pInstruction)) - { - IsCombined = TRUE; - } - } - if(VshAddInstructionMAC_O(pInstruction, pShader, IsCombined)) - { - if(HasILUR(pInstruction) || - HasILUO(pInstruction)) - { - IsCombined = TRUE; - } - } - // Special case, arl (mov a0.x, ...) 
- if(VshAddInstructionMAC_ARL(pInstruction, pShader, IsCombined)) - { - if(HasILUR(pInstruction) || - HasILUO(pInstruction)) - { - IsCombined = TRUE; - } - } - if(VshAddInstructionILU_R(pInstruction, pShader, IsCombined)) - { - if(HasILUO(pInstruction)) - { - IsCombined = TRUE; - } - } - (void)VshAddInstructionILU_O(pInstruction, pShader, IsCombined); -} - -static inline void VshSetSwizzle(VSH_PARAMETER *pParameter, - VSH_SWIZZLE x, - VSH_SWIZZLE y, - VSH_SWIZZLE z, - VSH_SWIZZLE w) -{ - pParameter->Swizzle[0] = x; - pParameter->Swizzle[1] = y; - pParameter->Swizzle[2] = z; - pParameter->Swizzle[3] = w; -} - -static inline void VshSetSwizzle(VSH_IMD_PARAMETER *pParameter, - VSH_SWIZZLE x, - VSH_SWIZZLE y, - VSH_SWIZZLE z, - VSH_SWIZZLE w) -{ - VshSetSwizzle(&pParameter->Parameter, x, y, z, w); + switch (VertexRegister) + { + case XTL::X_D3DVSDE_VERTEX: // -1 + return "D3DVSDE_VERTEX /* xbox ext. */"; + case XTL::X_D3DVSDE_POSITION: // 0 + return "D3DVSDE_POSITION"; + case XTL::X_D3DVSDE_BLENDWEIGHT: // 1 + return "D3DVSDE_BLENDWEIGHT"; + case XTL::X_D3DVSDE_NORMAL: // 2 + return "D3DVSDE_NORMAL"; + case XTL::X_D3DVSDE_DIFFUSE: // 3 + return "D3DVSDE_DIFFUSE"; + case XTL::X_D3DVSDE_SPECULAR: // 4 + return "D3DVSDE_SPECULAR"; + case XTL::X_D3DVSDE_FOG: // 5 + return "D3DVSDE_FOG"; + case XTL::X_D3DVSDE_POINTSIZE: // 6 + return "D3DVDSE_POINTSIZE"; + case XTL::X_D3DVSDE_BACKDIFFUSE: // 7 + return "D3DVSDE_BACKDIFFUSE /* xbox ext. */"; + case XTL::X_D3DVSDE_BACKSPECULAR: // 8 + return "D3DVSDE_BACKSPECULAR /* xbox ext. 
*/"; + case XTL::X_D3DVSDE_TEXCOORD0: // 9 + return "D3DVSDE_TEXCOORD0"; + case XTL::X_D3DVSDE_TEXCOORD1: // 10 + return "D3DVSDE_TEXCOORD1"; + case XTL::X_D3DVSDE_TEXCOORD2: // 11 + return "D3DVSDE_TEXCOORD2"; + case XTL::X_D3DVSDE_TEXCOORD3: // 12 + return "D3DVSDE_TEXCOORD3"; + case 13: + return "13 /* unknown register */"; + case 14: + return "14 /* unknown register */"; + case 15: + return "15 /* unknown register */"; + default: + return "16 /* or higher, unknown register */"; + } } // **************************************************************************** // * Vertex shader declaration recompiler // **************************************************************************** +extern D3DCAPS g_D3DCaps; + class XboxVertexDeclarationConverter { protected: @@ -1993,6 +1925,26 @@ void CxbxImpl_SelectVertexShaderDirect void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) { + static const char* OReg_Name[] = + { + "oPos", + "???", + "???", + "oD0", + "oD1", + "oFog", + "oPts", + "oB0", + "oB1", + "oT0", + "oT1", + "oT2", + "oT3", + "???", + "???", + "a0.x" + }; + switch (dest.Type) { case IMD_OUTPUT_C: hlsl << "c[" << dest.Address << "]"; @@ -2023,6 +1975,15 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) { + // Print functions + static char* RegisterName[/*VSH_PARAMETER_TYPE*/] = { + "?", // PARAM_UNKNOWN = 0, + "r", // PARAM_R, // Temporary (scRatch) registers + "v", // PARAM_V, // Vertex registers + "c", // PARAM_C, // Constant registers, set by SetVertexShaderConstant + "oPos" // PARAM_O // = 0?? 
+ }; + auto param = paramMeta.Parameter; if (param.Neg) { @@ -2046,7 +2007,7 @@ void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) hlsl << "c[" << register_number << "]"; } } else { - hlsl << VshGetRegisterName(param.ParameterType) << register_number; + hlsl << RegisterName[param.ParameterType] << register_number; } // Write the swizzle if we need to From a0de74ffd9dba754d564ce020440aef97af28362 Mon Sep 17 00:00:00 2001 From: patrickvl Date: Wed, 11 Dec 2019 12:10:45 +0100 Subject: [PATCH 38/77] Make sure all HLSL parameters regardless their swizzle, are cast to float4 before the actual calculation commences --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 63 +++++++++++++++-------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index ed0c89152..4717a7b2e 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -23,37 +23,42 @@ struct VS_OUTPUT // Xbox constant registers extern uniform float4 c[192] : register(c0); + +float4 _cast(float src) { return float4(src); } +float4 _cast(float2 src) { return src.xyyy; } +float4 _cast(float3 src) { return src.xyzz; } +float4 _cast(float4 src) { return src; } // Functions for MAC ('Multiply And Accumulate') opcodes -#define x_mov(dest, mask, src0) dest.mask = ((float4)src0).mask +#define x_mov(dest, mask, src0) dest.mask = (_cast(src0)).mask -#define x_mul(dest, mask, src0, src1) dest.mask = ((float4)(src0 * src1)).mask +#define x_mul(dest, mask, src0, src1) dest.mask = (_cast(src0) * _cast(src1)).mask -#define x_add(dest, mask, src0, src1) dest.mask = ((float4)(src0 + src1)).mask +#define x_add(dest, mask, src0, src1) dest.mask = (_cast(src0) + _cast(src1))).mask -#define x_dst(dest, mask, src0, src1) dest.mask = dst(src0, src1).mask /* equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } */ +#define x_dst(dest, mask, src0, src1) dest.mask = 
dst(_cast(src0), _cast(src1)).mask /* equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } */ -#define x_min(dest, mask, src0, src1) dest.mask = min(src0, src1).mask +#define x_min(dest, mask, src0, src1) dest.mask = min(_cast(src0), _cast(src1)).mask -#define x_max(dest, mask, src0, src1) dest.mask = max(src0, src1).mask +#define x_max(dest, mask, src0, src1) dest.mask = max(_cast(src0), _cast(src1)).mask -#define x_mad(dest, mask, src0, src1, src2) dest.mask = ((float4)((src0 * src1) + src2)).mask +#define x_mad(dest, mask, src0, src1, src2) dest.mask = (_cast(src0) * _cast(src1) + _cast(src2)).mask // The address register should be floored // Due to rounding differences with the Xbox (and increased precision on PC?) // some titles produce values just below the threshold of the next integer. // We can add a small bias to make sure it's bumped over the threshold // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) -#define x_arl(dest, mask, src0) dest.mask = floor(src0 + 0.0001).mask +#define x_arl(dest, mask, src0) dest = floor(_cast(src0).x + 0.0001) /* NO mask! */ -#define x_dp3(dest, mask, src0, src1) dest.mask = dot((float3)src0, (float3)src1) /* NO mask! */ +#define x_dp3(dest, mask, src0, src1) dest.mask = dot(_cast(src0).xyz, _cast(src1).xyz) /* NO mask! */ -#define x_dph(dest, mask, src0, src1) dest.mask = dot((float3)src0, (float3)src1) + src1.w /* NO mask! */ +#define x_dph(dest, mask, src0, src1) dest.mask = dot(float4(_cast(src0).xyz, 1), _cast(src1)) + src1.w /* NO mask! */ -#define x_dp4(dest, mask, src0, src1) dest.mask = dot(src0, src1) /* NO mask! */ +#define x_dp4(dest, mask, src0, src1) dest.mask = dot(_cast(src0), _cast(src1)) /* NO mask! 
*/ -#define x_sge(dest, mask, src0, src1) dest.mask = _sge(src0, src1).mask +#define x_sge(dest, mask, src0, src1) dest.mask = _sge(_cast(src0), _cast(src1)).mask float4 _sge(float4 src0, float4 src1) { float4 dest; @@ -64,7 +69,7 @@ float4 _sge(float4 src0, float4 src1) return dest; } -#define x_slt(dest, mask, src0, src1) dest.mask = _slt(src0, src1).mask +#define x_slt(dest, mask, src0, src1) dest.mask = _slt(_cast(src0), _cast(src1)).mask float4 _slt(float4 src0, float4 src1) { float4 dest; @@ -76,17 +81,15 @@ float4 _slt(float4 src0, float4 src1) } // Xbox ILU Functions + +#define _scalar(src0) _cast(src0).x -#define scalar_component(src0) src0.x +#define x_rcp(dest, mask, src0) dest.mask = float4(1 / _scalar(src0)).mask +// TODO : #define x_rcp(dest, mask, src0) dest.mask = (_scalar(src0) == 0) ? 1.#INF : (1 / _scalar(src0)) -#define x_rcp(dest, mask, src0) dest.mask = 1 / scalar_component(src0) -// TODO : #define x_rcp(dest, mask, src0) dest.mask = (scalar_component(src0) == 0) ? 1.#INF : (1 / scalar_component(src0)) - -#define x_rcc(dest, mask, src0) dest.mask = _rcc(src0).mask -float _rcc(float4 src0) +#define x_rcc(dest, mask, src0) dest.mask = _rcc(_scalar(src0)).mask +float _rcc(float input) { - float input = scalar_component(src0); - // Calculate the reciprocal float r = 1 / input; @@ -96,12 +99,11 @@ float _rcc(float4 src0) : clamp(r, -1.84467e+019f, -5.42101e-020f); } -#define x_rsq(dest, mask, src0) dest.mask = rsqrt(abs(scalar_component(src0))) /* NO mask! */ +#define x_rsq(dest, mask, src0) dest.mask = rsqrt(abs(_scalar(src0))) /* NO mask! 
*/ -#define x_expp(dest, mask, src0) dest.mask = _expp(src0).mask -float4 _expp(float4 src0) +#define x_expp(dest, mask, src0) dest.mask = _expp(_scalar(src0)).mask +float4 _expp(float input) { - float input = scalar_component(src0); float base = floor(input); float4 dest; @@ -113,10 +115,9 @@ float4 _expp(float4 src0) return dest; } -#define x_logp(dest, mask, src0) dest.mask = _logp(src0).mask -float4 _logp(float4 src0) +#define x_logp(dest, mask, src0) dest.mask = _logp(_scalar(src0)).mask +float4 _logp(float input) { - float input = abs(scalar_component(src0)); float exponent = floor(log2(input)); float4 dest; @@ -128,7 +129,7 @@ float4 _logp(float4 src0) return dest; } -#define x_lit(dest, mask, src) dest.mask = _lit(src).mask +#define x_lit(dest, mask, src) dest.mask = _lit(_cast(src)).mask float4 _lit(float4 src0) { const float epsilon = 1.0f / 256.0f; @@ -171,11 +172,11 @@ VS_OUTPUT main(const VS_INPUT xIn) // Temporary registers float4 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; - r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 1); // TODO correct? 
+ r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 0); #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox // Address (index) register - int1 a0; + int1 a0 = 0; // Output variables float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3; From cb95bbb46f239ffe4fd35f77f32110020d821376 Mon Sep 17 00:00:00 2001 From: patrickvl Date: Wed, 11 Dec 2019 17:05:54 +0100 Subject: [PATCH 39/77] Make sure HLSL scalar outputs are replicated --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 4717a7b2e..9118d5ce1 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -24,10 +24,16 @@ struct VS_OUTPUT // Xbox constant registers extern uniform float4 c[192] : register(c0); +// Overloaded casts, assuring all inputs are treated as float4 float4 _cast(float src) { return float4(src); } float4 _cast(float2 src) { return src.xyyy; } float4 _cast(float3 src) { return src.xyzz; } float4 _cast(float4 src) { return src; } + +float4 _ssss(float src) { return float4(src); } // a scalar output replicated across a 4-component vector +#define _scalar(src0) _cast(src0).x /* a scalar input */ + +// https://www.opengl.org/registry/specs/NV/vertex_program1_1.txt // Functions for MAC ('Multiply And Accumulate') opcodes @@ -52,11 +58,11 @@ float4 _cast(float4 src) { return src; } // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) #define x_arl(dest, mask, src0) dest = floor(_cast(src0).x + 0.0001) /* NO mask! */ -#define x_dp3(dest, mask, src0, src1) dest.mask = dot(_cast(src0).xyz, _cast(src1).xyz) /* NO mask! */ +#define x_dp3(dest, mask, src0, src1) dest.mask = _ssss(dot(_cast(src0).xyz, _cast(src1).xyz)) /* NO mask! 
*/ -#define x_dph(dest, mask, src0, src1) dest.mask = dot(float4(_cast(src0).xyz, 1), _cast(src1)) + src1.w /* NO mask! */ +#define x_dph(dest, mask, src0, src1) dest.mask = _ssss(dot(float4(_cast(src0).xyz, 1), _cast(src1)) + src1.w) /* NO mask! */ -#define x_dp4(dest, mask, src0, src1) dest.mask = dot(_cast(src0), _cast(src1)) /* NO mask! */ +#define x_dp4(dest, mask, src0, src1) dest.mask = _ssss(dot(_cast(src0), _cast(src1))) /* NO mask! */ #define x_sge(dest, mask, src0, src1) dest.mask = _sge(_cast(src0), _cast(src1)).mask float4 _sge(float4 src0, float4 src1) @@ -82,12 +88,10 @@ float4 _slt(float4 src0, float4 src1) // Xbox ILU Functions -#define _scalar(src0) _cast(src0).x - -#define x_rcp(dest, mask, src0) dest.mask = float4(1 / _scalar(src0)).mask +#define x_rcp(dest, mask, src0) dest.mask = _ssss(1 / _scalar(src0)) /* NO mask! */ // TODO : #define x_rcp(dest, mask, src0) dest.mask = (_scalar(src0) == 0) ? 1.#INF : (1 / _scalar(src0)) -#define x_rcc(dest, mask, src0) dest.mask = _rcc(_scalar(src0)).mask +#define x_rcc(dest, mask, src0) dest.mask = _ssss(_rcc(_scalar(src0))) /* NO mask! */ float _rcc(float input) { // Calculate the reciprocal @@ -99,7 +103,7 @@ float _rcc(float input) : clamp(r, -1.84467e+019f, -5.42101e-020f); } -#define x_rsq(dest, mask, src0) dest.mask = rsqrt(abs(_scalar(src0))) /* NO mask! */ +#define x_rsq(dest, mask, src0) dest.mask = _ssss(rsqrt(abs(_scalar(src0)))) /* NO mask! 
*/ #define x_expp(dest, mask, src0) dest.mask = _expp(_scalar(src0)).mask float4 _expp(float input) From 1365d2e7e18e89c6048c86052d7ebb390b85728b Mon Sep 17 00:00:00 2001 From: patrickvl Date: Thu, 12 Dec 2019 00:05:24 +0100 Subject: [PATCH 40/77] Vertex shader HLSL : More commenting, reordering, renaming and fixing --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 185 ++++++++++++++++------------ 1 file changed, 103 insertions(+), 82 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 9118d5ce1..ee5f4aebf 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -6,76 +6,76 @@ struct VS_INPUT float4 v[16] : TEXCOORD; }; -struct VS_OUTPUT +// Output registers +struct VS_OUTPUT { - float4 oPos : POSITION; - float4 oD0 : COLOR0; // Colour 0 - float4 oD1 : COLOR1; // Colour 1 - float oFog : FOG; - float oPts : PSIZE; - float4 oB0 : TEXCOORD4; // Backface Colour 0 - float4 oB1 : TEXCOORD5; // Backface Colour 1 - float4 oT0 : TEXCOORD0; // Texture Coord 0 - float4 oT1 : TEXCOORD1; // Texture Coord 1 - float4 oT2 : TEXCOORD2; // Texture Coord 2 - float4 oT3 : TEXCOORD3; // Texture Coord 3 + float4 oPos : POSITION; // Homogeneous clip space position + float4 oD0 : COLOR0; // Primary color (front-facing) + float4 oD1 : COLOR1; // Secondary color (front-facing) + float oFog : FOG; // Fog coordinate + float oPts : PSIZE; // Point size + float4 oB0 : TEXCOORD4; // Back-facing primary color + float4 oB1 : TEXCOORD5; // Back-facing secondary color + float4 oT0 : TEXCOORD0; // Texture coordinate set 0 + float4 oT1 : TEXCOORD1; // Texture coordinate set 1 + float4 oT2 : TEXCOORD2; // Texture coordinate set 2 + float4 oT3 : TEXCOORD3; // Texture coordinate set 3 }; // Xbox constant registers extern uniform float4 c[192] : register(c0); // Overloaded casts, assuring all inputs are treated as float4 -float4 _cast(float src) { return float4(src); } -float4 _cast(float2 src) { return src.xyyy; } -float4 
_cast(float3 src) { return src.xyzz; } -float4 _cast(float4 src) { return src; } +float4 _tof4(float src) { return float4(src, src, src, src); } +float4 _tof4(float2 src) { return src.xyyy; } +float4 _tof4(float3 src) { return src.xyzz; } +float4 _tof4(float4 src) { return src; } +float4 _ssss(float s) { return float4(s, s, s, s); } // a scalar output replicated across a 4-component vector +#define _scalar(src) _tof4(src).x /* a scalar input */ -float4 _ssss(float src) { return float4(src); } // a scalar output replicated across a 4-component vector -#define _scalar(src0) _cast(src0).x /* a scalar input */ - -// https://www.opengl.org/registry/specs/NV/vertex_program1_1.txt +// http://xboxdevwiki.net/NV2A/Vertex_Shader +// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program.txt +// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program1_1.txt // Functions for MAC ('Multiply And Accumulate') opcodes - -#define x_mov(dest, mask, src0) dest.mask = (_cast(src0)).mask - -#define x_mul(dest, mask, src0, src1) dest.mask = (_cast(src0) * _cast(src1)).mask - -#define x_add(dest, mask, src0, src1) dest.mask = (_cast(src0) + _cast(src1))).mask - -#define x_dst(dest, mask, src0, src1) dest.mask = dst(_cast(src0), _cast(src1)).mask /* equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } */ - -#define x_min(dest, mask, src0, src1) dest.mask = min(_cast(src0), _cast(src1)).mask - -#define x_max(dest, mask, src0, src1) dest.mask = max(_cast(src0), _cast(src1)).mask - -#define x_mad(dest, mask, src0, src1, src2) dest.mask = (_cast(src0) * _cast(src1) + _cast(src2)).mask - + +// 2.14.1.10.1 ARL: Address Register Load // The address register should be floored // Due to rounding differences with the Xbox (and increased precision on PC?) // some titles produce values just below the threshold of the next integer. 
// We can add a small bias to make sure it's bumped over the threshold // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) -#define x_arl(dest, mask, src0) dest = floor(_cast(src0).x + 0.0001) /* NO mask! */ +#define x_arl(dest, mask, src0) dest.mask = floor(_tof4(src0).x + 0.0001).mask -#define x_dp3(dest, mask, src0, src1) dest.mask = _ssss(dot(_cast(src0).xyz, _cast(src1).xyz)) /* NO mask! */ +// 2.14.1.10.2 MOV: Move +#define x_mov(dest, mask, src0) dest.mask = (_tof4(src0)).mask -#define x_dph(dest, mask, src0, src1) dest.mask = _ssss(dot(float4(_cast(src0).xyz, 1), _cast(src1)) + src1.w) /* NO mask! */ +// 2.14.1.10.3 MUL: Multiply +#define x_mul(dest, mask, src0, src1) dest.mask = (_tof4(src0) * _tof4(src1)).mask -#define x_dp4(dest, mask, src0, src1) dest.mask = _ssss(dot(_cast(src0), _cast(src1))) /* NO mask! */ - -#define x_sge(dest, mask, src0, src1) dest.mask = _sge(_cast(src0), _cast(src1)).mask -float4 _sge(float4 src0, float4 src1) -{ - float4 dest; - dest.x = (src0.x >= src1.x) ? 1 : 0; - dest.y = (src0.y >= src1.y) ? 1 : 0; - dest.z = (src0.z >= src1.z) ? 1 : 0; - dest.w = (src0.w >= src1.w) ? 
1 : 0; - return dest; -} - -#define x_slt(dest, mask, src0, src1) dest.mask = _slt(_cast(src0), _cast(src1)).mask +// 2.14.1.10.4 ADD: Add +#define x_add(dest, mask, src0, src1) dest.mask = (_tof4(src0) + _tof4(src1)).mask + +// 2.14.1.10.5 MAD: Multiply and Add +#define x_mad(dest, mask, src0, src1, src2) dest.mask = (_tof4(src0) * _tof4(src1) + _tof4(src2)).mask + +// 2.14.1.10.8 DP3: Three-Component Dot Product +#define x_dp3(dest, mask, src0, src1) dest.mask = _ssss(dot(_tof4(src0).xyz, _tof4(src1).xyz)).mask + +// 2.14.1.10.9 DP4: Four-Component Dot Product +#define x_dp4(dest, mask, src0, src1) dest.mask = _ssss(dot(_tof4(src0), _tof4(src1))).mask + +// 2.14.1.10.10 DST: Distance Vector +#define x_dst(dest, mask, src0, src1) dest.mask = dst(_tof4(src0), _tof4(src1)).mask /* equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } */ + +// 2.14.1.10.11 MIN: Minimum +#define x_min(dest, mask, src0, src1) dest.mask = min(_tof4(src0), _tof4(src1)).mask + +// 2.14.1.10.12 MAX: Maximum +#define x_max(dest, mask, src0, src1) dest.mask = max(_tof4(src0), _tof4(src1)).mask + +// 2.14.1.10.13 SLT: Set On Less Than +#define x_slt(dest, mask, src0, src1) dest.mask = _slt(_tof4(src0), _tof4(src1)).mask float4 _slt(float4 src0, float4 src1) { float4 dest; @@ -86,25 +86,31 @@ float4 _slt(float4 src0, float4 src1) return dest; } +// 2.14.1.10.14 SGE: Set On Greater or Equal Than +#define x_sge(dest, mask, src0, src1) dest.mask = _sge(_tof4(src0), _tof4(src1)).mask +float4 _sge(float4 src0, float4 src1) +{ + float4 dest; + dest.x = (src0.x >= src1.x) ? 1 : 0; + dest.y = (src0.y >= src1.y) ? 1 : 0; + dest.z = (src0.z >= src1.z) ? 1 : 0; + dest.w = (src0.w >= src1.w) ? 
1 : 0; + return dest; +} + +// 2.14.1.10.18 DPH: Homogeneous Dot Product +#define x_dph(dest, mask, src0, src1) dest.mask = _ssss(dot(float4(_tof4(src0).xyz, 1), _tof4(src1)) + src1.w).mask + // Xbox ILU Functions -#define x_rcp(dest, mask, src0) dest.mask = _ssss(1 / _scalar(src0)) /* NO mask! */ +// 2.14.1.10.6 RCP: Reciprocal +#define x_rcp(dest, mask, src0) dest.mask = _ssss(1 / _scalar(src0)).mask // TODO : #define x_rcp(dest, mask, src0) dest.mask = (_scalar(src0) == 0) ? 1.#INF : (1 / _scalar(src0)) - -#define x_rcc(dest, mask, src0) dest.mask = _ssss(_rcc(_scalar(src0))) /* NO mask! */ -float _rcc(float input) -{ - // Calculate the reciprocal - float r = 1 / input; - - // Clamp - return (r > 0) - ? clamp(r, 5.42101e-020f, 1.84467e+019f) - : clamp(r, -1.84467e+019f, -5.42101e-020f); -} - -#define x_rsq(dest, mask, src0) dest.mask = _ssss(rsqrt(abs(_scalar(src0)))) /* NO mask! */ - + +// 2.14.1.10.7 RSQ: Reciprocal Square Root +#define x_rsq(dest, mask, src0) dest.mask = _ssss(rsqrt(abs(_scalar(src0)))).mask + +// 2.14.1.10.15 EXP: Exponential Base 2 #define x_expp(dest, mask, src0) dest.mask = _expp(_scalar(src0)).mask float4 _expp(float input) { @@ -118,7 +124,8 @@ float4 _expp(float input) return dest; } - + +// 2.14.1.10.16 LOG: Logarithm Base 2 #define x_logp(dest, mask, src0) dest.mask = _logp(_scalar(src0)).mask float4 _logp(float input) { @@ -133,7 +140,8 @@ float4 _logp(float input) return dest; } -#define x_lit(dest, mask, src) dest.mask = _lit(_cast(src)).mask +// 2.14.1.10.17 LIT: Light Coefficients +#define x_lit(dest, mask, src) dest.mask = _lit(_tof4(src)).mask float4 _lit(float4 src0) { const float epsilon = 1.0f / 256.0f; @@ -152,6 +160,19 @@ float4 _lit(float4 src0) return dest; } +// 2.14.1.10.19 RCC: Reciprocal Clamped +#define x_rcc(dest, mask, src0) dest.mask = _ssss(_rcc(_scalar(src0))).mask +float _rcc(float input) +{ + // Calculate the reciprocal + float r = 1 / input; + + // Clamp + return (r >= 0) + ? 
clamp(r, 5.42101e-020f, 1.84467e+019f) // the IEEE 32-bit binary values 0x1F800000 and 0x5F800000 + : clamp(r, -1.84467e+019f, -5.42101e-020f); // the IEEE 32-bit binary values 0xDF800000 and 0x9F800000 +} + float4 reverseScreenspaceTransform(float4 oPos) { // On Xbox, oPos should contain the vertex position in screenspace @@ -171,24 +192,24 @@ float4 reverseScreenspaceTransform(float4 oPos) VS_OUTPUT main(const VS_INPUT xIn) { - // Input registers - float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; + // Output variables + float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3; + oPos = oD0 = oD1 = oB0 = oB1 = oT0 = oT1 = oT2 = oT3 = float4(0, 0, 0, 1); // Pre-initialize w component of outputs to 1 + + // Single component outputs + float4 oFog, oPts; // x is write-only on Xbox. Use float4 as some games use incorrect masks + oFog = oPts = 0; + + // Address (index) register + int1 a0 = 0; // Temporary registers float4 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 0); #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox - // Address (index) register - int1 a0 = 0; - - // Output variables - float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3; - oPos = oD0 = oD1 = oB0 = oB1 = oT0 = oT1 = oT2 = oT3 = float4(0, 0, 0, 1); // Pre-initialize w component of outputs to 1 - // Single component outputs - float4 oFog, oPts; // x is write-only on Xbox. 
Use float4 as some games use incorrect masks - oFog = oPts = 0; - + // Input registers + float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; // Initialize input variables v0 = xIn.v[0]; v1 = xIn.v[1]; From b330198fe6e756e1e50ef1399cd677cb2247d8e2 Mon Sep 17 00:00:00 2001 From: patrickvl Date: Thu, 12 Dec 2019 00:05:44 +0100 Subject: [PATCH 41/77] Reorder vertex shader code --- src/core/hle/D3D8/XbVertexShader.cpp | 291 ++++++++++++++------------- 1 file changed, 152 insertions(+), 139 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 57e6eb315..b8a9a9c68 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -1684,136 +1684,6 @@ D3DVERTEXELEMENT *EmuRecompileVshDeclaration return pHostVertexElements; } -extern void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader); - -std::string DebugPrependLineNumbers(std::string shaderString) { - std::stringstream shader(shaderString); - auto debugShader = std::stringstream(); - - int i = 1; - for (std::string line; std::getline(shader, line); ) { - auto lineNumber = std::to_string(i++); - auto paddedLineNumber = lineNumber.insert(0, 3 - lineNumber.size(), ' '); - debugShader << "/* " << paddedLineNumber << " */ " << line << "\n"; - } - - return debugShader.str(); -} - -// recompile xbox vertex shader function -extern HRESULT EmuRecompileVshFunction -( - DWORD *pXboxFunction, - bool bNoReservedConstants, - D3DVERTEXELEMENT *pRecompiledDeclaration, - bool *pbUseDeclarationOnly, - DWORD *pXboxFunctionSize, - ID3DBlob **ppRecompiledShader -) -{ - XTL::X_VSH_SHADER_HEADER *pXboxVertexShaderHeader = (XTL::X_VSH_SHADER_HEADER*)pXboxFunction; - DWORD *pToken; - boolean EOI = false; - VSH_XBOX_SHADER *pShader = (VSH_XBOX_SHADER*)calloc(1, sizeof(VSH_XBOX_SHADER)); - ID3DBlob *pErrors = nullptr; - HRESULT hRet = 0; - - // TODO: support this situation.. 
- if(pXboxFunction == xbnullptr) - return E_FAIL; - - // Initialize output arguments to zero - *pbUseDeclarationOnly = 0; - *pXboxFunctionSize = 0; - *ppRecompiledShader = nullptr; - - if(!pShader) { - EmuLog(LOG_LEVEL::WARNING, "Couldn't allocate memory for vertex shader conversion buffer"); - return E_OUTOFMEMORY; - } - - pShader->ShaderHeader = *pXboxVertexShaderHeader; - switch(pXboxVertexShaderHeader->Version) { - case VERSION_XVS: - break; - case VERSION_XVSS: - EmuLog(LOG_LEVEL::WARNING, "Might not support vertex state shaders?"); - hRet = E_FAIL; - break; - case VERSION_XVSW: - EmuLog(LOG_LEVEL::WARNING, "Might not support vertex read/write shaders?"); - hRet = E_FAIL; - break; - default: - EmuLog(LOG_LEVEL::WARNING, "Unknown vertex shader version 0x%02X", pXboxVertexShaderHeader->Version); - hRet = E_FAIL; - break; - } - - if(SUCCEEDED(hRet)) { - static std::string hlsl_template = - #include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string - ; - - auto hlsl_stream = std::stringstream(); - - for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) { - VSH_SHADER_INSTRUCTION Inst; - - VshParseInstruction((uint32_t*)pToken, &Inst); - VshConvertToIntermediate(&Inst, pShader); - EOI = Inst.Final; - } - - // The size of the shader is - *pXboxFunctionSize = (intptr_t)pToken - (intptr_t)pXboxFunction; - - // Do not attempt to compile empty shaders - if (pShader->IntermediateCount == 0) { - // This is a declaration only shader, so there is no function to recompile - *pbUseDeclarationOnly = 1; - return D3D_OK; - } - - BuildShader(hlsl_stream, pShader); - std::string hlsl_str = hlsl_stream.str(); - hlsl_str = std::regex_replace(hlsl_template, std::regex("// "), hlsl_str); - - DbgVshPrintf("--- HLSL conversion ---\n"); - DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str()); - DbgVshPrintf("-----------------------\n"); - - hRet = D3DCompile( - hlsl_str.c_str(), - 
hlsl_str.length(), - nullptr, // pSourceName - nullptr, // pDefines - nullptr, // pInclude // TODO precompile x_* HLSL functions? - "main", // shader entry poiint - "vs_3_0", // shader profile - 0, // flags1 - 0, // flags2 - ppRecompiledShader, // out - &pErrors // ppErrorMsgs out - ); - if (FAILED(hRet)) { - EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); - } - - if (pErrors) { - // Determine the log level - auto hlslErrorLogLevel = FAILED(hRet) ? LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG; - // Log HLSL compiler errors - EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); - pErrors->Release(); - } - } - - free(pShader); - - return hRet; -} - extern void FreeVertexDynamicPatch(CxbxVertexShader *pVertexShader) { pVertexShader->VertexShaderInfo.NumberOfVertexStreams = 0; @@ -1923,10 +1793,9 @@ void CxbxImpl_SelectVertexShaderDirect // HLSL outputs -void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) +static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) { - static const char* OReg_Name[] = - { + static const char* OReg_Name[/*VSH_OREG_NAME*/] = { "oPos", "???", "???", @@ -1973,7 +1842,7 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) if (dest.Mask[3]) hlsl << "w"; } -void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) +static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) { // Print functions static char* RegisterName[/*VSH_PARAMETER_TYPE*/] = { @@ -1999,8 +1868,7 @@ void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) // Only display the offset if it's not 0. 
if (register_number != 0) { hlsl << "c[a0.x+" << register_number << "]"; - } - else { + } else { hlsl << "c[a0.x]"; } } else { @@ -2037,10 +1905,10 @@ void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) } } -void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) +static void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) { // HLSL strings for all MAC opcodes, indexed with VSH_MAC - static std::string VSH_MAC_HLSL[] = { + static std::string VSH_MAC_HLSL[/*VSH_MAC*/] = { /*MAC_NOP:*/"", /*MAC_MOV:*/"x_mov", /*MAC_MUL:*/"x_mul", @@ -2060,7 +1928,7 @@ void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) }; // HLSL strings for all ILU opcodes, indexed with VSH_ILU - static std::string VSH_ILU_HLSL[] = { + static std::string VSH_ILU_HLSL[/*VSH_ILU*/] = { /*ILU_NOP:*/"", /*ILU_MOV:*/"x_mov", /*ILU_RCP:*/"x_rcp", @@ -2094,7 +1962,152 @@ void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) ParameterHlsl(hlsl, xboxInstruction.Parameters[i]); } } + hlsl << ");"; } } } + +std::string DebugPrependLineNumbers(std::string shaderString) { + std::stringstream shader(shaderString); + auto debugShader = std::stringstream(); + + int i = 1; + for (std::string line; std::getline(shader, line); ) { + auto lineNumber = std::to_string(i++); + auto paddedLineNumber = lineNumber.insert(0, 3 - lineNumber.size(), ' '); + debugShader << "/* " << paddedLineNumber << " */ " << line << "\n"; + } + + return debugShader.str(); +} + +// recompile xbox vertex shader function +extern HRESULT EmuRecompileVshFunction +( + DWORD* pXboxFunction, + bool bNoReservedConstants, + D3DVERTEXELEMENT* pRecompiledDeclaration, + bool* pbUseDeclarationOnly, + DWORD* pXboxFunctionSize, + ID3DBlob** ppRecompiledShader +) +{ + XTL::X_VSH_SHADER_HEADER* pXboxVertexShaderHeader = (XTL::X_VSH_SHADER_HEADER*)pXboxFunction; + DWORD* pToken; + boolean EOI = false; + VSH_XBOX_SHADER* pShader = (VSH_XBOX_SHADER*)calloc(1, sizeof(VSH_XBOX_SHADER)); 
+ ID3DBlob* pErrors = nullptr; + HRESULT hRet = 0; + + // TODO: support this situation.. + if (pXboxFunction == xbnullptr) + return E_FAIL; + + // Initialize output arguments to zero + *pbUseDeclarationOnly = 0; + *pXboxFunctionSize = 0; + *ppRecompiledShader = nullptr; + + if (!pShader) { + EmuLog(LOG_LEVEL::WARNING, "Couldn't allocate memory for vertex shader conversion buffer"); + return E_OUTOFMEMORY; + } + + pShader->ShaderHeader = *pXboxVertexShaderHeader; + switch (pXboxVertexShaderHeader->Version) { + case VERSION_XVS: + break; + case VERSION_XVSS: + EmuLog(LOG_LEVEL::WARNING, "Might not support vertex state shaders?"); + hRet = E_FAIL; + break; + case VERSION_XVSW: + EmuLog(LOG_LEVEL::WARNING, "Might not support vertex read/write shaders?"); + hRet = E_FAIL; + break; + default: + EmuLog(LOG_LEVEL::WARNING, "Unknown vertex shader version 0x%02X", pXboxVertexShaderHeader->Version); + hRet = E_FAIL; + break; + } + + if (SUCCEEDED(hRet)) { + static std::string hlsl_template = + #include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string + ; + + auto hlsl_stream = std::stringstream(); + + for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) { + VSH_SHADER_INSTRUCTION Inst; + + VshParseInstruction((uint32_t*)pToken, &Inst); + VshConvertToIntermediate(&Inst, pShader); + EOI = Inst.Final; + } + + // The size of the shader is + *pXboxFunctionSize = (intptr_t)pToken - (intptr_t)pXboxFunction; + + // Do not attempt to compile empty shaders + if (pShader->IntermediateCount == 0) { + // This is a declaration only shader, so there is no function to recompile + *pbUseDeclarationOnly = 1; + return D3D_OK; + } + + BuildShader(hlsl_stream, pShader); + std::string hlsl_str = hlsl_stream.str(); + hlsl_str = std::regex_replace(hlsl_template, std::regex("// "), hlsl_str); + + DbgVshPrintf("--- HLSL conversion ---\n"); + 
DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str()); + DbgVshPrintf("-----------------------\n"); + + hRet = D3DCompile( + hlsl_str.c_str(), + hlsl_str.length(), + nullptr, // pSourceName + nullptr, // pDefines + nullptr, // pInclude // TODO precompile x_* HLSL functions? + "main", // shader entry poiint + "vs_3_0", // shader profile + 0, // flags1 + 0, // flags2 + ppRecompiledShader, // out + &pErrors // ppErrorMsgs out + ); + if (FAILED(hRet)) { + EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); + } + + // Determine the log level + auto hlslErrorLogLevel = FAILED(hRet) ? LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG; + if (pErrors) { + // Log HLSL compiler errors + EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); + pErrors->Release(); + pErrors = nullptr; + } + + if (!FAILED(hRet)) { + // Log disassembly + hRet = D3DDisassemble( + (*ppRecompiledShader)->GetBufferPointer(), + (*ppRecompiledShader)->GetBufferSize(), + D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING, + NULL, + &pErrors + ); + if (pErrors) { + EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); + pErrors->Release(); + } + } + } + + free(pShader); + + return hRet; +} From ba5d25e2ed0858708460335240241bbcf4981817 Mon Sep 17 00:00:00 2001 From: patrickvl Date: Thu, 12 Dec 2019 00:19:20 +0100 Subject: [PATCH 42/77] Only disassemble HLSL under debug log level Fix arl by forcing .x mask on a0 parameter --- src/core/hle/D3D8/XbVertexShader.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index b8a9a9c68..164c6f211 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -1826,7 +1826,8 @@ static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) hlsl << OReg_Name[dest.Address]; break; case IMD_OUTPUT_A0X: - hlsl << "a0"; // Is this actually a valid output? 
+ hlsl << "a0"; + dest.Mask[0] = true; // force a0.x break; default: assert(false); @@ -2091,6 +2092,8 @@ extern HRESULT EmuRecompileVshFunction pErrors = nullptr; } + LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) + if (g_bPrintfOn) if (!FAILED(hRet)) { // Log disassembly hRet = D3DDisassemble( From 136083d72a5b0eb452d1761d5606c713826f0d9e Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Thu, 12 Dec 2019 21:54:31 +1300 Subject: [PATCH 43/77] SetVertexData4f support --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 30 ++++++------ src/core/hle/D3D8/Direct3D9/Xb.hlsl | 59 ++++++++++++++--------- src/core/hle/D3D8/XbD3D8Types.h | 14 ++---- src/core/hle/D3D8/XbVertexShader.cpp | 11 +++-- src/core/hle/D3D8/XbVertexShader.h | 1 + 5 files changed, 67 insertions(+), 48 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 8a8af1349..70324d22b 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -489,6 +489,17 @@ const char *CxbxGetErrorDescription(HRESULT hResult) return nullptr; } +// TODO move to shader file. 
Needs to be called whenever a shader or declaration is set +void SetOverrideFlags(CxbxVertexShader* pCxbxVertexShader) { + if (pCxbxVertexShader != nullptr && pCxbxVertexShader->pHostVertexShader != nullptr) { + float overrideFlags[16]; + for (int i = 0; i < 16; i++) { + overrideFlags[i] = !pCxbxVertexShader->VertexShaderInfo.vRegisterInDeclaration[i]; + } + g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE, overrideFlags, 4); + } +} + const char *D3DErrorString(HRESULT hResult) { static char buffer[1024]; @@ -3452,6 +3463,8 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SelectVertexShader) pHostVertexDeclaration = pCxbxVertexShader->pHostVertexDeclaration; pHostVertexShader = pCxbxVertexShader->pHostVertexShader; HostFVF = pCxbxVertexShader->HostFVF; + + SetOverrideFlags(pCxbxVertexShader); } hRet = g_pD3DDevice->SetVertexDeclaration(pHostVertexDeclaration); @@ -3808,15 +3821,10 @@ void UpdateViewPortOffsetAndScaleConstants() { float vOffset[4], vScale[4]; GetViewPortOffsetAndScale(vOffset, vScale); - float vScaleReversed[4] = { 1.0f / (double)vScale[0], 1.0f / (double)vScale[1], 1.0f / (double)vScale[2], 0 }; - g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_VIEWPORT_SCALE_MIRROR_INVERTED, vScaleReversed, 1); + g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_VIEWPORT_SCALE_MIRROR, vScale, 1); g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_VIEWPORT_OFFSET_MIRROR, vOffset, 1); - // Set 0 and 1 constant, used to compare and transform W when required - float ZeroOne[] = { 0, 1, 0, 0 }; - g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_VIEWPORT_SCALE_ZERO_ONE, ZeroOne, 1); - // Store viewport offset and scale in constant registers 58 (c-38) and // 59 (c-37) used for screen space transformation. 
// We only do this if X_D3DSCM_NORESERVEDCONSTANTS is not set, since enabling this flag frees up these registers for shader used @@ -6674,18 +6682,12 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SetVertexShader) } else { + SetOverrideFlags(pCxbxVertexShader); + hRet = g_pD3DDevice->SetVertexShader(pCxbxVertexShader->pHostVertexShader); DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader(VshHandleIsVertexShader)"); } - // Set default constant values for specular, diffuse, etc - static const float ColorBlack[4] = { 0,0,0,0 }; - static const float ColorWhite[4] = { 1,1,1,1 }; - - g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + XTL::X_D3DVSDE_DIFFUSE, ColorWhite, 1); - g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + XTL::X_D3DVSDE_BACKDIFFUSE, ColorWhite, 1); - g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + XTL::X_D3DVSDE_SPECULAR, ColorBlack, 1); - g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + XTL::X_D3DVSDE_BACKSPECULAR, ColorBlack, 1); } else { hRet = g_pD3DDevice->SetVertexShader(nullptr); DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader"); diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index ee5f4aebf..563d7aeab 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -25,6 +25,13 @@ struct VS_OUTPUT // Xbox constant registers extern uniform float4 c[192] : register(c0); +// Vertex input overrides for SetVertexData4f support +extern float4 vOverrideValue[16] : register(c192); +extern float4 vOverridePacked[4] : register(c208); + +extern float4 xboxViewportScale : register(c212); +extern float4 xboxViewportOffset : register(c213); + // Overloaded casts, assuring all inputs are treated as float4 float4 _tof4(float src) { return float4(src, src, src, src); } float4 _tof4(float2 src) { return src.xyyy; } @@ -176,16 +183,16 @@ float _rcc(float input) float4 
reverseScreenspaceTransform(float4 oPos) { // On Xbox, oPos should contain the vertex position in screenspace + // We need to reverse this transformation // Conventionally, each Xbox Vertex Shader includes instructions like this // mul oPos.xyz, r12, c-38 // +rcc r1.x, r12.w // mad oPos.xyz, r12, r1.x, c-37 // where c-37 and c-38 are reserved transform values - // Lets hope c-37 and c-38 contain the conventional values - oPos.xyz -= (float3)c[-37 + 96]; // reverse offset + oPos.xyz -= xboxViewportOffset.xyz; // reverse offset oPos.xyz *= oPos.w; // reverse perspective divide - oPos.xyz /= (float3)c[-38 + 96]; // reverse scale + oPos.xyz /= xboxViewportScale.xyz; // reverse scale return oPos; } @@ -208,25 +215,33 @@ VS_OUTPUT main(const VS_INPUT xIn) r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 0); #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox - // Input registers - float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; - // Initialize input variables - v0 = xIn.v[0]; - v1 = xIn.v[1]; - v2 = xIn.v[2]; - v3 = xIn.v[3]; - v4 = xIn.v[4]; - v5 = xIn.v[5]; - v6 = xIn.v[6]; - v7 = xIn.v[7]; - v8 = xIn.v[8]; - v9 = xIn.v[9]; - v10 = xIn.v[10]; - v11 = xIn.v[11]; - v12 = xIn.v[12]; - v13 = xIn.v[13]; - v14 = xIn.v[14]; - v15 = xIn.v[15]; + // Input registerss + float4 v[16]; + # define v0 v[0] + # define v1 v[1] + # define v2 v[2] + # define v3 v[3] + # define v4 v[4] + # define v5 v[5] + # define v6 v[6] + # define v7 v[7] + # define v8 v[8] + # define v9 v[9] + # define v10 v[10] + # define v11 v[11] + # define v12 v[12] + # define v13 v[13] + # define v14 v[14] + # define v15 v[15] + + // View 4 packed overrides as an array of 16 floats + float vOverride[16] = (float[16])vOverridePacked; + + // Initialize input registers from the vertex buffer + // Or use an override value set with SetVertexData4f + for(uint i = 0; i < 16; i++){ + v[i] = vOverride[i] ? 
vOverrideValue[i] : xIn.v[i]; + } // Xbox shader program // diff --git a/src/core/hle/D3D8/XbD3D8Types.h b/src/core/hle/D3D8/XbD3D8Types.h index a217d11f2..51b662dda 100644 --- a/src/core/hle/D3D8/XbD3D8Types.h +++ b/src/core/hle/D3D8/XbD3D8Types.h @@ -988,17 +988,13 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE; #define X_D3DSCM_CORRECTION 96 // Add 96 to arrive at the range 0..191 (instead of 96..95) #define X_D3DVS_CONSTREG_COUNT 192 -#define X_D3DVS_RESERVED_CONSTANT1 -38 // Becomes 58 after correction, contains Scale v -#define X_D3DVS_RESERVED_CONSTANT2 -37 // Becomes 59 after correction, contains Offset -#define X_D3DVS_RESERVED_CONSTANT1_CORRECTED (X_D3DVS_RESERVED_CONSTANT1 + X_D3DVS_CONSTREG_BIAS) -#define X_D3DVS_RESERVED_CONSTANT2_CORRECTED (X_D3DVS_RESERVED_CONSTANT2 + X_D3DVS_CONSTREG_BIAS) // Special Registers, used to pass additional information to the shaders -#define X_D3DVS_CONSTREG_VERTEXDATA4F_BASE (X_D3DVS_CONSTREG_COUNT + 1) -#define X_D3DVS_CONSTREG_VERTEXDATA4F_END (X_D3DVS_CONSTREG_VERTEXDATA4F_BASE + 14) -#define X_D3DVS_VIEWPORT_SCALE_MIRROR_INVERTED (X_D3DVS_CONSTREG_VERTEXDATA4F_END + 1) -#define X_D3DVS_VIEWPORT_OFFSET_MIRROR (X_D3DVS_VIEWPORT_SCALE_MIRROR_INVERTED + 1) -#define X_D3DVS_VIEWPORT_SCALE_ZERO_ONE (X_D3DVS_VIEWPORT_OFFSET_MIRROR + 1) +// TODO co-locate shader workaround constants with shader code +#define X_D3DVS_CONSTREG_VERTEXDATA4F_BASE (X_D3DVS_CONSTREG_COUNT) +#define X_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE (X_D3DVS_CONSTREG_VERTEXDATA4F_BASE + 16) +#define X_D3DVS_VIEWPORT_SCALE_MIRROR (X_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE + 4) +#define X_D3DVS_VIEWPORT_OFFSET_MIRROR (X_D3DVS_VIEWPORT_SCALE_MIRROR + 1) #define X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION) #define X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_OFFSET + X_D3DSCM_CORRECTION) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 
164c6f211..9bbf82fda 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -302,7 +302,7 @@ typedef struct _VSH_XBOX_SHADER } VSH_XBOX_SHADER; -// TODO : Reinstate and use : std::array RegVIsPresentInDeclaration; +std::array RegVIsPresentInDeclaration; /* TODO : map non-FVF Xbox vertex shader handle to CxbxVertexShader (a struct containing a host Xbox vertex shader handle and the original members) std::unordered_map g_CxbxVertexShaders; @@ -1290,7 +1290,7 @@ private: } // Add this register to the list of declared registers - // TODO : Reinstate and use : RegVIsPresentInDeclaration[VertexRegister] = true; + RegVIsPresentInDeclaration[VertexRegister] = true; DWORD XboxVertexElementDataType = (*pXboxToken & X_D3DVSD_DATATYPEMASK) >> X_D3DVSD_DATATYPESHIFT; WORD XboxVertexElementByteSize = 0; @@ -1617,7 +1617,7 @@ public: IsFixedFunction = bIsFixedFunction; - // TODO : Reinstate and use : RegVIsPresentInDeclaration.fill(false); + RegVIsPresentInDeclaration.fill(false); // First of all some info: // We have to figure out which flags are set and then @@ -1661,6 +1661,11 @@ public: // Free the preprocessed declaration copy free(pXboxVertexDeclarationCopy); + for (int i = 0; i < RegVIsPresentInDeclaration.size(); i++) { + pCxbxVertexShaderInfo->vRegisterInDeclaration[i] = RegVIsPresentInDeclaration[i]; + EmuLog(LOG_LEVEL::DEBUG, "Vertex regs used: v%d %d", i, pCxbxVertexShaderInfo->vRegisterInDeclaration[i]); + } + return Result; } }; diff --git a/src/core/hle/D3D8/XbVertexShader.h b/src/core/hle/D3D8/XbVertexShader.h index 36b963a49..43e9b1b06 100644 --- a/src/core/hle/D3D8/XbVertexShader.h +++ b/src/core/hle/D3D8/XbVertexShader.h @@ -71,6 +71,7 @@ typedef struct _CxbxVertexShaderInfo { UINT NumberOfVertexStreams; // The number of streams the vertex shader uses CxbxVertexShaderStreamInfo VertexStreams[X_VSH_MAX_STREAMS]; + bool vRegisterInDeclaration[16]; } CxbxVertexShaderInfo; From b9db3017914d49ca2bf88619ce0e03622696ab54 Mon 
Sep 17 00:00:00 2001 From: Anthony Miles Date: Thu, 12 Dec 2019 22:09:34 +1300 Subject: [PATCH 44/77] Add LOG_TEST_CASE for writes to constants --- src/core/hle/D3D8/XbVertexShader.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 9bbf82fda..9a34763d0 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -1822,6 +1822,7 @@ static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) switch (dest.Type) { case IMD_OUTPUT_C: hlsl << "c[" << dest.Address << "]"; + LOG_TEST_CASE("Vertex shader writes to constant table"); break; case IMD_OUTPUT_R: hlsl << "r" << dest.Address; From 5d2ef854fb096df76000b738fceb47b0faddc0a0 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Thu, 12 Dec 2019 22:42:38 +1300 Subject: [PATCH 45/77] Remove popups about register usage, now that HLSL is used --- src/core/hle/D3D8/XbVertexShader.cpp | 23 +---------------------- src/core/hle/D3D8/XbVertexShader.h | 2 -- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 9a34763d0..09ad36ded 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -939,21 +939,12 @@ private: inline DWORD VshGetVertexRegister(DWORD XboxToken) { DWORD regNum = (XboxToken & X_D3DVSD_VERTEXREGMASK) >> X_D3DVSD_VERTEXREGSHIFT; - if (regNum >= hostTemporaryRegisterCount /*12 for D3D8, D3D9 value depends on host GPU */) { - // test-case : BLiNX: the time sweeper - // test-case : Lego Star Wars - LOG_TEST_CASE("RegNum > NumTemps"); - } return regNum; } inline DWORD VshGetVertexRegisterIn(DWORD XboxToken) { DWORD regNum = (XboxToken & X_D3DVSD_VERTEXREGINMASK) >> X_D3DVSD_VERTEXREGINSHIFT; - if (regNum >= hostTemporaryRegisterCount /*12 for D3D8, D3D9 value depends on host GPU */) { - // test-case : Lego Star Wars - LOG_TEST_CASE("RegNum > NumTemps"); - } return 
regNum; } @@ -1601,19 +1592,7 @@ public: // Get a preprocessed copy of the original Xbox Vertex Declaration auto pXboxVertexDeclarationCopy = RemoveXboxDeclarationRedefinition(pXboxDeclaration); - pVertexShaderInfoToSet = pCxbxVertexShaderInfo; - hostTemporaryRegisterCount = g_D3DCaps.VS20Caps.NumTemps; - if (hostTemporaryRegisterCount < VSH_MIN_TEMPORARY_REGISTERS) { - LOG_TEST_CASE("g_D3DCaps.VS20Caps.NumTemps < 12 (Host minimal vertex shader temporary register count)"); - } - if (hostTemporaryRegisterCount < 12+1) { // TODO : Use a constant (see X_D3DVSD_REG) - LOG_TEST_CASE("g_D3DCaps.VS20Caps.NumTemps < 12+1 (Xbox vertex shader temporary register count + r12, reading oPos)"); - } - - // Note, that some Direct3D 9 drivers return only the required minimum temporary register count of 12, - // but regardless, shaders that use temporary register numbers above r12 still seem to work correctly. - // So it seems we can't rely on VS20Caps.NumTemps indicating accurately what host hardware supports. - // (Although it could be that the driver switches to software vertex processing when a shader exceeds hardware limits.) 
+ pVertexShaderInfoToSet = pCxbxVertexShaderInfo; IsFixedFunction = bIsFixedFunction; diff --git a/src/core/hle/D3D8/XbVertexShader.h b/src/core/hle/D3D8/XbVertexShader.h index 43e9b1b06..0d9f7ea11 100644 --- a/src/core/hle/D3D8/XbVertexShader.h +++ b/src/core/hle/D3D8/XbVertexShader.h @@ -30,8 +30,6 @@ #include "core\hle\D3D8\XbD3D8Types.h" // for X_VSH_MAX_ATTRIBUTES // Host vertex shader counts -#define VSH_MIN_TEMPORARY_REGISTERS 12 // Equal to D3DCAPS9.VS20Caps.NumTemps (at least 12 for vs_2_x) - https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx9-graphics-reference-asm-vs-registers-vs-2-x -#define VSH_MAX_TEMPORARY_REGISTERS 32 #define VSH_MAX_INTERMEDIATE_COUNT 1024 // The maximum number of intermediate format slots #define VSH_VS11_MAX_INSTRUCTION_COUNT 128 #define VSH_VS2X_MAX_INSTRUCTION_COUNT 256 From 619b276d81ccde0474673805745cd1ddba403d64 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Thu, 12 Dec 2019 22:58:01 +1300 Subject: [PATCH 46/77] Generate more accurate viewport values, so we can reverse transforms in the vertex shader more generally --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 58 +++++++++++++++++++---- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 70324d22b..7d73c74cd 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -3806,15 +3806,57 @@ void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4]) float scaleZ = zScale * (ViewPort.MaxZ - ViewPort.MinZ); float offsetZ = zScale * ViewPort.MinZ; - vOffset[0] = offsetWidth + ViewPort.X; - vOffset[1] = offsetHeight + ViewPort.Y; - vOffset[2] = offsetZ; - vOffset[3] = 0.0f; + // TODO will we need to do something here to support upscaling? + // TODO remove the code above as required - vScale[0] = scaleWidth; - vScale[1] = scaleHeight; - vScale[2] = scaleZ; - vScale[3] = 0.0f; + // Default scale and offset. 
+ // Multisample state will affect these + float xScale = 1; + float yScale = 1; + float xOffset = 0.5; + float yOffset = 0.5; + + // MULTISAMPLE options have offset of 0 + // Various sample sizes have various x and y scales + switch (g_EmuCDPD.XboxPresentationParameters.MultiSampleType) + { + case XTL::X_D3DMULTISAMPLE_2_SAMPLES_MULTISAMPLE_LINEAR: + case XTL::X_D3DMULTISAMPLE_2_SAMPLES_MULTISAMPLE_QUINCUNX: + case XTL::X_D3DMULTISAMPLE_4_SAMPLES_MULTISAMPLE_LINEAR: + case XTL::X_D3DMULTISAMPLE_4_SAMPLES_MULTISAMPLE_GAUSSIAN: + xOffset = yOffset = 0; + break; + case XTL::X_D3DMULTISAMPLE_2_SAMPLES_SUPERSAMPLE_HORIZONTAL_LINEAR: + xScale = 2; + break; + case XTL::X_D3DMULTISAMPLE_2_SAMPLES_SUPERSAMPLE_VERTICAL_LINEAR: + yScale = 2; + break; + case XTL::X_D3DMULTISAMPLE_4_SAMPLES_SUPERSAMPLE_LINEAR: + case XTL::X_D3DMULTISAMPLE_4_SAMPLES_SUPERSAMPLE_GAUSSIAN: + xScale = yScale = 2; + break; + case XTL::X_D3DMULTISAMPLE_9_SAMPLES_MULTISAMPLE_GAUSSIAN: + xScale = yScale = 1.5f; + xOffset = yOffset = 0; + break; + case XTL::X_D3DMULTISAMPLE_9_SAMPLES_SUPERSAMPLE_GAUSSIAN: + xScale = yScale = 3.0f; + break; + } + + + // Offset with OGL pixel correction (?) TODO verify + vOffset[0] = xOffset + (2.0f / ViewPort.Width); + vOffset[1] = yOffset + (2.0f / ViewPort.Height); + vOffset[2] = 0; //offsetZ; + vOffset[3] = 0.0f; + + // Scale + vScale[0] = xScale * ViewPort.Width; + vScale[1] = yScale * ViewPort.Height; + vScale[2] = scaleZ; // ? + vScale[3] = 1.0f; // ? 
} void UpdateViewPortOffsetAndScaleConstants() From 50a51657b0c1bd46f738f2cec265f50f98826551 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Thu, 12 Dec 2019 14:36:14 +0100 Subject: [PATCH 47/77] Force A0's X mask during decoding (in VshAddInstructionMAC_ARL) instead of HLSL conversion (in OutputHlsl) Also applied Unix EOL style (again) --- src/core/hle/D3D8/XbVertexShader.cpp | 170 +++++++++++++-------------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 09ad36ded..bfa816d8a 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -250,7 +250,7 @@ typedef struct _VSH_SHADER_INSTRUCTION VSH_PARAMETER A; VSH_PARAMETER B; VSH_PARAMETER C; - boolean a0x; + boolean a0x; boolean Final; } VSH_SHADER_INSTRUCTION; @@ -382,7 +382,7 @@ static uint8_t VshGetField(uint32_t *pShaderToken, // Final instruction { 3, 0, 1 } // FLD_FINAL, }; - + return (uint8_t)(VshGetFromToken(pShaderToken, FieldMapping[FieldName].SubToken, FieldMapping[FieldName].StartBit, @@ -398,7 +398,7 @@ static inline int16_t ConvertCRegister(const int16_t CReg) static void VshParseInstruction(uint32_t *pShaderToken, VSH_SHADER_INSTRUCTION *pInstruction) { - // First get the instruction(s). + // First get the instruction(s). 
pInstruction->ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU); pInstruction->MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC); @@ -471,14 +471,14 @@ static void VshParseInstruction(uint32_t *pShaderToken, pInstruction->C.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_C_SWZ_W); // Get output // Output register - pInstruction->Output.OutputType = (VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB); + pInstruction->Output.OutputType = (VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB); switch(pInstruction->Output.OutputType) { case OUTPUT_C: pInstruction->Output.OutputAddress = ConvertCRegister(VshGetField(pShaderToken, FLD_OUT_ADDRESS)); break; case OUTPUT_O: - pInstruction->Output.OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS) & 0xF; + pInstruction->Output.OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS) & 0xF; break; } pInstruction->Output.OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX); @@ -499,8 +499,8 @@ static void VshParseInstruction(uint32_t *pShaderToken, pInstruction->Output.ILURMask[3] = VshGetField(pShaderToken, FLD_OUT_ILU_MASK_W); pInstruction->Output.ILURAddress = VshGetField(pShaderToken, FLD_OUT_R); // Finally, get a0.x indirect constant addressing - pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X); - pInstruction->Final = VshGetField(pShaderToken, FLD_FINAL); + pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X); + pInstruction->Final = VshGetField(pShaderToken, FLD_FINAL); } static inline int VshIsMaskInUse(const boolean* pMask) @@ -510,14 +510,14 @@ static inline int VshIsMaskInUse(const boolean* pMask) static inline boolean VshInstrWritesToMAC_R(VSH_SHADER_INSTRUCTION* pInstruction) { - return VshIsMaskInUse(pInstruction->Output.MACRMask) + return VshIsMaskInUse(pInstruction->Output.MACRMask) && pInstruction->MAC != MAC_NOP; } static inline boolean VshInstrWritesToMAC_O(VSH_SHADER_INSTRUCTION* pInstruction) { - return VshIsMaskInUse(pInstruction->Output.OutputMask) - && 
pInstruction->Output.OutputMux == OMUX_MAC + return VshIsMaskInUse(pInstruction->Output.OutputMask) + && pInstruction->Output.OutputMux == OMUX_MAC && pInstruction->MAC != MAC_NOP; } @@ -530,14 +530,14 @@ static inline boolean VshInstrWritesToMAC_ARL(VSH_SHADER_INSTRUCTION* pInstructi static inline boolean VshInstrWritesToILU_R(VSH_SHADER_INSTRUCTION* pInstruction) { - return VshIsMaskInUse(pInstruction->Output.ILURMask) + return VshIsMaskInUse(pInstruction->Output.ILURMask) && pInstruction->ILU != ILU_NOP; } static inline boolean VshInstrWritesToILU_O(VSH_SHADER_INSTRUCTION* pInstruction) { - return VshIsMaskInUse(pInstruction->Output.OutputMask) - && pInstruction->Output.OutputMux == OMUX_ILU + return VshIsMaskInUse(pInstruction->Output.OutputMask) + && pInstruction->Output.OutputMux == OMUX_ILU && pInstruction->ILU != ILU_NOP; } @@ -556,7 +556,7 @@ static void VshAddParameters(VSH_SHADER_INSTRUCTION *pInstruction, VSH_IMD_PARAMETER *pParameters) { uint8_t ParamCount = 0; - + if(MAC >= MAC_MOV) { VshAddParameter(&pInstruction->A, pInstruction->a0x, &pParameters[ParamCount]); @@ -597,7 +597,7 @@ static boolean VshAddInstructionMAC_R(VSH_SHADER_INSTRUCTION *pInstruction, VSH_XBOX_SHADER *pShader, boolean IsCombined) { - VSH_INTERMEDIATE_FORMAT *pIntermediate; + VSH_INTERMEDIATE_FORMAT *pIntermediate; if(!VshInstrWritesToMAC_R(pInstruction)) { return FALSE; @@ -625,7 +625,7 @@ static boolean VshAddInstructionMAC_O(VSH_SHADER_INSTRUCTION* pInstruction, VSH_XBOX_SHADER *pShader, boolean IsCombined) { - VSH_INTERMEDIATE_FORMAT *pIntermediate; + VSH_INTERMEDIATE_FORMAT *pIntermediate; if(!VshInstrWritesToMAC_O(pInstruction)) { return FALSE; @@ -653,7 +653,7 @@ static boolean VshAddInstructionMAC_ARL(VSH_SHADER_INSTRUCTION *pInstruction, VSH_XBOX_SHADER *pShader, boolean IsCombined) { - VSH_INTERMEDIATE_FORMAT *pIntermediate; + VSH_INTERMEDIATE_FORMAT *pIntermediate; if(!VshInstrWritesToMAC_ARL(pInstruction)) { return FALSE; @@ -669,6 +669,7 @@ static boolean 
VshAddInstructionMAC_ARL(VSH_SHADER_INSTRUCTION *pInstruction, // Output param pIntermediate->Output.Type = IMD_OUTPUT_A0X; pIntermediate->Output.Address = pInstruction->Output.OutputAddress; + pIntermediate->Output.Mask[0] = true; // force a0.x // Other parameters VshAddParameters(pInstruction, ILU_NOP, pInstruction->MAC, pIntermediate->Parameters); @@ -680,7 +681,7 @@ static boolean VshAddInstructionILU_R(VSH_SHADER_INSTRUCTION *pInstruction, VSH_XBOX_SHADER *pShader, boolean IsCombined) { - VSH_INTERMEDIATE_FORMAT *pIntermediate; + VSH_INTERMEDIATE_FORMAT *pIntermediate; if(!VshInstrWritesToILU_R(pInstruction)) { return FALSE; @@ -709,7 +710,7 @@ static boolean VshAddInstructionILU_O(VSH_SHADER_INSTRUCTION *pInstruction, VSH_XBOX_SHADER *pShader, boolean IsCombined) { - VSH_INTERMEDIATE_FORMAT *pIntermediate; + VSH_INTERMEDIATE_FORMAT *pIntermediate; if(!VshInstrWritesToILU_O(pInstruction)) { return FALSE; @@ -760,14 +761,14 @@ static void VshConvertToIntermediate(VSH_SHADER_INSTRUCTION *pInstruction, IsCombined = TRUE; } } - + if (VshAddInstructionMAC_O(pInstruction, pShader, IsCombined)) { if (VshInstrWritesToILU_R(pInstruction) || VshInstrWritesToILU_O(pInstruction)) { IsCombined = TRUE; } } - + // Special case, arl (mov a0.x, ...) 
if (VshAddInstructionMAC_ARL(pInstruction, pShader, IsCombined)) { if (VshInstrWritesToILU_R(pInstruction) || @@ -775,13 +776,13 @@ static void VshConvertToIntermediate(VSH_SHADER_INSTRUCTION *pInstruction, IsCombined = TRUE; } } - + if (VshAddInstructionILU_R(pInstruction, pShader, IsCombined)) { if (VshInstrWritesToILU_O(pInstruction)) { IsCombined = TRUE; } } - + (void)VshAddInstructionILU_O(pInstruction, pShader, IsCombined); } @@ -1592,7 +1593,7 @@ public: // Get a preprocessed copy of the original Xbox Vertex Declaration auto pXboxVertexDeclarationCopy = RemoveXboxDeclarationRedefinition(pXboxDeclaration); - pVertexShaderInfoToSet = pCxbxVertexShaderInfo; + pVertexShaderInfoToSet = pCxbxVertexShaderInfo; IsFixedFunction = bIsFixedFunction; @@ -1640,7 +1641,7 @@ public: // Free the preprocessed declaration copy free(pXboxVertexDeclarationCopy); - for (int i = 0; i < RegVIsPresentInDeclaration.size(); i++) { + for (size_t i = 0; i < RegVIsPresentInDeclaration.size(); i++) { pCxbxVertexShaderInfo->vRegisterInDeclaration[i] = RegVIsPresentInDeclaration[i]; EmuLog(LOG_LEVEL::DEBUG, "Vertex regs used: v%d %d", i, pCxbxVertexShaderInfo->vRegisterInDeclaration[i]); } @@ -1777,7 +1778,7 @@ void CxbxImpl_SelectVertexShaderDirect // HLSL outputs -static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) +static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) { static const char* OReg_Name[/*VSH_OREG_NAME*/] = { "oPos", @@ -1800,38 +1801,37 @@ static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) switch (dest.Type) { case IMD_OUTPUT_C: - hlsl << "c[" << dest.Address << "]"; - LOG_TEST_CASE("Vertex shader writes to constant table"); + hlsl << "c[" << dest.Address << "]"; + LOG_TEST_CASE("Vertex shader writes to constant table"); break; case IMD_OUTPUT_R: hlsl << "r" << dest.Address; break; - case IMD_OUTPUT_O: + case IMD_OUTPUT_O: assert(dest.Address < OREG_A0X); - hlsl << OReg_Name[dest.Address]; + hlsl << 
OReg_Name[dest.Address]; break; case IMD_OUTPUT_A0X: - hlsl << "a0"; - dest.Mask[0] = true; // force a0.x + hlsl << "a0"; break; - default: + default: assert(false); break; } - // Write the mask as a separate argument to the opcode defines + // Write the mask as a separate argument to the opcode defines // (No space, so that "dest,mask, ..." looks close to "dest.mask, ...") - hlsl << ","; - if (dest.Mask[0]) hlsl << "x"; - if (dest.Mask[1]) hlsl << "y"; - if (dest.Mask[2]) hlsl << "z"; + hlsl << ","; + if (dest.Mask[0]) hlsl << "x"; + if (dest.Mask[1]) hlsl << "y"; + if (dest.Mask[2]) hlsl << "z"; if (dest.Mask[3]) hlsl << "w"; } static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) { // Print functions - static char* RegisterName[/*VSH_PARAMETER_TYPE*/] = { + static char* RegisterName[/*VSH_PARAMETER_TYPE*/] = { "?", // PARAM_UNKNOWN = 0, "r", // PARAM_R, // Temporary (scRatch) registers "v", // PARAM_V, // Vertex registers @@ -1844,19 +1844,19 @@ static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) if (param.Neg) { hlsl << "-"; } - + int register_number = param.Address; - if (param.ParameterType == PARAM_C) { + if (param.ParameterType == PARAM_C) { // Map Xbox [-96, 95] to Host [0, 191] // Account for Xbox's negative constant indexes - register_number += 96; + register_number += 96; if (paramMeta.IndexesWithA0_X) { // Only display the offset if it's not 0. 
if (register_number != 0) { hlsl << "c[a0.x+" << register_number << "]"; } else { - hlsl << "c[a0.x]"; - } + hlsl << "c[a0.x]"; + } } else { hlsl << "c[" << register_number << "]"; } @@ -1891,7 +1891,7 @@ static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) } } -static void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) +static void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) { // HLSL strings for all MAC opcodes, indexed with VSH_MAC static std::string VSH_MAC_HLSL[/*VSH_MAC*/] = { @@ -1927,33 +1927,33 @@ static void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) for (int i = 0; i < pShader->IntermediateCount; i++) { VSH_INTERMEDIATE_FORMAT& xboxInstruction = pShader->Intermediate[i]; - + std::string str = ""; - if (xboxInstruction.InstructionType == IMD_MAC) { - if (xboxInstruction.MAC > MAC_NOP && xboxInstruction.MAC <= MAC_ARL) { - str = VSH_MAC_HLSL[xboxInstruction.MAC]; + if (xboxInstruction.InstructionType == IMD_MAC) { + if (xboxInstruction.MAC > MAC_NOP && xboxInstruction.MAC <= MAC_ARL) { + str = VSH_MAC_HLSL[xboxInstruction.MAC]; } } else if (xboxInstruction.InstructionType == IMD_ILU) { - if (xboxInstruction.ILU > ILU_NOP) { - str = VSH_ILU_HLSL[xboxInstruction.ILU]; + if (xboxInstruction.ILU > ILU_NOP) { + str = VSH_ILU_HLSL[xboxInstruction.ILU]; } - } - - if (!str.empty()) { - hlsl << "\n " << str << "("; // opcode - OutputHlsl(hlsl, xboxInstruction.Output); + } + + if (!str.empty()) { + hlsl << "\n " << str << "("; // opcode + OutputHlsl(hlsl, xboxInstruction.Output); for (int i = 0; i < 3; i++) { - if (xboxInstruction.Parameters[i].Active) { - hlsl << ", "; + if (xboxInstruction.Parameters[i].Active) { + hlsl << ", "; ParameterHlsl(hlsl, xboxInstruction.Parameters[i]); } - } - - hlsl << ");"; + } + + hlsl << ");"; } } } - + std::string DebugPrependLineNumbers(std::string shaderString) { std::stringstream shader(shaderString); auto debugShader = std::stringstream(); @@ 
-1994,11 +1994,11 @@ extern HRESULT EmuRecompileVshFunction *pbUseDeclarationOnly = 0; *pXboxFunctionSize = 0; *ppRecompiledShader = nullptr; - + if (!pShader) { EmuLog(LOG_LEVEL::WARNING, "Couldn't allocate memory for vertex shader conversion buffer"); return E_OUTOFMEMORY; - } + } pShader->ShaderHeader = *pXboxVertexShaderHeader; switch (pXboxVertexShaderHeader->Version) { @@ -2022,8 +2022,8 @@ extern HRESULT EmuRecompileVshFunction static std::string hlsl_template = #include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string ; - - auto hlsl_stream = std::stringstream(); + + auto hlsl_stream = std::stringstream(); for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) { VSH_SHADER_INSTRUCTION Inst; @@ -2044,13 +2044,13 @@ extern HRESULT EmuRecompileVshFunction } BuildShader(hlsl_stream, pShader); - std::string hlsl_str = hlsl_stream.str(); + std::string hlsl_str = hlsl_stream.str(); hlsl_str = std::regex_replace(hlsl_template, std::regex("// "), hlsl_str); DbgVshPrintf("--- HLSL conversion ---\n"); DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str()); DbgVshPrintf("-----------------------\n"); - + hRet = D3DCompile( hlsl_str.c_str(), hlsl_str.length(), @@ -2064,7 +2064,7 @@ extern HRESULT EmuRecompileVshFunction ppRecompiledShader, // out &pErrors // ppErrorMsgs out ); - if (FAILED(hRet)) { + if (FAILED(hRet)) { EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); } @@ -2073,26 +2073,26 @@ extern HRESULT EmuRecompileVshFunction if (pErrors) { // Log HLSL compiler errors EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); - pErrors->Release(); - pErrors = nullptr; - } - + pErrors->Release(); + pErrors = nullptr; + } + LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) - if (g_bPrintfOn) - if (!FAILED(hRet)) { + if (g_bPrintfOn) + if (!FAILED(hRet)) { // Log disassembly - hRet = D3DDisassemble( - 
(*ppRecompiledShader)->GetBufferPointer(), - (*ppRecompiledShader)->GetBufferSize(), - D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING, - NULL, - &pErrors - ); + hRet = D3DDisassemble( + (*ppRecompiledShader)->GetBufferPointer(), + (*ppRecompiledShader)->GetBufferSize(), + D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING, + NULL, + &pErrors + ); if (pErrors) { EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); - pErrors->Release(); - } - } + pErrors->Release(); + } + } } free(pShader); From d74e5947ee524db0a72fc3328aabd5070c7b72c0 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Thu, 12 Dec 2019 17:35:25 +0100 Subject: [PATCH 48/77] Refactor vertex shader microcode conversion to intermediate instructions, fixing pairing bugs like ignore MAC R1 writes --- src/core/hle/D3D8/XbVertexShader.cpp | 356 ++++++--------------------- 1 file changed, 78 insertions(+), 278 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index bfa816d8a..363f9d152 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -61,10 +61,10 @@ VSH_SWIZZLE; typedef DWORD DxbxMask, *PDxbxMask; -#define MASK_X 0x001 -#define MASK_Y 0x002 -#define MASK_Z 0x004 -#define MASK_W 0x008 +#define MASK_X 0x008 +#define MASK_Y 0x004 +#define MASK_Z 0x002 +#define MASK_W 0x001 #define MASK_XYZ MASK_X | MASK_Y | MASK_Z #define MASK_XYZW MASK_X | MASK_Y | MASK_Z | MASK_W @@ -101,19 +101,10 @@ typedef enum _VSH_FIELD_NAME FLD_C_R_LOW, FLD_C_MUX, // Output - FLD_OUT_MAC_MASK_X, - FLD_OUT_MAC_MASK_Y, - FLD_OUT_MAC_MASK_Z, - FLD_OUT_MAC_MASK_W, + FLD_OUT_MAC_MASK, FLD_OUT_R, - FLD_OUT_ILU_MASK_X, - FLD_OUT_ILU_MASK_Y, - FLD_OUT_ILU_MASK_Z, - FLD_OUT_ILU_MASK_W, - FLD_OUT_O_MASK_X, - FLD_OUT_O_MASK_Y, - FLD_OUT_O_MASK_Z, - FLD_OUT_O_MASK_W, + FLD_OUT_ILU_MASK, + FLD_OUT_O_MASK, FLD_OUT_ORB, FLD_OUT_ADDRESS, FLD_OUT_MUX, @@ -230,14 +221,14 @@ 
typedef struct _VSH_OUTPUT // Output register VSH_OUTPUT_MUX OutputMux; // MAC or ILU used as output VSH_OUTPUT_TYPE OutputType; // C or O - boolean OutputMask[4]; + int8_t OutputMask; int16_t OutputAddress; - // MAC output R register - boolean MACRMask[4]; - int16_t MACRAddress; - // ILU output R register - boolean ILURMask[4]; - int16_t ILURAddress; + // MAC output Mask + int8_t MACRMask; + // ILU output mask + int8_t ILURMask; + // MAC,ILU output R register + int16_t RAddress; } VSH_OUTPUT; @@ -265,8 +256,8 @@ VSH_IMD_INSTRUCTION_TYPE; typedef struct _VSH_IMD_OUTPUT { VSH_IMD_OUTPUT_TYPE Type; - boolean Mask[4]; int16_t Address; + int8_t Mask; } VSH_IMD_OUTPUT; @@ -284,8 +275,6 @@ VSH_IMD_PARAMETER; typedef struct _VSH_INTERMEDIATE_FORMAT { - - boolean IsCombined; VSH_IMD_INSTRUCTION_TYPE InstructionType; VSH_MAC MAC; VSH_ILU ILU; @@ -361,19 +350,10 @@ static uint8_t VshGetField(uint32_t *pShaderToken, { 3, 30, 2 }, // FLD_C_R_LOW, { 3, 28, 2 }, // FLD_C_MUX, // Output - { 3, 27, 1 }, // FLD_OUT_MAC_MASK_X, - { 3, 26, 1 }, // FLD_OUT_MAC_MASK_Y, - { 3, 25, 1 }, // FLD_OUT_MAC_MASK_Z, - { 3, 24, 1 }, // FLD_OUT_MAC_MASK_W, + { 3, 24, 4 }, // FLD_OUT_MAC_MASK, { 3, 20, 4 }, // FLD_OUT_R, - { 3, 19, 1 }, // FLD_OUT_ILU_MASK_X, - { 3, 18, 1 }, // FLD_OUT_ILU_MASK_Y, - { 3, 17, 1 }, // FLD_OUT_ILU_MASK_Z, - { 3, 16, 1 }, // FLD_OUT_ILU_MASK_W, - { 3, 15, 1 }, // FLD_OUT_O_MASK_X, - { 3, 14, 1 }, // FLD_OUT_O_MASK_Y, - { 3, 13, 1 }, // FLD_OUT_O_MASK_Z, - { 3, 12, 1 }, // FLD_OUT_O_MASK_W, + { 3, 16, 4 }, // FLD_OUT_ILU_MASK, + { 3, 12, 4 }, // FLD_OUT_O_MASK, { 3, 11, 1 }, // FLD_OUT_ORB, { 3, 3, 8 }, // FLD_OUT_ADDRESS, { 3, 2, 1 }, // FLD_OUT_MUX, @@ -482,65 +462,15 @@ static void VshParseInstruction(uint32_t *pShaderToken, break; } pInstruction->Output.OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX); - pInstruction->Output.OutputMask[0] = VshGetField(pShaderToken, FLD_OUT_O_MASK_X); - pInstruction->Output.OutputMask[1] = 
VshGetField(pShaderToken, FLD_OUT_O_MASK_Y); - pInstruction->Output.OutputMask[2] = VshGetField(pShaderToken, FLD_OUT_O_MASK_Z); - pInstruction->Output.OutputMask[3] = VshGetField(pShaderToken, FLD_OUT_O_MASK_W); - // MAC output - pInstruction->Output.MACRMask[0] = VshGetField(pShaderToken, FLD_OUT_MAC_MASK_X); - pInstruction->Output.MACRMask[1] = VshGetField(pShaderToken, FLD_OUT_MAC_MASK_Y); - pInstruction->Output.MACRMask[2] = VshGetField(pShaderToken, FLD_OUT_MAC_MASK_Z); - pInstruction->Output.MACRMask[3] = VshGetField(pShaderToken, FLD_OUT_MAC_MASK_W); - pInstruction->Output.MACRAddress = VshGetField(pShaderToken, FLD_OUT_R); - // ILU output - pInstruction->Output.ILURMask[0] = VshGetField(pShaderToken, FLD_OUT_ILU_MASK_X); - pInstruction->Output.ILURMask[1] = VshGetField(pShaderToken, FLD_OUT_ILU_MASK_Y); - pInstruction->Output.ILURMask[2] = VshGetField(pShaderToken, FLD_OUT_ILU_MASK_Z); - pInstruction->Output.ILURMask[3] = VshGetField(pShaderToken, FLD_OUT_ILU_MASK_W); - pInstruction->Output.ILURAddress = VshGetField(pShaderToken, FLD_OUT_R); + pInstruction->Output.OutputMask = VshGetField(pShaderToken, FLD_OUT_O_MASK); + pInstruction->Output.MACRMask = VshGetField(pShaderToken, FLD_OUT_MAC_MASK); + pInstruction->Output.ILURMask = VshGetField(pShaderToken, FLD_OUT_ILU_MASK); + pInstruction->Output.RAddress = VshGetField(pShaderToken, FLD_OUT_R); // Finally, get a0.x indirect constant addressing pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X); pInstruction->Final = VshGetField(pShaderToken, FLD_FINAL); } -static inline int VshIsMaskInUse(const boolean* pMask) -{ - return (pMask[0] || pMask[1] || pMask[2] || pMask[3]); -} - -static inline boolean VshInstrWritesToMAC_R(VSH_SHADER_INSTRUCTION* pInstruction) -{ - return VshIsMaskInUse(pInstruction->Output.MACRMask) - && pInstruction->MAC != MAC_NOP; -} - -static inline boolean VshInstrWritesToMAC_O(VSH_SHADER_INSTRUCTION* pInstruction) -{ - return VshIsMaskInUse(pInstruction->Output.OutputMask) - && 
pInstruction->Output.OutputMux == OMUX_MAC - && pInstruction->MAC != MAC_NOP; -} - -static inline boolean VshInstrWritesToMAC_ARL(VSH_SHADER_INSTRUCTION* pInstruction) -{ - return /*!VshIsMaskInUse(pInstruction->Output.OutputMask) && - pInstruction->Output.OutputMux == OMUX_MAC &&*/ - pInstruction->MAC == MAC_ARL; -} - -static inline boolean VshInstrWritesToILU_R(VSH_SHADER_INSTRUCTION* pInstruction) -{ - return VshIsMaskInUse(pInstruction->Output.ILURMask) - && pInstruction->ILU != ILU_NOP; -} - -static inline boolean VshInstrWritesToILU_O(VSH_SHADER_INSTRUCTION* pInstruction) -{ - return VshIsMaskInUse(pInstruction->Output.OutputMask) - && pInstruction->Output.OutputMux == OMUX_ILU - && pInstruction->ILU != ILU_NOP; -} - static void VshAddParameter(VSH_PARAMETER *pParameter, boolean a0x, VSH_IMD_PARAMETER *pIntermediateParameter) @@ -593,197 +523,67 @@ static VSH_INTERMEDIATE_FORMAT *VshNewIntermediate(VSH_XBOX_SHADER *pShader) return &pShader->Intermediate[pShader->IntermediateCount++]; } -static boolean VshAddInstructionMAC_R(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader, - boolean IsCombined) +static void VshAddIntermediateOpcode( + VSH_SHADER_INSTRUCTION* pInstruction, + VSH_XBOX_SHADER *pShader, + VSH_IMD_INSTRUCTION_TYPE instr_type, + int8_t mask) { - VSH_INTERMEDIATE_FORMAT *pIntermediate; + int R = pInstruction->Output.RAddress; + // Test for paired opcodes + if ((pInstruction->MAC != MAC_NOP) && (pInstruction->ILU != ILU_NOP)) { + if (instr_type == IMD_ILU) { + // Paired ILU opcodes can only write to R1 + R = 1; + } else if (R == 1) { + // Ignore paired MAC opcodes that write to R1 + mask = 0; + } + } - if(!VshInstrWritesToMAC_R(pInstruction)) { - return FALSE; - } + if (mask > 0) { + VSH_INTERMEDIATE_FORMAT* pIntermediate = VshNewIntermediate(pShader); + pIntermediate->InstructionType = instr_type; + pIntermediate->MAC = instr_type == IMD_MAC ? pInstruction->MAC : MAC_NOP; + pIntermediate->ILU = instr_type == IMD_ILU ? 
pInstruction->ILU : ILU_NOP; + if (pInstruction->MAC == MAC_ARL) { + pIntermediate->Output.Type = IMD_OUTPUT_A0X; + pIntermediate->Output.Address = 0; + } else { + pIntermediate->Output.Type = IMD_OUTPUT_R; + pIntermediate->Output.Address = R; + } + pIntermediate->Output.Mask = mask; + VshAddParameters(pInstruction, pIntermediate->ILU, pIntermediate->MAC, pIntermediate->Parameters); + } - pIntermediate = VshNewIntermediate(pShader); - pIntermediate->IsCombined = IsCombined; - - // Opcode - pIntermediate->InstructionType = IMD_MAC; - pIntermediate->MAC = pInstruction->MAC; - - // Output param - pIntermediate->Output.Type = IMD_OUTPUT_R; - pIntermediate->Output.Address = pInstruction->Output.MACRAddress; - memcpy(pIntermediate->Output.Mask, pInstruction->Output.MACRMask, sizeof(boolean) * 4); - - // Other parameters - VshAddParameters(pInstruction, ILU_NOP, pInstruction->MAC, pIntermediate->Parameters); - - return TRUE; -} - -static boolean VshAddInstructionMAC_O(VSH_SHADER_INSTRUCTION* pInstruction, - VSH_XBOX_SHADER *pShader, - boolean IsCombined) -{ - VSH_INTERMEDIATE_FORMAT *pIntermediate; - - if(!VshInstrWritesToMAC_O(pInstruction)) { - return FALSE; - } - - pIntermediate = VshNewIntermediate(pShader); - pIntermediate->IsCombined = IsCombined; - - // Opcode - pIntermediate->InstructionType = IMD_MAC; - pIntermediate->MAC = pInstruction->MAC; - - // Output param - pIntermediate->Output.Type = pInstruction->Output.OutputType == OUTPUT_C ? 
IMD_OUTPUT_C : IMD_OUTPUT_O; - pIntermediate->Output.Address = pInstruction->Output.OutputAddress; - memcpy(pIntermediate->Output.Mask, pInstruction->Output.OutputMask, sizeof(boolean) * 4); - - // Other parameters - VshAddParameters(pInstruction, ILU_NOP, pInstruction->MAC, pIntermediate->Parameters); - - return TRUE; -} - -static boolean VshAddInstructionMAC_ARL(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader, - boolean IsCombined) -{ - VSH_INTERMEDIATE_FORMAT *pIntermediate; - - if(!VshInstrWritesToMAC_ARL(pInstruction)) { - return FALSE; - } - - pIntermediate = VshNewIntermediate(pShader); - pIntermediate->IsCombined = IsCombined; - - // Opcode - pIntermediate->InstructionType = IMD_MAC; - pIntermediate->MAC = pInstruction->MAC; - - // Output param - pIntermediate->Output.Type = IMD_OUTPUT_A0X; - pIntermediate->Output.Address = pInstruction->Output.OutputAddress; - pIntermediate->Output.Mask[0] = true; // force a0.x - - // Other parameters - VshAddParameters(pInstruction, ILU_NOP, pInstruction->MAC, pIntermediate->Parameters); - - return TRUE; -} - -static boolean VshAddInstructionILU_R(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader, - boolean IsCombined) -{ - VSH_INTERMEDIATE_FORMAT *pIntermediate; - - if(!VshInstrWritesToILU_R(pInstruction)) { - return FALSE; - } - - pIntermediate = VshNewIntermediate(pShader); - pIntermediate->IsCombined = IsCombined; - - // Opcode - pIntermediate->InstructionType = IMD_ILU; - pIntermediate->ILU = pInstruction->ILU; - - // Output param - pIntermediate->Output.Type = IMD_OUTPUT_R; - // If this is a combined instruction, only r1 is allowed (R address should not be used) - pIntermediate->Output.Address = IsCombined ? 
1 : pInstruction->Output.ILURAddress; - memcpy(pIntermediate->Output.Mask, pInstruction->Output.ILURMask, sizeof(boolean) * 4); - - // Other parameters - VshAddParameters(pInstruction, pInstruction->ILU, MAC_NOP, pIntermediate->Parameters); - - return TRUE; -} - -static boolean VshAddInstructionILU_O(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader, - boolean IsCombined) -{ - VSH_INTERMEDIATE_FORMAT *pIntermediate; - - if(!VshInstrWritesToILU_O(pInstruction)) { - return FALSE; - } - - pIntermediate = VshNewIntermediate(pShader); - pIntermediate->IsCombined = IsCombined; - - // Opcode - pIntermediate->InstructionType = IMD_ILU; - pIntermediate->ILU = pInstruction->ILU; - - // Output param - pIntermediate->Output.Type = pInstruction->Output.OutputType == OUTPUT_C ? IMD_OUTPUT_C : IMD_OUTPUT_O; - pIntermediate->Output.Address = pInstruction->Output.OutputAddress; - memcpy(pIntermediate->Output.Mask, pInstruction->Output.OutputMask, sizeof(boolean) * 4); - - // Other parameters - VshAddParameters(pInstruction, pInstruction->ILU, MAC_NOP, pIntermediate->Parameters); - - return TRUE; + // Is the output mask set? + if (pInstruction->Output.OutputMask > 0) { + // Check if we must add a muxed opcode too + if ((uint8_t)(pInstruction->Output.OutputMux) == (uint8_t)instr_type) { + VSH_INTERMEDIATE_FORMAT* pMuxedIntermediate = VshNewIntermediate(pShader); + pMuxedIntermediate->InstructionType = instr_type; + pMuxedIntermediate->MAC = instr_type == IMD_MAC ? pInstruction->MAC : MAC_NOP; + pMuxedIntermediate->ILU = instr_type == IMD_ILU ? pInstruction->ILU : ILU_NOP; + pMuxedIntermediate->Output.Type = pInstruction->Output.OutputType == OUTPUT_C ? 
IMD_OUTPUT_C : IMD_OUTPUT_O; + pMuxedIntermediate->Output.Address = pInstruction->Output.OutputAddress; + pMuxedIntermediate->Output.Mask = pInstruction->Output.OutputMask; + VshAddParameters(pInstruction, pMuxedIntermediate->ILU, pMuxedIntermediate->MAC, pMuxedIntermediate->Parameters); + } + } } static void VshConvertToIntermediate(VSH_SHADER_INSTRUCTION *pInstruction, VSH_XBOX_SHADER *pShader) { - // Five types of instructions: - // MAC - // - // ILU - // - // MAC - // +ILU - // - // MAC - // +MAC - // +ILU - // - // MAC - // +ILU - // +ILU - boolean IsCombined = FALSE; + if (pInstruction->MAC != MAC_NOP) { + int8_t mask = pInstruction->MAC == MAC_ARL ? MASK_X : pInstruction->Output.MACRMask; + VshAddIntermediateOpcode(pInstruction, pShader, IMD_MAC, mask); + } - if (VshAddInstructionMAC_R(pInstruction, pShader, IsCombined)) { - if (VshInstrWritesToMAC_O(pInstruction) || - VshInstrWritesToILU_R(pInstruction) || - VshInstrWritesToILU_O(pInstruction)) { - IsCombined = TRUE; - } - } - - if (VshAddInstructionMAC_O(pInstruction, pShader, IsCombined)) { - if (VshInstrWritesToILU_R(pInstruction) || - VshInstrWritesToILU_O(pInstruction)) { - IsCombined = TRUE; - } - } - - // Special case, arl (mov a0.x, ...) 
- if (VshAddInstructionMAC_ARL(pInstruction, pShader, IsCombined)) { - if (VshInstrWritesToILU_R(pInstruction) || - VshInstrWritesToILU_O(pInstruction)) { - IsCombined = TRUE; - } - } - - if (VshAddInstructionILU_R(pInstruction, pShader, IsCombined)) { - if (VshInstrWritesToILU_O(pInstruction)) { - IsCombined = TRUE; - } - } - - (void)VshAddInstructionILU_O(pInstruction, pShader, IsCombined); + if (pInstruction->ILU != ILU_NOP) { + VshAddIntermediateOpcode(pInstruction, pShader, IMD_ILU, pInstruction->Output.ILURMask); + } } #define D3DDECLUSAGE_UNSUPPORTED ((D3DDECLUSAGE)-1) @@ -1822,10 +1622,10 @@ static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) // Write the mask as a separate argument to the opcode defines // (No space, so that "dest,mask, ..." looks close to "dest.mask, ...") hlsl << ","; - if (dest.Mask[0]) hlsl << "x"; - if (dest.Mask[1]) hlsl << "y"; - if (dest.Mask[2]) hlsl << "z"; - if (dest.Mask[3]) hlsl << "w"; + if (dest.Mask & MASK_X) hlsl << "x"; + if (dest.Mask & MASK_Y) hlsl << "y"; + if (dest.Mask & MASK_Z) hlsl << "z"; + if (dest.Mask & MASK_W) hlsl << "w"; } static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) From ff1412b3d926e6025d6e89be0566aaccf3eb6656 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Fri, 13 Dec 2019 18:20:57 +0100 Subject: [PATCH 49/77] Use an HLSL accessor function for reading constant registers, allowing Xbox-native negative indices and out-of-bounds handling. This makes the generated HLSL look closer to the original Xbox vertex shader assembly. 
--- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 31 +++++++++++++++++++++------- src/core/hle/D3D8/XbD3D8Types.h | 2 +- src/core/hle/D3D8/XbVertexShader.cpp | 21 +++++++++++-------- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 563d7aeab..a01c7c0c4 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -21,16 +21,19 @@ struct VS_OUTPUT float4 oT2 : TEXCOORD2; // Texture coordinate set 2 float4 oT3 : TEXCOORD3; // Texture coordinate set 3 }; - -// Xbox constant registers -extern uniform float4 c[192] : register(c0); + +#define X_D3DSCM_CORRECTION 96 // Add 96 to arrive at the range 0..191 (instead of -96..95) +#define X_D3DVS_CONSTREG_COUNT 192 + +// Xbox constant registers +uniform float4 C[X_D3DVS_CONSTREG_COUNT] : register(c0); // Vertex input overrides for SetVertexData4f support -extern float4 vOverrideValue[16] : register(c192); -extern float4 vOverridePacked[4] : register(c208); +uniform float4 vOverrideValue[16] : register(c192); +uniform float4 vOverridePacked[4] : register(c208); -extern float4 xboxViewportScale : register(c212); -extern float4 xboxViewportOffset : register(c213); +uniform float4 xboxViewportScale : register(c212); +uniform float4 xboxViewportOffset : register(c213); // Overloaded casts, assuring all inputs are treated as float4 float4 _tof4(float src) { return float4(src, src, src, src); } @@ -40,6 +43,20 @@ float4 _tof4(float4 src) { return src; } float4 _ssss(float s) { return float4(s, s, s, s); } // a scalar output replicated across a 4-component vector #define _scalar(src) _tof4(src).x /* a scalar input */ +float4 c(int register_number) +{ + // Map Xbox [-96, 95] to Host [0, 191] + // Account for Xbox's negative constant indexes + register_number += X_D3DSCM_CORRECTION; + if (register_number < 0) + return 0; + + if (register_number >= X_D3DVS_CONSTREG_COUNT) // X_D3DVS_CONSTREG_COUNT + return 0; + + return 
C[register_number]; +} + // http://xboxdevwiki.net/NV2A/Vertex_Shader // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program.txt // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program1_1.txt diff --git a/src/core/hle/D3D8/XbD3D8Types.h b/src/core/hle/D3D8/XbD3D8Types.h index 51b662dda..e1da19c71 100644 --- a/src/core/hle/D3D8/XbD3D8Types.h +++ b/src/core/hle/D3D8/XbD3D8Types.h @@ -986,7 +986,7 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE; #define X_D3DSCM_RESERVED_CONSTANT_SCALE -38 // Becomes 58 after correction, contains Scale v #define X_D3DSCM_RESERVED_CONSTANT_OFFSET -37 // Becomes 59 after correction, contains Offset -#define X_D3DSCM_CORRECTION 96 // Add 96 to arrive at the range 0..191 (instead of 96..95) +#define X_D3DSCM_CORRECTION 96 // Add 96 to arrive at the range 0..191 (instead of -96..95) #define X_D3DVS_CONSTREG_COUNT 192 // Special Registers, used to pass additional information to the shaders diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 363f9d152..98c2ec5a0 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -1601,7 +1601,9 @@ static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) switch (dest.Type) { case IMD_OUTPUT_C: - hlsl << "c[" << dest.Address << "]"; + // Access the HLSL capital C[] constants array, with the index bias applied : + // TODO : Avoid out-of-bound writes (perhaps writing to a reserverd index?) 
+ hlsl << "C[" << dest.Address + X_D3DSCM_CORRECTION << "]"; LOG_TEST_CASE("Vertex shader writes to constant table"); break; case IMD_OUTPUT_R: @@ -1647,18 +1649,19 @@ static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) int register_number = param.Address; if (param.ParameterType == PARAM_C) { - // Map Xbox [-96, 95] to Host [0, 191] - // Account for Xbox's negative constant indexes - register_number += 96; + // Access constant registers through our HLSL c() function, + // which allows dumping negative indices (like Xbox shaders), + // and which returns zero when out-of-bounds indices are passed in: if (paramMeta.IndexesWithA0_X) { - // Only display the offset if it's not 0. - if (register_number != 0) { - hlsl << "c[a0.x+" << register_number << "]"; + if (register_number == 0) { + hlsl << "c(a0.x)"; // Hide the offset if it's 0 + } else if (register_number < 0) { + hlsl << "c(a0.x" << register_number << ")"; // minus is part of the offset } else { - hlsl << "c[a0.x]"; + hlsl << "c(a0.x+" << register_number << ")"; // show addition character } } else { - hlsl << "c[" << register_number << "]"; + hlsl << "c(" << register_number << ")"; } } else { hlsl << RegisterName[param.ParameterType] << register_number; From ec2032bf0dc5c922ae788f7675fd886ecf860a7a Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Fri, 13 Dec 2019 18:44:16 +0100 Subject: [PATCH 50/77] Made sure that intermediate vertex shader instructions are never empty Move A0_X indexing boolean from per-parameter to per-instruction, where it belongs. 
--- src/core/hle/D3D8/XbVertexShader.cpp | 72 +++++++++++++--------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 98c2ec5a0..ea231e381 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -265,11 +265,6 @@ typedef struct _VSH_IMD_PARAMETER { boolean Active; VSH_PARAMETER Parameter; - // There is only a single address register in Microsoft DirectX 8.0. - // The address register, designated as a0.x, may be used as signed - // integer offset in relative addressing into the constant register file. - // c[a0.x + n] - boolean IndexesWithA0_X; } VSH_IMD_PARAMETER; @@ -280,6 +275,11 @@ typedef struct _VSH_INTERMEDIATE_FORMAT VSH_ILU ILU; VSH_IMD_OUTPUT Output; VSH_IMD_PARAMETER Parameters[3]; + // There is only a single address register in Microsoft DirectX 8.0. + // The address register, designated as a0.x, may be used as signed + // integer offset in relative addressing into the constant register file. 
+ // c[a0.x + n] + boolean IndexesWithA0_X; } VSH_INTERMEDIATE_FORMAT; @@ -472,12 +472,10 @@ static void VshParseInstruction(uint32_t *pShaderToken, } static void VshAddParameter(VSH_PARAMETER *pParameter, - boolean a0x, VSH_IMD_PARAMETER *pIntermediateParameter) { pIntermediateParameter->Parameter = *pParameter; pIntermediateParameter->Active = TRUE; - pIntermediateParameter->IndexesWithA0_X = a0x; } static void VshAddParameters(VSH_SHADER_INSTRUCTION *pInstruction, @@ -489,19 +487,19 @@ static void VshAddParameters(VSH_SHADER_INSTRUCTION *pInstruction, if(MAC >= MAC_MOV) { - VshAddParameter(&pInstruction->A, pInstruction->a0x, &pParameters[ParamCount]); + VshAddParameter(&pInstruction->A, &pParameters[ParamCount]); ParamCount++; } if((MAC == MAC_MUL) || ((MAC >= MAC_MAD) && (MAC <= MAC_SGE))) { - VshAddParameter(&pInstruction->B, pInstruction->a0x, &pParameters[ParamCount]); + VshAddParameter(&pInstruction->B, &pParameters[ParamCount]); ParamCount++; } if((ILU >= ILU_MOV) || (MAC == MAC_ADD) || (MAC == MAC_MAD)) { - VshAddParameter(&pInstruction->C, pInstruction->a0x, &pParameters[ParamCount]); + VshAddParameter(&pInstruction->C, &pParameters[ParamCount]); ParamCount++; } } @@ -554,6 +552,7 @@ static void VshAddIntermediateOpcode( pIntermediate->Output.Address = R; } pIntermediate->Output.Mask = mask; + pIntermediate->IndexesWithA0_X = pInstruction->a0x; VshAddParameters(pInstruction, pIntermediate->ILU, pIntermediate->MAC, pIntermediate->Parameters); } @@ -567,7 +566,8 @@ static void VshAddIntermediateOpcode( pMuxedIntermediate->ILU = instr_type == IMD_ILU ? pInstruction->ILU : ILU_NOP; pMuxedIntermediate->Output.Type = pInstruction->Output.OutputType == OUTPUT_C ? 
IMD_OUTPUT_C : IMD_OUTPUT_O; pMuxedIntermediate->Output.Address = pInstruction->Output.OutputAddress; - pMuxedIntermediate->Output.Mask = pInstruction->Output.OutputMask; + pMuxedIntermediate->Output.Mask = pInstruction->Output.OutputMask; + pMuxedIntermediate->IndexesWithA0_X = pInstruction->a0x; VshAddParameters(pInstruction, pMuxedIntermediate->ILU, pMuxedIntermediate->MAC, pMuxedIntermediate->Parameters); } } @@ -575,8 +575,8 @@ static void VshAddIntermediateOpcode( static void VshConvertToIntermediate(VSH_SHADER_INSTRUCTION *pInstruction, VSH_XBOX_SHADER *pShader) -{ - if (pInstruction->MAC != MAC_NOP) { +{ + if (pInstruction->MAC > MAC_NOP && pInstruction->MAC <= MAC_ARL) { int8_t mask = pInstruction->MAC == MAC_ARL ? MASK_X : pInstruction->Output.MACRMask; VshAddIntermediateOpcode(pInstruction, pShader, IMD_MAC, mask); } @@ -1630,7 +1630,7 @@ static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) if (dest.Mask & MASK_W) hlsl << "w"; } -static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) +static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta, bool IndexesWithA0_X) { // Print functions static char* RegisterName[/*VSH_PARAMETER_TYPE*/] = { @@ -1647,24 +1647,23 @@ static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta) hlsl << "-"; } - int register_number = param.Address; if (param.ParameterType == PARAM_C) { // Access constant registers through our HLSL c() function, // which allows dumping negative indices (like Xbox shaders), // and which returns zero when out-of-bounds indices are passed in: - if (paramMeta.IndexesWithA0_X) { - if (register_number == 0) { + if (IndexesWithA0_X) { + if (param.Address == 0) { hlsl << "c(a0.x)"; // Hide the offset if it's 0 - } else if (register_number < 0) { - hlsl << "c(a0.x" << register_number << ")"; // minus is part of the offset + } else if (param.Address < 0) { + hlsl << "c(a0.x" << param.Address << ")"; // minus is part of the 
offset } else { - hlsl << "c(a0.x+" << register_number << ")"; // show addition character + hlsl << "c(a0.x+" << param.Address << ")"; // show addition character } } else { - hlsl << "c(" << register_number << ")"; + hlsl << "c(" << param.Address << ")"; } } else { - hlsl << RegisterName[param.ParameterType] << register_number; + hlsl << RegisterName[param.ParameterType] << param.Address; } // Write the swizzle if we need to @@ -1733,27 +1732,22 @@ static void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) std::string str = ""; if (xboxInstruction.InstructionType == IMD_MAC) { - if (xboxInstruction.MAC > MAC_NOP && xboxInstruction.MAC <= MAC_ARL) { - str = VSH_MAC_HLSL[xboxInstruction.MAC]; - } + str = VSH_MAC_HLSL[xboxInstruction.MAC]; } else if (xboxInstruction.InstructionType == IMD_ILU) { - if (xboxInstruction.ILU > ILU_NOP) { - str = VSH_ILU_HLSL[xboxInstruction.ILU]; + str = VSH_ILU_HLSL[xboxInstruction.ILU]; + } + + assert(!str.empty()); + hlsl << "\n " << str << "("; // opcode + OutputHlsl(hlsl, xboxInstruction.Output); + for (int i = 0; i < 3; i++) { + if (xboxInstruction.Parameters[i].Active) { + hlsl << ", "; + ParameterHlsl(hlsl, xboxInstruction.Parameters[i], xboxInstruction.IndexesWithA0_X); } } - if (!str.empty()) { - hlsl << "\n " << str << "("; // opcode - OutputHlsl(hlsl, xboxInstruction.Output); - for (int i = 0; i < 3; i++) { - if (xboxInstruction.Parameters[i].Active) { - hlsl << ", "; - ParameterHlsl(hlsl, xboxInstruction.Parameters[i]); - } - } - - hlsl << ");"; - } + hlsl << ");"; } } From 0aa0dce8fbfda8d1df404c23bce9ea464bf8d826 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 14 Dec 2019 10:58:06 +1300 Subject: [PATCH 51/77] Hack to make sure pre-transformed and regular geometry go into the same coordinate space --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp 
b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 7d73c74cd..d637ddfe8 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -2846,7 +2846,7 @@ void Direct3D_CreateDevice_Start // Disable multisampling for now, this fixes an issue where GTA3 only renders to half-screen // TODO: Find a better way of fixing this, we cannot just create larger backbuffers as it breaks // many games, despite working in the dashboard - pPresentationParameters->MultiSampleType = XTL::X_D3DMULTISAMPLE_NONE; + pPresentationParameters->MultiSampleType = XTL::X_D3DMULTISAMPLE_NONE; // create default device *before* calling Xbox Direct3D_CreateDevice trampline // to avoid hitting EMUPATCH'es that need a valid g_pD3DDevice @@ -3845,16 +3845,25 @@ void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4]) break; } + // Xbox correct values? + xOffset = xOffset + (1.0f / 32.0f); + yOffset = yOffset + (1.0f / 32.0f); + xScale = xScale * ViewPort.Width; + yScale = yScale * ViewPort.Height; - // Offset with OGL pixel correction (?) TODO verify - vOffset[0] = xOffset + (2.0f / ViewPort.Width); - vOffset[1] = yOffset + (2.0f / ViewPort.Height); + // HACK: Add a host correction factor to these values + // So that after we reverse the screenspace transformation + // Pre-transformed 2d geometry is in the same space as the 3d geometry...? + + // Offset with a host correction + vOffset[0] = xOffset + (0.5 * ViewPort.Width / g_RenderScaleFactor); + vOffset[1] = yOffset + (0.5 * ViewPort.Height / g_RenderScaleFactor); vOffset[2] = 0; //offsetZ; vOffset[3] = 0.0f; - // Scale - vScale[0] = xScale * ViewPort.Width; - vScale[1] = yScale * ViewPort.Height; + // Scale with a host correction + vScale[0] = xScale * (1.0f / (2.0f * g_RenderScaleFactor)); + vScale[1] = yScale * (1.0f / (-2.0f * g_RenderScaleFactor)); vScale[2] = scaleZ; // ? vScale[3] = 1.0f; // ? 
} From ab1061634dbb2eef1f74d69396642eea2eedb8a9 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Tue, 10 Dec 2019 23:08:09 +1300 Subject: [PATCH 52/77] TMP mechassault hack --- src/core/kernel/exports/EmuKrnlOb.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/core/kernel/exports/EmuKrnlOb.cpp b/src/core/kernel/exports/EmuKrnlOb.cpp index 87400d476..802b34f09 100644 --- a/src/core/kernel/exports/EmuKrnlOb.cpp +++ b/src/core/kernel/exports/EmuKrnlOb.cpp @@ -859,12 +859,12 @@ XBSYSAPI EXPORTNUM(246) xboxkrnl::NTSTATUS NTAPI xboxkrnl::ObReferenceObjectByHa // HACK: Since we forward to NtDll::NtCreateEvent, this *might* be a Windows handle instead of our own // In this case, we must return the input handle // Test Case: Xbox Live Dashboard, Network Test (or any other Xbox Live connection) - DWORD flags = 0; - if (GetHandleInformation(Handle, &flags)) { - // This was a Windows Handle, so return it. - *ReturnedObject = Handle; - return STATUS_SUCCESS; - } + //DWORD flags = 0; + //if (GetHandleInformation(Handle, &flags)) { + // // This was a Windows Handle, so return it. + // *ReturnedObject = Handle; + // return STATUS_SUCCESS; + //} status = STATUS_INVALID_HANDLE; } From f8b1e35482f7181624d949ad6221ed941eefc17d Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 14 Dec 2019 17:30:34 +1300 Subject: [PATCH 53/77] fix pInstruction typo --- src/core/hle/D3D8/XbVertexShader.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index ea231e381..307e61384 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -543,14 +543,16 @@ static void VshAddIntermediateOpcode( VSH_INTERMEDIATE_FORMAT* pIntermediate = VshNewIntermediate(pShader); pIntermediate->InstructionType = instr_type; pIntermediate->MAC = instr_type == IMD_MAC ? 
pInstruction->MAC : MAC_NOP; - pIntermediate->ILU = instr_type == IMD_ILU ? pInstruction->ILU : ILU_NOP; - if (pInstruction->MAC == MAC_ARL) { + pIntermediate->ILU = instr_type == IMD_ILU ? pInstruction->ILU : ILU_NOP; + + if (pIntermediate->MAC == MAC_ARL) { pIntermediate->Output.Type = IMD_OUTPUT_A0X; pIntermediate->Output.Address = 0; } else { pIntermediate->Output.Type = IMD_OUTPUT_R; pIntermediate->Output.Address = R; - } + } + pIntermediate->Output.Mask = mask; pIntermediate->IndexesWithA0_X = pInstruction->a0x; VshAddParameters(pInstruction, pIntermediate->ILU, pIntermediate->MAC, pIntermediate->Parameters); From 1cebf018e2500d8ed302b469f65354ce83ffb496 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 14 Dec 2019 18:03:40 +1300 Subject: [PATCH 54/77] Use int instead of uint for compatibility with optimizer level 0 --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index a01c7c0c4..7d7e3c094 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -256,7 +256,7 @@ VS_OUTPUT main(const VS_INPUT xIn) // Initialize input registers from the vertex buffer // Or use an override value set with SetVertexData4f - for(uint i = 0; i < 16; i++){ + for(int i = 0; i < 16; i++){ v[i] = vOverride[i] ? 
vOverrideValue[i] : xIn.v[i]; } From f9e5f51815041a709b541ef38904a25a5ec6cb23 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sat, 14 Dec 2019 19:02:35 +1300 Subject: [PATCH 55/77] Fix dph src1.w double-add --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index 7d7e3c094..a902d3366 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -123,7 +123,11 @@ float4 _sge(float4 src0, float4 src1) } // 2.14.1.10.18 DPH: Homogeneous Dot Product -#define x_dph(dest, mask, src0, src1) dest.mask = _ssss(dot(float4(_tof4(src0).xyz, 1), _tof4(src1)) + src1.w).mask +#define x_dph(dest, mask, src0, src1) dest.mask = _ssss(_dph(_tof4(src0), _tof4(src1))).mask +float _dph(float4 src0, float4 src1) +{ + return dot(src0.xyz, src1.xyz) + src1.w; +} // Xbox ILU Functions From 9ee7bb451f662883b1d50657c24d0cf674f48e91 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Sun, 15 Dec 2019 22:47:04 +1300 Subject: [PATCH 56/77] Fix logp z component --- src/core/hle/D3D8/Direct3D9/Xb.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/Xb.hlsl index a902d3366..b68a4d7a6 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/Xb.hlsl @@ -162,7 +162,7 @@ float4 _logp(float input) float4 dest; dest.x = exponent; dest.y = 1 / exp2(exponent); // mantissa - dest.z = exponent + log2(input); // logResult + dest.z = log2(input); dest.w = 1; return dest; From cc594ca8d79b9181b81e7f37a6a2c4d7dccdc9de Mon Sep 17 00:00:00 2001 From: patrickvl Date: Sat, 14 Dec 2019 18:29:58 +0100 Subject: [PATCH 57/77] Further simplifications in vertex shader decoding to intermediate instructions --- src/core/hle/D3D8/XbVertexShader.cpp | 259 ++++++++++++--------------- src/core/hle/D3D8/XbVertexShader.h | 5 +- 2 files changed, 117 
insertions(+), 147 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 307e61384..958256f48 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -58,9 +58,6 @@ typedef enum _VSH_SWIZZLE } VSH_SWIZZLE; -typedef DWORD DxbxMask, -*PDxbxMask; - #define MASK_X 0x008 #define MASK_Y 0x004 #define MASK_Z 0x002 @@ -210,7 +207,7 @@ VSH_MAC; typedef struct _VSH_PARAMETER { VSH_PARAMETER_TYPE ParameterType; // Parameter type, R, V or C - boolean Neg; // TRUE if negated, FALSE if not + bool Neg; // true if negated, false if not VSH_SWIZZLE Swizzle[4]; // The four swizzles int16_t Address; // Register address } @@ -241,8 +238,8 @@ typedef struct _VSH_SHADER_INSTRUCTION VSH_PARAMETER A; VSH_PARAMETER B; VSH_PARAMETER C; - boolean a0x; - boolean Final; + bool a0x; + bool Final; } VSH_SHADER_INSTRUCTION; @@ -263,7 +260,7 @@ VSH_IMD_OUTPUT; typedef struct _VSH_IMD_PARAMETER { - boolean Active; + bool Active; VSH_PARAMETER Parameter; } VSH_IMD_PARAMETER; @@ -279,15 +276,15 @@ typedef struct _VSH_INTERMEDIATE_FORMAT // The address register, designated as a0.x, may be used as signed // integer offset in relative addressing into the constant register file. 
// c[a0.x + n] - boolean IndexesWithA0_X; + bool IndexesWithA0_X; } VSH_INTERMEDIATE_FORMAT; typedef struct _VSH_XBOX_SHADER { - XTL::X_VSH_SHADER_HEADER ShaderHeader; - uint16_t IntermediateCount; - VSH_INTERMEDIATE_FORMAT Intermediate[VSH_MAX_INTERMEDIATE_COUNT]; + XTL::X_VSH_SHADER_HEADER ShaderHeader; + uint16_t IntermediateCount; + VSH_INTERMEDIATE_FORMAT Intermediate[VSH_MAX_INTERMEDIATE_COUNT]; } VSH_XBOX_SHADER; @@ -302,16 +299,18 @@ void CxbxUpdateVertexShader(DWORD XboxVertexShaderHandle) }*/ // Retrieves a number of bits in the instruction token -static inline uint32_t VshGetFromToken(uint32_t *pShaderToken, - uint8_t SubToken, - uint8_t StartBit, - uint8_t BitLength) +static inline uint32_t VshGetFromToken( + uint32_t *pShaderToken, + uint8_t SubToken, + uint8_t StartBit, + uint8_t BitLength) { return (pShaderToken[SubToken] >> StartBit) & ~(0xFFFFFFFF << BitLength); } -static uint8_t VshGetField(uint32_t *pShaderToken, - VSH_FIELD_NAME FieldName) +static uint8_t VshGetField( + uint32_t *pShaderToken, + VSH_FIELD_NAME FieldName) { // Used for xvu spec definition static const struct { @@ -375,8 +374,9 @@ static inline int16_t ConvertCRegister(const int16_t CReg) return ((((CReg >> 5) & 7) - 3) * 32) + (CReg & 31); } -static void VshParseInstruction(uint32_t *pShaderToken, - VSH_SHADER_INSTRUCTION *pInstruction) +static void VshParseInstruction( + uint32_t *pShaderToken, + VSH_SHADER_INSTRUCTION *pInstruction) { // First get the instruction(s). 
pInstruction->ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU); @@ -471,121 +471,90 @@ static void VshParseInstruction(uint32_t *pShaderToken, pInstruction->Final = VshGetField(pShaderToken, FLD_FINAL); } -static void VshAddParameter(VSH_PARAMETER *pParameter, - VSH_IMD_PARAMETER *pIntermediateParameter) -{ - pIntermediateParameter->Parameter = *pParameter; - pIntermediateParameter->Active = TRUE; -} - -static void VshAddParameters(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_ILU ILU, - VSH_MAC MAC, - VSH_IMD_PARAMETER *pParameters) -{ - uint8_t ParamCount = 0; - - if(MAC >= MAC_MOV) - { - VshAddParameter(&pInstruction->A, &pParameters[ParamCount]); - ParamCount++; - } - - if((MAC == MAC_MUL) || ((MAC >= MAC_MAD) && (MAC <= MAC_SGE))) - { - VshAddParameter(&pInstruction->B, &pParameters[ParamCount]); - ParamCount++; - } - - if((ILU >= ILU_MOV) || (MAC == MAC_ADD) || (MAC == MAC_MAD)) - { - VshAddParameter(&pInstruction->C, &pParameters[ParamCount]); - ParamCount++; - } -} - -static void VshVerifyBufferBounds(VSH_XBOX_SHADER *pShader) -{ - if(pShader->IntermediateCount >= VSH_MAX_INTERMEDIATE_COUNT) - { - CxbxKrnlCleanup("Shader exceeds conversion buffer!"); - } -} - -static VSH_INTERMEDIATE_FORMAT *VshNewIntermediate(VSH_XBOX_SHADER *pShader) -{ - VshVerifyBufferBounds(pShader); - - ZeroMemory(&pShader->Intermediate[pShader->IntermediateCount], sizeof(VSH_INTERMEDIATE_FORMAT)); - - return &pShader->Intermediate[pShader->IntermediateCount++]; -} - static void VshAddIntermediateOpcode( VSH_SHADER_INSTRUCTION* pInstruction, - VSH_XBOX_SHADER *pShader, - VSH_IMD_INSTRUCTION_TYPE instr_type, - int8_t mask) + VSH_XBOX_SHADER* pShader, + VSH_IMD_INSTRUCTION_TYPE instr_type, + VSH_IMD_OUTPUT_TYPE output_type, + int16_t output_address, + int8_t output_mask) { - int R = pInstruction->Output.RAddress; - // Test for paired opcodes - if ((pInstruction->MAC != MAC_NOP) && (pInstruction->ILU != ILU_NOP)) { - if (instr_type == IMD_ILU) { - // Paired ILU opcodes can only write to 
R1 - R = 1; - } else if (R == 1) { - // Ignore paired MAC opcodes that write to R1 - mask = 0; - } - } - - if (mask > 0) { - VSH_INTERMEDIATE_FORMAT* pIntermediate = VshNewIntermediate(pShader); - pIntermediate->InstructionType = instr_type; - pIntermediate->MAC = instr_type == IMD_MAC ? pInstruction->MAC : MAC_NOP; - pIntermediate->ILU = instr_type == IMD_ILU ? pInstruction->ILU : ILU_NOP; - - if (pIntermediate->MAC == MAC_ARL) { - pIntermediate->Output.Type = IMD_OUTPUT_A0X; - pIntermediate->Output.Address = 0; - } else { - pIntermediate->Output.Type = IMD_OUTPUT_R; - pIntermediate->Output.Address = R; - } - - pIntermediate->Output.Mask = mask; - pIntermediate->IndexesWithA0_X = pInstruction->a0x; - VshAddParameters(pInstruction, pIntermediate->ILU, pIntermediate->MAC, pIntermediate->Parameters); - } - // Is the output mask set? - if (pInstruction->Output.OutputMask > 0) { - // Check if we must add a muxed opcode too - if ((uint8_t)(pInstruction->Output.OutputMux) == (uint8_t)instr_type) { - VSH_INTERMEDIATE_FORMAT* pMuxedIntermediate = VshNewIntermediate(pShader); - pMuxedIntermediate->InstructionType = instr_type; - pMuxedIntermediate->MAC = instr_type == IMD_MAC ? pInstruction->MAC : MAC_NOP; - pMuxedIntermediate->ILU = instr_type == IMD_ILU ? pInstruction->ILU : ILU_NOP; - pMuxedIntermediate->Output.Type = pInstruction->Output.OutputType == OUTPUT_C ? IMD_OUTPUT_C : IMD_OUTPUT_O; - pMuxedIntermediate->Output.Address = pInstruction->Output.OutputAddress; - pMuxedIntermediate->Output.Mask = pInstruction->Output.OutputMask; - pMuxedIntermediate->IndexesWithA0_X = pInstruction->a0x; - VshAddParameters(pInstruction, pMuxedIntermediate->ILU, pMuxedIntermediate->MAC, pMuxedIntermediate->Parameters); - } + if (output_mask == 0) { + return; + } + + if(pShader->IntermediateCount >= VSH_MAX_INTERMEDIATE_COUNT) { + CxbxKrnlCleanup("Shader exceeds conversion buffer!"); + } + + VSH_MAC MAC = (instr_type == IMD_MAC) ? 
pInstruction->MAC : MAC_NOP; + VSH_ILU ILU = (instr_type == IMD_ILU) ? pInstruction->ILU : ILU_NOP; + VSH_INTERMEDIATE_FORMAT* pIntermediate = &pShader->Intermediate[pShader->IntermediateCount++]; + + pIntermediate->InstructionType = instr_type; + pIntermediate->MAC = MAC; + pIntermediate->ILU = ILU; + pIntermediate->Output.Type = output_type; + pIntermediate->Output.Address = output_address; + pIntermediate->Output.Mask = output_mask; + pIntermediate->IndexesWithA0_X = pInstruction->a0x; + + // Parameters[0].Active will always be set, but [1] and [2] may not, so reset them: + pIntermediate->Parameters[1].Active = false; + pIntermediate->Parameters[2].Active = false; + uint8_t ParamCount = 0; + if(MAC >= MAC_MOV) { + pIntermediate->Parameters[ParamCount].Parameter = pInstruction->A; + pIntermediate->Parameters[ParamCount++].Active = true; + } + + if((MAC == MAC_MUL) || ((MAC >= MAC_MAD) && (MAC <= MAC_SGE))) { + pIntermediate->Parameters[ParamCount].Parameter = pInstruction->B; + pIntermediate->Parameters[ParamCount++].Active = true; + } + + if((ILU >= ILU_MOV) || (MAC == MAC_ADD) || (MAC == MAC_MAD)) { + pIntermediate->Parameters[ParamCount].Parameter = pInstruction->C; + pIntermediate->Parameters[ParamCount++].Active = true; } } -static void VshConvertToIntermediate(VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader) +static void VshConvertToIntermediate( + VSH_SHADER_INSTRUCTION *pInstruction, + VSH_XBOX_SHADER *pShader) { + // Test for paired opcodes + bool bIsPaired = (pInstruction->MAC != MAC_NOP) && (pInstruction->ILU != ILU_NOP); + VSH_IMD_OUTPUT_TYPE OutputType2 = pInstruction->Output.OutputType == OUTPUT_C ? IMD_OUTPUT_C : IMD_OUTPUT_O; + + // Check if there's a MAC opcode if (pInstruction->MAC > MAC_NOP && pInstruction->MAC <= MAC_ARL) { - int8_t mask = pInstruction->MAC == MAC_ARL ? 
MASK_X : pInstruction->Output.MACRMask; - VshAddIntermediateOpcode(pInstruction, pShader, IMD_MAC, mask); + if (bIsPaired && pInstruction->Output.RAddress == 1) { + // Ignore paired MAC opcodes that write to R1 + } else { + if (pInstruction->MAC == MAC_ARL) { + VshAddIntermediateOpcode(pInstruction, pShader, IMD_MAC, IMD_OUTPUT_A0X, 0, MASK_X); + } else { + VshAddIntermediateOpcode(pInstruction, pShader, IMD_MAC, IMD_OUTPUT_R, pInstruction->Output.RAddress, pInstruction->Output.MACRMask); + } + } + + // Check if we must add a muxed MAC opcode as well + if (pInstruction->Output.OutputMux == OMUX_MAC) { + VshAddIntermediateOpcode(pInstruction, pShader, IMD_MAC, OutputType2, pInstruction->Output.OutputAddress, pInstruction->Output.OutputMask); + } } - if (pInstruction->ILU != ILU_NOP) { - VshAddIntermediateOpcode(pInstruction, pShader, IMD_ILU, pInstruction->Output.ILURMask); - } + // Check if there's an ILU opcode + if (pInstruction->ILU != ILU_NOP) { + // Paired ILU opcodes will only write to R1 + VshAddIntermediateOpcode(pInstruction, pShader, IMD_ILU, IMD_OUTPUT_R, bIsPaired ? 
1 : pInstruction->Output.RAddress, pInstruction->Output.ILURMask); + // Check if we must add a muxed ILU opcode as well + if (pInstruction->Output.OutputMux == OMUX_ILU) { + VshAddIntermediateOpcode(pInstruction, pShader, IMD_ILU, OutputType2, pInstruction->Output.OutputAddress, pInstruction->Output.OutputMask); + } + } } #define D3DDECLUSAGE_UNSUPPORTED ((D3DDECLUSAGE)-1) @@ -1001,8 +970,8 @@ private: // new stream pCurrentVertexShaderStreamInfo = &(pVertexShaderInfoToSet->VertexStreams[StreamNumber]); - pCurrentVertexShaderStreamInfo->NeedPatch = FALSE; - pCurrentVertexShaderStreamInfo->DeclPosition = FALSE; + pCurrentVertexShaderStreamInfo->NeedPatch = false; + pCurrentVertexShaderStreamInfo->DeclPosition = false; pCurrentVertexShaderStreamInfo->CurrentStreamNumber = 0; pCurrentVertexShaderStreamInfo->HostVertexStride = 0; pCurrentVertexShaderStreamInfo->NumberOfVertexElements = 0; @@ -1019,7 +988,7 @@ private: UINT XboxVertexElementDataType, UINT XboxVertexElementByteSize, UINT HostVertexElementByteSize, - BOOL NeedPatching) + bool NeedPatching) { CxbxVertexShaderStreamElement* pCurrentElement = &(pCurrentVertexShaderStreamInfo->VertexElements[pCurrentVertexShaderStreamInfo->NumberOfVertexElements]); pCurrentElement->XboxType = XboxVertexElementDataType; @@ -1049,7 +1018,7 @@ private: // Register a 'skip' element, so that Xbox data will be skipped // without increasing host stride - this does require patching : - VshConvert_RegisterVertexElement(XTL::X_D3DVSDT_NONE, SkipBytesCount, /*HostSize=*/0, /*NeedPatching=*/TRUE); + VshConvert_RegisterVertexElement(XTL::X_D3DVSDT_NONE, SkipBytesCount, /*HostSize=*/0, /*NeedPatching=*/true); } void VshConvertToken_STREAMDATA_SKIP(DWORD *pXboxToken) @@ -1067,7 +1036,7 @@ private: void VshConvertToken_STREAMDATA_REG(DWORD *pXboxToken) { DWORD VertexRegister = VshGetVertexRegister(*pXboxToken); - BOOL NeedPatching = FALSE; + bool NeedPatching = false; BYTE Index; BYTE HostVertexRegisterType; @@ -1132,7 +1101,7 @@ private: 
HostVertexElementByteSize = 1 * sizeof(FLOAT); } XboxVertexElementByteSize = 1 * sizeof(XTL::SHORT); - NeedPatching = TRUE; + NeedPatching = true; break; case XTL::X_D3DVSDT_NORMSHORT2: // 0x21: if (g_D3DCaps.DeclTypes & D3DDTCAPS_SHORT2N) { @@ -1145,7 +1114,7 @@ private: HostVertexElementDataType = D3DDECLTYPE_FLOAT2; HostVertexElementByteSize = 2 * sizeof(FLOAT); XboxVertexElementByteSize = 2 * sizeof(XTL::SHORT); - NeedPatching = TRUE; + NeedPatching = true; } break; case XTL::X_D3DVSDT_NORMSHORT3: // 0x31: @@ -1159,7 +1128,7 @@ private: HostVertexElementByteSize = 3 * sizeof(FLOAT); } XboxVertexElementByteSize = 3 * sizeof(XTL::SHORT); - NeedPatching = TRUE; + NeedPatching = true; break; case XTL::X_D3DVSDT_NORMSHORT4: // 0x41: if (g_D3DCaps.DeclTypes & D3DDTCAPS_SHORT4N) { @@ -1172,26 +1141,26 @@ private: HostVertexElementDataType = D3DDECLTYPE_FLOAT4; HostVertexElementByteSize = 4 * sizeof(FLOAT); XboxVertexElementByteSize = 4 * sizeof(XTL::SHORT); - NeedPatching = TRUE; + NeedPatching = true; } break; case XTL::X_D3DVSDT_NORMPACKED3: // 0x16: HostVertexElementDataType = D3DDECLTYPE_FLOAT3; HostVertexElementByteSize = 3 * sizeof(FLOAT); XboxVertexElementByteSize = 1 * sizeof(XTL::DWORD); - NeedPatching = TRUE; + NeedPatching = true; break; case XTL::X_D3DVSDT_SHORT1: // 0x15: HostVertexElementDataType = D3DDECLTYPE_SHORT2; HostVertexElementByteSize = 2 * sizeof(SHORT); XboxVertexElementByteSize = 1 * sizeof(XTL::SHORT); - NeedPatching = TRUE; + NeedPatching = true; break; case XTL::X_D3DVSDT_SHORT3: // 0x35: HostVertexElementDataType = D3DDECLTYPE_SHORT4; HostVertexElementByteSize = 4 * sizeof(SHORT); XboxVertexElementByteSize = 3 * sizeof(XTL::SHORT); - NeedPatching = TRUE; + NeedPatching = true; break; case XTL::X_D3DVSDT_PBYTE1: // 0x14: if (g_D3DCaps.DeclTypes & D3DDTCAPS_UBYTE4N) { @@ -1204,7 +1173,7 @@ private: HostVertexElementByteSize = 1 * sizeof(FLOAT); } XboxVertexElementByteSize = 1 * sizeof(XTL::BYTE); - NeedPatching = TRUE; + NeedPatching = 
true; break; case XTL::X_D3DVSDT_PBYTE2: // 0x24: if (g_D3DCaps.DeclTypes & D3DDTCAPS_UBYTE4N) { @@ -1217,7 +1186,7 @@ private: HostVertexElementByteSize = 2 * sizeof(FLOAT); } XboxVertexElementByteSize = 2 * sizeof(XTL::BYTE); - NeedPatching = TRUE; + NeedPatching = true; break; case XTL::X_D3DVSDT_PBYTE3: // 0x34: if (g_D3DCaps.DeclTypes & D3DDTCAPS_UBYTE4N) { @@ -1230,7 +1199,7 @@ private: HostVertexElementByteSize = 3 * sizeof(FLOAT); } XboxVertexElementByteSize = 3 * sizeof(XTL::BYTE); - NeedPatching = TRUE; + NeedPatching = true; break; case XTL::X_D3DVSDT_PBYTE4: // 0x44: // Test-case : Panzer @@ -1244,14 +1213,14 @@ private: HostVertexElementDataType = D3DDECLTYPE_FLOAT4; HostVertexElementByteSize = 4 * sizeof(FLOAT); XboxVertexElementByteSize = 4 * sizeof(XTL::BYTE); - NeedPatching = TRUE; + NeedPatching = true; } break; case XTL::X_D3DVSDT_FLOAT2H: // 0x72: HostVertexElementDataType = D3DDECLTYPE_FLOAT4; HostVertexElementByteSize = 4 * sizeof(FLOAT); XboxVertexElementByteSize = 3 * sizeof(FLOAT); - NeedPatching = TRUE; + NeedPatching = true; break; case XTL::X_D3DVSDT_NONE: // 0x02: // No host element data, so no patching @@ -1477,7 +1446,7 @@ extern void FreeVertexDynamicPatch(CxbxVertexShader *pVertexShader) } // Checks for failed vertex shaders, and shaders that would need patching -boolean VshHandleIsValidShader(DWORD XboxVertexShaderHandle) +bool VshHandleIsValidShader(DWORD XboxVertexShaderHandle) { #if 0 //printf( "VS = 0x%.08X\n", XboxVertexShaderHandle ); @@ -1486,7 +1455,7 @@ boolean VshHandleIsValidShader(DWORD XboxVertexShaderHandle) if (pCxbxVertexShader) { if (pCxbxVertexShader->XboxStatus != 0) { - return FALSE; + return false; } /* for (uint32 i = 0; i < pCxbxVertexShader->VertexShaderInfo.NumberOfVertexStreams; i++) @@ -1495,13 +1464,13 @@ boolean VshHandleIsValidShader(DWORD XboxVertexShaderHandle) { // Just for caching purposes pCxbxVertexShader->XboxStatus = 0x80000001; - return FALSE; + return false; } } */ } #endif - return TRUE; + 
return true; } extern boolean IsValidCurrentShader(void) @@ -1780,7 +1749,7 @@ extern HRESULT EmuRecompileVshFunction { XTL::X_VSH_SHADER_HEADER* pXboxVertexShaderHeader = (XTL::X_VSH_SHADER_HEADER*)pXboxFunction; DWORD* pToken; - boolean EOI = false; + bool EOI = false; VSH_XBOX_SHADER* pShader = (VSH_XBOX_SHADER*)calloc(1, sizeof(VSH_XBOX_SHADER)); ID3DBlob* pErrors = nullptr; HRESULT hRet = 0; diff --git a/src/core/hle/D3D8/XbVertexShader.h b/src/core/hle/D3D8/XbVertexShader.h index 0d9f7ea11..11087fe07 100644 --- a/src/core/hle/D3D8/XbVertexShader.h +++ b/src/core/hle/D3D8/XbVertexShader.h @@ -30,10 +30,11 @@ #include "core\hle\D3D8\XbD3D8Types.h" // for X_VSH_MAX_ATTRIBUTES // Host vertex shader counts -#define VSH_MAX_INTERMEDIATE_COUNT 1024 // The maximum number of intermediate format slots #define VSH_VS11_MAX_INSTRUCTION_COUNT 128 #define VSH_VS2X_MAX_INSTRUCTION_COUNT 256 -#define VSH_VS30_MAX_INSTRUCTION_COUNT 512 +#define VSH_VS30_MAX_INSTRUCTION_COUNT 512 + +#define VSH_MAX_INTERMEDIATE_COUNT (X_VSH_MAX_INSTRUCTION_COUNT * 3) // The maximum number of shader function slots typedef struct _CxbxVertexShaderStreamElement { From 5fe0a16cc6e4424f37f89aad60767b67f6299ce3 Mon Sep 17 00:00:00 2001 From: patrickvl Date: Sun, 15 Dec 2019 14:41:47 +0100 Subject: [PATCH 58/77] Renamed vertex shader hlsl file, and prepared it for future extensions to behave closer to specifications (nothing changed for now) --- ...{Xb.hlsl => CxbxVertexShaderTemplate.hlsl} | 99 ++++++++++++++----- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 28 +++--- src/core/hle/D3D8/XbVertexShader.cpp | 2 +- src/core/hle/D3D8/XbVertexShader.h | 2 +- 4 files changed, 89 insertions(+), 42 deletions(-) rename src/core/hle/D3D8/Direct3D9/{Xb.hlsl => CxbxVertexShaderTemplate.hlsl} (82%) diff --git a/src/core/hle/D3D8/Direct3D9/Xb.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl similarity index 82% rename from src/core/hle/D3D8/Direct3D9/Xb.hlsl rename to 
src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index b68a4d7a6..9a582ab70 100644 --- a/src/core/hle/D3D8/Direct3D9/Xb.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -57,6 +57,23 @@ float4 c(int register_number) return C[register_number]; } +// Due to rounding differences with the Xbox (and increased precision on PC?) +// some titles produce values just below the threshold of the next integer. +// We can add a small bias to make sure it's bumped over the threshold +// Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) +#define BIAS 0.0001 +// TODO : Use 0.001 like xqemu? + +// 2.14.1.11 Vertex Program Floating Point Requirements +// The floor operations used by the ARL and EXP instructions must +// operate identically. Specifically, the EXP instruction's floor(t.x) +// intermediate result must exactly match the integer stored in the +// address register by the ARL instruction. +float x_floor(float src) +{ + return floor(src + BIAS); +} + // http://xboxdevwiki.net/NV2A/Vertex_Shader // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program.txt // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program1_1.txt @@ -65,11 +82,7 @@ float4 c(int register_number) // 2.14.1.10.1 ARL: Address Register Load // The address register should be floored -// Due to rounding differences with the Xbox (and increased precision on PC?) -// some titles produce values just below the threshold of the next integer. 
-// We can add a small bias to make sure it's bumped over the threshold -// Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) -#define x_arl(dest, mask, src0) dest.mask = floor(_tof4(src0).x + 0.0001).mask +#define x_arl(dest, mask, src0) dest.mask = x_floor(_tof4(src0).x).mask // 2.14.1.10.2 MOV: Move #define x_mov(dest, mask, src0) dest.mask = (_tof4(src0)).mask @@ -132,39 +145,73 @@ float _dph(float4 src0, float4 src1) // Xbox ILU Functions // 2.14.1.10.6 RCP: Reciprocal -#define x_rcp(dest, mask, src0) dest.mask = _ssss(1 / _scalar(src0)).mask -// TODO : #define x_rcp(dest, mask, src0) dest.mask = (_scalar(src0) == 0) ? 1.#INF : (1 / _scalar(src0)) +#define x_rcp(dest, mask, src0) dest.mask = _ssss(_rcp(_scalar(src0))).mask +float _rcp(float src) +{ +#if 0 // TODO : Enable + if (src == 1) return 1; + if (src == 0) return 1.#INF; +#endif + return 1/ src; +} // 2.14.1.10.7 RSQ: Reciprocal Square Root -#define x_rsq(dest, mask, src0) dest.mask = _ssss(rsqrt(abs(_scalar(src0)))).mask +#define x_rsq(dest, mask, src0) dest.mask = _ssss(_rsq(_scalar(src0))).mask +float _rsq(float src) +{ + float a = abs(src); +#if 0 // TODO : Enable + if (a == 1) return 1; + if (a == 0) return 1.#INF; +#endif + return rsqrt(a); +} // 2.14.1.10.15 EXP: Exponential Base 2 #define x_expp(dest, mask, src0) dest.mask = _expp(_scalar(src0)).mask -float4 _expp(float input) +float4 _expp(float src) { - float base = floor(input); + float floor_src = x_floor(src); - float4 dest; - dest.x = exp2(base); - dest.y = input - base; // Was : frac(input) - dest.z = exp2(input); + float4 dest; + dest.x = exp2(floor_src); + dest.y = src - floor_src; + dest.z = exp2(src); dest.w = 1; - return dest; + return dest; } // 2.14.1.10.16 LOG: Logarithm Base 2 #define x_logp(dest, mask, src0) dest.mask = _logp(_scalar(src0)).mask -float4 _logp(float input) -{ - float exponent = floor(log2(input)); - +float4 _logp(float src) +{ float4 dest; - dest.x = exponent; - dest.y = 1 / 
exp2(exponent); // mantissa - dest.z = log2(input); - dest.w = 1; - +#if 0 // TODO : Enable + float t = abs(src); + if (t != 0) { + if (t == 1.#INF) { + dest.x = 1.#INF; + dest.y = 1; + dest.z = 1.#INF; + } else { +#endif + float exponent = floor(log2(src)); // TODO : x_floor + float mantissa = 1 / exp2(exponent); + float z = log2(src); // TODO : exponent + log2(mantissa); // TODO : Or log2(t)? + // TODO : float exponent = frexp(src + BIAS, /*out*/mantissa); + dest.x = exponent; + dest.y = mantissa; + dest.z = z; +#if 0 + } + } else { + dest.x = -1.#INF; + dest.y = 1; + dest.z = -1.#INF; + } +#endif + dest.w = 1; return dest; } @@ -190,10 +237,10 @@ float4 _lit(float4 src0) // 2.14.1.10.19 RCC: Reciprocal Clamped #define x_rcc(dest, mask, src0) dest.mask = _ssss(_rcc(_scalar(src0))).mask -float _rcc(float input) +float _rcc(float src) { // Calculate the reciprocal - float r = 1 / input; + float r = 1 / src; // Clamp return (r >= 0) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index d637ddfe8..5793691ae 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -3811,10 +3811,10 @@ void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4]) // Default scale and offset. 
// Multisample state will affect these - float xScale = 1; - float yScale = 1; - float xOffset = 0.5; - float yOffset = 0.5; + float xScale = 1.0f; + float yScale = 1.0f; + float xOffset = 0.5f; + float yOffset = 0.5f; // MULTISAMPLE options have offset of 0 // Various sample sizes have various x and y scales @@ -3824,21 +3824,21 @@ void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4]) case XTL::X_D3DMULTISAMPLE_2_SAMPLES_MULTISAMPLE_QUINCUNX: case XTL::X_D3DMULTISAMPLE_4_SAMPLES_MULTISAMPLE_LINEAR: case XTL::X_D3DMULTISAMPLE_4_SAMPLES_MULTISAMPLE_GAUSSIAN: - xOffset = yOffset = 0; + xOffset = yOffset = 0.0f; break; case XTL::X_D3DMULTISAMPLE_2_SAMPLES_SUPERSAMPLE_HORIZONTAL_LINEAR: - xScale = 2; + xScale = 2.0f; break; case XTL::X_D3DMULTISAMPLE_2_SAMPLES_SUPERSAMPLE_VERTICAL_LINEAR: - yScale = 2; + yScale = 2.0f; break; case XTL::X_D3DMULTISAMPLE_4_SAMPLES_SUPERSAMPLE_LINEAR: case XTL::X_D3DMULTISAMPLE_4_SAMPLES_SUPERSAMPLE_GAUSSIAN: - xScale = yScale = 2; + xScale = yScale = 2.0f; break; case XTL::X_D3DMULTISAMPLE_9_SAMPLES_MULTISAMPLE_GAUSSIAN: xScale = yScale = 1.5f; - xOffset = yOffset = 0; + xOffset = yOffset = 0.0f; break; case XTL::X_D3DMULTISAMPLE_9_SAMPLES_SUPERSAMPLE_GAUSSIAN: xScale = yScale = 3.0f; @@ -3856,14 +3856,14 @@ void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4]) // Pre-transformed 2d geometry is in the same space as the 3d geometry...? 
// Offset with a host correction - vOffset[0] = xOffset + (0.5 * ViewPort.Width / g_RenderScaleFactor); - vOffset[1] = yOffset + (0.5 * ViewPort.Height / g_RenderScaleFactor); - vOffset[2] = 0; //offsetZ; + vOffset[0] = xOffset + (0.5f * (float)ViewPort.Width / (float)g_RenderScaleFactor); + vOffset[1] = yOffset + (0.5f * (float)ViewPort.Height / (float)g_RenderScaleFactor); + vOffset[2] = 0.0f; //offsetZ; vOffset[3] = 0.0f; // Scale with a host correction - vScale[0] = xScale * (1.0f / (2.0f * g_RenderScaleFactor)); - vScale[1] = yScale * (1.0f / (-2.0f * g_RenderScaleFactor)); + vScale[0] = xScale * (1.0f / ( 2.0f * (float)g_RenderScaleFactor)); + vScale[1] = yScale * (1.0f / (-2.0f * (float)g_RenderScaleFactor)); vScale[2] = scaleZ; // ? vScale[3] = 1.0f; // ? } diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 958256f48..c634b4dc8 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -1788,7 +1788,7 @@ extern HRESULT EmuRecompileVshFunction if (SUCCEEDED(hRet)) { static std::string hlsl_template = - #include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string + #include "core\hle\D3D8\Direct3D9\CxbxVertexShaderTemplate.hlsl" // Note : This included .hlsl defines a raw string ; auto hlsl_stream = std::stringstream(); diff --git a/src/core/hle/D3D8/XbVertexShader.h b/src/core/hle/D3D8/XbVertexShader.h index 11087fe07..b634935cd 100644 --- a/src/core/hle/D3D8/XbVertexShader.h +++ b/src/core/hle/D3D8/XbVertexShader.h @@ -57,7 +57,7 @@ CxbxVertexShaderStreamElement; typedef struct _CxbxVertexShaderStreamInfo { - BOOL NeedPatch; // This is to know whether it's data which must be patched + bool NeedPatch; // This is to know whether it's data which must be patched BOOL DeclPosition; WORD HostVertexStride; DWORD NumberOfVertexElements; // Number of the stream data types From eeced5f0a989826c437f4ffa436118c985a5ce66 Mon Sep 17 00:00:00 2001 From: 
patrickvl Date: Sun, 15 Dec 2019 16:24:56 +0100 Subject: [PATCH 59/77] Removed one unnecessary intermediate vertex shader decoding layer --- src/core/hle/D3D8/XbVertexShader.cpp | 365 ++++++++++++--------------- 1 file changed, 159 insertions(+), 206 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index c634b4dc8..53d1ddeed 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -204,45 +204,6 @@ typedef enum _VSH_MAC } VSH_MAC; -typedef struct _VSH_PARAMETER -{ - VSH_PARAMETER_TYPE ParameterType; // Parameter type, R, V or C - bool Neg; // true if negated, false if not - VSH_SWIZZLE Swizzle[4]; // The four swizzles - int16_t Address; // Register address -} -VSH_PARAMETER; - -typedef struct _VSH_OUTPUT -{ - // Output register - VSH_OUTPUT_MUX OutputMux; // MAC or ILU used as output - VSH_OUTPUT_TYPE OutputType; // C or O - int8_t OutputMask; - int16_t OutputAddress; - // MAC output Mask - int8_t MACRMask; - // ILU output mask - int8_t ILURMask; - // MAC,ILU output R register - int16_t RAddress; -} -VSH_OUTPUT; - -// The raw, parsed shader instruction (can be many combined [paired] instructions) -typedef struct _VSH_SHADER_INSTRUCTION -{ - VSH_ILU ILU; - VSH_MAC MAC; - VSH_OUTPUT Output; - VSH_PARAMETER A; - VSH_PARAMETER B; - VSH_PARAMETER C; - bool a0x; - bool Final; -} -VSH_SHADER_INSTRUCTION; - typedef enum _VSH_IMD_INSTRUCTION_TYPE { IMD_MAC, @@ -260,8 +221,11 @@ VSH_IMD_OUTPUT; typedef struct _VSH_IMD_PARAMETER { - bool Active; - VSH_PARAMETER Parameter; + bool Active; + VSH_PARAMETER_TYPE ParameterType; // Parameter type, R, V or C + bool Neg; // true if negated, false if not + VSH_SWIZZLE Swizzle[4]; // The four swizzles + int16_t Address; // Register address } VSH_IMD_PARAMETER; @@ -373,188 +337,183 @@ static inline int16_t ConvertCRegister(const int16_t CReg) { return ((((CReg >> 5) & 7) - 3) * 32) + (CReg & 31); } - -static void VshParseInstruction( - uint32_t 
*pShaderToken, - VSH_SHADER_INSTRUCTION *pInstruction) -{ - // First get the instruction(s). - pInstruction->ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU); - pInstruction->MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC); - - // Get parameter A - pInstruction->A.ParameterType = (VSH_PARAMETER_TYPE)VshGetField(pShaderToken, FLD_A_MUX); - switch(pInstruction->A.ParameterType) - { - case PARAM_R: - pInstruction->A.Address = VshGetField(pShaderToken, FLD_A_R); - break; - case PARAM_V: - pInstruction->A.Address = VshGetField(pShaderToken, FLD_V); - break; - case PARAM_C: - pInstruction->A.Address = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST)); - break; - default: - EmuLog(LOG_LEVEL::WARNING, "Invalid instruction, parameter A type unknown %d", pInstruction->A.ParameterType); - return; - } - pInstruction->A.Neg = VshGetField(pShaderToken, FLD_A_NEG); - pInstruction->A.Swizzle[0] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_A_SWZ_X); - pInstruction->A.Swizzle[1] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_A_SWZ_Y); - pInstruction->A.Swizzle[2] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_A_SWZ_Z); - pInstruction->A.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_A_SWZ_W); - // Get parameter B - pInstruction->B.ParameterType = (VSH_PARAMETER_TYPE)VshGetField(pShaderToken, FLD_B_MUX); - switch(pInstruction->B.ParameterType) - { - case PARAM_R: - pInstruction->B.Address = VshGetField(pShaderToken, FLD_B_R); - break; - case PARAM_V: - pInstruction->B.Address = VshGetField(pShaderToken, FLD_V); - break; - case PARAM_C: - pInstruction->B.Address = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST)); - break; - default: - DbgVshPrintf("Invalid instruction, parameter B type unknown %d\n", pInstruction->B.ParameterType); - return; - } - pInstruction->B.Neg = VshGetField(pShaderToken, FLD_B_NEG); - pInstruction->B.Swizzle[0] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_B_SWZ_X); - pInstruction->B.Swizzle[1] = (VSH_SWIZZLE)VshGetField(pShaderToken, 
FLD_B_SWZ_Y); - pInstruction->B.Swizzle[2] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_B_SWZ_Z); - pInstruction->B.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_B_SWZ_W); - // Get parameter C - pInstruction->C.ParameterType = (VSH_PARAMETER_TYPE)VshGetField(pShaderToken, FLD_C_MUX); - switch(pInstruction->C.ParameterType) - { - case PARAM_R: - pInstruction->C.Address = VshGetField(pShaderToken, FLD_C_R_HIGH) << 2 | - VshGetField(pShaderToken, FLD_C_R_LOW); - break; - case PARAM_V: - pInstruction->C.Address = VshGetField(pShaderToken, FLD_V); - break; - case PARAM_C: - pInstruction->C.Address = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST)); - break; - default: - DbgVshPrintf("Invalid instruction, parameter C type unknown %d\n", pInstruction->C.ParameterType); - return; - } - pInstruction->C.Neg = VshGetField(pShaderToken, FLD_C_NEG); - pInstruction->C.Swizzle[0] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_C_SWZ_X); - pInstruction->C.Swizzle[1] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_C_SWZ_Y); - pInstruction->C.Swizzle[2] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_C_SWZ_Z); - pInstruction->C.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_C_SWZ_W); - // Get output - // Output register - pInstruction->Output.OutputType = (VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB); - switch(pInstruction->Output.OutputType) - { - case OUTPUT_C: - pInstruction->Output.OutputAddress = ConvertCRegister(VshGetField(pShaderToken, FLD_OUT_ADDRESS)); - break; - case OUTPUT_O: - pInstruction->Output.OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS) & 0xF; - break; - } - pInstruction->Output.OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX); - pInstruction->Output.OutputMask = VshGetField(pShaderToken, FLD_OUT_O_MASK); - pInstruction->Output.MACRMask = VshGetField(pShaderToken, FLD_OUT_MAC_MASK); - pInstruction->Output.ILURMask = VshGetField(pShaderToken, FLD_OUT_ILU_MASK); - pInstruction->Output.RAddress = 
VshGetField(pShaderToken, FLD_OUT_R); - // Finally, get a0.x indirect constant addressing - pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X); - pInstruction->Final = VshGetField(pShaderToken, FLD_FINAL); -} - -static void VshAddIntermediateOpcode( - VSH_SHADER_INSTRUCTION* pInstruction, - VSH_XBOX_SHADER* pShader, - VSH_IMD_INSTRUCTION_TYPE instr_type, - VSH_IMD_OUTPUT_TYPE output_type, - int16_t output_address, - int8_t output_mask) -{ - // Is the output mask set? - if (output_mask == 0) { - return; + +static void VshConvertIntermediateParam(VSH_IMD_PARAMETER& Param, + uint32_t* pShaderToken, + VSH_FIELD_NAME FLD_MUX, + VSH_FIELD_NAME FLD_NEG, + uint16_t R, + uint16_t V, + uint16_t C) +{ + Param.Active = true; + Param.ParameterType = (VSH_PARAMETER_TYPE)VshGetField(pShaderToken, FLD_MUX); + switch (Param.ParameterType) { + case PARAM_R: + Param.Address = R; + break; + case PARAM_V: + Param.Address = V; + break; + case PARAM_C: + Param.Address = C; + break; + default: + LOG_TEST_CASE("parameter type unknown"); } - - if(pShader->IntermediateCount >= VSH_MAX_INTERMEDIATE_COUNT) { - CxbxKrnlCleanup("Shader exceeds conversion buffer!"); - } - VSH_MAC MAC = (instr_type == IMD_MAC) ? pInstruction->MAC : MAC_NOP; - VSH_ILU ILU = (instr_type == IMD_ILU) ? 
pInstruction->ILU : ILU_NOP; - VSH_INTERMEDIATE_FORMAT* pIntermediate = &pShader->Intermediate[pShader->IntermediateCount++]; + int d = FLD_NEG - FLD_A_NEG; + Param.Neg = VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_NEG)); + Param.Swizzle[0] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_X)); + Param.Swizzle[1] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Y)); + Param.Swizzle[2] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Z)); + Param.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_W)); +} - pIntermediate->InstructionType = instr_type; - pIntermediate->MAC = MAC; - pIntermediate->ILU = ILU; - pIntermediate->Output.Type = output_type; - pIntermediate->Output.Address = output_address; - pIntermediate->Output.Mask = output_mask; - pIntermediate->IndexesWithA0_X = pInstruction->a0x; +static void VshConvertIntermediateParams( + VSH_INTERMEDIATE_FORMAT *pIntermediate, + uint32_t* pShaderToken) +{ + // Get a0.x indirect constant addressing + pIntermediate->IndexesWithA0_X = VshGetField(pShaderToken, FLD_A0X) > 0; + + int16_t R; + int16_t V = VshGetField(pShaderToken, FLD_V); + int16_t C = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST)); + uint8_t ParamCount = 0; // Parameters[0].Active will always be set, but [1] and [2] may not, so reset them: pIntermediate->Parameters[1].Active = false; pIntermediate->Parameters[2].Active = false; - uint8_t ParamCount = 0; - if(MAC >= MAC_MOV) { - pIntermediate->Parameters[ParamCount].Parameter = pInstruction->A; - pIntermediate->Parameters[ParamCount++].Active = true; + if(pIntermediate->MAC >= MAC_MOV) { + // Get parameter A + R = VshGetField(pShaderToken, FLD_A_R); + VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_A_MUX, FLD_A_NEG, R, V, C); } - if((MAC == MAC_MUL) || ((MAC >= MAC_MAD) && (MAC <= MAC_SGE))) { - pIntermediate->Parameters[ParamCount].Parameter = 
pInstruction->B; - pIntermediate->Parameters[ParamCount++].Active = true; + if((pIntermediate->MAC == MAC_MUL) || ((pIntermediate->MAC >= MAC_MAD) && (pIntermediate->MAC <= MAC_SGE))) { + // Get parameter B + R = VshGetField(pShaderToken, FLD_B_R); + VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_B_MUX, FLD_B_NEG, R, V, C); } - if((ILU >= ILU_MOV) || (MAC == MAC_ADD) || (MAC == MAC_MAD)) { - pIntermediate->Parameters[ParamCount].Parameter = pInstruction->C; - pIntermediate->Parameters[ParamCount++].Active = true; + if((pIntermediate->ILU >= ILU_MOV) || (pIntermediate->MAC == MAC_ADD) || (pIntermediate->MAC == MAC_MAD)) { + // Get parameter C + R = VshGetField(pShaderToken, FLD_C_R_HIGH) << 2 | VshGetField(pShaderToken, FLD_C_R_LOW); + VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_C_MUX, FLD_C_NEG, R, V, C); } } -static void VshConvertToIntermediate( - VSH_SHADER_INSTRUCTION *pInstruction, - VSH_XBOX_SHADER *pShader) +static VSH_INTERMEDIATE_FORMAT* VshAddIntermediateInstruction( + VSH_XBOX_SHADER* pShader, + VSH_IMD_OUTPUT_TYPE output_type, + int16_t output_address, + int8_t output_mask) { + // Is the output mask set? 
+ if (output_mask == 0) { + return nullptr; + } + + if (pShader->IntermediateCount >= VSH_MAX_INTERMEDIATE_COUNT) { + CxbxKrnlCleanup("Shader exceeds conversion buffer!"); + } + + VSH_INTERMEDIATE_FORMAT* pIntermediate = &(pShader->Intermediate[pShader->IntermediateCount++]); + pIntermediate->Output.Type = output_type; + pIntermediate->Output.Address = output_address; + pIntermediate->Output.Mask = output_mask; + return pIntermediate; +} + +static void VshAddIntermediateMACOpcode( + VSH_XBOX_SHADER* pShader, + uint32_t* pShaderToken, + VSH_MAC MAC, + VSH_IMD_OUTPUT_TYPE output_type, + int16_t output_address, + int8_t output_mask) +{ + VSH_INTERMEDIATE_FORMAT* pIntermediate = VshAddIntermediateInstruction(pShader, output_type, output_address, output_mask); + if (!pIntermediate) return; + + pIntermediate->InstructionType = IMD_MAC; + pIntermediate->MAC = MAC; + pIntermediate->ILU = ILU_NOP; + VshConvertIntermediateParams(pIntermediate, pShaderToken); +} + +static void VshAddIntermediateILUOpcode( + VSH_XBOX_SHADER* pShader, + uint32_t* pShaderToken, + VSH_ILU ILU, + VSH_IMD_OUTPUT_TYPE output_type, + int16_t output_address, + int8_t output_mask) +{ + VSH_INTERMEDIATE_FORMAT* pIntermediate = VshAddIntermediateInstruction(pShader, output_type, output_address, output_mask); + if (!pIntermediate) return; + + pIntermediate->InstructionType = IMD_ILU; + pIntermediate->MAC = MAC_NOP; + pIntermediate->ILU = ILU; + VshConvertIntermediateParams(pIntermediate, pShaderToken); +} + +static bool VshConvertToIntermediate( + VSH_XBOX_SHADER *pShader, + uint32_t *pShaderToken) +{ + // First get the instruction(s). 
+ VSH_ILU ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU); + VSH_MAC MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC); + + // Output register + VSH_IMD_OUTPUT_TYPE OutputType; + int16_t OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS); + if ((VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB) == OUTPUT_C) { + OutputType = IMD_OUTPUT_C; + OutputAddress = ConvertCRegister(OutputAddress); + } else { // OUTPUT_O: + OutputType = IMD_OUTPUT_O; + OutputAddress = OutputAddress & 0xF; + } + + // MAC,ILU output R register + int16_t RAddress = VshGetField(pShaderToken, FLD_OUT_R); + // Test for paired opcodes - bool bIsPaired = (pInstruction->MAC != MAC_NOP) && (pInstruction->ILU != ILU_NOP); - VSH_IMD_OUTPUT_TYPE OutputType2 = pInstruction->Output.OutputType == OUTPUT_C ? IMD_OUTPUT_C : IMD_OUTPUT_O; + bool bIsPaired = (MAC != MAC_NOP) && (ILU != ILU_NOP); // Check if there's a MAC opcode - if (pInstruction->MAC > MAC_NOP && pInstruction->MAC <= MAC_ARL) { - if (bIsPaired && pInstruction->Output.RAddress == 1) { + if (MAC > MAC_NOP && MAC <= MAC_ARL) { + if (bIsPaired && RAddress == 1) { // Ignore paired MAC opcodes that write to R1 } else { - if (pInstruction->MAC == MAC_ARL) { - VshAddIntermediateOpcode(pInstruction, pShader, IMD_MAC, IMD_OUTPUT_A0X, 0, MASK_X); + if (MAC == MAC_ARL) { + VshAddIntermediateMACOpcode(pShader, pShaderToken, MAC, IMD_OUTPUT_A0X, 0, MASK_X); } else { - VshAddIntermediateOpcode(pInstruction, pShader, IMD_MAC, IMD_OUTPUT_R, pInstruction->Output.RAddress, pInstruction->Output.MACRMask); + VshAddIntermediateMACOpcode(pShader, pShaderToken, MAC, IMD_OUTPUT_R, RAddress, VshGetField(pShaderToken, FLD_OUT_MAC_MASK)); } } // Check if we must add a muxed MAC opcode as well - if (pInstruction->Output.OutputMux == OMUX_MAC) { - VshAddIntermediateOpcode(pInstruction, pShader, IMD_MAC, OutputType2, pInstruction->Output.OutputAddress, pInstruction->Output.OutputMask); + if ((VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX) == OMUX_MAC) { 
+ VshAddIntermediateMACOpcode(pShader, pShaderToken, MAC, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK)); } } // Check if there's an ILU opcode - if (pInstruction->ILU != ILU_NOP) { + if (ILU != ILU_NOP) { // Paired ILU opcodes will only write to R1 - VshAddIntermediateOpcode(pInstruction, pShader, IMD_ILU, IMD_OUTPUT_R, bIsPaired ? 1 : pInstruction->Output.RAddress, pInstruction->Output.ILURMask); + VshAddIntermediateILUOpcode(pShader, pShaderToken, ILU, IMD_OUTPUT_R, bIsPaired ? 1 : RAddress, VshGetField(pShaderToken, FLD_OUT_ILU_MASK)); // Check if we must add a muxed ILU opcode as well - if (pInstruction->Output.OutputMux == OMUX_ILU) { - VshAddIntermediateOpcode(pInstruction, pShader, IMD_ILU, OutputType2, pInstruction->Output.OutputAddress, pInstruction->Output.OutputMask); + if ((VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX) == OMUX_ILU) { + VshAddIntermediateILUOpcode(pShader, pShaderToken, ILU, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK)); } } + + return VshGetField(pShaderToken, FLD_FINAL); } #define D3DDECLUSAGE_UNSUPPORTED ((D3DDECLUSAGE)-1) @@ -1601,7 +1560,7 @@ static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) if (dest.Mask & MASK_W) hlsl << "w"; } -static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta, bool IndexesWithA0_X) +static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& param, bool IndexesWithA0_X) { // Print functions static char* RegisterName[/*VSH_PARAMETER_TYPE*/] = { @@ -1612,8 +1571,6 @@ static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta, "oPos" // PARAM_O // = 0?? 
}; - auto param = paramMeta.Parameter; - if (param.Neg) { hlsl << "-"; } @@ -1699,22 +1656,22 @@ static void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) }; for (int i = 0; i < pShader->IntermediateCount; i++) { - VSH_INTERMEDIATE_FORMAT& xboxInstruction = pShader->Intermediate[i]; + VSH_INTERMEDIATE_FORMAT& IntermediateInstruction = pShader->Intermediate[i]; std::string str = ""; - if (xboxInstruction.InstructionType == IMD_MAC) { - str = VSH_MAC_HLSL[xboxInstruction.MAC]; - } else if (xboxInstruction.InstructionType == IMD_ILU) { - str = VSH_ILU_HLSL[xboxInstruction.ILU]; + if (IntermediateInstruction.InstructionType == IMD_MAC) { + str = VSH_MAC_HLSL[IntermediateInstruction.MAC]; + } else if (IntermediateInstruction.InstructionType == IMD_ILU) { + str = VSH_ILU_HLSL[IntermediateInstruction.ILU]; } assert(!str.empty()); hlsl << "\n " << str << "("; // opcode - OutputHlsl(hlsl, xboxInstruction.Output); + OutputHlsl(hlsl, IntermediateInstruction.Output); for (int i = 0; i < 3; i++) { - if (xboxInstruction.Parameters[i].Active) { + if (IntermediateInstruction.Parameters[i].Active) { hlsl << ", "; - ParameterHlsl(hlsl, xboxInstruction.Parameters[i], xboxInstruction.IndexesWithA0_X); + ParameterHlsl(hlsl, IntermediateInstruction.Parameters[i], IntermediateInstruction.IndexesWithA0_X); } } @@ -1794,11 +1751,7 @@ extern HRESULT EmuRecompileVshFunction auto hlsl_stream = std::stringstream(); for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) { - VSH_SHADER_INSTRUCTION Inst; - - VshParseInstruction((uint32_t*)pToken, &Inst); - VshConvertToIntermediate(&Inst, pShader); - EOI = Inst.Final; + EOI = VshConvertToIntermediate(pShader, (uint32_t*)pToken); } // The size of the shader is From 18a27a64de9e17e2dd35cfa6b24ad8b391f6464b Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Mon, 16 Dec 2019 11:59:42 +0100 Subject: [PATCH 60/77] Refactor vertex shader decoding types and functions into a 
class --- src/core/hle/D3D8/XbVertexShader.cpp | 1341 +++++++++++++------------- 1 file changed, 651 insertions(+), 690 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 53d1ddeed..d627854de 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -45,476 +45,617 @@ LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) \ if(g_bPrintfOn) printf +std::array RegVIsPresentInDeclaration; // TODO : Scope this better than global + // **************************************************************************** // * Vertex shader function recompiler // **************************************************************************** -typedef enum _VSH_SWIZZLE +class XboxVertexShaderDecoder { - SWIZZLE_X = 0, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W -} -VSH_SWIZZLE; +private: + // Xbox Vertex SHader microcode types -#define MASK_X 0x008 -#define MASK_Y 0x004 -#define MASK_Z 0x002 -#define MASK_W 0x001 -#define MASK_XYZ MASK_X | MASK_Y | MASK_Z -#define MASK_XYZW MASK_X | MASK_Y | MASK_Z | MASK_W - -// Local types -typedef enum _VSH_FIELD_NAME -{ - FLD_ILU = 0, - FLD_MAC, - FLD_CONST, - FLD_V, - // Input A - FLD_A_NEG, - FLD_A_SWZ_X, - FLD_A_SWZ_Y, - FLD_A_SWZ_Z, - FLD_A_SWZ_W, - FLD_A_R, - FLD_A_MUX, - // Input B - FLD_B_NEG, - FLD_B_SWZ_X, - FLD_B_SWZ_Y, - FLD_B_SWZ_Z, - FLD_B_SWZ_W, - FLD_B_R, - FLD_B_MUX, - // Input C - FLD_C_NEG, - FLD_C_SWZ_X, - FLD_C_SWZ_Y, - FLD_C_SWZ_Z, - FLD_C_SWZ_W, - FLD_C_R_HIGH, - FLD_C_R_LOW, - FLD_C_MUX, - // Output - FLD_OUT_MAC_MASK, - FLD_OUT_R, - FLD_OUT_ILU_MASK, - FLD_OUT_O_MASK, - FLD_OUT_ORB, - FLD_OUT_ADDRESS, - FLD_OUT_MUX, - // Relative addressing - FLD_A0X, - // Final instruction - FLD_FINAL -} -VSH_FIELD_NAME; - -typedef enum _VSH_OREG_NAME -{ - OREG_OPOS, // 0 - OREG_UNUSED1, // 1 - OREG_UNUSED2, // 2 - OREG_OD0, // 3 - OREG_OD1, // 4 - OREG_OFOG, // 5 - OREG_OPTS, // 6 - OREG_OB0, // 7 - OREG_OB1, // 8 - OREG_OT0, // 9 - OREG_OT1, // 10 - OREG_OT2, // 11 - 
OREG_OT3, // 12 - OREG_UNUSED3, // 13 - OREG_UNUSED4, // 14 - OREG_A0X // 15 - all values of the 4 bits are used -} -VSH_OREG_NAME; - -typedef enum _VSH_OUTPUT_TYPE -{ - OUTPUT_C = 0, - OUTPUT_O -} -VSH_OUTPUT_TYPE; - -typedef enum _VSH_ARGUMENT_TYPE -{ - PARAM_UNKNOWN = 0, - PARAM_R, // Temporary (scRatch) registers - PARAM_V, // Vertex registers - PARAM_C, // Constant registers, set by SetVertexShaderConstant - PARAM_O // = 0?? -} -VSH_ARGUMENT_TYPE; - -typedef VSH_ARGUMENT_TYPE VSH_PARAMETER_TYPE; // Alias, to indicate difference between a parameter and a generic argument - -typedef enum _VSH_OUTPUT_MUX -{ - OMUX_MAC = 0, - OMUX_ILU -} -VSH_OUTPUT_MUX; - -typedef enum _VSH_IMD_OUTPUT_TYPE -{ - IMD_OUTPUT_C, - IMD_OUTPUT_R, - IMD_OUTPUT_O, - IMD_OUTPUT_A0X -} -VSH_IMD_OUTPUT_TYPE; - -// Dxbx note : ILU stands for 'Inverse Logic Unit' opcodes -typedef enum _VSH_ILU -{ - ILU_NOP = 0, - ILU_MOV, - ILU_RCP, - ILU_RCC, - ILU_RSQ, - ILU_EXP, - ILU_LOG, - ILU_LIT // = 7 - all values of the 3 bits are used -} -VSH_ILU; - -// Dxbx note : MAC stands for 'Multiply And Accumulate' opcodes -typedef enum _VSH_MAC -{ - MAC_NOP = 0, - MAC_MOV, - MAC_MUL, - MAC_ADD, - MAC_MAD, - MAC_DP3, - MAC_DPH, - MAC_DP4, - MAC_DST, - MAC_MIN, - MAC_MAX, - MAC_SLT, - MAC_SGE, - MAC_ARL - // ??? 14 - // ??? 
15 - 2 values of the 4 bits are undefined -} -VSH_MAC; - -typedef enum _VSH_IMD_INSTRUCTION_TYPE -{ - IMD_MAC, - IMD_ILU -} -VSH_IMD_INSTRUCTION_TYPE; - -typedef struct _VSH_IMD_OUTPUT -{ - VSH_IMD_OUTPUT_TYPE Type; - int16_t Address; - int8_t Mask; -} -VSH_IMD_OUTPUT; - -typedef struct _VSH_IMD_PARAMETER -{ - bool Active; - VSH_PARAMETER_TYPE ParameterType; // Parameter type, R, V or C - bool Neg; // true if negated, false if not - VSH_SWIZZLE Swizzle[4]; // The four swizzles - int16_t Address; // Register address -} -VSH_IMD_PARAMETER; - -typedef struct _VSH_INTERMEDIATE_FORMAT -{ - VSH_IMD_INSTRUCTION_TYPE InstructionType; - VSH_MAC MAC; - VSH_ILU ILU; - VSH_IMD_OUTPUT Output; - VSH_IMD_PARAMETER Parameters[3]; - // There is only a single address register in Microsoft DirectX 8.0. - // The address register, designated as a0.x, may be used as signed - // integer offset in relative addressing into the constant register file. - // c[a0.x + n] - bool IndexesWithA0_X; -} -VSH_INTERMEDIATE_FORMAT; - -typedef struct _VSH_XBOX_SHADER -{ - XTL::X_VSH_SHADER_HEADER ShaderHeader; - uint16_t IntermediateCount; - VSH_INTERMEDIATE_FORMAT Intermediate[VSH_MAX_INTERMEDIATE_COUNT]; -} -VSH_XBOX_SHADER; - -std::array RegVIsPresentInDeclaration; - -/* TODO : map non-FVF Xbox vertex shader handle to CxbxVertexShader (a struct containing a host Xbox vertex shader handle and the original members) -std::unordered_map g_CxbxVertexShaders; - -void CxbxUpdateVertexShader(DWORD XboxVertexShaderHandle) -{ - CxbxVertexShader &VertexShader = g_CxbxVertexShaders[XboxVertexShaderHandle]; -}*/ - -// Retrieves a number of bits in the instruction token -static inline uint32_t VshGetFromToken( - uint32_t *pShaderToken, - uint8_t SubToken, - uint8_t StartBit, - uint8_t BitLength) -{ - return (pShaderToken[SubToken] >> StartBit) & ~(0xFFFFFFFF << BitLength); -} - -static uint8_t VshGetField( - uint32_t *pShaderToken, - VSH_FIELD_NAME FieldName) -{ - // Used for xvu spec definition - static const 
struct { - uint8_t SubToken; - uint8_t StartBit; - uint8_t BitLength; - } FieldMapping[/*VSH_FIELD_NAME*/] = { - // SubToken BitPos BitSize - { 1, 25, 3 }, // FLD_ILU, - { 1, 21, 4 }, // FLD_MAC, - { 1, 13, 8 }, // FLD_CONST, - { 1, 9, 4 }, // FLD_V, - // Input A - { 1, 8, 1 }, // FLD_A_NEG, - { 1, 6, 2 }, // FLD_A_SWZ_X, - { 1, 4, 2 }, // FLD_A_SWZ_Y, - { 1, 2, 2 }, // FLD_A_SWZ_Z, - { 1, 0, 2 }, // FLD_A_SWZ_W, - { 2, 28, 4 }, // FLD_A_R, - { 2, 26, 2 }, // FLD_A_MUX, - // Input B - { 2, 25, 1 }, // FLD_B_NEG, - { 2, 23, 2 }, // FLD_B_SWZ_X, - { 2, 21, 2 }, // FLD_B_SWZ_Y, - { 2, 19, 2 }, // FLD_B_SWZ_Z, - { 2, 17, 2 }, // FLD_B_SWZ_W, - { 2, 13, 4 }, // FLD_B_R, - { 2, 11, 2 }, // FLD_B_MUX, - // Input C - { 2, 10, 1 }, // FLD_C_NEG, - { 2, 8, 2 }, // FLD_C_SWZ_X, - { 2, 6, 2 }, // FLD_C_SWZ_Y, - { 2, 4, 2 }, // FLD_C_SWZ_Z, - { 2, 2, 2 }, // FLD_C_SWZ_W, - { 2, 0, 2 }, // FLD_C_R_HIGH, - { 3, 30, 2 }, // FLD_C_R_LOW, - { 3, 28, 2 }, // FLD_C_MUX, - // Output - { 3, 24, 4 }, // FLD_OUT_MAC_MASK, - { 3, 20, 4 }, // FLD_OUT_R, - { 3, 16, 4 }, // FLD_OUT_ILU_MASK, - { 3, 12, 4 }, // FLD_OUT_O_MASK, - { 3, 11, 1 }, // FLD_OUT_ORB, - { 3, 3, 8 }, // FLD_OUT_ADDRESS, - { 3, 2, 1 }, // FLD_OUT_MUX, - // Relative addressing - { 3, 1, 1 }, // FLD_A0X, - // Final instruction - { 3, 0, 1 } // FLD_FINAL, + enum VSH_SWIZZLE { + SWIZZLE_X = 0, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W }; - return (uint8_t)(VshGetFromToken(pShaderToken, - FieldMapping[FieldName].SubToken, - FieldMapping[FieldName].StartBit, - FieldMapping[FieldName].BitLength)); -} + #define MASK_X 0x008 + #define MASK_Y 0x004 + #define MASK_Z 0x002 + #define MASK_W 0x001 -// Converts the C register address to disassembly format -static inline int16_t ConvertCRegister(const int16_t CReg) -{ - return ((((CReg >> 5) & 7) - 3) * 32) + (CReg & 31); -} - -static void VshConvertIntermediateParam(VSH_IMD_PARAMETER& Param, - uint32_t* pShaderToken, - VSH_FIELD_NAME FLD_MUX, - VSH_FIELD_NAME FLD_NEG, - uint16_t R, - 
uint16_t V, - uint16_t C) -{ - Param.Active = true; - Param.ParameterType = (VSH_PARAMETER_TYPE)VshGetField(pShaderToken, FLD_MUX); - switch (Param.ParameterType) { - case PARAM_R: - Param.Address = R; - break; - case PARAM_V: - Param.Address = V; - break; - case PARAM_C: - Param.Address = C; - break; - default: - LOG_TEST_CASE("parameter type unknown"); - } - - int d = FLD_NEG - FLD_A_NEG; - Param.Neg = VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_NEG)); - Param.Swizzle[0] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_X)); - Param.Swizzle[1] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Y)); - Param.Swizzle[2] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Z)); - Param.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_W)); -} - -static void VshConvertIntermediateParams( - VSH_INTERMEDIATE_FORMAT *pIntermediate, - uint32_t* pShaderToken) -{ - // Get a0.x indirect constant addressing - pIntermediate->IndexesWithA0_X = VshGetField(pShaderToken, FLD_A0X) > 0; - - int16_t R; - int16_t V = VshGetField(pShaderToken, FLD_V); - int16_t C = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST)); - uint8_t ParamCount = 0; - - // Parameters[0].Active will always be set, but [1] and [2] may not, so reset them: - pIntermediate->Parameters[1].Active = false; - pIntermediate->Parameters[2].Active = false; - if(pIntermediate->MAC >= MAC_MOV) { - // Get parameter A - R = VshGetField(pShaderToken, FLD_A_R); - VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_A_MUX, FLD_A_NEG, R, V, C); - } + enum VSH_OREG_NAME { + OREG_OPOS, // 0 + OREG_UNUSED1, // 1 + OREG_UNUSED2, // 2 + OREG_OD0, // 3 + OREG_OD1, // 4 + OREG_OFOG, // 5 + OREG_OPTS, // 6 + OREG_OB0, // 7 + OREG_OB1, // 8 + OREG_OT0, // 9 + OREG_OT1, // 10 + OREG_OT2, // 11 + OREG_OT3, // 12 + OREG_UNUSED3, // 13 + OREG_UNUSED4, // 14 + OREG_A0X // 15 - all values of the 4 bits 
are used + }; - if((pIntermediate->MAC == MAC_MUL) || ((pIntermediate->MAC >= MAC_MAD) && (pIntermediate->MAC <= MAC_SGE))) { - // Get parameter B - R = VshGetField(pShaderToken, FLD_B_R); - VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_B_MUX, FLD_B_NEG, R, V, C); + enum VSH_OUTPUT_TYPE { + OUTPUT_C = 0, + OUTPUT_O + }; + + enum VSH_PARAMETER_TYPE { + PARAM_UNKNOWN = 0, + PARAM_R, // Temporary (scRatch) registers + PARAM_V, // Vertex registers + PARAM_C, // Constant registers, set by SetVertexShaderConstant + PARAM_O // = 0?? + }; + + enum VSH_OUTPUT_MUX { + OMUX_MAC = 0, + OMUX_ILU + }; + + enum VSH_ILU { // Dxbx note : ILU stands for 'Inverse Logic Unit' opcodes + ILU_NOP = 0, + ILU_MOV, + ILU_RCP, + ILU_RCC, + ILU_RSQ, + ILU_EXP, + ILU_LOG, + ILU_LIT // = 7 - all values of the 3 bits are used + }; + + enum VSH_MAC { // Dxbx note : MAC stands for 'Multiply And Accumulate' opcodes + MAC_NOP = 0, + MAC_MOV, + MAC_MUL, + MAC_ADD, + MAC_MAD, + MAC_DP3, + MAC_DPH, + MAC_DP4, + MAC_DST, + MAC_MIN, + MAC_MAX, + MAC_SLT, + MAC_SGE, + MAC_ARL + // ??? 14 + // ??? 
15 - 2 values of the 4 bits are undefined + }; + + // Host intermediate vertex shader types + + enum VSH_FIELD_NAME { + FLD_ILU = 0, + FLD_MAC, + FLD_CONST, + FLD_V, + // Input A + FLD_A_NEG, + FLD_A_SWZ_X, + FLD_A_SWZ_Y, + FLD_A_SWZ_Z, + FLD_A_SWZ_W, + FLD_A_R, + FLD_A_MUX, + // Input B + FLD_B_NEG, + FLD_B_SWZ_X, + FLD_B_SWZ_Y, + FLD_B_SWZ_Z, + FLD_B_SWZ_W, + FLD_B_R, + FLD_B_MUX, + // Input C + FLD_C_NEG, + FLD_C_SWZ_X, + FLD_C_SWZ_Y, + FLD_C_SWZ_Z, + FLD_C_SWZ_W, + FLD_C_R_HIGH, + FLD_C_R_LOW, + FLD_C_MUX, + // Output + FLD_OUT_MAC_MASK, + FLD_OUT_R, + FLD_OUT_ILU_MASK, + FLD_OUT_O_MASK, + FLD_OUT_ORB, + FLD_OUT_ADDRESS, + FLD_OUT_MUX, + // Relative addressing + FLD_A0X, + // Final instruction + FLD_FINAL + }; + + enum VSH_IMD_INSTRUCTION_TYPE { + IMD_MAC, + IMD_ILU + }; + + enum VSH_IMD_OUTPUT_TYPE { + IMD_OUTPUT_C, + IMD_OUTPUT_R, + IMD_OUTPUT_O, + IMD_OUTPUT_A0X + } ; + + typedef struct _VSH_IMD_OUTPUT { + VSH_IMD_OUTPUT_TYPE Type; + int16_t Address; + int8_t Mask; + } VSH_IMD_OUTPUT; + + typedef struct _VSH_IMD_PARAMETER { + bool Active; + VSH_PARAMETER_TYPE ParameterType; // Parameter type, R, V or C + bool Neg; // true if negated, false if not + VSH_SWIZZLE Swizzle[4]; // The four swizzles + int16_t Address; // Register address + } VSH_IMD_PARAMETER; + + typedef struct _VSH_INTERMEDIATE_FORMAT { + VSH_IMD_INSTRUCTION_TYPE InstructionType; + VSH_MAC MAC; + VSH_ILU ILU; + VSH_IMD_OUTPUT Output; + VSH_IMD_PARAMETER Parameters[3]; + // There is only a single address register in Microsoft DirectX 8.0. + // The address register, designated as a0.x, may be used as signed + // integer offset in relative addressing into the constant register file. 
+ // c[a0.x + n] + bool IndexesWithA0_X; + } VSH_INTERMEDIATE_FORMAT; + + // State variables : + + uint16_t IntermediateCount; + VSH_INTERMEDIATE_FORMAT Intermediate[VSH_MAX_INTERMEDIATE_COUNT]; + + // Retrieves a number of bits in the instruction token + static inline uint32_t VshGetFromToken( + uint32_t* pShaderToken, + uint8_t SubToken, + uint8_t StartBit, + uint8_t BitLength) + { + return (pShaderToken[SubToken] >> StartBit) & ~(0xFFFFFFFF << BitLength); } - if((pIntermediate->ILU >= ILU_MOV) || (pIntermediate->MAC == MAC_ADD) || (pIntermediate->MAC == MAC_MAD)) { - // Get parameter C - R = VshGetField(pShaderToken, FLD_C_R_HIGH) << 2 | VshGetField(pShaderToken, FLD_C_R_LOW); - VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_C_MUX, FLD_C_NEG, R, V, C); + static uint8_t VshGetField( + uint32_t* pShaderToken, + VSH_FIELD_NAME FieldName) + { + // Used for xvu spec definition + static const struct { + uint8_t SubToken; + uint8_t StartBit; + uint8_t BitLength; + } FieldMapping[/*VSH_FIELD_NAME*/] = { + // SubToken BitPos BitSize + { 1, 25, 3 }, // FLD_ILU, + { 1, 21, 4 }, // FLD_MAC, + { 1, 13, 8 }, // FLD_CONST, + { 1, 9, 4 }, // FLD_V, + // Input A + { 1, 8, 1 }, // FLD_A_NEG, + { 1, 6, 2 }, // FLD_A_SWZ_X, + { 1, 4, 2 }, // FLD_A_SWZ_Y, + { 1, 2, 2 }, // FLD_A_SWZ_Z, + { 1, 0, 2 }, // FLD_A_SWZ_W, + { 2, 28, 4 }, // FLD_A_R, + { 2, 26, 2 }, // FLD_A_MUX, + // Input B + { 2, 25, 1 }, // FLD_B_NEG, + { 2, 23, 2 }, // FLD_B_SWZ_X, + { 2, 21, 2 }, // FLD_B_SWZ_Y, + { 2, 19, 2 }, // FLD_B_SWZ_Z, + { 2, 17, 2 }, // FLD_B_SWZ_W, + { 2, 13, 4 }, // FLD_B_R, + { 2, 11, 2 }, // FLD_B_MUX, + // Input C + { 2, 10, 1 }, // FLD_C_NEG, + { 2, 8, 2 }, // FLD_C_SWZ_X, + { 2, 6, 2 }, // FLD_C_SWZ_Y, + { 2, 4, 2 }, // FLD_C_SWZ_Z, + { 2, 2, 2 }, // FLD_C_SWZ_W, + { 2, 0, 2 }, // FLD_C_R_HIGH, + { 3, 30, 2 }, // FLD_C_R_LOW, + { 3, 28, 2 }, // FLD_C_MUX, + // Output + { 3, 24, 4 }, // FLD_OUT_MAC_MASK, + { 3, 20, 4 }, // FLD_OUT_R, + { 3, 16, 4 
}, // FLD_OUT_ILU_MASK, + { 3, 12, 4 }, // FLD_OUT_O_MASK, + { 3, 11, 1 }, // FLD_OUT_ORB, + { 3, 3, 8 }, // FLD_OUT_ADDRESS, + { 3, 2, 1 }, // FLD_OUT_MUX, + // Relative addressing + { 3, 1, 1 }, // FLD_A0X, + // Final instruction + { 3, 0, 1 } // FLD_FINAL, + }; + + return (uint8_t)(VshGetFromToken(pShaderToken, + FieldMapping[FieldName].SubToken, + FieldMapping[FieldName].StartBit, + FieldMapping[FieldName].BitLength)); } -} -static VSH_INTERMEDIATE_FORMAT* VshAddIntermediateInstruction( - VSH_XBOX_SHADER* pShader, - VSH_IMD_OUTPUT_TYPE output_type, - int16_t output_address, - int8_t output_mask) -{ - // Is the output mask set? - if (output_mask == 0) { - return nullptr; - } - - if (pShader->IntermediateCount >= VSH_MAX_INTERMEDIATE_COUNT) { - CxbxKrnlCleanup("Shader exceeds conversion buffer!"); + // Converts the C register address to disassembly format + static inline int16_t ConvertCRegister(const int16_t CReg) + { + return ((((CReg >> 5) & 7) - 3) * 32) + (CReg & 31); } - - VSH_INTERMEDIATE_FORMAT* pIntermediate = &(pShader->Intermediate[pShader->IntermediateCount++]); - pIntermediate->Output.Type = output_type; - pIntermediate->Output.Address = output_address; - pIntermediate->Output.Mask = output_mask; - return pIntermediate; -} -static void VshAddIntermediateMACOpcode( - VSH_XBOX_SHADER* pShader, - uint32_t* pShaderToken, - VSH_MAC MAC, - VSH_IMD_OUTPUT_TYPE output_type, - int16_t output_address, - int8_t output_mask) -{ - VSH_INTERMEDIATE_FORMAT* pIntermediate = VshAddIntermediateInstruction(pShader, output_type, output_address, output_mask); - if (!pIntermediate) return; - - pIntermediate->InstructionType = IMD_MAC; - pIntermediate->MAC = MAC; - pIntermediate->ILU = ILU_NOP; - VshConvertIntermediateParams(pIntermediate, pShaderToken); -} + static void VshConvertIntermediateParam(VSH_IMD_PARAMETER& Param, + uint32_t* pShaderToken, + VSH_FIELD_NAME FLD_MUX, + VSH_FIELD_NAME FLD_NEG, + uint16_t R, + uint16_t V, + uint16_t C) + { + Param.Active = true; + 
Param.ParameterType = (VSH_PARAMETER_TYPE)VshGetField(pShaderToken, FLD_MUX); + switch (Param.ParameterType) { + case PARAM_R: + Param.Address = R; + break; + case PARAM_V: + Param.Address = V; + break; + case PARAM_C: + Param.Address = C; + break; + default: + LOG_TEST_CASE("parameter type unknown"); + } -static void VshAddIntermediateILUOpcode( - VSH_XBOX_SHADER* pShader, - uint32_t* pShaderToken, - VSH_ILU ILU, - VSH_IMD_OUTPUT_TYPE output_type, - int16_t output_address, - int8_t output_mask) -{ - VSH_INTERMEDIATE_FORMAT* pIntermediate = VshAddIntermediateInstruction(pShader, output_type, output_address, output_mask); - if (!pIntermediate) return; - - pIntermediate->InstructionType = IMD_ILU; - pIntermediate->MAC = MAC_NOP; - pIntermediate->ILU = ILU; - VshConvertIntermediateParams(pIntermediate, pShaderToken); -} + int d = FLD_NEG - FLD_A_NEG; + Param.Neg = VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_NEG)); + Param.Swizzle[0] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_X)); + Param.Swizzle[1] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Y)); + Param.Swizzle[2] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Z)); + Param.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_W)); + } -static bool VshConvertToIntermediate( - VSH_XBOX_SHADER *pShader, - uint32_t *pShaderToken) -{ - // First get the instruction(s). 
- VSH_ILU ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU); - VSH_MAC MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC); + static void VshConvertIntermediateParams( + VSH_INTERMEDIATE_FORMAT* pIntermediate, + uint32_t* pShaderToken) + { + // Get a0.x indirect constant addressing + pIntermediate->IndexesWithA0_X = VshGetField(pShaderToken, FLD_A0X) > 0; // Applies to IMD_OUTPUT_C parameter reads - // Output register - VSH_IMD_OUTPUT_TYPE OutputType; - int16_t OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS); - if ((VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB) == OUTPUT_C) { - OutputType = IMD_OUTPUT_C; - OutputAddress = ConvertCRegister(OutputAddress); - } else { // OUTPUT_O: - OutputType = IMD_OUTPUT_O; - OutputAddress = OutputAddress & 0xF; - } - - // MAC,ILU output R register - int16_t RAddress = VshGetField(pShaderToken, FLD_OUT_R); - - // Test for paired opcodes - bool bIsPaired = (MAC != MAC_NOP) && (ILU != ILU_NOP); - - // Check if there's a MAC opcode - if (MAC > MAC_NOP && MAC <= MAC_ARL) { - if (bIsPaired && RAddress == 1) { - // Ignore paired MAC opcodes that write to R1 - } else { - if (MAC == MAC_ARL) { - VshAddIntermediateMACOpcode(pShader, pShaderToken, MAC, IMD_OUTPUT_A0X, 0, MASK_X); + int16_t R; + int16_t V = VshGetField(pShaderToken, FLD_V); + int16_t C = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST)); + uint8_t ParamCount = 0; + + // Parameters[0].Active will always be set, but [1] and [2] may not, so reset them: + pIntermediate->Parameters[1].Active = false; + pIntermediate->Parameters[2].Active = false; + if (pIntermediate->MAC >= MAC_MOV) { + // Get parameter A + R = VshGetField(pShaderToken, FLD_A_R); + VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_A_MUX, FLD_A_NEG, R, V, C); + } + + if ((pIntermediate->MAC == MAC_MUL) || ((pIntermediate->MAC >= MAC_MAD) && (pIntermediate->MAC <= MAC_SGE))) { + // Get parameter B + R = VshGetField(pShaderToken, FLD_B_R); + 
VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_B_MUX, FLD_B_NEG, R, V, C); + } + + if ((pIntermediate->ILU >= ILU_MOV) || (pIntermediate->MAC == MAC_ADD) || (pIntermediate->MAC == MAC_MAD)) { + // Get parameter C + R = VshGetField(pShaderToken, FLD_C_R_HIGH) << 2 | VshGetField(pShaderToken, FLD_C_R_LOW); + VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_C_MUX, FLD_C_NEG, R, V, C); + } + } + + VSH_INTERMEDIATE_FORMAT* VshAddIntermediateInstruction( + VSH_IMD_OUTPUT_TYPE output_type, + int16_t output_address, + int8_t output_mask) + { + // Is the output mask set? + if (output_mask == 0) { + return nullptr; + } + + if (IntermediateCount >= VSH_MAX_INTERMEDIATE_COUNT) { + CxbxKrnlCleanup("Shader exceeds conversion buffer!"); + } + + VSH_INTERMEDIATE_FORMAT* pIntermediate = &(Intermediate[IntermediateCount++]); + pIntermediate->Output.Type = output_type; + pIntermediate->Output.Address = output_address; + pIntermediate->Output.Mask = output_mask; + return pIntermediate; + } + + void VshAddIntermediateMACOpcode( + uint32_t* pShaderToken, + VSH_MAC MAC, + VSH_IMD_OUTPUT_TYPE output_type, + int16_t output_address, + int8_t output_mask) + { + VSH_INTERMEDIATE_FORMAT* pIntermediate = VshAddIntermediateInstruction(output_type, output_address, output_mask); + if (!pIntermediate) return; + + pIntermediate->InstructionType = IMD_MAC; + pIntermediate->MAC = MAC; + pIntermediate->ILU = ILU_NOP; + VshConvertIntermediateParams(pIntermediate, pShaderToken); + } + + void VshAddIntermediateILUOpcode( + uint32_t* pShaderToken, + VSH_ILU ILU, + VSH_IMD_OUTPUT_TYPE output_type, + int16_t output_address, + int8_t output_mask) + { + VSH_INTERMEDIATE_FORMAT* pIntermediate = VshAddIntermediateInstruction(output_type, output_address, output_mask); + if (!pIntermediate) return; + + pIntermediate->InstructionType = IMD_ILU; + pIntermediate->MAC = MAC_NOP; + pIntermediate->ILU = ILU; + 
VshConvertIntermediateParams(pIntermediate, pShaderToken); + } + +public: + bool VshConvertToIntermediate(uint32_t* pShaderToken) + { + // First get the instruction(s). + VSH_ILU ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU); + VSH_MAC MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC); + + // Output register + VSH_OUTPUT_MUX OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX); + int16_t OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS); + VSH_IMD_OUTPUT_TYPE OutputType; + if ((VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB) == OUTPUT_C) { + OutputType = IMD_OUTPUT_C; + OutputAddress = ConvertCRegister(OutputAddress); + } else { // OUTPUT_O: + OutputType = IMD_OUTPUT_O; + OutputAddress = OutputAddress & 0xF; + } + + // MAC,ILU output R register + int16_t RAddress = VshGetField(pShaderToken, FLD_OUT_R); + + // Test for paired opcodes + bool bIsPaired = (MAC != MAC_NOP) && (ILU != ILU_NOP); + + // Check if there's a MAC opcode + if (MAC > MAC_NOP && MAC <= MAC_ARL) { + if (bIsPaired && RAddress == 1) { + // Ignore paired MAC opcodes that write to R1 } else { - VshAddIntermediateMACOpcode(pShader, pShaderToken, MAC, IMD_OUTPUT_R, RAddress, VshGetField(pShaderToken, FLD_OUT_MAC_MASK)); - } - } - - // Check if we must add a muxed MAC opcode as well - if ((VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX) == OMUX_MAC) { - VshAddIntermediateMACOpcode(pShader, pShaderToken, MAC, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK)); + if (MAC == MAC_ARL) { + VshAddIntermediateMACOpcode(pShaderToken, MAC, IMD_OUTPUT_A0X, 0, MASK_X); + } else { + VshAddIntermediateMACOpcode(pShaderToken, MAC, IMD_OUTPUT_R, RAddress, VshGetField(pShaderToken, FLD_OUT_MAC_MASK)); + } + } + + // Check if we must add a muxed MAC opcode as well + if (OutputMux == OMUX_MAC) { + VshAddIntermediateMACOpcode(pShaderToken, MAC, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK)); + } + } + + // Check if there's an ILU opcode + if 
(ILU != ILU_NOP) { + // Paired ILU opcodes will only write to R1 + VshAddIntermediateILUOpcode(pShaderToken, ILU, IMD_OUTPUT_R, bIsPaired ? 1 : RAddress, VshGetField(pShaderToken, FLD_OUT_ILU_MASK)); + // Check if we must add a muxed ILU opcode as well + if (OutputMux == OMUX_ILU) { + VshAddIntermediateILUOpcode(pShaderToken, ILU, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK)); + } + } + + return VshGetField(pShaderToken, FLD_FINAL) == 0; + } + + // HLSL generation - TODO : Move this to another (friend) class?? +private: + static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) + { + static const char* OReg_Name[/*VSH_OREG_NAME*/] = { + "oPos", + "???", + "???", + "oD0", + "oD1", + "oFog", + "oPts", + "oB0", + "oB1", + "oT0", + "oT1", + "oT2", + "oT3", + "???", + "???", + "a0.x" + }; + + switch (dest.Type) { + case IMD_OUTPUT_C: + // Access the HLSL capital C[] constants array, with the index bias applied : + // TODO : Avoid out-of-bound writes (perhaps writing to a reserverd index?) + hlsl << "C[" << dest.Address + X_D3DSCM_CORRECTION << "]"; + LOG_TEST_CASE("Vertex shader writes to constant table"); + break; + case IMD_OUTPUT_R: + hlsl << "r" << dest.Address; + break; + case IMD_OUTPUT_O: + assert(dest.Address < OREG_A0X); + hlsl << OReg_Name[dest.Address]; + break; + case IMD_OUTPUT_A0X: + hlsl << "a0"; + break; + default: + assert(false); + break; + } + + // Write the mask as a separate argument to the opcode defines + // (No space, so that "dest,mask, ..." 
looks close to "dest.mask, ...") + hlsl << ","; + if (dest.Mask & MASK_X) hlsl << "x"; + if (dest.Mask & MASK_Y) hlsl << "y"; + if (dest.Mask & MASK_Z) hlsl << "z"; + if (dest.Mask & MASK_W) hlsl << "w"; + } + + static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& param, bool IndexesWithA0_X) + { + // Print functions + static char* RegisterName[/*VSH_PARAMETER_TYPE*/] = { + "?", // PARAM_UNKNOWN = 0, + "r", // PARAM_R, // Temporary (scRatch) registers + "v", // PARAM_V, // Vertex registers + "c", // PARAM_C, // Constant registers, set by SetVertexShaderConstant + "oPos" // PARAM_O // = 0?? + }; + + if (param.Neg) { + hlsl << "-"; + } + + if (param.ParameterType == PARAM_C) { + // Access constant registers through our HLSL c() function, + // which allows dumping negative indices (like Xbox shaders), + // and which returns zero when out-of-bounds indices are passed in: + if (IndexesWithA0_X) { + if (param.Address == 0) { + hlsl << "c(a0.x)"; // Hide the offset if it's 0 + } + else if (param.Address < 0) { + hlsl << "c(a0.x" << param.Address << ")"; // minus is part of the offset + } + else { + hlsl << "c(a0.x+" << param.Address << ")"; // show addition character + } + } + else { + hlsl << "c(" << param.Address << ")"; + } + } + else { + hlsl << RegisterName[param.ParameterType] << param.Address; + } + + // Write the swizzle if we need to + // Only bother printing the swizzle if it is not the default .xyzw + if (!(param.Swizzle[0] == SWIZZLE_X && + param.Swizzle[1] == SWIZZLE_Y && + param.Swizzle[2] == SWIZZLE_Z && + param.Swizzle[3] == SWIZZLE_W)) + { + // We'll try to simplify swizzles if we can + // If all swizzles are the same, we only need to write one out + unsigned swizzles = 1; + + // Otherwise, we need to use the full swizzle + if (param.Swizzle[0] != param.Swizzle[1] || + param.Swizzle[0] != param.Swizzle[2] || + param.Swizzle[0] != param.Swizzle[3]) { + // Note, we can't remove trailing repeats, like in VS asm, + // as it may change the 
type from float4 to float3, float2 or float1! + swizzles = 4; + } + + hlsl << "."; + for (unsigned i = 0; i < swizzles; i++) { + hlsl << "xyzw"[param.Swizzle[i]]; + } } } - // Check if there's an ILU opcode - if (ILU != ILU_NOP) { - // Paired ILU opcodes will only write to R1 - VshAddIntermediateILUOpcode(pShader, pShaderToken, ILU, IMD_OUTPUT_R, bIsPaired ? 1 : RAddress, VshGetField(pShaderToken, FLD_OUT_ILU_MASK)); - // Check if we must add a muxed ILU opcode as well - if ((VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX) == OMUX_ILU) { - VshAddIntermediateILUOpcode(pShader, pShaderToken, ILU, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK)); +public: + bool BuildShader(std::stringstream& hlsl) + { + // HLSL strings for all MAC opcodes, indexed with VSH_MAC + static std::string VSH_MAC_HLSL[/*VSH_MAC*/] = { + /*MAC_NOP:*/"", + /*MAC_MOV:*/"x_mov", + /*MAC_MUL:*/"x_mul", + /*MAC_ADD:*/"x_add", + /*MAC_MAD:*/"x_mad", + /*MAC_DP3:*/"x_dp3", + /*MAC_DPH:*/"x_dph", + /*MAC_DP4:*/"x_dp4", + /*MAC_DST:*/"x_dst", + /*MAC_MIN:*/"x_min", + /*MAC_MAX:*/"x_max", + /*MAC_SLT:*/"x_slt", + /*MAC_SGE:*/"x_sge", + /*MAC_ARL:*/"x_arl", + "", + "" // VSH_MAC 2 final values of the 4 bits are undefined/unknown TODO : Investigate their effect (if any) and emulate that as well + }; + + // HLSL strings for all ILU opcodes, indexed with VSH_ILU + static std::string VSH_ILU_HLSL[/*VSH_ILU*/] = { + /*ILU_NOP:*/"", + /*ILU_MOV:*/"x_mov", + /*ILU_RCP:*/"x_rcp", + /*ILU_RCC:*/"x_rcc", + /*ILU_RSQ:*/"x_rsq", + /*ILU_EXP:*/"x_expp", + /*ILU_LOG:*/"x_logp", + /*ILU_LIT:*/"x_lit" // = 7 - all values of the 3 bits are used + }; + + for (int i = 0; i < IntermediateCount; i++) { + VSH_INTERMEDIATE_FORMAT& IntermediateInstruction = Intermediate[i]; + + std::string str; + if (IntermediateInstruction.InstructionType == IMD_MAC) { + str = VSH_MAC_HLSL[IntermediateInstruction.MAC]; + } else { + assert(IntermediateInstruction.InstructionType == IMD_ILU); + str = 
VSH_ILU_HLSL[IntermediateInstruction.ILU]; + } + + hlsl << "\n " << str << "("; // opcode + OutputHlsl(hlsl, IntermediateInstruction.Output); + for (int i = 0; i < 3; i++) { + if (IntermediateInstruction.Parameters[i].Active) { + hlsl << ", "; + ParameterHlsl(hlsl, IntermediateInstruction.Parameters[i], IntermediateInstruction.IndexesWithA0_X); + } + } + + hlsl << ");"; } - } - - return VshGetField(pShaderToken, FLD_FINAL); -} + + return IntermediateCount > 0; + } +}; #define D3DDECLUSAGE_UNSUPPORTED ((D3DDECLUSAGE)-1) @@ -1506,179 +1647,6 @@ void CxbxImpl_SelectVertexShaderDirect LOG_UNIMPLEMENTED(); } -// HLSL outputs - -static void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) -{ - static const char* OReg_Name[/*VSH_OREG_NAME*/] = { - "oPos", - "???", - "???", - "oD0", - "oD1", - "oFog", - "oPts", - "oB0", - "oB1", - "oT0", - "oT1", - "oT2", - "oT3", - "???", - "???", - "a0.x" - }; - - switch (dest.Type) { - case IMD_OUTPUT_C: - // Access the HLSL capital C[] constants array, with the index bias applied : - // TODO : Avoid out-of-bound writes (perhaps writing to a reserverd index?) - hlsl << "C[" << dest.Address + X_D3DSCM_CORRECTION << "]"; - LOG_TEST_CASE("Vertex shader writes to constant table"); - break; - case IMD_OUTPUT_R: - hlsl << "r" << dest.Address; - break; - case IMD_OUTPUT_O: - assert(dest.Address < OREG_A0X); - hlsl << OReg_Name[dest.Address]; - break; - case IMD_OUTPUT_A0X: - hlsl << "a0"; - break; - default: - assert(false); - break; - } - - // Write the mask as a separate argument to the opcode defines - // (No space, so that "dest,mask, ..." 
looks close to "dest.mask, ...") - hlsl << ","; - if (dest.Mask & MASK_X) hlsl << "x"; - if (dest.Mask & MASK_Y) hlsl << "y"; - if (dest.Mask & MASK_Z) hlsl << "z"; - if (dest.Mask & MASK_W) hlsl << "w"; -} - -static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& param, bool IndexesWithA0_X) -{ - // Print functions - static char* RegisterName[/*VSH_PARAMETER_TYPE*/] = { - "?", // PARAM_UNKNOWN = 0, - "r", // PARAM_R, // Temporary (scRatch) registers - "v", // PARAM_V, // Vertex registers - "c", // PARAM_C, // Constant registers, set by SetVertexShaderConstant - "oPos" // PARAM_O // = 0?? - }; - - if (param.Neg) { - hlsl << "-"; - } - - if (param.ParameterType == PARAM_C) { - // Access constant registers through our HLSL c() function, - // which allows dumping negative indices (like Xbox shaders), - // and which returns zero when out-of-bounds indices are passed in: - if (IndexesWithA0_X) { - if (param.Address == 0) { - hlsl << "c(a0.x)"; // Hide the offset if it's 0 - } else if (param.Address < 0) { - hlsl << "c(a0.x" << param.Address << ")"; // minus is part of the offset - } else { - hlsl << "c(a0.x+" << param.Address << ")"; // show addition character - } - } else { - hlsl << "c(" << param.Address << ")"; - } - } else { - hlsl << RegisterName[param.ParameterType] << param.Address; - } - - // Write the swizzle if we need to - // Only bother printing the swizzle if it is not the default .xyzw - if (!(param.Swizzle[0] == SWIZZLE_X && - param.Swizzle[1] == SWIZZLE_Y && - param.Swizzle[2] == SWIZZLE_Z && - param.Swizzle[3] == SWIZZLE_W )) - { - // We'll try to simplify swizzles if we can - // If all swizzles are the same, we only need to write one out - unsigned swizzles = 1; - - // Otherwise, we need to use the full swizzle - if (param.Swizzle[0] != param.Swizzle[1] || - param.Swizzle[0] != param.Swizzle[2] || - param.Swizzle[0] != param.Swizzle[3]) { - // Note, we can't remove trailing repeats, like in VS asm, - // as it may change the type from 
float4 to float3, float2 or float1! - swizzles = 4; - } - - hlsl << "."; - for (unsigned i = 0; i < swizzles; i++) { - hlsl << "xyzw"[param.Swizzle[i]]; - } - } -} - -static void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader) -{ - // HLSL strings for all MAC opcodes, indexed with VSH_MAC - static std::string VSH_MAC_HLSL[/*VSH_MAC*/] = { - /*MAC_NOP:*/"", - /*MAC_MOV:*/"x_mov", - /*MAC_MUL:*/"x_mul", - /*MAC_ADD:*/"x_add", - /*MAC_MAD:*/"x_mad", - /*MAC_DP3:*/"x_dp3", - /*MAC_DPH:*/"x_dph", - /*MAC_DP4:*/"x_dp4", - /*MAC_DST:*/"x_dst", - /*MAC_MIN:*/"x_min", - /*MAC_MAX:*/"x_max", - /*MAC_SLT:*/"x_slt", - /*MAC_SGE:*/"x_sge", - /*MAC_ARL:*/"x_arl", - "", - "" // VSH_MAC 2 final values of the 4 bits are undefined/unknown TODO : Investigate their effect (if any) and emulate that as well - }; - - // HLSL strings for all ILU opcodes, indexed with VSH_ILU - static std::string VSH_ILU_HLSL[/*VSH_ILU*/] = { - /*ILU_NOP:*/"", - /*ILU_MOV:*/"x_mov", - /*ILU_RCP:*/"x_rcp", - /*ILU_RCC:*/"x_rcc", - /*ILU_RSQ:*/"x_rsq", - /*ILU_EXP:*/"x_expp", - /*ILU_LOG:*/"x_logp", - /*ILU_LIT:*/"x_lit" // = 7 - all values of the 3 bits are used - }; - - for (int i = 0; i < pShader->IntermediateCount; i++) { - VSH_INTERMEDIATE_FORMAT& IntermediateInstruction = pShader->Intermediate[i]; - - std::string str = ""; - if (IntermediateInstruction.InstructionType == IMD_MAC) { - str = VSH_MAC_HLSL[IntermediateInstruction.MAC]; - } else if (IntermediateInstruction.InstructionType == IMD_ILU) { - str = VSH_ILU_HLSL[IntermediateInstruction.ILU]; - } - - assert(!str.empty()); - hlsl << "\n " << str << "("; // opcode - OutputHlsl(hlsl, IntermediateInstruction.Output); - for (int i = 0; i < 3; i++) { - if (IntermediateInstruction.Parameters[i].Active) { - hlsl << ", "; - ParameterHlsl(hlsl, IntermediateInstruction.Parameters[i], IntermediateInstruction.IndexesWithA0_X); - } - } - - hlsl << ");"; - } -} - std::string DebugPrependLineNumbers(std::string shaderString) { std::stringstream 
shader(shaderString); auto debugShader = std::stringstream(); @@ -1705,27 +1673,21 @@ extern HRESULT EmuRecompileVshFunction ) { XTL::X_VSH_SHADER_HEADER* pXboxVertexShaderHeader = (XTL::X_VSH_SHADER_HEADER*)pXboxFunction; - DWORD* pToken; - bool EOI = false; - VSH_XBOX_SHADER* pShader = (VSH_XBOX_SHADER*)calloc(1, sizeof(VSH_XBOX_SHADER)); + uint32_t* pToken; + XboxVertexShaderDecoder VshDecoder; ID3DBlob* pErrors = nullptr; HRESULT hRet = 0; // TODO: support this situation.. - if (pXboxFunction == xbnullptr) + if (pXboxFunction == xbnullptr) { return E_FAIL; + } // Initialize output arguments to zero - *pbUseDeclarationOnly = 0; + *pbUseDeclarationOnly = false; *pXboxFunctionSize = 0; *ppRecompiledShader = nullptr; - if (!pShader) { - EmuLog(LOG_LEVEL::WARNING, "Couldn't allocate memory for vertex shader conversion buffer"); - return E_OUTOFMEMORY; - } - - pShader->ShaderHeader = *pXboxVertexShaderHeader; switch (pXboxVertexShaderHeader->Version) { case VERSION_XVS: break; @@ -1743,80 +1705,79 @@ extern HRESULT EmuRecompileVshFunction break; } - if (SUCCEEDED(hRet)) { - static std::string hlsl_template = - #include "core\hle\D3D8\Direct3D9\CxbxVertexShaderTemplate.hlsl" // Note : This included .hlsl defines a raw string - ; + if (!SUCCEEDED(hRet)) return hRet; - auto hlsl_stream = std::stringstream(); + static std::string hlsl_template = + #include "core\hle\D3D8\Direct3D9\CxbxVertexShaderTemplate.hlsl" // Note : This included .hlsl defines a raw string + ; - for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) { - EOI = VshConvertToIntermediate(pShader, (uint32_t*)pToken); - } + // Decode the vertex shader program tokens into an intermediate representation + pToken = (uint32_t*)((uintptr_t)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); + while (VshDecoder.VshConvertToIntermediate(pToken)) { + pToken += X_VSH_INSTRUCTION_SIZE; + } - // The size of the shader is - *pXboxFunctionSize = 
(intptr_t)pToken - (intptr_t)pXboxFunction; + // The size of the shader is + pToken += X_VSH_INSTRUCTION_SIZE; // always at least one token + *pXboxFunctionSize = (intptr_t)pToken - (intptr_t)pXboxFunction; + auto hlsl_stream = std::stringstream(); + if (!VshDecoder.BuildShader(hlsl_stream)) { // Do not attempt to compile empty shaders - if (pShader->IntermediateCount == 0) { - // This is a declaration only shader, so there is no function to recompile - *pbUseDeclarationOnly = 1; - return D3D_OK; - } + // This is a declaration only shader, so there is no function to recompile + *pbUseDeclarationOnly = true; + return D3D_OK; + } - BuildShader(hlsl_stream, pShader); - std::string hlsl_str = hlsl_stream.str(); - hlsl_str = std::regex_replace(hlsl_template, std::regex("// "), hlsl_str); + std::string hlsl_str = hlsl_stream.str(); + hlsl_str = std::regex_replace(hlsl_template, std::regex("// "), hlsl_str); - DbgVshPrintf("--- HLSL conversion ---\n"); - DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str()); - DbgVshPrintf("-----------------------\n"); + DbgVshPrintf("--- HLSL conversion ---\n"); + DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str()); + DbgVshPrintf("-----------------------\n"); - hRet = D3DCompile( - hlsl_str.c_str(), - hlsl_str.length(), - nullptr, // pSourceName - nullptr, // pDefines - nullptr, // pInclude // TODO precompile x_* HLSL functions? - "main", // shader entry poiint - "vs_3_0", // shader profile - 0, // flags1 - 0, // flags2 - ppRecompiledShader, // out - &pErrors // ppErrorMsgs out + hRet = D3DCompile( + hlsl_str.c_str(), + hlsl_str.length(), + nullptr, // pSourceName + nullptr, // pDefines + nullptr, // pInclude // TODO precompile x_* HLSL functions? 
+ "main", // shader entry poiint + "vs_3_0", // shader profile + 0, // flags1 + 0, // flags2 + ppRecompiledShader, // out + &pErrors // ppErrorMsgs out + ); + if (FAILED(hRet)) { + EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); + } + + // Determine the log level + auto hlslErrorLogLevel = FAILED(hRet) ? LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG; + if (pErrors) { + // Log HLSL compiler errors + EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); + pErrors->Release(); + pErrors = nullptr; + } + + LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) + if (g_bPrintfOn) + if (!FAILED(hRet)) { + // Log disassembly + hRet = D3DDisassemble( + (*ppRecompiledShader)->GetBufferPointer(), + (*ppRecompiledShader)->GetBufferSize(), + D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING, + NULL, + &pErrors ); - if (FAILED(hRet)) { - EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); - } - - // Determine the log level - auto hlslErrorLogLevel = FAILED(hRet) ? 
LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG; if (pErrors) { - // Log HLSL compiler errors EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); pErrors->Release(); - pErrors = nullptr; - } - - LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) - if (g_bPrintfOn) - if (!FAILED(hRet)) { - // Log disassembly - hRet = D3DDisassemble( - (*ppRecompiledShader)->GetBufferPointer(), - (*ppRecompiledShader)->GetBufferSize(), - D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING, - NULL, - &pErrors - ); - if (pErrors) { - EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); - pErrors->Release(); - } } } - free(pShader); - return hRet; } From ebbb8e961f1e17143606d2679abbbf56d66ab7f6 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Mon, 16 Dec 2019 12:08:35 +0100 Subject: [PATCH 61/77] Moved a few functions to a better suitable location, and marked a few class functions 'static' --- src/core/hle/D3D8/XbVertexShader.cpp | 222 +++++++++++++-------------- 1 file changed, 111 insertions(+), 111 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index d627854de..64e1ea204 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -657,112 +657,6 @@ public: } }; -#define D3DDECLUSAGE_UNSUPPORTED ((D3DDECLUSAGE)-1) - -D3DDECLUSAGE Xb2PCRegisterType -( - DWORD VertexRegister, - BYTE& PCUsageIndex -) -{ - D3DDECLUSAGE PCRegisterType; - PCUsageIndex = 0; - - switch (VertexRegister) - { - case XTL::X_D3DVSDE_VERTEX: // -1 - PCRegisterType = D3DDECLUSAGE_UNSUPPORTED; - break; - case XTL::X_D3DVSDE_POSITION: // 0 - PCRegisterType = D3DDECLUSAGE_POSITION; - break; - case XTL::X_D3DVSDE_BLENDWEIGHT: // 1 - PCRegisterType = D3DDECLUSAGE_BLENDWEIGHT; - break; - case XTL::X_D3DVSDE_NORMAL: // 2 - PCRegisterType = D3DDECLUSAGE_NORMAL; - break; - case XTL::X_D3DVSDE_DIFFUSE: // 3 - PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 0; - break; - case 
XTL::X_D3DVSDE_SPECULAR: // 4 - PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 1; - break; - case XTL::X_D3DVSDE_FOG: // 5 - PCRegisterType = D3DDECLUSAGE_FOG; - break; - case XTL::X_D3DVSDE_POINTSIZE: // 6 - PCRegisterType = D3DDECLUSAGE_PSIZE; - break; - case XTL::X_D3DVSDE_BACKDIFFUSE: // 7 - PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 2; - break; - case XTL::X_D3DVSDE_BACKSPECULAR: // 8 - PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 3; - break; - case XTL::X_D3DVSDE_TEXCOORD0: // 9 - PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 0; - break; - case XTL::X_D3DVSDE_TEXCOORD1: // 10 - PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 1; - break; - case XTL::X_D3DVSDE_TEXCOORD2: // 11 - PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 2; - break; - case XTL::X_D3DVSDE_TEXCOORD3: // 12 - PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 3; - break; - default: - PCRegisterType = D3DDECLUSAGE_UNSUPPORTED; - break; - } - - return PCRegisterType; -} - -char* XboxVertexRegisterAsString(DWORD VertexRegister) -{ - switch (VertexRegister) - { - case XTL::X_D3DVSDE_VERTEX: // -1 - return "D3DVSDE_VERTEX /* xbox ext. */"; - case XTL::X_D3DVSDE_POSITION: // 0 - return "D3DVSDE_POSITION"; - case XTL::X_D3DVSDE_BLENDWEIGHT: // 1 - return "D3DVSDE_BLENDWEIGHT"; - case XTL::X_D3DVSDE_NORMAL: // 2 - return "D3DVSDE_NORMAL"; - case XTL::X_D3DVSDE_DIFFUSE: // 3 - return "D3DVSDE_DIFFUSE"; - case XTL::X_D3DVSDE_SPECULAR: // 4 - return "D3DVSDE_SPECULAR"; - case XTL::X_D3DVSDE_FOG: // 5 - return "D3DVSDE_FOG"; - case XTL::X_D3DVSDE_POINTSIZE: // 6 - return "D3DVDSE_POINTSIZE"; - case XTL::X_D3DVSDE_BACKDIFFUSE: // 7 - return "D3DVSDE_BACKDIFFUSE /* xbox ext. */"; - case XTL::X_D3DVSDE_BACKSPECULAR: // 8 - return "D3DVSDE_BACKSPECULAR /* xbox ext. 
*/"; - case XTL::X_D3DVSDE_TEXCOORD0: // 9 - return "D3DVSDE_TEXCOORD0"; - case XTL::X_D3DVSDE_TEXCOORD1: // 10 - return "D3DVSDE_TEXCOORD1"; - case XTL::X_D3DVSDE_TEXCOORD2: // 11 - return "D3DVSDE_TEXCOORD2"; - case XTL::X_D3DVSDE_TEXCOORD3: // 12 - return "D3DVSDE_TEXCOORD3"; - case 13: - return "13 /* unknown register */"; - case 14: - return "14 /* unknown register */"; - case 15: - return "15 /* unknown register */"; - default: - return "16 /* or higher, unknown register */"; - } -} - // **************************************************************************** // * Vertex shader declaration recompiler // **************************************************************************** @@ -785,6 +679,112 @@ public: DWORD HostDeclarationSize; private: + #define D3DDECLUSAGE_UNSUPPORTED ((D3DDECLUSAGE)-1) + + static D3DDECLUSAGE Xb2PCRegisterType + ( + DWORD VertexRegister, + BYTE& PCUsageIndex + ) + { + D3DDECLUSAGE PCRegisterType; + PCUsageIndex = 0; + + switch (VertexRegister) + { + case XTL::X_D3DVSDE_VERTEX: // -1 + PCRegisterType = D3DDECLUSAGE_UNSUPPORTED; + break; + case XTL::X_D3DVSDE_POSITION: // 0 + PCRegisterType = D3DDECLUSAGE_POSITION; + break; + case XTL::X_D3DVSDE_BLENDWEIGHT: // 1 + PCRegisterType = D3DDECLUSAGE_BLENDWEIGHT; + break; + case XTL::X_D3DVSDE_NORMAL: // 2 + PCRegisterType = D3DDECLUSAGE_NORMAL; + break; + case XTL::X_D3DVSDE_DIFFUSE: // 3 + PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 0; + break; + case XTL::X_D3DVSDE_SPECULAR: // 4 + PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 1; + break; + case XTL::X_D3DVSDE_FOG: // 5 + PCRegisterType = D3DDECLUSAGE_FOG; + break; + case XTL::X_D3DVSDE_POINTSIZE: // 6 + PCRegisterType = D3DDECLUSAGE_PSIZE; + break; + case XTL::X_D3DVSDE_BACKDIFFUSE: // 7 + PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 2; + break; + case XTL::X_D3DVSDE_BACKSPECULAR: // 8 + PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 3; + break; + case XTL::X_D3DVSDE_TEXCOORD0: // 9 + PCRegisterType = 
D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 0; + break; + case XTL::X_D3DVSDE_TEXCOORD1: // 10 + PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 1; + break; + case XTL::X_D3DVSDE_TEXCOORD2: // 11 + PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 2; + break; + case XTL::X_D3DVSDE_TEXCOORD3: // 12 + PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 3; + break; + default: + PCRegisterType = D3DDECLUSAGE_UNSUPPORTED; + break; + } + + return PCRegisterType; + } + + static char* XboxVertexRegisterAsString(DWORD VertexRegister) + { + switch (VertexRegister) + { + case XTL::X_D3DVSDE_VERTEX: // -1 + return "D3DVSDE_VERTEX /* xbox ext. */"; + case XTL::X_D3DVSDE_POSITION: // 0 + return "D3DVSDE_POSITION"; + case XTL::X_D3DVSDE_BLENDWEIGHT: // 1 + return "D3DVSDE_BLENDWEIGHT"; + case XTL::X_D3DVSDE_NORMAL: // 2 + return "D3DVSDE_NORMAL"; + case XTL::X_D3DVSDE_DIFFUSE: // 3 + return "D3DVSDE_DIFFUSE"; + case XTL::X_D3DVSDE_SPECULAR: // 4 + return "D3DVSDE_SPECULAR"; + case XTL::X_D3DVSDE_FOG: // 5 + return "D3DVSDE_FOG"; + case XTL::X_D3DVSDE_POINTSIZE: // 6 + return "D3DVDSE_POINTSIZE"; + case XTL::X_D3DVSDE_BACKDIFFUSE: // 7 + return "D3DVSDE_BACKDIFFUSE /* xbox ext. */"; + case XTL::X_D3DVSDE_BACKSPECULAR: // 8 + return "D3DVSDE_BACKSPECULAR /* xbox ext. 
*/"; + case XTL::X_D3DVSDE_TEXCOORD0: // 9 + return "D3DVSDE_TEXCOORD0"; + case XTL::X_D3DVSDE_TEXCOORD1: // 10 + return "D3DVSDE_TEXCOORD1"; + case XTL::X_D3DVSDE_TEXCOORD2: // 11 + return "D3DVSDE_TEXCOORD2"; + case XTL::X_D3DVSDE_TEXCOORD3: // 12 + return "D3DVSDE_TEXCOORD3"; + case 13: + return "13 /* unknown register */"; + case 14: + return "14 /* unknown register */"; + case 15: + return "15 /* unknown register */"; + default: + return "16 /* or higher, unknown register */"; + } + } + // VERTEX SHADER static DWORD VshGetDeclarationCount(DWORD *pXboxDeclaration) @@ -808,13 +808,13 @@ private: return (XboxToken & X_D3DVSD_STREAMNUMBERMASK) >> X_D3DVSD_STREAMNUMBERSHIFT; } - inline DWORD VshGetVertexRegister(DWORD XboxToken) + static inline DWORD VshGetVertexRegister(DWORD XboxToken) { DWORD regNum = (XboxToken & X_D3DVSD_VERTEXREGMASK) >> X_D3DVSD_VERTEXREGSHIFT; return regNum; } - inline DWORD VshGetVertexRegisterIn(DWORD XboxToken) + static inline DWORD VshGetVertexRegisterIn(DWORD XboxToken) { DWORD regNum = (XboxToken & X_D3DVSD_VERTEXREGINMASK) >> X_D3DVSD_VERTEXREGINSHIFT; return regNum; @@ -1006,7 +1006,7 @@ private: DbgVshPrintf("// NbrStreams: %d\n", iNumberOfVertexStreams); } - void VshConvertToken_NOP(DWORD *pXboxToken) + static void VshConvertToken_NOP(DWORD *pXboxToken) { if(*pXboxToken != X_D3DVSD_NOP()) { @@ -1014,7 +1014,7 @@ private: } } - DWORD VshConvertToken_CONSTMEM(DWORD *pXboxToken) + static DWORD VshConvertToken_CONSTMEM(DWORD *pXboxToken) { // DWORD ConstantAddress = (*pXboxToken & X_D3DVSD_CONSTADDRESSMASK) >> X_D3DVSD_CONSTADDRESSSHIFT; DWORD Count = (*pXboxToken & X_D3DVSD_CONSTCOUNTMASK) >> X_D3DVSD_CONSTCOUNTSHIFT; @@ -1411,7 +1411,7 @@ private: return Step; } - DWORD* RemoveXboxDeclarationRedefinition(DWORD* pXboxDeclaration) + static DWORD* RemoveXboxDeclarationRedefinition(DWORD* pXboxDeclaration) { // Detect and remove register redefinitions by preprocessing the Xbox Vertex Declaration // Test Case: King Kong From 
ac36e6a0d4d18f21cacdd61f71d71894e3fc4497 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Mon, 16 Dec 2019 15:34:29 +0100 Subject: [PATCH 62/77] Avoid using uninitialized class members --- src/core/hle/D3D8/XbVertexShader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 64e1ea204..a024559fb 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -222,8 +222,8 @@ private: // State variables : - uint16_t IntermediateCount; - VSH_INTERMEDIATE_FORMAT Intermediate[VSH_MAX_INTERMEDIATE_COUNT]; + uint16_t IntermediateCount = 0; + VSH_INTERMEDIATE_FORMAT Intermediate[VSH_MAX_INTERMEDIATE_COUNT] = {}; // Retrieves a number of bits in the instruction token static inline uint32_t VshGetFromToken( From 325e663c9fec91a4a6d8454f817f37548178cc54 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Mon, 16 Dec 2019 18:13:09 +0100 Subject: [PATCH 63/77] Further cleanup --- src/core/hle/D3D8/XbVertexShader.cpp | 136 +++++++++------------------ 1 file changed, 46 insertions(+), 90 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index a024559fb..a88874216 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -181,11 +181,6 @@ private: FLD_FINAL }; - enum VSH_IMD_INSTRUCTION_TYPE { - IMD_MAC, - IMD_ILU - }; - enum VSH_IMD_OUTPUT_TYPE { IMD_OUTPUT_C, IMD_OUTPUT_R, @@ -208,7 +203,7 @@ private: } VSH_IMD_PARAMETER; typedef struct _VSH_INTERMEDIATE_FORMAT { - VSH_IMD_INSTRUCTION_TYPE InstructionType; + bool IsMAC; // otherwise ILU VSH_MAC MAC; VSH_ILU ILU; VSH_IMD_OUTPUT Output; @@ -333,48 +328,17 @@ private: Param.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_W)); } - static void VshConvertIntermediateParams( - VSH_INTERMEDIATE_FORMAT* pIntermediate, - uint32_t* pShaderToken) - { - // Get a0.x indirect constant addressing 
- pIntermediate->IndexesWithA0_X = VshGetField(pShaderToken, FLD_A0X) > 0; // Applies to IMD_OUTPUT_C parameter reads - - int16_t R; - int16_t V = VshGetField(pShaderToken, FLD_V); - int16_t C = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST)); - uint8_t ParamCount = 0; - - // Parameters[0].Active will always be set, but [1] and [2] may not, so reset them: - pIntermediate->Parameters[1].Active = false; - pIntermediate->Parameters[2].Active = false; - if (pIntermediate->MAC >= MAC_MOV) { - // Get parameter A - R = VshGetField(pShaderToken, FLD_A_R); - VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_A_MUX, FLD_A_NEG, R, V, C); - } - - if ((pIntermediate->MAC == MAC_MUL) || ((pIntermediate->MAC >= MAC_MAD) && (pIntermediate->MAC <= MAC_SGE))) { - // Get parameter B - R = VshGetField(pShaderToken, FLD_B_R); - VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_B_MUX, FLD_B_NEG, R, V, C); - } - - if ((pIntermediate->ILU >= ILU_MOV) || (pIntermediate->MAC == MAC_ADD) || (pIntermediate->MAC == MAC_MAD)) { - // Get parameter C - R = VshGetField(pShaderToken, FLD_C_R_HIGH) << 2 | VshGetField(pShaderToken, FLD_C_R_LOW); - VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_C_MUX, FLD_C_NEG, R, V, C); - } - } - - VSH_INTERMEDIATE_FORMAT* VshAddIntermediateInstruction( + void VshAddIntermediateInstruction( + uint32_t* pShaderToken, + VSH_MAC MAC, + VSH_ILU ILU, VSH_IMD_OUTPUT_TYPE output_type, int16_t output_address, int8_t output_mask) { // Is the output mask set? 
if (output_mask == 0) { - return nullptr; + return; } if (IntermediateCount >= VSH_MAX_INTERMEDIATE_COUNT) { @@ -382,42 +346,40 @@ private: } VSH_INTERMEDIATE_FORMAT* pIntermediate = &(Intermediate[IntermediateCount++]); + pIntermediate->IsMAC = MAC > MAC_NOP; + pIntermediate->MAC = MAC; + pIntermediate->ILU = ILU; pIntermediate->Output.Type = output_type; pIntermediate->Output.Address = output_address; pIntermediate->Output.Mask = output_mask; - return pIntermediate; - } + // Get a0.x indirect constant addressing + pIntermediate->IndexesWithA0_X = VshGetField(pShaderToken, FLD_A0X) > 0; // Applies to PARAM_C parameter reads - void VshAddIntermediateMACOpcode( - uint32_t* pShaderToken, - VSH_MAC MAC, - VSH_IMD_OUTPUT_TYPE output_type, - int16_t output_address, - int8_t output_mask) - { - VSH_INTERMEDIATE_FORMAT* pIntermediate = VshAddIntermediateInstruction(output_type, output_address, output_mask); - if (!pIntermediate) return; + int16_t R; + int16_t V = VshGetField(pShaderToken, FLD_V); + int16_t C = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST)); + unsigned ParamCount = 0; - pIntermediate->InstructionType = IMD_MAC; - pIntermediate->MAC = MAC; - pIntermediate->ILU = ILU_NOP; - VshConvertIntermediateParams(pIntermediate, pShaderToken); - } + // Parameters[0].Active will always be set, but [1] and [2] may not, so reset them: + pIntermediate->Parameters[1].Active = false; + pIntermediate->Parameters[2].Active = false; + if (MAC >= MAC_MOV) { + // Get parameter A + R = VshGetField(pShaderToken, FLD_A_R); + VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_A_MUX, FLD_A_NEG, R, V, C); + } - void VshAddIntermediateILUOpcode( - uint32_t* pShaderToken, - VSH_ILU ILU, - VSH_IMD_OUTPUT_TYPE output_type, - int16_t output_address, - int8_t output_mask) - { - VSH_INTERMEDIATE_FORMAT* pIntermediate = VshAddIntermediateInstruction(output_type, output_address, output_mask); - if (!pIntermediate) return; + if ((MAC == MAC_MUL) || ((MAC 
>= MAC_MAD) && (MAC <= MAC_SGE))) { + // Get parameter B + R = VshGetField(pShaderToken, FLD_B_R); + VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_B_MUX, FLD_B_NEG, R, V, C); + } - pIntermediate->InstructionType = IMD_ILU; - pIntermediate->MAC = MAC_NOP; - pIntermediate->ILU = ILU; - VshConvertIntermediateParams(pIntermediate, pShaderToken); + if ((ILU >= ILU_MOV) || (MAC == MAC_ADD) || (MAC == MAC_MAD)) { + // Get parameter C + R = VshGetField(pShaderToken, FLD_C_R_HIGH) << 2 | VshGetField(pShaderToken, FLD_C_R_LOW); + VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_C_MUX, FLD_C_NEG, R, V, C); + } } public: @@ -426,6 +388,7 @@ public: // First get the instruction(s). VSH_ILU ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU); VSH_MAC MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC); + if (MAC > MAC_ARL) LOG_TEST_CASE("Unknown MAC"); // Output register VSH_OUTPUT_MUX OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX); @@ -451,25 +414,25 @@ public: // Ignore paired MAC opcodes that write to R1 } else { if (MAC == MAC_ARL) { - VshAddIntermediateMACOpcode(pShaderToken, MAC, IMD_OUTPUT_A0X, 0, MASK_X); + VshAddIntermediateInstruction(pShaderToken, MAC, ILU_NOP, IMD_OUTPUT_A0X, 0, MASK_X); } else { - VshAddIntermediateMACOpcode(pShaderToken, MAC, IMD_OUTPUT_R, RAddress, VshGetField(pShaderToken, FLD_OUT_MAC_MASK)); + VshAddIntermediateInstruction(pShaderToken, MAC, ILU_NOP, IMD_OUTPUT_R, RAddress, VshGetField(pShaderToken, FLD_OUT_MAC_MASK)); } } // Check if we must add a muxed MAC opcode as well if (OutputMux == OMUX_MAC) { - VshAddIntermediateMACOpcode(pShaderToken, MAC, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK)); + VshAddIntermediateInstruction(pShaderToken, MAC, ILU_NOP, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK)); } } // Check if there's an ILU opcode if (ILU != ILU_NOP) { // Paired ILU opcodes will only write to R1 - 
VshAddIntermediateILUOpcode(pShaderToken, ILU, IMD_OUTPUT_R, bIsPaired ? 1 : RAddress, VshGetField(pShaderToken, FLD_OUT_ILU_MASK)); + VshAddIntermediateInstruction(pShaderToken, MAC_NOP, ILU, IMD_OUTPUT_R, bIsPaired ? 1 : RAddress, VshGetField(pShaderToken, FLD_OUT_ILU_MASK)); // Check if we must add a muxed ILU opcode as well if (OutputMux == OMUX_ILU) { - VshAddIntermediateILUOpcode(pShaderToken, ILU, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK)); + VshAddIntermediateInstruction(pShaderToken, MAC_NOP, ILU, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK)); } } @@ -502,7 +465,7 @@ private: switch (dest.Type) { case IMD_OUTPUT_C: // Access the HLSL capital C[] constants array, with the index bias applied : - // TODO : Avoid out-of-bound writes (perhaps writing to a reserverd index?) + // TODO : Avoid out-of-bound writes (perhaps writing to a reserved index?) hlsl << "C[" << dest.Address + X_D3DSCM_CORRECTION << "]"; LOG_TEST_CASE("Vertex shader writes to constant table"); break; @@ -532,7 +495,6 @@ private: static void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& param, bool IndexesWithA0_X) { - // Print functions static char* RegisterName[/*VSH_PARAMETER_TYPE*/] = { "?", // PARAM_UNKNOWN = 0, "r", // PARAM_R, // Temporary (scRatch) registers @@ -552,19 +514,15 @@ private: if (IndexesWithA0_X) { if (param.Address == 0) { hlsl << "c(a0.x)"; // Hide the offset if it's 0 - } - else if (param.Address < 0) { + } else if (param.Address < 0) { hlsl << "c(a0.x" << param.Address << ")"; // minus is part of the offset - } - else { + } else { hlsl << "c(a0.x+" << param.Address << ")"; // show addition character } - } - else { + } else { hlsl << "c(" << param.Address << ")"; } - } - else { + } else { hlsl << RegisterName[param.ParameterType] << param.Address; } @@ -573,8 +531,7 @@ private: if (!(param.Swizzle[0] == SWIZZLE_X && param.Swizzle[1] == SWIZZLE_Y && param.Swizzle[2] == SWIZZLE_Z && - param.Swizzle[3] == 
SWIZZLE_W)) - { + param.Swizzle[3] == SWIZZLE_W)) { // We'll try to simplify swizzles if we can // If all swizzles are the same, we only need to write one out unsigned swizzles = 1; @@ -634,10 +591,9 @@ public: VSH_INTERMEDIATE_FORMAT& IntermediateInstruction = Intermediate[i]; std::string str; - if (IntermediateInstruction.InstructionType == IMD_MAC) { + if (IntermediateInstruction.IsMAC) { str = VSH_MAC_HLSL[IntermediateInstruction.MAC]; } else { - assert(IntermediateInstruction.InstructionType == IMD_ILU); str = VSH_ILU_HLSL[IntermediateInstruction.ILU]; } From adc065b0bcae3aba5334b44c14e1fe45da871a65 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Mon, 16 Dec 2019 18:17:43 +0100 Subject: [PATCH 64/77] Revert "TMP mechassault hack" This reverts commit d616e94c117d3e1f0e8ce35bfc97d9f845d3c5b7. --- src/core/kernel/exports/EmuKrnlOb.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/core/kernel/exports/EmuKrnlOb.cpp b/src/core/kernel/exports/EmuKrnlOb.cpp index 802b34f09..87400d476 100644 --- a/src/core/kernel/exports/EmuKrnlOb.cpp +++ b/src/core/kernel/exports/EmuKrnlOb.cpp @@ -859,12 +859,12 @@ XBSYSAPI EXPORTNUM(246) xboxkrnl::NTSTATUS NTAPI xboxkrnl::ObReferenceObjectByHa // HACK: Since we forward to NtDll::NtCreateEvent, this *might* be a Windows handle instead of our own // In this case, we must return the input handle // Test Case: Xbox Live Dashboard, Network Test (or any other Xbox Live connection) - //DWORD flags = 0; - //if (GetHandleInformation(Handle, &flags)) { - // // This was a Windows Handle, so return it. - // *ReturnedObject = Handle; - // return STATUS_SUCCESS; - //} + DWORD flags = 0; + if (GetHandleInformation(Handle, &flags)) { + // This was a Windows Handle, so return it. 
+ *ReturnedObject = Handle; + return STATUS_SUCCESS; + } status = STATUS_INVALID_HANDLE; } From 949aecd8620697edd7fb9ec32566db897714cebb Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Tue, 17 Dec 2019 18:35:04 +1300 Subject: [PATCH 65/77] Set optimization level 0 --- src/core/hle/D3D8/XbVertexShader.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index a88874216..fbf8ea4d2 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -1692,6 +1692,10 @@ extern HRESULT EmuRecompileVshFunction DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str()); DbgVshPrintf("-----------------------\n"); + // Level 0 for fastest runtime compilation + // TODO Can we recompile an optimized shader in the background? + UINT flags1 = D3DCOMPILE_OPTIMIZATION_LEVEL0; + hRet = D3DCompile( hlsl_str.c_str(), hlsl_str.length(), @@ -1700,7 +1704,7 @@ extern HRESULT EmuRecompileVshFunction nullptr, // pInclude // TODO precompile x_* HLSL functions? 
"main", // shader entry poiint "vs_3_0", // shader profile - 0, // flags1 + flags1, // flags1 0, // flags2 ppRecompiledShader, // out &pErrors // ppErrorMsgs out From 13df2538537db4e68ba8155a76eed944888a1637 Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Tue, 17 Dec 2019 18:35:21 +1300 Subject: [PATCH 66/77] regex_replace first occurrence only --- src/core/hle/D3D8/XbVertexShader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index fbf8ea4d2..dc1fe276a 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -1686,7 +1686,7 @@ extern HRESULT EmuRecompileVshFunction } std::string hlsl_str = hlsl_stream.str(); - hlsl_str = std::regex_replace(hlsl_template, std::regex("// "), hlsl_str); + hlsl_str = std::regex_replace(hlsl_template, std::regex("// "), hlsl_str, std::regex_constants::format_first_only); DbgVshPrintf("--- HLSL conversion ---\n"); DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str()); From 016f8361b5d315c7d49e7f92b37f56ebad1a95bb Mon Sep 17 00:00:00 2001 From: Anthony Miles Date: Tue, 17 Dec 2019 20:21:08 +1300 Subject: [PATCH 67/77] Remove dummy shader fallback --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 32 ----------------------- 1 file changed, 32 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 5793691ae..ca9d5e0ee 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -4183,38 +4183,6 @@ HRESULT WINAPI XTL::EMUPATCH(D3DDevice_CreateVertexShader) DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateVertexShader"); } - //* Fallback to dummy shader. 
- //if (FAILED(hRet)) - //{ - // static const char dummy[] = - // "vs.1.1\n" - // "dcl_position v0\n" - // "dp4 oPos.x, v0, c96\n" - // "dp4 oPos.y, v0, c97\n" - // "dp4 oPos.z, v0, c98\n" - // "dp4 oPos.w, v0, c99\n"; - - // EmuLog(LOG_LEVEL::WARNING, "Trying fallback:\n%s", dummy); - - // hRet = D3DXAssembleShader( - // dummy, - // strlen(dummy), - // /*pDefines=*/nullptr, - // /*pInclude=*/nullptr, - // /*Flags=*/0, // Was D3DXASM_SKIPVALIDATION - // /*ppCompiledShader=*/&pRecompiledBuffer, - // /*ppCompilationErrors*/nullptr); - - // DEBUG_D3DRESULT(hRet, "D3DXAssembleShader"); - - // hRet = g_pD3DDevice->CreateVertexShader - // ( - // (DWORD*)pRecompiledBuffer->GetBufferPointer(), - // &pHostVertexShader - // ); - // DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateVertexShader(fallback)"); - //} - if (pRecompiledBuffer != nullptr) { pRecompiledBuffer->Release(); From 9634329033402f02ab3759b3daf8985f01fd7f11 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Tue, 17 Dec 2019 15:27:46 +0100 Subject: [PATCH 68/77] Avoid regex_replace by cutting up HLSL template into two raw strings. 
--- .../Direct3D9/CxbxVertexShaderTemplate.hlsl | 353 +++++++++--------- src/core/hle/D3D8/XbVertexShader.cpp | 11 +- 2 files changed, 183 insertions(+), 181 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index 9a582ab70..569ea6127 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -1,13 +1,13 @@ -// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : -R"DELIMITER( -// Xbox HLSL vertex shader (template populated at runtime) +// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : +R"DELIMITER(// Xbox HLSL vertex shader (template populated at runtime) + struct VS_INPUT { float4 v[16] : TEXCOORD; }; // Output registers -struct VS_OUTPUT +struct VS_OUTPUT { float4 oPos : POSITION; // Homogeneous clip space position float4 oD0 : COLOR0; // Primary color (front-facing) @@ -21,98 +21,98 @@ struct VS_OUTPUT float4 oT2 : TEXCOORD2; // Texture coordinate set 2 float4 oT3 : TEXCOORD3; // Texture coordinate set 3 }; - -#define X_D3DSCM_CORRECTION 96 // Add 96 to arrive at the range 0..191 (instead of -96..95) -#define X_D3DVS_CONSTREG_COUNT 192 - -// Xbox constant registers + +#define X_D3DSCM_CORRECTION 96 // Add 96 to arrive at the range 0..191 (instead of -96..95) +#define X_D3DVS_CONSTREG_COUNT 192 + +// Xbox constant registers uniform float4 C[X_D3DVS_CONSTREG_COUNT] : register(c0); - -// Vertex input overrides for SetVertexData4f support -uniform float4 vOverrideValue[16] : register(c192); -uniform float4 vOverridePacked[4] : register(c208); - -uniform float4 xboxViewportScale : register(c212); -uniform float4 xboxViewportOffset : register(c213); - -// Overloaded casts, assuring all inputs are treated as float4 -float4 _tof4(float src) { return float4(src, src, src, src); } -float4 _tof4(float2 src) { return src.xyyy; } 
-float4 _tof4(float3 src) { return src.xyzz; } -float4 _tof4(float4 src) { return src; } -float4 _ssss(float s) { return float4(s, s, s, s); } // a scalar output replicated across a 4-component vector + +// Vertex input overrides for SetVertexData4f support +uniform float4 vOverrideValue[16] : register(c192); +uniform float4 vOverridePacked[4] : register(c208); + +uniform float4 xboxViewportScale : register(c212); +uniform float4 xboxViewportOffset : register(c213); + +// Overloaded casts, assuring all inputs are treated as float4 +float4 _tof4(float src) { return float4(src, src, src, src); } +float4 _tof4(float2 src) { return src.xyyy; } +float4 _tof4(float3 src) { return src.xyzz; } +float4 _tof4(float4 src) { return src; } +float4 _ssss(float s) { return float4(s, s, s, s); } // a scalar output replicated across a 4-component vector #define _scalar(src) _tof4(src).x /* a scalar input */ - -float4 c(int register_number) -{ + +float4 c(int register_number) +{ // Map Xbox [-96, 95] to Host [0, 191] // Account for Xbox's negative constant indexes register_number += X_D3DSCM_CORRECTION; - if (register_number < 0) - return 0; - - if (register_number >= X_D3DVS_CONSTREG_COUNT) // X_D3DVS_CONSTREG_COUNT - return 0; - - return C[register_number]; -} - + if (register_number < 0) + return 0; + + if (register_number >= X_D3DVS_CONSTREG_COUNT) // X_D3DVS_CONSTREG_COUNT + return 0; + + return C[register_number]; +} + // Due to rounding differences with the Xbox (and increased precision on PC?) // some titles produce values just below the threshold of the next integer. // We can add a small bias to make sure it's bumped over the threshold // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader) -#define BIAS 0.0001 -// TODO : Use 0.001 like xqemu? - -// 2.14.1.11 Vertex Program Floating Point Requirements -// The floor operations used by the ARL and EXP instructions must -// operate identically. 
Specifically, the EXP instruction's floor(t.x) -// intermediate result must exactly match the integer stored in the -// address register by the ARL instruction. -float x_floor(float src) -{ - return floor(src + BIAS); -} - -// http://xboxdevwiki.net/NV2A/Vertex_Shader -// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program.txt -// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program1_1.txt +#define BIAS 0.0001 +// TODO : Use 0.001 like xqemu? + +// 2.14.1.11 Vertex Program Floating Point Requirements +// The floor operations used by the ARL and EXP instructions must +// operate identically. Specifically, the EXP instruction's floor(t.x) +// intermediate result must exactly match the integer stored in the +// address register by the ARL instruction. +float x_floor(float src) +{ + return floor(src + BIAS); +} + +// http://xboxdevwiki.net/NV2A/Vertex_Shader +// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program.txt +// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program1_1.txt // Functions for MAC ('Multiply And Accumulate') opcodes - -// 2.14.1.10.1 ARL: Address Register Load + +// 2.14.1.10.1 ARL: Address Register Load // The address register should be floored -#define x_arl(dest, mask, src0) dest.mask = x_floor(_tof4(src0).x).mask - +#define x_arl(dest, mask, src0) dest.mask = x_floor(_tof4(src0).x).mask + // 2.14.1.10.2 MOV: Move #define x_mov(dest, mask, src0) dest.mask = (_tof4(src0)).mask - + // 2.14.1.10.3 MUL: Multiply -#define x_mul(dest, mask, src0, src1) dest.mask = (_tof4(src0) * _tof4(src1)).mask - +#define x_mul(dest, mask, src0, src1) dest.mask = (_tof4(src0) * _tof4(src1)).mask + // 2.14.1.10.4 ADD: Add -#define x_add(dest, mask, src0, src1) dest.mask = (_tof4(src0) + _tof4(src1)).mask - +#define x_add(dest, mask, src0, src1) dest.mask = (_tof4(src0) + _tof4(src1)).mask + // 2.14.1.10.5 MAD: Multiply and Add -#define x_mad(dest, mask, src0, src1, src2) dest.mask = 
(_tof4(src0) * _tof4(src1) + _tof4(src2)).mask - -// 2.14.1.10.8 DP3: Three-Component Dot Product -#define x_dp3(dest, mask, src0, src1) dest.mask = _ssss(dot(_tof4(src0).xyz, _tof4(src1).xyz)).mask - -// 2.14.1.10.9 DP4: Four-Component Dot Product -#define x_dp4(dest, mask, src0, src1) dest.mask = _ssss(dot(_tof4(src0), _tof4(src1))).mask - +#define x_mad(dest, mask, src0, src1, src2) dest.mask = (_tof4(src0) * _tof4(src1) + _tof4(src2)).mask + +// 2.14.1.10.8 DP3: Three-Component Dot Product +#define x_dp3(dest, mask, src0, src1) dest.mask = _ssss(dot(_tof4(src0).xyz, _tof4(src1).xyz)).mask + +// 2.14.1.10.9 DP4: Four-Component Dot Product +#define x_dp4(dest, mask, src0, src1) dest.mask = _ssss(dot(_tof4(src0), _tof4(src1))).mask + // 2.14.1.10.10 DST: Distance Vector -#define x_dst(dest, mask, src0, src1) dest.mask = dst(_tof4(src0), _tof4(src1)).mask /* equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } */ - +#define x_dst(dest, mask, src0, src1) dest.mask = dst(_tof4(src0), _tof4(src1)).mask /* equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } */ + // 2.14.1.10.11 MIN: Minimum -#define x_min(dest, mask, src0, src1) dest.mask = min(_tof4(src0), _tof4(src1)).mask - +#define x_min(dest, mask, src0, src1) dest.mask = min(_tof4(src0), _tof4(src1)).mask + // 2.14.1.10.12 MAX: Maximum -#define x_max(dest, mask, src0, src1) dest.mask = max(_tof4(src0), _tof4(src1)).mask - +#define x_max(dest, mask, src0, src1) dest.mask = max(_tof4(src0), _tof4(src1)).mask + // 2.14.1.10.13 SLT: Set On Less Than -#define x_slt(dest, mask, src0, src1) dest.mask = _slt(_tof4(src0), _tof4(src1)).mask +#define x_slt(dest, mask, src0, src1) dest.mask = _slt(_tof4(src0), _tof4(src1)).mask float4 _slt(float4 src0, float4 src1) { float4 dest; @@ -124,7 +124,7 @@ float4 _slt(float4 src0, float4 src1) } // 2.14.1.10.14 SGE: Set On Greater or Equal Than -#define x_sge(dest, mask, src0, src1) dest.mask = _sge(_tof4(src0), 
_tof4(src1)).mask +#define x_sge(dest, mask, src0, src1) dest.mask = _sge(_tof4(src0), _tof4(src1)).mask float4 _sge(float4 src0, float4 src1) { float4 dest; @@ -134,92 +134,92 @@ float4 _sge(float4 src0, float4 src1) dest.w = (src0.w >= src1.w) ? 1 : 0; return dest; } - -// 2.14.1.10.18 DPH: Homogeneous Dot Product -#define x_dph(dest, mask, src0, src1) dest.mask = _ssss(_dph(_tof4(src0), _tof4(src1))).mask -float _dph(float4 src0, float4 src1) -{ - return dot(src0.xyz, src1.xyz) + src1.w; -} - + +// 2.14.1.10.18 DPH: Homogeneous Dot Product +#define x_dph(dest, mask, src0, src1) dest.mask = _ssss(_dph(_tof4(src0), _tof4(src1))).mask +float _dph(float4 src0, float4 src1) +{ + return dot(src0.xyz, src1.xyz) + src1.w; +} + // Xbox ILU Functions - -// 2.14.1.10.6 RCP: Reciprocal -#define x_rcp(dest, mask, src0) dest.mask = _ssss(_rcp(_scalar(src0))).mask -float _rcp(float src) -{ -#if 0 // TODO : Enable - if (src == 1) return 1; - if (src == 0) return 1.#INF; -#endif + +// 2.14.1.10.6 RCP: Reciprocal +#define x_rcp(dest, mask, src0) dest.mask = _ssss(_rcp(_scalar(src0))).mask +float _rcp(float src) +{ +#if 0 // TODO : Enable + if (src == 1) return 1; + if (src == 0) return 1.#INF; +#endif return 1/ src; -} - +} + // 2.14.1.10.7 RSQ: Reciprocal Square Root -#define x_rsq(dest, mask, src0) dest.mask = _ssss(_rsq(_scalar(src0))).mask -float _rsq(float src) -{ - float a = abs(src); -#if 0 // TODO : Enable - if (a == 1) return 1; - if (a == 0) return 1.#INF; -#endif +#define x_rsq(dest, mask, src0) dest.mask = _ssss(_rsq(_scalar(src0))).mask +float _rsq(float src) +{ + float a = abs(src); +#if 0 // TODO : Enable + if (a == 1) return 1; + if (a == 0) return 1.#INF; +#endif return rsqrt(a); -} - +} + // 2.14.1.10.15 EXP: Exponential Base 2 -#define x_expp(dest, mask, src0) dest.mask = _expp(_scalar(src0)).mask +#define x_expp(dest, mask, src0) dest.mask = _expp(_scalar(src0)).mask float4 _expp(float src) { float floor_src = x_floor(src); - - float4 dest; + + float4 dest; 
dest.x = exp2(floor_src); dest.y = src - floor_src; dest.z = exp2(src); dest.w = 1; - return dest; + return dest; } - + // 2.14.1.10.16 LOG: Logarithm Base 2 -#define x_logp(dest, mask, src0) dest.mask = _logp(_scalar(src0)).mask +#define x_logp(dest, mask, src0) dest.mask = _logp(_scalar(src0)).mask float4 _logp(float src) -{ +{ float4 dest; -#if 0 // TODO : Enable - float t = abs(src); - if (t != 0) { - if (t == 1.#INF) { - dest.x = 1.#INF; - dest.y = 1; - dest.z = 1.#INF; - } else { -#endif - float exponent = floor(log2(src)); // TODO : x_floor - float mantissa = 1 / exp2(exponent); - float z = log2(src); // TODO : exponent + log2(mantissa); // TODO : Or log2(t)? - // TODO : float exponent = frexp(src + BIAS, /*out*/mantissa); +#if 0 // TODO : Enable + float t = abs(src); + if (t != 0) { + if (t == 1.#INF) { + dest.x = 1.#INF; + dest.y = 1; + dest.z = 1.#INF; + } else { +#endif + float exponent = floor(log2(src)); // TODO : x_floor + float mantissa = 1 / exp2(exponent); + float z = log2(src); // TODO : exponent + log2(mantissa); // TODO : Or log2(t)? + // TODO : float exponent = frexp(src + BIAS, /*out*/mantissa); dest.x = exponent; dest.y = mantissa; dest.z = z; -#if 0 - } +#if 0 + } } else { - dest.x = -1.#INF; - dest.y = 1; - dest.z = -1.#INF; - } -#endif + dest.x = -1.#INF; + dest.y = 1; + dest.z = -1.#INF; + } +#endif dest.w = 1; return dest; } - -// 2.14.1.10.17 LIT: Light Coefficients + +// 2.14.1.10.17 LIT: Light Coefficients #define x_lit(dest, mask, src) dest.mask = _lit(_tof4(src)).mask float4 _lit(float4 src0) { - const float epsilon = 1.0f / 256.0f; + const float epsilon = 1.0f / 256.0f; float diffuse = src0.x; float blinn = src0.y; @@ -228,15 +228,15 @@ float4 _lit(float4 src0) float4 dest; dest.x = 1; dest.y = max(0, diffuse); - dest.z = diffuse > 0 ? exp2(specPower * log(blinn)) : 0; + dest.z = diffuse > 0 ? exp2(specPower * log(blinn)) : 0; // TODO : Use dest.z = (diffuse > 0) && (blinn > 0) ? 
pow(blinn, specPower) : 0; dest.w = 1; - + return dest; } -// 2.14.1.10.19 RCC: Reciprocal Clamped -#define x_rcc(dest, mask, src0) dest.mask = _ssss(_rcc(_scalar(src0))).mask +// 2.14.1.10.19 RCC: Reciprocal Clamped +#define x_rcc(dest, mask, src0) dest.mask = _ssss(_rcc(_scalar(src0))).mask float _rcc(float src) { // Calculate the reciprocal @@ -247,20 +247,20 @@ float _rcc(float src) ? clamp(r, 5.42101e-020f, 1.84467e+019f) // the IEEE 32-bit binary values 0x1F800000 and 0x5F800000 : clamp(r, -1.84467e+019f, -5.42101e-020f); // the IEEE 32-bit binary values 0xDF800000 and 0x9F800000 } - + float4 reverseScreenspaceTransform(float4 oPos) { // On Xbox, oPos should contain the vertex position in screenspace - // We need to reverse this transformation + // We need to reverse this transformation // Conventionally, each Xbox Vertex Shader includes instructions like this // mul oPos.xyz, r12, c-38 // +rcc r1.x, r12.w // mad oPos.xyz, r12, r1.x, c-37 - // where c-37 and c-38 are reserved transform values + // where c-37 and c-38 are reserved transform values - oPos.xyz -= xboxViewportOffset.xyz; // reverse offset + oPos.xyz -= xboxViewportOffset.xyz; // reverse offset oPos.xyz *= oPos.w; // reverse perspective divide - oPos.xyz /= xboxViewportScale.xyz; // reverse scale + oPos.xyz /= xboxViewportScale.xyz; // reverse scale return oPos; } @@ -270,7 +270,7 @@ VS_OUTPUT main(const VS_INPUT xIn) // Output variables float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3; oPos = oD0 = oD1 = oB0 = oB1 = oT0 = oT1 = oT2 = oT3 = float4(0, 0, 0, 1); // Pre-initialize w component of outputs to 1 - + // Single component outputs float4 oFog, oPts; // x is write-only on Xbox. 
Use float4 as some games use incorrect masks oFog = oPts = 0; @@ -283,37 +283,38 @@ VS_OUTPUT main(const VS_INPUT xIn) r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 0); #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox - // Input registerss - float4 v[16]; - # define v0 v[0] - # define v1 v[1] - # define v2 v[2] - # define v3 v[3] - # define v4 v[4] - # define v5 v[5] - # define v6 v[6] - # define v7 v[7] - # define v8 v[8] - # define v9 v[9] - # define v10 v[10] - # define v11 v[11] - # define v12 v[12] - # define v13 v[13] - # define v14 v[14] - # define v15 v[15] - - // View 4 packed overrides as an array of 16 floats - float vOverride[16] = (float[16])vOverridePacked; - - // Initialize input registers from the vertex buffer - // Or use an override value set with SetVertexData4f - for(int i = 0; i < 16; i++){ - v[i] = vOverride[i] ? vOverrideValue[i] : xIn.v[i]; - } + // Input registerss + float4 v[16]; + # define v0 v[0] + # define v1 v[1] + # define v2 v[2] + # define v3 v[3] + # define v4 v[4] + # define v5 v[5] + # define v6 v[6] + # define v7 v[7] + # define v8 v[8] + # define v9 v[9] + # define v10 v[10] + # define v11 v[11] + # define v12 v[12] + # define v13 v[13] + # define v14 v[14] + # define v15 v[15] + + // View 4 packed overrides as an array of 16 floats + float vOverride[16] = (float[16])vOverridePacked; + + // Initialize input registers from the vertex buffer + // Or use an override value set with SetVertexData4f + for(int i = 0; i < 16; i++){ + v[i] = vOverride[i] ? 
vOverrideValue[i] : xIn.v[i]; + } + + // Xbox shader program)DELIMITER", /* This terminates the header raw string" // */ + +R"DELIMITER( - // Xbox shader program -// - // Copy variables to output struct VS_OUTPUT xOut; @@ -332,4 +333,4 @@ VS_OUTPUT main(const VS_INPUT xIn) return xOut; } -// End of vertex shader )DELIMITER" /* This terminates the raw string" // */ +// End of vertex shader footer)DELIMITER" /* This terminates the footer raw string" // */ diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index dc1fe276a..91de74208 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -36,7 +36,6 @@ #include "XbD3D8Types.h" // For X_D3DVSDE_* #include -#include #include #include #include @@ -1663,9 +1662,10 @@ extern HRESULT EmuRecompileVshFunction if (!SUCCEEDED(hRet)) return hRet; - static std::string hlsl_template = - #include "core\hle\D3D8\Direct3D9\CxbxVertexShaderTemplate.hlsl" // Note : This included .hlsl defines a raw string - ; + // Include HLSL header and footer as raw strings : + static std::string hlsl_template[2] = { + #include "core\hle\D3D8\Direct3D9\CxbxVertexShaderTemplate.hlsl" + }; // Decode the vertex shader program tokens into an intermediate representation pToken = (uint32_t*)((uintptr_t)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); @@ -1678,6 +1678,7 @@ extern HRESULT EmuRecompileVshFunction *pXboxFunctionSize = (intptr_t)pToken - (intptr_t)pXboxFunction; auto hlsl_stream = std::stringstream(); + hlsl_stream << hlsl_template[0]; // Start with the HLSL template header if (!VshDecoder.BuildShader(hlsl_stream)) { // Do not attempt to compile empty shaders // This is a declaration only shader, so there is no function to recompile @@ -1685,8 +1686,8 @@ extern HRESULT EmuRecompileVshFunction return D3D_OK; } + hlsl_stream << hlsl_template[1]; // Finish with the HLSL template footer std::string hlsl_str = hlsl_stream.str(); - hlsl_str = 
std::regex_replace(hlsl_template, std::regex("// "), hlsl_str, std::regex_constants::format_first_only); DbgVshPrintf("--- HLSL conversion ---\n"); DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str()); From c3ad911049b18a782dcc69f90bcf4f42591407e5 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Tue, 17 Dec 2019 17:32:29 +0100 Subject: [PATCH 69/77] Removed temporary CMake entries --- CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bd8f5211..778e66096 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,7 +129,6 @@ file (GLOB CXBXR_HEADER_EMU "${CXBXR_ROOT_DIR}/src/common/util/gloffscreen/gloffscreen.h" "${CXBXR_ROOT_DIR}/src/common/XADPCM.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.h" - #"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/HlslVertexShader.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/WalkIndexBuffer.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/ResourceTracker.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/XbConvert.h" @@ -263,7 +262,6 @@ file (GLOB CXBXR_SOURCE_EMU "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/RenderStates.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/TextureStates.cpp" - #"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/HlslVertexShader.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/WalkIndexBuffer.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/ResourceTracker.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/XbConvert.cpp" From 499297993c74ea14a9d9869032694c456ee0e59a Mon Sep 17 00:00:00 2001 From: patrickvl Date: Wed, 18 Dec 2019 16:41:12 +0100 Subject: [PATCH 70/77] Remove IsMAC and replace it in HLSL generation with its underlying condition --- src/core/hle/D3D8/XbVertexShader.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 91de74208..03d59c832 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ 
b/src/core/hle/D3D8/XbVertexShader.cpp @@ -202,7 +202,6 @@ private: } VSH_IMD_PARAMETER; typedef struct _VSH_INTERMEDIATE_FORMAT { - bool IsMAC; // otherwise ILU VSH_MAC MAC; VSH_ILU ILU; VSH_IMD_OUTPUT Output; @@ -345,7 +344,6 @@ private: } VSH_INTERMEDIATE_FORMAT* pIntermediate = &(Intermediate[IntermediateCount++]); - pIntermediate->IsMAC = MAC > MAC_NOP; pIntermediate->MAC = MAC; pIntermediate->ILU = ILU; pIntermediate->Output.Type = output_type; @@ -590,7 +588,7 @@ public: VSH_INTERMEDIATE_FORMAT& IntermediateInstruction = Intermediate[i]; std::string str; - if (IntermediateInstruction.IsMAC) { + if (IntermediateInstruction.MAC > MAC_NOP) { str = VSH_MAC_HLSL[IntermediateInstruction.MAC]; } else { str = VSH_ILU_HLSL[IntermediateInstruction.ILU]; From d28b4836fd7e4d3971bdcef146bbc37848384910 Mon Sep 17 00:00:00 2001 From: patrickvl Date: Wed, 18 Dec 2019 21:45:44 +0100 Subject: [PATCH 71/77] Halve the selection of input attribute or constant, using lerp. For this, made sure that the crossover value (as set in SetOverrideFlags) is either 0 or 1. Also replaced the v0..v15 defines by actual variables. Instead, manually unrolled the initialization code via a concatenating define. 
--- .../Direct3D9/CxbxVertexShaderTemplate.hlsl | 33 ++++++------------- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 2 +- 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index 569ea6127..ab510d6ca 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -283,33 +283,20 @@ VS_OUTPUT main(const VS_INPUT xIn) r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 0); #define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox - // Input registerss - float4 v[16]; - # define v0 v[0] - # define v1 v[1] - # define v2 v[2] - # define v3 v[3] - # define v4 v[4] - # define v5 v[5] - # define v6 v[6] - # define v7 v[7] - # define v8 v[8] - # define v9 v[9] - # define v10 v[10] - # define v11 v[11] - # define v12 v[12] - # define v13 v[13] - # define v14 v[14] - # define v15 v[15] - + // Input registers + float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; + // View 4 packed overrides as an array of 16 floats float vOverride[16] = (float[16])vOverridePacked; // Initialize input registers from the vertex buffer - // Or use an override value set with SetVertexData4f - for(int i = 0; i < 16; i++){ - v[i] = vOverride[i] ? 
vOverrideValue[i] : xIn.v[i]; - } + // Or use an override value set with SetVertexData4f + #define init_v(i) v##i = lerp(xIn.v[i], vOverride[i], vOverrideValue[i]); + // Note : unroll manually instead of for-loop, because of the ## concatenation + init_v( 0); init_v( 1); init_v( 2); init_v( 3); + init_v( 4); init_v( 5); init_v( 6); init_v( 7); + init_v( 8); init_v( 9); init_v(10); init_v(11); + init_v(12); init_v(13); init_v(14); init_v(15); // Xbox shader program)DELIMITER", /* This terminates the header raw string" // */ diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index ca9d5e0ee..40ec07720 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -494,7 +494,7 @@ void SetOverrideFlags(CxbxVertexShader* pCxbxVertexShader) { if (pCxbxVertexShader != nullptr && pCxbxVertexShader->pHostVertexShader != nullptr) { float overrideFlags[16]; for (int i = 0; i < 16; i++) { - overrideFlags[i] = !pCxbxVertexShader->VertexShaderInfo.vRegisterInDeclaration[i]; + overrideFlags[i] = pCxbxVertexShader->VertexShaderInfo.vRegisterInDeclaration[i] ? 1.0f : 0.0f; } g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE, overrideFlags, 4); } From f3d8120c6f8d1eeb055f42f4960fc1ef35b96440 Mon Sep 17 00:00:00 2001 From: patrickvl Date: Wed, 18 Dec 2019 22:30:10 +0100 Subject: [PATCH 72/77] Revert unrelated changes Made RegVIsPresentInDeclaration a class XboxVertexDeclarationConverter protected data member. 
--- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 2 +- src/core/hle/D3D8/XbVertexShader.cpp | 45 +++++++++++------------ src/core/hle/D3D8/XbVertexShader.h | 2 +- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 40ec07720..4e9fca8e3 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -2846,7 +2846,7 @@ void Direct3D_CreateDevice_Start // Disable multisampling for now, this fixes an issue where GTA3 only renders to half-screen // TODO: Find a better way of fixing this, we cannot just create larger backbuffers as it breaks // many games, despite working in the dashboard - pPresentationParameters->MultiSampleType = XTL::X_D3DMULTISAMPLE_NONE; + pPresentationParameters->MultiSampleType = XTL::X_D3DMULTISAMPLE_NONE; // create default device *before* calling Xbox Direct3D_CreateDevice trampline // to avoid hitting EMUPATCH'es that need a valid g_pD3DDevice diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 03d59c832..b289db8c0 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -44,8 +44,6 @@ LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) \ if(g_bPrintfOn) printf -std::array RegVIsPresentInDeclaration; // TODO : Scope this better than global - // **************************************************************************** // * Vertex shader function recompiler // **************************************************************************** @@ -625,6 +623,7 @@ protected: DWORD hostTemporaryRegisterCount; bool IsFixedFunction; D3DVERTEXELEMENT* pRecompiled; + std::array RegVIsPresentInDeclaration; public: // Output @@ -1023,8 +1022,8 @@ private: // new stream pCurrentVertexShaderStreamInfo = &(pVertexShaderInfoToSet->VertexStreams[StreamNumber]); - pCurrentVertexShaderStreamInfo->NeedPatch = false; - 
pCurrentVertexShaderStreamInfo->DeclPosition = false; + pCurrentVertexShaderStreamInfo->NeedPatch = FALSE; + pCurrentVertexShaderStreamInfo->DeclPosition = FALSE; pCurrentVertexShaderStreamInfo->CurrentStreamNumber = 0; pCurrentVertexShaderStreamInfo->HostVertexStride = 0; pCurrentVertexShaderStreamInfo->NumberOfVertexElements = 0; @@ -1041,7 +1040,7 @@ private: UINT XboxVertexElementDataType, UINT XboxVertexElementByteSize, UINT HostVertexElementByteSize, - bool NeedPatching) + BOOL NeedPatching) { CxbxVertexShaderStreamElement* pCurrentElement = &(pCurrentVertexShaderStreamInfo->VertexElements[pCurrentVertexShaderStreamInfo->NumberOfVertexElements]); pCurrentElement->XboxType = XboxVertexElementDataType; @@ -1071,7 +1070,7 @@ private: // Register a 'skip' element, so that Xbox data will be skipped // without increasing host stride - this does require patching : - VshConvert_RegisterVertexElement(XTL::X_D3DVSDT_NONE, SkipBytesCount, /*HostSize=*/0, /*NeedPatching=*/true); + VshConvert_RegisterVertexElement(XTL::X_D3DVSDT_NONE, SkipBytesCount, /*HostSize=*/0, /*NeedPatching=*/TRUE); } void VshConvertToken_STREAMDATA_SKIP(DWORD *pXboxToken) @@ -1089,7 +1088,7 @@ private: void VshConvertToken_STREAMDATA_REG(DWORD *pXboxToken) { DWORD VertexRegister = VshGetVertexRegister(*pXboxToken); - bool NeedPatching = false; + BOOL NeedPatching = FALSE; BYTE Index; BYTE HostVertexRegisterType; @@ -1154,7 +1153,7 @@ private: HostVertexElementByteSize = 1 * sizeof(FLOAT); } XboxVertexElementByteSize = 1 * sizeof(XTL::SHORT); - NeedPatching = true; + NeedPatching = TRUE; break; case XTL::X_D3DVSDT_NORMSHORT2: // 0x21: if (g_D3DCaps.DeclTypes & D3DDTCAPS_SHORT2N) { @@ -1167,7 +1166,7 @@ private: HostVertexElementDataType = D3DDECLTYPE_FLOAT2; HostVertexElementByteSize = 2 * sizeof(FLOAT); XboxVertexElementByteSize = 2 * sizeof(XTL::SHORT); - NeedPatching = true; + NeedPatching = TRUE; } break; case XTL::X_D3DVSDT_NORMSHORT3: // 0x31: @@ -1181,7 +1180,7 @@ private: 
HostVertexElementByteSize = 3 * sizeof(FLOAT); } XboxVertexElementByteSize = 3 * sizeof(XTL::SHORT); - NeedPatching = true; + NeedPatching = TRUE; break; case XTL::X_D3DVSDT_NORMSHORT4: // 0x41: if (g_D3DCaps.DeclTypes & D3DDTCAPS_SHORT4N) { @@ -1194,26 +1193,26 @@ private: HostVertexElementDataType = D3DDECLTYPE_FLOAT4; HostVertexElementByteSize = 4 * sizeof(FLOAT); XboxVertexElementByteSize = 4 * sizeof(XTL::SHORT); - NeedPatching = true; + NeedPatching = TRUE; } break; case XTL::X_D3DVSDT_NORMPACKED3: // 0x16: HostVertexElementDataType = D3DDECLTYPE_FLOAT3; HostVertexElementByteSize = 3 * sizeof(FLOAT); XboxVertexElementByteSize = 1 * sizeof(XTL::DWORD); - NeedPatching = true; + NeedPatching = TRUE; break; case XTL::X_D3DVSDT_SHORT1: // 0x15: HostVertexElementDataType = D3DDECLTYPE_SHORT2; HostVertexElementByteSize = 2 * sizeof(SHORT); XboxVertexElementByteSize = 1 * sizeof(XTL::SHORT); - NeedPatching = true; + NeedPatching = TRUE; break; case XTL::X_D3DVSDT_SHORT3: // 0x35: HostVertexElementDataType = D3DDECLTYPE_SHORT4; HostVertexElementByteSize = 4 * sizeof(SHORT); XboxVertexElementByteSize = 3 * sizeof(XTL::SHORT); - NeedPatching = true; + NeedPatching = TRUE; break; case XTL::X_D3DVSDT_PBYTE1: // 0x14: if (g_D3DCaps.DeclTypes & D3DDTCAPS_UBYTE4N) { @@ -1226,7 +1225,7 @@ private: HostVertexElementByteSize = 1 * sizeof(FLOAT); } XboxVertexElementByteSize = 1 * sizeof(XTL::BYTE); - NeedPatching = true; + NeedPatching = TRUE; break; case XTL::X_D3DVSDT_PBYTE2: // 0x24: if (g_D3DCaps.DeclTypes & D3DDTCAPS_UBYTE4N) { @@ -1239,7 +1238,7 @@ private: HostVertexElementByteSize = 2 * sizeof(FLOAT); } XboxVertexElementByteSize = 2 * sizeof(XTL::BYTE); - NeedPatching = true; + NeedPatching = TRUE; break; case XTL::X_D3DVSDT_PBYTE3: // 0x34: if (g_D3DCaps.DeclTypes & D3DDTCAPS_UBYTE4N) { @@ -1252,7 +1251,7 @@ private: HostVertexElementByteSize = 3 * sizeof(FLOAT); } XboxVertexElementByteSize = 3 * sizeof(XTL::BYTE); - NeedPatching = true; + NeedPatching = TRUE; break; 
case XTL::X_D3DVSDT_PBYTE4: // 0x44: // Test-case : Panzer @@ -1266,14 +1265,14 @@ private: HostVertexElementDataType = D3DDECLTYPE_FLOAT4; HostVertexElementByteSize = 4 * sizeof(FLOAT); XboxVertexElementByteSize = 4 * sizeof(XTL::BYTE); - NeedPatching = true; + NeedPatching = TRUE; } break; case XTL::X_D3DVSDT_FLOAT2H: // 0x72: HostVertexElementDataType = D3DDECLTYPE_FLOAT4; HostVertexElementByteSize = 4 * sizeof(FLOAT); XboxVertexElementByteSize = 3 * sizeof(FLOAT); - NeedPatching = true; + NeedPatching = TRUE; break; case XTL::X_D3DVSDT_NONE: // 0x02: // No host element data, so no patching @@ -1499,7 +1498,7 @@ extern void FreeVertexDynamicPatch(CxbxVertexShader *pVertexShader) } // Checks for failed vertex shaders, and shaders that would need patching -bool VshHandleIsValidShader(DWORD XboxVertexShaderHandle) +boolean VshHandleIsValidShader(DWORD XboxVertexShaderHandle) { #if 0 //printf( "VS = 0x%.08X\n", XboxVertexShaderHandle ); @@ -1508,7 +1507,7 @@ bool VshHandleIsValidShader(DWORD XboxVertexShaderHandle) if (pCxbxVertexShader) { if (pCxbxVertexShader->XboxStatus != 0) { - return false; + return FALSE; } /* for (uint32 i = 0; i < pCxbxVertexShader->VertexShaderInfo.NumberOfVertexStreams; i++) @@ -1517,13 +1516,13 @@ bool VshHandleIsValidShader(DWORD XboxVertexShaderHandle) { // Just for caching purposes pCxbxVertexShader->XboxStatus = 0x80000001; - return false; + return FALSE; } } */ } #endif - return true; + return TRUE; } extern boolean IsValidCurrentShader(void) diff --git a/src/core/hle/D3D8/XbVertexShader.h b/src/core/hle/D3D8/XbVertexShader.h index b634935cd..11087fe07 100644 --- a/src/core/hle/D3D8/XbVertexShader.h +++ b/src/core/hle/D3D8/XbVertexShader.h @@ -57,7 +57,7 @@ CxbxVertexShaderStreamElement; typedef struct _CxbxVertexShaderStreamInfo { - bool NeedPatch; // This is to know whether it's data which must be patched + BOOL NeedPatch; // This is to know whether it's data which must be patched BOOL DeclPosition; WORD HostVertexStride; DWORD 
NumberOfVertexElements; // Number of the stream data types From 1f83a28a01f266657124ffe071852f071a9c3172 Mon Sep 17 00:00:00 2001 From: patrickvl Date: Wed, 18 Dec 2019 22:56:28 +0100 Subject: [PATCH 73/77] Trigger test case popup (instead of just log) when vertex shader can't be compiled (which gives us valuable feedback for now). Removed obsolete hostTemporaryRegisterCount variable --- src/core/hle/D3D8/XbVertexShader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index b289db8c0..14f9c90fd 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -620,7 +620,6 @@ protected: // Internal variables CxbxVertexShaderInfo* pVertexShaderInfoToSet; CxbxVertexShaderStreamInfo* pCurrentVertexShaderStreamInfo = nullptr; - DWORD hostTemporaryRegisterCount; bool IsFixedFunction; D3DVERTEXELEMENT* pRecompiled; std::array RegVIsPresentInDeclaration; @@ -1708,7 +1707,8 @@ extern HRESULT EmuRecompileVshFunction &pErrors // ppErrorMsgs out ); if (FAILED(hRet)) { - EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); + LOG_TEST_CASE("Couldn't assemble recompiled vertex shader"); + //EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); } // Determine the log level From f9410dace0660d4af9b1a791feaf5a50f5b040cb Mon Sep 17 00:00:00 2001 From: patrickvl Date: Thu, 19 Dec 2019 00:50:43 +0100 Subject: [PATCH 74/77] Added Register out-of-range LOG_TEST_CASE's Named consistently CXBX specific vertex shader constants --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 10 +++++++--- src/core/hle/D3D8/XbD3D8Types.h | 11 ++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 4e9fca8e3..e79285008 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -496,7 
+496,7 @@ void SetOverrideFlags(CxbxVertexShader* pCxbxVertexShader) { for (int i = 0; i < 16; i++) { overrideFlags[i] = pCxbxVertexShader->VertexShaderInfo.vRegisterInDeclaration[i] ? 1.0f : 0.0f; } - g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE, overrideFlags, 4); + g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE, overrideFlags, 4); } } @@ -3873,8 +3873,8 @@ void UpdateViewPortOffsetAndScaleConstants() float vOffset[4], vScale[4]; GetViewPortOffsetAndScale(vOffset, vScale); - g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_VIEWPORT_SCALE_MIRROR, vScale, 1); - g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_VIEWPORT_OFFSET_MIRROR, vOffset, 1); + g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_VIEWPORT_SCALE_MIRROR, vScale, 1); + g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR, vOffset, 1); // Store viewport offset and scale in constant registers 58 (c-38) and // 59 (c-37) used for screen space transformation. @@ -4303,6 +4303,8 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SetVertexShaderConstant) // The host does not support negative, so we adjust to 0..191 Register += X_D3DSCM_CORRECTION; + if (Register < 0) LOG_TEST_CASE("Register < 0"); + if (Register + ConstantCount > X_D3DVS_CONSTREG_COUNT) LOG_TEST_CASE("Register + ConstantCount > X_D3DVS_CONSTREG_COUNT"); HRESULT hRet; hRet = g_pD3DDevice->SetVertexShaderConstantF( Register, @@ -4647,6 +4649,8 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SetVertexData4f) // not present in the vertex declaration. // We use range 193 and up to store these values, as Xbox shaders stop at c192! 
FLOAT values[] = {a,b,c,d}; + if (Register < 0) LOG_TEST_CASE("Register < 0"); + if (Register >= 16) LOG_TEST_CASE("Register >= 16"); g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + Register, values, 1); } diff --git a/src/core/hle/D3D8/XbD3D8Types.h b/src/core/hle/D3D8/XbD3D8Types.h index e1da19c71..974a0b26a 100644 --- a/src/core/hle/D3D8/XbD3D8Types.h +++ b/src/core/hle/D3D8/XbD3D8Types.h @@ -991,10 +991,10 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE; // Special Registers, used to pass additional information to the shaders // TODO co-locate shader workaround constants with shader code -#define X_D3DVS_CONSTREG_VERTEXDATA4F_BASE (X_D3DVS_CONSTREG_COUNT) -#define X_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE (X_D3DVS_CONSTREG_VERTEXDATA4F_BASE + 16) -#define X_D3DVS_VIEWPORT_SCALE_MIRROR (X_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE + 4) -#define X_D3DVS_VIEWPORT_OFFSET_MIRROR (X_D3DVS_VIEWPORT_SCALE_MIRROR + 1) +#define CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE (X_D3DVS_CONSTREG_COUNT) +#define CXBX_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE (CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + 16) +#define CXBX_D3DVS_VIEWPORT_SCALE_MIRROR (CXBX_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE + 4) +#define CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR (CXBX_D3DVS_VIEWPORT_SCALE_MIRROR + 1) #define X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION) #define X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_OFFSET + X_D3DSCM_CORRECTION) @@ -1217,9 +1217,6 @@ typedef DWORD NV2AMETHOD; // Below declarations are used by Cxbx, not by the Xbox!!! 
// -// Host vertex shader counts -#define CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE X_D3DVS_CONSTREG_COUNT - } // end of namespace XTL #endif From 270462d22c309d6e7a3acdd339baccb9670dcc11 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Thu, 19 Dec 2019 17:12:52 +0100 Subject: [PATCH 75/77] Replaced Parameter.Active with ParamCount --- src/core/hle/D3D8/XbVertexShader.cpp | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 14f9c90fd..9df4a2603 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -192,7 +192,6 @@ private: } VSH_IMD_OUTPUT; typedef struct _VSH_IMD_PARAMETER { - bool Active; VSH_PARAMETER_TYPE ParameterType; // Parameter type, R, V or C bool Neg; // true if negated, false if not VSH_SWIZZLE Swizzle[4]; // The four swizzles @@ -203,6 +202,7 @@ private: VSH_MAC MAC; VSH_ILU ILU; VSH_IMD_OUTPUT Output; + unsigned ParamCount; VSH_IMD_PARAMETER Parameters[3]; // There is only a single address register in Microsoft DirectX 8.0. 
// The address register, designated as a0.x, may be used as signed @@ -300,7 +300,6 @@ private: uint16_t V, uint16_t C) { - Param.Active = true; Param.ParameterType = (VSH_PARAMETER_TYPE)VshGetField(pShaderToken, FLD_MUX); switch (Param.ParameterType) { case PARAM_R: @@ -317,7 +316,7 @@ private: } int d = FLD_NEG - FLD_A_NEG; - Param.Neg = VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_NEG)); + Param.Neg = VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_NEG)) > 0; Param.Swizzle[0] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_X)); Param.Swizzle[1] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Y)); Param.Swizzle[2] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Z)); @@ -353,27 +352,23 @@ private: int16_t R; int16_t V = VshGetField(pShaderToken, FLD_V); int16_t C = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST)); - unsigned ParamCount = 0; - - // Parameters[0].Active will always be set, but [1] and [2] may not, so reset them: - pIntermediate->Parameters[1].Active = false; - pIntermediate->Parameters[2].Active = false; + pIntermediate->ParamCount = 0; if (MAC >= MAC_MOV) { // Get parameter A R = VshGetField(pShaderToken, FLD_A_R); - VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_A_MUX, FLD_A_NEG, R, V, C); + VshConvertIntermediateParam(pIntermediate->Parameters[pIntermediate->ParamCount++], pShaderToken, FLD_A_MUX, FLD_A_NEG, R, V, C); } if ((MAC == MAC_MUL) || ((MAC >= MAC_MAD) && (MAC <= MAC_SGE))) { // Get parameter B R = VshGetField(pShaderToken, FLD_B_R); - VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_B_MUX, FLD_B_NEG, R, V, C); + VshConvertIntermediateParam(pIntermediate->Parameters[pIntermediate->ParamCount++], pShaderToken, FLD_B_MUX, FLD_B_NEG, R, V, C); } if ((ILU >= ILU_MOV) || (MAC == MAC_ADD) || (MAC == MAC_MAD)) { // Get parameter C R = VshGetField(pShaderToken, FLD_C_R_HIGH) << 2 
| VshGetField(pShaderToken, FLD_C_R_LOW); - VshConvertIntermediateParam(pIntermediate->Parameters[ParamCount++], pShaderToken, FLD_C_MUX, FLD_C_NEG, R, V, C); + VshConvertIntermediateParam(pIntermediate->Parameters[pIntermediate->ParamCount++], pShaderToken, FLD_C_MUX, FLD_C_NEG, R, V, C); } } @@ -594,11 +589,9 @@ public: hlsl << "\n " << str << "("; // opcode OutputHlsl(hlsl, IntermediateInstruction.Output); - for (int i = 0; i < 3; i++) { - if (IntermediateInstruction.Parameters[i].Active) { - hlsl << ", "; - ParameterHlsl(hlsl, IntermediateInstruction.Parameters[i], IntermediateInstruction.IndexesWithA0_X); - } + for (unsigned i = 0; i < IntermediateInstruction.ParamCount; i++) { + hlsl << ", "; + ParameterHlsl(hlsl, IntermediateInstruction.Parameters[i], IntermediateInstruction.IndexesWithA0_X); } hlsl << ");"; @@ -1708,7 +1701,7 @@ extern HRESULT EmuRecompileVshFunction ); if (FAILED(hRet)) { LOG_TEST_CASE("Couldn't assemble recompiled vertex shader"); - //EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); + //EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); } // Determine the log level From 56af31aede98bd6b28804bd828115e9d8a3f53e3 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Thu, 19 Dec 2019 18:16:36 +0100 Subject: [PATCH 76/77] Remove unneeded HostDeclarationSize --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 2 -- src/core/hle/D3D8/XbVertexShader.cpp | 5 +---- src/core/hle/D3D8/XbVertexShader.h | 1 - 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index e79285008..366cdd188 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -4122,14 +4122,12 @@ HRESULT WINAPI XTL::EMUPATCH(D3DDevice_CreateVertexShader) // Now, we can create the host vertex shader DWORD XboxDeclarationCount = 0; - DWORD HostDeclarationSize = 0; CxbxVertexShader* pCxbxVertexShader = 
(CxbxVertexShader*)calloc(1, sizeof(CxbxVertexShader)); D3DVERTEXELEMENT *pRecompiledDeclaration = nullptr; pRecompiledDeclaration = EmuRecompileVshDeclaration((DWORD*)pDeclaration, /*bIsFixedFunction=*/pFunction == xbnullptr, &XboxDeclarationCount, - &HostDeclarationSize, &pCxbxVertexShader->VertexShaderInfo); // Create the vertex declaration diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 9df4a2603..1f874b7c7 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -620,7 +620,6 @@ protected: public: // Output DWORD XboxDeclarationCount; - DWORD HostDeclarationSize; private: #define D3DDECLUSAGE_UNSUPPORTED ((D3DDECLUSAGE)-1) @@ -1425,7 +1424,7 @@ public: // Calculate size of declaration XboxDeclarationCount = VshGetDeclarationCount(pXboxVertexDeclarationCopy); // For Direct3D9, we need to reserve at least twice the number of elements, as one token can generate two registers (in and out) : - HostDeclarationSize = XboxDeclarationCount * sizeof(D3DVERTEXELEMENT) * 2; + unsigned HostDeclarationSize = XboxDeclarationCount * sizeof(D3DVERTEXELEMENT) * 2; D3DVERTEXELEMENT *Result = (D3DVERTEXELEMENT *)calloc(1, HostDeclarationSize); pRecompiled = Result; @@ -1470,7 +1469,6 @@ D3DVERTEXELEMENT *EmuRecompileVshDeclaration DWORD *pXboxDeclaration, bool bIsFixedFunction, DWORD *pXboxDeclarationCount, - DWORD *pHostDeclarationSize, CxbxVertexShaderInfo *pCxbxVertexShaderInfo ) { @@ -1479,7 +1477,6 @@ D3DVERTEXELEMENT *EmuRecompileVshDeclaration D3DVERTEXELEMENT* pHostVertexElements = Converter.Convert(pXboxDeclaration, bIsFixedFunction, pCxbxVertexShaderInfo); *pXboxDeclarationCount = Converter.XboxDeclarationCount; - *pHostDeclarationSize = Converter.HostDeclarationSize; return pHostVertexElements; } diff --git a/src/core/hle/D3D8/XbVertexShader.h b/src/core/hle/D3D8/XbVertexShader.h index 11087fe07..efc907360 100644 --- a/src/core/hle/D3D8/XbVertexShader.h +++ 
b/src/core/hle/D3D8/XbVertexShader.h @@ -102,7 +102,6 @@ extern D3DVERTEXELEMENT *EmuRecompileVshDeclaration DWORD *pXboxDeclaration, bool bIsFixedFunction, DWORD *pXboxDeclarationCount, - DWORD *pHostDeclarationSize, CxbxVertexShaderInfo *pCxbxVertexShaderInfo ); From f23c9adae14f553cf666aa55b3593fac452c1058 Mon Sep 17 00:00:00 2001 From: Luke Usher Date: Thu, 19 Dec 2019 19:34:46 +0000 Subject: [PATCH 77/77] Fix vertex state shader compilation in Spy vs Spy^ --- src/core/hle/D3D8/XbVertexShader.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 1f874b7c7..c5bbbaea3 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -1633,8 +1633,7 @@ extern HRESULT EmuRecompileVshFunction case VERSION_XVS: break; case VERSION_XVSS: - EmuLog(LOG_LEVEL::WARNING, "Might not support vertex state shaders?"); - hRet = E_FAIL; + LOG_TEST_CASE("Might not support vertex state shaders?"); break; case VERSION_XVSW: EmuLog(LOG_LEVEL::WARNING, "Might not support vertex read/write shaders?"); @@ -1697,8 +1696,27 @@ extern HRESULT EmuRecompileVshFunction &pErrors // ppErrorMsgs out ); if (FAILED(hRet)) { - LOG_TEST_CASE("Couldn't assemble recompiled vertex shader"); - //EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); + // Attempt to retry in compatibility mode, this allows some vertex-state shaders to compile + // Test Case: Spy vs Spy + flags1 |= D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY; + hRet = D3DCompile( + hlsl_str.c_str(), + hlsl_str.length(), + nullptr, // pSourceName + nullptr, // pDefines + nullptr, // pInclude // TODO precompile x_* HLSL functions? 
+ "main", // shader entry poiint + "vs_3_0", // shader profile + flags1, // flags1 + 0, // flags2 + ppRecompiledShader, // out + &pErrors // ppErrorMsgs out + ); + + if (FAILED(hRet)) { + LOG_TEST_CASE("Couldn't assemble recompiled vertex shader"); + //EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); + } } // Determine the log level