diff --git a/CMakeLists.txt b/CMakeLists.txt
index 53d635031..584f04cf8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -143,6 +143,7 @@ file (GLOB CXBXR_HEADER_EMU
     "${CXBXR_ROOT_DIR}/src/core/common/video/RenderBase.hpp"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl"
+    "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderPassthrough.hlsl"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.h"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli"
@@ -151,7 +152,7 @@ file (GLOB CXBXR_HEADER_EMU
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/PixelShader.h"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Shader.h"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShader.h"
-    "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h"
+    "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderCache.h"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/WalkIndexBuffer.h"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/FixedFunctionState.h"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/ResourceTracker.h"
@@ -324,7 +325,7 @@ file (GLOB CXBXR_SOURCE_EMU
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Shader.cpp"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/TextureStates.cpp"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShader.cpp"
-    "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp"
+    "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderCache.cpp"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/WalkIndexBuffer.cpp"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/FixedFunctionState.cpp"
     "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/ResourceTracker.cpp"
@@ -468,14 +469,21 @@ install(FILES ${cxbxr_INSTALL_files}
     DESTINATION bin
 )
 
-install(FILES
-    "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl"
-    "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl"
-    "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli"
-    "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli"
-    "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl"
-    DESTINATION bin/hlsl
+# Copy HLSL files to the output directory, which are loaded at runtime
+set(CXBXR_HLSL_FILES ${CXBXR_HEADER_EMU})
+# "\\." reaches the regex engine as "\." (a literal dot); "$" anchors the match at the file extension
+list(FILTER CXBXR_HLSL_FILES INCLUDE REGEX ".*/src/core/hle/D3D8/Direct3D9/[^/]+\\.hlsli?$")
+add_custom_command(
+    TARGET misc-batch POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/$/hlsl
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CXBXR_HLSL_FILES} "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/$/hlsl"
+    # These files can be edited.
+    # Create backup copies for convenience of restoring original shader behaviour.
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/$/hlsl/backup
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CXBXR_HLSL_FILES} "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/$/hlsl/backup"
+    VERBATIM
 )
+install(DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/$/hlsl DESTINATION bin)
 
 set(cxbxr_GLEW_DLL "${CMAKE_SOURCE_DIR}/import/glew-2.0.0/bin/Release/Win32/glew32.dll")
 
diff --git a/projects/misc/batch.cmake b/projects/misc/batch.cmake
index 9f1f15550..e3678c490 100644
--- a/projects/misc/batch.cmake
+++ b/projects/misc/batch.cmake
@@ -27,14 +27,3 @@ message("Runtime Build Directory: ${TargetRunTimeDir}")
 # Copy glew32.dll to build type's folder.
set(CXBXR_GLEW_DLL "${CMAKE_SOURCE_DIR}/import/glew-2.0.0/bin/Release/Win32/glew32.dll") file(COPY ${CXBXR_GLEW_DLL} DESTINATION ${TargetRunTimeDir}) - -# Copy certain HLSL files to the output directory, which we will load at runtime -set(CXBXR_HLSL_FILES -"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli" -"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" -"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli" -"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl" -) -set(HlslOutputDir ${TargetRunTimeDir}/hlsl) -file(MAKE_DIRECTORY ${HlslOutputDir}) -file(COPY ${CXBXR_HLSL_FILES} DESTINATION ${HlslOutputDir}) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index d80c6f98d..f4ef3864e 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -1,6 +1,3 @@ -// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : -R"DELIMITER( - struct PS_INPUT // Declared identical to vertex shader output (see VS_OUTPUT) { float2 iPos : VPOS; // Screen space x,y pixel location @@ -92,10 +89,9 @@ uniform const float FRONTFACE_FACTOR : register(c27); // Note : PSH_XBOX_CONSTA #define PS_FINALCOMBINERSETTING_CLAMP_SUM #endif -)DELIMITER", /* This terminates the 1st raw string within the 16380 single-byte characters limit. // */ -// See https://docs.microsoft.com/en-us/cpp/error-messages/compiler-errors-1/compiler-error-c2026?f1url=%3FappId%3DDev15IDEF1%26l%3DEN-US%26k%3Dk(C2026)%26rd%3Dtrue&view=vs-2019 -// Second raw string : -R"DELIMITER( + // Hardcoded state will be inserted here + // + // End hardcoded state // PS_COMBINERCOUNT_UNIQUE_C0 steers whether for C0 to use combiner stage-specific constants c0_0 .. 
c0_7, or c0_0 for all stages #ifdef PS_COMBINERCOUNT_UNIQUE_C0 @@ -173,10 +169,6 @@ R"DELIMITER( // HLSL : https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-lerp // lerp(x, y, s ) x*(1-s ) + y*s == x + s(y-x) // lerp(s2, s1, s0) s2*(1-s0) + s1*s0 -)DELIMITER", /* This terminates the 1st raw string within the 16380 single-byte characters limit. // */ -// See https://docs.microsoft.com/en-us/cpp/error-messages/compiler-errors-1/compiler-error-c2026?f1url=%3FappId%3DDev15IDEF1%26l%3DEN-US%26k%3Dk(C2026)%26rd%3Dtrue&view=vs-2019 -// Second raw string : -R"DELIMITER( float m21d(const float input) { @@ -379,10 +371,9 @@ PS_OUTPUT main(const PS_INPUT xIn) v1 = isFrontFace ? xIn.iD1 : xIn.iB1; // Specular front/back fog = float4(c_fog.rgb, xIn.iFog); // color from PSH_XBOX_CONSTANT_FOG, alpha from vertex shader output / pixel shader input - // Xbox shader program -)DELIMITER", /* This terminates the 2nd raw string within the 16380 single-byte characters limit. // */ -// Third and last raw string, the footer : -R"DELIMITER( + // Xbox shader program will be inserted here + // + // End Xbox shader program // Copy r0.rgba to output PS_OUTPUT xOut; @@ -391,5 +382,3 @@ R"DELIMITER( return xOut; } - -// End of pixel shader footer)DELIMITER" /* This terminates the footer raw string" // */ diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderPassthrough.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderPassthrough.hlsl new file mode 100644 index 000000000..9938662a5 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderPassthrough.hlsl @@ -0,0 +1,131 @@ +// Xbox HLSL pretransformed vertex shader + +// Default values for vertex registers, and whether to use them +uniform float4 vRegisterDefaultValues[16] : register(c192); +uniform float4 vRegisterDefaultFlagsPacked[4] : register(c208); + +uniform float4 xboxScreenspaceScale : register(c212); +uniform float4 xboxScreenspaceOffset : register(c213); + + +uniform float4 xboxTextureScale[4] : 
register(c214); + +// Parameters for mapping the shader's fog output value to a fog factor +uniform float4 CxbxFogInfo: register(c218); // = CXBX_D3DVS_CONSTREG_FOGINFO + +struct VS_INPUT +{ + float4 v[16] : TEXCOORD; +}; + +// Output registers +struct VS_OUTPUT +{ + float4 oPos : POSITION; // Homogeneous clip space position + float4 oD0 : COLOR0; // Primary color (front-facing) + float4 oD1 : COLOR1; // Secondary color (front-facing) + float oFog : FOG; // Fog coordinate + float oPts : PSIZE; // Point size + float4 oB0 : TEXCOORD4; // Back-facing primary color + float4 oB1 : TEXCOORD5; // Back-facing secondary color + float4 oT0 : TEXCOORD0; // Texture coordinate set 0 + float4 oT1 : TEXCOORD1; // Texture coordinate set 1 + float4 oT2 : TEXCOORD2; // Texture coordinate set 2 + float4 oT3 : TEXCOORD3; // Texture coordinate set 3 +}; + +float4 reverseScreenspaceTransform(float4 oPos) +{ + // Scale screenspace coordinates (0 to viewport width/height) to -1 to +1 range + + // On Xbox, oPos should contain the vertex position in screenspace + // We need to reverse this transformation + // Conventionally, each Xbox Vertex Shader includes instructions like this + // mul oPos.xyz, r12, c-38 + // +rcc r1.x, r12.w + // mad oPos.xyz, r12, r1.x, c-37 + // where c-37 and c-38 are reserved transform values + + // Reverse screenspace offset + oPos -= xboxScreenspaceOffset; + // Reverse screenspace scale + oPos /= xboxScreenspaceScale; + + // Ensure w is nonzero + if(oPos.w == 0) oPos.w = 1; + // Reverse perspective divide + oPos.xyz *= oPos.w; + return oPos; +} + +VS_OUTPUT main(const VS_INPUT xIn) +{ + // Input registers + float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; + + // Unpack 16 flags from 4 float4 constant registers + float vRegisterDefaultFlags[16] = (float[16])vRegisterDefaultFlagsPacked; + + // Initialize input registers from the vertex buffer data + // Or use the register's default value (which can be changed by the title) + #define 
init_v(i) v##i = lerp(xIn.v[i], vRegisterDefaultValues[i], vRegisterDefaultFlags[i]);
+    // Note : unroll manually instead of for-loop, because of the ## concatenation
+    init_v( 0); init_v( 1); init_v( 2); init_v( 3);
+    init_v( 4); init_v( 5); init_v( 6); init_v( 7);
+    init_v( 8); init_v( 9); init_v(10); init_v(11);
+    init_v(12); init_v(13); init_v(14); init_v(15);
+
+    // For passthrough, map output variables to their corresponding input registers
+    float4 oPos = v0;
+    float4 oD0 = v3;
+    float4 oD1 = v4;
+    float4 oFog = v5;
+    float4 oPts = v6;
+    float4 oB0 = v7;
+    float4 oB1 = v8;
+    float4 oT0 = v9;
+    float4 oT1 = v10;
+    float4 oT2 = v11;
+    float4 oT3 = v12;
+
+    // Copy variables to output struct
+    VS_OUTPUT xOut;
+
+    // Fogging
+    // TODO: deduplicate
+    const float fogDepth = abs(oFog.x);
+    const float fogTableMode = CxbxFogInfo.x;
+    const float fogDensity = CxbxFogInfo.y;
+    const float fogStart = CxbxFogInfo.z;
+    const float fogEnd = CxbxFogInfo.w;
+
+    const float FOG_TABLE_NONE = 0;
+    const float FOG_TABLE_EXP = 1;
+    const float FOG_TABLE_EXP2 = 2;
+    const float FOG_TABLE_LINEAR = 3;
+
+    float fogFactor = fogDepth; // Defined default: same value as FOG_TABLE_NONE, also used when fogTableMode matches no known mode
+    if(fogTableMode == FOG_TABLE_NONE)
+        fogFactor = fogDepth;
+    if(fogTableMode == FOG_TABLE_EXP)
+        fogFactor = 1 / exp(fogDepth * fogDensity); /* / 1 / e^(d * density)*/
+    if(fogTableMode == FOG_TABLE_EXP2)
+        fogFactor = 1 / exp(pow(fogDepth * fogDensity, 2)); /* / 1 / e^((d * density)^2)*/
+    if(fogTableMode == FOG_TABLE_LINEAR)
+        fogFactor = (fogEnd - fogDepth) / (fogEnd - fogStart);
+
+    xOut.oPos = reverseScreenspaceTransform(oPos);
+    xOut.oD0 = saturate(oD0);
+    xOut.oD1 = saturate(oD1);
+    xOut.oFog = fogFactor; // Note : Xbox clamps fog in pixel shader
+    xOut.oPts = oPts.x;
+    xOut.oB0 = saturate(oB0);
+    xOut.oB1 = saturate(oB1);
+    // Scale textures (TODO: or should we apply this to the input register values?)
+ xOut.oT0 = oT0 / xboxTextureScale[0]; + xOut.oT1 = oT1 / xboxTextureScale[1]; + xOut.oT2 = oT2 / xboxTextureScale[2]; + xOut.oT3 = oT3 / xboxTextureScale[3]; + + return xOut; +} diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index 94f705b1c..ecb7307df 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -1,6 +1,3 @@ -// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : -R"DELIMITER(// Xbox HLSL vertex shader (template populated at runtime) - struct VS_INPUT { float4 v[16] : TEXCOORD; @@ -326,9 +323,9 @@ VS_OUTPUT main(const VS_INPUT xIn) // Temp variable for paired VS instruction float4 temp; - // Xbox shader program)DELIMITER", /* This terminates the header raw string" // */ - -R"DELIMITER( + // Xbox shader program will be inserted here + // + // End Xbox shader program // Copy variables to output struct VS_OUTPUT xOut; @@ -371,5 +368,3 @@ R"DELIMITER( return xOut; } - -// End of vertex shader footer)DELIMITER" /* This terminates the footer raw string" // */ diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 9237c8197..34a55e8b6 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -42,6 +42,7 @@ #include "..\FixedFunctionState.h" #include "core\hle\D3D8\ResourceTracker.h" #include "core\hle\D3D8\Direct3D9\Direct3D9.h" // For LPDIRECTDRAWSURFACE7 +#include "core\hle\D3D8\Direct3D9\Shader.h" // For InitShaderHotloading #include "core\hle\D3D8\XbVertexBuffer.h" #include "core\hle\D3D8\XbVertexShader.h" #include "core\hle\D3D8\XbPixelShader.h" // For DxbxUpdateActivePixelShader @@ -62,7 +63,7 @@ #include "common\input\DInputKeyboardMouse.h" #include "common\input\InputManager.h" #include "common/util/strConverter.hpp" // for utf8_to_utf16 -#include 
"VertexShaderSource.h" +#include "VertexShaderCache.h" #include "Timer.h" #include @@ -682,6 +683,10 @@ void CxbxInitWindow(bool bFullInit) g_renderbase->SetWindowRelease([] { ImGui_ImplWin32_Shutdown(); }); + + (void) g_ShaderSources.Update(); + g_ShaderSources.InitShaderHotloading(); + } void DrawUEM(HWND hWnd) @@ -2273,7 +2278,7 @@ static void CreateDefaultD3D9Device DrawInitialBlackScreen(); // Set up cache - g_VertexShaderSource.ResetD3DDevice(g_pD3DDevice); + g_VertexShaderCache.ResetD3DDevice(g_pD3DDevice); // Set up ImGui's render backend ImGui_ImplDX9_Init(g_pD3DDevice); diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp index 384780a70..313140362 100644 --- a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -289,12 +289,7 @@ bool IsTextureSampled(DecodedRegisterCombiner* pShader, int reg) void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) { - // Include HLSL header and footer as raw strings : - static const std::string hlsl_template[4] = { - #include "core\hle\D3D8\Direct3D9\CxbxPixelShaderTemplate.hlsl" - }; - - hlsl << hlsl_template[0]; // Start with the HLSL template header + hlsl << g_ShaderSources.pixelShaderTemplateHlsl[0]; // Start with the HLSL template header hlsl << "\n#define ALPHAKILL {" << (pShader->AlphaKill[0] ? 
"true, " : "false, ") @@ -341,9 +336,9 @@ void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementV1, "PS_FINALCOMBINERSETTING_COMPLEMENT_V1"); OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementR0, "PS_FINALCOMBINERSETTING_COMPLEMENT_R0"); OutputDefineFlag(hlsl, pShader->FinalCombiner.ClampSum, "PS_FINALCOMBINERSETTING_CLAMP_SUM"); + hlsl << '\n'; - hlsl << hlsl_template[1]; - hlsl << hlsl_template[2]; + hlsl << g_ShaderSources.pixelShaderTemplateHlsl[1]; // Generate all four texture stages for (unsigned i = 0; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) { @@ -390,7 +385,7 @@ void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) FinalCombinerStageHlsl(hlsl, pShader->FinalCombiner, pShader->hasFinalCombiner); - hlsl << hlsl_template[3]; // Finish with the HLSL template footer + hlsl << g_ShaderSources.pixelShaderTemplateHlsl[2]; // Finish with the HLSL template footer } // recompile xbox pixel shader function diff --git a/src/core/hle/D3D8/Direct3D9/Shader.cpp b/src/core/hle/D3D8/Direct3D9/Shader.cpp index 382fc0aab..63a7228e7 100644 --- a/src/core/hle/D3D8/Direct3D9/Shader.cpp +++ b/src/core/hle/D3D8/Direct3D9/Shader.cpp @@ -29,10 +29,18 @@ #include #include "Shader.h" +#include "common/FilePaths.hpp" // For szFilePath_CxbxReloaded_Exe #include "core\kernel\init\CxbxKrnl.h" // LOG_TEST_CASE #include "core\kernel\support\Emu.h" // EmuLog + +#include +#include +#include +#include //#include +ShaderSources g_ShaderSources; + std::string DebugPrependLineNumbers(std::string shaderString) { std::stringstream shader(shaderString); auto debugShader = std::stringstream(); @@ -140,3 +148,173 @@ extern HRESULT EmuCompileShader return hRet; } + +std::ifstream OpenWithRetry(const std::string& path) { + auto fstream = std::ifstream(path); + int failures = 0; + while (fstream.fail()) { + Sleep(50); + fstream = std::ifstream(path); + + if (failures++ > 10) { + // crash? 
+            CxbxrAbort("Error opening shader file: %s", path.c_str()); // %s expects a C string, not a std::string object
+            break;
+        }
+    }
+
+    return fstream;
+}
+
+int ShaderSources::Update() {
+    int versionOnDisk = shaderVersionOnDisk; // read the counter once so the compare and the store below agree
+    if (shaderVersionLoadedFromDisk != versionOnDisk) {
+        LoadShadersFromDisk();
+        shaderVersionLoadedFromDisk = versionOnDisk;
+    }
+
+    return shaderVersionLoadedFromDisk;
+}
+
+void ShaderSources::LoadShadersFromDisk() {
+    const auto hlslDir = std::filesystem::path(szFilePath_CxbxReloaded_Exe)
+        .parent_path()
+        .append("hlsl");
+
+    // Pixel Shader Template
+    {
+        std::stringstream tmp;
+        auto dir = hlslDir;
+        dir.append("CxbxPixelShaderTemplate.hlsl");
+        tmp << OpenWithRetry(dir.string()).rdbuf();
+        std::string hlsl = tmp.str();
+
+        // Split the HLSL file on insertion points: N markers yield N + 1 template segments
+        std::array insertionPoints = {
+            "// \n",
+            "// \n",
+        };
+        std::size_t pos = 0; // offset just past the previously matched marker
+        for (std::size_t i = 0; i < insertionPoints.size(); i++) {
+            auto insertionPoint = insertionPoints[i];
+            auto index = hlsl.find(insertionPoint, pos);
+
+            if (index == std::string::npos) {
+                // Handle broken shaders: marker not found, leave this segment empty
+                this->pixelShaderTemplateHlsl[i] = "";
+            }
+            else {
+                this->pixelShaderTemplateHlsl[i] = hlsl.substr(pos, index - pos);
+                pos = index + insertionPoint.length();
+            }
+        }
+        this->pixelShaderTemplateHlsl[insertionPoints.size()] = hlsl.substr(pos);
+    }
+
+    // Fixed Function Pixel Shader
+    {
+        auto dir = hlslDir;
+        this->fixedFunctionPixelShaderPath = dir.append("FixedFunctionPixelShader.hlsl").string();
+        std::stringstream tmp;
+        tmp << OpenWithRetry(this->fixedFunctionPixelShaderPath).rdbuf();
+        this->fixedFunctionPixelShaderHlsl = tmp.str();
+    }
+
+    // Vertex Shader Template
+    {
+        std::stringstream tmp;
+        auto dir = hlslDir;
+        dir.append("CxbxVertexShaderTemplate.hlsl");
+        tmp << OpenWithRetry(dir.string()).rdbuf();
+        std::string hlsl = tmp.str();
+
+        const std::string insertionPoint = "// \n";
+        auto index = hlsl.find(insertionPoint);
+
+        if (index == std::string::npos) {
+            // Handle broken shaders
+            this->vertexShaderTemplateHlsl[0] = hlsl;
+
this->vertexShaderTemplateHlsl[1] = ""; + } + else + { + this->vertexShaderTemplateHlsl[0] = hlsl.substr(0, index); + this->vertexShaderTemplateHlsl[1] = hlsl.substr(index + insertionPoint.length()); + } + } + + // Fixed Function Vertex Shader + { + auto dir = hlslDir; + this->fixedFunctionVertexShaderPath = dir.append("FixedFunctionVertexShader.hlsl").string(); + std::stringstream tmp; + tmp << OpenWithRetry(this->fixedFunctionVertexShaderPath).rdbuf(); + this->fixedFunctionVertexShaderHlsl = tmp.str(); + } + + // Passthrough Vertex Shader + { + auto dir = hlslDir; + this->vertexShaderPassthroughPath = dir.append("CxbxVertexShaderPassthrough.hlsl").string(); + std::stringstream tmp; + tmp << OpenWithRetry(this->vertexShaderPassthroughPath).rdbuf(); + this->vertexShaderPassthroughHlsl = tmp.str(); + } +} + +void ShaderSources::InitShaderHotloading() { + static std::jthread fsWatcherThread; + + if (fsWatcherThread.joinable()) { + EmuLog(LOG_LEVEL::ERROR2, "Ignoring request to start shader file watcher - it has already been started."); + return; + } + + EmuLog(LOG_LEVEL::DEBUG, "Starting shader file watcher..."); + + fsWatcherThread = std::jthread([]{ + // Determine the filename and directory for the fixed function shader + char cxbxExePath[MAX_PATH]; + GetModuleFileName(GetModuleHandle(nullptr), cxbxExePath, MAX_PATH); + auto hlslDir = std::filesystem::path(cxbxExePath).parent_path().append("hlsl/"); + + HANDLE changeHandle = FindFirstChangeNotification(hlslDir.string().c_str(), false, FILE_NOTIFY_CHANGE_LAST_WRITE); + + if (changeHandle == INVALID_HANDLE_VALUE) { + DWORD errorCode = GetLastError(); + EmuLog(LOG_LEVEL::ERROR2, "Error initializing shader file watcher: %d", errorCode); + + return 1; + } + + while (true) { + if (FindNextChangeNotification(changeHandle)) { + WaitForSingleObject(changeHandle, INFINITE); + + // Wait for changes to stop.. 
+ // Will usually be at least two - one for the file and one for the directory + while (true) { + FindNextChangeNotification(changeHandle); + if (WaitForSingleObject(changeHandle, 100) == WAIT_TIMEOUT) { + break; + } + } + + EmuLog(LOG_LEVEL::DEBUG, "Change detected in shader folder"); + + g_ShaderSources.shaderVersionOnDisk++; + } + else { + EmuLog(LOG_LEVEL::ERROR2, "Shader filewatcher failed to get the next notification"); + break; + } + } + + EmuLog(LOG_LEVEL::DEBUG, "Shader file watcher exiting..."); + + // until there is a way to disable hotloading + // this is always an error + FindCloseChangeNotification(changeHandle); + return 1; + }); +} diff --git a/src/core/hle/D3D8/Direct3D9/Shader.h b/src/core/hle/D3D8/Direct3D9/Shader.h index 1a89d14a5..3f9c21db1 100644 --- a/src/core/hle/D3D8/Direct3D9/Shader.h +++ b/src/core/hle/D3D8/Direct3D9/Shader.h @@ -1,5 +1,6 @@ #pragma once +#include #include // std::string #include // ID3DBlob (via d3d9.h > d3d11shader.h > d3dcommon.h) @@ -10,3 +11,38 @@ extern HRESULT EmuCompileShader ID3DBlob** ppHostShader, const char* pSourceName = nullptr ); + +struct ShaderSources { + // Pixel Shader + std::string pixelShaderTemplateHlsl[3]; + + std::string fixedFunctionPixelShaderHlsl; + std::string fixedFunctionPixelShaderPath; + + // Vertex Shader + std::string vertexShaderTemplateHlsl[2]; + + std::string fixedFunctionVertexShaderHlsl; + std::string fixedFunctionVertexShaderPath; + + std::string vertexShaderPassthroughHlsl; + std::string vertexShaderPassthroughPath; + + // Load shaders from disk (if out-of-date) + // and return the current loaded shader version + int Update(); + + // Start a thread to watch for changes in the shader folder + void InitShaderHotloading(); + +private: + void LoadShadersFromDisk(); + + // counts upwards on every change detected to the shader source files at runtime + std::atomic_int shaderVersionOnDisk = 0; + // current loaded shader version + // Initialized to < shaderVersionOnDisk + int 
shaderVersionLoadedFromDisk = -1; +}; + +extern ShaderSources g_ShaderSources; diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp index dbf13a73d..c030f9e5a 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp @@ -4,9 +4,7 @@ #include "VertexShader.h" // EmuCompileVertexShader #include "core\kernel\init\CxbxKrnl.h" // implicit CxbxKrnl_Xbe used in LOG_TEST_CASE #include "core\kernel\support\Emu.h" // LOG_TEST_CASE (via Logging.h) -#include "common/FilePaths.hpp" // For szFilePath_CxbxReloaded_Exe -#include #include // std::stringstream extern const char* g_vs_model = vs_model_3_0; @@ -290,26 +288,23 @@ extern HRESULT EmuCompileVertexShader ID3DBlob** ppHostShader ) { - // Include HLSL header and footer as raw strings : - static std::string hlsl_template[2] = { - #include "core\hle\D3D8\Direct3D9\CxbxVertexShaderTemplate.hlsl" - }; - - auto hlsl_stream = std::stringstream(); - hlsl_stream << hlsl_template[0]; // Start with the HLSL template header assert(pIntermediateShader->Instructions.size() > 0); - BuildShader(pIntermediateShader, hlsl_stream); - hlsl_stream << hlsl_template[1]; // Finish with the HLSL template footer + // Combine the shader template with the shader program + auto hlsl_stream = std::stringstream(); + hlsl_stream << g_ShaderSources.vertexShaderTemplateHlsl[0]; // Start with the HLSL template header + BuildShader(pIntermediateShader, hlsl_stream); + hlsl_stream << g_ShaderSources.vertexShaderTemplateHlsl[1]; // Finish with the HLSL template footer std::string hlsl_str = hlsl_stream.str(); - HRESULT hRet = EmuCompileShader(hlsl_str, g_vs_model, ppHostShader, "CxbxVertexShaderTemplate.hlsl"); + const char* notionalSourceName = "CxbxVertexShaderTemplate.hlsl"; + HRESULT hRet = EmuCompileShader(hlsl_str, g_vs_model, ppHostShader, notionalSourceName); if (FAILED(hRet) && (g_vs_model != vs_model_3_0)) { // If the shader failed in the default 
vertex shader model, retry in vs_model_3_0 // This allows shaders too large for 2_a to be compiled (Test Case: Shenmue 2) EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Retrying with shader model 3.0"); - hRet = EmuCompileShader(hlsl_str, vs_model_3_0, ppHostShader, "CxbxVertexShaderTemplate.hlsl"); + hRet = EmuCompileShader(hlsl_str, vs_model_3_0, ppHostShader, notionalSourceName); } return hRet; @@ -317,174 +312,10 @@ extern HRESULT EmuCompileVertexShader extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader) { - static ID3DBlob* pShader = nullptr; - - // TODO does this need to be thread safe? - if (pShader == nullptr) { - // Determine the filename and directory for the fixed function shader - auto hlslDir = std::filesystem::path(szFilePath_CxbxReloaded_Exe) - .parent_path() - .append("hlsl"); - - auto sourceFile = hlslDir.append("FixedFunctionVertexShader.hlsl").string(); - - // Load the shader into a string - std::ifstream hlslStream(sourceFile); - std::stringstream hlsl; - hlsl << hlslStream.rdbuf(); - - // Compile the shader - EmuCompileShader(hlsl.str(), g_vs_model, &pShader, sourceFile.c_str()); - } - - *ppHostShader = pShader; + EmuCompileShader(g_ShaderSources.fixedFunctionVertexShaderHlsl, g_vs_model, ppHostShader, g_ShaderSources.fixedFunctionVertexShaderPath.c_str()); }; -static ID3DBlob* pPassthroughShader = nullptr; - -extern HRESULT EmuCompileXboxPassthrough(ID3DBlob** ppHostShader) +extern void EmuCompileXboxPassthrough(ID3DBlob** ppHostShader) { - // TODO does this need to be thread safe? 
- if (pPassthroughShader == nullptr) { - auto hlsl = -R"( -// Xbox HLSL pretransformed vertex shader - -// Default values for vertex registers, and whether to use them -uniform float4 vRegisterDefaultValues[16] : register(c192); -uniform float4 vRegisterDefaultFlagsPacked[4] : register(c208); - -uniform float4 xboxScreenspaceScale : register(c212); -uniform float4 xboxScreenspaceOffset : register(c213); - - -uniform float4 xboxTextureScale[4] : register(c214); - -// Parameters for mapping the shader's fog output value to a fog factor -uniform float4 CxbxFogInfo: register(c218); // = CXBX_D3DVS_CONSTREG_FOGINFO - -struct VS_INPUT -{ - float4 v[16] : TEXCOORD; -}; - -// Output registers -struct VS_OUTPUT -{ - float4 oPos : POSITION; // Homogeneous clip space position - float4 oD0 : COLOR0; // Primary color (front-facing) - float4 oD1 : COLOR1; // Secondary color (front-facing) - float oFog : FOG; // Fog coordinate - float oPts : PSIZE; // Point size - float4 oB0 : TEXCOORD4; // Back-facing primary color - float4 oB1 : TEXCOORD5; // Back-facing secondary color - float4 oT0 : TEXCOORD0; // Texture coordinate set 0 - float4 oT1 : TEXCOORD1; // Texture coordinate set 1 - float4 oT2 : TEXCOORD2; // Texture coordinate set 2 - float4 oT3 : TEXCOORD3; // Texture coordinate set 3 -}; - -float4 reverseScreenspaceTransform(float4 oPos) -{ - // Scale screenspace coordinates (0 to viewport width/height) to -1 to +1 range - - // On Xbox, oPos should contain the vertex position in screenspace - // We need to reverse this transformation - // Conventionally, each Xbox Vertex Shader includes instructions like this - // mul oPos.xyz, r12, c-38 - // +rcc r1.x, r12.w - // mad oPos.xyz, r12, r1.x, c-37 - // where c-37 and c-38 are reserved transform values - - // Reverse screenspace offset - oPos -= xboxScreenspaceOffset; - // Reverse screenspace scale - oPos /= xboxScreenspaceScale; - - // Ensure w is nonzero - if(oPos.w == 0) oPos.w = 1; - // Reverse perspective divide - oPos.xyz *= 
oPos.w; - return oPos; -} - -VS_OUTPUT main(const VS_INPUT xIn) -{ - // Input registers - float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; - - // Unpack 16 flags from 4 float4 constant registers - float vRegisterDefaultFlags[16] = (float[16])vRegisterDefaultFlagsPacked; - - // Initialize input registers from the vertex buffer data - // Or use the register's default value (which can be changed by the title) - #define init_v(i) v##i = lerp(xIn.v[i], vRegisterDefaultValues[i], vRegisterDefaultFlags[i]); - // Note : unroll manually instead of for-loop, because of the ## concatenation - init_v( 0); init_v( 1); init_v( 2); init_v( 3); - init_v( 4); init_v( 5); init_v( 6); init_v( 7); - init_v( 8); init_v( 9); init_v(10); init_v(11); - init_v(12); init_v(13); init_v(14); init_v(15); - - // For passthrough, map output variables to their corresponding input registers - float4 oPos = v0; - float4 oD0 = v3; - float4 oD1 = v4; - float4 oFog = v5; - float4 oPts = v6; - float4 oB0 = v7; - float4 oB1 = v8; - float4 oT0 = v9; - float4 oT1 = v10; - float4 oT2 = v11; - float4 oT3 = v12; - - // Copy variables to output struct - VS_OUTPUT xOut; - - // Fogging - // TODO deduplicate - const float fogDepth = abs(oFog.x); - const float fogTableMode = CxbxFogInfo.x; - const float fogDensity = CxbxFogInfo.y; - const float fogStart = CxbxFogInfo.z; - const float fogEnd = CxbxFogInfo.w; - - const float FOG_TABLE_NONE = 0; - const float FOG_TABLE_EXP = 1; - const float FOG_TABLE_EXP2 = 2; - const float FOG_TABLE_LINEAR = 3; - - float fogFactor; - if(fogTableMode == FOG_TABLE_NONE) - fogFactor = fogDepth; - if(fogTableMode == FOG_TABLE_EXP) - fogFactor = 1 / exp(fogDepth * fogDensity); /* / 1 / e^(d * density)*/ - if(fogTableMode == FOG_TABLE_EXP2) - fogFactor = 1 / exp(pow(fogDepth * fogDensity, 2)); /* / 1 / e^((d * density)^2)*/ - if(fogTableMode == FOG_TABLE_LINEAR) - fogFactor = (fogEnd - fogDepth) / (fogEnd - fogStart); - - xOut.oPos = 
reverseScreenspaceTransform(oPos); - xOut.oD0 = saturate(oD0); - xOut.oD1 = saturate(oD1); - xOut.oFog = fogFactor; // Note : Xbox clamps fog in pixel shader - xOut.oPts = oPts.x; - xOut.oB0 = saturate(oB0); - xOut.oB1 = saturate(oB1); - // Scale textures (TODO : or should we apply this to the input register values?) - xOut.oT0 = oT0 / xboxTextureScale[0]; - xOut.oT1 = oT1 / xboxTextureScale[1]; - xOut.oT2 = oT2 / xboxTextureScale[2]; - xOut.oT3 = oT3 / xboxTextureScale[3]; - - return xOut; -} -)"; - - EmuCompileShader(hlsl, g_vs_model, &pPassthroughShader, "passthrough.hlsl"); - } - - *ppHostShader = pPassthroughShader; - - return 0; + EmuCompileShader(g_ShaderSources.vertexShaderPassthroughHlsl, g_vs_model, ppHostShader, g_ShaderSources.vertexShaderPassthroughPath.c_str()); } diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.h b/src/core/hle/D3D8/Direct3D9/VertexShader.h index c2ef913b2..46c0c11eb 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.h +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.h @@ -21,5 +21,5 @@ extern HRESULT EmuCompileVertexShader extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader); -extern HRESULT EmuCompileXboxPassthrough(ID3DBlob** ppHostShader); +extern void EmuCompileXboxPassthrough(ID3DBlob** ppHostShader); diff --git a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp b/src/core/hle/D3D8/Direct3D9/VertexShaderCache.cpp similarity index 83% rename from src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp rename to src/core/hle/D3D8/Direct3D9/VertexShaderCache.cpp index 1748651ea..bcd123777 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShaderCache.cpp @@ -1,15 +1,16 @@ #define LOG_PREFIX CXBXR_MODULE::VSHCACHE -#include "VertexShaderSource.h" +#include "VertexShaderCache.h" #include "core/kernel/init/CxbxKrnl.h" #include "util/hasher.h" #include "core/kernel/support/Emu.h" -VertexShaderSource g_VertexShaderSource = VertexShaderSource(); +VertexShaderCache 
g_VertexShaderCache = VertexShaderCache(); // FIXME : This should really be released and created in step with the D3D device lifecycle rather than being a thing on its own // (And the ResetD3DDevice method should be removed) + ID3DBlob* AsyncCreateVertexShader(IntermediateVertexShader intermediateShader, ShaderKey key) { ID3DBlob* pCompiledShader; @@ -25,7 +26,7 @@ ID3DBlob* AsyncCreateVertexShader(IntermediateVertexShader intermediateShader, S // Find a shader // Return true if the shader was found - bool VertexShaderSource::_FindShader(ShaderKey key, LazyVertexShader** ppLazyShader) { + bool VertexShaderCache::_FindShader(ShaderKey key, LazyVertexShader** ppLazyShader) { auto it = cache.find(key); if (it == cache.end()) { // We didn't find anything! Was CreateShader called? @@ -39,7 +40,7 @@ ID3DBlob* AsyncCreateVertexShader(IntermediateVertexShader intermediateShader, S // Create a new shader // If the shader was already created, just increase its reference count -ShaderKey VertexShaderSource::CreateShader(const xbox::dword_xt* pXboxFunction, DWORD *pXboxFunctionSize) { +ShaderKey VertexShaderCache::CreateShader(const xbox::dword_xt* pXboxFunction, DWORD *pXboxFunctionSize) { IntermediateVertexShader intermediateShader; *pXboxFunctionSize = GetVshFunctionSize(pXboxFunction); @@ -86,7 +87,7 @@ ShaderKey VertexShaderSource::CreateShader(const xbox::dword_xt* pXboxFunction, } // Get a shader using the given key -IDirect3DVertexShader* VertexShaderSource::GetShader(ShaderKey key) +IDirect3DVertexShader* VertexShaderCache::GetShader(ShaderKey key) { LazyVertexShader* pLazyShader = nullptr; @@ -113,6 +114,12 @@ IDirect3DVertexShader* VertexShaderSource::GetShader(ShaderKey key) EmuLog(LOG_LEVEL::DEBUG, "Waiting for shader %llx...", key); pCompiledShader = pLazyShader->compileResult.get(); + if (!pCompiledShader) { + EmuLog(LOG_LEVEL::ERROR2, "Failed to compile vertex shader for %llx", key); + pLazyShader->isReady = true; + return nullptr; + } + // Create the shader 
auto hRet = pD3DDevice->CreateVertexShader ( @@ -145,7 +152,7 @@ IDirect3DVertexShader* VertexShaderSource::GetShader(ShaderKey key) } // Release a shader. Doesn't actually release any resources for now -void VertexShaderSource::ReleaseShader(ShaderKey key) +void VertexShaderCache::ReleaseShader(ShaderKey key) { // For now, don't bother releasing any shaders LazyVertexShader* pLazyShader; @@ -165,8 +172,25 @@ void VertexShaderSource::ReleaseShader(ShaderKey key) } } -void VertexShaderSource::ResetD3DDevice(IDirect3DDevice9* newDevice) +void VertexShaderCache::ResetD3DDevice(IDirect3DDevice9* newDevice) { EmuLog(LOG_LEVEL::DEBUG, "Resetting D3D device"); + cache.clear(); this->pD3DDevice = newDevice; } + +void VertexShaderCache::Clear() +{ + for (auto& x : cache) { + if (!x.second.isReady) { + auto pBlob = x.second.compileResult.get(); + if (pBlob) { + pBlob->Release(); + } + } + else if(x.second.pHostVertexShader) { + x.second.pHostVertexShader->Release(); + } + } + cache.clear(); +} diff --git a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h b/src/core/hle/D3D8/Direct3D9/VertexShaderCache.h similarity index 91% rename from src/core/hle/D3D8/Direct3D9/VertexShaderSource.h rename to src/core/hle/D3D8/Direct3D9/VertexShaderCache.h index da215c85e..fc9ddfca4 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h +++ b/src/core/hle/D3D8/Direct3D9/VertexShaderCache.h @@ -8,7 +8,7 @@ typedef uint64_t ShaderKey; // Manages creation and caching of vertex shaders -class VertexShaderSource { +class VertexShaderCache { public: ShaderKey CreateShader(const xbox::dword_xt* pXboxFunction, DWORD* pXboxFunctionSize); @@ -16,6 +16,7 @@ public: void ReleaseShader(ShaderKey key); void ResetD3DDevice(IDirect3DDevice9* pD3DDevice); + void Clear(); // TODO // WriteCacheToDisk @@ -42,6 +43,6 @@ private: bool _FindShader(ShaderKey key, LazyVertexShader** ppLazyShader); }; -extern VertexShaderSource g_VertexShaderSource; +extern VertexShaderCache g_VertexShaderCache; #endif diff 
--git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 2cdcbe4ff..927a446e8 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -39,6 +39,7 @@ #include "core\kernel\support\Emu.h" #include "core\hle\D3D8\Direct3D9\Direct3D9.h" // For g_pD3DDevice, g_pXbox_PixelShader +#include "core\hle\D3D8\Direct3D9\Shader.h" // For g_ShaderSources #include "core\hle\D3D8\XbPixelShader.h" #include "core\hle\D3D8\Direct3D9\PixelShader.h" // EmuCompilePixelShader #include "core\hle\D3D8\XbD3D8Logging.h" // For D3DErrorString() @@ -663,33 +664,6 @@ constexpr int PSH_XBOX_CONSTANT_FRONTFACE_FACTOR = PSH_XBOX_CONSTANT_LUM + 4; // // This concludes the set of constants that need to be set on host : constexpr int PSH_XBOX_CONSTANT_MAX = PSH_XBOX_CONSTANT_FRONTFACE_FACTOR + 1; // = 28 -std::string GetFixedFunctionShaderTemplate() { - static bool loaded = false; - static std::string hlslString; - - // TODO does this need to be thread safe? - if (!loaded) { - loaded = true; - - // Determine the filename and directory for the fixed function shader - // TODO make this a relative path so we guarantee an LPCSTR for D3DCompile - auto hlslDir = std::filesystem::path(szFilePath_CxbxReloaded_Exe) - .parent_path() - .append("hlsl"); - - auto sourceFile = hlslDir.append("FixedFunctionPixelShader.hlsl").string(); - - // Load the shader into a string - std::ifstream hlslStream(sourceFile); - std::stringstream hlsl; - hlsl << hlslStream.rdbuf(); - - hlslString = hlsl.str(); - } - - return hlslString; -} - std::string_view GetD3DTOPString(int d3dtop) { static constexpr std::string_view opToString[] = { #ifdef ENABLE_FF_ALPHAKILL @@ -790,6 +764,21 @@ IDirect3DPixelShader9* GetFixedFunctionShader() // TODO move this cache elsewhere - and flush it when the device is released! 
static std::unordered_map ffPsCache = {}; + // Support hotloading hlsl + static int pixelShaderVersion = -1; + int shaderVersion = g_ShaderSources.Update(); + if (pixelShaderVersion != shaderVersion) { + pixelShaderVersion = shaderVersion; + g_pD3DDevice->SetPixelShader(nullptr); + + for (auto& hostShader : ffPsCache) { + if (hostShader.second) + hostShader.second->Release(); + } + + ffPsCache.clear(); + } + // Create a key from state that will be baked in to the shader PsTextureHardcodedState states[4] = {}; int sampleType[4] = { SAMPLE_NONE, SAMPLE_NONE, SAMPLE_NONE, SAMPLE_NONE }; @@ -872,68 +861,74 @@ IDirect3DPixelShader9* GetFixedFunctionShader() } // Build and compile a new shader - auto hlslTemplate = GetFixedFunctionShaderTemplate(); + std::string hlslTemplate = g_ShaderSources.fixedFunctionPixelShaderHlsl; // In D3D9 it seems we need to know hardcode if we're doing a 2D or 3D lookup const std::string sampleTypePattern = "TEXTURE_SAMPLE_TYPE;"; auto sampleTypeReplace = hlslTemplate.find(sampleTypePattern); + std::string finalShader = hlslTemplate; - static constexpr std::string_view typeToString[] = { - "SAMPLE_NONE", - "SAMPLE_2D", - "SAMPLE_3D", - "SAMPLE_CUBE" - }; + if (sampleTypeReplace != std::string::npos) { + static constexpr std::string_view typeToString[] = { + "SAMPLE_NONE", + "SAMPLE_2D", + "SAMPLE_3D", + "SAMPLE_CUBE" + }; - std::stringstream sampleTypeString; - sampleTypeString << "{" - << typeToString[sampleType[0]] << ", " - << typeToString[sampleType[1]] << ", " - << typeToString[sampleType[2]] << ", " - << typeToString[sampleType[3]] << "};"; + std::stringstream sampleTypeString; + sampleTypeString << "{" + << typeToString[sampleType[0]] << ", " + << typeToString[sampleType[1]] << ", " + << typeToString[sampleType[2]] << ", " + << typeToString[sampleType[3]] << "};"; - auto finalShader = hlslTemplate.replace(sampleTypeReplace, sampleTypePattern.size(), sampleTypeString.str()); + finalShader = hlslTemplate.replace(sampleTypeReplace, 
sampleTypePattern.size(), sampleTypeString.str()); + } // Hardcode the texture stage operations and arguments // So the shader handles exactly one combination of values const std::string stageDef = "// STAGE DEFINITIONS"; - auto stageDefInsert = finalShader.find(stageDef) + stageDef.size(); + auto stageDefInsert = finalShader.find(stageDef); + if (stageDefInsert != std::string::npos) { + stageDefInsert += stageDef.size(); - std::stringstream stageSetup; - stageSetup << '\n'; + std::stringstream stageSetup; + stageSetup << '\n'; - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 4; i++) { #ifdef ENABLE_FF_ALPHAKILL - // Even when a stage is disabled, we still have to fully initialize it's values, to prevent - // "error X4000: variable 'stages' used without having been completely initialized" + // Even when a stage is disabled, we still have to fully initialize it's values, to prevent + // "error X4000: variable 'stages' used without having been completely initialized" #else - // The stage is initialized to be disabled - // We don't have to output anything - if (states[i].COLOROP == X_D3DTOP_DISABLE) - continue; + // The stage is initialized to be disabled + // We don't have to output anything + if (states[i].COLOROP == X_D3DTOP_DISABLE) + continue; #endif - std::string target = "stages[" + std::to_string(i) + "]."; + std::string target = "stages[" + std::to_string(i) + "]."; - auto s = states[i]; - stageSetup << target << "COLOROP = " << GetD3DTOPString(s.COLOROP) << ";\n"; + auto s = states[i]; + stageSetup << target << "COLOROP = " << GetD3DTOPString(s.COLOROP) << ";\n"; - stageSetup << target << "COLORARG0 = " << GetD3DTASumString(s.COLORARG0) << ";\n"; - stageSetup << target << "COLORARG1 = " << GetD3DTASumString(s.COLORARG1) << ";\n"; - stageSetup << target << "COLORARG2 = " << GetD3DTASumString(s.COLORARG2) << ";\n"; + stageSetup << target << "COLORARG0 = " << GetD3DTASumString(s.COLORARG0) << ";\n"; + stageSetup << target << "COLORARG1 = " << 
GetD3DTASumString(s.COLORARG1) << ";\n"; + stageSetup << target << "COLORARG2 = " << GetD3DTASumString(s.COLORARG2) << ";\n"; - stageSetup << target << "ALPHAOP = " << GetD3DTOPString(s.ALPHAOP) << ";\n"; + stageSetup << target << "ALPHAOP = " << GetD3DTOPString(s.ALPHAOP) << ";\n"; - stageSetup << target << "ALPHAARG0 = " << GetD3DTASumString(s.ALPHAARG0) << ";\n"; - stageSetup << target << "ALPHAARG1 = " << GetD3DTASumString(s.ALPHAARG1) << ";\n"; - stageSetup << target << "ALPHAARG2 = " << GetD3DTASumString(s.ALPHAARG2) << ";\n"; + stageSetup << target << "ALPHAARG0 = " << GetD3DTASumString(s.ALPHAARG0) << ";\n"; + stageSetup << target << "ALPHAARG1 = " << GetD3DTASumString(s.ALPHAARG1) << ";\n"; + stageSetup << target << "ALPHAARG2 = " << GetD3DTASumString(s.ALPHAARG2) << ";\n"; - stageSetup << target << "RESULTARG = " << GetD3DTASumString(s.RESULTARG, false) << ";\n"; - stageSetup << '\n'; + stageSetup << target << "RESULTARG = " << GetD3DTASumString(s.RESULTARG, false) << ";\n"; + stageSetup << '\n'; + } + + finalShader = finalShader.insert(stageDefInsert, stageSetup.str()); } - finalShader = finalShader.insert(stageDefInsert, stageSetup.str()); - // Compile the shader ID3DBlob* pShaderBlob; @@ -945,12 +940,15 @@ IDirect3DPixelShader9* GetFixedFunctionShader() auto pseudoSourceFile = hlslDir.append(pseudoFileName).string(); EmuCompileShader(finalShader, "ps_3_0", &pShaderBlob, pseudoSourceFile.c_str()); - // Create shader object for the device IDirect3DPixelShader9* pShader = nullptr; - auto hRet = g_pD3DDevice->CreatePixelShader((DWORD*)pShaderBlob->GetBufferPointer(), &pShader); - if (hRet != S_OK) - CxbxrAbort("Failed to compile fixed function pixel shader"); - pShaderBlob->Release(); + if (pShaderBlob) { + // Create shader object for the device + auto hRet = g_pD3DDevice->CreatePixelShader((DWORD*)pShaderBlob->GetBufferPointer(), &pShader); + if (hRet != S_OK) { + EmuLog(LOG_LEVEL::ERROR2, "Failed to compile fixed function pixel shader"); + } + 
pShaderBlob->Release(); + } // Insert the shader into the cache ffPsCache[key] = pShader; @@ -1030,6 +1028,21 @@ void DxbxUpdateActivePixelShader() // NOPATCH // Fetch all other values that are used in the IsEquivalent check : CompletePSDef.SnapshotRuntimeVariables(); + // Support hotloading hlsl + static int pixelShaderVersion = -1; + int shaderVersion = g_ShaderSources.Update(); + if (pixelShaderVersion != shaderVersion) { + pixelShaderVersion = shaderVersion; + g_pD3DDevice->SetPixelShader(nullptr); + + for (auto& hostShader : g_RecompiledPixelShaders) { + if (hostShader.ConvertedPixelShader) + hostShader.ConvertedPixelShader->Release(); + } + + g_RecompiledPixelShaders.clear(); + } + // Now, see if we already have a shader compiled for this definition : // TODO : Change g_RecompiledPixelShaders into an unordered_map, hash just the identifying PSDef members, and add cache eviction (clearing host resources when pruning) const PSH_RECOMPILED_SHADER* RecompiledPixelShader = nullptr; diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index b30ca8956..b205edc8f 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -34,7 +34,8 @@ #include "core\kernel\support\Emu.h" #include "core\hle\D3D8\Direct3D9\Direct3D9.h" // For g_Xbox_VertexShader_Handle #include "core\hle\D3D8\Direct3D9\RenderStates.h" // For XboxRenderStateConverter -#include "core\hle\D3D8\Direct3D9\VertexShaderSource.h" // For g_VertexShaderSource +#include "core\hle\D3D8\Direct3D9\VertexShaderCache.h" // For g_VertexShaderCache +#include "core\hle\D3D8\Direct3D9\Shader.h" // For g_ShaderSources #include "core\hle\D3D8\XbVertexBuffer.h" // For CxbxImpl_SetVertexData4f #include "core\hle\D3D8\XbVertexShader.h" #include "core\hle\D3D8\XbD3D8Logging.h" // For DEBUG_D3DRESULT @@ -49,6 +50,7 @@ #include #include #include +#include // External symbols : extern xbox::X_STREAMINPUT g_Xbox_SetStreamSource[X_VSH_MAX_STREAMS]; // Declared 
in XbVertexBuffer.cpp @@ -1124,10 +1126,49 @@ IDirect3DVertexDeclaration* CxbxCreateHostVertexDeclaration(D3DVERTEXELEMENT *pD return pHostVertexDeclaration; } -static IDirect3DVertexShader* passthroughshader; +IDirect3DVertexShader* InitShader(void (*compileFunc)(ID3DBlob**), const char* label) { + IDirect3DVertexShader* shader = nullptr; + + ID3DBlob* pBlob = nullptr; + compileFunc(&pBlob); + if (pBlob) { + HRESULT hRet = g_pD3DDevice->CreateVertexShader((DWORD*)pBlob->GetBufferPointer(), &shader); + pBlob->Release(); + if (FAILED(hRet)) CxbxrAbort("Failed to create shader: %s", label); + } + + return shader; +} + void CxbxUpdateHostVertexShader() { extern bool g_bUsePassthroughHLSL; // TMP glue + // TODO: move d3d9 state to VertexShader.cpp + static IDirect3DVertexShader* fixedFunctionShader = nullptr; // TODO: move to shader cache + static IDirect3DVertexShader* passthroughShader = nullptr; + static int vertexShaderVersion = -1; + + int shaderVersion = g_ShaderSources.Update(); + if (vertexShaderVersion != shaderVersion) { + vertexShaderVersion = shaderVersion; + g_pD3DDevice->SetVertexShader(nullptr); + + EmuLog(LOG_LEVEL::INFO, "Loading vertex shaders..."); + + g_VertexShaderCache.Clear(); + + if (fixedFunctionShader) { + fixedFunctionShader->Release(); + fixedFunctionShader = nullptr; + } + fixedFunctionShader = InitShader(EmuCompileFixedFunction, "Fixed Function Vertex Shader"); + + if (passthroughShader) { + passthroughShader->Release(); + passthroughShader = nullptr; + } + passthroughShader = InitShader(EmuCompileXboxPassthrough, "Passthrough Vertex Shader"); + } // TODO Call this when state is dirty // Rather than every time state changes @@ -1135,43 +1176,20 @@ void CxbxUpdateHostVertexShader() LOG_INIT; // Allows use of DEBUG_D3DRESULT if (g_Xbox_VertexShaderMode == VertexShaderMode::FixedFunction) { - IDirect3DVertexShader* fixedFunctionShader = nullptr; - HRESULT hRet; - - if (g_UseFixedFunctionVertexShader) { - static IDirect3DVertexShader* ffHlsl = 
nullptr; - if (ffHlsl == nullptr) { - ID3DBlob* pBlob = nullptr; - EmuCompileFixedFunction(&pBlob); - if (pBlob) { - hRet = g_pD3DDevice->CreateVertexShader((DWORD*)pBlob->GetBufferPointer(), &ffHlsl); - if (FAILED(hRet)) CxbxrAbort("Failed to create fixed-function shader"); - } - } - fixedFunctionShader = ffHlsl; - } - - hRet = g_pD3DDevice->SetVertexShader(fixedFunctionShader); + HRESULT hRet = g_pD3DDevice->SetVertexShader(fixedFunctionShader); if (FAILED(hRet)) CxbxrAbort("Failed to set fixed-function shader"); } else if (g_Xbox_VertexShaderMode == VertexShaderMode::Passthrough && g_bUsePassthroughHLSL) { - if (passthroughshader == nullptr) { - ID3DBlob* pBlob = nullptr; - EmuCompileXboxPassthrough(&pBlob); - if (pBlob) { - g_pD3DDevice->CreateVertexShader((DWORD*)pBlob->GetBufferPointer(), &passthroughshader); - } - } - - HRESULT hRet = g_pD3DDevice->SetVertexShader(passthroughshader); + HRESULT hRet = g_pD3DDevice->SetVertexShader(passthroughShader); + if (FAILED(hRet)) CxbxrAbort("Failed to set passthrough shader"); } else { auto pTokens = GetCxbxVertexShaderSlotPtr(g_Xbox_VertexShader_FunctionSlots_StartAddress); assert(pTokens); // Create a vertex shader from the tokens DWORD shaderSize; - auto VertexShaderKey = g_VertexShaderSource.CreateShader(pTokens, &shaderSize); - IDirect3DVertexShader* pHostVertexShader = g_VertexShaderSource.GetShader(VertexShaderKey); + auto VertexShaderKey = g_VertexShaderCache.CreateShader(pTokens, &shaderSize); + IDirect3DVertexShader* pHostVertexShader = g_VertexShaderCache.GetShader(VertexShaderKey); HRESULT hRet = g_pD3DDevice->SetVertexShader(pHostVertexShader); DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader"); } @@ -1559,7 +1577,7 @@ void CxbxImpl_DeleteVertexShader(DWORD Handle) RegisterCxbxVertexDeclaration(pCxbxVertexDeclaration->Key, nullptr); // Remove from cache (which will free present pCxbxVertexDeclaration) // Release the host vertex shader - g_VertexShaderSource.ReleaseShader(pCxbxVertexShader->Key); + 
g_VertexShaderCache.ReleaseShader(pCxbxVertexShader->Key); #endif }