diff --git a/CMakeLists.txt b/CMakeLists.txt
index d9e343970..901dcbac3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -134,6 +134,8 @@ file (GLOB CXBXR_HEADER_EMU
  "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl"
  "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl"
  "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.h"
+ "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl"
+ "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli"
  "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl"
  "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli"
  "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/PixelShader.h"
@@ -448,6 +450,8 @@ install(FILES ${cxbxr_INSTALL_files}
 
 install(FILES
  "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl"
+ "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl"
+ "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli"
  "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli"
  "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl"
  DESTINATION bin/hlsl
diff --git a/projects/misc/batch.cmake b/projects/misc/batch.cmake
index e29fafe0d..9f1f15550 100644
--- a/projects/misc/batch.cmake
+++ b/projects/misc/batch.cmake
@@ -32,6 +32,8 @@ file(COPY ${CXBXR_GLEW_DLL} DESTINATION ${TargetRunTimeDir})
 set(CXBXR_HLSL_FILES
 "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli"
 "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl"
+"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli"
+"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl"
 )
 set(HlslOutputDir ${TargetRunTimeDir}/hlsl)
 file(MAKE_DIRECTORY ${HlslOutputDir})
diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp
index 982ce4861..4b7a6a55d 100644
--- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp
+++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp
@@ -1980,7 +1980,7 @@ static LRESULT WINAPI EmuMsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lPar
                 }
                 else if (wParam == VK_F2)
                 {
-                    g_UseFixedFunctionVertexShader = !g_UseFixedFunctionVertexShader;
+                    g_UseFixedFunctionPixelShader = !g_UseFixedFunctionPixelShader;
                 }
                 else if (wParam == VK_F3)
                 {
@@ -6406,11 +6406,6 @@ void UpdateFixedFunctionShaderLight(int d3dLightIndex, Light* pShaderLight, D3DX
     pShaderLight->SpotIntensityDivisor = cos(d3dLight->Theta / 2) - cos(d3dLight->Phi / 2);
 }
 
-float AsFloat(uint32_t value) {
-    auto v = value;
-    return *(float*)&v;
-}
-
 void UpdateFixedFunctionVertexShaderState()
 {
     extern xbox::X_VERTEXATTRIBUTEFORMAT* GetXboxVertexAttributeFormat(); // TMP glue
@@ -6498,8 +6493,7 @@ void UpdateFixedFunctionVertexShaderState()
-    // FIXME remove when fixed function PS is implemented
-    // Note if we are using the fixed function pixel shader
-    // We only want to produce the fog depth value in the VS, not the fog factor
-    auto psIsFixedFunction = g_pXbox_PixelShader == nullptr;
-    ffShaderState.Fog.TableMode = psIsFixedFunction ? D3DFOG_NONE : fogTableMode;
+    // Note if the HLSL fixed function pixel shader is disabled
+    // we only want to produce the fog depth value in the VS, not the fog factor
+    // NOTE(review): this no longer checks whether an Xbox pixel shader is set - confirm
+    ffShaderState.Fog.TableMode = !g_UseFixedFunctionPixelShader ? 
D3DFOG_NONE : fogTableMode;
 
     // Determine how fog depth is calculated
     if (fogEnable && fogTableMode != D3DFOG_NONE) {
diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl
new file mode 100644
index 000000000..6e54a6966
--- /dev/null
+++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl
@@ -0,0 +1,294 @@
+#include "FixedFunctionPixelShader.hlsli"
+
+uniform FixedFunctionPixelShaderState state : register(c0);
+sampler samplers[4] : register(s0);
+
+struct PS_INPUT // Declared identical to vertex shader output (see VS_OUTPUT)
+{
+    float2 iPos : VPOS; // Screen space x,y pixel location
+    float4 iD0 : COLOR0; // Front-facing primary (diffuse) vertex color (clamped to 0..1)
+    float4 iD1 : COLOR1; // Front-facing secondary (specular) vertex color (clamped to 0..1)
+    float iFog : FOG;
+    float iPts : PSIZE;
+    float4 iB0 : TEXCOORD4; // Back-facing primary (diffuse) vertex color (clamped to 0..1)
+    float4 iB1 : TEXCOORD5; // Back-facing secondary (specular) vertex color (clamped to 0..1)
+    float4 iT[4] : TEXCOORD0; // Texture Coord 0
+    float iFF : VFACE; // Front facing if > 0
+};
+
+// These 'D3DTA' texture argument values
+// may be used during each texture stage
+struct TextureArgs {
+    float4 CURRENT;
+    float4 TEXTURE;
+    float4 DIFFUSE;
+    float4 SPECULAR;
+    float4 TEMP;
+    float4 TFACTOR;
+};
+
+static float4 TexCoords[4];
+
+// When creating an instance of the fixed function shader
+// we string-replace the assignment below with a value
+// The define keeps the shader compilable without the replacement
+#define TEXTURE_SAMPLE_TYPE {SAMPLE_2D, SAMPLE_2D, SAMPLE_2D, SAMPLE_2D};
+static int TextureSampleType[4] = TEXTURE_SAMPLE_TYPE;
+
+bool HasFlag(float value, float flag) {
+    // http://theinstructionlimit.com/encoding-boolean-flags-into-a-float-in-hlsl
+    // 'flag' is the value of the bit itself (e.g. 0x10)
+    // so the bit is set when the remainder of twice that value reaches it
+    return fmod(value, flag * 2) >= flag;
+}
+
+float4 GetArg(float arg, TextureArgs ctx) {
+    // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dta
+    bool alphaReplicate = HasFlag(arg, X_D3DTA_ALPHAREPLICATE);
+    bool complement = HasFlag(arg, X_D3DTA_COMPLEMENT);
+    arg = arg % 16;
+
+    // Default for arguments that aren't handled below (e.g. X_D3DTA_CONSTANT)
+    float4 o = float4(0, 0, 0, 0);
+
+    if (arg == X_D3DTA_DIFFUSE)
+        o = ctx.DIFFUSE;
+    if (arg == X_D3DTA_CURRENT)
+        o = ctx.CURRENT;
+    if (arg == X_D3DTA_TEXTURE)
+        o = ctx.TEXTURE;
+    if (arg == X_D3DTA_TFACTOR)
+        o = ctx.TFACTOR;
+    if (arg == X_D3DTA_SPECULAR)
+        o = ctx.SPECULAR;
+    if (arg == X_D3DTA_TEMP)
+        o = ctx.TEMP;
+
+    // Both modifiers may be set on the same argument
+    // The order doesn't matter, since the complement is taken per component
+    if (complement)
+        o = 1 - o;
+    if (alphaReplicate)
+        o = o.aaaa;
+
+    return o;
+}
+
+float4 ExecuteTextureOp(float op, float4 arg1, float4 arg2, float4 arg0, TextureArgs ctx, PsTextureStageState stage) {
+    // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtextureop
+
+    // Note if we use ifs here instead of else if
+    // D3DCompile may stackoverflow at runtime
+
+    // X_D3DTOP_DISABLE can only be reached by ALPHAOP
+    // It's documented as undefined behaviour
+    // Test case: DoA:Xtreme menu
+    if (op == X_D3DTOP_DISABLE)
+        return ctx.CURRENT;
+    else if (op == X_D3DTOP_SELECTARG1)
+        return arg1;
+    else if (op == X_D3DTOP_SELECTARG2)
+        return arg2;
+    else if (op == X_D3DTOP_MODULATE)
+        return arg1 * arg2;
+    else if (op == X_D3DTOP_MODULATE2X)
+        return 2 * (arg1 * arg2);
+    else if (op == X_D3DTOP_MODULATE4X)
+        return 4 * (arg1 * arg2);
+    else if (op == X_D3DTOP_ADD)
+        return arg1 + arg2;
+    else if (op == X_D3DTOP_ADDSIGNED)
+        return arg1 + arg2 - 0.5;
+    else if (op == X_D3DTOP_ADDSIGNED2X)
+        return 2 * (arg1 + arg2 - 0.5);
+    else if (op == X_D3DTOP_SUBTRACT)
+        return arg1 - arg2;
+    else if (op == X_D3DTOP_ADDSMOOTH)
+        return arg1 + arg2 * (1 - arg1);
+    else if (op == X_D3DTOP_BLENDDIFFUSEALPHA)
+        return arg1 * ctx.DIFFUSE.a + arg2 * (1 - ctx.DIFFUSE.a);
+    else if (op == X_D3DTOP_BLENDCURRENTALPHA)
+        return arg1 * ctx.CURRENT.a + arg2 * (1 - ctx.CURRENT.a);
+    else if (op == X_D3DTOP_BLENDTEXTUREALPHA)
+        return arg1 * ctx.TEXTURE.a + arg2 * (1 - ctx.TEXTURE.a);
+    else if (op == X_D3DTOP_BLENDFACTORALPHA)
+        return arg1 * ctx.TFACTOR.a + arg2 * (1 - ctx.TFACTOR.a);
+    else if (op == X_D3DTOP_BLENDTEXTUREALPHAPM)
+        return arg1 + arg2 * (1 - ctx.TEXTURE.a);
+    else if (op == X_D3DTOP_PREMODULATE)
+        return arg1; // Note this also multiplies the next stage's CURRENT by its texture
+    else if (op == X_D3DTOP_MODULATEALPHA_ADDCOLOR)
+        return float4(arg1.rgb + arg1.a * arg2.rgb, 1);
+    else if (op == X_D3DTOP_MODULATECOLOR_ADDALPHA)
+        return float4(arg1.rgb * arg2.rgb + arg1.a, 1);
+    else if (op == X_D3DTOP_MODULATEINVALPHA_ADDCOLOR)
+        return float4((1 - arg1.a) * arg2.rgb + arg1.rgb, 1);
+    else if (op == X_D3DTOP_MODULATEINVCOLOR_ADDALPHA)
+        return float4((1 - arg1.rgb) * arg2.rgb + arg1.a, 1);
+    else if (op == X_D3DTOP_DOTPRODUCT3)
+        return dot(arg1.rgb, arg2.rgb).rrrr;
+    // Note arg0 below is arg1 in D3D docs
+    // since it becomes the first argument for operations supporting 3 arguments...
+    else if (op == X_D3DTOP_MULTIPLYADD)
+        return arg0 + arg1 * arg2;
+    else if (op == X_D3DTOP_LERP)
+        return arg0 * arg1 + (1 - arg0) * arg2;
+    else if (op == X_D3DTOP_BUMPENVMAP)
+        return float4(
+            arg1.x * stage.BUMPENVMAT00 + arg1.y * stage.BUMPENVMAT10,
+            arg1.x * stage.BUMPENVMAT01 + arg1.y * stage.BUMPENVMAT11,
+            1, 1);
+    else if (op == X_D3DTOP_BUMPENVMAPLUMINANCE)
+        return float4(
+            arg1.x * stage.BUMPENVMAT00 + arg1.y * stage.BUMPENVMAT10,
+            arg1.x * stage.BUMPENVMAT01 + arg1.y * stage.BUMPENVMAT11,
+            arg1.z * stage.BUMPENVLSCALE + stage.BUMPENVLOFFSET,
+            1);
+
+    // Something is amiss... we should have returned by now!
+    // Return a bright colour
+    return float4(0, 1, 1, 1);
+}
+
+TextureArgs ExecuteTextureStage(
+    int i,
+    TextureArgs ctx,
+    PsTextureHardcodedState s,
+    int previousOp
+)
+{
+    // Early exit if this stage is disabled (and therefore all further stages are too)
+    if (s.COLOROP == X_D3DTOP_DISABLE)
+        return ctx;
+
+    PsTextureStageState stage = state.stages[i];
+
+    // Determine the texture for this stage
+    float3 offset = float3(0, 0, 0);
+    float4 factor = float4(1, 1, 1, 1);
+
+    // Bumpmap special case
+    if (previousOp == X_D3DTOP_BUMPENVMAP ||
+        previousOp == X_D3DTOP_BUMPENVMAPLUMINANCE) {
+        // Assume U, V, L is in CURRENT
+        // Add U', V', to the texture coordinates
+        // And multiply by L'
+        // https://docs.microsoft.com/en-us/windows/win32/direct3d9/bump-mapping-formulas
+        offset.xy = ctx.CURRENT.xy;
+        factor.rgb = ctx.CURRENT.z;
+    }
+
+    // Sample the texture
+    float4 t = float4(1, 1, 1, 1);
+    if (stage.IsTextureSet) {
+        int type = TextureSampleType[i];
+        if (type == SAMPLE_2D)
+            t = tex2D(samplers[i], TexCoords[i].xy + offset.xy);
+        else if (type == SAMPLE_3D)
+            t = tex3D(samplers[i], TexCoords[i].xyz + offset.xyz);
+        else if (type == SAMPLE_CUBE)
+            t = texCUBE(samplers[i], TexCoords[i].xyz + offset.xyz);
+    }
+
+    // Assign the final value for TEXTURE
+    ctx.TEXTURE = t * factor;
+
+    // Premodulate special case
+    if (previousOp == X_D3DTOP_PREMODULATE) {
+        ctx.CURRENT *= ctx.TEXTURE;
+    }
+
+    // Get arguments for the texture operation
+    // Almost all operate on 2 arguments, Arg1 and Arg2
+    // Arg0 is a third argument that seems to have been tacked on
+    // for MULTIPLYADD and LERP
+
+    // Colour operation arguments
+    float4 cArg1 = GetArg(s.COLORARG1, ctx);
+    float4 cArg2 = GetArg(s.COLORARG2, ctx);
+    float4 cArg0 = GetArg(s.COLORARG0, ctx);
+
+    // Alpha operation arguments
+    float4 aArg1 = GetArg(s.ALPHAARG1, ctx);
+    float4 aArg2 = GetArg(s.ALPHAARG2, ctx);
+    float4 aArg0 = GetArg(s.ALPHAARG0, ctx);
+
+    // Execute texture operation
+    float4 value;
+    value.rgb = ExecuteTextureOp(s.COLOROP, cArg1, cArg2, cArg0, ctx, stage).rgb;
+    value.a = ExecuteTextureOp(s.ALPHAOP, aArg1, aArg2, aArg0, ctx, stage).a;
+
+    // Save the result
+    // Note RESULTARG should either be CURRENT or TEMP
+    // But some titles seem to set it to DIFFUSE
+    // Use CURRENT for anything other than TEMP
+    // Test case: DoA 3
+    if (s.RESULTARG == X_D3DTA_TEMP)
+        ctx.TEMP = value;
+    else
+        ctx.CURRENT = value;
+
+    return ctx;
+}
+
+float4 main(const PS_INPUT input) : COLOR {
+
+    TexCoords = input.iT;
+
+    // Each stage is passed and returns
+    // a set of texture arguments
+    // And will usually update the CURRENT value
+    TextureArgs ctx;
+
+    // The CURRENT register
+    // Default to the diffuse value
+    // TODO determine whether to use the front or back colours
+    // and set them here
+    ctx.CURRENT = input.iD0;
+    ctx.DIFFUSE = input.iD0;
+    ctx.SPECULAR = input.iD1;
+    // The TEMP register
+    // Default to 0
+    ctx.TEMP = float4(0, 0, 0, 0);
+    ctx.TFACTOR = state.TextureFactor;
+
+    PsTextureHardcodedState stages[4];
+    stages[0].COLOROP = X_D3DTOP_DISABLE;
+    stages[1].COLOROP = X_D3DTOP_DISABLE;
+    stages[2].COLOROP = X_D3DTOP_DISABLE;
+    stages[3].COLOROP = X_D3DTOP_DISABLE;
+
+    // Define stages
+    // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype
+    // We'll find comment below and insert the definitions after it
+    // STAGE DEFINITIONS
+    // END STAGE DEFINITIONS
+
+    // Run each stage
+    int previousOp = -1;
+    for (int i = 0; i < 4; i++) {
+
+        ctx = ExecuteTextureStage(
+            i,
+            ctx,
+            stages[i],
+            previousOp
+        );
+
+        previousOp = stages[i].COLOROP;
+    }
+
+    // Add specular if enabled
+    // Note specular is added before fog, so the highlights are fogged too
+    if (state.SpecularEnable) {
+        ctx.CURRENT.rgb += ctx.SPECULAR.rgb;
+    }
+
+    // Add fog if enabled
+    if (state.FogEnable) {
+        ctx.CURRENT.rgb = lerp(state.FogColor.rgb, ctx.CURRENT.rgb, saturate(input.iFog));
+    }
+
+    // Output whatever is in current at the end
+    return ctx.CURRENT;
+}
diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli 
b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli
new file mode 100644
index 000000000..af86b040e
--- /dev/null
+++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli
@@ -0,0 +1,143 @@
+// C++ / HLSL shared state block for fixed function support
+#ifdef __cplusplus
+#pragma once
+
+#include <d3d9.h>
+#include <d3d9types.h> // for D3DFORMAT, D3DLIGHT9, etc
+#include <d3dx9math.h> // for D3DXVECTOR4, etc
+#include <array>
+
+#define float4x4 D3DMATRIX
+#define float4 D3DXVECTOR4
+#define float3 D3DVECTOR
+#define float2 D3DXVECTOR2
+#define arr(name, type, length) std::array<type, length> name
+
+#else
+// HLSL
+#define arr(name, type, length) type name[length]
+// alignas is only meaningful on the C++ side - compile it away
+#define alignas(x)
+// Declare the shared constants below as static in HLSL
+#define const static
+#endif // __cplusplus
+
+#ifdef __cplusplus
+namespace FixedFunctionPixelShader {
+#endif
+    // From X_D3DTOP
+    const float X_D3DTOP_DISABLE = 1;
+    const float X_D3DTOP_SELECTARG1 = 2;
+    const float X_D3DTOP_SELECTARG2 = 3;
+    const float X_D3DTOP_MODULATE = 4;
+    const float X_D3DTOP_MODULATE2X = 5;
+    const float X_D3DTOP_MODULATE4X = 6;
+    const float X_D3DTOP_ADD = 7;
+    const float X_D3DTOP_ADDSIGNED = 8;
+    const float X_D3DTOP_ADDSIGNED2X = 9;
+    const float X_D3DTOP_SUBTRACT = 10;
+    const float X_D3DTOP_ADDSMOOTH = 11;
+    const float X_D3DTOP_BLENDDIFFUSEALPHA = 12;
+    const float X_D3DTOP_BLENDCURRENTALPHA = 13;
+    const float X_D3DTOP_BLENDTEXTUREALPHA = 14;
+    const float X_D3DTOP_BLENDFACTORALPHA = 15;
+    const float X_D3DTOP_BLENDTEXTUREALPHAPM = 16;
+    const float X_D3DTOP_PREMODULATE = 17;
+    const float X_D3DTOP_MODULATEALPHA_ADDCOLOR = 18;
+    const float X_D3DTOP_MODULATECOLOR_ADDALPHA = 19;
+    const float X_D3DTOP_MODULATEINVALPHA_ADDCOLOR = 20;
+    const float X_D3DTOP_MODULATEINVCOLOR_ADDALPHA = 21;
+    const float X_D3DTOP_DOTPRODUCT3 = 22;
+    const float X_D3DTOP_MULTIPLYADD = 23;
+    const float X_D3DTOP_LERP = 24;
+    const float X_D3DTOP_BUMPENVMAP = 25;
+    const float X_D3DTOP_BUMPENVMAPLUMINANCE = 26;
+
+    // D3DTA taken from D3D9 - we don't have Xbox definitions
+    // for these so I guess they're the same?
+    const float X_D3DTA_DIFFUSE = 0x00000000; // select diffuse color (read only)
+    const float X_D3DTA_CURRENT = 0x00000001; // select stage destination register (read/write)
+    const float X_D3DTA_TEXTURE = 0x00000002; // select texture color (read only)
+    const float X_D3DTA_TFACTOR = 0x00000003; // select D3DRS_TEXTUREFACTOR (read only)
+    const float X_D3DTA_SPECULAR = 0x00000004; // select specular color (read only)
+    const float X_D3DTA_TEMP = 0x00000005; // select temporary register color (read/write)
+    const float X_D3DTA_CONSTANT = 0x00000006; // select texture stage constant
+    const float X_D3DTA_COMPLEMENT = 0x00000010; // take 1.0 - x (read modifier)
+    const float X_D3DTA_ALPHAREPLICATE = 0x00000020; // replicate alpha to color components (read modifier)
+
+    const int SAMPLE_2D = 0;
+    const int SAMPLE_3D = 1;
+    const int SAMPLE_CUBE = 2;
+
+    // This state is passed to the shader
+    struct PsTextureStageState {
+        // Values correspond to XD3D8 version of D3DTEXTURESTAGESTATETYPE
+        // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype
+
+        /* Samplers for now are configured elsewhere already
+        constexpr DWORD X_D3DTSS_ADDRESSU = 0;
+        constexpr DWORD X_D3DTSS_ADDRESSV = 1;
+        constexpr DWORD X_D3DTSS_ADDRESSW = 2;
+        constexpr DWORD X_D3DTSS_MAGFILTER = 3;
+        constexpr DWORD X_D3DTSS_MINFILTER = 4;
+        constexpr DWORD X_D3DTSS_MIPFILTER = 5;
+        constexpr DWORD X_D3DTSS_MIPMAPLODBIAS = 6;
+        constexpr DWORD X_D3DTSS_MAXMIPLEVEL = 7;
+        constexpr DWORD X_D3DTSS_MAXANISOTROPY = 8;
+        */
+
+        alignas(16) float COLORKEYOP; // Unimplemented Xbox extension!
+        alignas(16) float COLORSIGN; // Unimplemented Xbox extension!
+        alignas(16) float ALPHAKILL; // Unimplemented Xbox extension!
+        // TEXTURETRANSFORMFLAGS handled by the VS
+        alignas(16) float BUMPENVMAT00;
+        alignas(16) float BUMPENVMAT01;
+        alignas(16) float BUMPENVMAT11;
+        alignas(16) float BUMPENVMAT10;
+        alignas(16) float BUMPENVLSCALE;
+        alignas(16) float BUMPENVLOFFSET;
+        // TEXCOORDINDEX handled by the VS
+        // BORDERCOLOR set on sampler
+        alignas(16) float COLORKEYCOLOR; // Unimplemented Xbox extension!
+
+        // Misc properties
+        alignas(16) float IsTextureSet;
+    };
+
+    // This state is compiled into the shader
+    // Values correspond to XD3D8 version of D3DTEXTURESTAGESTATETYPE
+    // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype
+    struct PsTextureHardcodedState {
+        alignas(16) float COLOROP;
+        alignas(16) float COLORARG0;
+        alignas(16) float COLORARG1;
+        alignas(16) float COLORARG2;
+        alignas(16) float ALPHAOP;
+        alignas(16) float ALPHAARG0;
+        alignas(16) float ALPHAARG1;
+        alignas(16) float ALPHAARG2;
+        alignas(16) float RESULTARG;
+    };
+
+    struct FixedFunctionPixelShaderState {
+        alignas(16) arr(stages, PsTextureStageState, 4);
+        alignas(16) float4 TextureFactor;
+        alignas(16) float SpecularEnable;
+        alignas(16) float FogEnable;
+        alignas(16) float3 FogColor;
+    };
+#ifdef __cplusplus
+} // FixedFunctionPixelShader namespace
+#endif
+
+#ifdef __cplusplus
+#undef float4x4
+#undef float4
+#undef float3
+#undef float2
+#undef arr
+#else // HLSL
+#undef arr
+#undef alignas
+#undef const
+#endif // __cplusplus
diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp
index 44c40bc24..3d3727a58 100644
--- a/src/core/hle/D3D8/XbPixelShader.cpp
+++ b/src/core/hle/D3D8/XbPixelShader.cpp
@@ -44,10 +44,14 @@
 
 #include "core\hle\D3D8\XbD3D8Logging.h" // For D3DErrorString()
 #include "core\kernel\init\CxbxKrnl.h" // For CxbxKrnlCleanup()
+#include "util\hasher.h"
+#include "core\hle\D3D8\Direct3D9\FixedFunctionPixelShader.hlsli"
 
 #include <assert.h> // assert()
 #include <process.h>
 #include <locale.h>
+#include <fstream>
+#include <sstream>
 
 #include "Direct3D9\RenderStates.h" // For 
XboxRenderStateConverter
 #include "Direct3D9\TextureStates.h" // For XboxTextureStateConverter
@@ -638,6 +642,330 @@ constexpr int PSH_XBOX_CONSTANT_FRONTFACE_FACTOR = PSH_XBOX_CONSTANT_LUM + 4; //
 // This concludes the set of constants that need to be set on host :
 constexpr int PSH_XBOX_CONSTANT_MAX = PSH_XBOX_CONSTANT_FRONTFACE_FACTOR + 1; // = 28
 
+// Load the fixed function pixel shader template from the 'hlsl' directory
+// next to the emulator executable. The file is read once and cached
+// Note if the file can't be read, the result is an empty string
+std::string GetFixedFunctionShaderTemplate() {
+    static bool loaded = false;
+    static std::string hlslString;
+
+    // TODO does this need to be thread safe?
+    if (!loaded) {
+        loaded = true;
+
+        // Determine the filename and directory for the fixed function shader
+        // TODO make this a relative path so we guarantee an LPCSTR for D3DCompile
+        auto hlslDir = std::filesystem::path(szFilePath_CxbxReloaded_Exe)
+            .parent_path()
+            .append("hlsl");
+
+        auto sourceFile = hlslDir.append("FixedFunctionPixelShader.hlsl").string();
+
+        // Load the shader into a string
+        std::ifstream hlslStream(sourceFile);
+        std::stringstream hlsl;
+        hlsl << hlslStream.rdbuf();
+
+        hlslString = hlsl.str();
+    }
+
+    return hlslString;
+}
+
+// Get the name of the HLSL constant for a texture operation
+// Values outside of 1..26 are logged and mapped to "UNDEFINED"
+std::string_view GetD3DTOPString(int d3dtop) {
+    static constexpr std::string_view opToString[] = {
+        "UNDEFINED", // 0
+        "X_D3DTOP_DISABLE", // 1
+        "X_D3DTOP_SELECTARG1", // 2
+        "X_D3DTOP_SELECTARG2", // 3
+        "X_D3DTOP_MODULATE", // 4
+        "X_D3DTOP_MODULATE2X", // 5
+        "X_D3DTOP_MODULATE4X", // 6
+        "X_D3DTOP_ADD", // 7
+        "X_D3DTOP_ADDSIGNED", // 8
+        "X_D3DTOP_ADDSIGNED2X", // 9
+        "X_D3DTOP_SUBTRACT", // 10
+        "X_D3DTOP_ADDSMOOTH", // 11
+        "X_D3DTOP_BLENDDIFFUSEALPHA", // 12
+        "X_D3DTOP_BLENDCURRENTALPHA", // 13
+        "X_D3DTOP_BLENDTEXTUREALPHA", // 14
+        "X_D3DTOP_BLENDFACTORALPHA", // 15
+        "X_D3DTOP_BLENDTEXTUREALPHAPM", // 16
+        "X_D3DTOP_PREMODULATE", // 17
+        "X_D3DTOP_MODULATEALPHA_ADDCOLOR", // 18
+        "X_D3DTOP_MODULATECOLOR_ADDALPHA", // 19
+        "X_D3DTOP_MODULATEINVALPHA_ADDCOLOR", // 20
+        "X_D3DTOP_MODULATEINVCOLOR_ADDALPHA", // 21
+        "X_D3DTOP_DOTPRODUCT3", // 22
+        "X_D3DTOP_MULTIPLYADD", // 23
+        "X_D3DTOP_LERP", // 24
+        "X_D3DTOP_BUMPENVMAP", // 25
+        "X_D3DTOP_BUMPENVMAPLUMINANCE", // 26
+    };
+
+    if (d3dtop < 1 || d3dtop > 26) {
+        EmuLog(LOG_LEVEL::ERROR2, "Unmapped texture operation %d", d3dtop);
+        d3dtop = 0; // undefined
+    }
+
+    return opToString[d3dtop];
+}
+
+// Get an HLSL expression that sums a texture argument and its modifier flags
+// e.g. "X_D3DTA_TEXTURE + X_D3DTA_COMPLEMENT"
+// allowModifier only controls whether a modifier is logged as unexpected
+std::string GetD3DTASumString(int d3dta, bool allowModifier = true) {
+    using namespace FixedFunctionPixelShader;
+
+    static const std::string argToString[] = {
+        "X_D3DTA_DIFFUSE", // 0
+        "X_D3DTA_CURRENT", // 1
+        "X_D3DTA_TEXTURE", // 2
+        "X_D3DTA_TFACTOR", // 3
+        "X_D3DTA_SPECULAR", // 4
+        "X_D3DTA_TEMP", // 5
+        "X_D3DTA_CONSTANT", // 6
+        "UNDEFINED", // 7
+    };
+
+    // Write a texture argument
+    const int flagMask = 0x30;
+    int iFlags = d3dta & flagMask;
+    int i = d3dta & ~flagMask;
+
+    if (i < 0 || i > 6) {
+        EmuLog(LOG_LEVEL::ERROR2, "Unmapped texture argument %d on texture arg", i);
+        i = 7; // undefined
+    }
+
+    auto str = argToString[i];
+    if (iFlags) {
+        if (!allowModifier) {
+            EmuLog(LOG_LEVEL::ERROR2, "Modifier not expected on texture argument");
+        }
+
+        // Both modifiers may be present at once, so test each bit on its own
+        // Note iFlags is masked by flagMask, so no other bits can be set
+        if (iFlags & static_cast<int>(X_D3DTA_COMPLEMENT))
+            str += " + X_D3DTA_COMPLEMENT";
+        if (iFlags & static_cast<int>(X_D3DTA_ALPHAREPLICATE))
+            str += " + X_D3DTA_ALPHAREPLICATE";
+    }
+
+    return str;
+}
+
+// TODO we have to create and cache shaders over and over and over and over
+// Deduplicate this resource management
+// Get (or build, compile and cache) the host pixel shader
+// matching the current Xbox texture stage state
+IDirect3DPixelShader9* GetFixedFunctionShader()
+{
+    using namespace FixedFunctionPixelShader;
+
+    // TODO move this cache elsewhere - and flush it when the device is released!
+    static std::unordered_map<uint64_t, IDirect3DPixelShader9*> ffPsCache = {};
+
+    // Create a key from state that will be baked in to the shader
+    PsTextureHardcodedState states[4] = {};
+    int sampleType[4] = { SAMPLE_2D, SAMPLE_2D, SAMPLE_2D, SAMPLE_2D };
+    bool pointSpriteEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE);
+
+    bool previousStageDisabled = false;
+    for (int i = 0; i < 4; i++) {
+        // Determine the COLOROP
+        // Usually we execute stages up to the first disabled stage
+        // However, if point sprites are enabled, we just execute stage 3
+        bool forceDisable =
+            (!pointSpriteEnable && previousStageDisabled) ||
+            (pointSpriteEnable && i < 3);
+        auto colorOp = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLOROP);
+        states[i].COLOROP = forceDisable ? X_D3DTOP_DISABLE : colorOp;
+
+        // If the stage is disabled (or force disabled above)
+        // we don't want its configuration to affect the key
+        // Move on to the next stage
+        if (states[i].COLOROP == X_D3DTOP_DISABLE) {
+            previousStageDisabled = true;
+            continue;
+        }
+
+        // Get sample type
+        // TODO move XD3D8 resource query functions out of Direct3D9.cpp so we can use them here
+        if (g_pXbox_SetTexture[i]) {
+            auto format = g_pXbox_SetTexture[i]->Format;
+            // SampleType is initialized to SAMPLE_2D
+            if (format & X_D3DFORMAT_CUBEMAP)
+                sampleType[i] = SAMPLE_CUBE;
+            else if (((format & X_D3DFORMAT_DIMENSION_MASK) >> X_D3DFORMAT_DIMENSION_SHIFT) > 2)
+                sampleType[i] = SAMPLE_3D;
+        }
+
+        states[i].COLORARG0 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG0);
+        states[i].COLORARG1 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG1);
+        states[i].COLORARG2 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG2);
+
+        states[i].ALPHAOP = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAOP);
+        states[i].ALPHAARG0 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG0);
+        states[i].ALPHAARG1 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG1);
+        states[i].ALPHAARG2 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG2);
+
+        states[i].RESULTARG = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_RESULTARG);
+    }
+
+    // Create a key from the shader state
+    // Note currently this is padded since it's what we send to the GPU
+    auto key = 3 * ComputeHash(states, sizeof(states))
+        + ComputeHash(sampleType, sizeof(sampleType));
+
+    auto got = ffPsCache.find(key);
+    if (got != ffPsCache.end()) {
+        // We have a shader. Great!
+        return got->second;
+    }
+
+    // Build and compile a new shader
+    auto hlslTemplate = GetFixedFunctionShaderTemplate();
+
+    // In D3D9 it seems we need to hardcode whether we're doing a 2D, 3D or cube lookup
+    const std::string sampleTypePattern = "TEXTURE_SAMPLE_TYPE;";
+    auto sampleTypeReplace = hlslTemplate.find(sampleTypePattern);
+    // An empty or unexpected template can't be specialized
+    if (sampleTypeReplace == std::string::npos) {
+        CxbxKrnlCleanup("Fixed function pixel shader template has no sample type marker");
+        return nullptr;
+    }
+
+    static constexpr std::string_view typeToString[] = {
+        "SAMPLE_2D",
+        "SAMPLE_3D",
+        "SAMPLE_CUBE"
+    };
+
+    std::stringstream sampleTypeString;
+    sampleTypeString << "{"
+        << typeToString[sampleType[0]] << ", "
+        << typeToString[sampleType[1]] << ", "
+        << typeToString[sampleType[2]] << ", "
+        << typeToString[sampleType[3]] << "};";
+
+    auto finalShader = hlslTemplate.replace(sampleTypeReplace, sampleTypePattern.size(), sampleTypeString.str());
+
+    // Hardcode the texture stage operations and arguments
+    // So the shader handles exactly one combination of values
+    const std::string stageDef = "// STAGE DEFINITIONS";
+    auto stageDefPos = finalShader.find(stageDef);
+    if (stageDefPos == std::string::npos) {
+        CxbxKrnlCleanup("Fixed function pixel shader template has no stage definition marker");
+        return nullptr;
+    }
+    auto stageDefInsert = stageDefPos + stageDef.size();
+
+    std::stringstream stageSetup;
+    stageSetup << '\n';
+
+    for (int i = 0; i < 4; i++) {
+        // The stage is initialized to be disabled
+        // We don't have to output anything
+        if (states[i].COLOROP == X_D3DTOP_DISABLE)
+            continue;
+
+        std::string target = "stages[" + std::to_string(i) + "].";
+
+        auto s = states[i];
+        stageSetup << target << "COLOROP = " << GetD3DTOPString(s.COLOROP) << ";\n";
+
+        // TODO handle texture arg flags
+        stageSetup << target << "COLORARG0 = " << GetD3DTASumString(s.COLORARG0) << ";\n";
+        stageSetup << target << "COLORARG1 = " << GetD3DTASumString(s.COLORARG1) << ";\n";
+        stageSetup << target << "COLORARG2 = " << GetD3DTASumString(s.COLORARG2) << ";\n";
+
+        stageSetup << target << "ALPHAOP = " << GetD3DTOPString(s.ALPHAOP) << ";\n";
+
+        if (states[i].ALPHAOP != X_D3DTOP_DISABLE) {
+            stageSetup << target << "ALPHAARG0 = " << GetD3DTASumString(s.ALPHAARG0) << ";\n";
+            stageSetup << target << "ALPHAARG1 = " << GetD3DTASumString(s.ALPHAARG1) << ";\n";
+            stageSetup << target << "ALPHAARG2 = " << GetD3DTASumString(s.ALPHAARG2) << ";\n";
+        }
+
+        stageSetup << target << "RESULTARG = " << GetD3DTASumString(s.RESULTARG, false) << ";\n";
+        stageSetup << '\n';
+    }
+
+    finalShader = finalShader.insert(stageDefInsert, stageSetup.str());
+
+    // Compile the shader
+    ID3DBlob* pShaderBlob = nullptr;
+
+    auto hlslDir = std::filesystem::path(szFilePath_CxbxReloaded_Exe)
+        .parent_path()
+        .append("hlsl");
+
+    auto pseudoFileName = "FixedFunctionPixelShader-" + std::to_string(key) + ".hlsl";
+    auto pseudoSourceFile = hlslDir.append(pseudoFileName).string();
+    EmuCompileShader(finalShader, "ps_3_0", &pShaderBlob, pseudoSourceFile.c_str());
+    if (pShaderBlob == nullptr) {
+        CxbxKrnlCleanup("Failed to compile fixed function pixel shader");
+        return nullptr;
+    }
+
+    // Create shader object for the device
+    IDirect3DPixelShader9* pShader = nullptr;
+    auto hRet = g_pD3DDevice->CreatePixelShader((DWORD*)pShaderBlob->GetBufferPointer(), &pShader);
+    pShaderBlob->Release();
+    if (hRet != S_OK)
+        CxbxKrnlCleanup("Failed to create fixed function pixel shader");
+
+    // Insert the shader into the cache
+    ffPsCache[key] = pShader;
+
+    return pShader;
+}
+
+// Reinterpret the bits of a 32 bit value as a float
+float AsFloat(uint32_t value) {
+    auto v = value;
+    return *(float*)&v;
+}
+
+// Set constant state for the fixed function pixel shader
+void UpdateFixedFunctionPixelShaderState()
+{
+    using namespace FixedFunctionPixelShader;
+
+    FixedFunctionPixelShaderState ffPsState;
+    ffPsState.TextureFactor = (D3DXVECTOR4)((D3DXCOLOR)(XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_TEXTUREFACTOR)));
+    ffPsState.SpecularEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_SPECULARENABLE);
+    ffPsState.FogEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGENABLE);
+    ffPsState.FogColor = (D3DXVECTOR3)((D3DXCOLOR)XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR));
+
+    // Texture state
+    for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) {
+
+        auto stage = &ffPsState.stages[i];
+
+        stage->COLORKEYOP = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORKEYOP);
+        stage->COLORSIGN = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORSIGN);
+        stage->ALPHAKILL = XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAKILL);
+        stage->BUMPENVMAT00 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT00));
+        stage->BUMPENVMAT01 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT01));
+        stage->BUMPENVMAT10 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT10));
+        stage->BUMPENVMAT11 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT11));
+        stage->BUMPENVLSCALE = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVLSCALE));
+        stage->BUMPENVLOFFSET = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVLOFFSET));
+        stage->COLORKEYCOLOR = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORKEYCOLOR);
+
+        stage->IsTextureSet = g_pXbox_SetTexture[i] != nullptr;
+    }
+
+    // The constant count is given in float4 registers, so round up
+    const int size = (sizeof(FixedFunctionPixelShaderState) + 16 - 1) / 16;
+    g_pD3DDevice->SetPixelShaderConstantF(0, (float*)&ffPsState, size);
+}
+
+bool g_UseFixedFunctionPixelShader = true;
 void DxbxUpdateActivePixelShader() // NOPATCH
 {
     // The first RenderState is PSAlpha,
@@ -654,7 +982,13 @@ void DxbxUpdateActivePixelShader() // NOPATCH
 
     const xbox::X_D3DPIXELSHADERDEF *pPSDef = g_pXbox_PixelShader != nullptr ? (xbox::X_D3DPIXELSHADERDEF*)(XboxRenderStates.GetPixelShaderRenderStatePointer()) : nullptr;
     if (pPSDef == nullptr) {
-        g_pD3DDevice->SetPixelShader(nullptr);
+        IDirect3DPixelShader9* pShader = nullptr;
+        if (g_UseFixedFunctionPixelShader) {
+            pShader = GetFixedFunctionShader();
+            UpdateFixedFunctionPixelShaderState();
+        }
+
+        g_pD3DDevice->SetPixelShader(pShader);
         return;
     }
 
diff --git a/src/core/hle/D3D8/XbPixelShader.h b/src/core/hle/D3D8/XbPixelShader.h
index 3d9c2cd1b..49e812259 100644
--- a/src/core/hle/D3D8/XbPixelShader.h
+++ b/src/core/hle/D3D8/XbPixelShader.h
@@ -507,6 +507,7 @@ struct DecodedRegisterCombiner {
     void Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef);
 };
 
+extern bool g_UseFixedFunctionPixelShader;
 // PatrickvL's Dxbx pixel shader translation
 void DxbxUpdateActivePixelShader(); // NOPATCH