diff --git a/CMakeLists.txt b/CMakeLists.txt index d764aa57a..901dcbac3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,10 +131,15 @@ file (GLOB CXBXR_HEADER_EMU "${CXBXR_ROOT_DIR}/src/core/common/imgui/settings.h" "${CXBXR_ROOT_DIR}/src/core/common/imgui/video.hpp" "${CXBXR_ROOT_DIR}/src/core/common/video/RenderBase.hpp" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.h" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/PixelShader.h" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Shader.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShader.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/WalkIndexBuffer.h" @@ -294,7 +299,9 @@ file (GLOB CXBXR_SOURCE_EMU "${CXBXR_ROOT_DIR}/src/core/common/imgui/video.cpp" "${CXBXR_ROOT_DIR}/src/core/common/video/RenderBase.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/PixelShader.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/RenderStates.cpp" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Shader.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/TextureStates.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShader.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp" @@ -442,6 +449,9 @@ install(FILES ${cxbxr_INSTALL_files} ) install(FILES + "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl" + 
"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl" + "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli" "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli" "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" DESTINATION bin/hlsl diff --git a/projects/misc/batch.cmake b/projects/misc/batch.cmake index e29fafe0d..9f1f15550 100644 --- a/projects/misc/batch.cmake +++ b/projects/misc/batch.cmake @@ -32,6 +32,8 @@ file(COPY ${CXBXR_GLEW_DLL} DESTINATION ${TargetRunTimeDir}) set(CXBXR_HLSL_FILES "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli" "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" +"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli" +"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl" ) set(HlslOutputDir ${TargetRunTimeDir}/hlsl) file(MAKE_DIRECTORY ${HlslOutputDir}) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl new file mode 100644 index 000000000..9edffe760 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -0,0 +1,374 @@ +// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : +R"DELIMITER( + +struct PS_INPUT // Declared identical to vertex shader output (see VS_OUTPUT) +{ + float2 iPos : VPOS; // Screen space x,y pixel location + float4 iD0 : COLOR0; // Front-facing primary (diffuse) vertex color (clamped to 0..1) + float4 iD1 : COLOR1; // Front-facing secondary (specular) vertex color (clamped to 0..1) + float iFog : FOG; + float iPts : PSIZE; + float4 iB0 : TEXCOORD4; // Back-facing primary (diffuse) vertex color (clamped to 0..1) + float4 iB1 : TEXCOORD5; // Back-facing secondary (specular) vertex color (clamped to 0..1) + float4 iT0 : TEXCOORD0; // Texture Coord 0 
+ float4 iT1 : TEXCOORD1; // Texture Coord 1 + float4 iT2 : TEXCOORD2; // Texture Coord 2 + float4 iT3 : TEXCOORD3; // Texture Coord 3 + float iFF : VFACE; // Front facing if > 0 +}; + +struct PS_OUTPUT +{ + float4 oR0 : COLOR; +}; + +// Source register modifier macro's, based on enum PS_INPUTMAPPING : +// TODO : Should all these 'max(0, x)' actually be 'saturate(x)'? This, because the operation may actually clamp the register value to the range [0..1] +#define s_sat(x) saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // OK for final combiner // Clamps negative x to 0 // Was : max(0, x), then abs(x) (Test case: Scaler) +#define s_comp(x) (1 - saturate(x)) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // OK for final combiner // Complements x (1-x) // Was : 1- min(max(0, x), 1) +#define s_bx2(x) (( 2 * max(0, x)) - 1) // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] +#define s_negbx2(x) ((-2 * max(0, x)) + 1) // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates +#define s_bias(x) (max(0, x) - 0.5) // PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // invalid for final combiner // Clamps negative x to 0 and then subtracts 0.5 +#define s_negbias(x) (-max(0, x) + 0.5) // PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // invalid for final combiner // Clamps negative x to 0, subtracts 0.5, and then negates +#define s_ident(x) x // PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // invalid for final combiner // No modifier, x is passed without alteration +#define s_neg(x) (-x) // PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // invalid for final combiner // Negate + +// Destination register modifier macro's, based on enum PS_COMBINEROUTPUT : +#define d_ident(x) x // PS_COMBINEROUTPUT_IDENTITY= 0x00L, // +#define d_bias(x) (x - 0.5) // PS_COMBINEROUTPUT_BIAS= 0x08L, // Subtracts 0.5 from outputs +#define d_x2(x) ( x * 2) // PS_COMBINEROUTPUT_SHIFTLEFT_1= 0x10L, // 
Scales outputs by 2 +#define d_bx2(x) ((x - 0.5) * 2) // PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS= 0x18L, // Subtracts 0.5 from outputs and scales by 2 +#define d_x4(x) ( x * 4) // PS_COMBINEROUTPUT_SHIFTLEFT_2= 0x20L, // Scales outputs by 4 +#define d_bx4(x) ((x - 0.5) * 4) // PS_COMBINEROUTPUT_SHIFTLEFT_2_BIAS= 0x28L, // Subtracts 0.5 from outputs and scales by 4 +#define d_d2(x) ( x / 2) // PS_COMBINEROUTPUT_SHIFTRIGHT_1= 0x30L, // Divides outputs by 2 +#define d_bd2(x) ((x - 0.5) / 2) // PS_COMBINEROUTPUT_SHIFTRIGHT_1_BIAS= 0x38L, // Subtracts 0.5 from outputs and divides by 2 + +// Constant registers +uniform const float4 c0_[8] : register(c0); +uniform const float4 c1_[8] : register(c8); +uniform const float4 c_fog : register(c18); // Note : Maps to PSH_XBOX_CONSTANT_FOG, assigned to fog.rgb + +// Constant registers used only in final combiner stage (xfc 'opcode') : +uniform const float4 FC0 : register(c16); // Note : Maps to PSH_XBOX_CONSTANT_FC0, must be generated as argument to xfc instead of C0 +uniform const float4 FC1 : register(c17); // Note : Maps to PSH_XBOX_CONSTANT_FC1, must be generated as argument to xfc instead of C1 +uniform const float4 BEM[4] : register(c19); // Note : PSH_XBOX_CONSTANT_BEM for 4 texture stages +uniform const float4 LUM[4] : register(c23); // Note : PSH_XBOX_CONSTANT_LUM for 4 texture stages +uniform const float FRONTFACE_FACTOR : register(c27); // Note : Factor multiplied with the VFACE input (iFF) in main() to decide between front-facing and back-facing vertex colors + + +#define CM_LT(c) if(c < 0) clip(-1); // = PS_COMPAREMODE_[RSTQ]_LT +#define CM_GE(c) if(c >= 0) clip(-1); // = PS_COMPAREMODE_[RSTQ]_GE + +#if 0 + // Compiler-defines/symbols which must be defined when their bit/value is set in the corresponding register : + // Generated by PixelShader.cpp::BuildShader() + + // Data from X_D3DTSS_ALPHAKILL : + #define ALPHAKILL {false, false, false, false} + + // Bits from PSCombinerCount (a.k.a.
PSCombinerCountFlags) : + #define PS_COMBINERCOUNT 2 + #define PS_COMBINERCOUNT_UNIQUE_C0 + #define PS_COMBINERCOUNT_UNIQUE_C1 + #define PS_COMBINERCOUNT_MUX_MSB + + // Generate defines like this, based on actual values : + #define PS_COMPAREMODE_0(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w) + #define PS_COMPAREMODE_1(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w) + #define PS_COMPAREMODE_2(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w) + #define PS_COMPAREMODE_3(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w) + + // Input texture register mappings for texture stage 1, 2 and 3 (stage 0 has no input-texture) + static const int PS_INPUTTEXTURE_[4] = { -1, 0, 0, 0 }; + + // Dot mappings for texture stage 1, 2 and 3 (stage 0 performs no dot product) + #define PS_DOTMAPPING_1 PS_DOTMAPPING_MINUS1_TO_1_D3D + #define PS_DOTMAPPING_2 PS_DOTMAPPING_MINUS1_TO_1_D3D + #define PS_DOTMAPPING_3 PS_DOTMAPPING_MINUS1_TO_1_D3D + + // Bits from FinalCombinerFlags (the 4th byte in PSFinalCombinerInputsEFG) : + #define PS_FINALCOMBINERSETTING_COMPLEMENT_V1 + #define PS_FINALCOMBINERSETTING_COMPLEMENT_R0 + #define PS_FINALCOMBINERSETTING_CLAMP_SUM +#endif + +)DELIMITER", /* This terminates the 1st raw string within the 16380 single-byte characters limit. // */ +// See https://docs.microsoft.com/en-us/cpp/error-messages/compiler-errors-1/compiler-error-c2026?f1url=%3FappId%3DDev15IDEF1%26l%3DEN-US%26k%3Dk(C2026)%26rd%3Dtrue&view=vs-2019 +// Second raw string : +R"DELIMITER( + +// PS_COMBINERCOUNT_UNIQUE_C0 steers whether for C0 to use combiner stage-specific constants c0_0 .. c0_7, or c0_0 for all stages +#ifdef PS_COMBINERCOUNT_UNIQUE_C0 + #define C0 c0_[stage] // concatenate stage to form c0_0 .. c0_7 +#else // PS_COMBINERCOUNT_SAME_C0 + #define C0 c0_[0] // always resolve to c0_0 +#endif + +// PS_COMBINERCOUNT_UNIQUE_C1 steers whether for C1 to use combiner stage-specific constants c1_0 .. 
c1_7, or c1_0 for all stages +#ifdef PS_COMBINERCOUNT_UNIQUE_C1 + #define C1 c1_[stage] // index c1_ by stage, selecting c1_[0] .. c1_[7] +#else // PS_COMBINERCOUNT_SAME_C1 + #define C1 c1_[0] // always resolve to c1_[0] +#endif + +// PS_COMBINERCOUNT_MUX_MSB steers the 'muxing' operation in the XMMC opcode, +// checking either the Most Significant Bit (MSB) or Least (LSB) of the r0 register. +// (In practice, LSB is seldom encountered, we have zero known test-cases.) +#ifdef PS_COMBINERCOUNT_MUX_MSB + #define FCS_MUX (r0.a >= 0.5) // Check r0.a MSB; Having range upto 1 this should be equal to : (((r0.a * 255) /*mod 256*/) >= 128) +#else // PS_COMBINERCOUNT_MUX_LSB + #define FCS_MUX (fmod(round(r0.a * 255), 2) >= 1) // Check r0.a LSB; Get LSB by converting 1 into 255 (highest 8-bit value), rounding to the nearest integer and taking the remainder of a division by 2 (using the fmod intrinsic, since HLSL has no infix 'mod' operator). TODO : Verify correctness +#endif + +// PS_FINALCOMBINERSETTING_COMPLEMENT_V1, when defined, applies a modifier to the v1 input when calculating the sum register +#ifdef PS_FINALCOMBINERSETTING_COMPLEMENT_V1 + #define FCS_V1 s_comp // making it use 1-complement, +#else + #define FCS_V1 s_ident // otherwise identity mapping. +#endif + +// PS_FINALCOMBINERSETTING_COMPLEMENT_R0, when defined, applies a modifier to the r0 input when calculating the sum register +#ifdef PS_FINALCOMBINERSETTING_COMPLEMENT_R0 + #define FCS_R0 s_comp // making it use 1-complement, +#else + #define FCS_R0 s_ident // otherwise identity mapping. +#endif + +// PS_FINALCOMBINERSETTING_CLAMP_SUM, when defined, applies a modifier to the sum register +#ifdef PS_FINALCOMBINERSETTING_CLAMP_SUM + #define FCS_SUM s_sat // making it saturate (s_sat clamps to the range [0..1]), +#else + #define FCS_SUM s_ident // otherwise identity mapping.
TODO : Confirm correctness +#endif + +// Xbox supports only one 'pixel shader' opcode, but bit flags tunes it's function; +// Here, effective all 5 Xbox opcodes, extended with a variable macro {xop_m(m,...)} for destination modifier : +// Note : Since both d0 AND d1 could be the same output register, calculation of d2 can re-use only one (d0 or d1) +#define xmma(d0, d1, d2, s0, s1, s2, s3, m, tmp) tmp = d0 = m(s0 * s1); d1 = m(s2 * s3); d2 = d1 + tmp // PS_COMBINEROUTPUT_AB_CD_SUM= 0x00L, // 3rd output is AB+CD +#define xmmc(d0, d1, d2, s0, s1, s2, s3, m, tmp) tmp = d0 = m(s0 * s1); d1 = m(s2 * s3); d2 = FCS_MUX ? d1 : tmp // PS_COMBINEROUTPUT_AB_CD_MUX= 0x04L, // 3rd output is MUX(AB,CD) based on R0.a + +#define xdm(d0, d1, s0, s1, s2, s3, m) d0 = m(dot(s0 , s1)); d1 = m( s2 * s3 ) // PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only // PS_COMBINEROUTPUT_CD_MULTIPLY= 0x00L, +#define xdd(d0, d1, s0, s1, s2, s3, m) d0 = m(dot(s0 , s1)); d1 = m(dot(s2 , s3)) // PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L, // RGB only // PS_COMBINEROUTPUT_AB_MULTIPLY= 0x00L, +#define xmd(d0, d1, s0, s1, s2, s3, m) d0 = m( s0 * s1 ); d1 = m(dot(s2 , s3)) // PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only // PS_COMBINEROUTPUT_CD_MULTIPLY= 0x01L, + +// After the register combiner stages, there's one (optional) final combiner step, consisting of 4 parts; +// All the 7 final combiner inputs operate on rgb only and clamp negative input to zero: +#define fcin(r) saturate(r) +// Special purpose registers prod and sum operate on rgb only, and have alpha set to zero +#define xfc_sum sum = FCS_SUM(float4(FCS_V1(fcin(v1.rgb)) + FCS_R0(fcin(r0.rgb)), 0)) // Note : perform sum first, so prod can use its result +#define xfc_prod(e, f) prod = float4(fcin(e) * fcin(f), 0) // Note : prod can't have modifiers +// Color and Alpha calculations are performed, potentially using sum and/or prod and/or fog registers +#define xfc_rgb(a, b, c, d) r0.rgb = lerp(fcin(c), fcin(b), fcin(a)) + fcin(d) // Note : 
perform rgb and alpha last, so prod and sum can be used as inputs +#define xfc_alpha(g) r0.a = fcin(g) + +// Glue them all together, so we can generate a one-liner closing off the stages : +#define xfc(a, b, c, d, e, f, g) xfc_sum; xfc_prod(e, f); xfc_rgb(a, b, c, d); xfc_alpha(g) +// Note : If xfc is not generated (when PSFinalCombinerInputsABCD and PSFinalCombinerEFG are both 0), r0.rgba is still returned as pixel shader output + +// GLSL : https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/mix.xhtml +// mix(x, y, a ) x*(1-a ) + y*a +// +// HLSL : https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-lerp +// lerp(x, y, s ) x*(1-s ) + y*s == x + s(y-x) +// lerp(s2, s1, s0) s2*(1-s0) + s1*s0 +)DELIMITER", /* This terminates the 2nd raw string within the 16380 single-byte characters limit. // */ +// See https://docs.microsoft.com/en-us/cpp/error-messages/compiler-errors-1/compiler-error-c2026?f1url=%3FappId%3DDev15IDEF1%26l%3DEN-US%26k%3Dk(C2026)%26rd%3Dtrue&view=vs-2019 +// Third raw string : +R"DELIMITER( + +float m21d(const float input) +{ + int tmp = (int)(input * 255); // Convert float 0..1 into byte 0..255 + tmp -= 128; // 0 lowers to -128, 128 lowers to 0, 255 lowers to 127 + return (float)tmp / 127; // -128 scales to -1.007874016, 0 scales to 0.0, 127 scales to 1.0 +} + +float m21g(const float input) +{ + int tmp = (int)(input * 255); // Convert float 0..1 into byte 0..255 + if (tmp >= 128) { + tmp -= 256; // 128 lowers to -128, 255 lowers to -1 + } // 0 stays 0, 127 stays 127 + + return ((float)tmp + 0.5) / 127.5; // -128 scales to -1.0, 0 scales to 0.003921569 (not exactly 0.0), 127 scales to 1.0 +} + +float m21(const float input) +{ + int tmp = (int)(input * 255); // Convert float 0..1 into byte 0..255 + if (tmp >= 128) { + tmp -= 256; // 128 lowers to -128, 255 lowers to -1 + } // 0 stays 0, 127 stays 127 + + return (float)tmp / 127; // -128 scales to -1.007874016, 0 scales to 0.0, 127 scales to 1.0 +} + +// Note : each component seems already in range [0..1], but two must be combined into one +#define
TwoIntoOne(a,b) (((a * 255) * 256) + (b * 255)) / 255 // TODO : Verify whether this works at all ! +#define CalcHiLo(in) H = TwoIntoOne(in.x, in.y); L = TwoIntoOne(in.z, in.w) // TODO : Verify whether this works at all ! + +// Dot mappings over the output value of a (4 component 8 bit unsigned) texture stage register into a (3 component float) vector value, for use in a dot product calculation: +#define PS_DOTMAPPING_ZERO_TO_ONE(in) dm = in.rgb // :r8g8b8a8->(r,g,b): 0x00=>0, 0xff=>1 thus : output = (input / 0xff ) +#define PS_DOTMAPPING_MINUS1_TO_1_D3D(in) dm = float3(m21d(in.x), m21d(in.y), m21d(in.z)) // :r8g8b8a8->(r,g,b): 0x00=>-128/127, 0x01=>-1, 0x80=>0, 0xff=>1 thus : output = ((input - 0x100 ) / 0x7f ) +#define PS_DOTMAPPING_MINUS1_TO_1_GL(in) dm = float3(m21g(in.x), m21g(in.y), m21g(in.z)) // :r8g8b8a8->(r,g,b): 0x80=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? (input / 0x7f ) : ((input - 0x100 ) / 0x80 ) (see https://en.wikipedia.org/wiki/Two's_complement) +#define PS_DOTMAPPING_MINUS1_TO_1(in) dm = float3(m21(in.x), m21(in.y), m21(in.z)) // :r8g8b8a8->(r,g,b): 0x80=>-128/127, ?0x81=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? (input / 0x7f ) : ((input - 0x100 ) / 0x7f ) (see https://en.wikipedia.org/wiki/Two's_complement) + +#define PS_DOTMAPPING_HILO_1(in) CalcHiLo(in); dm = float3(H, L, 1) // :H16L16 ->(H,L,1): 0x0000=>0, 0xffff=>1 thus : output = (input / 0xffff) +#define PS_DOTMAPPING_HILO_HEMISPHERE_D3D(in) CalcHiLo(in); dm = float3(H, L, sqrt(1-(H*H)-(L*L))) // :H16L16 ->(H,L,sqrt(1-H^2-L^2)):? 0x8000=>-1, 0x0000=>0, 0x7fff=32767/32768 thus : output = ((input - 0x10000) / 0x7fff) +#define PS_DOTMAPPING_HILO_HEMISPHERE_GL(in) CalcHiLo(in); dm = float3(H, L, sqrt(1-(H*H)-(L*L))) // :H16L16 ->(H,L,sqrt(1-H^2-L^2)):? 0x8000=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? 
(input / 0x7fff) : ((input - 0x10000) / 0x8000) +#define PS_DOTMAPPING_HILO_HEMISPHERE(in) CalcHiLo(in); dm = float3(H, L, sqrt(1-(H*H)-(L*L))) // :H16L16 ->(H,L,sqrt(1-H^2-L^2)): 0x8000=>-32768/32767, 0x8001=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? (input / 0x7fff) : ((input - 0x10000) / 0x7fff) + +// Declare one sampler per each {Sampler Type, Texture Stage} combination +// TODO : Generate sampler status? +sampler samplers[4] : register(s0); + +// Declare alphakill as a variable (avoiding a constant, to allow false's to be optimized away) : +#ifndef ALPHAKILL + #define ALPHAKILL {false, false, false, false} +#endif +static bool alphakill[4] = ALPHAKILL; + +float4 PostProcessTexel(const int ts, float4 t) +{ + if (alphakill[ts]) + if (t.a == 0) + discard; + + return t; +} + +// Actual texture sampling per texture stage (ts), using the sampling vector (s) as input, +// abstracting away the specifics of accessing above sampler declarations (usefull for future Direct3D 10+ sampler arrays) +float4 Sample2D(int ts, float3 s) +{ + float4 result = tex2D(samplers[ts], s.xy); // Ignores s.z (and whatever it's set to, will be optimized away by the compiler, see [1] below) + return PostProcessTexel(ts, result); +} + +float4 Sample3D(int ts, float3 s) +{ + float4 result = tex3D(samplers[ts], s.xyz); + return PostProcessTexel(ts, result); +} + +float4 Sample6F(int ts, float3 s) +{ + float4 result = texCUBE(samplers[ts], s.xyz); + return PostProcessTexel(ts, result); +} + +// Test-case JSRF (boost-dash effect). 
+float3 DoBumpEnv(const float4 TexCoord, const float4 BumpEnvMat, const float4 src) +{ + // Convert the input bump map (source texture) value range into two's complement signed values (from (0, +1) to (-1, +1), using s_bx2): + const float4 BumpMap = s_bx2(src); // Note : medieval discovered s_bias improved JSRF, PatrickvL changed it into s_bx2 thanks to http://www.rastertek.com/dx11tut20.html + // TODO : The above should be removed, and replaced by some form of COLORSIGN handling, which may not be possible inside this pixel shader, because filtering-during-sampling would cause artifacts. + + const float u = TexCoord.x + (BumpEnvMat.x * BumpMap.r) + (BumpEnvMat.z * BumpMap.g); // Or : TexCoord.x + dot(BumpEnvMat.xz, BumpMap.rg) + const float v = TexCoord.y + (BumpEnvMat.y * BumpMap.r) + (BumpEnvMat.w * BumpMap.g); // Or : TexCoord.y + dot(BumpEnvMat.yw, BumpMap.rg) + + return float3(u, v, 0); +} + +// Map texture registers to their array elements. Having texture registers in an array allows indexed access to them +#define t0 t[0] +#define t1 t[1] +#define t2 t[2] +#define t3 t[3] + +// Resolve a stage number via 'input texture (index) mapping' to it's corresponding output texture register (rgba?) +#define src(ts) t[PS_INPUTTEXTURE_[ts]] + +// Calculate the dot result for a given texture stage. Since any given stage is input-mapped to always be less than or equal the stage it appears in, this won't cause read-ahead issues +// Test case: BumpDemo demo +#define CalcDot(ts) PS_DOTMAPPING_ ## ts(src(ts)); dot_[ts] = dot(iT[ts].xyz, dm) + +// Addressing operations + +// Clamps input texture coordinates to the range [0..1] +// Note alpha is passed through rather than set to one like ps_1_3 'texcoord' +// Test case: Metal Arms (menu skybox clouds, alpha is specifically set in the VS) +#define Passthru(ts) float4(saturate(iT[ts])) +#define Brdf(ts) float3(t[ts-2].y, t[ts-1].y, t[ts-2].x - t[ts-1].x) // TODO : Complete 16 bit phi/sigma retrieval from float4 texture register. 
Perhaps use CalcHiLo? +#define Normal2(ts) float3(dot_[ts-1], dot_[ts], 0) // Preceding and current stage dot result. Will be input for Sample2D. +#define Normal3(ts) float3(dot_[ts-2], dot_[ts-1], dot_[ts]) // Two preceding and current stage dot result. +#define Eye float3(iT[1].w, iT[2].w, iT[3].w) // 4th (q) component of input texture coordinates 1, 2 and 3. Only used by texm3x3vspec/PS_TEXTUREMODES_DOT_RFLCT_SPEC, always at stage 3. TODO : Map iT[1/2/3] through PS_INPUTTEXTURE_[]? +#define Reflect(n, e) 2 * (dot(n, e) / dot(n, n)) * n - e // https://documentation.help/directx8_c/texm3x3vspec.htm +#define BumpEnv(ts) DoBumpEnv(iT[ts], BEM[ts], src(ts)) // Will be input for Sample2D. +#define LSO(ts) (LUM[ts].x * src(ts).b) + LUM[ts].y // Uses PSH_XBOX_CONSTANT_LUM .x = D3DTSS_BUMPENVLSCALE .y = D3DTSS_BUMPENVLOFFSET + +// Implementations for all possible texture modes, with stage as argument (prefixed with valid stages and corresponding pixel shader 1.3 assembly texture addressing instructions) +// For ease of understanding, all follow this plan : Optional specifics, or dot calculation (some with normal selection) and sampling vector determination. All end by deriving a value and assigning this to the stage's texture register. +/*0123 tex */ #define PS_TEXTUREMODES_NONE(ts) v = black; t[ts] = v // Seems to work +/*0123 tex */ #define PS_TEXTUREMODES_PROJECT2D(ts) s = iT[ts].xyz; v = Sample2D(ts, s); t[ts] = v // Seems to work (are x/w and y/w implicit?) [1] +/*0123 tex */ #define PS_TEXTUREMODES_PROJECT3D(ts) s = iT[ts].xyz; v = Sample3D(ts, s); t[ts] = v // Seems to work (is z/w implicit?) 
+/*0123 tex */ #define PS_TEXTUREMODES_CUBEMAP(ts) s = iT[ts].xyz; v = Sample6F(ts, s); t[ts] = v // TODO : Test +/*0123 texcoord */ #define PS_TEXTUREMODES_PASSTHRU(ts) v = Passthru(ts); t[ts] = v // Seems to work +/*0123 texkill */ #define PS_TEXTUREMODES_CLIPPLANE(ts) PS_COMPAREMODE_ ## ts(iT[ts]); v = black; t[ts] = v // Seems to work (setting black to texture register, in case it gets read) +/*-123 texbem */ #define PS_TEXTUREMODES_BUMPENVMAP(ts) s = BumpEnv(ts); v = Sample2D(ts, s); t[ts] = v // Seems to work +/*-123 texbeml */ #define PS_TEXTUREMODES_BUMPENVMAP_LUM(ts) PS_TEXTUREMODES_BUMPENVMAP(ts); v.rgb *= LSO(ts); t[ts] = v // TODO : Test +/*--23 texbrdf */ #define PS_TEXTUREMODES_BRDF(ts) s = Brdf(ts); v = Sample3D(ts, s); t[ts] = v // TODO : Test (t[ts-2] is 16 bit eyePhi,eyeSigma; t[ts-1] is lightPhi,lightSigma) +/*--23 texm3x2tex */ #define PS_TEXTUREMODES_DOT_ST(ts) CalcDot(ts); n = Normal2(ts); s = n; v = Sample2D(ts, s); t[ts] = v // TODO : Test +/*--23 texm3x2depth */ #define PS_TEXTUREMODES_DOT_ZW(ts) CalcDot(ts); n = Normal2(ts); if (n.y==0) v=1;else v = n.x / n.y; t[ts] = v // TODO : Make depth-check use result of division, but how? 
+/*--2- texm3x3diff */ #define PS_TEXTUREMODES_DOT_RFLCT_DIFF(ts) CalcDot(ts); n = Normal3(ts); s = n; v = Sample6F(ts, s); t[ts] = v // TODO : Test +/*---3 texm3x3vspec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC(ts) CalcDot(ts); n = Normal3(ts); s = Reflect(n, Eye); v = Sample6F(ts, s); t[ts] = v // TODO : Test +/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_3D(ts) CalcDot(ts); n = Normal3(ts); s = n; v = Sample3D(ts, s); t[ts] = v // TODO : Test +/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_CUBE(ts) CalcDot(ts); n = Normal3(ts); s = n; v = Sample6F(ts, s); t[ts] = v // TODO : Test +/*-123 texreg2ar */ #define PS_TEXTUREMODES_DPNDNT_AR(ts) s = src(ts).arg; v = Sample2D(ts, s); t[ts] = v // TODO : Test [1] +/*-123 texreg2bg */ #define PS_TEXTUREMODES_DPNDNT_GB(ts) s = src(ts).gba; v = Sample2D(ts, s); t[ts] = v // TODO : Test [1] +// TODO replace dm with dot_[ts]? Confirm BumpDemo 'Cubemap only' modes +/*-12- texm3x2pad */ #define PS_TEXTUREMODES_DOTPRODUCT(ts) CalcDot(ts); v = float4(dm, 0); t[ts] = v // TODO : Test all dot mapping (setting texture register, in case it gets read - test-case : BumpDemo) +/*---3 texm3x3spec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST(ts) CalcDot(ts); n = Normal3(ts); s = Reflect(n, C0); v = Sample6F(ts, s); t[ts] = v // TODO : Test +// [1] Note : 3rd component set to s.z is just an (ignored) placeholder to produce a float3 (made unique, to avoid the potential complexity of repeated components) + +PS_OUTPUT main(const PS_INPUT xIn) +{ + // Local constants + const float4 zero = 0; + const float4 half = 0.5; // = s_negbias(zero) + const float4 one = 1; // = s_comp(zero) + const float4 black = float4(0, 0, 0, 1); // opaque black + const float4 iT[4] = { xIn.iT0, xIn.iT1, xIn.iT2, xIn.iT3 }; // Map input texture coordinates to an array, for indexing purposes + + // Xbox register variables + float4 r0, r1; // Temporary registers + float4 t[4]; // Texture coordinate registers + float4 v0, v1; // Vertex color registers + 
float4 _discard; // Write-only discard 'register' (we assume the HLSL compilers' optimization pass will remove assignments to this) + float4 fog; // Read-only fog register, reading alpha is only allowed in final combiner + float4 sum, prod; // Special purpose registers for xfc (final combiner) operation + + // Helper variables + int stage = 0; // Write-only variable, emitted as prefix-comment before each 'opcode', used in C0 and C1 macro's (and should thus get optimized away), initialized to zero for use of C0 in PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST + float4 tmp; + float H, L; // HILO (high/low) temps + float dot_[4]; + float3 dm; // Dot mapping temporary + float3 n; // Normal vector (based on preceding dot_[] values) + float3 s; // Actual texture coordinate sampling coordinates (temporary) + float4 v; // Texture value (temporary) + + // Determine if this is a front face or backface + bool isFrontFace = (xIn.iFF * FRONTFACE_FACTOR) >= 0; + + // Initialize variables + r0 = r1 = black; // Note : r0.a/r1.a will be overwritten by t0.a/t1.a (opaque_black will be retained for PS_TEXTUREMODES_NONE) + // Note : VFACE/FrontFace has been unreliable, investigate again if some test-case shows bland colors + v0 = isFrontFace ? xIn.iD0 : xIn.iB0; // Diffuse front/back + v1 = isFrontFace ? xIn.iD1 : xIn.iB1; // Specular front/back + fog = float4(c_fog.rgb, xIn.iFog); // color from PSH_XBOX_CONSTANT_FOG, alpha from vertex shader output / pixel shader input + + // Xbox shader program +)DELIMITER", /* This terminates the 2nd raw string within the 16380 single-byte characters limit. 
// */ +// Third and last raw string, the footer : +R"DELIMITER( + + // Copy r0.rgba to output + PS_OUTPUT xOut; + + xOut.oR0 = r0; + + return xOut; +} + +// End of pixel shader footer)DELIMITER" /* This terminates the footer raw string" // */ diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index 6163a39d5..477cbb9ac 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -7,13 +7,13 @@ struct VS_INPUT }; // Output registers -struct VS_OUTPUT +struct VS_OUTPUT // Declared identical to pixel shader input (see PS_INPUT) { float4 oPos : POSITION; // Homogeneous clip space position float4 oD0 : COLOR0; // Primary color (front-facing) float4 oD1 : COLOR1; // Secondary color (front-facing) float oFog : FOG; // Fog coordinate - float oPts : PSIZE; // Point size + float oPts : PSIZE; // Point size float4 oB0 : TEXCOORD4; // Back-facing primary color float4 oB1 : TEXCOORD5; // Back-facing secondary color float4 oT0 : TEXCOORD0; // Texture coordinate set 0 @@ -37,6 +37,9 @@ uniform float4 xboxScreenspaceOffset : register(c213); uniform float4 xboxTextureScale[4] : register(c214); +// Parameters for mapping the shader's fog output value to a fog factor +uniform float4 CxbxFogInfo: register(c218); // = CXBX_D3DVS_CONSTREG_FOGINFO + // Overloaded casts, assuring all inputs are treated as float4 float4 _tof4(float src) { return float4(src, src, src, src); } float4 _tof4(float2 src) { return src.xyyy; } @@ -294,7 +297,8 @@ VS_OUTPUT main(const VS_INPUT xIn) // Single component outputs float4 oFog, oPts; // x is write-only on Xbox. Use float4 as some games use incorrect masks - oFog = oPts = 0; + oFog = 1; // Default to no fog. 
Test case: Lego Star Wars II + oPts = 0; // Address (index) register int1 a0 = 0; @@ -324,12 +328,35 @@ VS_OUTPUT main(const VS_INPUT xIn) R"DELIMITER( // Copy variables to output struct - VS_OUTPUT xOut; + VS_OUTPUT xOut; + // Fogging + // TODO deduplicate + const float fogDepth = oFog.x; // Don't abs this value! Test-case : DolphinClassic xdk sample + const float fogTableMode = CxbxFogInfo.x; + const float fogDensity = CxbxFogInfo.y; + const float fogStart = CxbxFogInfo.z; + const float fogEnd = CxbxFogInfo.w; + + const float FOG_TABLE_NONE = 0; + const float FOG_TABLE_EXP = 1; + const float FOG_TABLE_EXP2 = 2; + const float FOG_TABLE_LINEAR = 3; + + float fogFactor; + if(fogTableMode == FOG_TABLE_NONE) + fogFactor = fogDepth; + if(fogTableMode == FOG_TABLE_EXP) + fogFactor = 1 / exp(fogDepth * fogDensity); /* / 1 / e^(d * density)*/ + if(fogTableMode == FOG_TABLE_EXP2) + fogFactor = 1 / exp(pow(fogDepth * fogDensity, 2)); /* / 1 / e^((d * density)^2)*/ + if(fogTableMode == FOG_TABLE_LINEAR) + fogFactor = (fogEnd - fogDepth) / (fogEnd - fogStart); + xOut.oPos = reverseScreenspaceTransform(oPos); xOut.oD0 = saturate(oD0); xOut.oD1 = saturate(oD1); - xOut.oFog = oFog.x; // Note : Xbox clamps fog in pixel shader + xOut.oFog = fogFactor; // Note : Xbox clamps fog in pixel shader -> *NEEDS TESTING* /was oFog.x xOut.oPts = oPts.x; xOut.oB0 = saturate(oB0); xOut.oB1 = saturate(oB1); diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index adaa7eb70..65b524a0a 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -297,7 +297,7 @@ g_EmuCDPD; #define XB_TRAMPOLINES(XB_MACRO) \ XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_CreateVertexShader, (CONST xbox::dword_xt*, CONST xbox::dword_xt*, xbox::dword_xt*, xbox::dword_xt) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader, (xbox::dword_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader_0, () 
); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader_0, () ); \ XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_GetBackBuffer, (xbox::int_xt, D3DBACKBUFFER_TYPE, xbox::X_D3DSurface**) ); \ XB_MACRO(xbox::X_D3DSurface*, WINAPI, D3DDevice_GetBackBuffer2, (xbox::int_xt) ); \ XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_GetDepthStencilSurface, (xbox::X_D3DSurface**) ); \ @@ -337,9 +337,9 @@ g_EmuCDPD; XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetVertexShader_0, () ); \ XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetVertexShaderInput, (xbox::dword_xt, xbox::uint_xt, xbox::X_STREAMINPUT*) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetViewport, (CONST xbox::X_D3DVIEWPORT8*) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform_0, () ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_MultiplyTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_MultiplyTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3D_DestroyResource, (xbox::X_D3DResource*) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3D_DestroyResource__LTCG, (xbox::void_xt) ); \ XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice, (xbox::uint_xt, D3DDEVTYPE, HWND, xbox::dword_xt, xbox::X_D3DPRESENT_PARAMETERS*, xbox::X_D3DDevice**)); \ @@ -348,7 +348,7 @@ g_EmuCDPD; XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice_4, (xbox::X_D3DPRESENT_PARAMETERS*) ); \ XB_MACRO(xbox::void_xt, WINAPI, Lock2DSurface, (xbox::X_D3DPixelContainer*, D3DCUBEMAP_FACES, xbox::uint_xt, D3DLOCKED_RECT*, RECT*, xbox::dword_xt) ); \ XB_MACRO(xbox::void_xt, WINAPI, Lock3DSurface, (xbox::X_D3DPixelContainer*, xbox::uint_xt, D3DLOCKED_BOX*, D3DBOX*, xbox::dword_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, 
D3D_CommonSetRenderTarget, (xbox::X_D3DSurface*, xbox::X_D3DSurface*, void*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3D_CommonSetRenderTarget, (xbox::X_D3DSurface*, xbox::X_D3DSurface*, void*) ); \ XB_TRAMPOLINES(XB_trampoline_declare); @@ -1980,7 +1980,7 @@ static LRESULT WINAPI EmuMsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lPar } else if (wParam == VK_F2) { - g_UseFixedFunctionVertexShader = !g_UseFixedFunctionVertexShader; + g_UseFixedFunctionPixelShader = !g_UseFixedFunctionPixelShader; } else if (wParam == VK_F3) { @@ -4184,7 +4184,7 @@ void ValidateRenderTargetDimensions(DWORD HostRenderTarget_Width, DWORD HostRend } } -float GetZScaleForSurface(xbox::X_D3DSurface* pSurface) +float GetZScaleForPixelContainer(xbox::X_D3DPixelContainer* pSurface) { // If no surface was present, fallback to 1 if (pSurface == xbox::zeroptr) { @@ -5012,15 +5012,15 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_Clear) // Scale the fill based on our scale factor and MSAA scale float aaX, aaY; GetMultiSampleScaleRaw(aaX, aaY); - aaX *= g_RenderUpscaleFactor; - aaY *= g_RenderUpscaleFactor; + float Xscale = aaX * g_RenderUpscaleFactor; + float Yscale = aaY * g_RenderUpscaleFactor; std::vector rects(Count); for (DWORD i = 0; i < Count; i++) { - rects[i].x1 = static_cast(pRects[i].x1 * aaX); - rects[i].x2 = static_cast(pRects[i].x2 * aaX); - rects[i].y1 = static_cast(pRects[i].y1 * aaY); - rects[i].y2 = static_cast(pRects[i].y2 * aaY); + rects[i].x1 = static_cast(pRects[i].x1 * Xscale); + rects[i].x2 = static_cast(pRects[i].x2 * Xscale); + rects[i].y1 = static_cast(pRects[i].y1 * Yscale); + rects[i].y2 = static_cast(pRects[i].y2 * Yscale); } hRet = g_pD3DDevice->Clear(Count, rects.data(), HostFlags, Color, Z, Stencil); } else { @@ -6406,13 +6406,9 @@ void UpdateFixedFunctionShaderLight(int d3dLightIndex, Light* pShaderLight, D3DX pShaderLight->SpotIntensityDivisor = cos(d3dLight->Theta / 2) - cos(d3dLight->Phi / 2); } -float AsFloat(uint32_t value) { - auto v = value; - return 
*(float*)&v; -} - void UpdateFixedFunctionVertexShaderState() { + extern xbox::X_VERTEXATTRIBUTEFORMAT* GetXboxVertexAttributeFormat(); // TMP glue using namespace xbox; // Vertex blending @@ -6452,7 +6448,11 @@ void UpdateFixedFunctionVertexShaderState() } // Lighting - ffShaderState.Modes.Lighting = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_LIGHTING); + // Point sprites aren't lit - 'each point is always rendered with constant colors.' + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/point-sprites + bool PointSpriteEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSPRITEENABLE); + bool LightingEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_LIGHTING); + ffShaderState.Modes.Lighting = LightingEnable && !PointSpriteEnable; ffShaderState.Modes.TwoSidedLighting = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_TWOSIDEDLIGHTING); ffShaderState.Modes.LocalViewer = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_LOCALVIEWER); @@ -6467,28 +6467,39 @@ void UpdateFixedFunctionVertexShaderState() ffShaderState.Modes.BackSpecularMaterialSource = (float)(ColorVertex ? XboxRenderStates.GetXboxRenderState(X_D3DRS_BACKSPECULARMATERIALSOURCE) : D3DMCS_MATERIAL); ffShaderState.Modes.BackEmissiveMaterialSource = (float)(ColorVertex ? 
XboxRenderStates.GetXboxRenderState(X_D3DRS_BACKEMISSIVEMATERIALSOURCE) : D3DMCS_MATERIAL); - // Point sprites - auto pointSize = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE); - auto pointSizeMin = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE_MIN); - auto pointSizeMax = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE_MAX); - ffShaderState.PointSprite.PointSize = *reinterpret_cast(&pointSize); - ffShaderState.PointSprite.PointSizeMin = *reinterpret_cast(&pointSizeMin); - ffShaderState.PointSprite.PointSizeMax = *reinterpret_cast(&pointSizeMax); - + // Point sprites; Fetch required variables + float pointSize = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE); + float pointSize_Min = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MIN); + float pointSize_Max = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MAX); bool PointScaleEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALEENABLE); - auto scaleA = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_A); - auto scaleB = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_B); - auto scaleC = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_C); - ffShaderState.PointSprite.ScaleABC.x = PointScaleEnable ? *reinterpret_cast(&scaleA) : 1.0f; - ffShaderState.PointSprite.ScaleABC.y = PointScaleEnable ? *reinterpret_cast(&scaleB) : 0.0f; - ffShaderState.PointSprite.ScaleABC.z = PointScaleEnable ? *reinterpret_cast(&scaleC) : 0.0f; - ffShaderState.PointSprite.XboxRenderTargetHeight = PointScaleEnable ? 
(float)GetPixelContainerHeight(g_pXbox_RenderTarget) : 1.0f; + float pointScale_A = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_A); + float pointScale_B = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_B); + float pointScale_C = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_C); + float renderTargetHeight = (float)GetPixelContainerHeight(g_pXbox_RenderTarget); + // Make sure to disable point scaling when point sprites are not enabled + PointScaleEnable &= PointSpriteEnable; + // Set variables in shader state + ffShaderState.PointSprite.PointSize = PointSpriteEnable ? pointSize : 1.0f; + ffShaderState.PointSprite.PointSize_Min = PointSpriteEnable ? pointSize_Min : 1.0f; + ffShaderState.PointSprite.PointSize_Max = PointSpriteEnable ? pointSize_Max : 1.0f; + ffShaderState.PointSprite.PointScaleABC.x = PointScaleEnable ? pointScale_A : 1.0f; + ffShaderState.PointSprite.PointScaleABC.y = PointScaleEnable ? pointScale_B : 0.0f; + ffShaderState.PointSprite.PointScaleABC.z = PointScaleEnable ? pointScale_C : 0.0f; + ffShaderState.PointSprite.XboxRenderTargetHeight = PointScaleEnable ? renderTargetHeight : 1.0f; ffShaderState.PointSprite.RenderUpscaleFactor = g_RenderUpscaleFactor; // Fog + // Determine how the fog depth is transformed into the fog factor + auto fogEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGENABLE); + auto fogTableMode = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGTABLEMODE); + ffShaderState.Fog.Enable = fogEnable; + // FIXME remove when fixed function PS is implemented + // Note if we are using the fixed function pixel shader + // We only want to produce the fog depth value in the VS, not the fog factor + ffShaderState.Fog.TableMode = !g_UseFixedFunctionPixelShader ? 
D3DFOG_NONE : fogTableMode; + // Determine how fog depth is calculated - if (XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGENABLE) && - XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGTABLEMODE) != D3DFOG_NONE) { + if (fogEnable && fogTableMode != D3DFOG_NONE) { auto proj = &ffShaderState.Transforms.Projection; if (XboxRenderStates.GetXboxRenderState(X_D3DRS_RANGEFOGENABLE)) { @@ -6508,13 +6519,20 @@ void UpdateFixedFunctionVertexShaderState() // JSRF (non-compliant projection matrix) ffShaderState.Fog.DepthMode = FixedFunctionVertexShader::FOG_DEPTH_W; } + + auto density = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGDENSITY); + auto fogStart = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGSTART); + auto fogEnd = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGEND); + ffShaderState.Fog.Density = *reinterpret_cast(&density); + ffShaderState.Fog.Start = *reinterpret_cast(&fogStart); + ffShaderState.Fog.End = *reinterpret_cast(&fogEnd); } else { ffShaderState.Fog.DepthMode = FixedFunctionVertexShader::FOG_DEPTH_NONE; } // Texture state - for (int i = 0; i < 4; i++) { + for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { auto transformFlags = XboxTextureStates.Get(i, X_D3DTSS_TEXTURETRANSFORMFLAGS); ffShaderState.TextureStates[i].TextureTransformFlagsCount = (float)(transformFlags & ~D3DTTFF_PROJECTED); ffShaderState.TextureStates[i].TextureTransformFlagsProjected = (float)(transformFlags & D3DTTFF_PROJECTED); @@ -6524,9 +6542,14 @@ void UpdateFixedFunctionVertexShaderState() ffShaderState.TextureStates[i].TexCoordIndexGen = (float)(texCoordIndex >> 16); // D3DTSS_TCI flags } - // TexCoord component counts - extern xbox::X_VERTEXATTRIBUTEFORMAT* GetXboxVertexAttributeFormat(); // TMP glue + // Read current TexCoord component counts xbox::X_VERTEXATTRIBUTEFORMAT* pXboxVertexAttributeFormat = GetXboxVertexAttributeFormat(); + // Note : There seem to be other ways to access this, but we can use only this one; + // This, because CxbxGetVertexDeclaration() can't be 
used, since it doesn't track VertexAttributes + // (plus, it contains the overhead of shader lookup). + // Another, GetXboxVertexShader(), can't be used, because it doesn't honor vertex attribute overrides + // like those that apply for g_InlineVertexBuffer_DeclarationOverride and active SetVertexShaderInput. + // Also, the xbox::X_D3DVertexShader.Dimensionality[] field contains somewhat strange values. for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { auto vertexDataFormat = pXboxVertexAttributeFormat->Slots[xbox::X_D3DVSDE_TEXCOORD0 + i].Format; ffShaderState.TexCoordComponentCount[i] = (float)GetXboxVertexDataComponentCount(vertexDataFormat); @@ -7522,10 +7545,22 @@ void CxbxUpdateHostTextureScaling() *texCoordScale = { width, height, - (float)CxbxGetPixelContainerDepth(pXboxBaseTexture), + 1.0f, // TODO should this be mip levels for volume textures? 1.0f }; } + + // When a depth buffer is used as a texture + // We do 'Native Shadow Mapping' + // https://aras-p.info/texts/D3D9GPUHacks.html + // The z texture coordinate component holds a depth value, which needs to be normalized + // TODO implement handling for + // - X_D3DRS_SHADOWFUNC + // - X_D3DRS_POLYGONOFFSETZSLOPESCALE + // - X_D3DRS_POLYGONOFFSETZOFFSET + if (EmuXBFormatIsDepthBuffer(XboxFormat)) { + (*texCoordScale)[2] = (float)GetZScaleForPixelContainer(pXboxBaseTexture); + } } // Pass above determined texture scaling factors to our HLSL shader. 
// Note : CxbxVertexShaderTemplate.hlsl applies texture scaling on @@ -7612,6 +7647,14 @@ void CxbxUpdateHostVertexShaderConstants() // Need for Speed: Hot Pursuit 2 (car select) CxbxUpdateHostViewPortOffsetAndScaleConstants(); } + + // Placed this here until we find a better place + const uint32_t fogTableMode = XboxRenderStates.GetXboxRenderState(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGTABLEMODE); + const float fogDensity = XboxRenderStates.GetXboxRenderStateAsFloat(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGDENSITY); + const float fogStart = XboxRenderStates.GetXboxRenderStateAsFloat(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGSTART); + const float fogEnd = XboxRenderStates.GetXboxRenderStateAsFloat(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGEND); + float fogStuff[4] = { (float)fogTableMode, fogDensity, fogStart, fogEnd }; + g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_FOGINFO, fogStuff, 1); } void CxbxUpdateHostViewport() { @@ -7628,16 +7671,16 @@ void CxbxUpdateHostViewport() { LOG_TEST_CASE("Could not get rendertarget dimensions while setting the viewport"); } - aaScaleX *= g_RenderUpscaleFactor; - aaScaleY *= g_RenderUpscaleFactor; + float Xscale = aaScaleX * g_RenderUpscaleFactor; + float Yscale = aaScaleY * g_RenderUpscaleFactor; if (g_Xbox_VertexShaderMode == VertexShaderMode::FixedFunction) { // Set viewport D3DVIEWPORT hostViewport = g_Xbox_Viewport; - hostViewport.X *= aaScaleX; - hostViewport.Y *= aaScaleY; - hostViewport.Width *= aaScaleX; - hostViewport.Height *= aaScaleY; + hostViewport.X *= Xscale; + hostViewport.Y *= Yscale; + hostViewport.Width *= Xscale; + hostViewport.Height *= Yscale; g_pD3DDevice->SetViewport(&hostViewport); // Reset scissor rect @@ -7669,10 +7712,10 @@ void CxbxUpdateHostViewport() { // Scissor to viewport g_pD3DDevice->SetRenderState(D3DRS_SCISSORTESTENABLE, TRUE); RECT viewportRect; - viewportRect.left = g_Xbox_Viewport.X * aaScaleX; - viewportRect.top = g_Xbox_Viewport.Y * aaScaleY; - viewportRect.right = 
viewportRect.left + g_Xbox_Viewport.Width * aaScaleX; - viewportRect.bottom = viewportRect.top + g_Xbox_Viewport.Height * aaScaleY; + viewportRect.left = g_Xbox_Viewport.X * Xscale; + viewportRect.top = g_Xbox_Viewport.Y * Yscale; + viewportRect.right = viewportRect.left + (g_Xbox_Viewport.Width * Xscale); + viewportRect.bottom = viewportRect.top + (g_Xbox_Viewport.Height * Yscale); g_pD3DDevice->SetScissorRect(&viewportRect); } } @@ -7848,9 +7891,13 @@ xbox::void_xt CxbxImpl_SetPixelShader(xbox::dword_xt Handle) // Cache the active shader handle g_pXbox_PixelShader = (xbox::X_PixelShader*)Handle; - // Copy the Pixel Shader data to our RenderState handler + // Copy the Pixel Shader data to our RenderState handler (this includes values for pixel shader constants) // This mirrors the fact that unpatched SetPixelShader does the same thing! // This shouldn't be necessary anymore, but shaders still break if we don't do this + // This breakage might be caused by our push-buffer processing could be "trailing behind" what our patches do; + // By writing to render state during this patch, we avoid missing out on updates that push buffer commands would perform. + // However, any updates that occur mid-way can overwrite what we store here, and still cause problems! + // The only viable solution for that would be to draw entirely based on push-buffer handling (which might require removing possibly all D3D patches!) if (g_pXbox_PixelShader != nullptr) { // TODO : If D3DDevice_SetPixelShader() in XDKs don't overwrite the X_D3DRS_PS_RESERVED slot with PSDef.PSTextureModes, // store it here and restore after memcpy, or alternatively, perform two separate memcpy's (the halves before, and after the reserved slot). 
@@ -8397,7 +8444,7 @@ static void CxbxImpl_SetRenderTarget // The currenct depth stencil is always replaced by whats passed in here (even a null) g_pXbox_DepthStencil = pNewZStencil; - g_ZScale = GetZScaleForSurface(g_pXbox_DepthStencil); // TODO : Discern between Xbox and host and do this in UpdateDepthStencilFlags? + g_ZScale = GetZScaleForPixelContainer(g_pXbox_DepthStencil); // TODO : Discern between Xbox and host and do this in UpdateDepthStencilFlags? pHostDepthStencil = GetHostSurface(g_pXbox_DepthStencil, D3DUSAGE_DEPTHSTENCIL); HRESULT hRet; diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl new file mode 100644 index 000000000..fbe40b1e0 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl @@ -0,0 +1,297 @@ +#include "FixedFunctionPixelShader.hlsli" + +uniform FixedFunctionPixelShaderState state : register(c0); +sampler samplers[4] : register(s0); + +struct PS_INPUT // Declared identical to vertex shader output (see VS_OUTPUT) +{ + float2 iPos : VPOS; // Screen space x,y pixel location + float4 iD0 : COLOR0; // Front-facing primary (diffuse) vertex color (clamped to 0..1) + float4 iD1 : COLOR1; // Front-facing secondary (specular) vertex color (clamped to 0..1) + float iFog : FOG; + float iPts : PSIZE; + float4 iB0 : TEXCOORD4; // Back-facing primary (diffuse) vertex color (clamped to 0..1) + float4 iB1 : TEXCOORD5; // Back-facing secondary (specular) vertex color (clamped to 0..1) + float4 iT[4] : TEXCOORD0; // Texture Coord 0 + float iFF : VFACE; // Front facing if > 0 +}; + +// These 'D3DTA' texture argument values +// may be used during each texture stage +struct TextureArgs { + float4 CURRENT; + float4 TEXTURE; + float4 DIFFUSE; + float4 SPECULAR; + float4 TEMP; + float4 TFACTOR; +}; + +static float4 TexCoords[4]; + +// When creating an instance of the fixed function shader +// we string-replace the assignment below with a value +// The define 
keeps the shader compilable without the replacement +#define TEXTURE_SAMPLE_TYPE {SAMPLE_2D, SAMPLE_2D, SAMPLE_2D, SAMPLE_2D}; +static int TextureSampleType[4] = TEXTURE_SAMPLE_TYPE; + +bool HasFlag(float value, float flag) { + // http://theinstructionlimit.com/encoding-boolean-flags-into-a-float-in-hlsl + return fmod(value, flag) >= flag / 2; +} + +float4 GetArg(float arg, TextureArgs ctx) { + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dta + bool alphaReplicate = HasFlag(arg, X_D3DTA_ALPHAREPLICATE); + bool complement = HasFlag(arg, X_D3DTA_COMPLEMENT); + arg = arg % 16; + + float4 o; + + if (arg == X_D3DTA_DIFFUSE) + o = ctx.DIFFUSE; + if (arg == X_D3DTA_CURRENT) + o = ctx.CURRENT; + if (arg == X_D3DTA_TEXTURE) + o = ctx.TEXTURE; + if (arg == X_D3DTA_TFACTOR) + o = ctx.TFACTOR; + if (arg == X_D3DTA_SPECULAR) + o = ctx.SPECULAR; + if (arg == X_D3DTA_TEMP) + o = ctx.TEMP; + + if (alphaReplicate) + return o.aaaa; + else if (complement) + return 1 - o; + else + return o; +} + +float4 ExecuteTextureOp(float op, float4 arg1, float4 arg2, float4 arg0, TextureArgs ctx, PsTextureStageState stage) { + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtextureop + + // Note if we use ifs here instead of else if + // D3DCompile may stackoverflow at runtime + if (op == X_D3DTOP_SELECTARG1) + return arg1; + else if (op == X_D3DTOP_SELECTARG2) + return arg2; + else if (op == X_D3DTOP_MODULATE) + return arg1 * arg2; + else if (op == X_D3DTOP_MODULATE2X) + return 2 * (arg1 * arg2); + else if (op == X_D3DTOP_MODULATE4X) + return 4 * (arg1 * arg2); + else if (op == X_D3DTOP_ADD) + return arg1 + arg2; + else if (op == X_D3DTOP_ADDSIGNED) + return arg1 + arg2 - 0.5; + else if (op == X_D3DTOP_ADDSIGNED2X) + return 2 * (arg1 + arg2 - 0.5); + else if (op == X_D3DTOP_SUBTRACT) + return arg1 - arg2; + else if (op == X_D3DTOP_ADDSMOOTH) + return arg1 + arg2 * (1 - arg1); + else if (op == X_D3DTOP_BLENDDIFFUSEALPHA) + return arg1 * ctx.DIFFUSE.a + arg2 * (1 - 
ctx.DIFFUSE.a); + else if (op == X_D3DTOP_BLENDCURRENTALPHA) + return arg1 * ctx.CURRENT.a + arg2 * (1 - ctx.CURRENT.a); + else if (op == X_D3DTOP_BLENDTEXTUREALPHA) + return arg1 * ctx.TEXTURE.a + arg2 * (1 - ctx.TEXTURE.a); + else if (op == X_D3DTOP_BLENDFACTORALPHA) + return arg1 * ctx.TFACTOR.a + arg2 * (1 - ctx.TFACTOR.a); + else if (op == X_D3DTOP_BLENDTEXTUREALPHAPM) + return arg1 + arg2 * (1 - ctx.TEXTURE.a); + else if (op == X_D3DTOP_PREMODULATE) + return arg1; // Note this also multiplies the next stage's CURRENT by its texture + else if (op == X_D3DTOP_MODULATEALPHA_ADDCOLOR) + return float4(arg1.rgb + arg1.a * arg2.rgb, 1); + else if (op == X_D3DTOP_MODULATECOLOR_ADDALPHA) + return float4(arg1.rgb * arg2.rgb + arg1.a, 1); + else if (op == X_D3DTOP_MODULATEINVALPHA_ADDCOLOR) + return float4((1 - arg1.a) * arg2.rgb + arg1.rgb, 1); + else if (op == X_D3DTOP_MODULATEINVCOLOR_ADDALPHA) + return float4((1 - arg1.rgb) * arg2.rgb + arg1.a, 1); + else if (op == X_D3DTOP_DOTPRODUCT3) + // Test case: PerPixelLighting + return saturate(dot( + (arg1.rgb - 0.5) * 2, + (arg2.rgb - 0.5) * 2 + )); + // Note arg0 below is arg1 in D3D docs + // since it becomes the first argument for operations supporting 3 arguments... + else if (op == X_D3DTOP_MULTIPLYADD) + return arg0 + arg1 * arg2; + else if (op == X_D3DTOP_LERP) + return arg0 * arg1 + (1 - arg0) * arg2; + else if (op == X_D3DTOP_BUMPENVMAP) + return float4( + arg1.x * stage.BUMPENVMAT00 + arg1.y * stage.BUMPENVMAT10, + arg1.x * stage.BUMPENVMAT01 + arg1.y * stage.BUMPENVMAT11, + 1, 1); + else if (op == X_D3DTOP_BUMPENVMAPLUMINANCE) + return float4( + arg1.x * stage.BUMPENVMAT00 + arg1.y * stage.BUMPENVMAT10, + arg1.x * stage.BUMPENVMAT01 + arg1.y * stage.BUMPENVMAT11, + arg1.z * stage.BUMPENVLSCALE + stage.BUMPENVLOFFSET, + 1); + + // Something is amiss... we should have returned by now! 
+ // Return a bright colour + return float4(0, 1, 1, 1); +} + +TextureArgs ExecuteTextureStage( + int i, + TextureArgs ctx, + PsTextureHardcodedState s, + int previousOp +) +{ + // Early exit if this stage is disabled (and therefore all further stages are too) + if (s.COLOROP == X_D3DTOP_DISABLE) + return ctx; + + PsTextureStageState stage = state.stages[i]; + + // Determine the texture for this stage + float3 offset = float3(0, 0, 0); + float4 factor = float4(1, 1, 1, 1); + + // Bumpmap special case + if (previousOp == X_D3DTOP_BUMPENVMAP || + previousOp == X_D3DTOP_BUMPENVMAPLUMINANCE) { + // Assume U, V, L is in CURRENT + // Add U', V', to the texture coordinates + // And multiply by L' + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/bump-mapping-formulas + offset.xy = ctx.CURRENT.xy; + factor.rgb = ctx.CURRENT.z; + } + + // Sample the texture + float4 t; + int type = TextureSampleType[i]; + if (type == SAMPLE_NONE) + t = 1; // Test case JSRF + else if (type == SAMPLE_2D) + t = tex2D(samplers[i], TexCoords[i].xy + offset.xy); + else if (type == SAMPLE_3D) + t = tex3D(samplers[i], TexCoords[i].xyz + offset.xyz); + else if (type == SAMPLE_CUBE) + t = texCUBE(samplers[i], TexCoords[i].xyz + offset.xyz); + +#ifdef ENABLE_FF_ALPHAKILL + if (stage.ALPHAKILL) + if (t.a == 0) + discard; + +#endif + // Assign the final value for TEXTURE + ctx.TEXTURE = t * factor; + + // Premodulate special case + if (previousOp == X_D3DTOP_PREMODULATE) { + ctx.CURRENT *= ctx.TEXTURE; + } + + // Get arguments for the texture operation + // Almost all operate on 2 arguments, Arg1 and Arg2 + // Arg0 is a third argument that seems to have been tacked on + // for MULTIPLYADD and LERP + + // Colour operation arguments + float4 cArg1 = GetArg(s.COLORARG1, ctx); + float4 cArg2 = GetArg(s.COLORARG2, ctx); + float4 cArg0 = GetArg(s.COLORARG0, ctx); + + // Alpha operation arguments + float4 aArg1 = GetArg(s.ALPHAARG1, ctx); + float4 aArg2 = GetArg(s.ALPHAARG2, ctx); + float4 aArg0 = 
GetArg(s.ALPHAARG0, ctx); + + // Execute texture operation + // ALPHAOP == X_D3DTOP_DISABLE is undefined behaviour + // Using an intermediate value matches known cases... + // Test case: DoA:Xtreme (menu water), GTA III (logos), Crash Wrath of Cortex (relics UI) + static float4 value = 1; + value.rgb = ExecuteTextureOp(s.COLOROP, cArg1, cArg2, cArg0, ctx, stage).rgb; + if (s.ALPHAOP != X_D3DTOP_DISABLE) { + value.a = ExecuteTextureOp(s.ALPHAOP, aArg1, aArg2, aArg0, ctx, stage).a; + } + + // Save the result + // Note RESULTARG should either be CURRENT or TEMP + // But some titles seem to set it to DIFFUSE + // Use CURRENT for anything other than TEMP + // Test case: DoA 3 + if (s.RESULTARG == X_D3DTA_TEMP) + ctx.TEMP = value; + else + ctx.CURRENT = value; + + return ctx; +} + +float4 main(const PS_INPUT input) : COLOR { + + TexCoords = input.iT; + + // Each stage is passed and returns + // a set of texture arguments + // And will usually update the CURRENT value + TextureArgs ctx; + + // The CURRENT register + // Default to the diffuse value + // TODO determine whether to use the front or back colours + // and set them here + ctx.CURRENT = input.iD0; + ctx.DIFFUSE = input.iD0; + ctx.SPECULAR = input.iD1; + // The TEMP register + // Default to 0 + ctx.TEMP = float4(0, 0, 0, 0); + ctx.TFACTOR = state.TextureFactor; + + PsTextureHardcodedState stages[4]; + stages[0].COLOROP = X_D3DTOP_DISABLE; + stages[1].COLOROP = X_D3DTOP_DISABLE; + stages[2].COLOROP = X_D3DTOP_DISABLE; + stages[3].COLOROP = X_D3DTOP_DISABLE; + + // Define stages + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype + // We'll find comment below and insert the definitions after it + // STAGE DEFINITIONS + // END STAGE DEFINITIONS + + // Run each stage + int previousOp = -1; + for (int i = 0; i < 4; i++) { + + ctx = ExecuteTextureStage( + i, + ctx, + stages[i], + previousOp + ); + + previousOp = stages[i].COLOROP; + } + + // Add fog if enabled + if (state.FogEnable) { 
+ ctx.CURRENT.rgb = lerp(state.FogColor.rgb, ctx.CURRENT.rgb, saturate(input.iFog)); + } + + // Add specular if enabled + if (state.SpecularEnable) { + ctx.CURRENT.rgb += ctx.SPECULAR.rgb; + } + + // Output whatever is in current at the end + return ctx.CURRENT; +} diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli new file mode 100644 index 000000000..f2458947c --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli @@ -0,0 +1,143 @@ +// C++ / HLSL shared state block for fixed function support +#ifdef __cplusplus +#pragma once + +#include +#include // for D3DFORMAT, D3DLIGHT9, etc +#include // for D3DXVECTOR4, etc +#include + +#define float4x4 D3DMATRIX +#define float4 D3DXVECTOR4 +#define float3 D3DVECTOR +#define float2 D3DXVECTOR2 +#define arr(name, type, length) std::array name + +#else +// HLSL +#define arr(name, type, length) type name[length] +#define alignas(x) +#define const static +#endif // __cplusplus + +#ifdef __cplusplus +namespace FixedFunctionPixelShader { +#endif + // From X_D3DTOP + const float X_D3DTOP_DISABLE = 1; + const float X_D3DTOP_SELECTARG1 = 2; + const float X_D3DTOP_SELECTARG2 = 3; + const float X_D3DTOP_MODULATE = 4; + const float X_D3DTOP_MODULATE2X = 5; + const float X_D3DTOP_MODULATE4X = 6; + const float X_D3DTOP_ADD = 7; + const float X_D3DTOP_ADDSIGNED = 8; + const float X_D3DTOP_ADDSIGNED2X = 9; + const float X_D3DTOP_SUBTRACT = 10; + const float X_D3DTOP_ADDSMOOTH = 11; + const float X_D3DTOP_BLENDDIFFUSEALPHA = 12; + const float X_D3DTOP_BLENDCURRENTALPHA = 13; + const float X_D3DTOP_BLENDTEXTUREALPHA = 14; + const float X_D3DTOP_BLENDFACTORALPHA = 15; + const float X_D3DTOP_BLENDTEXTUREALPHAPM = 16; + const float X_D3DTOP_PREMODULATE = 17; + const float X_D3DTOP_MODULATEALPHA_ADDCOLOR = 18; + const float X_D3DTOP_MODULATECOLOR_ADDALPHA = 19; + const float X_D3DTOP_MODULATEINVALPHA_ADDCOLOR = 20; + const float 
X_D3DTOP_MODULATEINVCOLOR_ADDALPHA = 21; + const float X_D3DTOP_DOTPRODUCT3 = 22; + const float X_D3DTOP_MULTIPLYADD = 23; + const float X_D3DTOP_LERP = 24; + const float X_D3DTOP_BUMPENVMAP = 25; + const float X_D3DTOP_BUMPENVMAPLUMINANCE = 26; + + // D3DTA taken from D3D9 - we don't have Xbox definitions + // for these so I guess they're the same? + const float X_D3DTA_DIFFUSE = 0x00000000; // select diffuse color (read only) + const float X_D3DTA_CURRENT = 0x00000001; // select stage destination register (read/write) + const float X_D3DTA_TEXTURE = 0x00000002; // select texture color (read only) + const float X_D3DTA_TFACTOR = 0x00000003; // select D3DRS_TEXTUREFACTOR (read only) + const float X_D3DTA_SPECULAR = 0x00000004; // select specular color (read only) + const float X_D3DTA_TEMP = 0x00000005; // select temporary register color (read/write) + const float X_D3DTA_CONSTANT = 0x00000006; // select texture stage constant + const float X_D3DTA_COMPLEMENT = 0x00000010; // take 1.0 - x (read modifier) + const float X_D3DTA_ALPHAREPLICATE = 0x00000020; // replicate alpha to color components (read modifier) + + const int SAMPLE_NONE = 0; + const int SAMPLE_2D = 1; + const int SAMPLE_3D = 2; + const int SAMPLE_CUBE = 3; + + // This state is passed to the shader + struct PsTextureStageState { + // Values correspond to XD3D8 version of D3DTEXTURESTAGESTATETYPE + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype + + /* Samplers for now are configured elsewhere already + constexpr DWORD X_D3DTSS_ADDRESSU = 0; + constexpr DWORD X_D3DTSS_ADDRESSV = 1; + constexpr DWORD X_D3DTSS_ADDRESSW = 2; + constexpr DWORD X_D3DTSS_MAGFILTER = 3; + constexpr DWORD X_D3DTSS_MINFILTER = 4; + constexpr DWORD X_D3DTSS_MIPFILTER = 5; + constexpr DWORD X_D3DTSS_MIPMAPLODBIAS = 6; + constexpr DWORD X_D3DTSS_MAXMIPLEVEL = 7; + constexpr DWORD X_D3DTSS_MAXANISOTROPY = 8; + */ + + alignas(16) float COLORKEYOP; // Unimplemented Xbox extension! 
+ alignas(16) float COLORSIGN; // Unimplemented Xbox extension! +#ifdef ENABLE_FF_ALPHAKILL + alignas(16) float ALPHAKILL; // Xbox extension! +#else + alignas(16) float ALPHAKILL; // Unimplemented Xbox extension! +#endif + // TEXTURETRANSFORMFLAGS handled by the VS + alignas(16) float BUMPENVMAT00; + alignas(16) float BUMPENVMAT01; + alignas(16) float BUMPENVMAT11; + alignas(16) float BUMPENVMAT10; + alignas(16) float BUMPENVLSCALE; + alignas(16) float BUMPENVLOFFSET; + // TEXCOORDINDEX handled by the VS + // BORDERCOLOR set on sampler + alignas(16) float COLORKEYCOLOR; // Unimplemented Xbox extension! + }; + + // This state is compiled into the shader + // Values correspond to XD3D8 version of D3DTEXTURESTAGESTATETYPE + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype + struct PsTextureHardcodedState { + alignas(16) float COLOROP; + alignas(16) float COLORARG0; + alignas(16) float COLORARG1; + alignas(16) float COLORARG2; + alignas(16) float ALPHAOP; + alignas(16) float ALPHAARG0; + alignas(16) float ALPHAARG1; + alignas(16) float ALPHAARG2; + alignas(16) float RESULTARG; + }; + + struct FixedFunctionPixelShaderState { + alignas(16) arr(stages, PsTextureStageState, 4); + alignas(16) float4 TextureFactor; + alignas(16) float SpecularEnable; + alignas(16) float FogEnable; + alignas(16) float3 FogColor; + }; +#ifdef __cplusplus +} // FixedFunctionPixelShader namespace +#endif + +#ifdef __cplusplus +#undef float4x4 +#undef float4 +#undef float3 +#undef float2 +#undef arr +#else // HLSL +#undef arr +#undef alignas +#undef const +#endif // __cplusplus diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl index cdab93f0f..5ae1bdb88 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl @@ -19,10 +19,13 @@ struct VS_INPUT #else float4 pos : POSITION; float4 bw : BLENDWEIGHT; - 
float4 color[2] : COLOR; + float4 normal : NORMAL; + float4 color[2] : COLOR; + float1 fogCoord : FOG; + float1 pointSize : PSIZE; float4 backColor[2] : TEXCOORD4; - float4 normal : NORMAL; float4 texcoord[4] : TEXCOORD; + float4 reserved[3] : TEXCOORD6; #endif }; @@ -59,12 +62,17 @@ float4 Get(const VS_INPUT xIn, const uint index) if(index == normal) return xIn.normal; if(index == diffuse) return xIn.color[0]; if(index == specular) return xIn.color[1]; + if(index == fogCoord) return xIn.fogCoord; + if(index == pointSize) return xIn.pointSize; if(index == backDiffuse) return xIn.backColor[0]; if(index == backSpecular) return xIn.backColor[1]; if(index == texcoord0) return xIn.texcoord[0]; if(index == texcoord1) return xIn.texcoord[1]; if(index == texcoord2) return xIn.texcoord[2]; if(index == texcoord3) return xIn.texcoord[3]; + if(index == reserved0) return xIn.reserved[0]; + if(index == reserved1) return xIn.reserved[1]; + if(index == reserved2) return xIn.reserved[2]; return 1; #endif } @@ -265,11 +273,11 @@ Material DoMaterial(const uint index, const uint diffuseReg, const uint specular float DoFog(const VS_INPUT xIn) { - // TODO implement properly - // Until we have pixel shader HLSL we are still leaning on D3D renderstates for fogging - // So we are not doing any fog density calculations here + if (!state.Fog.Enable) + return 1; // No fog! // http://developer.download.nvidia.com/assets/gamedev/docs/Fog2.pdf + // Obtain the fog depth value 'd' float fogDepth; if (state.Fog.DepthMode == FixedFunctionVertexShader::FOG_DEPTH_NONE) @@ -281,7 +289,19 @@ float DoFog(const VS_INPUT xIn) if (state.Fog.DepthMode == FixedFunctionVertexShader::FOG_DEPTH_W) fogDepth = Projection.Position.w; - return fogDepth; + // Calculate the fog factor + // Some of this might be better done in the pixel shader? 
+ float fogFactor; + if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_NONE) + fogFactor = fogDepth; + if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_EXP) + fogFactor = 1 / exp(fogDepth * state.Fog.Density); // 1 / e^(d * density) + if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_EXP2) + fogFactor = 1 / exp(pow(fogDepth * state.Fog.Density, 2)); // 1 / e^((d * density)^2) + if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_LINEAR) + fogFactor = (state.Fog.End - fogDepth) / (state.Fog.End - state.Fog.Start); // (end - d) / (end - start) + + return fogFactor; } float4 DoTexCoord(const uint stage, const VS_INPUT xIn) @@ -389,12 +409,13 @@ float4 DoTexCoord(const uint stage, const VS_INPUT xIn) float DoPointSpriteSize() { const PointSprite ps = state.PointSprite; - float pointSize = ps.PointSize; - float A = ps.ScaleABC.x; - float B = ps.ScaleABC.y; - float C = ps.ScaleABC.z; - // Note : if (ps.PointScaleEnable) not required because when disabled, CPU sets RenderTargetHeight and ScaleA to 1, and ScaleB and ScaleC to 0 + float pointSize = ps.PointSize; + const float A = ps.PointScaleABC.x; + const float B = ps.PointScaleABC.y; + const float C = ps.PointScaleABC.z; + + // Note : if (ps.PointScaleEnable) not required because when disabled, CPU sets RenderTargetHeight and PointScale _A to 1, and _B and _C to 0 { const float eyeDistance = length(View.Position); const float factor = A + (B * eyeDistance) + (C * (eyeDistance * eyeDistance)); @@ -402,7 +423,7 @@ float DoPointSpriteSize() pointSize *= ps.XboxRenderTargetHeight * sqrt(1 / factor); } - return clamp(pointSize, ps.PointSizeMin, ps.PointSizeMax) * ps.RenderUpscaleFactor; + return clamp(pointSize, ps.PointSize_Min, ps.PointSize_Max) * ps.RenderUpscaleFactor; } VS_INPUT InitializeInputRegisters(const VS_INPUT xInput) @@ -423,16 +444,21 @@ VS_INPUT InitializeInputRegisters(const VS_INPUT xInput) if(i == normal) xIn.normal = value; if(i == diffuse) 
xIn.color[0] = value; if(i == specular) xIn.color[1] = value; + if(i == fogCoord) xIn.fogCoord = value.x; // Note : Untested + if(i == pointSize) xIn.pointSize = value.x; // Note : Untested if(i == backDiffuse) xIn.backColor[0] = value; if(i == backSpecular) xIn.backColor[1] = value; if(i == texcoord0) xIn.texcoord[0] = value; if(i == texcoord1) xIn.texcoord[1] = value; if(i == texcoord2) xIn.texcoord[2] = value; if(i == texcoord3) xIn.texcoord[3] = value; + if(i == reserved0) xIn.reserved[0] = value; // Note : Untested + if(i == reserved1) xIn.reserved[1] = value; // Note : Untested + if(i == reserved2) xIn.reserved[2] = value; // Note : Untested #endif } - return xIn; + return xIn; // Note : Untested setters are required to avoid "variable 'xIn' used without having been completely initialized" here } VS_OUTPUT main(const VS_INPUT xInput) diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli index b909b97e4..5009fac06 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli @@ -29,6 +29,12 @@ namespace FixedFunctionVertexShader { const float FOG_DEPTH_W = 2; // Fog depth is based distance of the vertex from the eye position const float FOG_DEPTH_RANGE = 3; + + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/fog-formulas + const float FOG_TABLE_NONE = 0; + const float FOG_TABLE_EXP = 1; + const float FOG_TABLE_EXP2 = 2; + const float FOG_TABLE_LINEAR = 3; } // Shared HLSL structures @@ -105,11 +111,11 @@ struct Modes { struct PointSprite { alignas(16) float PointSize; - alignas(16) float PointSizeMin; - alignas(16) float PointSizeMax; + alignas(16) float PointSize_Min; + alignas(16) float PointSize_Max; // alignas(16) float PointScaleEnable; alignas(16) float XboxRenderTargetHeight; - alignas(16) float3 ScaleABC; + alignas(16) float3 PointScaleABC; alignas(16) float 
RenderUpscaleFactor; }; @@ -121,7 +127,12 @@ struct TextureState { }; struct Fog { + alignas(16) float Enable; alignas(16) float DepthMode; + alignas(16) float TableMode; + alignas(16) float Density; // EXP fog density + alignas(16) float Start; // LINEAR fog start + alignas(16) float End; // LINEAR fog end }; // Vertex lighting diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp new file mode 100644 index 000000000..384780a70 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -0,0 +1,408 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check it. +// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com +// ****************************************************************** +// * +// * This file is part of the Cxbx project. +// * +// * Cxbx and Cxbe are free software; you can redistribute them +// * and/or modify them under the terms of the GNU General Public +// * License as published by the Free Software Foundation; either +// * version 2 of the license, or (at your option) any later version. +// * +// * This program is distributed in the hope that it will be useful, +// * but WITHOUT ANY WARRANTY; without even the implied warranty of +// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// * GNU General Public License for more details. +// * +// * You should have recieved a copy of the GNU General Public License +// * along with this program; see the file COPYING. +// * If not, write to the Free Software Foundation, Inc., +// * 59 Temple Place - Suite 330, Bostom, MA 02111-1307, USA. 
+// * +// * 2020 PatrickvL +// * +// * All rights reserved +// * +// ****************************************************************** + +#define LOG_PREFIX CXBXR_MODULE::PXSH + +#include <sstream> // std::stringstream +#include "Shader.h" // EmuCompileShader +#include "PixelShader.h" // EmuCompilePixelShader +//#include "core\kernel\init\CxbxKrnl.h" +//#include "core\kernel\support\Emu.h" + +extern const char* g_ps_model = ps_model_3_0; + +// HLSL pixel shader generation + +static const std::string register_str[16+2] = { + "_discard", // PS_REGISTER_DISCARD = 0x00L, // w + "C0", // PS_REGISTER_C0 = 0x01L, // r + "C1", // PS_REGISTER_C1 = 0x02L, // r + "fog", // PS_REGISTER_FOG = 0x03L, // r + "v0", // PS_REGISTER_V0 = 0x04L, // r/w + "v1", // PS_REGISTER_V1 = 0x05L, // r/w + "?r6?", + "?r7?", + "t0", // PS_REGISTER_T0 = 0x08L, // r/w + "t1", // PS_REGISTER_T1 = 0x09L, // r/w + "t2", // PS_REGISTER_T2 = 0x0aL, // r/w + "t3", // PS_REGISTER_T3 = 0x0bL, // r/w + "r0", // PS_REGISTER_R0 = 0x0cL, // r/w + "r1", // PS_REGISTER_R1 = 0x0dL, // r/w + "sum", // PS_REGISTER_V1R0_SUM = 0x0eL, // r + "prod", // PS_REGISTER_EF_PROD = 0x0fL, // r + + // Cxbx extension; Separate final combiner constant registers : + "FC0", // PS_REGISTER_FC0 = 0x10L, // r + "FC1", // PS_REGISTER_FC1 = 0x11L, // r +}; + +static const unsigned channel_index_Alpha = 0; +static const unsigned channel_index_RGB = 1; +static const unsigned channel_index_BlueToAlpha = 2; // Note : RGB pipeline (sometimes referred to as "portion") can (besides reading .rgb) expand blue to alpha as well + +void InputRegisterHLSL(std::stringstream& hlsl, RPSInputRegister &input, unsigned channel_index, bool isLast = false, int isFinalCombiner = 0) +{ + static const std::string pipeline_channel_str[3][2] = { + ".b", ".a", // [0][*] dest Alpha : [0] = PS_CHANNEL_BLUE, [1] = PS_CHANNEL_ALPHA >> 4 + ".rgb", ".aaa", // [1][*] dest RGB : [0] = PS_CHANNEL_RGB, [1] = PS_CHANNEL_ALPHA >> 4 + ".rgbb", ".aaaa", // [2][*] dest RGB+BlueToAlpha 
: [0] = PS_CHANNEL_RGB, [1] = PS_CHANNEL_ALPHA >> 4 (test-case : TechCertGame) TODO : Verify .aaaa is indeed unreachable (BlueToAlpha being forbidden for Alpha channel + }; + + static const std::string input_mapping_str[8][3] = { + // [*][0] = PS_REGISTER_ZERO-derived constants, based on enum PS_INPUTMAPPING : + // [*][1] = Source register modifier macro's, based on enum PS_INPUTMAPPING : + // [*][2] = Final combiner source register modifier macro's, based on enum PS_INPUTMAPPING : + "zero", "s_sat", "s_sat", // saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, OK for final combiner // Clamps negative x to 0 + "one", "s_comp", "s_comp", // ( 1.0 - saturate(x) ) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, OK for final combiner // Complements x (1-x) + "-one", "s_bx2", "N/A", // ( 2.0 * max(0.0, x) - 1.0) // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, invalid for final combiner // Shifts range from [0..1] to [-1..1] + "one", "s_negbx2", "N/A", // (-2.0 * max(0.0, x) + 1.0) // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates + "-half", "s_bias", "N/A", // (max(0.0, x) - 0.5) // PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, invalid for final combiner // Clamps negative x to 0 and then subtracts 0.5 + "half", "s_negbias", "N/A", // (-max(0.0, x) + 0.5) // PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, invalid for final combiner // Clamps negative x to 0, subtracts 0.5, and then negates + "zero", "s_ident", "N/A", // x // PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, invalid for final combiner // No modifier, x is passed without alteration + "zero", "s_neg", "N/A" // -x // PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, invalid for final combiner // Negate + }; + + // Generate channel selector + std::string channel_str = pipeline_channel_str[channel_index][input.Channel >> 4]; + + unsigned input_mapping_index = (input.InputMapping >> 5) & 0x07; // Converts PS_INPUTMAPPING to an index into input_mapping_str + if (input.Reg == 
PS_REGISTER_ZERO) { // = PS_REGISTER_DISCARD + // Generate a constant per input mapping (instead of applying that on register_str[PS_REGISTER_DISCARD]) + hlsl << input_mapping_str[input_mapping_index][0] << channel_str; + } + else { + // Or an actual register (with an input mapping function applied) + switch (input_mapping_index) { + case PS_INPUTMAPPING_SIGNED_IDENTITY >> 5: + // Note : signed identity doesn't alter the argument, so avoid cluttering the output by leaving it out + hlsl << register_str[input.Reg] << channel_str; + break; + case PS_INPUTMAPPING_SIGNED_NEGATE >> 5: + // Note : signed negate can be written in short-hand using a minus sign + hlsl << '-' << register_str[input.Reg] << channel_str; + break; + default: + hlsl << input_mapping_str[input_mapping_index][1 + isFinalCombiner] << '(' << register_str[input.Reg] << channel_str << ')'; + break; + } + } + + if (!isLast) + hlsl << ','; +} + +static const std::string opcode_comment[6][2] = { + "xdd", "d0=s0 dot s1, d1=s2 dot s3", // dot/dot/discard > calculating AB=A.B and CD=C.D + "xdm", "d0=s0 dot s1, d1=s2*s3", // dot/mul/discard > calculating AB=A.B and CD=C*D + "xmd", "d0=s0*s1, d1=s2 dot s3", // mul/dot/discard > calculating AB=A*B and CD=C.D + "xmma", "d0=s0*s1, d1=s2*s3, d2={s2*s3}+{s0*s1}", // mul/mul/sum > calculating AB=A*B and CD=C*D and Sum=CD+AB + "xmmc", "d0=s0*s1, d1=s2*s3, d2={r0.a>0.5}?{s2*s3}:{s0*s1}", // mul/mul/mux > calculating AB=A*B and CD=C*D and Mux=CD?AB + "xfc", "sum=r0+v1, prod=s4*s5, r0.rgb=s0*s1+{1-s0}*s2+s3, r0.a=s6.b" +}; + +void CombinerStageHlsl(std::stringstream& hlsl, RPSCombinerStageChannel& stage, unsigned channel_index) +{ + // Determine which opcode we're dealing with (xdd, xdm, xmma or xmma) + // Note : DotProduct can only be set for RGB (see RPSCombinerStageChannel::Decode) + unsigned opcode; + if (stage.OutputAB.DotProduct) { + if (stage.OutputCD.DotProduct) + opcode = 0; // xdd + else + opcode = 1; // xdm + } else { + if (stage.OutputCD.DotProduct) + opcode 
= 2; // xmd TODO : Verify + else + if (!stage.AB_CD_MUX) + opcode = 3; // xmma + else + opcode = 4; // xmmc + } + + // Early exit when all outputs are discarded + if ((stage.OutputAB.Reg == PS_REGISTER_DISCARD) && (stage.OutputCD.Reg == PS_REGISTER_DISCARD)) { + // xdd, xdm and xmd have just 2 outputs, but xmma and xmmc must also check their 3rd output + if ((opcode <= 2) || (stage.OutputMUX_SUM.Reg == PS_REGISTER_DISCARD)) { + hlsl << "// discarded"; + return; + } + } + + // Determine output channels (only channel_index_RGB can increase to channel_index_BlueToAlpha) : + static const std::string dst_channels[3] = { ".a", ".rgb", ".rgba" }; + unsigned AB_channel_index = channel_index + stage.OutputAB.BlueToAlpha; + unsigned CD_channel_index = channel_index + stage.OutputCD.BlueToAlpha; + + // Generate 2 (or 3 for xmma/xmmc) output arguments + // Note : BlueToAlpha can only be set for RGB (see RPSCombinerStageChannel::Decode) + std::stringstream arguments; + arguments << register_str[stage.OutputAB.Reg] << dst_channels[AB_channel_index]; + arguments << ',' << register_str[stage.OutputCD.Reg] << dst_channels[CD_channel_index]; + // xmma and xmmc have a 3rd output (which doesn't support the BlueToAlpha flag) + if (opcode >= 3) { + // TODO : Figure out how to support BlueToAlpha source to MUX_SUM.rgb scenario + // If the xmma_m and xmmc_m macro's can't handle this, we may need to drop + // those macro's, and generate the HLSL here (alas, as we try to avoid that). 
+ arguments << ',' << register_str[stage.OutputMUX_SUM.Reg] << dst_channels[channel_index]; + } + // Insert a visual separation between the output arguments, and the 4 input arguments + arguments << ", "; + // Generate 4 input arguments + InputRegisterHLSL(arguments, stage.OutputAB.Input[0], AB_channel_index); + InputRegisterHLSL(arguments, stage.OutputAB.Input[1], AB_channel_index); + InputRegisterHLSL(arguments, stage.OutputCD.Input[0], CD_channel_index); + InputRegisterHLSL(arguments, stage.OutputCD.Input[1], CD_channel_index); + + + // Generate combiner output modifier + static const std::string output_modifier_str[8] = { + "d_ident", // y = x // PS_COMBINEROUTPUT_OUTPUTMAPPING_IDENTITY= 0x00L + "d_bias", // y = (x - 0.5) // PS_COMBINEROUTPUT_OUTPUTMAPPING_BIAS= 0x08L // Subtracts 0.5 from outputs + "d_x2", // y = x * 2 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_1= 0x10L // Scales outputs by 2 + "d_bx2", // y = (x - 0.5) * 2 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_1_BIAS= 0x18L // Subtracts 0.5 from outputs and scales by 2 + "d_x4", // y = x * 4 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2= 0x20L // Scales outputs by 4 + "d_bx4", // y = (x - 0.5) * 4 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2_BIAS= 0x28L // Subtracts 0.5 from outputs and scales by 4 + "d_d2", // y = x / 2 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1= 0x30L // Divides outputs by 2 + "d_bd2" // y = (x - 0.5) / 2 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS= 0x38L // Subtracts 0.5 from outputs and divides by 2 + }; + + std::string output_modifier = output_modifier_str[stage.CombinerOutputMapping >> 3]; + + // Concatenate it all together into an opcode 'call' (which resolves into macro expressions) + hlsl << opcode_comment[opcode][0] << '(' << arguments.str() << ' ' << output_modifier; + + // xmma and xmmc require a temporary register with channel designation + if (opcode >= 3) + hlsl << ",tmp" << dst_channels[AB_channel_index]; // TODO : + + hlsl << "); // " << 
opcode_comment[opcode][1]; +} + +void FinalCombinerStageHlsl(std::stringstream& hlsl, RPSFinalCombiner& fc, bool hasFinalCombiner) +{ + std::stringstream arguments; + + if (hasFinalCombiner) { + hlsl << "\n // Final combiner xfc was defined in the shader"; + } + else { + hlsl << "\n // Final combiner xfc generated from XD3D8 renderstates"; + } + + for (unsigned i = 0; i < 7; i++) { // Generate A, B, C, D, E, F, G input arguments + // Note : Most final combiner inputs are treated as RGB, but G is single-channel (.a or .b) + bool isLast = (i == 6); + unsigned channel_index = isLast ? channel_index_Alpha : channel_index_RGB; + InputRegisterHLSL(arguments, fc.Input[i], channel_index, isLast, /*isFinalCombiner=*/1); + } + + // Concatenate it all together into the xfc opcode 'call' (which resolves into macro expressions) + // Note : The xfc opcode macro does not have an output modifier argument + hlsl << "\n " << opcode_comment[5][0] << "(" << arguments.str() << "); // " << opcode_comment[5][1]; +} + +void OutputDefineFlag(std::stringstream& hlsl, bool enabled, std::string_view define_enabled, std::string_view define_disabled = "") +{ + if (define_disabled.length() > 0) { + if (enabled) { + hlsl << "\n#define " << define_enabled << " // not " << define_disabled; + } + else { + hlsl << "\n#define " << define_disabled << " // not " << define_enabled; + } + } + else { + if (enabled) { + hlsl << "\n#define " << define_enabled; + } + else { + hlsl << "\n#undef " << define_enabled; + } + } +} + +/* Disabled, until BumpDemo is fixed (which with this code, inadvertedly skips stage 1 and 2 dotproducts) : +bool IsTextureSampled(DecodedRegisterCombiner* pShader, int reg) +{ + // TODO : Instead searching like this afterwards, simply set a boolean for each texture-read detected during decoding + // TODO : Extend detection if textures can also be used indirectly thru PSInputTexture (without mention in actual combiner stages) + for (unsigned i = 0; i < pShader->NumberOfCombiners; i++) 
{ + // Is an output calculated, and does any of the inputs read from the given (texture) register? + if (pShader->Combiners[i].RGB.OutputAB.Reg != PS_REGISTER_DISCARD) { + if (pShader->Combiners[i].RGB.OutputAB.Input[0].Reg == reg) return true; + if (pShader->Combiners[i].RGB.OutputAB.Input[1].Reg == reg) return true; + } + if (pShader->Combiners[i].RGB.OutputCD.Reg != PS_REGISTER_DISCARD) { + if (pShader->Combiners[i].RGB.OutputCD.Input[0].Reg == reg) return true; + if (pShader->Combiners[i].RGB.OutputCD.Input[1].Reg == reg) return true; + } + if (pShader->Combiners[i].Alpha.OutputAB.Reg != PS_REGISTER_DISCARD) { + if (pShader->Combiners[i].Alpha.OutputAB.Input[0].Reg == reg) return true; + if (pShader->Combiners[i].Alpha.OutputAB.Input[1].Reg == reg) return true; + } + if (pShader->Combiners[i].Alpha.OutputCD.Reg != PS_REGISTER_DISCARD) { + if (pShader->Combiners[i].Alpha.OutputCD.Input[0].Reg == reg) return true; + if (pShader->Combiners[i].Alpha.OutputCD.Input[1].Reg == reg) return true; + } + // Is the given register writen to? 
Then no sampling took place + if (pShader->Combiners[i].RGB.OutputAB.Reg == reg) return false; + if (pShader->Combiners[i].RGB.OutputCD.Reg == reg) return false; + if (pShader->Combiners[i].RGB.OutputMUX_SUM.Reg == reg) return false; + if (pShader->Combiners[i].Alpha.OutputAB.Reg == reg) return false; + if (pShader->Combiners[i].Alpha.OutputCD.Reg == reg) return false; + if (pShader->Combiners[i].Alpha.OutputMUX_SUM.Reg == reg) return false; + } + + if (pShader->hasFinalCombiner) { + for (unsigned i = 0; i < 7; i++) { + if (pShader->FinalCombiner.Input[i].Reg == reg) return true; + } + } + + return false; +} */ + +void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) +{ + // Include HLSL header and footer as raw strings : + static const std::string hlsl_template[4] = { + #include "core\hle\D3D8\Direct3D9\CxbxPixelShaderTemplate.hlsl" + }; + + hlsl << hlsl_template[0]; // Start with the HLSL template header + + hlsl << "\n#define ALPHAKILL {" + << (pShader->AlphaKill[0] ? "true, " : "false, ") + << (pShader->AlphaKill[1] ? "true, " : "false, ") + << (pShader->AlphaKill[2] ? "true, " : "false, ") + << (pShader->AlphaKill[3] ? "true}" : "false}"); + + hlsl << "\n#define PS_COMBINERCOUNT " << pShader->NumberOfCombiners; + if (pShader->NumberOfCombiners > 0) { + OutputDefineFlag(hlsl, pShader->CombinerHasUniqueC0, "PS_COMBINERCOUNT_UNIQUE_C0", "PS_COMBINERCOUNT_SAME_C0"); + OutputDefineFlag(hlsl, pShader->CombinerHasUniqueC1, "PS_COMBINERCOUNT_UNIQUE_C1", "PS_COMBINERCOUNT_SAME_C1"); + OutputDefineFlag(hlsl, pShader->CombinerMuxesOnMsb, "PS_COMBINERCOUNT_MUX_MSB", "PS_COMBINERCOUNT_MUX_LSB"); + } + + for (unsigned i = 0; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) { + hlsl << "\n#define PS_COMPAREMODE_" << i << "(in)" + << (pShader->PSCompareMode[i][0] ? " CM_GE(in.x)" : " CM_LT(in.x)") // PS_COMPAREMODE_S_[GE|LT] + << (pShader->PSCompareMode[i][1] ? " CM_GE(in.y)" : " CM_LT(in.y)") // PS_COMPAREMODE_T_[GE|LT] + << (pShader->PSCompareMode[i][2] ? 
" CM_GE(in.z)" : " CM_LT(in.z)") // PS_COMPAREMODE_R_[GE|LT] + << (pShader->PSCompareMode[i][3] ? " CM_GE(in.w)" : " CM_LT(in.w)");// PS_COMPAREMODE_Q_[GE|LT] + } + + hlsl << "\nstatic const int PS_INPUTTEXTURE_[4] = { -1, " + << pShader->PSInputTexture[1] << ", " + << pShader->PSInputTexture[2] << ", " + << pShader->PSInputTexture[3] << " };"; + + // Generate #defines required by CxbxPixelShaderTemplate.hlsl : + for (unsigned i = 1; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) { + static const std::string dotmapping_str[8] = { + "PS_DOTMAPPING_ZERO_TO_ONE", // = 0x00L, // - * * * + "PS_DOTMAPPING_MINUS1_TO_1_D3D", // = 0x01L, // - * * * + "PS_DOTMAPPING_MINUS1_TO_1_GL", // = 0x02L, // - * * * + "PS_DOTMAPPING_MINUS1_TO_1", // = 0x03L, // - * * * + "PS_DOTMAPPING_HILO_1", // = 0x04L, // - * * * + "PS_DOTMAPPING_HILO_HEMISPHERE_D3D", // = 0x05L, // - * * * + "PS_DOTMAPPING_HILO_HEMISPHERE_GL", // = 0x06L, // - * * * + "PS_DOTMAPPING_HILO_HEMISPHERE" // = 0x07L, // - * * * + }; + + hlsl << "\n#define PS_DOTMAPPING_" << i << " " << dotmapping_str[(unsigned)pShader->PSDotMapping[i]]; + } + + OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementV1, "PS_FINALCOMBINERSETTING_COMPLEMENT_V1"); + OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementR0, "PS_FINALCOMBINERSETTING_COMPLEMENT_R0"); + OutputDefineFlag(hlsl, pShader->FinalCombiner.ClampSum, "PS_FINALCOMBINERSETTING_CLAMP_SUM"); + + hlsl << hlsl_template[1]; + hlsl << hlsl_template[2]; + + // Generate all four texture stages + for (unsigned i = 0; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) { + static const std::string texturemode_str[19] = { + "PS_TEXTUREMODES_NONE", // = 0x00L, // * * * * + "PS_TEXTUREMODES_PROJECT2D", // = 0x01L, // * * * * + "PS_TEXTUREMODES_PROJECT3D", // = 0x02L, // * * * * + "PS_TEXTUREMODES_CUBEMAP", // = 0x03L, // * * * * + "PS_TEXTUREMODES_PASSTHRU", // = 0x04L, // * * * * + "PS_TEXTUREMODES_CLIPPLANE", // = 0x05L, // * * * * + "PS_TEXTUREMODES_BUMPENVMAP", // = 0x06L, // - * * * + 
"PS_TEXTUREMODES_BUMPENVMAP_LUM", // = 0x07L, // - * * * + "PS_TEXTUREMODES_BRDF", // = 0x08L, // - - * * + "PS_TEXTUREMODES_DOT_ST", // = 0x09L, // - - * * + "PS_TEXTUREMODES_DOT_ZW", // = 0x0aL, // - - * * + "PS_TEXTUREMODES_DOT_RFLCT_DIFF", // = 0x0bL, // - - * - + "PS_TEXTUREMODES_DOT_RFLCT_SPEC", // = 0x0cL, // - - - * + "PS_TEXTUREMODES_DOT_STR_3D", // = 0x0dL, // - - - * + "PS_TEXTUREMODES_DOT_STR_CUBE", // = 0x0eL, // - - - * + "PS_TEXTUREMODES_DPNDNT_AR", // = 0x0fL, // - * * * + "PS_TEXTUREMODES_DPNDNT_GB", // = 0x10L, // - * * * + "PS_TEXTUREMODES_DOTPRODUCT", // = 0x11L, // - * * - + "PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST", // = 0x12L, // - - - * + }; + + /* Disabled, until BumpDemo is fixed (which with this code, inadvertedly skips stage 1 and 2 dotproducts) : + // Skip stages never read, to avoid compilation overhead + if (!IsTextureSampled(pShader, PS_REGISTER_T0 + i)) + continue; */ + + hlsl << "\n " << texturemode_str[pShader->PSTextureModes[i]] << "(" << i << ");"; + // On Xbox, r0.a is initialized to t0.a (and r1.a to t1.a ?) 
: + if (i == 0) hlsl << " r0.a = t0.a;"; + if (i == 1) hlsl << " r1.a = t1.a;"; + } + + // Generate all combiners (rgb and alpha) + for (unsigned i = 0; i < pShader->NumberOfCombiners; i++) { + hlsl << "\n stage = " << i << "; "; + CombinerStageHlsl(hlsl, pShader->Combiners[i].RGB, channel_index_RGB); + hlsl << "\n /* + */ "; + CombinerStageHlsl(hlsl, pShader->Combiners[i].Alpha, channel_index_Alpha); + } + + FinalCombinerStageHlsl(hlsl, pShader->FinalCombiner, pShader->hasFinalCombiner); + + hlsl << hlsl_template[3]; // Finish with the HLSL template footer +} + +// recompile xbox pixel shader function +extern HRESULT EmuCompilePixelShader +( + DecodedRegisterCombiner* pIntermediateShader, + ID3DBlob** ppHostShader +) +{ + auto hlsl_stream = std::stringstream(); + BuildShader(pIntermediateShader, hlsl_stream); + std::string hlsl_str = hlsl_stream.str(); + + return EmuCompileShader(hlsl_str, g_ps_model, ppHostShader); +} diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.h b/src/core/hle/D3D8/Direct3D9/PixelShader.h new file mode 100644 index 000000000..d092139a0 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.h @@ -0,0 +1,14 @@ +#pragma once + +#include "Shader.h" // ID3DBlob (via d3dcompiler.h > d3d11shader.h > d3dcommon.h) +#include "core\hle\D3D8\XbPixelShader.h" // DecodedRegisterCombiner + +static const char* ps_model_2_a = "ps_2_a"; +static const char* ps_model_3_0 = "ps_3_0"; +extern const char* g_ps_model; + +extern HRESULT EmuCompilePixelShader +( + DecodedRegisterCombiner* pIntermediateShader, + ID3DBlob** ppHostShader +); diff --git a/src/core/hle/D3D8/Direct3D9/RenderStates.cpp b/src/core/hle/D3D8/Direct3D9/RenderStates.cpp index f53eda030..e617eb30b 100644 --- a/src/core/hle/D3D8/Direct3D9/RenderStates.cpp +++ b/src/core/hle/D3D8/Direct3D9/RenderStates.cpp @@ -186,6 +186,16 @@ uint32_t XboxRenderStateConverter::GetXboxRenderState(uint32_t State) return D3D__RenderState[XboxRenderStateOffsets[State]]; } +float 
XboxRenderStateConverter::GetXboxRenderStateAsFloat(uint32_t State) +{ + if (!XboxRenderStateExists(State)) { + EmuLog(LOG_LEVEL::WARNING, "Attempt to read a Renderstate (%s) that does not exist in the current D3D8 XDK Version (%d)", GetDxbxRenderStateInfo(State).S, g_LibVersion_D3D8); + return 0; + } + + return *reinterpret_cast(&(D3D__RenderState[XboxRenderStateOffsets[State]])); +} + void XboxRenderStateConverter::StoreInitialValues() { for (unsigned int RenderState = xbox::X_D3DRS_FIRST; RenderState <= xbox::X_D3DRS_LAST; RenderState++) { diff --git a/src/core/hle/D3D8/Direct3D9/RenderStates.h b/src/core/hle/D3D8/Direct3D9/RenderStates.h index 78bfab944..dbc161ddb 100644 --- a/src/core/hle/D3D8/Direct3D9/RenderStates.h +++ b/src/core/hle/D3D8/Direct3D9/RenderStates.h @@ -46,6 +46,7 @@ public: void SetWireFrameMode(int mode); void SetDirty(); uint32_t GetXboxRenderState(uint32_t State); + float GetXboxRenderStateAsFloat(uint32_t State); private: void VerifyAndFixDeferredRenderStateOffset(); void DeriveRenderStateOffsetFromDeferredRenderStateOffset(); diff --git a/src/core/hle/D3D8/Direct3D9/Shader.cpp b/src/core/hle/D3D8/Direct3D9/Shader.cpp new file mode 100644 index 000000000..382fc0aab --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/Shader.cpp @@ -0,0 +1,142 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check it. +// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com +// ****************************************************************** +// * +// * This file is part of the Cxbx project. +// * +// * Cxbx and Cxbe are free software; you can redistribute them +// * and/or modify them under the terms of the GNU General Public +// * License as published by the Free Software Foundation; either +// * version 2 of the license, or (at your option) any later version. 
+// * +// * This program is distributed in the hope that it will be useful, +// * but WITHOUT ANY WARRANTY; without even the implied warranty of +// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// * GNU General Public License for more details. +// * +// * You should have recieved a copy of the GNU General Public License +// * along with this program; see the file COPYING. +// * If not, write to the Free Software Foundation, Inc., +// * 59 Temple Place - Suite 330, Bostom, MA 02111-1307, USA. +// * +// * 2020 PatrickvL +// * +// * All rights reserved +// * +// ****************************************************************** + +#define LOG_PREFIX CXBXR_MODULE::VTXSH // TODO : Introduce generic HLSL logging + +#include +#include "Shader.h" +#include "core\kernel\init\CxbxKrnl.h" // LOG_TEST_CASE +#include "core\kernel\support\Emu.h" // EmuLog +//#include + +std::string DebugPrependLineNumbers(std::string shaderString) { + std::stringstream shader(shaderString); + auto debugShader = std::stringstream(); + + int i = 1; + for (std::string line; std::getline(shader, line); ) { + auto lineNumber = std::to_string(i++); + auto paddedLineNumber = lineNumber.insert(0, 3 - lineNumber.size(), ' '); + debugShader << "/* " << paddedLineNumber << " */ " << line << "\n"; + } + + return debugShader.str(); +} + +extern HRESULT EmuCompileShader +( + std::string hlsl_str, + const char* shader_profile, + ID3DBlob** ppHostShader, + const char* pSourceName +) +{ + ID3DBlob* pErrors = nullptr; + ID3DBlob* pErrorsCompatibility = nullptr; + HRESULT hRet = 0; + + EmuLog(LOG_LEVEL::DEBUG, "--- HLSL conversion ---"); + EmuLog(LOG_LEVEL::DEBUG, DebugPrependLineNumbers(hlsl_str).c_str()); + EmuLog(LOG_LEVEL::DEBUG, "-----------------------"); + + + UINT flags1 = D3DCOMPILE_OPTIMIZATION_LEVEL3; + + hRet = D3DCompile( + hlsl_str.c_str(), + hlsl_str.length(), + pSourceName, + nullptr, // pDefines + D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL 
functions? + "main", // shader entry point + shader_profile, + flags1, // flags1 + 0, // flags2 + ppHostShader, // out + &pErrors // ppErrorMsgs out + ); + if (FAILED(hRet)) { + EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Recompiling in compatibility mode"); + // Attempt to retry in compatibility mode, this allows some vertex-state shaders to compile + // Test Case: Spy vs Spy + flags1 |= D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_AVOID_FLOW_CONTROL; + hRet = D3DCompile( + hlsl_str.c_str(), + hlsl_str.length(), + pSourceName, + nullptr, // pDefines + D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? + "main", // shader entry point + shader_profile, + flags1, // flags1 + 0, // flags2 + ppHostShader, // out + &pErrorsCompatibility // ppErrorMsgs out + ); + + if (FAILED(hRet)) { + LOG_TEST_CASE("Couldn't assemble recompiled shader"); + //EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled shader"); + } + } + + // Determine the log level + auto hlslErrorLogLevel = FAILED(hRet) ?
LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG; + if (pErrors) { + // Log errors from the initial compilation + EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); + pErrors->Release(); + pErrors = nullptr; + } + + // Failure to recompile in compatibility mode ignored for now + if (pErrorsCompatibility != nullptr) { + pErrorsCompatibility->Release(); + pErrorsCompatibility = nullptr; + } + + LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) { + if (g_bPrintfOn) { + if (!FAILED(hRet)) { + // Log disassembly; its HRESULT is deliberately not stored in hRet, so a failed disassembly cannot turn a successful compile into a reported failure + D3DDisassemble( + (*ppHostShader)->GetBufferPointer(), + (*ppHostShader)->GetBufferSize(), + D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING, + NULL, + &pErrors + ); + if (pErrors) { + EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); + pErrors->Release(); + } + } + } + } + + return hRet; +} diff --git a/src/core/hle/D3D8/Direct3D9/Shader.h b/src/core/hle/D3D8/Direct3D9/Shader.h new file mode 100644 index 000000000..1a89d14a5 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/Shader.h @@ -0,0 +1,12 @@ +#pragma once + +#include <string> // std::string +#include // ID3DBlob (via d3d9.h > d3d11shader.h > d3dcommon.h) + +extern HRESULT EmuCompileShader +( + std::string hlsl_str, + const char* shader_profile, + ID3DBlob** ppHostShader, + const char* pSourceName = nullptr +); diff --git a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp index 0a0a07f64..15182a217 100644 --- a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp +++ b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp @@ -33,6 +33,7 @@ #include "EmuShared.h" #include "core/hle/Intercept.hpp" #include "RenderStates.h" +#include "core/hle/D3D8/XbVertexShader.h" // For g_UseFixedFunctionVertexShader, g_Xbox_VertexShaderMode and VertexShaderMode::FixedFunction #include "core/hle/D3D8/Direct3D9/Direct3D9.h" // For g_pD3DDevice #include @@ -123,7 +124,6 @@ void XboxTextureStateConverter::BuildTextureStateMappingTable() DWORD
XboxTextureStateConverter::GetHostTextureOpValue(DWORD Value) { - bool bOldOrder = g_LibVersion_D3D8 <= 3948; // Verified old order in 3944, new order in 4039 switch (Value) { case xbox::X_D3DTOP_DISABLE: return D3DTOP_DISABLE; case xbox::X_D3DTOP_SELECTARG1: return D3DTOP_SELECTARG1; @@ -137,10 +137,10 @@ DWORD XboxTextureStateConverter::GetHostTextureOpValue(DWORD Value) case xbox::X_D3DTOP_SUBTRACT: return D3DTOP_SUBTRACT; case xbox::X_D3DTOP_ADDSMOOTH: return D3DTOP_ADDSMOOTH; case xbox::X_D3DTOP_BLENDDIFFUSEALPHA: return D3DTOP_BLENDDIFFUSEALPHA; - case 0x0D/*xbox::X_D3DTOP_BLENDCURRENTALPHA */: return bOldOrder ? D3DTOP_BLENDTEXTUREALPHA : D3DTOP_BLENDCURRENTALPHA; - case 0x0E/*xbox::X_D3DTOP_BLENDTEXTUREALPHA */: return bOldOrder ? D3DTOP_BLENDFACTORALPHA : D3DTOP_BLENDTEXTUREALPHA; - case 0x0F/*xbox::X_D3DTOP_BLENDFACTORALPHA */: return bOldOrder ? D3DTOP_BLENDTEXTUREALPHAPM : D3DTOP_BLENDFACTORALPHA; - case 0x10/*xbox::X_D3DTOP_BLENDTEXTUREALPHAPM*/: return bOldOrder ? D3DTOP_BLENDCURRENTALPHA : D3DTOP_BLENDTEXTUREALPHAPM; + case xbox::X_D3DTOP_BLENDCURRENTALPHA: return D3DTOP_BLENDCURRENTALPHA; + case xbox::X_D3DTOP_BLENDTEXTUREALPHA: return D3DTOP_BLENDTEXTUREALPHA; + case xbox::X_D3DTOP_BLENDFACTORALPHA: return D3DTOP_BLENDFACTORALPHA; + case xbox::X_D3DTOP_BLENDTEXTUREALPHAPM: return D3DTOP_BLENDTEXTUREALPHAPM; case xbox::X_D3DTOP_PREMODULATE: return D3DTOP_PREMODULATE; case xbox::X_D3DTOP_MODULATEALPHA_ADDCOLOR: return D3DTOP_MODULATEALPHA_ADDCOLOR; case xbox::X_D3DTOP_MODULATECOLOR_ADDALPHA: return D3DTOP_MODULATECOLOR_ADDALPHA; @@ -165,8 +165,10 @@ void XboxTextureStateConverter::Apply() // Track if we need to overwrite state 0 with 3 because of Point Sprites // The Xbox NV2A uses only Stage 3 for point-sprites, so we emulate this // by mapping Stage 3 to Stage 0, and disabling all stages > 0 + // TODO use stage 3 when we roll our own point sprites after moving off D3D9 bool pointSpriteOverride = false; - bool pointSpritesEnabled = 
pXboxRenderStates->GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); + bool pointSpritesEnabled = false; + pointSpritesEnabled = pXboxRenderStates->GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); if (pointSpritesEnabled) { pointSpriteOverride = true; } @@ -177,7 +179,7 @@ void XboxTextureStateConverter::Apply() for (int State = xbox::X_D3DTSS_FIRST; State <= xbox::X_D3DTSS_LAST; State++) { // Read the value of the current stage/state from the Xbox data structure - DWORD XboxValue = Get(XboxStage, State); // OR D3D__TextureState[(XboxStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[State]]; + DWORD XboxValue = Get(XboxStage, State); DWORD PcValue = XboxValue; // If the state hasn't changed, skip setting it @@ -275,9 +277,13 @@ void XboxTextureStateConverter::Apply() case xbox::X_D3DTSS_COLORARG0: case xbox::X_D3DTSS_COLORARG1: case xbox::X_D3DTSS_COLORARG2: case xbox::X_D3DTSS_ALPHAARG0: case xbox::X_D3DTSS_ALPHAARG1: case xbox::X_D3DTSS_ALPHAARG2: case xbox::X_D3DTSS_RESULTARG: case xbox::X_D3DTSS_TEXTURETRANSFORMFLAGS: + break; case xbox::X_D3DTSS_BUMPENVMAT00: case xbox::X_D3DTSS_BUMPENVMAT01: case xbox::X_D3DTSS_BUMPENVMAT11: case xbox::X_D3DTSS_BUMPENVMAT10: case xbox::X_D3DTSS_BUMPENVLSCALE: case xbox::X_D3DTSS_BUMPENVLOFFSET: +#if 0 // New, doesn't work yet + continue; // Note : Since DxbxUpdateActivePixelShader() reads these too, you'd expect here we could skip, but alas. 
TODO: Fix PS HLSL to not depend on host D3D TSS +#endif case xbox::X_D3DTSS_BORDERCOLOR: case xbox::X_D3DTSS_MIPMAPLODBIAS: case xbox::X_D3DTSS_MAXMIPLEVEL: case xbox::X_D3DTSS_MAXANISOTROPY: break; @@ -330,11 +336,37 @@ void XboxTextureStateConverter::Apply() } } +// Normalizes a raw Xbox texture stage state value whose numeric meaning differs per XDK version: for D3D8 library versions <= 3948, COLOROP/ALPHAOP values 13..16 are translated to the matching X_D3DTOP_* constants; every other state/value pair is returned unchanged +DWORD NormalizeValue(DWORD xboxState, DWORD value) { + if (g_LibVersion_D3D8 <= 3948) { + // D3DTOP verified old order in 3948, new order in 4039 + switch (xboxState) { + case xbox::X_D3DTSS_COLOROP: + case xbox::X_D3DTSS_ALPHAOP: + switch (value) { + case 13: + return xbox::X_D3DTOP_BLENDTEXTUREALPHA; + case 14: + return xbox::X_D3DTOP_BLENDFACTORALPHA; + case 15: + return xbox::X_D3DTOP_BLENDTEXTUREALPHAPM; + case 16: + return xbox::X_D3DTOP_BLENDCURRENTALPHA; + } + } + } + + return value; +} + uint32_t XboxTextureStateConverter::Get(int textureStage, DWORD xboxState) { if (textureStage < 0 || textureStage > 3) CxbxKrnlCleanup("Requested texture stage was out of range: %d", textureStage); if (xboxState < xbox::X_D3DTSS_FIRST || xboxState > xbox::X_D3DTSS_LAST) CxbxKrnlCleanup("Requested texture state was out of range: %d", xboxState); - return D3D__TextureState[(textureStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[xboxState]]; + // Read the raw value of the requested stage/state from the Xbox data structure; it is passed through NormalizeValue before being returned + DWORD rawValue = D3D__TextureState[(textureStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[xboxState]]; + + return NormalizeValue(xboxState, rawValue); } diff --git a/src/core/hle/D3D8/Direct3D9/TextureStates.h b/src/core/hle/D3D8/Direct3D9/TextureStates.h index d9f6e3742..0859eb893 100644 --- a/src/core/hle/D3D8/Direct3D9/TextureStates.h +++ b/src/core/hle/D3D8/Direct3D9/TextureStates.h @@ -45,6 +45,8 @@ private: void BuildTextureStateMappingTable(); DWORD GetHostTextureOpValue(DWORD XboxTextureOp); + // Pointer to Xbox texture states + // Note mappings may change between XDK versions uint32_t*
D3D__TextureState = nullptr; std::array XboxTextureStateOffsets; XboxRenderStateConverter* pXboxRenderStates; diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp index d6b8d1047..597b6c169 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp @@ -1,13 +1,14 @@ #define LOG_PREFIX CXBXR_MODULE::VTXSH -#include "VertexShader.h" -#include "core\kernel\init\CxbxKrnl.h" -#include "core\kernel\support\Emu.h" +#include "Shader.h" // EmuCompileShader +#include "VertexShader.h" // EmuCompileVertexShader +#include "core\kernel\init\CxbxKrnl.h" // implicit CxbxKrnl_Xbe used in LOG_TEST_CASE +#include "core\kernel\support\Emu.h" // LOG_TEST_CASE (via Logging.h) #include -#include +#include // std::stringstream -extern const char* g_vs_model = vs_model_2_a; +extern const char* g_vs_model = vs_model_3_0; // HLSL generation void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) @@ -56,6 +57,20 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) // Write the mask as a separate argument to the opcode defines // (No space, so that "dest,mask, ..." 
looks close to "dest.mask, ...") hlsl << ","; + + // Detect oFog masks other than x + // Test case: Lego Star Wars II (menu) + if (dest.Type == IMD_OUTPUT_O && + dest.Address == OREG_OFOG && + dest.Mask != MASK_X) + { + LOG_TEST_CASE("Vertex shader uses oFog mask other than x"); + EmuLog(LOG_LEVEL::WARNING, "oFog mask was %#x", dest.Mask); + hlsl << "x"; // write to x instead + return; + } + + // Write the mask if (dest.Mask & MASK_X) hlsl << "x"; if (dest.Mask & MASK_Y) hlsl << "y"; if (dest.Mask & MASK_Z) hlsl << "z"; @@ -181,133 +196,8 @@ void BuildShader(IntermediateVertexShader* pShader, std::stringstream& hlsl) } } -std::string DebugPrependLineNumbers(std::string shaderString) { - std::stringstream shader(shaderString); - auto debugShader = std::stringstream(); - - int i = 1; - for (std::string line; std::getline(shader, line); ) { - auto lineNumber = std::to_string(i++); - auto paddedLineNumber = lineNumber.insert(0, 3 - lineNumber.size(), ' '); - debugShader << "/* " << paddedLineNumber << " */ " << line << "\n"; - } - - return debugShader.str(); -} - -HRESULT CompileHlsl(const std::string& hlsl, ID3DBlob** ppHostShader, const char* pSourceName) -{ - // TODO include header in vertex shader - //xbox::X_VSH_SHADER_HEADER* pXboxVertexShaderHeader = (xbox::X_VSH_SHADER_HEADER*)pXboxFunction; - ID3DBlob* pErrors = nullptr; - ID3DBlob* pErrorsCompatibility = nullptr; - HRESULT hRet = 0; - auto hlslErrorLogLevel = FAILED(hRet) ? LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG; - - UINT flags1 = D3DCOMPILE_OPTIMIZATION_LEVEL3; - hRet = D3DCompile( - hlsl.c_str(), - hlsl.length(), - pSourceName, // pSourceName - nullptr, // pDefines - D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? 
- "main", // shader entry poiint - g_vs_model, // shader profile - flags1, // flags1 - 0, // flags2 - ppHostShader, // out - &pErrors // ppErrorMsgs out - ); - - // If the shader failed in the default vertex shader model, retry in vs_model_3_0 - // This allows shaders too large for 2_a to be compiled (Test Case: Shenmue 2) - if (FAILED(hRet)) { - if (pErrors) { - // Log HLSL compiler errors - EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); - pErrors->Release(); - pErrors = nullptr; - } - - EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Retrying with shader model 3.0"); - hRet = D3DCompile( - hlsl.c_str(), - hlsl.length(), - pSourceName, // pSourceName - nullptr, // pDefines - D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? - "main", // shader entry poiint - vs_model_3_0, // shader profile - flags1, // flags1 - 0, // flags2 - ppHostShader, // out - &pErrors // ppErrorMsgs out - ); - } - - // If the shader failed again, retry in compatibility mode - if (FAILED(hRet)) { - EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Recompiling in compatibility mode"); - // Attempt to retry in compatibility mode, this allows some vertex-state shaders to compile - // Test Case: Spy vs Spy - flags1 |= D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_AVOID_FLOW_CONTROL; - hRet = D3DCompile( - hlsl.c_str(), - hlsl.length(), - pSourceName, // pSourceName - nullptr, // pDefines - D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? 
- "main", // shader entry poiint - g_vs_model, // shader profile - flags1, // flags1 - 0, // flags2 - ppHostShader, // out - &pErrorsCompatibility // ppErrorMsgs out - ); - - if (FAILED(hRet)) { - LOG_TEST_CASE("Couldn't assemble vertex shader"); - } - } - - // Determine the log level - if (pErrors) { - // Log errors from the initial compilation - EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); - pErrors->Release(); - pErrors = nullptr; - } - - // Failure to recompile in compatibility mode ignored for now - if (pErrorsCompatibility != nullptr) { - pErrorsCompatibility->Release(); - pErrorsCompatibility = nullptr; - } - - LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) { - if (g_bPrintfOn) { - if (!FAILED(hRet)) { - // Log disassembly - hRet = D3DDisassemble( - (*ppHostShader)->GetBufferPointer(), - (*ppHostShader)->GetBufferSize(), - D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING, - NULL, - &pErrors - ); - if (pErrors) { - EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); - pErrors->Release(); - } - } - } - } - - return hRet; -} - // recompile xbox vertex shader function -extern HRESULT EmuCompileShader +extern HRESULT EmuCompileVertexShader ( IntermediateVertexShader* pIntermediateShader, ID3DBlob** ppHostShader @@ -326,11 +216,16 @@ extern HRESULT EmuCompileShader hlsl_stream << hlsl_template[1]; // Finish with the HLSL template footer std::string hlsl_str = hlsl_stream.str(); - EmuLog(LOG_LEVEL::DEBUG, "--- HLSL conversion ---"); - EmuLog(LOG_LEVEL::DEBUG, DebugPrependLineNumbers(hlsl_str).c_str()); - EmuLog(LOG_LEVEL::DEBUG, "-----------------------"); - - return CompileHlsl(hlsl_str, ppHostShader, "CxbxVertexShaderTemplate.hlsl"); + HRESULT hRet = EmuCompileShader(hlsl_str, g_vs_model, ppHostShader, "CxbxVertexShaderTemplate.hlsl"); + + if (FAILED(hRet) && (g_vs_model != vs_model_3_0)) { + // If the shader failed in the default vertex shader model, retry in vs_model_3_0 + // This allows shaders 
too large for 2_a to be compiled (Test Case: Shenmue 2) + EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Retrying with shader model 3.0"); + hRet = EmuCompileShader(hlsl_str, vs_model_3_0, ppHostShader, "CxbxVertexShaderTemplate.hlsl"); + } + + return hRet; } extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader) @@ -352,7 +247,7 @@ extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader) hlsl << hlslStream.rdbuf(); // Compile the shader - CompileHlsl(hlsl.str(), &pShader, sourceFile.c_str()); + EmuCompileShader(hlsl.str(), g_vs_model, &pShader, sourceFile.c_str()); } *ppHostShader = pShader; @@ -378,6 +273,9 @@ uniform float4 xboxScreenspaceOffset : register(c213); uniform float4 xboxTextureScale[4] : register(c214); +// Parameters for mapping the shader's fog output value to a fog factor +uniform float4 CxbxFogInfo: register(c218); // = CXBX_D3DVS_CONSTREG_FOGINFO + struct VS_INPUT { float4 v[16] : TEXCOORD; @@ -456,10 +354,33 @@ VS_OUTPUT main(const VS_INPUT xIn) // Copy variables to output struct VS_OUTPUT xOut; + // Fogging + // TODO deduplicate + const float fogDepth = abs(oFog.x); + const float fogTableMode = CxbxFogInfo.x; + const float fogDensity = CxbxFogInfo.y; + const float fogStart = CxbxFogInfo.z; + const float fogEnd = CxbxFogInfo.w; + + const float FOG_TABLE_NONE = 0; + const float FOG_TABLE_EXP = 1; + const float FOG_TABLE_EXP2 = 2; + const float FOG_TABLE_LINEAR = 3; + + float fogFactor; + if(fogTableMode == FOG_TABLE_NONE) + fogFactor = fogDepth; + if(fogTableMode == FOG_TABLE_EXP) + fogFactor = 1 / exp(fogDepth * fogDensity); /* / 1 / e^(d * density)*/ + if(fogTableMode == FOG_TABLE_EXP2) + fogFactor = 1 / exp(pow(fogDepth * fogDensity, 2)); /* / 1 / e^((d * density)^2)*/ + if(fogTableMode == FOG_TABLE_LINEAR) + fogFactor = (fogEnd - fogDepth) / (fogEnd - fogStart); + xOut.oPos = reverseScreenspaceTransform(oPos); xOut.oD0 = saturate(oD0); xOut.oD1 = saturate(oD1); - xOut.oFog = oFog.x; // Note : Xbox clamps fog in pixel shader + 
xOut.oFog = fogFactor; // Note : Xbox clamps fog in pixel shader xOut.oPts = oPts.x; xOut.oB0 = saturate(oB0); xOut.oB1 = saturate(oB1); @@ -473,7 +394,7 @@ VS_OUTPUT main(const VS_INPUT xIn) } )"; - CompileHlsl(hlsl, &pPassthroughShader, "passthrough.hlsl"); + EmuCompileShader(hlsl, g_vs_model, &pPassthroughShader, "passthrough.hlsl"); } *ppHostShader = pPassthroughShader; diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.h b/src/core/hle/D3D8/Direct3D9/VertexShader.h index 29d8cc57c..c2ef913b2 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.h +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.h @@ -1,6 +1,4 @@ - -#ifndef DIRECT3D9VERTEXSHADER_H -#define DIRECT3D9VERTEXSHADER_H +#pragma once #include "core\hle\D3D8\XbVertexShader.h" #include "FixedFunctionVertexShaderState.hlsli" @@ -15,7 +13,7 @@ static const char* vs_model_2_a = "vs_2_a"; static const char* vs_model_3_0 = "vs_3_0"; extern const char* g_vs_model; -extern HRESULT EmuCompileShader +extern HRESULT EmuCompileVertexShader ( IntermediateVertexShader* pIntermediateShader, ID3DBlob** ppHostShader @@ -25,4 +23,3 @@ extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader); extern HRESULT EmuCompileXboxPassthrough(ID3DBlob** ppHostShader); -#endif diff --git a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp b/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp index a00fa252f..1748651ea 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp @@ -13,7 +13,7 @@ VertexShaderSource g_VertexShaderSource = VertexShaderSource(); ID3DBlob* AsyncCreateVertexShader(IntermediateVertexShader intermediateShader, ShaderKey key) { ID3DBlob* pCompiledShader; - auto hRet = EmuCompileShader( + auto hRet = EmuCompileVertexShader( &intermediateShader, &pCompiledShader ); diff --git a/src/core/hle/D3D8/XbConvert.cpp b/src/core/hle/D3D8/XbConvert.cpp index 8c55acae4..1ea57bd1b 100644 --- a/src/core/hle/D3D8/XbConvert.cpp +++ 
b/src/core/hle/D3D8/XbConvert.cpp @@ -1339,14 +1339,23 @@ void EmuUnswizzleBox } // EmuUnswizzleBox NOPATCH // Notes : -// * most renderstates were introduced in the (lowest known) XDK version : 3424 -// * additional renderstates were introduced between 3434 and 4627 -// * we MUST list exact versions for each of those, since their inserts impacts mapping! -// * renderstates were finalized in 4627 (so no change after that version) -// * renderstates after D3DRS_MULTISAMPLEMASK have no host mapping, thus no impact -// * D3DRS_MULTISAMPLETYPE seems the only renderstate that got removed (after 3944, before 4039) -// * all renderstates marked 3424 are also verified present in 3944 -const RenderStateInfo DxbxRenderStateInfo[] = { +// * Most renderstates were introduced in the (lowest known) XDK version : 3424 +// * Some titles use XDK version 3911 +// * The lowest XDK version that has been verified is : 3944 +// * All renderstates marked 3424 are also verified to be present in 3944 +// * Twenty-three additional renderstates were introduced after 3944 and up to 4627; +// * D3DRS_DEPTHCLIPCONTROL, D3DRS_STIPPLEENABLE, D3DRS_SIMPLE_UNUSED8..D3DRS_SIMPLE_UNUSED1, +// * D3DRS_SWAPFILTER, D3DRS_PRESENTATIONINTERVAL, D3DRS_DEFERRED_UNUSED8..D3DRS_DEFERRED_UNUSED1, +// * D3DRS_MULTISAMPLEMODE, D3DRS_MULTISAMPLERENDERTARGETMODE, and D3DRS_SAMPLEALPHA +// * One renderstate, D3DRS_MULTISAMPLETYPE, was removed (after 3944, before 4039, perhaps even 4034) +// * Around when D3DRS_MULTISAMPLETYPE was removed, D3DRS_MULTISAMPLEMODE was introduced (after 3944, before or at 4039, perhaps even 4034) +// * We MUST list exact versions for all above mentioned renderstates, since their inserts impacts mapping! 
+// * Renderstates verified to be introduced at 4039 or earlier, may have been introduced at 4034 or earlier +// * Renderstates were finalized in 4627 (so no change after that version) +// * XDK versions that have been verified : 3944, 4039, 4134, 4242, 4361, 4432, 4531, 4627, 4721, 4831, 4928, 5028, 5120, 5233, 5344, 5455, 5558, 5659, 5788, 5849, 5933 +// * Renderstates with uncertain validity are marked "Verified absent in #XDK#" and/or "present in #XDK#". Some have "Might be introduced "... "in between" or "around #XDK#" +// * Renderstates after D3DRS_MULTISAMPLEMASK have no host DX9 D3DRS mapping, thus no impact +const RenderStateInfo DxbxRenderStateInfo[1+xbox::X_D3DRS_DONOTCULLUNCOMPRESSED] = { // String Ord Version Type Method Native { "D3DRS_PSALPHAINPUTS0" /*= 0*/, 3424, xtDWORD, NV2A_RC_IN_ALPHA(0) }, @@ -1495,8 +1504,8 @@ const RenderStateInfo DxbxRenderStateInfo[] = { { "D3DRS_VERTEXBLEND" /*= 137*/, 3424, xtD3DVERTEXBLENDFLAGS, NV2A_SKIN_MODE, D3DRS_VERTEXBLEND }, { "D3DRS_FOGCOLOR" /*= 138*/, 3424, xtD3DCOLOR, NV2A_FOG_COLOR, D3DRS_FOGCOLOR }, // SwapRgb { "D3DRS_FILLMODE" /*= 139*/, 3424, xtD3DFILLMODE, NV2A_POLYGON_MODE_FRONT, D3DRS_FILLMODE }, - { "D3DRS_BACKFILLMODE" /*= 140*/, 3424, xtD3DFILLMODE, 0 }, // nsp. - { "D3DRS_TWOSIDEDLIGHTING" /*= 141*/, 3424, xtBOOL, NV2A_POLYGON_MODE_BACK }, // nsp. + { "D3DRS_BACKFILLMODE" /*= 140*/, 3424, xtD3DFILLMODE, NV2A_POLYGON_MODE_BACK }, // nsp. + { "D3DRS_TWOSIDEDLIGHTING" /*= 141*/, 3424, xtBOOL, 0 }, // nsp. // FIXME map from NV2A_LIGHT_MODEL { "D3DRS_NORMALIZENORMALS" /*= 142*/, 3424, xtBOOL, NV2A_NORMALIZE_ENABLE, D3DRS_NORMALIZENORMALS }, { "D3DRS_ZENABLE" /*= 143*/, 3424, xtBOOL, NV2A_DEPTH_TEST_ENABLE, D3DRS_ZENABLE }, // D3DZBUFFERTYPE? 
{ "D3DRS_STENCILENABLE" /*= 144*/, 3424, xtBOOL, NV2A_STENCIL_ENABLE, D3DRS_STENCILENABLE }, diff --git a/src/core/hle/D3D8/XbD3D8Types.h b/src/core/hle/D3D8/XbD3D8Types.h index c5d5dc88d..33040d5c0 100644 --- a/src/core/hle/D3D8/XbD3D8Types.h +++ b/src/core/hle/D3D8/XbD3D8Types.h @@ -1039,10 +1039,8 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE; // TODO co-locate shader workaround constants with shader code #define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE (X_D3DVS_CONSTREG_COUNT) #define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_SIZE 16 - #define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE (CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE + CXBX_D3DVS_CONSTREG_VREGDEFAULTS_SIZE) #define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_SIZE 4 - #define CXBX_D3DVS_SCREENSPACE_SCALE_BASE (CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE + CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_SIZE) #define CXBX_D3DVS_NORMALIZE_SCALE_SIZE 1 @@ -1052,6 +1050,8 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE; #define CXBX_D3DVS_TEXTURES_SCALE_BASE (CXBX_D3DVS_SCREENSPACE_OFFSET_BASE + CXBX_D3DVS_NORMALIZE_OFFSET_SIZE) #define CXBX_D3DVS_TEXTURES_SCALE_SIZE 4 +#define CXBX_D3DVS_CONSTREG_FOGINFO (CXBX_D3DVS_TEXTURES_SCALE_BASE + CXBX_D3DVS_TEXTURES_SCALE_SIZE) + #define X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION) #define X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_OFFSET + X_D3DSCM_CORRECTION) diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 9d0af8b27..bd38db034 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -20,6 +20,7 @@ // * 59 Temple Place - Suite 330, Bostom, MA 02111-1307, USA. // * // * (c) 2002-2003 kingofc +// * 2020 PatrickvL // * // * All rights reserved // * @@ -32,25 +33,6 @@ With the help of this parser it is possible to generate Direct3D pixel shader assembly code. 
- TODO: - - fix BumpDemo - (after second recompilation the shader does not work, - can also be something in CxbxKrnl because it looks like no - textures are set. Check cubemap loading from resourcesd!!!) - => seems to work now, the problem is that I don't know - how it must look on a real xbox - - - add reference counting constants which were added as c variables - if they are compiled away (optimization of the command, etc.) - decrement the reference count and when it reaches 0 remove - the constant (to save the num of vars) - - - add _sat feature - * Support as instruction modifier, - if necessary as mov_sat x, y - - - When porting to DirectX 9, expand this to pixel shader model 2.0 or up - - Alternatively, translate to HLSL and let D3DXCompileShader/D3DCompile figure it out */ #define LOG_PREFIX CXBXR_MODULE::PXSH @@ -58,5626 +40,32 @@ #include "core\kernel\support\Emu.h" #include "core\hle\D3D8\Direct3D9\Direct3D9.h" // For g_pD3DDevice, g_pXbox_PixelShader #include "core\hle\D3D8\XbPixelShader.h" +#include "core\hle\D3D8\Direct3D9\PixelShader.h" // EmuCompilePixelShader #include "core\hle\D3D8\XbD3D8Logging.h" // For D3DErrorString() #include "core\kernel\init\CxbxKrnl.h" // For CxbxKrnlCleanup() +#include "util\hasher.h" +#include "core\hle\D3D8\Direct3D9\FixedFunctionPixelShader.hlsli" #include // assert() #include #include +#include +#include #include "Direct3D9\RenderStates.h" // For XboxRenderStateConverter +#include "Direct3D9\TextureStates.h" // For XboxTextureStateConverter #include extern XboxRenderStateConverter XboxRenderStates; // Declared in Direct3D9.cpp +extern XboxTextureStateConverter XboxTextureStates; // Declared in Direct3D9.cpp + #define DbgPshPrintf \ LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) \ if(g_bPrintfOn) printf - -/*---------------------------------------------------------------------------*/ -/* Texture configuration - The following members of the D3DPixelShaderDef */ -/* structure define the addressing modes of each of the four texture 
stages:*/ -/* PSTextureModes */ -/* PSDotMapping */ -/* PSInputTexture */ -/* PSCompareMode */ -/*---------------------------------------------------------------------------*/ - -// ========================================================================================================= -// PSTextureModes -// --------.--------.--------.---xxxxx stage0 -// --------.--------.------xx.xxx----- stage1 -// --------.--------.-xxxxx--.-------- stage2 -// --------.----xxxx.x-------.-------- stage3 - -#define PS_TEXTUREMODES(t0,t1,t2,t3) (((t3)<<15)|((t2)<<10)|((t1)<<5)|(t0)) - -/* -Texture modes: -NONE :stage inactive -PROJECT2D :argb = texture(s/q, t/q) -PROJECT3D :argb = texture(s/q, t/q, r/q) -CUBEMAP :argb = cubemap(s,t,r) -PASSTHRU :argb = s,t,r,q -CLIPPLANE :pixel not drawn if s,t,r, or q < 0. PSCompareMode affects comparison -BUMPENVMAP :argb=texture(s+mat00*src.r+mat01*src.g, - t+mat10*src.r+mat11*src.g) - mat00 set via D3DTSS_BUMPENVMAT00, etc. -BUMPENVMAP_LUM :argb=texture(s+mat00*src.r+mat01*src.g, - t+mat10*src.r+mat11*src.g); - rgb *= (lum_scale*src.b + lum_bias); (a is not affected) - lum_scale set by D3DTSS_BUMPENVLSCALE - lum_bias set by D3DTSS_BUMPENVLOFFSET - mat00 set via D3DTSS_BUMPENVMAT00, etc. 
-BRDF :argb = texture(eyeSigma, lightSigma, dPhi) - eyeSigma = Sigma of eye vector in spherical coordinates - lightSigma = Sigma of light vector in spherical coordinates - dPhi = Phi of eye - Phi of light -DOT_ST :argb = texture(, (s,t,r).(src.r,src.g,src.b)) -DOT_ZW :frag depth = (/((s,t,r).(src.r,src.g,src.b)) -DOT_RFLCT_DIFF :n = (,(s,t,r).(src.r,src.g,src.b),) - argb = cubemap(n) -DOT_RFLCT_SPEC :n = (,,(s,t,r).(src.r,src.g,src.b)) - r = 2*n*(n.e)/(n.n) - e where e is eye vector built from q coord of each stage - argb = cubemap(r) -DOT_STR_3D :argb=texture((,,(s,t,r).(src.r,src.g,src.b))) -DOT_STR_CUBE :argb=cubemap((,,(s,t,r).(src.r,src.g,src.b))) -DEPENDENT_AR :argb = texture(src.a, src.r) -DEPENDENT_GB :argb = texture(src.g, src.b) -DOTPRODUCT :argb = (s,t,r).(src.r,src.g,src.b) -DOT_RFLCT_SPEC_CONST :n = (,,(s,t,r).(src.r,src.g,src.b)) - r = 2*n*(n.e)/(n.n) - e where e is eye vector set via SetEyeVector() - argb = cubemap(r) -*/ - -enum PS_TEXTUREMODES -{ // valid in stage 0 1 2 3 - PS_TEXTUREMODES_NONE= 0x00L, // * * * * - PS_TEXTUREMODES_PROJECT2D= 0x01L, // * * * * - PS_TEXTUREMODES_PROJECT3D= 0x02L, // * * * * - PS_TEXTUREMODES_CUBEMAP= 0x03L, // * * * * - PS_TEXTUREMODES_PASSTHRU= 0x04L, // * * * * - PS_TEXTUREMODES_CLIPPLANE= 0x05L, // * * * * - PS_TEXTUREMODES_BUMPENVMAP= 0x06L, // - * * * - PS_TEXTUREMODES_BUMPENVMAP_LUM= 0x07L, // - * * * - PS_TEXTUREMODES_BRDF= 0x08L, // - - * * - PS_TEXTUREMODES_DOT_ST= 0x09L, // - - * * - PS_TEXTUREMODES_DOT_ZW= 0x0aL, // - - * * - PS_TEXTUREMODES_DOT_RFLCT_DIFF= 0x0bL, // - - * - - PS_TEXTUREMODES_DOT_RFLCT_SPEC= 0x0cL, // - - - * - PS_TEXTUREMODES_DOT_STR_3D= 0x0dL, // - - - * - PS_TEXTUREMODES_DOT_STR_CUBE= 0x0eL, // - - - * - PS_TEXTUREMODES_DPNDNT_AR= 0x0fL, // - * * * - PS_TEXTUREMODES_DPNDNT_GB= 0x10L, // - * * * - PS_TEXTUREMODES_DOTPRODUCT= 0x11L, // - * * - - PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST= 0x12L, // - - - * - // 0x13-0x1f reserved -}; - -// 
========================================================================================================= -// PSDotMapping -// --------.--------.--------.-----xxx // stage1 -// --------.--------.--------.-xxx---- // stage2 -// --------.--------.-----xxx.-------- // stage3 - -#define PS_DOTMAPPING(t0,t1,t2,t3) (((t3)<<8)|((t2)<<4)|(t1)) - -// Mappings: -// ZERO_TO_ONE :rgb->(r,g,b): 0x0=>0.0, 0xff=>1.0 -// MINUS1_TO_1_D3D :rgb->(r,g,b): 0x0=>-128/127, 0x01=>-1.0, 0x80=>0.0, 0xff=>1.0 -// MINUS1_TO_1_GL :rgb->(r,g,b): 0x80=>-1.0, 0x0=>0.0, 0x7f=>1.0 -// MINUS1_TO_1 :rgb->(r,g,b): 0x80=>-128/127, 0x81=>-1.0, 0x0=>0.0, 0x7f=>1.0 -// HILO_1 :HL->(H,L,1.0): 0x0000=>0.0, 0xffff=>1.0 -// HILO_HEMISPHERE :HL->(H,L,sqrt(1-H*H-L*L)): 0x8001=>-1.0, 0x0=>0.0, 0x7fff=>1.0, 0x8000=>-32768/32767 - -enum PS_DOTMAPPING -{ // valid in stage 0 1 2 3 - PS_DOTMAPPING_ZERO_TO_ONE= 0x00L, // - * * * - PS_DOTMAPPING_MINUS1_TO_1_D3D= 0x01L, // - * * * - PS_DOTMAPPING_MINUS1_TO_1_GL= 0x02L, // - * * * - PS_DOTMAPPING_MINUS1_TO_1= 0x03L, // - * * * - PS_DOTMAPPING_HILO_1= 0x04L, // - * * * - // ? 0x05L ? - // ? 0x06L ? 
- PS_DOTMAPPING_HILO_HEMISPHERE= 0x07L, // - * * * -}; - -// ========================================================================================================= -// PSCompareMode -// --------.--------.--------.----xxxx // stage0 -// --------.--------.--------.xxxx---- // stage1 -// --------.--------.----xxxx.-------- // stage2 -// --------.--------.xxxx----.-------- // stage3 - -#define PS_COMPAREMODE(t0,t1,t2,t3) (((t3)<<12)|((t2)<<8)|((t1)<<4)|(t0)) - -enum PS_COMPAREMODE -{ - PS_COMPAREMODE_S_LT= 0x00L, - PS_COMPAREMODE_S_GE= 0x01L, - - PS_COMPAREMODE_T_LT= 0x00L, - PS_COMPAREMODE_T_GE= 0x02L, - - PS_COMPAREMODE_R_LT= 0x00L, - PS_COMPAREMODE_R_GE= 0x04L, - - PS_COMPAREMODE_Q_LT= 0x00L, - PS_COMPAREMODE_Q_GE= 0x08L, -}; - -// ========================================================================================================= -// PSInputTexture -// --------.-------x.--------.-------- // stage2 -// --------.--xx----.--------.-------- // stage3 -// -// Selects the other texture to use as an input in the following texture modes: -// DOT_ST, DOT_STR_3D, DOT_STR_CUBE, DOT_ZW, DOT_RFLCT_SPEC, -// DOT_RFLCT_DIFF, DPNDNT_AR, DPNDNT_GB, BUMPENVMAP, -// BUMPENVMAP_LUM, DOT_PRODUCT - -#define PS_INPUTTEXTURE(t0,t1,t2,t3) (((t3)<<20)|((t2)<<16)) - - -/*---------------------------------------------------------------------------------*/ -/* Color combiners - The following members of the D3DPixelShaderDef structure */ -/* define the state for the eight stages of color combiners: */ -/* PSCombinerCount - Number of stages */ -/* PSAlphaInputs[8] - Inputs for alpha portion of each stage */ -/* PSRGBInputs[8] - Inputs for RGB portion of each stage */ -/* PSConstant0[8] - Constant 0 for each stage */ -/* PSConstant1[8] - Constant 1 for each stage */ -/* PSFinalCombinerConstant0 - Constant 0 for final combiner */ -/* PSFinalCombinerConstant1 - Constant 1 for final combiner */ -/* PSAlphaOutputs[8] - Outputs for alpha portion of each stage */ -/* PSRGBOutputs[8] - Outputs 
for RGB portion of each stage */ -/*---------------------------------------------------------------------------------*/ - - -// ========================================================================================================= -// PSCombinerCount -// --------.--------.--------.----xxxx // number of combiners (1-8) -// --------.--------.-------x.-------- // mux bit (0= LSB, 1= MSB) -// --------.--------.---x----.-------- // separate C0 -// --------.-------x.--------.-------- // separate C1 - -#define PS_COMBINERCOUNT(count, flags) (((flags)<<8)|(count)) -// count is 1-8, flags contains one or more values from PS_COMBINERCOUNTFLAGS - -enum PS_COMBINERCOUNTFLAGS -{ - PS_COMBINERCOUNT_MUX_LSB= 0x0000L, // mux on r0.a lsb - PS_COMBINERCOUNT_MUX_MSB= 0x0001L, // mux on r0.a msb - - PS_COMBINERCOUNT_SAME_C0= 0x0000L, // c0 same in each stage - PS_COMBINERCOUNT_UNIQUE_C0= 0x0010L, // c0 unique in each stage - - PS_COMBINERCOUNT_SAME_C1= 0x0000L, // c1 same in each stage - PS_COMBINERCOUNT_UNIQUE_C1= 0x0100L // c1 unique in each stage -}; - -// ========================================================================================================= -// PSRGBInputs[0-7] -// PSAlphaInputs[0-7] -// PSFinalCombinerInputsABCD -// PSFinalCombinerInputsEFG -// --------.--------.--------.----xxxx // D register -// --------.--------.--------.---x---- // D channel (0= RGB/BLUE, 1= ALPHA) -// --------.--------.--------.xxx----- // D input mapping -// --------.--------.----xxxx.-------- // C register -// --------.--------.---x----.-------- // C channel (0= RGB/BLUE, 1= ALPHA) -// --------.--------.xxx-----.-------- // C input mapping -// --------.----xxxx.--------.-------- // B register -// --------.---x----.--------.-------- // B channel (0= RGB/BLUE, 1= ALPHA) -// --------.xxx-----.--------.-------- // B input mapping -// ----xxxx.--------.--------.-------- // A register -// ---x----.--------.--------.-------- // A channel (0= RGB/BLUE, 1= ALPHA) -// 
xxx-----.--------.--------.-------- // A input mapping - -// examples: -// -// shader.PSRGBInputs[3]= PS_COMBINERINPUTS( -// PS_REGISTER_T0 | PS_INPUTMAPPING_EXPAND_NORMAL | PS_CHANNEL_RGB, -// PS_REGISTER_C0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_ALPHA, -// PS_REGISTER_ZERO, -// PS_REGISTER_ZERO); -// -// shader.PSFinalCombinerInputsABCD= PS_COMBINERINPUTS( -// PS_REGISTER_T0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_ALPHA, -// PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL | PS_CHANNEL_RGB, -// PS_REGISTER_EFPROD | PS_INPUTMAPPING_UNSIGNED_INVERT | PS_CHANNEL_RGB, -// PS_REGISTER_ZERO); -// -// PS_FINALCOMBINERSETTING is set in 4th field of PSFinalCombinerInputsEFG with PS_COMBINERINPUTS -// example: -// -// shader.PSFinalCombinerInputsEFG= PS_COMBINERINPUTS( -// PS_REGISTER_R0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_RGB, -// PS_REGISTER_R1 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_RGB, -// PS_REGISTER_R1 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_BLUE, -// PS_FINALCOMBINERSETTING_CLAMP_SUM | PS_FINALCOMBINERSETTING_COMPLEMENT_R0); - -#define PS_COMBINERINPUTS(a,b,c,d) (((a)<<24)|((b)<<16)|((c)<<8)|(d)) -// For PSFinalCombinerInputsEFG, -// a,b,c contain a value from PS_REGISTER, PS_CHANNEL, and PS_INPUTMAPPING for input E,F, and G -// d contains values from PS_FINALCOMBINERSETTING -// For all other inputs, -// a,b,c,d each contain a value from PS_REGISTER, PS_CHANNEL, and PS_INPUTMAPPING - -// The input can have the following mappings applied : -// -// PS_INPUTMAPPING_UNSIGNED_IDENTITY : y = max(0,x) = 1*max(0,x) + 0.0 -// PS_INPUTMAPPING_UNSIGNED_INVERT : y = 1 - max(0,x) = -1*max(0,x) + 1.0 -// PS_INPUTMAPPING_EXPAND_NORMAL : y = 2*max(0,x) - 1 = 2*max(0,x) - 1.0 -// PS_INPUTMAPPING_EXPAND_NEGATE : y = 1 - 2*max(0,x) = -2*max(0,x) + 1.0 -// PS_INPUTMAPPING_HALFBIAS_NORMAL : y = max(0,x) - 1/2 = 1*max(0,x) - 0.5 -// PS_INPUTMAPPING_HALFBIAS_NEGATE : y = 1/2 - max(0,x) = -1*max(0,x) + 0.5 -// 
PS_INPUTMAPPING_SIGNED_IDENTITY : y = x = 1* x + 0.0 -// PS_INPUTMAPPING_SIGNED_NEGATE : y = -x = -1* x + 0.0 -// -// (Note : I don't know for sure if the max() operation mentioned above is indeed what happens, -// as there's no further documentation available on this. Native Direct3D can clamp with the -// '_sat' instruction modifier, but that's not really the same as these Xbox1 input mappings.) -// -// When the input register is PS_ZERO, the above mappings result in the following constants: -// -// PS_REGISTER_NEGATIVE_ONE (PS_INPUTMAPPING_EXPAND_NORMAL on zero) : y = -1.0 -// PS_REGISTER_NEGATIVE_ONE_HALF (PS_INPUTMAPPING_HALFBIAS_NORMAL on zero) : y = -0.5 -// PS_REGISTER_ZERO itself : y = 0.0 -// PS_REGISTER_ONE_HALF (PS_INPUTMAPPING_HALFBIAS_NEGATE on zero) : y = 0.5 -// PS_REGISTER_ONE (PS_INPUTMAPPING_UNSIGNED_INVERT on zero) : y = 1.0 -// (Note : It has no define, but PS_INPUTMAPPING_EXPAND_NEGATE on zero results in ONE too!) - -enum PS_INPUTMAPPING -{ - PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // max(0,x) OK for final combiner: y = abs(x) - PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // 1 - max(0,x) OK for final combiner: y = 1 - x - PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // 2*max(0,x) - 1 invalid for final combiner - PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // 1 - 2*max(0,x) invalid for final combiner - PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // max(0,x) - 1/2 invalid for final combiner - PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // 1/2 - max(0,x) invalid for final combiner - PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // x invalid for final combiner - PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // -x invalid for final combiner -}; - -enum PS_REGISTER -{ - PS_REGISTER_ZERO= 0x00L, // r - PS_REGISTER_DISCARD= 0x00L, // w - PS_REGISTER_C0= 0x01L, // r - PS_REGISTER_C1= 0x02L, // r - PS_REGISTER_FOG= 0x03L, // r - PS_REGISTER_V0= 0x04L, // r/w - PS_REGISTER_V1= 0x05L, // r/w - PS_REGISTER_T0= 0x08L, // r/w - PS_REGISTER_T1= 0x09L, // r/w - PS_REGISTER_T2= 0x0aL, // r/w - 
PS_REGISTER_T3= 0x0bL, // r/w - PS_REGISTER_R0= 0x0cL, // r/w - PS_REGISTER_R1= 0x0dL, // r/w - PS_REGISTER_V1R0_SUM= 0x0eL, // r - PS_REGISTER_EF_PROD= 0x0fL, // r - - PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // 0x20 OK for final combiner - PS_REGISTER_NEGATIVE_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // 0x40 invalid for final combiner - PS_REGISTER_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // 0xa0 invalid for final combiner - PS_REGISTER_NEGATIVE_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // 0x80 invalid for final combiner - - PS_REGISTER_CXBX_PROD = PS_REGISTER_ZERO | PS_INPUTMAPPING_SIGNED_IDENTITY, // Cxbx internal use -}; - -// FOG ALPHA is only available in final combiner -// V1R0_SUM and EF_PROD are only available in final combiner (A,B,C,D inputs only) -// V1R0_SUM_ALPHA and EF_PROD_ALPHA are not available -// R0_ALPHA is initialized to T0_ALPHA in stage0 - -enum PS_CHANNEL -{ - PS_CHANNEL_RGB= 0x00, // used as RGB source - PS_CHANNEL_BLUE= 0x00, // used as ALPHA source - PS_CHANNEL_ALPHA= 0x10, // used as RGB or ALPHA source -}; - -constexpr DWORD PS_ChannelMask = (DWORD)PS_CHANNEL_ALPHA; -constexpr DWORD PS_NoChannelMask = (DWORD)(~PS_ChannelMask); -constexpr DWORD PS_AlphaChannelsMask = (DWORD)(PS_ChannelMask | (PS_ChannelMask << 8) | (PS_ChannelMask << 16) | (PS_ChannelMask << 24)); -constexpr DWORD PS_NoChannelsMask = (DWORD)(~PS_AlphaChannelsMask); - -enum PS_FINALCOMBINERSETTING -{ - PS_FINALCOMBINERSETTING_CLAMP_SUM= 0x80, // V1+R0 sum clamped to [0,1] - PS_FINALCOMBINERSETTING_COMPLEMENT_V1= 0x40, // unsigned invert mapping (1 - v1) is used as an input to the sum rather than v1 - PS_FINALCOMBINERSETTING_COMPLEMENT_R0= 0x20, // unsigned invert mapping (1 - r0) is used as an input to the sum rather than r0 -}; - -// ========================================================================================================= -// PSRGBOutputs[0-7] -// PSAlphaOutputs[0-7] -// 
--------.--------.--------.----xxxx // CD register -// --------.--------.--------.xxxx---- // AB register -// --------.--------.----xxxx.-------- // SUM register -// --------.--------.---x----.-------- // CD output (0= multiply, 1= dot product) -// --------.--------.--x-----.-------- // AB output (0= multiply, 1= dot product) -// --------.--------.-x------.-------- // AB_CD mux/sum select (0= sum, 1= mux) -// --------.------xx.x-------.-------- // Output mapping -// --------.-----x--.--------.-------- // CD blue to alpha -// --------.----x---.--------.-------- // AB blue to alpha - -#define PS_COMBINEROUTPUTS(ab,cd,mux_sum,flags) (((flags)<<12)|((mux_sum)<<8)|((ab)<<4)|(cd)) -// ab,cd,mux_sum contain a value from PS_REGISTER -// flags contains values from PS_COMBINEROUTPUT - -enum PS_COMBINEROUTPUT -{ - PS_COMBINEROUTPUT_IDENTITY= 0x00L, // y = x - PS_COMBINEROUTPUT_BIAS= 0x08L, // y = x - 0.5 - PS_COMBINEROUTPUT_SHIFTLEFT_1= 0x10L, // y = x*2 - PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS= 0x18L, // y = (x - 0.5)*2 - PS_COMBINEROUTPUT_SHIFTLEFT_2= 0x20L, // y = x*4 - // PS_COMBINEROUTPUT_SHIFTLEFT_2_BIAS= 0x28L, // y = (x - 0.5)*4 - PS_COMBINEROUTPUT_SHIFTRIGHT_1= 0x30L, // y = x/2 - // PS_COMBINEROUTPUT_SHIFTRIGHT_1_BIAS= 0x38L, // y = (x - 0.5)/2 - - PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA= 0x80L, // RGB only - - PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA= 0x40L, // RGB only - - PS_COMBINEROUTPUT_AB_MULTIPLY= 0x00L, - PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only - - PS_COMBINEROUTPUT_CD_MULTIPLY= 0x00L, - PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L, // RGB only - - PS_COMBINEROUTPUT_AB_CD_SUM= 0x00L, // 3rd output is AB+CD - PS_COMBINEROUTPUT_AB_CD_MUX= 0x04L, // 3rd output is MUX(AB,CD) based on R0.a -}; - -// AB_CD register output must be DISCARD if either AB_DOT_PRODUCT or CD_DOT_PRODUCT are set - -// ========================================================================================================= -// PSC0Mapping -// PSC1Mapping -// --------.--------.--------.----xxxx 
// offset of D3D constant for stage 0 -// --------.--------.--------.xxxx---- // offset of D3D constant for stage 1 -// --------.--------.----xxxx.-------- // offset of D3D constant for stage 2 -// --------.--------.xxxx----.-------- // offset of D3D constant for stage 3 -// --------.----xxxx.--------.-------- // offset of D3D constant for stage 4 -// --------.xxxx----.--------.-------- // offset of D3D constant for stage 5 -// ----xxxx.--------.--------.-------- // offset of D3D constant for stage 6 -// xxxx----.--------.--------.-------- // offset of D3D constant for stage 7 - -#define PS_CONSTANTMAPPING(s0,s1,s2,s3,s4,s5,s6,s7) \ - (((DWORD)(s0)&0xf)<< 0) | (((DWORD)(s1)&0xf)<< 4) | \ - (((DWORD)(s2)&0xf)<< 8) | (((DWORD)(s3)&0xf)<<12) | \ - (((DWORD)(s4)&0xf)<<16) | (((DWORD)(s5)&0xf)<<20) | \ - (((DWORD)(s6)&0xf)<<24) | (((DWORD)(s7)&0xf)<<28) -// s0-s7 contain the offset of the D3D constant that corresponds to the -// c0 or c1 constant in stages 0 through 7. These mappings are only used in -// SetPixelShaderConstant(). - -// ========================================================================================================= -// PSFinalCombinerConstants -// --------.--------.--------.----xxxx // offset of D3D constant for C0 -// --------.--------.--------.xxxx---- // offset of D3D constant for C1 -// --------.--------.-------x.-------- // Adjust texture flag - -#define PS_FINALCOMBINERCONSTANTS(c0,c1,flags) (((DWORD)(flags) << 8) | ((DWORD)(c0)&0xf)<< 0) | (((DWORD)(c1)&0xf)<< 4) -// c0 and c1 contain the offset of the D3D constant that corresponds to the -// constants in the final combiner. These mappings are only used in -// SetPixelShaderConstant(). 
Flags contains values from PS_GLOBALFLAGS - -enum PS_GLOBALFLAGS -{ - // if this flag is set, the texture mode for each texture stage is adjusted as follows: - // if set texture is a cubemap, - // change PS_TEXTUREMODES_PROJECT2D to PS_TEXTUREMODES_CUBEMAP - // change PS_TEXTUREMODES_PROJECT3D to PS_TEXTUREMODES_CUBEMAP - // change PS_TEXTUREMODES_DOT_STR_3D to PS_TEXTUREMODES_DOT_STR_CUBE - // if set texture is a volume texture, - // change PS_TEXTUREMODES_PROJECT2D to PS_TEXTUREMODES_PROJECT3D - // change PS_TEXTUREMODES_CUBEMAP to PS_TEXTUREMODES_PROJECT3D - // change PS_TEXTUREMODES_DOT_STR_CUBE to PS_TEXTUREMODES_DOT_STR_3D - // if set texture is neither cubemap or volume texture, - // change PS_TEXTUREMODES_PROJECT3D to PS_TEXTUREMODES_PROJECT2D - // change PS_TEXTUREMODES_CUBEMAP to PS_TEXTUREMODES_PROJECT2D - - PS_GLOBALFLAGS_NO_TEXMODE_ADJUST= 0x0000L, // don"t adjust texture modes - PS_GLOBALFLAGS_TEXMODE_ADJUST= 0x0001L, // adjust texture modes according to set texture -}; - -enum PSH_OPCODE -{ - PO_COMMENT, - PO_PS, - PO_DEF, - PO_DCL, // Note : ps.2.0 and up only - PO_DCL_2D, // Note : ps.2.0 and up only - PO_DCL_CUBE, // Note : ps.2.0 and up only - PO_DCL_VOLUME, // Note : ps.2.0 and up only - PO_TEX, - PO_TEXLD, // Note : ps.1.4 only - PO_TEXLD2, // Note : ps.2.0 and up only - PO_TEXBEM, - PO_TEXBEML, - PO_TEXBRDF, // Xbox ext. - PO_TEXCOORD, - PO_TEXCRD, // Note: ps.1.4 only - PO_TEXKILL, - PO_TEXREG2AR, - PO_TEXREG2GB, - PO_TEXDP3, // Note : ps.1.3 only - PO_TEXDP3TEX, // Note : ps.1.3 only - PO_TEXM3X2TEX, - PO_TEXM3X2DEPTH, // Note : requires ps.1.3 and a preceding texm3x2pad - PO_TEXM3X3DIFF, // Xbox ext. - PO_TEXM3X3VSPEC, - PO_TEXM3X3TEX, // Note : Uses a cube texture - PO_TEXM3X2PAD, // Note : Must be combined with texm3x2tex or texm3x2depth - PO_TEXM3X3PAD, - PO_TEXM3X3SPEC, // NOTE : NEEDS 3 ARGUMENTS! 
- // Direct3D8 arithmetic instructions : - PO_ADD, - PO_CMP, - PO_CND, - PO_DP3, // dp3 d, s1,s2 : d=s0 dot s1 (replicated to all channels, .rgb=color only, .a=color+alpha) - PO_DP4, // dp3 d, s1,s2 : d.r=d.g=d.b=d.a=(s1.r*s2.r)+(s1.g*s2.g)+(s1.b*s2.b)+(s1.a*s2.a) - PO_LRP, - PO_MAD, - PO_MOV, - PO_MUL, - PO_NOP, - PO_SUB, - PO_RCP, // Note: ps.2.0 and up only - // Xbox1 opcodes : - PO_XMMA, - PO_XMMC, - PO_XDM, - PO_XDD, - PO_XFC, - PO_XPS, -}; - -const struct { char *mn; int _Out; int _In; char *note; } PSH_OPCODE_DEFS[/*PSH_OPCODE*/] = { - // Pixel shader header opcodes (must be specified in this order) : - {/* PO_COMMENT */ /*mn:*/";", /*_Out: */ 0, /*_In: */ 0, /*note:*/"" }, // - {/* PO_PS */ /*mn:*/"ps", /*_Out: */ 0, /*_In: */ 0, /*note:*/"" }, // Must occur once - {/* PO_DEF */ /*mn:*/"def", /*_Out: */ 1, /*_In: */ 4, /*note:*/"" }, // Output must be a PARAM_C, arguments must be 4 floats [0.00f .. 1.00f] - {/* PO_DCL */ /*mn:*/"dcl", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, // Note : ps.2.0 and up only - {/* PO_DCL_2D */ /*mn:*/"dcl_2d", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, // Note : ps.2.0 and up only - {/* PO_DCL_CUBE */ /*mn:*/"dcl_cube", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, // Note : ps.2.0 and up only - {/* PO_DCL_VOLUME */ /*mn:*/"dcl_volume", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, // Note : ps.2.0 and up only - {/* PO_TEX */ /*mn:*/"tex", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, - {/* PO_TEXLD */ /*mn:*/"texld", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // Note : ps.1.4 and up only - {/* PO_TEXLD2 */ /*mn:*/"texld", /*_Out: */ 1, /*_In: */ 2, /*note:*/"" }, // Note : ps.1.4 and up only - {/* PO_TEXBEM */ /*mn:*/"texbem", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXBEML */ /*mn:*/"texbeml", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXBRDF */ /*mn:*/"texbrdf", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // /*note: */ Not supported by Direct3D8 ? 
- {/* PO_TEXCOORD */ /*mn:*/"texcoord", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, - {/* PO_TEXCRD */ /*mn:*/"texcrd", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // Note: ps.1.4 only - {/* PO_TEXKILL */ /*mn:*/"texkill", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, - {/* PO_TEXDP3 */ /*mn:*/"texdp3", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXDP3TEX */ /*mn:*/"texdp3tex", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X2TEX */ /*mn:*/"texm3x2tex", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X2DEPTH */ /*mn:*/"texm3x2depth", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // /*note: */ requires ps.1.3 and a preceding texm3x2pad - {/* PO_TEXM3X3DIFF */ /*mn:*/"texm3x3diff", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // /*note: */ Not supported by Direct3D8 ? - {/* PO_TEXM3X3VSPEC */ /*mn:*/"texm3x3vspec", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X3TEX */ /*mn:*/"texm3x3tex", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // /*note: */ Uses a cube texture - {/* PO_TEXREG2AR */ /*mn:*/"texreg2ar", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXREG2GB */ /*mn:*/"texreg2gb", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X2PAD */ /*mn:*/"texm3x2pad", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X3PAD */ /*mn:*/"texm3x3pad", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X3SPEC */ /*mn:*/"texm3x3spec", /*_Out: */ 1, /*_In: */ 2, /*note:*/"" }, - // Arithmetic opcodes : - {/* PO_ADD */ /*mn:*/"add", /*_Out: */ 1, /*_In: */ 2, /*note:*/"d0=s0+s1" }, - {/* PO_CMP */ /*mn:*/"cmp", /*_Out: */ 1, /*_In: */ 3, /*note:*/"d0={s0>=0?s1:s2}" }, - {/* PO_CND */ /*mn:*/"cnd", /*_Out: */ 1, /*_In: */ 3, /*note:*/"d0={s0.a>0.5?s1:s2}" }, // 1st input must be "r0.a" - {/* PO_DP3 */ /*mn:*/"dp3", /*_Out: */ 1, /*_In: */ 2, /*note:*/"d0=s0 dot3 s1" }, - {/* PO_DP4 */ /*mn:*/"dp4", /*_Out: */ 1, /*_In: */ 2, /*note:*/"d0=s0 dot4 s1" }, - {/* PO_LRP */ /*mn:*/"lrp", /*_Out: */ 1, /*_In: */ 3, 
/*note:*/"d0=s0*s1+{1-s0}*s2=s0*{s1-s2}+s2" }, - {/* PO_MAD */ /*mn:*/"mad", /*_Out: */ 1, /*_In: */ 3, /*note:*/"d0=s0*s1+s2" }, - {/* PO_MOV */ /*mn:*/"mov", /*_Out: */ 1, /*_In: */ 1, /*note:*/"d0=s0" }, - {/* PO_MUL */ /*mn:*/"mul", /*_Out: */ 1, /*_In: */ 2, /*note:*/"d0=s0*s1" }, - {/* PO_NOP */ /*mn:*/"nop", /*_Out: */ 0, /*_In: */ 0, /*note:*/"" }, - {/* PO_SUB */ /*mn:*/"sub", /*_Out: */ 1, /*_In: */ 2, /*note:*/"d0=s0-s1" }, - {/* PO_RCP */ /*mn:*/"rcp", /*_Out: */ 1, /*_In: */ 1, /*note:*/"d0=1/s0" }, // Note: ps.2.0 and up only - // Xbox-only {NV2A} opcodes : - {/* PO_XMMA */ /*mn:*/"xmma", /*_Out: */ 3, /*_In: */ 4, /*note:*/"d0=s0*s1, d1=s2*s3, d2={s0*s1}+{s2*s3}" }, - {/* PO_XMMC */ /*mn:*/"xmmc", /*_Out: */ 3, /*_In: */ 4, /*note:*/"d0=s0*s1, d1=s2*s3, d2={r0.a>0.5}?{s0*s1}:{s2*s3}" }, - {/* PO_XDM */ /*mn:*/"xdm", /*_Out: */ 2, /*_In: */ 4, /*note:*/"d0=s0 dot s1, d1=s2*s3" }, - {/* PO_XDD */ /*mn:*/"xdd", /*_Out: */ 2, /*_In: */ 4, /*note:*/"d0=s0 dot s1, d1=s2 dot s3" }, - {/* PO_XFC */ /*mn:*/"xfc", /*_Out: */ 0, /*_In: */ 7, /*note:*/"r0.rgb=s0*s1+{1-s0}*s2+s3, r0.a=s6.a, prod=s4*s5, sum=r0+v1" }, - {/* PO_XPS */ /*mn:*/"xps", /*_Out: */ 0, /*_In: */ 0, /*note:*/"" }, // Must occur once -}; - -enum PSH_ARGUMENT_TYPE -{ - PARAM_VALUE, // Xbox only; Numberic constants used in Xbox-only opcodes - PARAM_DISCARD, // Xbox only; - PARAM_FOG, // Final combiner only; Read-only register fog register - PARAM_V1R0_SUM, // Final combiner only; Read-only register that contains the result of V1+R0 - PARAM_EF_PROD, // Final combiner only; Read-only register that contains the result of final combiner parameters E * F - PARAM_oDepth, // Output depth register - PARAM_R, // Temporary registers (unassigned except r0.a, which on NV2A is initially set to t0.a) - PARAM_T, // Textures - PARAM_V, // Vertex colors - PARAM_C, // Constant registers, set by def opcodes or SetPixelShaderConstant - PARAM_S, // Sampler registers - PARAM_oC, // Output color registers -}; - 
-const char *PSH_ARGUMENT_TYPE_Str[/*PSH_ARGUMENT_TYPE*/] = { -// Prefix # r/w Input? Output? Note - "", // * r No No Used for numeric constants like -1, 0, 1 - "discard", // * w No Yes Only for xbox opcodes (native opcodes have single output - discards must be removed) - "fog", // 1 r Yes No Only for final combiner parameter - "sum", // 1 r Yes No Only for final combiner parameter - "prod", // 1 r Yes No Only for final combiner parameter - "oDepth", // - "r", // 2 r/w Yes Yes We fake a few extra registers and resolve them in FixupPixelShader - "t", // 4 r/w Yes Yes D3D9 cannot write to these! - "v", // 2 r Yes Yes - "c", // 16 r Yes No Xbox has 8*c0,c1=16, while PC D3D8 has only 8, we try to reduce that in FixupPixelShader - "s", // 16 - No Yes - "oC", // -}; - -constexpr int XFC_COMBINERSTAGENR = xbox::X_PSH_COMBINECOUNT; // Always call XFC 'stage 9', 1 after the 8th combiner - -constexpr int PSH_XBOX_MAX_C_REGISTER_COUNT = 16; -constexpr int PSH_XBOX_MAX_R_REGISTER_COUNT = 2; -constexpr int PSH_XBOX_MAX_T_REGISTER_COUNT = 4; -constexpr int PSH_XBOX_MAX_V_REGISTER_COUNT = 2; - -// Mapping indices of Xbox register combiner constants to host pixel shader constants; -// The first 16 are identity-mapped (C0_1 .. C0_7 are C0 .. C7 on host, C1_0 .. C1_7 are C8 .. 
C15 on host) : -constexpr int PSH_XBOX_CONSTANT_C0 = 0; // = 0..15 -// Then two final combiner constants : -constexpr int PSH_XBOX_CONSTANT_FC0 = PSH_XBOX_CONSTANT_C0 + PSH_XBOX_MAX_C_REGISTER_COUNT; // = 16 -constexpr int PSH_XBOX_CONSTANT_FC1 = PSH_XBOX_CONSTANT_FC0 + 1; // = 17 -// Fog requires a constant (as host PS1.4 doesn't support the FOG register) -constexpr int PSH_XBOX_CONSTANT_FOG = PSH_XBOX_CONSTANT_FC1 + 1; // = 18 -// Bump Environment Material registers -constexpr int PSH_XBOX_CONSTANT_BEM = PSH_XBOX_CONSTANT_FOG + 1; // = 19..22 -// Bump map Luminance registers -constexpr int PSH_XBOX_CONSTANT_LUM = PSH_XBOX_CONSTANT_BEM + 4; // = 23..26 -// This concludes the set of constants that need to be set on host : -constexpr int PSH_XBOX_CONSTANT_MAX = PSH_XBOX_CONSTANT_LUM + 4; // = 27 -// After those, we need two constants for literal values, which we DEF'ine in ConvertConstantsToNative : -constexpr int PSH_XBOX_CONSTANT_MUL0 = PSH_XBOX_CONSTANT_MAX; // = 27 -constexpr int PSH_XBOX_CONSTANT_MUL1 = PSH_XBOX_CONSTANT_MUL0 + 1; // = 28 - -constexpr int FakeRegNr_Sum = PSH_XBOX_MAX_T_REGISTER_COUNT + 0; -constexpr int FakeRegNr_Prod = PSH_XBOX_MAX_T_REGISTER_COUNT + 1; -constexpr int FakeRegNr_Xmm1 = PSH_XBOX_MAX_T_REGISTER_COUNT + 2; -constexpr int FakeRegNr_Xmm2 = PSH_XBOX_MAX_T_REGISTER_COUNT + 3; - -enum PSH_INST_MODIFIER { - INSMOD_NONE, // y = x - INSMOD_BIAS, // y = x - 0.5 // Xbox only : TODO : Fixup occurrances! - INSMOD_X2, // y = x * 2 - INSMOD_BX2, // y = (x - 0.5) * 2 // Xbox only : TODO : Fixup occurrances! 
- INSMOD_X4, // y = x * 4 - INSMOD_D2, // y = x * 0.5 - INSMOD_SAT, // Xbox doesn"t support this, but has ARGMOD_SATURATE instead - INSMOD_X8, // y = x * 8 // ps 1.4 only - INSMOD_D4, // y = x * 0.25 // ps 1.4 only - INSMOD_D8, // y = x * 0.125 // ps 1.4 only -}; - -const char *PSH_INST_MODIFIER_Str[/*PSH_INST_MODIFIER*/] = { - "", - "_bias", - "_x2", - "_bx2", - "_x4", - "_d2", - "_sat", - "_x8", - "_d4", - "_d8", -}; - -// Four argument modifiers (applied in this order) : -// 1: Inversion (invert or negate : "1-" or "-") -// 2: Apply bias ("_bias") -// 3: Apply scale ("_x2", "_bx2", "_x4", or "_d2") -// 4: Apply clamp ("_sat") -enum PSH_ARG_MODIFIER { - ARGMOD_IDENTITY, // y = x - - ARGMOD_INVERT, // y = 1-x -> 0..1 > 1..0 - ARGMOD_NEGATE, // y = -x -> 0..1 > 0..-1 - - ARGMOD_BIAS, // y = x-0.5 -> 0..1 > -0.5..0.5 - - ARGMOD_SCALE_X2, // y = x*2 -> 0..1 > 0..2 - ARGMOD_SCALE_BX2, // y = (x*2)-1 -> 0..1 > -1..1 - ARGMOD_SCALE_X4, // y = x*4 -> 0..1 > 0..4 - ARGMOD_SCALE_D2, // y = x/2 -> 0..1 > 0..0.5 - - ARGMOD_SATURATE, // Xbox - not available in PS1.3 (can be done on output instead) - - ARGMOD_ALPHA_REPLICATE, - ARGMOD_BLUE_REPLICATE // PS1.1-PS1.3 only allow this if destination writemask = .a -}; - -typedef DWORD PSH_ARG_MODIFIERs; // = set of PSH_ARG_MODIFIER; - -const char *PSH_ARG_MODIFIER_Str[/*PSH_ARG_MODIFIER*/] = { - "%s", - - "1-%s", - "-%s", - - "%s_bias", - - "%s_x2", - "%s_bx2", - "%s_x4", - "%s_d2", - - "%s_sat", - - "%s", // .a is added via Mask - "%s" // .b idem -}; - -struct RPSRegisterObject { - bool IsAlpha; - PS_REGISTER Reg; - - void Decode(uint8_t Value, bool aIsAlpha); - std::string DecodedToString(); -}; - -struct RPSInputRegister : RPSRegisterObject { - PS_CHANNEL Channel; - PS_INPUTMAPPING InputMapping; - - void Decode(uint8_t Value, bool aIsAlpha); - std::string DecodedToString(); -}; - -struct RPSCombinerOutput : RPSRegisterObject { - RPSInputRegister Input1; // Called InputA or InputC (depending if it's inside the AB or CD combiner) 
- RPSInputRegister Input2; // Called InputC or InputD (depending if it's inside the AB or CD combiner) - bool DotProduct; // False=Multiply, True=DotProduct - bool BlueToAlpha; // False=Alpha-to-Alpha, True=Blue-to-Alpha - - void Decode(uint8_t Value, DWORD PSInputs, bool aIsAlpha); -}; - -struct RPSCombinerOutputMuxSum : RPSRegisterObject { - RPSCombinerOutput OutputAB; // Contains InputA and InputB (as Input1 and Input2) - RPSCombinerOutput OutputCD; // Contains InputC and InputD (as Input1 and Input2) -}; - -struct RPSCombinerStageChannel { - RPSCombinerOutputMuxSum OutputSUM; // Contains OutputAB, OutputCD - PS_COMBINEROUTPUT CombinerOutputFlags; - bool AB_CD_SUM; // True=AB+CD, False=MUX(AB;CD) based on R0.a - - void Decode(DWORD PSInputs, DWORD PSOutputs, bool aIsAlpha = false); -}; - -struct RPSCombinerStage { - RPSCombinerStageChannel RGB; - RPSCombinerStageChannel Alpha; -}; - -struct RPSFinalCombiner { - RPSInputRegister InputA; - RPSInputRegister InputB; - RPSInputRegister InputC; - RPSInputRegister InputD; - RPSInputRegister InputE; - RPSInputRegister InputF; - RPSInputRegister InputG; - - PS_FINALCOMBINERSETTING FinalCombinerFlags; - - uint8_t FinalCombinerC0Mapping; - uint8_t FinalCombinerC1Mapping; - - DWORD dwPS_GLOBALFLAGS; - - void Decode(const DWORD PSFinalCombinerInputsABCD, const DWORD PSFinalCombinerInputsEFG, const DWORD PSFinalCombinerConstants); -}; - -constexpr DWORD MASK_NONE = 0x000; -constexpr DWORD MASK_R = 0x001; -constexpr DWORD MASK_G = 0x002; -constexpr DWORD MASK_B = 0x004; -constexpr DWORD MASK_A = 0x008; -constexpr DWORD MASK_RGB = MASK_R | MASK_G | MASK_B; -constexpr DWORD MASK_RGBA = MASK_R | MASK_G | MASK_B | MASK_A; - -enum - TArgumentType { - atInput, atOutput, atFinalCombiner -}; - -typedef struct _PSH_RECOMPILED_SHADER { - xbox::X_D3DPIXELSHADERDEF PSDef; - std::string NewShaderStr; - IDirect3DPixelShader* ConvertedHandle; -} PSH_RECOMPILED_SHADER, -*PPSH_RECOMPILED_SHADER; - -typedef struct _PSH_IMD_ARGUMENT { - 
PSH_ARGUMENT_TYPE Type; // For parameters: R, T, V or C For output : Discard, R, T or V - int16_t Address; // Register address - DWORD Mask; - PSH_ARG_MODIFIERs Modifiers; - float Multiplier; - - void SetConstValue(float Value); - float GetConstValue(); - bool UsesRegister(); - bool IsRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress); // overload; - bool IsRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress, DWORD aMask); // overload; - void SetRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress, DWORD aMask); - bool HasModifier(PSH_ARG_MODIFIER modifier); - bool SetScaleConstRegister(float factor, const PSH_RECOMPILED_SHADER& pRecompiled); - bool SetScaleBemLumRegister(D3DTEXTURESTAGESTATETYPE factor, int stage, const PSH_RECOMPILED_SHADER& pRecompiled); - std::string ToString(); - bool Decode(const DWORD Value, DWORD aMask, TArgumentType ArgumentType); - void Invert(); - void Negate(); -} PSH_IMD_ARGUMENT, -*PPSH_IMD_ARGUMENT; - -//TPSH_IMD_ARGUMENTArray = array[0..(MaxInt div SizeOf(PSH_IMD_ARGUMENT)) - 1] of PSH_IMD_ARGUMENT; -//PPSH_IMD_ARGUMENTs = ^TPSH_IMD_ARGUMENTArray; - -typedef struct _PSH_INTERMEDIATE_FORMAT { - int CombinerStageNr; - bool IsCombined; - PSH_OPCODE Opcode; - std::string CommentString; - PSH_INST_MODIFIER Modifier; - PSH_IMD_ARGUMENT Output[3]; // 3 = xmm* output count - PSH_IMD_ARGUMENT Parameters[7]; // 7 = xfc parameter count - - _PSH_INTERMEDIATE_FORMAT *Initialize(const PSH_OPCODE aOpcode); - std::string ToString(); - bool IsArithmetic(); - void ScaleOutput(float aFactor); - bool ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress); // overload; - bool ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, DWORD aMask); // overload; - bool ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, int& addressCount, int& total); // overload; - bool WritesToRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress); // overload; - bool WritesToRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, DWORD aMask); // overload; 
- void SwapParameter(const int Index1, const int Index2); - void XSwapOutput(); - bool MoveRemovableParametersRight(const int Index1, const int Index2); - bool XMoveNonRegisterOutputsRight(); - void XCopySecondOpcodeToFirst(const PSH_OPCODE aOpcode); - bool Decode(DWORD aCombinerStageNr, DWORD PSInputs, DWORD PSOutputs, DWORD aMask); - bool DecodeFinalCombiner(DWORD aPSFinalCombinerInputsABCD, DWORD aPSFinalCombinerInputsEFG); -} PSH_INTERMEDIATE_FORMAT, -*PPSH_INTERMEDIATE_FORMAT; - -struct PSH_XBOX_SHADER { - uint32_t m_PSVersion; // see D3DPS_VERSION - https://msdn.microsoft.com/en-us/library/windows/desktop/bb172592(v=vs.85).aspx - int MaxConstantFloatRegisters; - int MaxTemporaryRegisters; - int MaxSamplerRegisters; // Sampler (Direct3D 9 asm-ps) - int MaxTextureCoordinateRegisters; - int MaxInputColorRegisters; - int PSH_PC_MAX_REGISTER_COUNT; - - // Reserve enough slots for all shaders, so we need space for 2 constants, 5 lines per texture addressing codes and 10 lines per opcode : : - PSH_INTERMEDIATE_FORMAT Intermediate[2 + (xbox::X_D3DTS_STAGECOUNT * 5) + (xbox::X_PSH_COMBINECOUNT * 10) + 1]; - int IntermediateCount; - - PS_TEXTUREMODES PSTextureModes[xbox::X_D3DTS_STAGECOUNT]; - PS_DOTMAPPING PSDotMapping[xbox::X_D3DTS_STAGECOUNT]; - DWORD PSCompareMode[xbox::X_D3DTS_STAGECOUNT]; - int PSInputTexture[xbox::X_D3DTS_STAGECOUNT]; - - PS_FINALCOMBINERSETTING FinalCombinerFlags; - // Note : The following constants are only needed for PSH_XBOX_SHADER::DecodedToString, - // they are not involved in the actual pixel shader recompilation anymore : - RPSFinalCombiner FinalCombiner; - RPSCombinerStage Combiners[xbox::X_PSH_COMBINECOUNT]; - int NumberOfCombiners; - DWORD CombinerCountFlags; // For PS_COMBINERCOUNTFLAGS - // Read from CombinerCountFlags : - bool CombinerMuxesOnMsb; - bool CombinerHasUniqueC0; - bool CombinerHasUniqueC1; - - int StartPos; - - PSH_RECOMPILED_SHADER Recompiled = {}; - - void SetPSVersion(const uint32_t PSVersion); - - std::string 
ToString(); - void Log(const char *PhaseStr); - PPSH_INTERMEDIATE_FORMAT NewIntermediate(); - void InsertIntermediate(PPSH_INTERMEDIATE_FORMAT pIntermediate, int Index); - void DeleteIntermediate(int Index); - void DeleteLastIntermediate(); - std::string static OriginalToString(xbox::X_D3DPIXELSHADERDEF *pPSDef); - void Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef); - PSH_RECOMPILED_SHADER Convert(xbox::X_D3DPIXELSHADERDEF *pPSDef); - std::string DecodedToString(xbox::X_D3DPIXELSHADERDEF *pPSDef); - bool _NextIs2D(int Stage); - bool DecodeTextureModes(xbox::X_D3DPIXELSHADERDEF *pPSDef); - int GetTextureStageModifiers(int Stage); - void InsertTex3x2Instructions(int Stage, int inputStage, std::vector& InsertIns); - void InsertTex3x3Instructions(int Stage, int inputStage, std::vector& InsertIns); - bool InsertTextureModeInstruction(xbox::X_D3DPIXELSHADERDEF *pPSDef, int Stage, PSH_OPCODE opcode, std::vector& InsertIns, int& InsertPos); - bool MoveRemovableParametersRight(); - void ConvertXboxOpcodesToNative(xbox::X_D3DPIXELSHADERDEF *pPSDef); - void _SetColor(/*var OUT*/PSH_INTERMEDIATE_FORMAT &NewIns, D3DCOLOR ConstColor); - void _SetColor(/*var OUT*/PSH_INTERMEDIATE_FORMAT &NewIns, D3DCOLORVALUE ConstColor); - bool ConvertConstantsToNative(xbox::X_D3DPIXELSHADERDEF *pPSDef, /*var OUT*/PSH_RECOMPILED_SHADER *Recompiled); - bool RemoveUselessWrites(); - int MaxRegisterCount(PSH_ARGUMENT_TYPE aRegType); - bool IsValidNativeOutputRegister(PSH_ARGUMENT_TYPE aRegType, int index = -1); - int RegisterIsFreeFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress); - int RegisterIsUsedFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress); - int NextFreeRegisterFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int bIndex = -1, int startAddress = 0, int excludeAddress = -1); - bool IsRegisterFreeFromIndexOnwards(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress); - void ReplaceInputRegisterFromIndexOnwards(int aIndex, - 
PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex = -1); - void ReplaceOutputRegisterFromIndexOnwards(int aIndex, - PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex = -1); - void ReplaceRegisterFromIndexOnwards(int aIndex, - PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex = -1, bool replaceInput = true, bool replaceOutput = true); - bool ConvertXMMToNative_Except3RdOutput(int i); - void ConvertXPSToNative(int i); - void ConvertXMMAToNative(int i); - void ConvertXMMCToNative(int i); - void ConvertXDMToNative(int i); - void ConvertXDDToNative(int i); - void ConvertXFCToNative(int i); - bool FixArgumentModifiers(); - bool CombineInstructions(); - bool RemoveNops(); - bool SimplifyMOV(PPSH_INTERMEDIATE_FORMAT Cur); - bool SimplifyADD(PPSH_INTERMEDIATE_FORMAT Cur); - bool SimplifyMAD(PPSH_INTERMEDIATE_FORMAT Cur, int index); - bool SimplifySUB(PPSH_INTERMEDIATE_FORMAT Cur); - bool SimplifyMUL(PPSH_INTERMEDIATE_FORMAT Cur); - bool SimplifyLRP(PPSH_INTERMEDIATE_FORMAT Cur, int index); - bool FixupCND(PPSH_INTERMEDIATE_FORMAT Cur, int index); - bool FixupPixelShader(); - bool FixInvalidSrcSwizzle(); - bool FixMissingR0a(); - bool FixMissingR1a(); - bool FixCoIssuedOpcodes(); - bool FixInvalidDstRegister(); - bool FixConstantParameters(); - bool FixInstructionModifiers(); - bool FixUninitializedReads(); - bool FixOverusedRegisters(); - bool FinalizeShader(); - - static void GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]); - static void GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_DOTMAPPING psDotMapping[xbox::X_D3DTS_STAGECOUNT]); - static void GetPSCompareModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, DWORD psCompareModes[xbox::X_D3DTS_STAGECOUNT]); - static void 
GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDef, int psInputTexture[xbox::X_D3DTS_STAGECOUNT]); -}; - -/* -* Blueshogun's code (useful for debugging the PixelShader binary format) -*/ - -// PS Texture Modes -char* PS_TextureModesStr[/*PS_TEXTUREMODES*/] = -{ - "PS_TEXTUREMODES_NONE", // 0x00 - "PS_TEXTUREMODES_PROJECT2D", // 0x01 - "PS_TEXTUREMODES_PROJECT3D", // 0x02 - "PS_TEXTUREMODES_CUBEMAP", // 0x03 - "PS_TEXTUREMODES_PASSTHRU", // 0x04 - "PS_TEXTUREMODES_CLIPPLANE", // 0x05 - "PS_TEXTUREMODES_BUMPENVMAP", // 0x06 - "PS_TEXTUREMODES_BUMPENVMAP_LUM", // 0x07 - "PS_TEXTUREMODES_BRDF", // 0x08 - "PS_TEXTUREMODES_DOT_ST", // 0x09 - "PS_TEXTUREMODES_DOT_ZW", // 0x0A - "PS_TEXTUREMODES_DOT_RFLCT_DIFF", // 0x0B - "PS_TEXTUREMODES_DOT_RFLCT_SPEC", // 0x0C - "PS_TEXTUREMODES_DOT_STR_3D", // 0x0D - "PS_TEXTUREMODES_DOT_STR_CUBE", // 0x0E - "PS_TEXTUREMODES_DPNDNT_AR", // 0x0F - "PS_TEXTUREMODES_DPNDNT_GB", // 0x10 - "PS_TEXTUREMODES_DOTPRODUCT", // 0x11 - "PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST", // 0x12 - "???", // 0x13 - "???", // 0x14 - "???", // 0x15 - "???", // 0x16 - "???", // 0x17 - "???", // 0x18 - "???", // 0x19 - "???", // 0x1A - "???", // 0x1B - "???", // 0x1C - "???", // 0x1D - "???", // 0x1E - "???", // 0x1F -}; - -// PS DotMapping -char* PS_DotMappingStr[/*PS_DOTMAPPING*/] = -{ - "PS_DOTMAPPING_ZERO_TO_ONE", // 0x00 - "PS_DOTMAPPING_MINUS1_TO_1_D3D", // 0x01 - "PS_DOTMAPPING_MINUS1_TO_1_GL", // 0x02 - "PS_DOTMAPPING_MINUS1_TO_1", // 0x03 - "PS_DOTMAPPING_HILO_1", // 0x04 - "???", // 0x05 - "???", // 0x06 - "PS_DOTMAPPING_HILO_HEMISPHERE", // 0x07 -}; - -#if 1 // array unusable for bitflags -// PS CompareMode -char* PS_CompareModeStr[/*PS_COMPAREMODE*/] = -{ - "PS_COMPAREMODE_S_LT", // 0x00L - "PS_COMPAREMODE_S_GE", // 0x01L - - "PS_COMPAREMODE_T_LT", // 0x00L - "PS_COMPAREMODE_T_GE", // 0x02L - - "???", - - "PS_COMPAREMODE_R_LT", // 0x00L - "PS_COMPAREMODE_R_GE", // 0x04L - - "???", - "???", - "???", - - "PS_COMPAREMODE_Q_LT", // 0x00L - 
"PS_COMPAREMODE_Q_GE", // 0x08L -}; -#endif - -#if 1 // array unfit for bitflags -// PS CombinerCountFlags -char* PS_CombinerCountFlagsStr[/*PS_COMBINERCOUNTFLAGS*/] = -{ - "PS_COMBINERCOUNT_MUX_LSB", // 0x0000L, // mux on r0.a lsb - "PS_COMBINERCOUNT_MUX_MSB", // 0x0001L, // mux on r0.a msb - - "PS_COMBINERCOUNT_SAME_C0", // 0x0000L, // c0 same in each stage - "PS_COMBINERCOUNT_UNIQUE_C0", // 0x0010L, // c0 unique in each stage - - "PS_COMBINERCOUNT_SAME_C1", // 0x0000L, // c1 same in each stage - "PS_COMBINERCOUNT_UNIQUE_C1", // 0x0100L // c1 unique in each stage -}; -#endif - -// PS InputMapping -std::string PS_InputMappingStr[/*PS_INPUTMAPPING*/] = -{ - "PS_INPUTMAPPING_UNSIGNED_IDENTITY", // 0x00L, // max(0,x) OK for final combiner: y = abs(x) - "PS_INPUTMAPPING_UNSIGNED_INVERT", // 0x20L, // 1 - max(0,x) OK for final combiner: y = 1 - x - "PS_INPUTMAPPING_EXPAND_NORMAL", // 0x40L, // 2*max(0,x) - 1 invalid for final combiner - "PS_INPUTMAPPING_EXPAND_NEGATE", // 0x60L, // 1 - 2*max(0,x) invalid for final combiner - "PS_INPUTMAPPING_HALFBIAS_NORMAL", // 0x80L, // max(0,x) - 1/2 invalid for final combiner - "PS_INPUTMAPPING_HALFBIAS_NEGATE", // 0xa0L, // 1/2 - max(0,x) invalid for final combiner - "PS_INPUTMAPPING_SIGNED_IDENTITY", // 0xc0L, // x invalid for final combiner - "PS_INPUTMAPPING_SIGNED_NEGATE", // 0xe0L, // -x invalid for final combiner -}; - -// PS Register (note, a few have one space, to line up the output a little) -std::string PS_RegisterStr[/*PS_REGISTER*/] = -{ - "PS_REGISTER_ZERO", // 0x00L, // r - "PS_REGISTER_DISCARD", // 0x00L, // w - "PS_REGISTER_C0 ", // 0x01L, // r - "PS_REGISTER_C1 ", // 0x02L, // r - "PS_REGISTER_FOG", // 0x03L, // r - "PS_REGISTER_V0 ", // 0x04L, // r/w - "PS_REGISTER_V1 ", // 0x05L, // r/w - "??", // 0x06 - "??", // 0x07 - "PS_REGISTER_T0 ", // 0x08L, // r/w - "PS_REGISTER_T1 ", // 0x09L, // r/w - "PS_REGISTER_T2 ", // 0x0aL, // r/w - "PS_REGISTER_T3 ", // 0x0bL, // r/w - "PS_REGISTER_R0 ", // 0x0cL, // r/w - 
"PS_REGISTER_R1 ", // 0x0dL, // r/w - "PS_REGISTER_V1R0_SUM", // 0x0eL, // r - "PS_REGISTER_EF_PROD", // 0x0fL, // r - - "PS_REGISTER_ONE", // PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // OK for final combiner - "PS_REGISTER_NEGATIVE_ONE", // PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // invalid for final combiner - "PS_REGISTER_ONE_HALF", // PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // invalid for final combiner - "PS_REGISTER_NEGATIVE_ONE_HALF" // PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // invalid for final combiner -}; - -// PS Channel -char* PS_ChannelStr[/*PS_CHANNEL*/] = -{ - "PS_CHANNEL_RGB", // 0x00, // used as RGB source - "PS_CHANNEL_BLUE", // 0x00, // used as ALPHA source - "PS_CHANNEL_ALPHA", // 0x10, // used as RGB or ALPHA source -}; - -// PS FinalCombinerSetting -char* PS_FinalCombinerSettingStr[/*PS_FINALCOMBINERSETTING*/] = -{ - "PS_FINALCOMBINERSETTING_CLAMP_SUM", // 0x80, // V1+R0 sum clamped to [0,1] - "PS_FINALCOMBINERSETTING_COMPLEMENT_V1", // 0x40, // unsigned invert mapping - "PS_FINALCOMBINERSETTING_COMPLEMENT_R0", // 0x20, // unsigned invert mapping -}; - -// PS CombineOutput -char* PS_CombineOutputStr[/*PS_COMBINEROUTPUT*/] = -{ - "PS_COMBINEROUTPUT_IDENTITY", // 0x00L, // y = x - "PS_COMBINEROUTPUT_BIAS", // 0x08L, // y = x - 0.5 - "PS_COMBINEROUTPUT_SHIFTLEFT_1", // 0x10L, // y = x*2 - "PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS", // 0x18L, // y = (x - 0.5)*2 = x*2 - 1.0 - "PS_COMBINEROUTPUT_SHIFTLEFT_2", // 0x20L, // y = x*4 - "PS_COMBINEROUTPUT_SHIFTRIGHT_1", // 0x30L, // y = x/2 = x*0.5 - - "PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA", // 0x80L, // RGB only - - "PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA", // 0x40L, // RGB only - - "PS_COMBINEROUTPUT_AB_MULTIPLY", // 0x00L, - "PS_COMBINEROUTPUT_AB_DOT_PRODUCT", // 0x02L, // RGB only - - "PS_COMBINEROUTPUT_CD_MULTIPLY", // 0x00L, - "PS_COMBINEROUTPUT_CD_DOT_PRODUCT", // 0x01L, // RGB only - - "PS_COMBINEROUTPUT_AB_CD_SUM", // 0x00L, // 3rd output is AB+CD - 
"PS_COMBINEROUTPUT_AB_CD_MUX", // 0x04L, // 3rd output is MUX(AB,CD) based on R0.a -}; - -// PS GlobalFlags -char* PS_GlobalFlagsStr[/*PS_GLOBALFLAGS*/] = -{ - "PS_GLOBALFLAGS_NO_TEXMODE_ADJUST", // 0x0000L, // don't adjust texture modes - "PS_GLOBALFLAGS_TEXMODE_ADJUST", // 0x0001L, // adjust texture modes according to set texture -}; - -const int CONST_NEG_ONE = -2; -const int CONST_NEG_HALF = -1; -const int CONST_ZERO = 0; -const int CONST_POS_HALF = 1; // Note : Instead of 0.5 we use 1 (so we can keep using integers) -const int CONST_POS_ONE = 2; - -/// - -std::string PSCombinerOutputFlagsToStr(const DWORD dwFlags, bool aIsAlpha = false) -{ - std::string Result = PS_CombineOutputStr[0 + ((dwFlags & 0x38) >> 3)]; - Result = Result + " | " + PS_CombineOutputStr[8 + ((dwFlags & PS_COMBINEROUTPUT_AB_DOT_PRODUCT) >> 1)]; - Result = Result + " | " + PS_CombineOutputStr[10 + ((dwFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) >> 0)]; - Result = Result + " | " + PS_CombineOutputStr[12 + ((dwFlags & PS_COMBINEROUTPUT_AB_CD_MUX) >> 2)]; - - if (!aIsAlpha) { - if (dwFlags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) - Result = Result + " | " + PS_CombineOutputStr[6]; - - if (dwFlags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) - Result = Result + " | " + PS_CombineOutputStr[7]; - } - - return Result; -} - -std::string PSFinalCombinerSettingToStr(const DWORD dwPS_FINALCOMBINERSETTING) -{ - std::string Result = ""; - if (dwPS_FINALCOMBINERSETTING & PS_FINALCOMBINERSETTING_CLAMP_SUM) - Result = Result + " | " + PS_FinalCombinerSettingStr[0]; - - if (dwPS_FINALCOMBINERSETTING & PS_FINALCOMBINERSETTING_COMPLEMENT_V1) - Result = Result + " | " + PS_FinalCombinerSettingStr[1]; - - if (dwPS_FINALCOMBINERSETTING & PS_FINALCOMBINERSETTING_COMPLEMENT_R0) - Result = Result + " | " + PS_FinalCombinerSettingStr[2]; - - if (!Result.empty()) - Result.erase(0, 3); - - return Result; -} - -/* PSH_IMD_ARGUMENT */ - -void PSH_IMD_ARGUMENT::SetConstValue(float Value) -{ - Type = PARAM_VALUE; - Address = 
CONST_ZERO; - Multiplier = Value; - Modifiers = 0; -} - -float PSH_IMD_ARGUMENT::GetConstValue() -{ - if (Type != PARAM_VALUE) { - // Anything other than a value-parameter returns a value never checked for : - return INFINITY; - } - - float Result = Multiplier; - - // y = 1-x -> 0..1 > 1..0 - if (HasModifier(ARGMOD_INVERT)) Result = 1.0f-Result; - - // y = -x -> 0..1 > 0..-1 - if (HasModifier(ARGMOD_NEGATE)) Result = -Result; - - // y = x-0.5 -> 0..1 > -0.5..0.5 - if (HasModifier(ARGMOD_BIAS)) Result = Result-0.5f; - - // y = x*2 -> 0..1 > 0..2 - if (HasModifier(ARGMOD_SCALE_X2)) Result = Result*2.0f; - - // y = (x*2)-1 -> 0..1 > -1..1 - if (HasModifier(ARGMOD_SCALE_BX2)) Result = (Result*2.0f)-1.0f; - - // y = x*4 -> 0..1 > 0..4 - if (HasModifier(ARGMOD_SCALE_X4)) Result = Result*4.0f; - - // y = x/2 -> 0..1 > 0..0.5 - if (HasModifier(ARGMOD_SCALE_D2)) Result = Result/2.0f; - - return Result; -} // GetConstValue - -bool PSH_IMD_ARGUMENT::UsesRegister() -{ - return (Type > PARAM_DISCARD); -} - -bool PSH_IMD_ARGUMENT::IsRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress) -{ - return (Type == aRegType) - && (Address == aAddress || aAddress == -1); -} - -bool PSH_IMD_ARGUMENT::IsRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress, DWORD aMask) -{ - return IsRegister(aRegType, aAddress) - // Check the mask itself, but also 'mask-less' : - && (((Mask & aMask) == aMask) || (Mask == 0)); -} - -void PSH_IMD_ARGUMENT::SetRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress, DWORD aMask) -{ - Type = aRegType; - Address = aAddress; - Mask = aMask; -} - -bool PSH_IMD_ARGUMENT::HasModifier(PSH_ARG_MODIFIER modifier) -{ - return (Modifiers & (1 << modifier)) != 0; -} - -bool PSH_IMD_ARGUMENT::SetScaleConstRegister(float factor, const PSH_RECOMPILED_SHADER& pRecompiled) -{ - PSH_ARG_MODIFIERs modifiers = 0; - DWORD mask = Mask; - int address = Address; - - const int mappedConstant0 = PSH_XBOX_CONSTANT_MUL0; - const int mappedConstant1 = PSH_XBOX_CONSTANT_MUL1; - - if 
(factor < 0.0f) - { - factor = -factor; - modifiers = (1 << ARGMOD_NEGATE); - // This inversion is here to support negative scales, but it's not an actual match yet. - } - - // Note : 'switch(factor)' can't be used here, since that requires an ordinal value (and factor is a float) - if (factor == 1.0f) - { - address = mappedConstant0; - mask = MASK_R; - } - - else if (factor == 2.0f) - { - address = mappedConstant0; - mask = MASK_G; - } - - else if (factor == 4.0f) - { - address = mappedConstant0; - mask = MASK_B; - } - - else if (factor == 8.0f) - { - address = mappedConstant0; - mask = MASK_A; - } - - else if (factor == 0.0f) - { - address = mappedConstant1; - mask = MASK_R; - } - - else if (factor == 1.0f / 2.0f) - { - address = mappedConstant1; - mask = MASK_G; - } - - else if (factor == 1.0f / 4.0f) - { - address = mappedConstant1; - mask = MASK_B; - } - - else if (factor == 1.0f / 8.0f) - { - address = mappedConstant1; - mask = MASK_A; - } - else return false; - - Type = PARAM_C; - Address = address; - Mask = mask; - Modifiers = modifiers; - Multiplier = 1.0f; - - return true; -} - -bool PSH_IMD_ARGUMENT::SetScaleBemLumRegister(D3DTEXTURESTAGESTATETYPE factor, int stage, const PSH_RECOMPILED_SHADER& pRecompiled) -{ - const PSH_ARG_MODIFIERs modifiers = 0; - DWORD mask = Mask; - int address = Address; - - const int mappedConstant0 = PSH_XBOX_CONSTANT_BEM + stage; - const int mappedConstant1 = PSH_XBOX_CONSTANT_LUM + stage; - - switch (factor) - { - case D3DTSS_BUMPENVMAT00: - { - address = mappedConstant0; - mask = MASK_R; - break; - } - case D3DTSS_BUMPENVMAT01: - { - address = mappedConstant0; - mask = MASK_G; - break; - } - case D3DTSS_BUMPENVMAT11: - { - address = mappedConstant0; - mask = MASK_B; - break; - } - case D3DTSS_BUMPENVMAT10: - { - address = mappedConstant0; - mask = MASK_A; - break; - } - case D3DTSS_BUMPENVLSCALE: - { - address = mappedConstant1; - mask = MASK_R; - break; - } - case D3DTSS_BUMPENVLOFFSET: - { - address = mappedConstant1; - 
mask = MASK_G; - break; - } - default: return false; - } - - Type = PARAM_C; - Address = address; - Mask = mask; - Modifiers = modifiers; - Multiplier = 1.0f; - - return true; -} - -std::string PSH_IMD_ARGUMENT::ToString() -{ - std::string Result; - - if (Type == PARAM_VALUE) - { - Result = std::to_string(GetConstValue()); - if (Result.find(".") > 0) - Result = Result + 'f'; - - return Result; - } - - Result = PSH_ARGUMENT_TYPE_Str[Type]; - - if (Type >= PARAM_R) - Result = Result + std::to_string(Address); - - if (UsesRegister()) - { - for (DWORD Modifier = ARGMOD_IDENTITY; Modifier < ARGMOD_BLUE_REPLICATE; Modifier++) - if (HasModifier((PSH_ARG_MODIFIER)Modifier)) { - char buffer[256]; - Result = std::string(buffer, sprintf(buffer, PSH_ARG_MODIFIER_Str[Modifier], Result.c_str())); - } - - if ((Mask > 0) && (Mask != MASK_RGBA)) - { - Result = Result + '.'; - if ((Mask & MASK_R) > 0) Result = Result + 'r'; - if ((Mask & MASK_G) > 0) Result = Result + 'g'; - if ((Mask & MASK_B) > 0) Result = Result + 'b'; - if ((Mask & MASK_A) > 0) Result = Result + 'a'; - } - } - return Result; -} // ToString - -bool PSH_IMD_ARGUMENT::Decode(const DWORD Value, DWORD aMask, TArgumentType ArgumentType) -{ - PS_REGISTER Reg; - PS_INPUTMAPPING InputMapping; - PS_CHANNEL Channel; - - bool Result = true; - Address = 0; - Mask = aMask; - Modifiers = (1 << ARGMOD_IDENTITY); - Multiplier = 1.0; - - // Determine PS_REGISTER for this argument type : - { - Reg = (PS_REGISTER)(Value & 0xF); - if (ArgumentType == atOutput) - { - // Output arguments may not write to C0 or C1, prevent that : - if ((Reg == PS_REGISTER_C0) || (Reg == PS_REGISTER_C1)) - Reg = PS_REGISTER_CXBX_PROD; // unhandled case - will reach "invalid" else-block - } - else - { - // Input arguments (normal or final combiners) can use the extended PS_REGISTER values : - if (Reg == PS_REGISTER_ZERO) - Reg = (PS_REGISTER)(Value & 0xE0); - - // 'Signed Identity' flag on PS_REGISTER_ZERO has no meaning, treat as zero : - if (Reg == 
PS_REGISTER_CXBX_PROD) - Reg = PS_REGISTER_ZERO; - - // Prevent decoding final combiner registers outside that mode : - if (ArgumentType != atFinalCombiner) - if ((Reg == PS_REGISTER_FOG) || (Reg == PS_REGISTER_V1R0_SUM) || (Reg == PS_REGISTER_EF_PROD)) - Reg = PS_REGISTER_CXBX_PROD; // unhandled case - will reach "invalid" else-block - } - } - - switch (Reg) { - case PS_REGISTER_ZERO: - { - if (ArgumentType == atOutput) - { - // Mark output arguments as 'discard' and return that fact : - Type = PARAM_DISCARD; - Result = false; - } - else - Type = PARAM_VALUE; - - Address = CONST_ZERO; - Multiplier = 0.0f; - break; - } - case PS_REGISTER_C0: - Type = PARAM_C; - break; - case PS_REGISTER_C1: - { - Type = PARAM_C; - Address = 1; - break; - } - case PS_REGISTER_V0: - Type = PARAM_V; - break; - case PS_REGISTER_V1: - { - Type = PARAM_V; - Address = 1; - break; - } - case PS_REGISTER_T0: - Type = PARAM_T; - break; - case PS_REGISTER_T1: - { - Type = PARAM_T; - Address = 1; - break; - } - case PS_REGISTER_T2: - { - Type = PARAM_T; - Address = 2; - break; - } - case PS_REGISTER_T3: - { - Type = PARAM_T; - Address = 3; - break; - } - case PS_REGISTER_R0: - Type = PARAM_R; - break; - case PS_REGISTER_R1: - { - Type = PARAM_R; - Address = 1; - break; - } - // Registers only available when ArgumentType != atOutput (Reg is capped otherwise) : - case PS_REGISTER_ONE: - { - Type = PARAM_VALUE; - Address = CONST_POS_ONE; - Multiplier = 1.0f; - break; - } - case PS_REGISTER_NEGATIVE_ONE: - { - Type = PARAM_VALUE; - Address = CONST_NEG_ONE; - Multiplier = -1.0f; - break; - } - case PS_REGISTER_ONE_HALF: - { - Type = PARAM_VALUE; - Address = CONST_POS_HALF; - Multiplier = 0.5f; - break; - } - case PS_REGISTER_NEGATIVE_ONE_HALF: - { - Type = PARAM_VALUE; - Address = CONST_NEG_HALF; - Multiplier = -0.5f; - break; - } - // Registers only available when ArgumentType == atFinalCombiner (Reg is capped otherwise) : - case PS_REGISTER_FOG: - Type = PARAM_FOG; - break; - case 
PS_REGISTER_V1R0_SUM: - Type = PARAM_V1R0_SUM; - break; - case PS_REGISTER_EF_PROD: - Type = PARAM_EF_PROD; - break; - default : - EmuLog(LOG_LEVEL::DEBUG, "INVALID ARGUMENT!"); - - Result = false; - } - - // We're done if this decoding is meant for output parameters, - // or when the input is a value-parameter (already read above) : - if ((ArgumentType == atOutput) - || (Type == PARAM_VALUE) ) - return Result; - - // Handle the Channel Designator : - { - Channel = (PS_CHANNEL)(Value & PS_CHANNEL_ALPHA); - if (Channel == PS_CHANNEL_ALPHA) - // Input comes from alpha portion of input register (valid for both RGB and alpha portions) : - Mask = MASK_A; - else // = PS_CHANNEL_BLUE (for Alpha step) = PS_CHANNEL_BLUE (for RGB step) : - if (aMask == MASK_A) - // Input comes from b portion of input register (valid for alpha portion only) : - Mask = MASK_B; // Note : This is not the same as ARGMOD_BLUE_REPLICATE! - else - // Input comes from the RGB portion of the input register (valid for RGB portion only) : - Mask = aMask; // Note : Is already put here, but makes this code clearer - } - - InputMapping = (PS_INPUTMAPPING)(Value & 0xe0); - -// ARGMOD_BIAS, -// -// ARGMOD_SCALE_X2, ARGMOD_SCALE_BX2, ARGMOD_SCALE_X4, ARGMOD_SCALE_D2, -// -// ARGMOD_SATURATE, -// -// ARGMOD_ALPHA_REPLICATE, ARGMOD_BLUE_REPLICATE]; - - switch (InputMapping) { - case PS_INPUTMAPPING_UNSIGNED_IDENTITY: - Modifiers = (1 << ARGMOD_IDENTITY); - break; - case PS_INPUTMAPPING_UNSIGNED_INVERT: - Modifiers = (1 << ARGMOD_INVERT); - break; - case PS_INPUTMAPPING_EXPAND_NORMAL: - { - Modifiers = (1 << ARGMOD_SCALE_BX2); - Multiplier = 2.0f * Multiplier; - break; - } - case PS_INPUTMAPPING_EXPAND_NEGATE: - { - Modifiers = (1 << ARGMOD_NEGATE); - Multiplier = -Multiplier; - break; - } - case PS_INPUTMAPPING_HALFBIAS_NORMAL: - Modifiers = (1 << ARGMOD_BIAS); - break; -// case PS_INPUTMAPPING_HALFBIAS_NEGATE: -// Modifiers = (1 << ARGMOD_IDENTITY); ??? 
-// break; - case PS_INPUTMAPPING_SIGNED_IDENTITY: - Modifiers = (1 << ARGMOD_IDENTITY); - break; - case PS_INPUTMAPPING_SIGNED_NEGATE: - { - Modifiers = (1 << ARGMOD_NEGATE); - Multiplier = -Multiplier; - break; - } - } - return Result; -} // Decode - -void PSH_IMD_ARGUMENT::Invert() -{ - if (!HasModifier(ARGMOD_INVERT)) - Modifiers = Modifiers | (1 << ARGMOD_INVERT); - else - Modifiers = Modifiers & ~(1 << ARGMOD_INVERT); -} - -void PSH_IMD_ARGUMENT::Negate() -{ - if (!HasModifier(ARGMOD_NEGATE)) - Modifiers = Modifiers | (1 << ARGMOD_NEGATE); - else - Modifiers = Modifiers & ~(1 << ARGMOD_NEGATE); -} - -/* PSH_INTERMEDIATE_FORMAT */ - -_PSH_INTERMEDIATE_FORMAT *PSH_INTERMEDIATE_FORMAT::Initialize(const PSH_OPCODE aOpcode) -{ - int i; - - Opcode = aOpcode; - Modifier = INSMOD_NONE; - for (i = 0; i < 3; i++) - { - Output[i] = {}; - Output[i].Multiplier = 1.0f; - } - for (i = 0; i < 7; i++) - { - Parameters[i] = {}; - Parameters[i].Multiplier = 1.0f; - } - - return this; -} - -std::string PSH_INTERMEDIATE_FORMAT::ToString() -{ - std::string Result = {}; - int i; - char SeparatorChar; - - switch (Opcode) { - case PO_COMMENT: - { - Result = "; " + CommentString; - return Result; - } - case PO_PS: { - // 1.1 allows reading from 2 textures (which we use in 'cnd') and reading from the .b (blue) channel - // 1.3 allows the use of texm3x2depth (which can occur sometimes) - // 2.0 allows up to r12, c32, t8 and s16 (requires Direct3D9) - // 3.0 allows up to r32, c224, v10 (instead of t via dcl), s16 and vFace (which can do two-sided lighting) - - // Use supplied pixel shader version (if any is given) - DWORD PSVersion = Parameters[6].Mask; - - Result = "ps_" + std::to_string(D3DSHADER_VERSION_MAJOR(PSVersion)) - + "_" + std::to_string(D3DSHADER_VERSION_MINOR(PSVersion)); - return Result; - } - case PO_XPS: { - Result = "xps.1.1"; - return Result; - } - } - - if (IsCombined) - Result = "+"; - else - Result = ""; - - Result = Result + PSH_OPCODE_DEFS[Opcode].mn + 
PSH_INST_MODIFIER_Str[Modifier]; - - // Output a comma-separated list of output registers : - SeparatorChar = ' '; - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._Out; i++) - { - Result = Result + SeparatorChar + Output[i].ToString(); - SeparatorChar = ','; - } - - // If this opcode has both output and input, put a space between them : - if ((PSH_OPCODE_DEFS[Opcode]._Out > 0) && (PSH_OPCODE_DEFS[Opcode]._In > 0)) - { - Result = Result + ","; - SeparatorChar = ' '; - } - - // Output a comma-separated list of parameters : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._In; i++) - { - Result = Result + SeparatorChar + Parameters[i].ToString(); - SeparatorChar = ','; - } - - if ((!CommentString.empty()) - || (PSH_OPCODE_DEFS[Opcode].note != "")) - Result = Result + " ; " + PSH_OPCODE_DEFS[Opcode].note + " " + CommentString; - - return Result; -} // ToString - -bool PSH_INTERMEDIATE_FORMAT::IsArithmetic() -{ - return (Opcode >= PO_ADD); -} - -void PSH_INTERMEDIATE_FORMAT::ScaleOutput(float aFactor) -{ - assert(aFactor > 0.0f); - - if (aFactor == 1.0f) - return; - - if (aFactor == 0.5f) - { - // Half the output modifier : - switch (Modifier) { - case INSMOD_X8: - Modifier = INSMOD_X4; - break; - case INSMOD_X4: - Modifier = INSMOD_X2; - break; - case INSMOD_X2: - Modifier = INSMOD_NONE; - break; - case INSMOD_NONE: - Modifier = INSMOD_D2; - break; - case INSMOD_D2: - Modifier = INSMOD_D4; - break; - case INSMOD_D4: - Modifier = INSMOD_D8; - break; - } - - return; - } - - if (aFactor == 2.0f) - { - // Double the output modifier : - switch (Modifier) { - case INSMOD_D8: - Modifier = INSMOD_D4; - break; - case INSMOD_D4: - Modifier = INSMOD_D2; - break; - case INSMOD_D2: - Modifier = INSMOD_NONE; - break; - case INSMOD_NONE: - Modifier = INSMOD_X2; - break; - case INSMOD_X2: - Modifier = INSMOD_X4; - break; - case INSMOD_X4: - Modifier = INSMOD_X8; - break; - } - - return; - } -} - -bool PSH_INTERMEDIATE_FORMAT::ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress) // overload; 
-{ - int i; - bool Result; - - // Check all parameters : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._In; i++) - { - // Check if one of them reads from the given register : - Result = Parameters[i].IsRegister(aRegType, aAddress); - if (Result) - return true; - } - - return false; -} - -bool PSH_INTERMEDIATE_FORMAT::ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, DWORD aMask) // overload; -{ - int i; - bool Result; - - // Check all parameters : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._In; i++) - { - // Check if one of them reads from the given register : - Result = Parameters[i].IsRegister(aRegType, aAddress, aMask); - if (Result) - return true; - } - - return false; -} - -// Used to determine the number of accesses to a register type within an instruction -// For use when determining register access limitations on certain instructions -// addressCount = the number of different registers read of the specified type -// total = the number of accesses to the spcified register type -bool PSH_INTERMEDIATE_FORMAT::ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, int& addressCount, int& total) // overload; -{ - int i; - bool Result; - bool RegisterUsage[256] = { false }; - - addressCount = 0; - total = 0; - - // Check all parameters : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._In; i++) - { - // Check if one of them reads from the given register : - Result = Parameters[i].IsRegister(aRegType, aAddress, 0); - if (Result) - { - ++total; - if (!RegisterUsage[Parameters[i].Address]) - { - RegisterUsage[Parameters[i].Address] = true; - ++addressCount; - } - } - } - - return total > 0; -} - -bool PSH_INTERMEDIATE_FORMAT::WritesToRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress) // overload; -{ - int i; - bool Result; - - // Check the output : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._Out; i++) - { - // Check if one of them writes to the given register : - Result = Output[i].IsRegister(aRegType, aAddress); - if (Result) - return true; - } - - return false; 
-} - -bool PSH_INTERMEDIATE_FORMAT::WritesToRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, DWORD aMask) // overload; -{ - int i; - bool Result; - - // Check the output : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._Out; i++) - { - // Check if one of them writes to the given register : - Result = Output[i].IsRegister(aRegType, aAddress, aMask); - if (Result) - return true; - } - - return false; -} - -void PSH_INTERMEDIATE_FORMAT::SwapParameter(const int Index1, const int Index2) -// Swaps two parameters. -{ - PSH_IMD_ARGUMENT TmpParameters; - - TmpParameters = Parameters[Index1]; - Parameters[Index1] = Parameters[Index2]; - Parameters[Index2] = TmpParameters; -} - -void PSH_INTERMEDIATE_FORMAT::XSwapOutput() -// Swaps the two outputs, along with their arguments. Applies only to Xbox opcodes. -{ - PSH_IMD_ARGUMENT TmpOutput; - - // Swap output 0 with 1 : - TmpOutput = Output[0]; - Output[0] = Output[1]; - Output[1] = TmpOutput; - - // Swap parameters 0 with 2 and 1 with 3 : - SwapParameter(0, 2); - SwapParameter(1, 3); -} - -bool PSH_INTERMEDIATE_FORMAT::MoveRemovableParametersRight(const int Index1, const int Index2) -// Swaps discarded (and const) parameters to the right position, to ease later conversions. -{ - bool Result = false; - - if ( (!Parameters[Index1].UsesRegister()) - && (Parameters[Index2].UsesRegister())) - { - SwapParameter(Index1, Index2); - Result = true; - } - return Result; -} - -bool PSH_INTERMEDIATE_FORMAT::XMoveNonRegisterOutputsRight() -// Swap discards and constants to the right position, to ease later conversions. Applies only to Xbox opcodes. 
-{ - bool Result = false; - - // First, check if the left output is discarded, while the second isn't : - if ( (!Output[0].UsesRegister()) - && (Output[1].UsesRegister())) - { - // Swap the outputs, so the discarded version is positioned rightmost : - XSwapOutput(); - Result = true; - } - - // Also try to swap the parameters to the first operation : - if (MoveRemovableParametersRight(0, 1)) - Result = true; - - // Idem for the parameters to second operation : - if (MoveRemovableParametersRight(2, 3)) - Result = true; - return Result; -} - -void PSH_INTERMEDIATE_FORMAT::XCopySecondOpcodeToFirst(const PSH_OPCODE aOpcode) -// Copies second opcode to first position, changing the opcode type on the fly. -{ - Opcode = aOpcode; - Output[0] = Output[1]; - Parameters[0] = Parameters[2]; - Parameters[1] = Parameters[3]; -} - -bool PSH_INTERMEDIATE_FORMAT::Decode(DWORD aCombinerStageNr, DWORD PSInputs, DWORD PSOutputs, DWORD aMask) -{ - DWORD CombinerOutputFlags; - int i; - - bool Result = false; - CombinerStageNr = aCombinerStageNr; - IsCombined = aMask == MASK_A; - - // Decode first two outputs : - if (Output[0].Decode((PSOutputs >> 4) & 0xF, aMask, atOutput)) - Result = true; - if (Output[1].Decode((PSOutputs >> 0) & 0xF, aMask, atOutput)) - Result = true; - - // Get the combiner output flags : - CombinerOutputFlags = (PS_COMBINEROUTPUT)(PSOutputs >> 12); - - // Use that to choose between the four possible operations : - // - xdd (dot/dot/discard) > calculating AB=A.B and CD=C.D - // - xdm (dot/mul/discard) > calculating AB=A.B and CD=C*D - // - xmmc (mul/mul/mux) > calculating AB=A*B and CD=C*D and Mux=AB?CD - // - xmma (mul/mul/sum) > calculating AB=A*B and CD=C*D and Sum=AB+CD - if ((CombinerOutputFlags & PS_COMBINEROUTPUT_AB_DOT_PRODUCT) > 0) // false=Multiply, true=DotProduct - { - if ((CombinerOutputFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) > 0) // false=Multiply, true=DotProduct - Opcode = PO_XDD; - else - Opcode = PO_XDM; - - // Note : All arguments are already 
in-place for these two opcodes. - - // No 3rd output; Assert that (PSOutputs >> 8) & 0xF == PS_REGISTER_DISCARD ? - } - else - if ((CombinerOutputFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) > 0) // false=Multiply, true=DotProduct - { - // The first operation is a multiply, but the second is a dot-product; - // There's no opcode for that, but we can reverse the two and still use XDM : - Opcode = PO_XDM; - XSwapOutput(); - - // No 3rd output; Assert that (PSOutputs >> 8) & 0xF == PS_REGISTER_DISCARD ? - } - else - { - if (/*AB_CD_SUM=*/(CombinerOutputFlags & PS_COMBINEROUTPUT_AB_CD_MUX) == 0) // true=AB+CD, false=MUX(AB,CD) based on R0.a - Opcode = PO_XMMA; - else - Opcode = PO_XMMC; - - // This has a 3rd output, set that already : - if (Output[2].Decode((PSOutputs >> 8) & 0xF, aMask, atOutput)) - Result = true; - } - - if (Result) - { - // Handle the Output Mapping : - switch (CombinerOutputFlags & 0x38) { - case PS_COMBINEROUTPUT_BIAS: Modifier = INSMOD_BIAS; break; // TODO : Fixup occurrances! - case PS_COMBINEROUTPUT_SHIFTLEFT_1: Modifier = INSMOD_X2; break; - case PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS: Modifier = INSMOD_BX2; break; // TODO : Fixup occurrances! - case PS_COMBINEROUTPUT_SHIFTLEFT_2: Modifier = INSMOD_X4; break; - case PS_COMBINEROUTPUT_SHIFTRIGHT_1: Modifier = INSMOD_D2; break; - default /*PS_COMBINEROUTPUT_IDENTITY*/: Modifier = INSMOD_NONE; break; - } - - if ((CombinerOutputFlags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) > 0) // false=Alpha-to-Alpha, true=Blue-to-Alpha - { - // Note : The effect of this flag is not entirely clear - blue to alpha itself is an easy to understand operation, - // but on what output does it operate? AB? or the mux_sum destination register (which doesn't occur when a dot - // operation is executed)? What if AB is discarded, but AB+CD is registered? Also, what happens to the other - // color channels (R,G and A) in that register? 
The docs seem to imply that AB itself is not changed (as they - // state that the alpha portion is not necessarily discarded), which would mean that only the mux_sum output - // is influenced, but that would imply that this flag has no effect for dot-products (XDD or XDM)... - // And if this is true, how do the blue-to-alpha flags behave if present on both AB and CD? - - // TODO : Rayman does this in some shaders, requires a fixup (as output.b is incorrect and not allowed) - // TODO: Above may not be valid anymore, needs testing - Output[0].Modifiers = Output[0].Modifiers | (1 << ARGMOD_BLUE_REPLICATE); - CommentString += ", d0.a=d0.b"; - } - - if ((CombinerOutputFlags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) > 0) // false=Alpha-to-Alpha, true=Blue-to-Alpha - { - Output[1].Modifiers = Output[1].Modifiers | (1 << ARGMOD_BLUE_REPLICATE); - CommentString += ", d1.a=d1.b"; - } - - // Decode all four inputs : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._In; i++) - Parameters[i].Decode((PSInputs >> ((3-i) * 8)) & 0xFF, aMask, atInput); - } - return Result; -} // Decode - -bool PSH_INTERMEDIATE_FORMAT::DecodeFinalCombiner(DWORD aPSFinalCombinerInputsABCD, DWORD aPSFinalCombinerInputsEFG) -{ - int i; -// Note : The sign bit is lost upon input to the final combiner! 
- -// The final combiner performs the following operations : -// -// prod register = E*F // PS_REGISTER_EF_PROD, useable in A,B,C,D,G -// -// rgbout = A*B + (1-A)*C + D // lrp tmp.rgb, A, B, C // Note : tmp can be r0 if [A,B,C,D] * r0 = [] -// // add r0.rgb, tmp.rgb, D.rgb // Otherwise use a writable register from A;B or C -// -// alphaout = G.a // mov r0.a, G.a // Not necessary if G = r0 -// -// (also the final combiner can read PS_REGISTER_V1R0_SUM, which is equal to v1 + r0) -// Normal optimizations apply, like when A = PS_REGISTER_ZERO, all we have left is C + D (add r0.rgb, C.rgb, D.rgb) -// Also, if D = PS_REGISTER_ZERO, the add can be changed into a mov (if the result isn't already in r0.rgb) - - // Note : Previously, XSokoban lost it's font rendering when the final combiner was emitted, - // when disabled, the font reappeared (in various colors). This was because constants where - // not properly set locally. - - Opcode = PO_XFC; - CombinerStageNr = XFC_COMBINERSTAGENR; - - // Decode A,B,C and D : - for (i = 0; i < 4; i++) - Parameters[i].Decode((aPSFinalCombinerInputsABCD >> ((3-i) * 8)) & 0xFF, MASK_RGB/*?*/, atFinalCombiner); - - // Decode E,F and G : - for (i = 0; i < 3; i++) - Parameters[4+i].Decode((aPSFinalCombinerInputsEFG >> ((3-i) * 8)) & 0xFF, MASK_RGB/*?*/, atFinalCombiner); - - return true; -} - -/* PSH_XBOX_SHADER */ - -void PSH_XBOX_SHADER::SetPSVersion(const uint32_t PSVersion) -{ - m_PSVersion = PSVersion; - - // Source : https://en.wikipedia.org/wiki/High-Level_Shading_Language#Pixel_shader_comparison - if (m_PSVersion >= D3DPS_VERSION(4, 0)) { - MaxInputColorRegisters = 32; - MaxTemporaryRegisters = 4096; - MaxConstantFloatRegisters = 16*4096; - MaxSamplerRegisters = 16; - MaxTextureCoordinateRegisters = 0; // In shader model 4 and up, Dependent texture limit (T) is unlimited - // Note : Input Registers (v#) are now fully floating point and the Texture Coordinate Registers (t#) have been consolidated into it. 
- - PSH_PC_MAX_REGISTER_COUNT = 16 * 4096; - } - else if (m_PSVersion >= D3DPS_VERSION(3, 0)) { - // Source https://msdn.microsoft.com/en-us/library/windows/desktop/bb172920(v=vs.85).aspx - MaxInputColorRegisters = 10; - MaxTemporaryRegisters = 32; - MaxConstantFloatRegisters = 224; - MaxSamplerRegisters = 16; - MaxTextureCoordinateRegisters = 0; // In shader model 3 and up, Dependent texture limit (T) is unlimited - - PSH_PC_MAX_REGISTER_COUNT = 224; - } - else if (m_PSVersion >= D3DPS_VERSION(2, 0)) { - // Source https://msdn.microsoft.com/en-us/library/windows/desktop/bb172918(v=vs.85).aspx - MaxInputColorRegisters = 2; - MaxTemporaryRegisters = 12; // 12 min/32 max: The number of r# registers is determined by D3DCAPS9.D3DPSHADERCAPS2_0.NumTemps (which ranges from 12 to 32). - MaxConstantFloatRegisters = 32; - MaxSamplerRegisters = 16; - MaxTextureCoordinateRegisters = 8; - - PSH_PC_MAX_REGISTER_COUNT = 32; - } - else - assert(false); // We no longer support less than Direct3D 9 - /* For documentation purposes, keep the below information around : - else if (m_PSVersion >= D3DPS_VERSION(1, 4)) { - // Source https://msdn.microsoft.com/en-us/library/windows/desktop/bb172917(v=vs.85).aspx - MaxConstantFloatRegisters = 8; - MaxTemporaryRegisters = 6; - MaxTextureCoordinateRegisters = 4; - MaxInputColorRegisters = 2; // 2 in phase 2 - MaxSamplerRegisters = 0; // Not yet in shader model 1 - - PSH_PC_MAX_REGISTER_COUNT = 8; - } - else if (m_PSVersion >= D3DPS_VERSION(1, 3)) { - MaxConstantFloatRegisters = 8; - MaxTemporaryRegisters = 2; - MaxTextureCoordinateRegisters = 4; - MaxInputColorRegisters = 2; - MaxSamplerRegisters = 0; // Not yet in shader model 1 - - PSH_PC_MAX_REGISTER_COUNT = 8; - } - else if (m_PSVersion >= D3DPS_VERSION(1, 2)) { - MaxConstantFloatRegisters = 8; - MaxTemporaryRegisters = 2; - MaxTextureCoordinateRegisters = 4; - MaxInputColorRegisters = 2; - MaxSamplerRegisters = 0; // Not yet in shader model 1 - - PSH_PC_MAX_REGISTER_COUNT = 8; - } - else 
{ - // m_PSVersion >= D3DPS_VERSION(1, 1) - MaxConstantFloatRegisters = 8; - MaxTemporaryRegisters = 2; - MaxTextureCoordinateRegisters = 4; // Some sources say 2? - MaxInputColorRegisters = 2; - MaxSamplerRegisters = 0; // Not yet in shader model 1 - - PSH_PC_MAX_REGISTER_COUNT = 8; - } */ -} - -std::string PSH_XBOX_SHADER::ToString() -{ - std::string Result; - int i; - - for (i = 0; i < IntermediateCount; i++) - Result = Result + Intermediate[i].ToString() + "\n"; - - return Result; -} - -void PSH_XBOX_SHADER::Log(const char *PhaseStr) -{ - //if (MayLog(lfUnit)) - { - EmuLog(LOG_LEVEL::DEBUG, "New decoding - %s :", PhaseStr); - EmuLog(LOG_LEVEL::DEBUG, "%s", ToString().c_str()); - } -} - -PPSH_INTERMEDIATE_FORMAT PSH_XBOX_SHADER::NewIntermediate() -{ - PPSH_INTERMEDIATE_FORMAT Result = &Intermediate[IntermediateCount]; - Result->Initialize(PO_COMMENT); - ++IntermediateCount; - return Result; -} - -void PSH_XBOX_SHADER::InsertIntermediate(PPSH_INTERMEDIATE_FORMAT pIntermediate, int Index) -{ - int i; - i = IntermediateCount - 1; - while (i >= Index) - { - Intermediate[i + 1] = Intermediate[i]; - --i; - } - - Intermediate[Index] = *pIntermediate; - ++IntermediateCount; -} - -void PSH_XBOX_SHADER::DeleteIntermediate(int Index) -{ - int i; - for (i = Index; i < IntermediateCount - 1; i++) - Intermediate[i] = Intermediate[i + 1]; - - --IntermediateCount; -} - -void PSH_XBOX_SHADER::DeleteLastIntermediate() -{ - if (IntermediateCount > 0) - DeleteIntermediate(IntermediateCount - 1); -} - -std::string PSH_XBOX_SHADER::OriginalToString(xbox::X_D3DPIXELSHADERDEF *pPSDef) // static -{ - char buffer[4096]; - return std::string(buffer, sprintf(buffer, "PSAphaInputs[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X\n" - "PSFinalCombinerInputsABCD = 0x%.08X\n" - "PSFinalCombinerInputsEFG = 0x%.08X\n" - "PSConstant0[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X\n" - "PSConstant1[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 
0x%.08X 0x%.08X\n" - "PSAlphaOutputs[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X\n" - "PSRGBInputs[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X\n" - "PSCompareMode = 0x%.08X\n" - "PSFinalCombinerConstant0 = 0x%.08X\n" - "PSFinalCombinerConstant1 = 0x%.08X\n" - "PSRGBOutputs[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X\n" - "PSCombinerCount = 0x%.08X\n" - "PSTextureModes = 0x%.08X\n" - "PSDotMapping = 0x%.08X\n" - "PSInputTexture = 0x%.08X\n" - "PSC0Mapping = 0x%.08X\n" - "PSC1Mapping = 0x%.08X\n" - "PSFinalCombinerConstants = 0x%.08X\n", - pPSDef->PSAlphaInputs[0], pPSDef->PSAlphaInputs[1], pPSDef->PSAlphaInputs[2], pPSDef->PSAlphaInputs[3], - pPSDef->PSAlphaInputs[4], pPSDef->PSAlphaInputs[5], pPSDef->PSAlphaInputs[6], pPSDef->PSAlphaInputs[7], - pPSDef->PSFinalCombinerInputsABCD, - pPSDef->PSFinalCombinerInputsEFG, - pPSDef->PSConstant0[0], pPSDef->PSConstant0[1], pPSDef->PSConstant0[2], pPSDef->PSConstant0[3], - pPSDef->PSConstant0[4], pPSDef->PSConstant0[5], pPSDef->PSConstant0[6], pPSDef->PSConstant0[7], - pPSDef->PSConstant1[0], pPSDef->PSConstant1[1], pPSDef->PSConstant1[2], pPSDef->PSConstant1[3], - pPSDef->PSConstant1[4], pPSDef->PSConstant1[5], pPSDef->PSConstant1[6], pPSDef->PSConstant1[7], - pPSDef->PSAlphaOutputs[0], pPSDef->PSAlphaOutputs[1], pPSDef->PSAlphaOutputs[2], pPSDef->PSAlphaOutputs[3], - pPSDef->PSAlphaOutputs[4], pPSDef->PSAlphaOutputs[5], pPSDef->PSAlphaOutputs[6], pPSDef->PSAlphaOutputs[7], - pPSDef->PSRGBInputs[0], pPSDef->PSRGBInputs[1], pPSDef->PSRGBInputs[2], pPSDef->PSRGBInputs[3], - pPSDef->PSRGBInputs[4], pPSDef->PSRGBInputs[5], pPSDef->PSRGBInputs[6], pPSDef->PSRGBInputs[7], - pPSDef->PSCompareMode, - pPSDef->PSFinalCombinerConstant0, - pPSDef->PSFinalCombinerConstant1, - pPSDef->PSRGBOutputs[0], pPSDef->PSRGBOutputs[1], pPSDef->PSRGBOutputs[2], pPSDef->PSRGBOutputs[3], - pPSDef->PSRGBOutputs[4], pPSDef->PSRGBOutputs[5], pPSDef->PSRGBOutputs[6], 
pPSDef->PSRGBOutputs[7], - pPSDef->PSCombinerCount, - XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES), /* pPSDef->PSTextureModes is stored in a different place than pPSDef*/ - pPSDef->PSDotMapping, - pPSDef->PSInputTexture, - pPSDef->PSC0Mapping, - pPSDef->PSC1Mapping, - pPSDef->PSFinalCombinerConstants)); -} - -void PSH_XBOX_SHADER::GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]) -{ - for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) - { - psTextureModes[i] = (PS_TEXTUREMODES)((XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) >> (i * 5)) & 0x1F); - } -} - -void PSH_XBOX_SHADER::GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_DOTMAPPING psDotMapping[xbox::X_D3DTS_STAGECOUNT]) -{ - psDotMapping[0] = (PS_DOTMAPPING)(0); - psDotMapping[1] = (PS_DOTMAPPING)((pPSDef->PSDotMapping >> 0) & 0x7); - psDotMapping[2] = (PS_DOTMAPPING)((pPSDef->PSDotMapping >> 4) & 0x7); - psDotMapping[3] = (PS_DOTMAPPING)((pPSDef->PSDotMapping >> 8) & 0x7); -} - -void PSH_XBOX_SHADER::GetPSCompareModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, DWORD psCompareModes[xbox::X_D3DTS_STAGECOUNT]) -{ - for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) - { - psCompareModes[i] = (pPSDef->PSCompareMode >> (i * 4)) & 0xF; - } -} - -void PSH_XBOX_SHADER::GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDef, int psInputTexture[xbox::X_D3DTS_STAGECOUNT]) -{ - psInputTexture[0] = -1; // Stage 0 has no predecessors - psInputTexture[1] = 0; // Stage 1 can only use stage 0 - psInputTexture[2] = (pPSDef->PSInputTexture >> 16) & 0x1; // Stage 2 can use stage 0 or 1 - psInputTexture[3] = (pPSDef->PSInputTexture >> 20) & 0x3; // Stage 3 can only use stage 0, 1 or 2 -} - -void PSH_XBOX_SHADER::Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef) -{ - int i; - - /* Azurik likes to create and destroy the same shader every frame! 
O_o - LogFlags = lfUnit; - if (IsRunning(TITLEID_AZURIK)) - LogFlags = LogFlags | lfExtreme;*/ - - GetPSTextureModes(pPSDef, PSTextureModes); - GetPSCompareModes(pPSDef, PSCompareMode); - GetPSDotMapping(pPSDef, PSDotMapping); - GetPSInputTexture(pPSDef, PSInputTexture); - - NumberOfCombiners = (pPSDef->PSCombinerCount >> 0) & 0xF; - CombinerCountFlags = (pPSDef->PSCombinerCount >> 8); - - CombinerMuxesOnMsb = (CombinerCountFlags & PS_COMBINERCOUNT_MUX_MSB) > 0; - CombinerHasUniqueC0 = (CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C0) > 0; - CombinerHasUniqueC1 = (CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C1) > 0; - - // Backwards compatible decoding (purely for logging) : - { - for (i = 0; i < xbox::X_PSH_COMBINECOUNT; i++) { - Combiners[i].RGB.Decode(pPSDef->PSRGBInputs[i], pPSDef->PSRGBOutputs[i]); - Combiners[i].Alpha.Decode(pPSDef->PSAlphaInputs[i], pPSDef->PSAlphaOutputs[i], /*aIsAlpha=*/true); - } - - FinalCombiner.Decode(pPSDef->PSFinalCombinerInputsABCD, pPSDef->PSFinalCombinerInputsEFG, pPSDef->PSFinalCombinerConstants); - } -} - -PSH_RECOMPILED_SHADER PSH_XBOX_SHADER::Convert(xbox::X_D3DPIXELSHADERDEF *pPSDef) -{ - int i; - Recompiled = {}; - Recompiled.PSDef = *pPSDef; - - // Use a fluent interface to start with a pixel shader version opcode that knowns the host version - NewIntermediate()->Initialize(PO_XPS)->Parameters[6].Mask = m_PSVersion; - - for (i = 0; i < NumberOfCombiners; i++) - { - // Check that the RGB and Alpha inputs do the same operation : - if ( ((pPSDef->PSRGBInputs[i] & PS_NoChannelsMask) == (pPSDef->PSAlphaInputs[i] & PS_NoChannelsMask)) - // Check if all RGB channels are set to read from PS_CHANNEL_RGB : - && ((pPSDef->PSRGBInputs[i] & PS_AlphaChannelsMask) == 0) - // Check if all Alpha channels are set to read from PS_CHANNEL_ALPHA : - && ((pPSDef->PSAlphaInputs[i] & PS_AlphaChannelsMask) == PS_AlphaChannelsMask) - // Check that RGB and Alpha output to the same register(s) : - && (pPSDef->PSRGBOutputs[i] == 
pPSDef->PSAlphaOutputs[i])) - { - // In this case, we can convert RGB and Alpha together : - if (!NewIntermediate()->Decode(i, pPSDef->PSRGBInputs[i], pPSDef->PSRGBOutputs[i], MASK_RGBA)) - DeleteLastIntermediate(); - } - else - { - // Otherwise, we need to convert RGB and Alpha separately : - if (!NewIntermediate()->Decode(i, pPSDef->PSRGBInputs[i], pPSDef->PSRGBOutputs[i], MASK_RGB)) - DeleteLastIntermediate(); - - if (!NewIntermediate()->Decode(i, pPSDef->PSAlphaInputs[i], pPSDef->PSAlphaOutputs[i], MASK_A)) - DeleteLastIntermediate(); - } - } - - if ((pPSDef->PSFinalCombinerInputsABCD > 0) - || (pPSDef->PSFinalCombinerInputsEFG > 0)) { - if (NewIntermediate()->DecodeFinalCombiner(pPSDef->PSFinalCombinerInputsABCD, pPSDef->PSFinalCombinerInputsEFG)) - { - FinalCombinerFlags = (PS_FINALCOMBINERSETTING)((pPSDef->PSFinalCombinerInputsEFG >> 0) & 0xFF); -// dwPS_GLOBALFLAGS = (pPSDef->PSFinalCombinerConstants >> 8) & 0x1; - } - else - DeleteLastIntermediate(); - } - // Dump the contents of the PixelShader def - //if (MayLog(LogFlags)) - // dump pixel shader definition to string - // TODO : Reinstate : XTL_DumpPixelShaderToFile(pPSDef); - - //if (MayLog(LogFlags)) - { - // print relevant contents to the debug console - EmuLog(LOG_LEVEL::DEBUG, "%s", DecodedToString(pPSDef).c_str()); - } - - // TODO: - // - Insert tex* and def instructions - - Log("Parse result"); - - if (MoveRemovableParametersRight()) - Log("MoveRemovableParametersRight"); - - if (RemoveNops()) - Log("RemoveNops"); - - while (RemoveUselessWrites()) { - Log("RemoveUselessWrites"); - if (RemoveNops()) - Log("RemoveNops"); - } - - if (ConvertConstantsToNative(pPSDef, /*Recompiled=*/&Recompiled)) - Log("ConvertConstantsToNative"); - - // Handle Texture declarations : - if (DecodeTextureModes(pPSDef)) - Log("DecodeTextureModes"); - - ConvertXboxOpcodesToNative(pPSDef); - Log("ConvertXboxOpcodesToNative"); - - while (RemoveUselessWrites()) { // again - Log("RemoveUselessWrites"); - if (RemoveNops()) - 
Log("RemoveNops"); - } - - // Resolve all differences : - if (FixupPixelShader()) - Log("FixupPixelShader"); - - if (FixInvalidDstRegister()) - Log("FixInvalidDstRegister"); - - if (FixConstantParameters()) - Log("FixConstantParameters"); - - if (FixArgumentModifiers()) - Log("FixArgumentModifiers"); - - if (FixInstructionModifiers()) - Log("FixInstructionModifiers"); - - if (FixInvalidSrcSwizzle()) - Log("FixInvalidSrcSwizzle"); - - if (FixMissingR0a()) - Log("FixMissingR0a"); - - if (FixMissingR1a()) - Log("FixMissingR1a"); - - if (FixCoIssuedOpcodes()) - Log("FixCoIssuedOpcodes"); - - if (FixOverusedRegisters()) - Log("FixOverusedRegisters"); - - if (FixUninitializedReads()) - Log("FixUninitializedReads"); - - if (FinalizeShader()) - Log("FinalizeShader"); - - Log("End result"); - - Recompiled.NewShaderStr = ToString(); - return Recompiled; -} - -std::string PSH_XBOX_SHADER::DecodedToString(xbox::X_D3DPIXELSHADERDEF *pPSDef) -// print relevant contents to the debug console - - #define _AddStr1(aStr) \ - \ - Result = Result + aStr + "\n"; - - #define _AddStr(aStr, ...) 
\ - {\ - _AddStr1(std::string(buf, sprintf(buf, aStr, __VA_ARGS__))); \ - } -{ - char buf[256]; - int i; - - std::string Result = ""; - // Show the contents to the user - _AddStr1("\n-----PixelShader Definition Contents-----"); - _AddStr1(OriginalToString(pPSDef)); - - if (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) > 0) - { - _AddStr1("\nPSTextureModes ->"); // Texture addressing modes - _AddStr("Stage 0: %s", PS_TextureModesStr[PSTextureModes[0]]); - _AddStr("Stage 1: %s", PS_TextureModesStr[PSTextureModes[1]]); - _AddStr("Stage 2: %s", PS_TextureModesStr[PSTextureModes[2]]); - _AddStr("Stage 3: %s", PS_TextureModesStr[PSTextureModes[3]]); - } - - if (pPSDef->PSDotMapping > 0) // Input mapping for dot product modes - { - _AddStr1("\nPSDotMapping ->"); - _AddStr("Stage 1: %s", PS_DotMappingStr[PSDotMapping[1]]); - _AddStr("Stage 2: %s", PS_DotMappingStr[PSDotMapping[2]]); - _AddStr("Stage 3: %s", PS_DotMappingStr[PSDotMapping[3]]); - } - - if (pPSDef->PSCompareMode > 0) // Compare modes for clipplane texture mode - { - _AddStr1("\nPSCompareMode ->"); - _AddStr("Stage 0: %s", PS_CompareModeStr[(PSCompareMode[0] == 0) ? 0 : 1]); - _AddStr("Stage 1: %s", PS_CompareModeStr[(PSCompareMode[1] == 0) ? 2 : 3]); - _AddStr("Stage 2: %s", PS_CompareModeStr[(PSCompareMode[2] == 0) ? 4 : 5]); - _AddStr("Stage 3: %s", PS_CompareModeStr[(PSCompareMode[3] == 0) ? 6 : 7]); - } - - if (pPSDef->PSInputTexture > 0) // Texture source for some texture modes - { - _AddStr1("\nPSInputTexture ->"); - _AddStr("Stage 1: %d", PSInputTexture[1]); - _AddStr("Stage 2: %d", PSInputTexture[2]); - _AddStr("Stage 3: %d", PSInputTexture[3]); - } - - if (pPSDef->PSCombinerCount > 0) // Active combiner count (Stages 0-7) - { - _AddStr1("\nPSCombinerCount ->"); - _AddStr("Combiners: %d", NumberOfCombiners); - _AddStr("Mux: %s", PS_CombinerCountFlagsStr[(CombinerCountFlags & PS_COMBINERCOUNT_MUX_MSB) == 0 ? 
0 : 1]); - _AddStr("C0: %s", PS_CombinerCountFlagsStr[(CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C0) == 0 ? 2 : 3]); - _AddStr("C1: %s", PS_CombinerCountFlagsStr[(CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C1) == 0 ? 4 : 5]); - } - - // Dxbx additions from here onwards : - - for (i = 0; i < NumberOfCombiners; i++) // Loop over all combiner stages - { - _AddStr1("\n"); - - _AddStr("PSRGBOutputs[%d] AB: %s", i, Combiners[i].RGB.OutputSUM.OutputAB.DecodedToString().c_str()); - _AddStr("PSRGBOutputs[%d] CD: %s", i, Combiners[i].RGB.OutputSUM.OutputCD.DecodedToString().c_str()); - _AddStr("PSRGBOutputs[%d] SUM: %s", i, Combiners[i].RGB.OutputSUM.DecodedToString().c_str()); - _AddStr("PSRGBOutputs[%d] flags: %s", i, PSCombinerOutputFlagsToStr(Combiners[i].RGB.CombinerOutputFlags, /*aIsAlpha=*/false).c_str()); - - _AddStr1("\n"); - _AddStr("PSRGBInputs[%d] A: %s", i, Combiners[i].RGB.OutputSUM.OutputAB.Input1.DecodedToString().c_str()); - _AddStr("PSRGBInputs[%d] B: %s", i, Combiners[i].RGB.OutputSUM.OutputAB.Input2.DecodedToString().c_str()); - _AddStr("PSRGBInputs[%d] C: %s", i, Combiners[i].RGB.OutputSUM.OutputCD.Input1.DecodedToString().c_str()); - _AddStr("PSRGBInputs[%d] D: %s", i, Combiners[i].RGB.OutputSUM.OutputCD.Input2.DecodedToString().c_str()); - - _AddStr1("\n"); - _AddStr("PSAlphaOutputs[%d] AB: %s", i, Combiners[i].Alpha.OutputSUM.OutputAB.DecodedToString().c_str()); - _AddStr("PSAlphaOutputs[%d] CD: %s", i, Combiners[i].Alpha.OutputSUM.OutputCD.DecodedToString().c_str()); - _AddStr("PSAlphaOutputs[%d] SUM: %s", i, Combiners[i].Alpha.OutputSUM.DecodedToString().c_str()); - _AddStr("PSAlphaOutputs[%d] flags: %s", i, PSCombinerOutputFlagsToStr(Combiners[i].Alpha.CombinerOutputFlags, /*aIsAlpha=*/true).c_str()); - - _AddStr1("\n"); - _AddStr("PSAlphaInputs[%d] A: %s", i, Combiners[i].Alpha.OutputSUM.OutputAB.Input1.DecodedToString().c_str()); - _AddStr("PSAlphaInputs[%d] B: %s", i, Combiners[i].Alpha.OutputSUM.OutputAB.Input2.DecodedToString().c_str()); 
- _AddStr("PSAlphaInputs[%d] C: %s", i, Combiners[i].Alpha.OutputSUM.OutputCD.Input1.DecodedToString().c_str()); - _AddStr("PSAlphaInputs[%d] D: %s", i, Combiners[i].Alpha.OutputSUM.OutputCD.Input2.DecodedToString().c_str()); - - _AddStr1("\n"); - _AddStr("PSConstant0[%d] : %x", i, pPSDef->PSConstant0[i]); // C0 for each stage - _AddStr("PSConstant1[%d] : %x", i, pPSDef->PSConstant1[i]); // C1 for each stage - } - - if ((pPSDef->PSFinalCombinerInputsABCD > 0) - || (pPSDef->PSFinalCombinerInputsEFG > 0)) // Final combiner inputs - { - _AddStr("\nPSFinalCombinerConstant0 : %x", pPSDef->PSFinalCombinerConstant0); // C0 in final combiner - _AddStr("PSFinalCombinerConstant1 : %x", pPSDef->PSFinalCombinerConstant1); // C1 in final combiner - - _AddStr1("\nPSFinalCombinerInputsABCD ->"); - _AddStr("Input A: %s", FinalCombiner.InputA.DecodedToString().c_str()); - _AddStr("Input B: %s", FinalCombiner.InputB.DecodedToString().c_str()); - _AddStr("Input C: %s", FinalCombiner.InputC.DecodedToString().c_str()); - _AddStr("Input D: %s", FinalCombiner.InputD.DecodedToString().c_str()); - - _AddStr1("\nPSFinalCombinerInputsEFG ->"); - _AddStr("Input E: %s", FinalCombiner.InputE.DecodedToString().c_str()); - _AddStr("Input F: %s", FinalCombiner.InputF.DecodedToString().c_str()); - _AddStr("Input G: %s", FinalCombiner.InputG.DecodedToString().c_str()); - _AddStr("Final combiner setting: %s", PSFinalCombinerSettingToStr((DWORD)(FinalCombiner.FinalCombinerFlags)).c_str()); - - _AddStr1("\nPSFinalCombinerConstants ->"); // Final combiner constant mapping - _AddStr("Offset of D3D constant for (C0: %d", FinalCombiner.FinalCombinerC0Mapping); - _AddStr("Offset of D3D constant for (C1: %d", FinalCombiner.FinalCombinerC1Mapping); - _AddStr("Adjust texture flag: %s", PS_GlobalFlagsStr[PS_GLOBALFLAGS(FinalCombiner.dwPS_GLOBALFLAGS)]); - } - - _AddStr1("\n"); - return Result; -} - - bool _OpcodeMustStayBeforeTextureMode(PSH_OPCODE Opcode, int i) - { - if (Opcode == PO_XPS) - return true; - - 
// Before texture modes, only keep the first comment (the one mentioning "xps" got converted into "ps") - if (Opcode == PO_COMMENT) - return (i == 0); - - if (Opcode == PO_PS) - return true; - - if (Opcode == PO_DEF) - return true; - - if (Opcode >= PO_DCL && Opcode <= PO_DCL_VOLUME) - return true; - - return false; - } - - bool PSH_XBOX_SHADER::_NextIs2D(int Stage) - { - if (Stage < xbox::X_D3DTS_STAGECOUNT-1) - return (PSTextureModes[Stage + 1] == PS_TEXTUREMODES_DOT_ST) || (PSTextureModes[Stage + 1] == PS_TEXTUREMODES_DOT_ZW); - else - return false; - } - -bool PSH_XBOX_SHADER::DecodeTextureModes(xbox::X_D3DPIXELSHADERDEF *pPSDef) -{ - int InsertPos; - PSH_INTERMEDIATE_FORMAT Ins = {}; - std::vector InsertIns; - int Stage; - - InsertIns.reserve(32); // arbitrary allotment of instructions - InsertIns.resize(xbox::X_D3DTS_STAGECOUNT); // default initialized to PO_COMMENT instructions - - bool Result = false; - - InsertPos = -1; - do { - ++InsertPos; - } while (_OpcodeMustStayBeforeTextureMode(Intermediate[InsertPos].Opcode, InsertPos)); - - Ins.Initialize(PO_DCL); - for (Stage = 0; Stage < xbox::X_D3DTS_STAGECOUNT; Stage++) - { - if (PSTextureModes[Stage] != PS_TEXTUREMODES_NONE || Stage < PSH_XBOX_MAX_T_REGISTER_COUNT) - { - switch (PSTextureModes[Stage]) - { - case PS_TEXTUREMODES_PROJECT2D: // argb = texture(r/q, s/q) TODO : Apply the division via D3DTOP_BUMPENVMAP ? 
- case PS_TEXTUREMODES_BUMPENVMAP: - case PS_TEXTUREMODES_BUMPENVMAP_LUM: - case PS_TEXTUREMODES_DOT_ST: - case PS_TEXTUREMODES_DPNDNT_AR: - case PS_TEXTUREMODES_DPNDNT_GB: - { - Ins.Opcode = PO_DCL_2D; - Ins.Output[0].SetRegister(PARAM_S, Stage, MASK_RGBA); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - break; - } - case PS_TEXTUREMODES_PROJECT3D: // argb = texture(r/q, s/q, t/q) Note : 3d textures are sampled using PS_TEXTUREMODES_CUBEMAP - case PS_TEXTUREMODES_BRDF: - case PS_TEXTUREMODES_DOT_STR_3D: - { - Ins.Opcode = PO_DCL_VOLUME; - Ins.Output[0].SetRegister(PARAM_S, Stage, MASK_RGBA); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - break; - } - case PS_TEXTUREMODES_CUBEMAP: // argb = cubemap(r/q, s/q, t/q) - case PS_TEXTUREMODES_DOT_RFLCT_DIFF: - case PS_TEXTUREMODES_DOT_RFLCT_SPEC: - case PS_TEXTUREMODES_DOT_STR_CUBE: - case PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST: - { - Ins.Opcode = PO_DCL_CUBE; - Ins.Output[0].SetRegister(PARAM_S, Stage, MASK_RGBA); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - break; - } - } - - Ins.Opcode = PO_DCL; - Ins.Output[0].SetRegister(PARAM_T, Stage, MASK_RGBA); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - } - } - - for (int j = 0; j < PSH_XBOX_MAX_V_REGISTER_COUNT; ++j) - { - Ins.Opcode = PO_DCL; - Ins.Output[0].SetRegister(PARAM_V, j, MASK_RGBA); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - } - - PSH_OPCODE Opcode; - - Opcode = PO_TEXLD2; - - for (Stage = 0; Stage < xbox::X_D3DTS_STAGECOUNT; Stage++) - { - // TODO : Apply conversions when PS_GLOBALFLAGS_TEXMODE_ADJUST is set (but ... how to check the texture type? read D3DRS_PSTEXTUREMODES?) 
- - // Convert the texture mode to a texture addressing instruction : - switch (PSTextureModes[Stage]) { // input = q,s,t,r (same layout as a,r,g,b, also known as w,x,y,z) - case PS_TEXTUREMODES_PROJECT2D: // argb = texture(r/q, s/q) TODO : Apply the division via D3DTOP_BUMPENVMAP ? - case PS_TEXTUREMODES_PROJECT3D: // argb = texture(r/q, s/q, t/q) Note : 3d textures are sampled using PS_TEXTUREMODES_CUBEMAP - case PS_TEXTUREMODES_CUBEMAP: { // argb = cubemap(r/q, s/q, t/q) - Opcode = PO_TEXLD2; - - if (m_PSVersion >= D3DPS_VERSION(3, 0)) - continue; - break; - } - case PS_TEXTUREMODES_NONE: - case PS_TEXTUREMODES_PASSTHRU: - Opcode = PO_MOV; - break; - case PS_TEXTUREMODES_CLIPPLANE: Opcode = PO_TEXKILL; break; - case PS_TEXTUREMODES_BUMPENVMAP: Opcode = PO_TEXBEM; break; - case PS_TEXTUREMODES_BUMPENVMAP_LUM: Opcode = PO_TEXBEML; break; - case PS_TEXTUREMODES_BRDF: Opcode = PO_TEXBRDF; break; // Note : Not supported by Direct3D8 ? - case PS_TEXTUREMODES_DOT_ST: Opcode = PO_TEXM3X2TEX; break; - case PS_TEXTUREMODES_DOT_ZW: Opcode = PO_TEXM3X2DEPTH; break; // Note : requires ps.1.3 and a preceding texm3x2pad - case PS_TEXTUREMODES_DOT_RFLCT_DIFF: Opcode = PO_TEXM3X3DIFF; break; // Note : Not supported by Direct3D8 ? - case PS_TEXTUREMODES_DOT_RFLCT_SPEC: Opcode = PO_TEXM3X3VSPEC; break; - case PS_TEXTUREMODES_DOT_STR_3D: Opcode = PO_TEXM3X3TEX; break; // Note : Uses a 3d texture - case PS_TEXTUREMODES_DOT_STR_CUBE: Opcode = PO_TEXM3X3TEX; break; // Note : Uses a cube texture - case PS_TEXTUREMODES_DPNDNT_AR: Opcode = PO_TEXREG2AR; break; - case PS_TEXTUREMODES_DPNDNT_GB: Opcode = PO_TEXREG2GB; break; - case PS_TEXTUREMODES_DOTPRODUCT: - if (_NextIs2D(Stage)) - Opcode = PO_TEXM3X2PAD; - else - Opcode = PO_TEXM3X3PAD; - break; - case PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST: Opcode = PO_TEXM3X3SPEC; break; // Note : Needs 3 arguments! 
- default: - continue; - } - - InsertTextureModeInstruction(pPSDef, Stage, Opcode, InsertIns, InsertPos); - Result = true; - } - if (Result) - { - for (unsigned i = 0; i < InsertIns.size(); ++i) - { - if (i >= xbox::X_D3DTS_STAGECOUNT || InsertIns[i].Opcode != PO_COMMENT) - { - InsertIntermediate(&InsertIns[i], InsertPos); - ++InsertPos; - } - } - } - StartPos = InsertPos + 1; - return Result; -} - -int PSH_XBOX_SHADER::GetTextureStageModifiers(int Stage) -{ - int modifiers = 0; - switch (PSDotMapping[Stage]) - { - case PS_DOTMAPPING_ZERO_TO_ONE: - break; - case PS_DOTMAPPING_MINUS1_TO_1_D3D: - modifiers = (1 << ARGMOD_SCALE_BX2); - break; - case PS_DOTMAPPING_MINUS1_TO_1_GL: - break; - case PS_DOTMAPPING_MINUS1_TO_1: - break; - case PS_DOTMAPPING_HILO_1: - break; - case PS_DOTMAPPING_HILO_HEMISPHERE: - break; - default: - break; - } - - return modifiers; -} - -void PSH_XBOX_SHADER::InsertTex3x2Instructions(int Stage, int inputStage, std::vector& InsertIns) -{ - PSH_INTERMEDIATE_FORMAT Ins = {}; - - const int modifiers = GetTextureStageModifiers(Stage); - - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage - 1, 0); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, 0); - Ins.Parameters[1].Modifiers = modifiers; - InsertIns.emplace_back(Ins); - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage - 0, 0); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, 0); - Ins.Parameters[1].Modifiers = modifiers; - InsertIns.emplace_back(Ins); -} - -void PSH_XBOX_SHADER::InsertTex3x3Instructions(int Stage, int inputStage, std::vector& InsertIns) -{ - PSH_INTERMEDIATE_FORMAT Ins = {}; - - const int modifiers = GetTextureStageModifiers(Stage); - - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, 1, 
MASK_R); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage - 2, 0); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, 0); - Ins.Parameters[1].Modifiers = modifiers; - InsertIns.emplace_back(Ins); - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage - 1, 0); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, 0); - Ins.Parameters[1].Modifiers = modifiers; - InsertIns.emplace_back(Ins); - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_B); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage - 0, 0); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, 0); - Ins.Parameters[1].Modifiers = modifiers; - InsertIns.emplace_back(Ins); -} - -bool PSH_XBOX_SHADER::InsertTextureModeInstruction(xbox::X_D3DPIXELSHADERDEF *pPSDef, int Stage, PSH_OPCODE opcode, std::vector& InsertIns, int& InsertPos) -{ - PSH_INTERMEDIATE_FORMAT Ins = {}; - - bool Result = false; - - PSH_ARGUMENT_TYPE type = PARAM_T; - int inputStage = Stage; - int mask = 0; - - // TODO: Refactor and optimize - // TODO: Update handling to support 1.4? - bool needsInitialization = false; - switch (opcode) - { - case PO_TEXBEM: - case PO_TEXBEML: - { - inputStage = PSInputTexture[Stage]; - - // If the bump-map texture format is X_D3DFMT_X8L8V8U8 or X_D3DFMT_L6V5U5 we need to apply a bias - // This happens because these formats are an alias of unsigned texture formats. - // Fixes an issue with the JSRF boost-dash effect - // NOTE: This assumes that this shader will only ever be used for the input bumpmap texture - // If this causes regressions in other titles, we'll need to be smarter about this - // and include the texture formats in the shader hash, somehow. 
- bool bias = false; - auto biasModifier = (1 << ARGMOD_SCALE_BX2); - auto pXboxTexture = g_pXbox_SetTexture[inputStage]; - if (pXboxTexture != nullptr) { - extern xbox::X_D3DFORMAT GetXboxPixelContainerFormat(const xbox::X_D3DPixelContainer *pXboxPixelContainer); // TODO : Move to XTL-independent header file - - switch (GetXboxPixelContainerFormat(pXboxTexture)) { - case xbox::X_D3DFMT_L6V5U5: { - extern xbox::X_D3DRESOURCETYPE GetXboxD3DResourceType(const xbox::X_D3DResource *pXboxResource); // TODO : Move to XTL-independent header file - extern bool IsSupportedFormat(xbox::X_D3DFORMAT X_Format, xbox::X_D3DRESOURCETYPE XboxResourceType, DWORD D3DUsage); // TODO : Move to XTL-independent header file - - // L6V5U5 format is converted incorrectly if not supported by the device - xbox::X_D3DRESOURCETYPE XboxResourceType = GetXboxD3DResourceType(pXboxTexture); - DWORD D3DUsage = 0; // TODO : Since it's not yet know how to determine D3DUsage in this case, 'hack' it by using no specific D3DUSAGE_* flags. 
- - bias = !IsSupportedFormat(/*XboxFormat=*/xbox::X_D3DFMT_L6V5U5, XboxResourceType, D3DUsage); - break; - } - case xbox::X_D3DFMT_X8L8V8U8: { - bias = true; - break; - } - } - } - - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[0].SetScaleBemLumRegister(D3DTSS_BUMPENVMAT00, Stage, Recompiled); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, MASK_R); - - if (bias) { - Ins.Parameters[1].Modifiers = biasModifier; - } - - Ins.Parameters[2].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, MASK_R); - InsertIns.emplace_back(Ins); - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[0].SetScaleBemLumRegister(D3DTSS_BUMPENVMAT10, Stage, Recompiled); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, MASK_G); - if (bias) { - Ins.Parameters[1].Modifiers = biasModifier; - } - Ins.Parameters[2].SetRegister(PARAM_R, 1, MASK_R); - InsertIns.emplace_back(Ins); - // - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetScaleBemLumRegister(D3DTSS_BUMPENVMAT01, Stage, Recompiled); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, MASK_R); - if (bias) { - Ins.Parameters[1].Modifiers = biasModifier; - } - Ins.Parameters[2].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, MASK_G); - InsertIns.emplace_back(Ins); - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetScaleBemLumRegister(D3DTSS_BUMPENVMAT11, Stage, Recompiled); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, MASK_G); - if (bias) { - Ins.Parameters[1].Modifiers = biasModifier; - } - Ins.Parameters[2].SetRegister(PARAM_R, 1, MASK_G); - InsertIns.emplace_back(Ins); - - Ins.CommentString = ""; - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 
0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - Ins.Parameters[1].Modifiers = 0; - InsertIns.emplace_back(Ins); - - if (opcode == PO_TEXBEML) - { - // - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_B); - Ins.Parameters[0].SetScaleBemLumRegister(D3DTSS_BUMPENVLSCALE, Stage, Recompiled); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, MASK_B); - Ins.Parameters[2].SetScaleBemLumRegister(D3DTSS_BUMPENVLOFFSET, Stage, Recompiled); - InsertIns.emplace_back(Ins); - // - Ins.Initialize(PO_MUL); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[1].SetRegister(PARAM_R, 1, MASK_B); - InsertIns.emplace_back(Ins); - } - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - - break; - } - case PO_TEXBRDF: - inputStage = PSInputTexture[Stage]; - break; - case PO_TEXM3X2TEX: - { - inputStage = PSInputTexture[Stage]; - - InsertTex3x2Instructions(Stage, inputStage, InsertIns); - - Ins.CommentString = ""; - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXM3X3TEX: - { - inputStage = PSInputTexture[Stage]; - - InsertTex3x3Instructions(Stage, inputStage, InsertIns); - - Ins.CommentString = ""; - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXM3X2DEPTH: - 
{ - inputStage = PSInputTexture[Stage]; - - InsertTex3x2Instructions(Stage, inputStage, InsertIns); - - Ins.CommentString = ""; - Ins.Initialize(PO_RCP); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_B); - Ins.Parameters[0].SetRegister(PARAM_R, 1, MASK_G); - InsertIns.emplace_back(Ins); - - Ins.Initialize(PO_MUL); - Ins.Modifier = INSMOD_SAT; - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_B); - Ins.Parameters[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[1].SetRegister(PARAM_R, 1, MASK_B); - InsertIns.emplace_back(Ins); - - Ins.Initialize(PO_CMP); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_B); - Ins.Parameters[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].Modifiers = (1 << ARGMOD_NEGATE); - Ins.Parameters[1].SetScaleConstRegister(1.0, Recompiled); - Ins.Parameters[2].SetRegister(PARAM_R, 1, MASK_B); - InsertIns.emplace_back(Ins); - - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_oDepth, 0, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, MASK_B); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXM3X3DIFF: - { - inputStage = PSInputTexture[Stage]; - - InsertTex3x3Instructions(Stage, inputStage, InsertIns); - - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXM3X3VSPEC: - case PO_TEXM3X3SPEC: - { - inputStage = PSInputTexture[Stage]; - - InsertTex3x3Instructions(Stage, inputStage, InsertIns); - - int baseRegister = PSH_XBOX_MAX_R_REGISTER_COUNT + PSH_XBOX_MAX_T_REGISTER_COUNT; - - // get eye-ray vector - Ins.Initialize(PO_COMMENT); - Ins.CommentString = "; get eye-ray vector"; - InsertIns.emplace_back(Ins); - if (opcode == PO_TEXM3X3VSPEC) - { - // E.x - Ins.Initialize(PO_MOV); - 
Ins.Output[0].SetRegister(PARAM_R, baseRegister + 0, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_T, Stage - 2, MASK_A); - InsertIns.emplace_back(Ins); - // E.y - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 0, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_T, Stage - 1, MASK_A); - InsertIns.emplace_back(Ins); - // E.z - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 0, MASK_B); - Ins.Parameters[0].SetRegister(PARAM_T, Stage - 0, MASK_A); - InsertIns.emplace_back(Ins); - // E.w - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 0, MASK_A); - Ins.Parameters[0].SetScaleConstRegister(0.0, Recompiled); - InsertIns.emplace_back(Ins); - } - else - { - // E - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 0, 0); - Ins.Parameters[0].SetRegister(PARAM_C, 0, 0); - InsertIns.emplace_back(Ins); - } - - // compute reflection vector - Ins.Initialize(PO_COMMENT); - Ins.CommentString = "; compute reflection vector"; - InsertIns.emplace_back(Ins); - // N.E - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_R, baseRegister + 0, 0); - InsertIns.emplace_back(Ins); - // 2 * (N.E) - Ins.Initialize(PO_MUL); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[1].SetScaleConstRegister(2.0, Recompiled); - InsertIns.emplace_back(Ins); - // N.N - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_R, 1, 0); - InsertIns.emplace_back(Ins); - // 1 / (N.N) - Ins.Initialize(PO_RCP); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_R, baseRegister + 1, MASK_G); - InsertIns.emplace_back(Ins); - 
// 2 * N.E / N.N - Ins.Initialize(PO_MUL); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[1].SetRegister(PARAM_R, baseRegister + 1, MASK_G); - InsertIns.emplace_back(Ins); - // 2 * N.E / N.N * N - E - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[2].SetRegister(PARAM_R, baseRegister + 0, 0); - Ins.Parameters[2].Modifiers = (1 << ARGMOD_NEGATE); - InsertIns.emplace_back(Ins); - - Ins.CommentString = ""; - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXREG2AR: - { - inputStage = PSInputTexture[Stage]; - - // E.x - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_T, Stage, MASK_A); - InsertIns.emplace_back(Ins); - // E.y - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_T, Stage, MASK_R); - InsertIns.emplace_back(Ins); - - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXREG2GB: - { - inputStage = PSInputTexture[Stage]; - - // E.x - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_T, Stage, MASK_G); - InsertIns.emplace_back(Ins); - // E.y - Ins.Initialize(PO_MOV); - 
Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_T, Stage, MASK_B); - InsertIns.emplace_back(Ins); - - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXM3X2PAD: - case PO_TEXM3X3PAD: - { - inputStage = PSInputTexture[Stage]; - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - - case PO_TEXLD: - case PO_TEXLD2: - case PO_TEXCRD: - case PO_MOV: - needsInitialization = true; - break; - default: - break; - } - - Ins.Initialize(opcode); - - if (needsInitialization) - { - type = PARAM_R; - - // Insert move instructions in reverse order to prevent overwriting wrong register - // Create instructions to move loaded temporary registers into extra temporary registers - InsertIns[xbox::X_D3DTS_STAGECOUNT - Stage - 1].Initialize(PO_MOV); - InsertIns[xbox::X_D3DTS_STAGECOUNT - Stage - 1].Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - InsertIns[xbox::X_D3DTS_STAGECOUNT - Stage - 1].Parameters[0].SetRegister(PARAM_R, Stage, 0); - - if (Ins.Opcode == PO_TEXCRD) - { - mask = MASK_RGB; - } - else - { - } - - // Replace texture coordinate register usage up until first usage as output - int lastUsed = RegisterIsUsedFromIndexUntil(InsertPos, PARAM_T, Stage); - - if (lastUsed >= 0) - { - ReplaceInputRegisterFromIndexOnwards(InsertPos, PARAM_T, Stage, PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, lastUsed); - } - } - Ins.Output[0].SetRegister(type, Stage, mask); - - // For those texture modes that need it, add the source stage as argument : - if (PSH_OPCODE_DEFS[Ins.Opcode]._In >= 1) - { - Ins.Parameters[0].SetRegister(PARAM_T, inputStage, 0); - - if (Ins.Opcode >= PO_TEXDP3TEX && Ins.Opcode <= 
PO_TEXM3X3SPEC) - { - Ins.Parameters[0].Modifiers = GetTextureStageModifiers(Stage); - } - } - - if (PSH_OPCODE_DEFS[Ins.Opcode]._In >= 2) - { - if (Ins.Opcode == PO_TEXLD2) - { - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - } - - // Add the third argument : - switch (PSTextureModes[Stage]) { - case PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST: - { - Ins.Parameters[1].SetRegister(PARAM_C, 0, 0); - Ins.CommentString = "Dxbx guess"; // TODO : Where do we get the 3rd argument to this? - break; - } - } - } - -// // Warn about unprocessed flag : -// if ((dwPS_GLOBALFLAGS & PS_GLOBALFLAGS_TEXMODE_ADJUST) > 0) -// Ins.CommentString = Ins.CommentString + " PS_GLOBALFLAGS_TEXMODE_ADJUST unhandled!"; - - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - - return Result; -} - -bool PSH_XBOX_SHADER::MoveRemovableParametersRight() -{ - int i; - - bool Result = false; - - // For all opcodes, try to put constant and discarded arguments in the rightmost slot, to ease following analysis : - i = IntermediateCount; - while (i > StartPos) - { - --i; - - switch (Intermediate[i].Opcode) { -// case PO_SUB: // 1-x is not the same as x-1, but can still be reduced - see SimplifySUB - case PO_ADD: - case PO_DP3: - case PO_DP4: - case PO_MUL: // All these opcodes have two swappable parameters, so try that : - if (Intermediate[i].MoveRemovableParametersRight(0, 1)) - Result = true; - break; - - case PO_XMMA: - case PO_XMMC: - case PO_XDD: - if (Intermediate[i].XMoveNonRegisterOutputsRight()) - Result = true; - break; - - case PO_XDM: - { - // Parameters may be swapped for both dot and mul, - // but the opcodes themselves may not, as we handle - // both XDM operations separately below : - if (Intermediate[i].MoveRemovableParametersRight(0, 1)) - Result = true; - - if (Intermediate[i].MoveRemovableParametersRight(2, 3)) - Result = true; - break; - } - } - } - return Result; -} // MoveRemovableParametersRight - - void PSH_XBOX_SHADER::_SetColor(/*var 
OUT*/PSH_INTERMEDIATE_FORMAT &NewIns, D3DCOLOR ConstColor) - { - D3DXCOLOR XColor; - - // Colors are defined in RGBA format, and range 0.0 - 1.0 (negative values - // can be obtained by supplying PS_INPUTMAPPING_SIGNED_NEGATE to the combiner - // that reads from these constants). - XColor = ConstColor; - NewIns.Parameters[0].SetConstValue(XColor.r); - NewIns.Parameters[1].SetConstValue(XColor.g); - NewIns.Parameters[2].SetConstValue(XColor.b); - NewIns.Parameters[3].SetConstValue(XColor.a); - } - - void PSH_XBOX_SHADER::_SetColor(/*var OUT*/PSH_INTERMEDIATE_FORMAT &NewIns, D3DCOLORVALUE ConstColor) - { - NewIns.Parameters[0].SetConstValue(ConstColor.r); - NewIns.Parameters[1].SetConstValue(ConstColor.g); - NewIns.Parameters[2].SetConstValue(ConstColor.b); - NewIns.Parameters[3].SetConstValue(ConstColor.a); - } - -bool PSH_XBOX_SHADER::ConvertConstantsToNative(xbox::X_D3DPIXELSHADERDEF *pPSDef, /*var OUT*/PSH_RECOMPILED_SHADER *Recompiled) -{ - int i, j; - PPSH_INTERMEDIATE_FORMAT Cur; - PPSH_IMD_ARGUMENT CurArg; - PSH_INTERMEDIATE_FORMAT NewIns = {}; - - NewIns.Initialize(PO_DEF); - - // Add constants used to represent common powers of 2 used by instruction and argument modifiers - // Represent constant 0.0 and common powers of 2 divisions - NewIns.Output[0].SetRegister(PARAM_C, PSH_XBOX_CONSTANT_MUL1, MASK_RGBA); - _SetColor(NewIns, { 0.0, 1.0 / 2.0, 1.0 / 4.0, 1.0 / 8.0 }); - InsertIntermediate(&NewIns, 1); - - // Represent common powers of 2 constants, also used as multipliers - NewIns.Output[0].SetRegister(PARAM_C, PSH_XBOX_CONSTANT_MUL0, MASK_RGBA); - _SetColor(NewIns, {1.0, 2.0, 4.0, 8.0}); - InsertIntermediate(&NewIns, 1); - - // Loop over all opcodes to update the constant-indexes (Xbox uses C0 and C1 in each combiner) : - for (i = 0; i < IntermediateCount; i++) - { - // Loop over this opcodes' input arguments : - Cur = &(Intermediate[i]); - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._In; j++) - { - // Only handle arguments that address a constant 
register : - CurArg = &(Cur->Parameters[j]); - - // The Fog register is not supported on PC so we convert it to a constant too : - // (But only if the MASK is not solely accessing the alpha-channel - we don't support that) - if (CurArg->Type == PARAM_FOG) - { - if (CurArg->Mask != MASK_A) - { - CurArg->Type = PARAM_C; - CurArg->Address = PSH_XBOX_CONSTANT_FOG; - CurArg->Mask = CurArg->Mask & (!MASK_A); - } - else - { - // Until we can get Alpha fog from the vertex shader somehow, - // set it to a constant value, so these shaders (like appearing - // in Dolphin samples) still compile and give reasonable output : - CurArg->SetConstValue(1.0); - Cur->CommentString = "FOG.a not emulated, using 1."; - } - - continue; - } - - if (CurArg->Type != PARAM_C) - continue; - - // For each constant being addressed, we find out which Xbox constant it is, - // and map it to a native constant (as far as we have space for them) : - switch (CurArg->Address) { - case 0: // Handle C0 (if present) : - { - // The final combiner has a separate C0 constant : - if (Cur->CombinerStageNr == XFC_COMBINERSTAGENR) - CurArg->Address = PSH_XBOX_CONSTANT_FC0; - else - { - // See if C0 has a unique index per combiner stage : - if (CombinerHasUniqueC0) - // C0 actually ranges from c0 to c7, one for each possible combiner stage (X_D3DRS_PSCONSTANT0_0..X_D3DRS_PSCONSTANT0_7) : - CurArg->Address = Cur->CombinerStageNr; - else - // Non-unique just reads the same C0 in every stage : - CurArg->Address = 0; - } - break; - } - - case 1: // Handle C1 (if present) : - { - // The final combiner has a separate C1 constant : - if (Cur->CombinerStageNr == XFC_COMBINERSTAGENR) - CurArg->Address = PSH_XBOX_CONSTANT_FC1; - else - { - // See if C1 has a unique index per combiner stage : - if (CombinerHasUniqueC1) - // C1 actually ranges from c8 to c15, one for each possible combiner stage (X_D3DRS_PSCONSTANT1_0..X_D3DRS_PSCONSTANT1_7) : - CurArg->Address = Cur->CombinerStageNr + 8; - else - // Non-unique just reads 
the same C1 in every stage : - CurArg->Address = 1; - } - break; - } - } // switch - } // for arguments - } // for opcodes - - return true; -} // ConvertConstantsToNative - -bool PSH_XBOX_SHADER::RemoveUselessWrites() -// Note : Xbox allows writing to V0 (diffuse color) and V1 (specular color), but native ps.1.3 doesn't! -// Some examples of this behaviour can be seen when running RayMan Arena. -{ - int i, j; - PPSH_INTERMEDIATE_FORMAT Cur; - PPSH_IMD_ARGUMENT CurArg; - DWORD RegUsage[/*PSH_ARGUMENT_TYPE*/PARAM_C - PARAM_VALUE + 1][224] = {}; // 224 = highest possible PSH_PC_MAX_REGISTER_COUNT - - // TODO : In Polynomial Texture Maps, one extra opcode could be deleted (sub r1.rgb, v0,v0), why doesn't it? - bool Result = false; - - // Mark only R0 (and discard) as initially 'read', as these may not result in a removal : - RegUsage[PARAM_R][0] = MASK_RGBA; - for (i = 0; i < PSH_PC_MAX_REGISTER_COUNT; i++) - RegUsage[PARAM_DISCARD][i] = MASK_RGBA; - - i = IntermediateCount; - while (i > StartPos) - { - --i; - Cur = &(Intermediate[i]); - if (!Cur->IsArithmetic()) - continue; - - // Loop over the output arguments : - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._Out; j++) - { - CurArg = &(Cur->Output[j]); - - // Remove useless flag, to ease up later comparisions : - CurArg->Modifiers = CurArg->Modifiers & ~(1 << ARGMOD_IDENTITY); - - // Discard useless writes : - if ( (CurArg->Address < MaxTemporaryRegisters) - && ((RegUsage[CurArg->Type][CurArg->Address] & CurArg->Mask) == 0)) - { - EmuLog(LOG_LEVEL::DEBUG, "; Removed useless assignment to register %s", CurArg->ToString().c_str()); - CurArg->Type = PARAM_DISCARD; - Result = true; - } - } - - // Loop over the input arguments : - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._In; j++) - { - CurArg = &(Cur->Parameters[j]); - // Skip non-register parameters : - if (!CurArg->UsesRegister()) - continue; - - // Remove useless flag, to ease up later comparisions : - CurArg->Modifiers = CurArg->Modifiers & ~(1 << 
ARGMOD_IDENTITY); - - // Keep track of all register reads, so that we can discard useless writes : - if (CurArg->Address < MaxTemporaryRegisters) - RegUsage[CurArg->Type][CurArg->Address] = RegUsage[CurArg->Type][CurArg->Address] | CurArg->Mask; - } - } - return Result; -} // RemoveUselessWrites - -void PSH_XBOX_SHADER::ConvertXboxOpcodesToNative(xbox::X_D3DPIXELSHADERDEF *pPSDef) -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - std::string CommentString; - - // Do a bottom-to-top pass, converting all xbox opcodes into a native set of opcodes : - i = IntermediateCount; - while (i > 0) - { - --i; - Cur = &(Intermediate[i]); - - // Convert all Xbox opcodes into native opcodes : - CommentString = Cur->ToString(); - switch (Cur->Opcode) { - case PO_XPS: ConvertXPSToNative(i); break; - case PO_XMMA: ConvertXMMAToNative(i); break; - case PO_XMMC: ConvertXMMCToNative(i); break; - case PO_XDM: ConvertXDMToNative(i); break; - case PO_XDD: ConvertXDDToNative(i); break; - case PO_XFC: ConvertXFCToNative(i); break; // Can only occur once, as the last instruction - default: - CommentString = ""; break; - } - - if (!CommentString.empty()) { - PSH_INTERMEDIATE_FORMAT NewIns = {}; - NewIns.Initialize(PO_COMMENT)->CommentString = CommentString; - InsertIntermediate(&NewIns, i); - } - } -} // ConvertXboxOpcodesToNative - -void PSH_XBOX_SHADER::ConvertXPSToNative(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - - Cur = &(Intermediate[i]); - Cur->Opcode = PO_PS; -} - -bool PSH_XBOX_SHADER::ConvertXMMToNative_Except3RdOutput(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - int InsertPos; - PSH_INTERMEDIATE_FORMAT Ins = {}; - - bool Result = false; - Cur = &(Intermediate[i]); - InsertPos = i; - - // This block is meant for cases where XMMA/XMMC discards the 3rd output : - if (Cur->Output[2].Type == PARAM_DISCARD) - { - // Mark that this XMMA/XMMC opcode is already handled here : - Result = true; - - // The opcode must unconditionally change into a MUL (or two) : - Cur->Opcode = PO_MUL; - - // Is the 
second output ignored? - if (Cur->Output[1].Type == PARAM_DISCARD) - { - // If the first output is also ignored : - if (Cur->Output[0].Type == PARAM_DISCARD) - // The complete opcode can already be removed early on : - DeleteIntermediate(i); - else - ;// The first output is just a MUL, it's output (and first two parameters) are already in-place, so we're done - - return Result; - } - ++InsertPos; - - // Create a second MUL opcode for the second result : - Ins = *Cur; - Ins.XCopySecondOpcodeToFirst(PO_MUL); - InsertIntermediate(&Ins, InsertPos); - return Result; - } - - // The third output is needed, but what about the first and second output ? - - if (Cur->Output[0].Type == PARAM_DISCARD) - { - Cur->Output[0].Type = PARAM_T; - Cur->Output[0].Address = FakeRegNr_Xmm1; // 'r4' - } - - if (Cur->Output[1].Type == PARAM_DISCARD) - { - Cur->Output[1].Type = PARAM_T; - Cur->Output[1].Address = FakeRegNr_Xmm2; // 'r5' - } - - // Generate a MUL for the 1st output : - Ins = *Cur; - Ins.Opcode = PO_MUL; - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - - // Generate a MUL for the 2nd output : - Ins = *Cur; - Ins.XCopySecondOpcodeToFirst(PO_MUL); - InsertIntermediate(&Ins, InsertPos); - - // Note : If XMMA or XMMC writes to the third argument, we now have - // the first and second stored already (if they where not ignored). - // IF one (or both) are ignored, the intermediate result might be - // needed, but let XMMA/XMMC figure that out first - the resulting - // opcode(s) will probably require the initial opcode's removal! 
- return Result; -} // ConvertXMMToNative_Except3RdOutput - -void PSH_XBOX_SHADER::ConvertXMMAToNative(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - - // Handle the generic case of XMM, and check if the 3rd (Add) argument is ignored : - if (!ConvertXMMToNative_Except3RdOutput(i)) - { - // Add needs to be stored, we already have 2 MULs, so change the XMMA into an ADD : - Cur = &(Intermediate[i+2]); - Cur->Opcode = PO_ADD; - Cur->Modifier = INSMOD_NONE; - Cur->Parameters[0] = Cur->Output[0]; - Cur->Parameters[1] = Cur->Output[1]; - Cur->Output[0] = Cur->Output[2]; - } -} - -void PSH_XBOX_SHADER::ConvertXMMCToNative(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - - // Handle the generic case of XMM, and check if the 3rd (Compare) argument is ignored : - if (!ConvertXMMToNative_Except3RdOutput(i)) - { - // Add needs to be stored, we already have 2 MULs, so change the XMMC into an CND : - Cur = &(Intermediate[i+2]); - // TODO : If CombinerMuxesOnMsb is False, we should compare to the LeastSignificantBit of r0.a - but how? 
- Cur->Opcode = PO_CND; - Cur->Modifier = INSMOD_NONE; - // Begin the input of CND with the required r0.a parameter : - Cur->Parameters[0].SetRegister(PARAM_R, 0, MASK_A); - Cur->Parameters[0].Modifiers = (1 << ARGMOD_IDENTITY); - Cur->Parameters[0].Multiplier = 1.0; - // Follow that with the 2 selection registers : - Cur->Parameters[1] = Cur->Output[0]; - Cur->Parameters[2] = Cur->Output[1]; - // And put the result it in the final register : - Cur->Output[0] = Cur->Output[2]; - } -} - -void PSH_XBOX_SHADER::ConvertXDMToNative(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT Ins = {}; - - Cur = &(Intermediate[i]); - - // XDM does two operations : - - // a multiply : - if (Cur->Output[1].Type != PARAM_DISCARD) - { - Ins = *Cur; - Ins.XCopySecondOpcodeToFirst(PO_MUL); - InsertIntermediate(&Ins, i+1); - } - - // and a dot product : - if (Cur->Output[0].Type == PARAM_DISCARD) - DeleteIntermediate(i); - else - Cur->Opcode = PO_DP3; -} - -void PSH_XBOX_SHADER::ConvertXDDToNative(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT Ins = {}; - - Cur = &(Intermediate[i]); - - // XDD does two operations : - - // ...a dot product : - Cur->Opcode = PO_DP3; - - // and another dot product : - if (Cur->Output[1].Type != PARAM_DISCARD) - { - Ins = *Cur; - Ins.XCopySecondOpcodeToFirst(PO_DP3); - InsertIntermediate(&Ins, i+1); - } -} - -void PSH_XBOX_SHADER::ConvertXFCToNative(int i) -{ - PSH_INTERMEDIATE_FORMAT Cur = {}; - int InsertPos; - bool NeedsProd; - bool NeedsSum; - PPSH_IMD_ARGUMENT CurArg; - PSH_INTERMEDIATE_FORMAT Ins = {}; - - // Get a copy of XFC and remove it already, new instructions will replace it : - Cur = Intermediate[i]; - DeleteIntermediate(i); - InsertPos = i; - // 'final combiner - r0 = A*B + (1-A)*C + D'; - - // See if the final combiner uses the prod or sum input parameters : - NeedsProd = false; - NeedsSum = false; - for (i = 0; i < PSH_OPCODE_DEFS[Cur.Opcode]._In; i++) - { - CurArg = &(Cur.Parameters[i]); - - // Check 
for the three final-combiner-specific argument types : - switch (CurArg->Type) { - case PARAM_V1R0_SUM: - { - // Change SUM into a fake register, which will be resolved later : - CurArg->Type = PARAM_T; - CurArg->Address = FakeRegNr_Sum; // 'r2' - NeedsSum = true; - break; - } - - case PARAM_EF_PROD: - { - // Change PROD into a fake register, which will be resolved later : - CurArg->Type = PARAM_T; - CurArg->Address = FakeRegNr_Prod; // 'r3' - NeedsProd = true; - break; - } - - case PARAM_FOG: - { - // Change FOG into a constant of 1.0, as we can't simulate it otherwise : -// CurArg->SetConstValue(1.0); -// Cur->CommentString = "final combiner - FOG not emulated, using 1."; - break; - } - } - } // for input - - if (NeedsSum) - { - // Add a new opcode that calculates r0+v1 : - Ins.Initialize(PO_ADD); - Ins.Output[0].SetRegister(PARAM_T, FakeRegNr_Sum, MASK_RGBA); // 'r2' - - Ins.Parameters[0].SetRegister(PARAM_R, 0, MASK_RGB); - Ins.Parameters[1].SetRegister(PARAM_V, 1, MASK_RGB); - - // Take the FinalCombinerFlags that influence this result into account : - if ((FinalCombinerFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_R0) > 0) - Ins.Parameters[0].Modifiers = (1 << ARGMOD_INVERT); // (1-r0) is used as an input to the sum rather than r0 - if ((FinalCombinerFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_V1) > 0) - Ins.Parameters[1].Modifiers = (1 << ARGMOD_INVERT); // (1-v1) is used as an input to the sum rather than v1 - if ((FinalCombinerFlags & PS_FINALCOMBINERSETTING_CLAMP_SUM) > 0) - Ins.Modifier = INSMOD_SAT; // V1+R0 sum clamped to [0,1] - - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted final combiner calculation of V1R0_sum register"); - } - - if (NeedsProd) - { - // Add a new opcode that calculates E*F : - Ins.Initialize(PO_MUL); - Ins.Output[0].SetRegister(PARAM_T, FakeRegNr_Prod, MASK_RGBA); // 'r3' - Ins.Parameters[0] = Cur.Parameters[4]; // E - Ins.Parameters[1] = Cur.Parameters[5]; // F - InsertIntermediate(&Ins, 
InsertPos); - ++InsertPos; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted final combiner calculation of EF_prod register"); - } - - // The final combiner calculates : r0.rgb=s0*s1 + (1-s0)*s2 + s3 - // Change that into a LRP + ADD, and let the optimizer reduce it; - - // Add a new opcode that calculates r0.rgb=s0*s1 + (1-s0)*s2 via a LRP : - // Set the output to r0.rgb (as r0.a is determined via s6.a) : - - // Watch out! If s3=r0.rgb, then the LRP cannot use r0, but must use r1 as temp! - if (Cur.Parameters[3].IsRegister(PARAM_R, 0, 0)) - Cur.Output[0].SetRegister(PARAM_R, 1, MASK_RGB); - else - Cur.Output[0].SetRegister(PARAM_R, 0, MASK_RGB); - - Ins = Cur; - Ins.Opcode = PO_LRP; - Ins.Modifier = INSMOD_NONE; - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - - // Add a new opcode that calculates r0.rgb=r0.rgb+s3 : - Ins.Opcode = PO_ADD; - Ins.Modifier = Cur.Modifier; - Ins.Output[0] = Cur.Output[0]; // = r0.rgb - Ins.Parameters[0] = Cur.Output[0]; // = r0.rgb - Ins.Parameters[1] = Cur.Parameters[3]; // =s3 from XFC - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - - // See if s6 is something else than "r0.a" : - if (Cur.Parameters[6].ToString() != "r0.a") - { - // Add a new opcode that moves s6 over to r0.a : - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, 0, MASK_A); - Ins.Parameters[0] = Cur.Parameters[6]; - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - } -} - -bool PSH_XBOX_SHADER::RemoveNops() -{ - int i, j; - PPSH_INTERMEDIATE_FORMAT Cur; - bool HasOutput; - - bool Result = false; - i = IntermediateCount; - while (i > StartPos) - { - --i; - Cur = &(Intermediate[i]); - - // Skip opcodes that have no output, but should stay anyway : - if (PSH_OPCODE_DEFS[Cur->Opcode]._Out == 0) - if (Cur->Opcode != PO_NOP) - continue; - - // See if this opcode writes to any of it's outputs : - { - HasOutput = false; - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._Out; j++) - if (Cur->Output[j].Type != PARAM_DISCARD) - { - HasOutput = true; - break; - 
} - - if (!HasOutput) - { - // Remove the opcode (as it doesn't change anything) : - // This applies to PO_NOP and opcodes that discard all their results : - DeleteIntermediate(i); - Result = true; - continue; - } - } - } - return Result; -} - -int PSH_XBOX_SHADER::MaxRegisterCount(PSH_ARGUMENT_TYPE aRegType) -{ - switch (aRegType) - { - case PARAM_R: - return MaxTemporaryRegisters; - case PARAM_T: - return MaxTextureCoordinateRegisters; - case PARAM_V: - return MaxInputColorRegisters; - case PARAM_C: - return MaxConstantFloatRegisters; - case PARAM_S: - return MaxSamplerRegisters; - } - - return 0; -} - -bool PSH_XBOX_SHADER::IsValidNativeOutputRegister(PSH_ARGUMENT_TYPE aRegType, int index /*= -1*/) -{ - bool valid = (PARAM_R == aRegType) && (MaxRegisterCount(PARAM_R) > index); - - return valid; -} - -int PSH_XBOX_SHADER::RegisterIsFreeFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress) -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - for (i = aIndex; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - // Detect a read : - if (Cur->ReadsFromRegister(aRegType, aAddress)) - { - return -1; - } - // Detect a write : - if (Cur->WritesToRegister(aRegType, aAddress)) - { - break; - } - } - - return i; -} - -int PSH_XBOX_SHADER::RegisterIsUsedFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress) -{ - int result = -1; - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - for (i = aIndex; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - // Detect a read : - if (Cur->ReadsFromRegister(aRegType, aAddress)) - { - result = i; - } - // Detect a write : - if (Cur->WritesToRegister(aRegType, aAddress)) - { - break; - } - } - - return result; -} - -int PSH_XBOX_SHADER::NextFreeRegisterFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int bIndex /*= -1*/, int startAddress /*= 0*/, int excludeAddress /*= -1*/) -{ - const int registerCount = MaxRegisterCount(aRegType); - - if (bIndex < 0 || bIndex < aIndex) - bIndex = 
IntermediateCount; - - if (startAddress < 0) - startAddress = 0; - - int i; - - for (i = startAddress; i < registerCount; i++) - { - if (i == excludeAddress) - continue; - - if (RegisterIsFreeFromIndexUntil(aIndex, aRegType, i) >= bIndex) - { - return i; - } - } - - return -1; -} - -bool PSH_XBOX_SHADER::IsRegisterFreeFromIndexOnwards(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress) -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - for (i = aIndex; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - // Detect a write or read : - if (Cur->WritesToRegister(aRegType, aAddress) - || Cur->ReadsFromRegister(aRegType, aAddress)) - { - return false; - } - } - - return true; -} - -void PSH_XBOX_SHADER::ReplaceInputRegisterFromIndexOnwards(int aIndex, - PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex /*= -1*/) -{ - ReplaceRegisterFromIndexOnwards(aIndex, aSrcRegType, aSrcAddress, aDstRegType, aDstAddress, endIndex, true, false); -} - -void PSH_XBOX_SHADER::ReplaceOutputRegisterFromIndexOnwards(int aIndex, - PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex /*= -1*/) -{ - ReplaceRegisterFromIndexOnwards(aIndex, aSrcRegType, aSrcAddress, aDstRegType, aDstAddress, endIndex, false, true); -} - -void PSH_XBOX_SHADER::ReplaceRegisterFromIndexOnwards(int aIndex, - PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex /*= -1*/, bool replaceInput /*= true*/, bool replaceOutput /*= true*/) -{ - int i; - int j; - PPSH_INTERMEDIATE_FORMAT Cur; - - for (i = aIndex; i < IntermediateCount && (i <= endIndex || endIndex == -1); i++) - { - Cur = &(Intermediate[i]); - - if (replaceOutput) - { - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._Out; j++) - if (Cur->Output[j].IsRegister(aSrcRegType, aSrcAddress)) - Cur->Output[j].SetRegister(aDstRegType, aDstAddress, 
Cur->Output[j].Mask); - } - - if (replaceInput) - { - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._In; j++) - if (Cur->Parameters[j].IsRegister(aSrcRegType, aSrcAddress)) - Cur->Parameters[j].SetRegister(aDstRegType, aDstAddress, Cur->Parameters[j].Mask); - } - } -} - -bool PSH_XBOX_SHADER::FixArgumentModifiers() -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - bool Result = false; - - // Do a bottom-to-top pass, preventing constant-modifiers via additional MOV's: - i = IntermediateCount; - while (i > 0) - { - --i; - Cur = &(Intermediate[i]); - if (Cur->Opcode < PO_TEX) // TODO : Check explicitly which instruction types are handled below - continue; - - int InsertPos = i; - // Detect modifiers on constant and arguments - for (int p = 0; p < 7 && p < PSH_OPCODE_DEFS[Cur->Opcode]._In; p++) { - if ((Cur->Parameters[p].Type == PARAM_C || Cur->Parameters[p].UsesRegister()) - && ((Cur->Parameters[p].Modifiers & ~(1 << ARGMOD_NEGATE)) != 0)) { - - PSH_INTERMEDIATE_FORMAT Ins = {}; - PSH_IMD_ARGUMENT Arg = {}; - - Arg = Cur->Parameters[p]; - - int excludeAddress = Cur->Output[0].Type == PARAM_R ? 
Cur->Output[0].Address : -1; - - PSH_ARGUMENT_TYPE type = PARAM_R; - int address = NextFreeRegisterFromIndexUntil(InsertPos, PARAM_R, InsertPos, 0, excludeAddress); - - if (IsValidNativeOutputRegister(Arg.Type, Arg.Address) && RegisterIsFreeFromIndexUntil(InsertPos + 1, Arg.Type, Arg.Address) > InsertPos) - { - type = Arg.Type; - address = Arg.Address; - } - - for (int modifier = ARGMOD_INVERT; modifier < ARGMOD_SATURATE; ++modifier) - { - Arg = Cur->Parameters[p]; - - if (!Arg.HasModifier((PSH_ARG_MODIFIER)modifier)) - continue; - - bool needInsert = false; - switch ((PSH_ARG_MODIFIER)modifier) - { - case ARGMOD_INVERT: - { - if (Arg.HasModifier(ARGMOD_NEGATE)) - { - Ins.Initialize(PO_SUB); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[1].SetScaleConstRegister(1.0f, Recompiled); - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'invert' with 'negate' argument modifier (register - 1)"; - ++modifier; - } - else - { - Ins.Initialize(PO_SUB); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[0].SetScaleConstRegister(1.0f, Recompiled); - Ins.Parameters[1] = Cur->Parameters[p]; - Ins.Parameters[1].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'invert' argument modifier (1 - register)"; - } - needInsert = true; - - break; - } - case ARGMOD_NEGATE: - { - // Skip as this modifier is still supported in current shader models - // Included here for completeness - break; - Ins.Initialize(PO_MOV); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[0] = Cur->Parameters[p]; - 
Ins.Parameters[0].Modifiers = (1 << ARGMOD_NEGATE); - Ins.CommentString = "Inserted to replace 'negate' argument modifier (-register)"; - needInsert = true; - - break; - } - case ARGMOD_BIAS: - { - Ins.Initialize(PO_SUB); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[1].SetScaleConstRegister(0.5f, Recompiled); - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'bias' argument modifier (register - 0.5)"; - needInsert = true; - - break; - } - case ARGMOD_SCALE_X2: - { - Ins.Initialize(PO_MUL); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[1].SetScaleConstRegister(2.0f, Recompiled); - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'x2' argument modifier (2 * register)"; - needInsert = true; - - break; - } - case ARGMOD_SCALE_BX2: - { - Ins.Initialize(PO_MAD); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[2].SetScaleConstRegister(-1.0f, Recompiled); - Ins.Parameters[1].SetScaleConstRegister(2.0f, Recompiled); - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'bx2' argument modifier (2 * register - 1)"; - needInsert = true; - - break; - } - case ARGMOD_SCALE_X4: - { - Ins.Initialize(PO_MUL); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[1].SetScaleConstRegister(4.0f, Recompiled); - Ins.Parameters[0] = 
Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'x4' argument modifier (4 * register)"; - needInsert = true; - - break; - } - case ARGMOD_SCALE_D2: - { - Ins.Initialize(PO_MUL); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[1].SetScaleConstRegister(0.5f, Recompiled); - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'd2' argument modifier (0.5 * register)"; - needInsert = true; - - break; - } - default: - { - Ins.Initialize(PO_MOV); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace argument with modifier"; - needInsert = true; - - break; - } - } - - if (needInsert == true) - { - for (int q = p; q < PSH_OPCODE_DEFS[Cur->Opcode]._In; q++) - { - // overwrite all matching parameters to avoid duplicate instructions - if (Arg.Type == Cur->Parameters[q].Type - && Arg.Address == Cur->Parameters[q].Address - && Arg.Mask == Cur->Parameters[q].Mask - && Arg.Modifiers == Cur->Parameters[q].Modifiers - && Arg.Multiplier == Cur->Parameters[q].Multiplier) - { - Cur->Parameters[q] = Ins.Output[0]; - // Apply modifier to register instead of constant - Cur->Parameters[q].Modifiers = (Arg.Modifiers & (1 << ARGMOD_NEGATE)) | (Arg.Modifiers & (~0 << (modifier + 1))); - } - } - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - ++Cur; - EmuLog(LOG_LEVEL::DEBUG, "; Used intermediate move to avoid argument modifier"); - Result = true; - } - } - } - } - } - return Result; -} // FixArgumentModifiers - -bool PSH_XBOX_SHADER::FixConstantParameters() -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - 
bool Result = false; - - // Do a bottom-to-top pass, preventing constant-modifiers via additional MOV's: - i = IntermediateCount; - while (i > StartPos) - { - --i; - Cur = &(Intermediate[i]); - - if (!Cur->IsArithmetic()) - continue; - - for (int p = 0; p < PSH_OPCODE_DEFS[Cur->Opcode]._In; ++p) - { - if (Cur->Parameters[p].Type != PARAM_VALUE) - continue; - - if (Cur->Parameters[p].SetScaleConstRegister(Cur->Parameters[p].GetConstValue(), Recompiled)) - { - EmuLog(LOG_LEVEL::DEBUG, "; Replaced constant value with constant register"); - Result = true; - } - } - } - return Result; -} // FixConstantParameters - -bool PSH_XBOX_SHADER::FixInstructionModifiers() -{ - int i; - int InsertPos; - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT Ins = {}; - - bool Result = false; - - // Do a bottom-to-top pass, preventing constant-modifiers via additional MOV's: - i = IntermediateCount; - while (i > StartPos) - { - InsertPos = i; - --i; - Cur = &(Intermediate[i]); - - if (!Cur->IsArithmetic()) - continue; - - bool insert = true; - switch (Cur->Modifier) - { - case INSMOD_BIAS: // y = x - 0.5 // Xbox only : TODO : Fixup occurrances! - { - Ins.Initialize(PO_SUB); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(0.5f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_bias"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_bias"); - break; - } - case INSMOD_X2: // y = x * 2 - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(2.0f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_x2"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_x2"); - break; - } - case INSMOD_BX2: // y = (x - 0.5) * 2 // Xbox only : TODO : Fixup occurrances! 
- { - Ins.Initialize(PO_MAD); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(2.0f, Recompiled); - Ins.Parameters[2].SetScaleConstRegister(-1.0f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_bx2"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_bx2"); - break; - } - case INSMOD_X4: // y = x * 4 - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(4.0f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_x4"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_x4"); - break; - } - case INSMOD_D2: // y = x * 0.5 - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(0.5f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_d2"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_d2"); - break; - } - case INSMOD_X8: // y = x * 8 // ps 1.4 only - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(8.0f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_x8"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_x8"); - break; - } - case INSMOD_D4: // y = x * 0.25 // ps 1.4 only - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(0.25f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_d4"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_d4"); - break; - } - case INSMOD_D8: // y = x * 0.125 // ps 1.4 only - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - 
Ins.Parameters[1].SetScaleConstRegister(0.125f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_d8"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_d8"); - break; - } - case INSMOD_SAT: // Xbox doesn"t support this, but has ARGMOD_SATURATE instead - case INSMOD_NONE: // y = x - default: - insert = false; - break; - } - - if (insert) - { - Cur->Modifier = INSMOD_NONE; - InsertIntermediate(&Ins, InsertPos++); - Result = true; - } - - // Handle blue-to-alpha which is technically an instruction modifier, but operates on arguments - for (int i = 0; i < PSH_OPCODE_DEFS[Cur->Opcode]._Out; i++) { - auto& output = Cur->Output[i]; - - if (output.UsesRegister() && output.HasModifier(ARGMOD_BLUE_REPLICATE)) { - Ins.Initialize(PO_MOV); - Ins.Output[0].Type = Ins.Parameters[0].Type = output.Type; - Ins.Output[0].Address = Ins.Parameters[0].Address = output.Address; - Ins.Output[0].Mask = MASK_A; - Ins.Parameters[0].Mask = MASK_B; - Ins.CommentString = "; Inserted Blue-to-Alpha"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted Blue-to-Alpha"); - - InsertIntermediate(&Ins, InsertPos); - Result = true; - } - } - } - return Result; -} // FixInstructionModifiers - -bool PSH_XBOX_SHADER::FinalizeShader() -{ - PSH_INTERMEDIATE_FORMAT Ins = {}; - - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_oC, 0, MASK_RGBA); - Ins.Parameters[0].SetRegister(PARAM_R, 0, MASK_RGBA); - InsertIntermediate(&Ins, IntermediateCount); - - return true; -} // FinalizeShader - -//bool PSH_XBOX_SHADER::CombineInstructions() - - bool _CanLerp(PPSH_INTERMEDIATE_FORMAT Mul1, PPSH_INTERMEDIATE_FORMAT Mul2, PPSH_INTERMEDIATE_FORMAT AddOpcode, int Left, int Right) - { - PPSH_IMD_ARGUMENT ParamLeft, ParamRight; - - // Check if Left and Right are the same register : - ParamLeft = &(Mul1->Parameters[Left]); - ParamRight = &(Mul2->Parameters[Right]); - if ((ParamLeft->Type != ParamRight->Type) - || (ParamLeft->Address != ParamRight->Address) - || 
(ParamLeft->Mask != ParamRight->Mask)) - return false; - - // Is the left argument inverted and the right not (or the other way around) ? - if (ParamLeft->HasModifier(ARGMOD_INVERT) != ParamRight->HasModifier(ARGMOD_INVERT)) - { - // In that case, already move the arguments over to AddOpcode so we create a LRP : - AddOpcode->Parameters[0] = *ParamLeft; - AddOpcode->Parameters[1] = Mul1->Parameters[1-Left]; - AddOpcode->Parameters[2] = Mul2->Parameters[3-Right]; - return true; - } - return false; - } - - bool _CanMad(int ConstOne, PPSH_INTERMEDIATE_FORMAT Mul1, PPSH_INTERMEDIATE_FORMAT Mul2, PPSH_INTERMEDIATE_FORMAT AddOpcode) - { - // Check if the given parameter is 1 : - bool Result = Mul1->Parameters[ConstOne].GetConstValue() == 1.0; - if (Result) - { - // Put the other 3 parameters int the resulting opcode, so we can make it a MAD : - AddOpcode->Parameters[0] = Mul2->Parameters[0]; - AddOpcode->Parameters[1] = Mul2->Parameters[1]; - AddOpcode->Parameters[2] = Mul1->Parameters[1-ConstOne]; - } - return Result; - } - -bool PSH_XBOX_SHADER::CombineInstructions() -{ - int i; - PPSH_INTERMEDIATE_FORMAT Op0; - PPSH_INTERMEDIATE_FORMAT Op1; - PPSH_INTERMEDIATE_FORMAT Op2; - bool CanOptimize; - int j; - int k; - - bool Result = false; - - i = IntermediateCount - 1; - while (i > StartPos) - { - --i; - Op0 = &(Intermediate[i+0]); - Op1 = &(Intermediate[i+1]); - Op2 = &(Intermediate[i+2]); - - // Check if there are two consecutive opcodes reading from a fake R register; - // We outputted these ourselves, in order to ease the conversion and profit - // from having generic optimizations in one place : - if ( (Op0->Output[0].Type == PARAM_T) - && (Op0->Output[0].Address >= PSH_XBOX_MAX_T_REGISTER_COUNT) - && (Op1->Output[0].Type == PARAM_T) - && (Op1->Output[0].Address >= PSH_XBOX_MAX_T_REGISTER_COUNT)) - { - // Did we output those from a CND opcode (originally XMMC) ? 
- if (Op2->Opcode == PO_CND) - { - if ( (Op0->Opcode == PO_MOV) - && (Op1->Opcode == PO_MOV) - && (Op1->Modifier == Op0->Modifier)) - { - Op2->Modifier = Op0->Modifier; - Op2->Parameters[1] = Op0->Parameters[0]; - Op2->Parameters[2] = Op1->Parameters[0]; - DeleteIntermediate(i); - DeleteIntermediate(i); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MUL,CND via MOV,MOV,CND into a single CND"); - Result = true; - continue; - } - } - - // Did we output those from a ADD opcode (originally XMMA) ? - if (Op2->Opcode == PO_ADD) - { - if ( (Op0->Opcode == PO_MUL) - && (Op1->Opcode == PO_MUL) - && (Op1->Modifier == Op0->Modifier)) - { - // Check if we can lerp - we just need the same register on both sides that's inverted on the other : - if (_CanLerp(Op0, Op1, Op2, 0, 2) - || _CanLerp(Op0, Op1, Op2, 1, 2) - || _CanLerp(Op0, Op1, Op2, 0, 3) - || _CanLerp(Op0, Op1, Op2, 1, 3)) - { - // The lerp can be done, and the correct parameters are already set to Op2, - // so all we need to do now, it fixup the rest and remove the two MOV's : - Op2->Opcode = PO_LRP; - Op2->Modifier = Op0->Modifier; - DeleteIntermediate(i); - DeleteIntermediate(i); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MUL,ADD into a single LRP"); - Result = true; - continue; - } - - // Check if we can mad - we just need a constant 1 in one argument : - if (_CanMad(0, Op0, Op1, Op2) - || _CanMad(1, Op0, Op1, Op2) - || _CanMad(0, Op1, Op0, Op2) - || _CanMad(1, Op1, Op0, Op2)) - { - // The mad can be done, and the correct parameters are already set to Op2, - // so all we need to do now, it fixup the rest and remove the two MOV's : - Op2->Opcode = PO_MAD; - Op2->Modifier = Op0->Modifier; - DeleteIntermediate(i); - DeleteIntermediate(i); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MUL,ADD into a single MAD"); - Result = true; - continue; - } - - // No single opcode possible, so change it into a MUL + MAD : - // The first mul may write to the last output register (without a modifier) : - 
Op0->Modifier = INSMOD_NONE; - Op0->Output[0] = Op2->Output[0]; - // Change the second MUL into a MAD : - Op1->Opcode = PO_MAD; - Op1->Output[0] = Op2->Output[0]; - Op1->Parameters[2] = Op0->Output[0]; - // Remove the trailing ADD : - DeleteIntermediate(i+2); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MUL,ADD into a MUL,MAD"); - Result = true; - continue; - } - - // Was it a MUL,MUL,ADD? - if ( (Op0->Opcode == PO_MUL) - && (Op1->Opcode == PO_MUL) - && (Op0->Parameters[1].GetConstValue() == 1.0) - && (Op1->Parameters[1].GetConstValue() == 1.0)) - { - // Remove the two MOV's and fold their arguments into a MUL : - Op2->Opcode = PO_MUL; - Op2->Parameters[0] = Op0->Parameters[0]; - Op2->Parameters[1] = Op1->Parameters[0]; - DeleteIntermediate(i); - DeleteIntermediate(i); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MUL,ADD into a MUL"); - Result = true; - continue; - } - } - } - - // Do two neighbouring opcodes output to the same register (without a modifier) ? - if ( (Op0->Output[0].ToString() == Op1->Output[0].ToString()) - && (Op0->Modifier == INSMOD_NONE) - && (Op1->Modifier == INSMOD_NONE)) - { - // Is it MUL,ADD ? - if ( (Op0->Opcode == PO_MUL) - && (Op1->Opcode == PO_ADD)) - { - // Is the output of the MUL input to the ADD ? 
- if ( (Op0->Output[0].Type == Op1->Parameters[0].Type) - && (Op0->Output[0].Address == Op1->Parameters[0].Address) - && (Op0->Output[0].Modifiers == Op1->Parameters[0].Modifiers)) - // Mask and Multiplier are not important here - { - Op0->Opcode = PO_MAD; - Op0->Parameters[2] = Op1->Parameters[1]; - DeleteIntermediate(i+1); - EmuLog(LOG_LEVEL::DEBUG, "; Changed MUL,ADD into a single MAD"); - Result = true; - continue; - } - } - } - -/* - // Combinations that can be made if their intermediate result is not read again or overwritten later: - - MOV+ADD > ADD (if MOV.Output[0] was only read by ADD.Parameter[0] or ADD.Parameter[1]) - MOV+SUB > SUB (if MOV.Output[0] was only read by SUB.Parameter[0] or SUB.Parameter[1]) - MOV+MUL > MUL (if MOV.Output[0] was only read by MOV.Parameter[0] or MOV.Parameter[1]) - - MUL+MOV > MUL (if MUL.Output[0] was only read by MOV.Parameter[0]) - MUL+ADD > MAD (if MUL.Output[0] was only read by ADD.Parameter[0] or ADD.Parameter[1]) - MUL+SUB > MAD (if MUL.Output[0] was only read by SUB.Parameter[0] - Do invert MAD.Parameter[2]) -*/ - - // We can remove a MOV entirely if the input is not changed while - // the output is read, up until the output is re-written; We can change all - // these occurances into a read from the input of this MOV instead : - // This fixes some shaders in Turok, that are reduced to 8 instead of 9 opcodes. - if ( (Op0->Opcode == PO_MOV) - && (Op0->Modifier == INSMOD_NONE) - && (Op0->Output[0].Mask == MASK_RGBA)) - { - CanOptimize = false; - j = i + 1; - while (j < IntermediateCount) - { - // Don't optimize if the output is needed for CND or CMP (which must read from r0) : - // This fixes : "(Validation Error) First source for cnd instruction must be 'r0.a'" in Modify Pixel Shader XDK sample. 
- if ( ((Intermediate[j].Opcode == PO_CND) || (Intermediate[j].Opcode == PO_CMP)) - && (Op0->Output[0].IsRegister(PARAM_R, 0))) - break; - - // TODO : Add other prevention rules here (like too many texture-reads, and other scases) - - // We can optimize if the MOV-output is written to again before the end of the shader : - CanOptimize = true; - - // ensure this is not "constant with modifier" optimization pattern to prevent infinite loop - for (int p = 0; p < PSH_OPCODE_DEFS[Intermediate[j].Opcode]._In; p++) - { - if ((Op0->Parameters[0].Type == PARAM_C) - && (Intermediate[j].Parameters[p].Type == Op0->Output[0].Type) - && (Intermediate[j].Parameters[p].Address == Op0->Output[0].Address) - && (Intermediate[j].Parameters[p].Modifiers != 0)) - { - CanOptimize = false; - break; - } - }; - - if (Intermediate[j].WritesToRegister(Op0->Output[0].Type, Op0->Output[0].Address, MASK_RGBA)) - break; - - CanOptimize = false; - ++j; - } - - if (CanOptimize) - { - // Loop over all instructions in between, and try to replace reads : - CanOptimize = false; - while (j > i) - { - // For Intermediate[j].Parameters, change all occurrances of Op0.Output[0] into Op0.Parameters[0] : - for (k = 0; k < PSH_OPCODE_DEFS[Intermediate[j].Opcode]._In; k++) - if ( (Intermediate[j].Parameters[k].Type == Op0->Output[0].Type) - && (Intermediate[j].Parameters[k].Address == Op0->Output[0].Address)) - { - Intermediate[j].Parameters[k].Type = Op0->Parameters[0].Type; - Intermediate[j].Parameters[k].Address = Op0->Parameters[0].Address; - // Signal that a replacement is actually done : - CanOptimize = true; - } - - --j; - } - - if (CanOptimize) - { - DeleteIntermediate(i); - EmuLog(LOG_LEVEL::DEBUG, "; Moved MOV input into following instructions"); - Result = true; - } - } - } - - // Fix Dolphin : - // mul r3, r0,t0 ; d0=s0*s1 - // mov r0.rgb, r3 ; d0=s0 final combiner - FOG not emulated, using 1. 
- if ( (Op0->Output[0].Type == PARAM_T) - && (Op0->Output[0].Address >= PSH_XBOX_MAX_T_REGISTER_COUNT) - && (Op1->Parameters[0].Type == PARAM_T) - && (Op1->Parameters[0].Address >= PSH_XBOX_MAX_T_REGISTER_COUNT)) - { - if ( (Op0->Opcode == PO_MUL) - && (Op1->Opcode == PO_MOV)) - { - // > mul r0.rgb, r0,t0 - Op0->Output[0] = Op1->Output[0]; - DeleteIntermediate(i+1); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MOV into a MUL"); - Result = true; - continue; - } - } - - // Fix Crash bandicoot xfc leftover r3 : - if (Op0->Output[0].IsRegister(PARAM_T, FakeRegNr_Prod)) // 'r3' - { - // The final combiner uses r3, try to use r1 instead : - if (IsRegisterFreeFromIndexOnwards(i, PARAM_R, 1)) - { - ReplaceRegisterFromIndexOnwards(i, Op0->Output[0].Type, Op0->Output[0].Address, PARAM_R, 1); - EmuLog(LOG_LEVEL::DEBUG, "; Changed fake register by r1"); - Result = true; - continue; - } - } - } // while - return Result; -} // CombineInstructions - -bool PSH_XBOX_SHADER::SimplifyMOV(PPSH_INTERMEDIATE_FORMAT Cur) -{ - bool CanSimplify; - float Factor; - - // NOP-out MOV's that read and write to the same register : - if ( (Cur->Output[0].Type == Cur->Parameters[0].Type) - && (Cur->Output[0].Address == Cur->Parameters[0].Address) - && (Cur->Output[0].Mask == Cur->Parameters[0].Mask)) - { - if (Cur->Output[0].Type == PARAM_VALUE) - CanSimplify = Cur->Output[0].GetConstValue() == Cur->Parameters[0].GetConstValue(); - else - CanSimplify = (Cur->Output[0].Modifiers == Cur->Parameters[0].Modifiers) - && (Cur->Output[0].Multiplier == Cur->Parameters[0].Multiplier); - - if (CanSimplify) - { - Cur->Opcode = PO_NOP; // This nop will be removed in a recursive fixup - EmuLog(LOG_LEVEL::DEBUG, "; Changed MOV into a NOP"); - return true; - } - } - - // Does this MOV put a 0 (zero) in the output? - if (Cur->Parameters[0].GetConstValue() == 0.0) - { - // Attempt to find a constant with the value 0, and use that if present. 
- if (!Cur->Parameters[0].SetScaleConstRegister(0.0f, Recompiled)) - { - // Simulate 0 by subtracting a (guaranteed) register from itself : - // Fixup via "sub d0=v0,v0" : - Cur->Opcode = PO_SUB; - Cur->Parameters[0].Type = PARAM_V; - Cur->Parameters[0].Address = 0; - Cur->Parameters[0].Modifiers = 0; - Cur->Parameters[1] = Cur->Parameters[0]; - EmuLog(LOG_LEVEL::DEBUG, "; Changed MOV 0 into a SUB v0,v0"); - } - else - { - EmuLog(LOG_LEVEL::DEBUG, "; Changed MOV 0 into a MOV c0"); - } - - return true; - } - - // Does this MOV put a constant in the output? - if (Cur->Parameters[0].Type == PARAM_VALUE) - { - // TODO : If there's a constant equal to GetConstValue(), use that. - Factor = Cur->Parameters[0].GetConstValue(); - - if (!Cur->Parameters[0].SetScaleConstRegister(Factor, Recompiled)) - { - // Fixup via a SUB (which can calculate a constant value) : - Cur->Opcode = PO_SUB; - Cur->Parameters[0].Type = PARAM_V; - Cur->Parameters[0].Address = 0; - - if (Factor < 0.0) - { - // Simulate -1 by calculating it via a (guaranteed) register : - // We follow this : (-v0) - (1-v0) = -v0 - 1 + v0 = -1 - Cur->Parameters[0].Modifiers = (1 << ARGMOD_NEGATE); - Cur->Parameters[1] = Cur->Parameters[0]; - Cur->Parameters[1].Modifiers = (1 << ARGMOD_INVERT); - // Go on with a positive factor, to ease the scaling : - Factor = -Factor; - } - else - { - // Simulate 1 by calculating it via a (guaranteed) register : - // We follow this : (1-v0) - (-v0) = (1-v0) + v0 = 1 - Cur->Parameters[0].Modifiers = (1 << ARGMOD_INVERT); - Cur->Parameters[1] = Cur->Parameters[0]; - Cur->Parameters[1].Modifiers = (1 << ARGMOD_NEGATE); - } - - // Try to simulate all factors (0.5, 1.0 and 2.0) using an output modifier : - Cur->ScaleOutput(Factor); - - EmuLog(LOG_LEVEL::DEBUG, "; Changed MOV {const} into a SUB_factor 1-v0,-v0"); - } - else - { - EmuLog(LOG_LEVEL::DEBUG, "; Changed MOV {const} into a MOV c#"); - } - return true; - } - return false; -} - -bool 
PSH_XBOX_SHADER::SimplifyADD(PPSH_INTERMEDIATE_FORMAT Cur) -{ - // Is this an addition of s0+0 ? - if (Cur->Parameters[1].GetConstValue() == 0.0) - { - // Change it into a MOV (the first argument is already in-place) - Cur->Opcode = PO_MOV; - EmuLog(LOG_LEVEL::DEBUG, "; Changed ADD s0,0 into a MOV s0"); - return true; - } - return false; -} - -bool PSH_XBOX_SHADER::SimplifyMAD(PPSH_INTERMEDIATE_FORMAT Cur, int index) -{ - // Is this 0*s1+s2 or s0*0+s2 ? - if (Cur->Parameters[0].GetConstValue() == 0.0 - || Cur->Parameters[1].GetConstValue() == 0.0) - { - // Change it into s2 : - Cur->Opcode = PO_MOV; - Cur->Parameters[0] = Cur->Parameters[2]; - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0,0 into a MOV s0"); - return true; - } - - // Is this s0*s1+0 ? - if (Cur->Parameters[2].GetConstValue() == 0.0) - { - // Change it into s0*s1 : - Cur->Opcode = PO_MUL; - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0, s1,0 into a MUL s0, s1"); - return true; - } - - // Is this s0*1+s2 ? - if (Cur->Parameters[1].GetConstValue() == 1.0) - { - // Change it into s0+s2 : - Cur->Opcode = PO_ADD; - Cur->Parameters[1] = Cur->Parameters[2]; - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0,1,s2 into a ADD s0,s2"); - return true; - } - - // Is this s0*-1+s2 ? - if (Cur->Parameters[1].GetConstValue() == -1.0) - { - // Change it into s2-s0 : - Cur->Opcode = PO_SUB; - Cur->Parameters[1] = Cur->Parameters[0]; - Cur->Parameters[0] = Cur->Parameters[2]; - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0,-1,s2 into a SUB s2,s0"); - return true; - } - - PSH_INTERMEDIATE_FORMAT Ins = {}; - - // Is this 0.5*s1+s2 ? 
- if (Cur->Parameters[0].GetConstValue() == 0.5f && Cur->Parameters[1].UsesRegister()) - { - if (!Cur->Parameters[0].SetScaleConstRegister(0.5f, Recompiled)) - { - // Change it into s2 : - Cur->Opcode = PO_ADD; - Cur->Parameters[0] = Cur->Parameters[1]; - Cur->Parameters[1] = Cur->Parameters[2]; - - Ins.Initialize(PO_MOV); - Ins.Modifier = INSMOD_D2; - Ins.Output[0] = Ins.Parameters[0] = Cur->Parameters[1]; - Ins.CommentString = "; Inserted to perform division by 2"; - InsertIntermediate(&Ins, index); - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD 0.5,s1,s2 into a MOV_d2 s1, s1 ADD s1, s2"); - } - else - { - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD 0.5,s1,s2 into a MAD c#,s1,s2"); - } - return true; - } - - // Is this s0*0.5+s2 ? - if (Cur->Parameters[1].GetConstValue() == 0.5f && Cur->Parameters[0].UsesRegister()) - { - if (!Cur->Parameters[1].SetScaleConstRegister(0.5f, Recompiled)) - { - // Change it into s2 : - Cur->Opcode = PO_ADD; - Cur->Parameters[0] = Cur->Parameters[0]; - Cur->Parameters[1] = Cur->Parameters[2]; - - Ins.Initialize(PO_MOV); - Ins.Modifier = INSMOD_D2; - Ins.Output[0] = Ins.Parameters[0] = Cur->Parameters[0]; - Ins.CommentString = "; Inserted to perform division by 2"; - InsertIntermediate(&Ins, index); - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0,0.5,s2 into a MOV_d2 s0, s0 ADD s0, s2"); - } - else - { - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0,0.5,s2 into a MAD s0,c#,s2"); - } - return true; - } - return false; -} - -bool PSH_XBOX_SHADER::SimplifySUB(PPSH_INTERMEDIATE_FORMAT Cur) -{ - // Is this an subtraction of s0-0 ? - if (Cur->Parameters[1].GetConstValue() == 0.0) - { - // Change it into a MOV (the first argument is already in-place) - Cur->Opcode = PO_MOV; - EmuLog(LOG_LEVEL::DEBUG, "; Changed SUB x, 0 into a MOV x"); - return true; - } - return false; -} - -bool PSH_XBOX_SHADER::SimplifyMUL(PPSH_INTERMEDIATE_FORMAT Cur) -{ - // Is the result of this multiplication zero ? 
- if (Cur->Parameters[1].GetConstValue() == 0.0) - { - // Change it into a MOV (the 0 argument will be resolve in a recursive MOV fixup) : - Cur->Opcode = PO_MOV; - Cur->Parameters[0].SetConstValue(0.0); - EmuLog(LOG_LEVEL::DEBUG, "; Changed MUL s0,0 into a MOV 0"); - return true; - } - - // Is this a multiply-by-const ? - if (Cur->Parameters[1].Type == PARAM_VALUE) - { - // Change it into a simple MOV and scale the output instead : - Cur->Opcode = PO_MOV; - Cur->ScaleOutput(Cur->Parameters[1].GetConstValue()); - EmuLog(LOG_LEVEL::DEBUG, "; Changed MUL s0,{const} into a MOV_factor s0"); - return true; - } - return false; -} // SimplifyMUL - -bool PSH_XBOX_SHADER::SimplifyLRP(PPSH_INTERMEDIATE_FORMAT Cur, int index) -{ - // LRP calculates : d0=s0*s1+(1-s0)*s2 which can also be read as : d0=s0*(s1-s2)+s2 - - // Is the right part ((1-s0)*s2) zero? - if ((Cur->Parameters[0].GetConstValue() == 1.0) || (Cur->Parameters[2].GetConstValue() == 0.0)) - { - // Change it into a MUL (calculating the left part : s0*s1 : - Cur->Opcode = PO_MUL; - EmuLog(LOG_LEVEL::DEBUG, "; Changed LRP s0,s1,s2 (where (1-s0)*s2=0) into a MUL s0,s1"); - return true; - } - - // Is the left part (s0*s1) zero? - if ((Cur->Parameters[0].GetConstValue() == 0.0) || (Cur->Parameters[1].GetConstValue() == 0.0)) - { - // Change it into a MUL (calculating the right part : (1-s0)*s2) : - Cur->Opcode = PO_MUL; - Cur->Parameters[0].Invert(); - Cur->Parameters[1] = Cur->Parameters[2]; - EmuLog(LOG_LEVEL::DEBUG, "; Changed LRP s0,s1,s2 (where s0*s1=0) into a MUL (1-s0),s2"); - return true; - } - - // Is it d0=s0*s1+(1-s0)*1 ? - if (Cur->Parameters[2].GetConstValue() == 1.0) - { - // Change it into a d0=s0*s1+(1-s0) - Cur->Opcode = PO_MAD; - Cur->Parameters[2] = Cur->Parameters[0]; - Cur->Parameters[2].Invert(); - EmuLog(LOG_LEVEL::DEBUG, "; Changed LRP s0,s1,1 into a MAD s0,s1,1-s0"); - return true; - } - - // Is it d0=s0*(1-s2)+s2 ? 
- if (Cur->Parameters[1].GetConstValue() == 1.0) - { - // Change it into a d0=s0*(1-s2)+s2 - Cur->Opcode = PO_MAD; - Cur->Parameters[1] = Cur->Parameters[2]; - Cur->Parameters[1].Invert(); - EmuLog(LOG_LEVEL::DEBUG, "; Changed LRP s0,1,s2 into a MAD s0,1-s2,s2"); - return true; - } - - int output = NextFreeRegisterFromIndexUntil(index, PARAM_R, index, 0, Cur->Output[0].Address); - - if (output >= 0) - { - bool insert = false; - for (int p = 0; p < PSH_OPCODE_DEFS[Cur->Opcode]._In; ++p) - { - if (Cur->Output[0].Type == Cur->Parameters[p].Type - && Cur->Output[0].Address == Cur->Parameters[p].Address) - { - insert = true; - Cur->Parameters[p].Address = output; - Cur->Parameters[p].Type = PARAM_R; - } - } - if (insert) - { - PSH_INTERMEDIATE_FORMAT Ins = {}; - - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, output, 0); - Ins.Parameters[0].SetRegister(Cur->Output[0].Type, Cur->Output[0].Address, 0); - Ins.CommentString = "; Inserted to avoid LRP parameters referencing the output register"; - InsertIntermediate(&Ins, index); - EmuLog(LOG_LEVEL::DEBUG, "; Changed LRP s0,1,s2 into a MAD s0,1-s2,s2"); - return true; - } - } - - return false; -} // SimplifyLRP - -bool PSH_XBOX_SHADER::FixupCND(PPSH_INTERMEDIATE_FORMAT Cur, int index) -{ - PSH_INTERMEDIATE_FORMAT Ins = {}; - - // TODO: Look into using predicate register - Cur->Opcode = PO_CMP; - - int output = NextFreeRegisterFromIndexUntil(index, PARAM_R, index); - Ins.Initialize(PO_SUB); - Ins.Output[0].SetRegister(PARAM_R, output, Cur->Parameters[0].Mask); - Ins.Parameters[0] = Cur->Parameters[0]; - Ins.Parameters[1].SetScaleConstRegister(0.5f, Recompiled); - Cur->Parameters[0] = Ins.Output[0]; - Cur->Parameters[0].Modifiers = (1 << ARGMOD_NEGATE); - std::swap(Cur->Parameters[1], Cur->Parameters[2]); - Ins.CommentString = Cur->CommentString = "; Changed CND into SUB CMP"; - InsertIntermediate(&Ins, index); - EmuLog(LOG_LEVEL::DEBUG, "; Changed CND into SUB CMP"); - return true; -} - -bool 
PSH_XBOX_SHADER::FixupPixelShader() -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - bool Result = RemoveNops(); - - // TODO : Fixup writes to read-only registers (V0, V1) via another free register (if possible) - // TODO : Fixup the usage of non-existent register numbers (like FakeRegNr_Sum and FakeRegNr_Prod) - // TODO : Fixup the usage of the unsupported INSMOD_BIAS and INSMOD_BX2 instruction modifiers - // TODO : Use the INSMOD_SAT instruction modifier instead of the ARGMOD_SATURATE argument modifier - // TODO : Convert numeric arguments (-2, -1, 0, 1, 2) into modifiers on the other argument - - if (CombineInstructions()) - Result = true; - - if (MoveRemovableParametersRight()) - Result = true; - - // Simplify instructions, which can help to compress the result : - i = IntermediateCount; - while (i > StartPos) - { - --i; - Cur = &(Intermediate[i]); - - switch (Cur->Opcode) { - case PO_MOV: - if (SimplifyMOV(Cur)) - Result = true; - break; - - case PO_ADD: - if (SimplifyADD(Cur)) - Result = true; - break; - - case PO_MAD: - if (SimplifyMAD(Cur, i)) - Result = true; - break; - - case PO_SUB: - if (SimplifySUB(Cur)) - Result = true; - break; - - case PO_MUL: - if (SimplifyMUL(Cur)) - Result = true; - break; - - case PO_LRP: - if (SimplifyLRP(Cur, i)) - Result = true; - break; - - case PO_CND: - if (FixupCND(Cur, i)) - Result = true; - break; - } // case - } // for - - // If the above code made any alteration, repeat it as some changes require a followup (like MUL>MOV>NOP) : - if (Result) - { - Log("Fixup intermediate result"); - FixupPixelShader(); - } - return Result; -} // FixupPixelShader - -bool PSH_XBOX_SHADER::FixInvalidSrcSwizzle() -{ - int i, j; - PPSH_INTERMEDIATE_FORMAT Cur; - PPSH_IMD_ARGUMENT CurArg; - - bool Result = false; - for (i = StartPos; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - if (Cur->IsArithmetic()) - { - // Loop over the input arguments : - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._In; j++) - { - CurArg = 
&(Cur->Parameters[j]); - - // Fix "Invalid src swizzle" : - if (CurArg->Mask == MASK_RGB) - { - CurArg->Mask = MASK_RGBA; - Result = true; - } - } - } - } - return Result; -} - -bool PSH_XBOX_SHADER::FixMissingR0a() -// On the Xbox, the alpha portion of the R0 register is initialized to -// the alpha component of texture 0 if texturing is enabled for texture 0 : -{ - int R0aDefaultInsertPos; - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT NewIns = {}; - - // Detect a read of r0.a without a write, as we need to insert a "MOV r0.a, t0.a" as default (like the xbox has) : - R0aDefaultInsertPos = -1; - for (i = 0; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - if (Cur->Opcode < PO_TEX) // TODO : Check explicitly which instruction types are handled below - continue; - - // Make sure if we insert at all, it'll be after the DEF's : - if (R0aDefaultInsertPos < 0) - R0aDefaultInsertPos = i; - - // First, check if r0.a is read by this opcode : - if (Cur->ReadsFromRegister(PARAM_R, 0, MASK_A)) - { - R0aDefaultInsertPos = i; - break; - } - - // If this opcode writes to r0.a, we're done : - if (Cur->WritesToRegister(PARAM_R, 0, MASK_A)) - return false; - } - - if (R0aDefaultInsertPos >= 0) - { - // Insert a new opcode : MOV r0.a, t0.a - NewIns.Initialize(PO_MOV); - NewIns.Output[0].SetRegister(PARAM_R, 0, MASK_A); - NewIns.Parameters[0] = NewIns.Output[0]; - NewIns.Parameters[0].Type = PARAM_T; - NewIns.CommentString = "Inserted r0.a default"; - InsertIntermediate(&NewIns, R0aDefaultInsertPos); - return true; - } - return false; -} // FixMissingR0a - -bool PSH_XBOX_SHADER::FixMissingR1a() -// On the Xbox, the alpha portion of the R1 register is initialized to -// the alpha component of texture 1 if texturing is enabled for texture 1 : -{ - int R1aDefaultInsertPos; - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT NewIns = {}; - - // Detect a read of r1.a without a write, as we need to insert a "MOV r1.a, t1.a" as default (like 
the xbox has) : - R1aDefaultInsertPos = -1; - for (i = 0; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - if (Cur->Opcode < PO_TEX) // TODO : Check explicitly which instruction types are handled below - continue; - - // First, check if r1.a is read by this opcode : - if (Cur->ReadsFromRegister(PARAM_R, 1, MASK_A)) - { - // Make sure if we insert at all, it'll be after the DEF's : - if (R1aDefaultInsertPos < 0) - R1aDefaultInsertPos = i; - - R1aDefaultInsertPos = i; - break; - } - - // If this opcode writes to r1.a, we're done : - if (Cur->WritesToRegister(PARAM_R, 1, MASK_A)) - return false; - } - - if (R1aDefaultInsertPos >= 0) - { - // Insert a new opcode : MOV r1.a, t1.a - NewIns.Initialize(PO_MOV); - NewIns.Output[0].SetRegister(PARAM_R, 1, MASK_A); - NewIns.Parameters[0] = NewIns.Output[0]; - NewIns.Parameters[0].Type = PARAM_T; - NewIns.CommentString = "Inserted r1.a default"; - InsertIntermediate(&NewIns, R1aDefaultInsertPos); - return true; - } - - return false; -} // FixMissingR1a - -bool PSH_XBOX_SHADER::FixUninitializedReads() -// On the Xbox, the alpha portion of the R1 register is initialized to -// the alpha component of texture 1 if texturing is enabled for texture 1 : -{ - int R1aDefaultInsertPos; - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT NewIns = {}; - bool Result = false; - - int readPositions[32][4] = {}; - int writePositions[32][4] = {}; - int initPositions[32] = {}; - int initMasks[32] = {}; - - // Detect a read of r1.a without a write, as we need to insert a "MOV r1.a, t1.a" as default (like the xbox has) : - R1aDefaultInsertPos = -1; - for (i = 0; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - - for (int j = 0; j < MaxRegisterCount(PARAM_R); ++j) - { - // check reads - if (readPositions[j][0] == 0 && Cur->ReadsFromRegister(PARAM_R, j, MASK_R)) - { - readPositions[j][0] = i; - } - if (readPositions[j][1] == 0 && Cur->ReadsFromRegister(PARAM_R, j, MASK_G)) - { - readPositions[j][1] = i; - } 
- if (readPositions[j][2] == 0 && Cur->ReadsFromRegister(PARAM_R, j, MASK_B)) - { - readPositions[j][2] = i; - } - if (readPositions[j][3] == 0 && Cur->ReadsFromRegister(PARAM_R, j, MASK_A)) - { - readPositions[j][3] = i; - } - - // check writes - if (writePositions[j][0] == 0 && Cur->WritesToRegister(PARAM_R, j, MASK_R)) - { - writePositions[j][0] = i; - } - if (writePositions[j][1] == 0 && Cur->WritesToRegister(PARAM_R, j, MASK_G)) - { - writePositions[j][1] = i; - } - if (writePositions[j][2] == 0 && Cur->WritesToRegister(PARAM_R, j, MASK_B)) - { - writePositions[j][2] = i; - } - if (writePositions[j][3] == 0 && Cur->WritesToRegister(PARAM_R, j, MASK_A)) - { - writePositions[j][3] = i; - } - } - } - - for (int j = 0; j < MaxRegisterCount(PARAM_R); ++j) - { - int mask = 0; - int pos = IntermediateCount; - for (int i = 0; i < 4; ++i) - { - if (readPositions[j][i] <= writePositions[j][i] && readPositions[j][i] != 0) - { - mask |= (1 << i); - pos = std::min(pos, readPositions[j][i]); - } - } - - initPositions[j] = pos; - initMasks[j] = mask; - } - - NewIns.Initialize(PO_MOV); - NewIns.CommentString = "; Inserted to initialize register"; - for (int j = 0; j < MaxRegisterCount(PARAM_R); ++j) - { - int mask = initMasks[j]; - if (mask) - { - // Insert a new opcode : MOV r#.???, c0.??? 
- NewIns.Output[0].SetRegister(PARAM_R, j, mask); - NewIns.Parameters[0].SetScaleConstRegister(0.0f, Recompiled); - // r0 and r1 take their alpha from the respective texture coordinate - if (j < PSH_XBOX_MAX_R_REGISTER_COUNT) - { - mask &= MASK_RGB; - } - - InsertIntermediate(&NewIns, std::min(StartPos, initPositions[j])); - Result = true; - } - } - - return Result; -} // FixUninitializedReads - - -bool PSH_XBOX_SHADER::FixCoIssuedOpcodes() -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - bool Result = false; - // Since we're targetting m_PSVersion >= D3DPS_VERSION(2, 0), co-issued instructions are no longer supported, thus reset all IsCombined flags : - for (i = StartPos; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - if (Cur->IsArithmetic()) - { - if (Cur->IsCombined) - { - Cur->IsCombined = false; - Result = true; - } - } - } - return Result; -} - -bool PSH_XBOX_SHADER::FixInvalidDstRegister() -{ - int i, j; - PPSH_INTERMEDIATE_FORMAT Cur; - PPSH_IMD_ARGUMENT CurArg; - - bool Result = false; - for (i = IntermediateCount - 1; i >= StartPos; --i) - { - Cur = &(Intermediate[i]); - // Skip non-arithmetic opcodes - if (!Cur->IsArithmetic()) - continue; - - // Loop over the output arguments : - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._Out; j++) - { - CurArg = &(Cur->Output[j]); - - if (IsValidNativeOutputRegister(CurArg->Type, CurArg->Address)) - continue; - - int lastUsed = RegisterIsUsedFromIndexUntil(i + 1, CurArg->Type, CurArg->Address); - - PSH_ARGUMENT_TYPE dstType = PARAM_R; - int dstIndex = -1; - - if (IsValidNativeOutputRegister(PARAM_T)) - { - dstType = PARAM_T; - dstIndex = NextFreeRegisterFromIndexUntil(i + 1, PARAM_T, lastUsed); - } - - if (dstIndex == -1) - { - dstType = PARAM_R; - dstIndex = NextFreeRegisterFromIndexUntil(i + 1, PARAM_R, lastUsed); - } - - if (dstIndex != -1) - { - Result = true; - - if (Cur->ReadsFromRegister(CurArg->Type, CurArg->Address)) - { - if (lastUsed >= 0) ++lastUsed; - - PSH_INTERMEDIATE_FORMAT Ins = {}; - - 
Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(dstType, dstIndex, 0); - Ins.Parameters[0].SetRegister(CurArg->Type, CurArg->Address, 0); - InsertIntermediate(&Ins, i); - ++Cur; - CurArg = &(Cur->Output[j]); - } - - ReplaceInputRegisterFromIndexOnwards(i + 1, CurArg->Type, CurArg->Address, dstType, dstIndex, lastUsed); - - CurArg->Type = dstType; - CurArg->Address = dstIndex; - } - } - } - return Result; -} - -// TODO: Refactor and optimize -bool PSH_XBOX_SHADER::FixOverusedRegisters() -{ - int i; - - bool Result = false; - - PSH_INTERMEDIATE_FORMAT Ins = {}; - Ins.Initialize(PO_MOV); - - // For all opcodes, try to put constant and discarded arguments in the rightmost slot, to ease following analysis : - i = IntermediateCount; - while (i > StartPos) - { - --i; - - int InsertPos = i; - - // Skip this operation on LRP instructions - // This prevents "error X5765: Dest register for LRP cannot be the same as first or third source register" in WWE RAW2 - if (Intermediate[i].Opcode == PO_LRP) { - continue; - } - - // Handle PARAM_C, PARAM_V and PARAM_T (in that order) : - for (int t = PARAM_C; t >= PARAM_T; t--) { - enum PSH_ARGUMENT_TYPE param_t = (enum PSH_ARGUMENT_TYPE)t; - int max_total = (t == PARAM_C) ? 2 : (t == PARAM_V) ? 
999 : 1; - int addressCount = 0; - int total = 0; - while (Intermediate[i].ReadsFromRegister(param_t, -1, addressCount, total) && (addressCount > 1 || total > max_total)) - { - for (int p = 0; p < PSH_OPCODE_DEFS[Intermediate[i].Opcode]._In; ++p) - { - if (Intermediate[i].Parameters[p].Type == param_t) - { - int output = NextFreeRegisterFromIndexUntil(i, PARAM_R, i); - - // This inserts a MOV opcode that writes to R, reading from a C, V or T register - Ins.Output[0].SetRegister(PARAM_R, output, 0); - Ins.Parameters[0].SetRegister(Intermediate[i].Parameters[p].Type, Intermediate[i].Parameters[p].Address, 0); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - - ReplaceInputRegisterFromIndexOnwards(InsertPos, Intermediate[InsertPos].Parameters[p].Type, Intermediate[InsertPos].Parameters[p].Address, PARAM_R, output, InsertPos); - Result = true; - break; - } - } - } - } - } - return Result; -} // FixOverusedRegisters - -// TODO : FocusBlur sample needs a zero in 'cnd' opcode - -/* RPSRegisterObject */ - -void RPSRegisterObject::Decode(uint8_t Value, bool aIsAlpha) -{ - IsAlpha = aIsAlpha; - Reg = (PS_REGISTER)(Value); -} - -std::string RPSRegisterObject::DecodedToString() -{ - assert((PS_REGISTER_DISCARD <= Reg) && (Reg <= PS_REGISTER_EF_PROD)); - - return PS_RegisterStr[Reg + 1]; -} - -/* RPSInputRegister */ - -void RPSInputRegister::Decode(uint8_t Value, bool aIsAlpha) -{ - RPSRegisterObject::Decode(Value & PS_NoChannelMask, aIsAlpha); - - Channel = (PS_CHANNEL)(Value & PS_CHANNEL_ALPHA); - InputMapping = (PS_INPUTMAPPING)(Value & 0xe0); - - // Remove the above flags from the register : - Reg = (PS_REGISTER)(Reg & 0xf); - - // Check if the input Register is ZERO, in which case we want to allow the extended registers : - if (Reg == PS_REGISTER_ZERO) - { - switch (InputMapping) { - case PS_REGISTER_ONE: case PS_REGISTER_NEGATIVE_ONE: case PS_REGISTER_ONE_HALF: case PS_REGISTER_NEGATIVE_ONE_HALF: - // These input mapping have their own register - keep these in 
'Reg', so we can check for them : - Reg = (PS_REGISTER)(InputMapping); - break; - - case PS_INPUTMAPPING_EXPAND_NEGATE: - // This case has no separate PS_REGISTER define, but when applied to zero, also results in one : - Reg = PS_REGISTER_ONE; - break; - } - } -} - -std::string RPSInputRegister::DecodedToString() -{ - std::string Result; - std::string InputMappingStr = ""; - switch (Reg) { - case PS_REGISTER_ZERO: - { - Result = PS_RegisterStr[0]; - return Result; - } - case PS_REGISTER_ONE: - Result = PS_RegisterStr[0x11]; - break; - case PS_REGISTER_NEGATIVE_ONE: - Result = PS_RegisterStr[0x12]; - break; - case PS_REGISTER_ONE_HALF: - Result = PS_RegisterStr[0x13]; - break; - case PS_REGISTER_NEGATIVE_ONE_HALF: - Result = PS_RegisterStr[0x14]; - break; - default: - Result = RPSRegisterObject::DecodedToString(); - InputMappingStr = " | " + PS_InputMappingStr[(InputMapping >> 5) & 7]; - } - - // Render the channel as a string : - Result = Result + " | " + PS_ChannelStr[(Channel > 0) ? /*Alpha*/2 : (IsAlpha ? 
/*Blue*/1 : /*RGB*/0)] + InputMappingStr; - return Result; -} - -/* RPSCombinerOutput */ - -void RPSCombinerOutput::Decode(uint8_t Value, DWORD PSInputs, bool aIsAlpha) -{ - RPSRegisterObject::Decode(Value, aIsAlpha); - - // Decode PSAlphaInputs / PSRGBInputs : - Input1.Decode((PSInputs >> 8) & 0xFF, aIsAlpha); - Input2.Decode((PSInputs >> 0) & 0xFF, aIsAlpha); -} - -/* RPSCombinerStageChannel */ - -void RPSCombinerStageChannel::Decode(DWORD PSInputs, DWORD PSOutputs, bool aIsAlpha/* = false*/) -{ - // Get the combiner output flags : - CombinerOutputFlags = (PS_COMBINEROUTPUT)(PSOutputs >> 12); - - // Decompose the combiner output flags : - OutputSUM.OutputAB.DotProduct = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_DOT_PRODUCT) > 0; // false=Multiply, true=DotProduct - OutputSUM.OutputCD.DotProduct = (CombinerOutputFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) > 0; // false=Multiply, true=DotProduct - - if (!aIsAlpha) - { - OutputSUM.OutputAB.BlueToAlpha = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) > 0; // false=Alpha-to-Alpha, true=Blue-to-Alpha - OutputSUM.OutputCD.BlueToAlpha = (CombinerOutputFlags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) > 0; // false=Alpha-to-Alpha, true=Blue-to-Alpha - } - - // Decode PSAlphaOutputs / PSRGBOutputs and PSAlphaInputs / PSRGBInputs : - OutputSUM.OutputAB.Decode((PSOutputs >> 4) & 0xF, (PSInputs >> 16) & 0xFFFF, aIsAlpha); - OutputSUM.OutputCD.Decode((PSOutputs >> 0) & 0xF, (PSInputs >> 0) & 0xFFFF, aIsAlpha); - OutputSUM.Decode((PSOutputs >> 8) & 0xF, aIsAlpha); - - AB_CD_SUM = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_CD_MUX) == 0; // true=AB+CD, false=MUX(AB,CD) based on R0.a -} - // Note : On a hardware level, there are only 4 pixel shaders instructions present in the Nvidia NV2A GPU : // - xdd (dot/dot/discard) > calculating AB=A.B and CD=C.D // - xdm (dot/mul/discard) > calculating AB=A.B and CD=C*D @@ -5710,183 +98,885 @@ void RPSCombinerStageChannel::Decode(DWORD PSInputs, DWORD PSOutputs, bool aIsAl // 
"-C0_bias_x2" shifts range from [ 0..1] to [-1..1] // "-V0_bias_d2" shifts range from [-1..1] to [ 0..1] +/* RPSRegisterObject */ + +void RPSRegisterObject::Decode(uint8_t Value) +{ + Reg = (PS_REGISTER)(Value & PS_REGISTER_MASK); // = 0x0f + + // Validate correctness (see NOTE below) + if (Reg == 6) LOG_TEST_CASE("Unknown PS_REGISTER : 6"); + if (Reg == 7) LOG_TEST_CASE("Unknown PS_REGISTER : 7"); +} + +/* RPSInputRegister */ + +void RPSInputRegister::Decode(uint8_t Value, unsigned stage_nr, bool isRGB) +{ + RPSRegisterObject::Decode(Value); + + Channel = (PS_CHANNEL)(Value & PS_CHANNEL_MASK); // = 0x10 + InputMapping = (PS_INPUTMAPPING)(Value & PS_INPUTMAPPING_MASK); // = 0xe0 + + if (stage_nr == 9) { + // In final combiner stage, convert C0 into FC0, and C1 into FC1, to discern them as separate registers + if (Reg == PS_REGISTER_C0) Reg = PS_REGISTER_FC0; + if (Reg == PS_REGISTER_C1) Reg = PS_REGISTER_FC1; + } + + // Validate correctness (see NOTE below) + if (stage_nr <= xbox::X_PSH_COMBINECOUNT) { + if (Reg == PS_REGISTER_FOG) { + if (!isRGB) LOG_TEST_CASE("PS_REGISTER_FOG input not allowed in Alpha register combiner"); + else if (Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_REGISTER_FOG.a input not allowed in RGB register combiner"); + } + if (Reg == PS_REGISTER_V1R0_SUM) LOG_TEST_CASE("PS_REGISTER_V1R0_SUM input only allowed in final combiner"); + if (Reg == PS_REGISTER_EF_PROD) LOG_TEST_CASE("PS_REGISTER_EF_PROD input only allowed in final combiner"); + } + else { // final combiner + if (InputMapping == PS_INPUTMAPPING_EXPAND_NORMAL) LOG_TEST_CASE("PS_INPUTMAPPING_EXPAND_NORMAL not allowed in final combiner"); + if (InputMapping == PS_INPUTMAPPING_EXPAND_NEGATE) LOG_TEST_CASE("PS_INPUTMAPPING_EXPAND_NEGATE not allowed in final combiner"); + if (InputMapping == PS_INPUTMAPPING_HALFBIAS_NORMAL) LOG_TEST_CASE("PS_INPUTMAPPING_HALFBIAS_NORMAL not allowed in final combiner"); + if (InputMapping == PS_INPUTMAPPING_HALFBIAS_NEGATE) 
LOG_TEST_CASE("PS_INPUTMAPPING_HALFBIAS_NEGATE not allowed in final combiner"); + if (InputMapping == PS_INPUTMAPPING_SIGNED_IDENTITY) LOG_TEST_CASE("PS_INPUTMAPPING_SIGNED_IDENTITY not allowed in final combiner"); + if (InputMapping == PS_INPUTMAPPING_SIGNED_NEGATE) LOG_TEST_CASE("PS_INPUTMAPPING_SIGNED_NEGATE not allowed in final combiner"); + } +} + +/* RPSCombinerOutput */ + +void RPSCombinerOutput::Decode(uint8_t Value, uint16_t PSInputs, unsigned stage_nr, bool isRGB) +{ + RPSRegisterObject::Decode(Value); + + // Decode PSAlphaInputs / PSRGBInputs : + Input[0].Decode((uint8_t)(PSInputs >> 8), stage_nr, isRGB); + Input[1].Decode((uint8_t)(PSInputs >> 0), stage_nr, isRGB); + + // Validate correctness (see NOTE below) + if (Reg == PS_REGISTER_C0) LOG_TEST_CASE("PS_REGISTER_C0 not allowed as output"); + if (Reg == PS_REGISTER_C1) LOG_TEST_CASE("PS_REGISTER_C1 not allowed as output"); + if (Reg == PS_REGISTER_FOG) LOG_TEST_CASE("PS_REGISTER_FOG not allowed as output"); + if (Reg == PS_REGISTER_V1R0_SUM) LOG_TEST_CASE("PS_REGISTER_V1R0_SUM not allowed as output"); + if (Reg == PS_REGISTER_EF_PROD) LOG_TEST_CASE("PS_REGISTER_EF_PROD not allowed as output"); +} + +/* RPSCombinerStageChannel */ + +void RPSCombinerStageChannel::Decode(uint32_t PSInputs, uint32_t PSOutputs, unsigned stage_nr, bool isRGB) +{ + // Decode PSAlphaOutputs / PSRGBOutputs and PSAlphaInputs / PSRGBInputs : + OutputCD.Decode((uint8_t)(PSOutputs >> 0), (uint16_t)(PSInputs >> 0 ), stage_nr, isRGB); + OutputAB.Decode((uint8_t)(PSOutputs >> 4), (uint16_t)(PSInputs >> 16), stage_nr, isRGB); + OutputMUX_SUM.Decode((uint8_t)(PSOutputs >> 8)); + + // Get the combiner output flags : + PS_COMBINEROUTPUT CombinerOutputFlags = (PS_COMBINEROUTPUT)(PSOutputs >> 12); + + // Decompose the combiner output flags : + OutputCD.DotProduct = (CombinerOutputFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) > 0; // False=Multiply, True=DotProduct + OutputAB.DotProduct = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_DOT_PRODUCT) 
> 0; // False=Multiply, True=DotProduct + AB_CD_MUX = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_CD_MUX) > 0; // False=AB+CD, True=MUX(AB,CD) based on R0.a + CombinerOutputMapping = (PS_COMBINEROUTPUT_OUTPUTMAPPING)(CombinerOutputFlags & PS_COMBINEROUTPUT_OUTPUTMAPPING_MASK); // = 0x38 + OutputCD.BlueToAlpha = (CombinerOutputFlags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) >> 6; // 0=Alpha-to-Alpha, 1=Blue-to-Alpha + OutputAB.BlueToAlpha = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) >> 7; // 0=Alpha-to-Alpha, 1=Blue-to-Alpha + + // Discover test-cases (see TODO below) + // Check for 'discard-all-outputs' + if (OutputAB.DotProduct || OutputCD.DotProduct) { + if ((OutputAB.Reg == PS_REGISTER_DISCARD) && (OutputCD.Reg == PS_REGISTER_DISCARD)) LOG_TEST_CASE("All two outputs discarded"); + } else { + // if ((OutputAB.Reg == PS_REGISTER_DISCARD) && (OutputCD.Reg == PS_REGISTER_DISCARD) && (OutputMUX_SUM.Reg == PS_REGISTER_DISCARD)) LOG_TEST_CASE("All three outputs discarded"); // Test-case : XDK sample : Minnaert (on Stage2.Alpha) + } + + // Validate correctness (see NOTE below) + if ((PSOutputs & ~0x000FFFFF) > 0) LOG_TEST_CASE("Unknown PS_COMBINEROUTPUT flag bits detected"); + if (CombinerOutputMapping == PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2_BIAS) LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTLEFT_2_BIAS unsupported on NV2A?"); + if (CombinerOutputMapping == PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS) LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTRIGHT_1_BIAS unsupported on NV2A?"); + if (isRGB) { + if (OutputMUX_SUM.Reg != PS_REGISTER_DISCARD) { + if (OutputCD.DotProduct) LOG_TEST_CASE("PS_COMBINEROUTPUT_CD_DOT_PRODUCT detected without PS_REGISTER_DISCARD in MUX_SUM"); + if (OutputAB.DotProduct) LOG_TEST_CASE("PS_COMBINEROUTPUT_AB_DOT_PRODUCT detected without PS_REGISTER_DISCARD in MUX_SUM"); + } + if (OutputCD.DotProduct) if (!OutputAB.DotProduct) LOG_TEST_CASE("PS_COMBINEROUTPUT_CD_DOT_PRODUCT detected without PS_COMBINEROUTPUT_AB_DOT_PRODUCT (so, xmd 
'opcode')"); // Need test-cases Note : Undefined xmd (mul/dot) *is* supported in CxbxPixelShaderTemplate.hlsl + } else { // DotProduct and BlueToAlpha are not valid for Alpha + if (OutputCD.DotProduct) LOG_TEST_CASE("PS_COMBINEROUTPUT_CD_DOT_PRODUCT shouldn't be set for Alpha"); + if (OutputAB.DotProduct) LOG_TEST_CASE("PS_COMBINEROUTPUT_AB_DOT_PRODUCT shouldn't be set for Alpha"); + if (OutputCD.BlueToAlpha) LOG_TEST_CASE("PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA shouldn't be set for Alpha"); + if (OutputAB.BlueToAlpha) LOG_TEST_CASE("PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA shouldn't be set for Alpha"); + } +} + /* RPSFinalCombiner */ -void RPSFinalCombiner::Decode(const DWORD PSFinalCombinerInputsABCD, const DWORD PSFinalCombinerInputsEFG, const DWORD PSFinalCombinerConstants) +void RPSFinalCombiner::Decode(const uint32_t PSFinalCombinerInputsABCD, const uint32_t PSFinalCombinerInputsEFG) { - InputA.Decode((PSFinalCombinerInputsABCD >> 24) & 0xFF, /*aIsAlpha=*/false); - InputB.Decode((PSFinalCombinerInputsABCD >> 16) & 0xFF, /*aIsAlpha=*/false); - InputC.Decode((PSFinalCombinerInputsABCD >> 8) & 0xFF, /*aIsAlpha=*/false); - InputD.Decode((PSFinalCombinerInputsABCD >> 0) & 0xFF, /*aIsAlpha=*/false); + Input[0].Decode((uint8_t)(PSFinalCombinerInputsABCD >> 24), /*stage_nr=*/9, /*isRGB=*/true); + Input[1].Decode((uint8_t)(PSFinalCombinerInputsABCD >> 16), /*stage_nr=*/9, /*isRGB=*/true); + Input[2].Decode((uint8_t)(PSFinalCombinerInputsABCD >> 8), /*stage_nr=*/9, /*isRGB=*/true); + Input[3].Decode((uint8_t)(PSFinalCombinerInputsABCD >> 0), /*stage_nr=*/9, /*isRGB=*/true); + Input[4].Decode((uint8_t)(PSFinalCombinerInputsEFG >> 24), /*stage_nr=*/9, /*isRGB=*/true); + Input[5].Decode((uint8_t)(PSFinalCombinerInputsEFG >> 16), /*stage_nr=*/9, /*isRGB=*/true); + Input[6].Decode((uint8_t)(PSFinalCombinerInputsEFG >> 8), /*stage_nr=*/9, /*isRGB=*/false); // Note : Final combiner input G must be a single component, and must thus be decoded as Alpha + PS_FINALCOMBINERSETTING 
FinalCombinerSettingFlags = (PS_FINALCOMBINERSETTING)((PSFinalCombinerInputsEFG >> 0) & 0xFF); - InputE.Decode((PSFinalCombinerInputsEFG >> 24) & 0xFF, /*aIsAlpha=*/false); - InputF.Decode((PSFinalCombinerInputsEFG >> 16) & 0xFF, /*aIsAlpha=*/false); - InputG.Decode((PSFinalCombinerInputsEFG >> 8) & 0xFF, /*aIsAlpha=*/false); - FinalCombinerFlags = (PS_FINALCOMBINERSETTING)((PSFinalCombinerInputsEFG >> 0) & 0xFF); + ComplementV1 = (FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_V1) > 0; + ComplementR0 = (FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_R0) > 0; + ClampSum = (FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_CLAMP_SUM) > 0; - FinalCombinerC0Mapping = (PSFinalCombinerConstants >> 0) & 0xF; - FinalCombinerC1Mapping = (PSFinalCombinerConstants >> 4) & 0xF; - dwPS_GLOBALFLAGS = (PSFinalCombinerConstants >> 8) & 0x1; + // Discover test-cases (see TODO below) + // if (Input[0].Channel != PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_RGB/PS_CHANNEL_BLUE detected on final combiner A input"); // Note : test-case ModifyPixelShader uses PS_REGISTER_FOG.rgb and seems to expect .rgb handling (not PS_CHANNEL_BLUE's .b) + if (Input[4].Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner E input"); // Need test-case to determine how this should behave (calculating EF_PROD) : .aaa instead of .rgb? + if (Input[5].Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner F input"); // Need test-case to determine how this should behave (calculating EF_PROD) : .aaa instead of .rgb? 
+ // if (Input[6].Channel == PS_CHANNEL_BLUE) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner G input"); // PS_CHANNEL_BLUE (==0==PS_CHANNEL_RGB) uses G.b + // if (Input[6].Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner G input"); // PS_CHANNEL_ALPHA (==1) uses .a Test-case : XDK samples BumpDemo,BumpEarth,BumpLens,Explosion + + // Validate correctness (see NOTE below) + if ((FinalCombinerSettingFlags & ~0xE0) > 0) LOG_TEST_CASE("Unknown FinalCombinerSetting bits detected"); } -void XTL_DumpPixelShaderToFile(xbox::X_D3DPIXELSHADERDEF *pPSDef) +/* DecodedRegisterCombiner */ + +void DecodedRegisterCombiner::GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]) { - static int PshNumber = 0; // Keep track of how many pixel shaders we've attempted to convert. - // Don't dump more than 100 shaders, to prevent cluttering the filesystem : - if (PshNumber >= 100) - return; + for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + psTextureModes[i] = (PS_TEXTUREMODES)((pPSDef->PSTextureModes >> (i * 5)) & PS_TEXTUREMODES_MASK); - char szPSDef[32]; + // Discover test-cases (see TODO below) + // if (psTextureModes[i] == PS_TEXTUREMODES_NONE) LOG_TEST_CASE("PS_TEXTUREMODES_NONE"); + // if (psTextureModes[i] == PS_TEXTUREMODES_PROJECT2D) LOG_TEST_CASE("PS_TEXTUREMODES_PROJECT2D"); + if (psTextureModes[i] == PS_TEXTUREMODES_PROJECT3D) LOG_TEST_CASE("PS_TEXTUREMODES_PROJECT3D"); // Test-case: XDK sample TechCertGame,NoSortAlphaBlend,VolumeLight + if (psTextureModes[i] == PS_TEXTUREMODES_CUBEMAP) LOG_TEST_CASE("PS_TEXTUREMODES_CUBEMAP"); // Test-case : XDK sample TechCertGame,Minnaert TODO : More test cases needed + if (psTextureModes[i] == PS_TEXTUREMODES_PASSTHRU) LOG_TEST_CASE("PS_TEXTUREMODES_PASSTHRU"); // Test-case : XDK sample BumpDemo TODO : More test cases needed + if (psTextureModes[i] == PS_TEXTUREMODES_CLIPPLANE) LOG_TEST_CASE("PS_TEXTUREMODES_CLIPPLANE"); // 
Test-case : XDK sample UserClipPlane TODO : More test cases needed + // if (psTextureModes[i] == PS_TEXTUREMODES_BUMPENVMAP) LOG_TEST_CASE("PS_TEXTUREMODES_BUMPENVMAP"); // Test-case : XDK sample BumpEarth, BumpLens + if (psTextureModes[i] == PS_TEXTUREMODES_BUMPENVMAP_LUM) LOG_TEST_CASE("PS_TEXTUREMODES_BUMPENVMAP_LUM"); // Test-case : XDK sample BumpEarth TODO : More test cases needed + if (psTextureModes[i] == PS_TEXTUREMODES_BRDF) LOG_TEST_CASE("PS_TEXTUREMODES_BRDF"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_ST) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ST"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_ZW) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ZW"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_DIFF) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_DIFF"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_SPEC) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_SPEC"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_STR_3D) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_STR_3D"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_STR_CUBE) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_STR_CUBE"); + if (psTextureModes[i] == PS_TEXTUREMODES_DPNDNT_AR) LOG_TEST_CASE("PS_TEXTUREMODES_DPNDNT_AR"); + if (psTextureModes[i] == PS_TEXTUREMODES_DPNDNT_GB) LOG_TEST_CASE("PS_TEXTUREMODES_DPNDNT_GB"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOTPRODUCT) LOG_TEST_CASE("PS_TEXTUREMODES_DOTPRODUCT"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST"); - sprintf(szPSDef, "PSDef%.03d.txt", PshNumber++); - FILE* out = fopen(szPSDef, "w"); - if (out) - { - fprintf(out, PSH_XBOX_SHADER::OriginalToString(pPSDef).c_str()); - fclose(out); - } + // Validate correctness (see NOTE below) + if (psTextureModes[i] == PS_TEXTUREMODES_BUMPENVMAP) if (i < 1) LOG_TEST_CASE("PS_TEXTUREMODES_BUMPENVMAP only allowed in stage 1, 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_BUMPENVMAP_LUM) if (i < 1) LOG_TEST_CASE("PS_TEXTUREMODES_BUMPENVMAP_LUM only allowed 
in stage 1, 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_BRDF) if (i < 2) LOG_TEST_CASE("PS_TEXTUREMODES_BRDF only allowed in stage 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_ST) { + if (i < 2) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ST only allowed in stage 2 or 3"); + if ((i > 0) && (psTextureModes[i - 1] != PS_TEXTUREMODES_DOTPRODUCT)) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ST only allowed after PS_TEXTUREMODES_DOTPRODUCT"); // i > 0 : stage 0 has no previous stage (avoids reading psTextureModes[-1]) + } + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_ZW) { + if (i < 2) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ZW only allowed in stage 2 or 3"); + if ((i > 0) && (psTextureModes[i - 1] != PS_TEXTUREMODES_DOTPRODUCT)) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ZW only allowed after PS_TEXTUREMODES_DOTPRODUCT"); // i > 0 : stage 0 has no previous stage (avoids reading psTextureModes[-1]) + } + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_DIFF) if (i != 2) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_DIFF only allowed in stage 2"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_SPEC) if (i < 3) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_SPEC only allowed in stage 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_STR_3D) if (i < 3) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_STR_3D only allowed in stage 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_STR_CUBE) if (i < 3) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_STR_CUBE only allowed in stage 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DPNDNT_AR) if (i < 1) LOG_TEST_CASE("PS_TEXTUREMODES_DPNDNT_AR only allowed in stage 1, 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DPNDNT_GB) if (i < 1) LOG_TEST_CASE("PS_TEXTUREMODES_DPNDNT_GB only allowed in stage 1, 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOTPRODUCT) if (i < 1 || i > 2) LOG_TEST_CASE("PS_TEXTUREMODES_DOTPRODUCT only allowed in stage 1 or 2"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST) if (i < 3) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST only allowed in 3"); + if (psTextureModes[i] > PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST) LOG_TEST_CASE("Invalid PS_TEXTUREMODES in stage?"); + } + + //
Validate correctness (see NOTE below) + if ((pPSDef->PSTextureModes & ~0x000FFFFF) > 0) LOG_TEST_CASE("Unknown PSTextureModes bits detected"); } -PSH_RECOMPILED_SHADER XTL_EmuRecompilePshDef(xbox::X_D3DPIXELSHADERDEF *pPSDef) +void DecodedRegisterCombiner::GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_DOTMAPPING psDotMapping[xbox::X_D3DTS_STAGECOUNT]) { - uint32_t PSVersion = D3DPS_VERSION(2, 0); // Use pixel shader model 2.0 by default + psDotMapping[0] = (PS_DOTMAPPING)(0); + for (int i = 1; i < xbox::X_D3DTS_STAGECOUNT; i++) { + psDotMapping[i] = (PS_DOTMAPPING)((pPSDef->PSDotMapping >> ((i - 1) * 4)) & PS_DOTMAPPING_MASK); - extern D3DCAPS g_D3DCaps; - - if (g_D3DCaps.PixelShaderVersion > D3DPS_VERSION(3, 0)) { - // TODO : Test PSVersion = D3DPS_VERSION(3, 0); // g_D3DCaps.PixelShaderVersion; - // TODO : Make the pixel shader version configurable + // Discover test-cases (see TODO below) + // if (psDotMapping[i] == PS_DOTMAPPING_ZERO_TO_ONE) LOG_TEST_CASE("PS_DOTMAPPING_ZERO_TO_ONE"); // Note : Most common scenario, no need for test-cases + if (psDotMapping[i] == PS_DOTMAPPING_MINUS1_TO_1_D3D) LOG_TEST_CASE("PS_DOTMAPPING_MINUS1_TO_1_D3D"); // Test-case : XDK samples BumpDemo, Minnaert + if (psDotMapping[i] == PS_DOTMAPPING_MINUS1_TO_1_GL) LOG_TEST_CASE("PS_DOTMAPPING_MINUS1_TO_1_GL"); + if (psDotMapping[i] == PS_DOTMAPPING_MINUS1_TO_1) LOG_TEST_CASE("PS_DOTMAPPING_MINUS1_TO_1"); + if (psDotMapping[i] == PS_DOTMAPPING_HILO_1) LOG_TEST_CASE("PS_DOTMAPPING_HILO_1"); + if (psDotMapping[i] == PS_DOTMAPPING_HILO_HEMISPHERE_D3D) LOG_TEST_CASE("PS_DOTMAPPING_HILO_HEMISPHERE_D3D"); + if (psDotMapping[i] == PS_DOTMAPPING_HILO_HEMISPHERE_GL) LOG_TEST_CASE("PS_DOTMAPPING_HILO_HEMISPHERE_GL"); + if (psDotMapping[i] == PS_DOTMAPPING_HILO_HEMISPHERE) LOG_TEST_CASE("PS_DOTMAPPING_HILO_HEMISPHERE"); } - PSH_XBOX_SHADER PSH = {}; - PSH.SetPSVersion(PSVersion); - PSH.Decode(pPSDef); - return PSH.Convert(pPSDef); + // Validate correctness (see NOTE below) + if 
((pPSDef->PSDotMapping & ~0x00000777) > 0) LOG_TEST_CASE("Unknown PSDotMapping bits detected"); } -// From Dxbx uState.pas : - -PSH_RECOMPILED_SHADER DxbxRecompilePixelShader(xbox::X_D3DPIXELSHADERDEF *pPSDef) +void DecodedRegisterCombiner::GetPSCompareModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, bool psCompareModes[xbox::X_D3DTS_STAGECOUNT][4]) { -static const - char *szDiffusePixelShader = - "ps_2_x\n" - "dcl_2d s0\n" - "dcl t0.xy\n" - "texld r0, t0, s0\n" - "mov oC0, r0\n"; - std::string ConvertedPixelShaderStr; - DWORD hRet; - LPD3DXBUFFER pShader; - LPD3DXBUFFER pErrors; - DWORD *pFunction; - - // Attempt to recompile PixelShader - PSH_RECOMPILED_SHADER Result = XTL_EmuRecompilePshDef(pPSDef); - ConvertedPixelShaderStr = Result.NewShaderStr; - - // assemble the shader - pShader = nullptr; - pErrors = nullptr; - hRet = D3DXAssembleShader( - ConvertedPixelShaderStr.c_str(), - ConvertedPixelShaderStr.length(), - /*pDefines=*/nullptr, - /*pInclude=*/nullptr, - /*Flags=*/0, // D3DXASM_DEBUG, - /*ppCompiledShader=*/&pShader, - /*ppCompilationErrors*/&pErrors); - - if (hRet != D3D_OK) - { - EmuLog(LOG_LEVEL::WARNING, "Could not create pixel shader"); - EmuLog(LOG_LEVEL::WARNING, std::string((char*)pErrors->GetBufferPointer(), pErrors->GetBufferSize()).c_str()); - - printf(ConvertedPixelShaderStr.c_str()); - - hRet = D3DXAssembleShader( - szDiffusePixelShader, - strlen(szDiffusePixelShader), - /*pDefines=*/nullptr, - /*pInclude=*/nullptr, - /*Flags=*/0, // Was D3DXASM_SKIPVALIDATION, - /*ppCompiledShader=*/&pShader, - /*ppCompilationErrors*/&pErrors); - - if (hRet != D3D_OK) { - EmuLog(LOG_LEVEL::WARNING, "Could not create pixel shader"); - EmuLog(LOG_LEVEL::WARNING, std::string((char*)pErrors->GetBufferPointer(), pErrors->GetBufferSize()).c_str()); - CxbxKrnlCleanup("Cannot fall back to the most simple pixel shader!"); + for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + uint32_t CompareMode = (pPSDef->PSCompareMode >> (i * 4)) & PS_COMPAREMODE_MASK; + 
psCompareModes[i][0] = (CompareMode & PS_COMPAREMODE_S_GE) > 0; + psCompareModes[i][1] = (CompareMode & PS_COMPAREMODE_T_GE) > 0; + psCompareModes[i][2] = (CompareMode & PS_COMPAREMODE_R_GE) > 0; + psCompareModes[i][3] = (CompareMode & PS_COMPAREMODE_Q_GE) > 0; } - EmuLog(LOG_LEVEL::WARNING, "We're lying about the creation of a pixel shader!"); - } + // Validate correctness (see NOTE below) + if ((pPSDef->PSCompareMode & ~0x0000FFFF) > 0) LOG_TEST_CASE("Unknown PSCompareMode bits detected"); +} - if (pShader) - { - pFunction = (DWORD*)(pShader->GetBufferPointer()); - if (hRet == D3D_OK) { - // redirect to windows d3d - hRet = g_pD3DDevice->CreatePixelShader - ( - pFunction, - &Result.ConvertedHandle - ); +void DecodedRegisterCombiner::GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDef, int psInputTexture[xbox::X_D3DTS_STAGECOUNT]) +{ + psInputTexture[0] = -1; // Stage 0 has no predecessors (should not be used) + psInputTexture[1] = 0; // Stage 1 can only use stage 0 + psInputTexture[2] = (pPSDef->PSInputTexture >> 16) & 0x1; // Stage 2 can use stage 0 or 1 + psInputTexture[3] = (pPSDef->PSInputTexture >> 20) & 0x3; // Stage 3 can only use stage 0, 1 or 2 - if (hRet != D3D_OK) { - printf(D3DErrorString(hRet)); - } + // Discover test-cases (see TODO below) + if (psInputTexture[2] == 0) LOG_TEST_CASE("PS_INPUTTEXTURE(2) uses texture 0"); + // if (psInputTexture[2] == 1) LOG_TEST_CASE("PS_INPUTTEXTURE(2) uses texture 1"); // Test-case : XDK sample BumpEarth,Explosion,ZSprite + if (psInputTexture[3] == 0) LOG_TEST_CASE("PS_INPUTTEXTURE(3) uses texture 0"); + // if (psInputTexture[3] == 1) LOG_TEST_CASE("PS_INPUTTEXTURE(3) uses texture 1"); // Test-case : XDK sample Explosion,ZSprite + if (psInputTexture[3] == 2) LOG_TEST_CASE("PS_INPUTTEXTURE(3) uses texture 2"); + + // Validate correctness (see NOTE below) + if (psInputTexture[3] == 3) LOG_TEST_CASE("PS_INPUTTEXTURE(3) incorrectly uses texture 3"); + if ((pPSDef->PSInputTexture & ~0x00310000) > 0) 
LOG_TEST_CASE("Unknown PSInputTexture bits detected"); +} + +void DecodedRegisterCombiner::Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef) +{ + NumberOfCombiners = (pPSDef->PSCombinerCount >> 0) & 0xF; + uint32_t CombinerCountFlags = (pPSDef->PSCombinerCount >> 8); // = PS_COMBINERCOUNTFLAGS + + CombinerMuxesOnMsb = (CombinerCountFlags & PS_COMBINERCOUNT_MUX_MSB) > 0; + CombinerHasUniqueC0 = (CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C0) > 0; + CombinerHasUniqueC1 = (CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C1) > 0; + + hasFinalCombiner = (pPSDef->PSFinalCombinerInputsABCD > 0) || (pPSDef->PSFinalCombinerInputsEFG > 0); + + GetPSTextureModes(pPSDef, PSTextureModes); + GetPSCompareModes(pPSDef, PSCompareMode); + GetPSDotMapping(pPSDef, PSDotMapping); + GetPSInputTexture(pPSDef, PSInputTexture); + + for (unsigned i = 0; i < NumberOfCombiners; i++) { + Combiners[i].RGB.Decode(pPSDef->PSRGBInputs[i], pPSDef->PSRGBOutputs[i], /*stage_nr=*/i, /*isRGB=*/true); + Combiners[i].Alpha.Decode(pPSDef->PSAlphaInputs[i], pPSDef->PSAlphaOutputs[i], /*stage_nr=*/i, /*isRGB=*/false); } - // Dxbx note : We must release pShader here, else we would have a resource leak! - pShader->Release(); - pShader = nullptr; - } + if (hasFinalCombiner) { + FinalCombiner.Decode(pPSDef->PSFinalCombinerInputsABCD, pPSDef->PSFinalCombinerInputsEFG); + } - // Dxbx addition : We release pErrors here (or it would become a resource leak!) 
- if (pErrors) - { - pErrors->Release(); - pErrors = nullptr; - } - return Result; -} // DxbxRecompilePixelShader + TexModeAdjust = ((pPSDef->PSFinalCombinerConstants >> PS_GLOBALFLAGS_SHIFT) & PS_GLOBALFLAGS_TEXMODE_ADJUST) > 0; + + // Discover test-cases (see TODO below) + if (NumberOfCombiners == 0) LOG_TEST_CASE("NumberOfCombiners is zero"); + if (!CombinerMuxesOnMsb) LOG_TEST_CASE("PS_COMBINERCOUNT_MUX_LSB detected"); // Test case required for how to implement the FCS_MUX check on LSB (see PS_COMBINERCOUNT_MUX_LSB in CxbxPixelShaderTemplate.hlsl) Note : test-case ModifyPixelShader hits this by mistake + if (TexModeAdjust) LOG_TEST_CASE("PS_GLOBALFLAGS_TEXMODE_ADJUST detected"); + + // Validate correctness (see NOTE below) + if (NumberOfCombiners > 8) LOG_TEST_CASE("NumberOfCombiners bigger than maximum (of 8)"); + if ((pPSDef->PSCombinerCount & ~0x0001110F) > 0) LOG_TEST_CASE("Unknown PSCombinerCount bits detected"); +} + +// * TODO : For all "Discover test-cases" LOG_TEST_CASE's that lack sufficient test-case mentions, find some, note them in an EOL comment, and comment out the entire check. +// * NOTE : For all "Validate correctness" LOG_TEST_CASE's that ever get hit, investigate what caused it, what should be done, implement that, and update the verification. 
+ +/* PSH_RECOMPILED_SHADER */ + +typedef struct s_CxbxPSDef { + xbox::X_D3DPIXELSHADERDEF PSDef; + xbox::X_D3DRESOURCETYPE ActiveTextureTypes[xbox::X_D3DTS_STAGECOUNT]; + bool DecodedTexModeAdjust; + bool DecodedHasFinalCombiner; + bool RenderStateFogEnable; + bool RenderStateSpecularEnable; + bool AlphaKill[4]; // Read from XboxTextureStates.Get(stage, xbox::X_D3DTSS_ALPHAKILL); + + bool IsEquivalent(const s_CxbxPSDef &Another) + { + // Only compare the [*]-marked members, which forms the unique shader declaration (ignore the constants and most Xbox Direct3D8 run-time fields) : + // [*] DWORD PSAlphaInputs[8]; // X_D3DRS_PSALPHAINPUTS0..X_D3DRS_PSALPHAINPUTS7 : Alpha inputs for each stage + // [*] DWORD PSFinalCombinerInputsABCD; // X_D3DRS_PSFINALCOMBINERINPUTSABCD : Final combiner inputs + // [*] DWORD PSFinalCombinerInputsEFG; // X_D3DRS_PSFINALCOMBINERINPUTSEFG : Final combiner inputs (continued) + if (memcmp(&(PSDef.PSAlphaInputs[0]), &(Another.PSDef.PSAlphaInputs[0]), (8 + 1 + 1) * sizeof(DWORD)) != 0) + return false; + + // [-] DWORD PSConstant0[8]; // X_D3DRS_PSCONSTANT0_0..X_D3DRS_PSCONSTANT0_7 : C0 for each stage + // [-] DWORD PSConstant1[8]; // X_D3DRS_PSCONSTANT1_0..X_D3DRS_PSCONSTANT1_7 : C1 for each stage + // [*] DWORD PSAlphaOutputs[8]; // X_D3DRS_PSALPHAOUTPUTS0..X_D3DRS_PSALPHAOUTPUTS7 : Alpha output for each stage + // [*] DWORD PSRGBInputs[8]; // X_D3DRS_PSRGBINPUTS0..X_D3DRS_PSRGBINPUTS7 : RGB inputs for each stage + // [*] DWORD PSCompareMode; // X_D3DRS_PSCOMPAREMODE : Compare modes for clipplane texture mode + if (memcmp(&(PSDef.PSAlphaOutputs[0]), &(Another.PSDef.PSAlphaOutputs[0]), (8 + 8 + 1) * sizeof(DWORD)) != 0) + return false; + + // [-] DWORD PSFinalCombinerConstant0; // X_D3DRS_PSFINALCOMBINERCONSTANT0 : C0 in final combiner + // [-] DWORD PSFinalCombinerConstant1; // X_D3DRS_PSFINALCOMBINERCONSTANT1 : C1 in final combiner + // [*] DWORD PSRGBOutputs[8]; // X_D3DRS_PSRGBOUTPUTS0..X_D3DRS_PSRGBOUTPUTS7 : Stage 0 RGB outputs + // 
[*] DWORD PSCombinerCount; // X_D3DRS_PSCOMBINERCOUNT : Active combiner count (Stages 0-7) + // [*] DWORD PSTextureModes; // X_D3DRS_PS_RESERVED (copied from out-of-range X_D3DRS_PSTEXTUREMODES) : Texture addressing modes + // [*] DWORD PSDotMapping; // X_D3DRS_PSDOTMAPPING : Input mapping for dot product modes + // [*] DWORD PSInputTexture; // X_D3DRS_PSINPUTTEXTURE : Texture source for some texture modes + if (memcmp(&(PSDef.PSRGBOutputs[0]), &(Another.PSDef.PSRGBOutputs[0]), (8 + 1 + 1 + 1 + 1) * sizeof(DWORD)) != 0) + return false; + + // [-] DWORD PSC0Mapping; // Mapping of c0 regs to D3D constants + // [-] DWORD PSC1Mapping; // Mapping of c1 regs to D3D constants + // [*] DWORD PSFinalCombinerConstants; // Final combiner constant mapping + // Note : From PSFinalCombinerConstants, only the PS_GLOBALFLAGS_TEXMODE_ADJUST flag must correspond + if (DecodedTexModeAdjust != Another.DecodedTexModeAdjust) + return false; + + for (unsigned i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) + if (AlphaKill[i] != Another.AlphaKill[i]) + return false; + + // All ActiveTextureTypes must correspond as well (otherwise the recompiled shader would sample incorrectly) : + for (unsigned i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) + if (ActiveTextureTypes[i] != Another.ActiveTextureTypes[i]) + return false; + + // Fail if they don't correspond in their use of the final combiner unit + if (DecodedHasFinalCombiner != Another.DecodedHasFinalCombiner) + return false; + + // If they don't use the final combiner unit + if (!DecodedHasFinalCombiner) { + // Fail if they don't correspond on the render states that impact AdjustFinalCombiner + if (RenderStateFogEnable != Another.RenderStateFogEnable) + return false; + + if (RenderStateSpecularEnable != Another.RenderStateSpecularEnable) + return false; + } + + return true; + } + + void SnapshotRuntimeVariables() + { + // These values are checked in IsEquivalent to see if a cached pixel shader matches this declaration + + // Fetch currently active 
texture types, which impact AdjustTextureModes + for (unsigned i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + extern xbox::X_D3DRESOURCETYPE GetXboxD3DResourceType(const xbox::X_D3DResource *pXboxResource); + + if (g_pXbox_SetTexture[i]) + ActiveTextureTypes[i] = GetXboxD3DResourceType(g_pXbox_SetTexture[i]); + else + ActiveTextureTypes[i] = xbox::X_D3DRTYPE_NONE; + } + + // Pre-decode TexModeAdjust, which impacts AdjustTextureModes + DecodedTexModeAdjust = ((PSDef.PSFinalCombinerConstants >> PS_GLOBALFLAGS_SHIFT) & PS_GLOBALFLAGS_TEXMODE_ADJUST) > 0; + + // Pre-decode hasFinalCombiner, which impacts AdjustFinalCombiner + DecodedHasFinalCombiner = (PSDef.PSFinalCombinerInputsABCD > 0) || (PSDef.PSFinalCombinerInputsEFG > 0); + + // Fetch all render states that impact AdjustFinalCombiner + RenderStateFogEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGENABLE) > 0; + RenderStateSpecularEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_SPECULARENABLE) > 0; + + for (unsigned i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + // Test-cases : XDK sample nosortalphablend, Xbmc-fork (https://github.com/superpea/xbmc-fork/blob/bba40d57db52d11dea7bbf9509c298f7c2b05f4b/xbmc/cores/VideoRenderers/XBoxRenderer.cpp#L134) + // Star Wars: Jedi Academy (https://github.com/RetailGameSourceCode/StarWars_JediAcademy/blob/5b8f0040b3177d8855f7d575ef49b23ed52ff42a/codemp/win32/win_lighteffects.cpp#L299) + AlphaKill[i] = XboxTextureStates.Get(/*stage=*/i, xbox::X_D3DTSS_ALPHAKILL) & 4; // D3DTALPHAKILL_ENABLE + } + } + + void AdjustTextureModes(DecodedRegisterCombiner &RC) + { + // if this flag is set, the texture mode for each texture stage is adjusted as follows: + if (!RC.TexModeAdjust) return; + + for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + // First, disable not-assigned textures + if (ActiveTextureTypes[i] == xbox::X_D3DRTYPE_NONE) { + RC.PSTextureModes[i] = PS_TEXTUREMODES_NONE; + continue; + } + + // Then adjust some texture mode according to the currently 
active textures, so that the shader will use the appropriate sampling method + switch (RC.PSTextureModes[i]) { + case PS_TEXTUREMODES_PROJECT2D: + case PS_TEXTUREMODES_PROJECT3D: + case PS_TEXTUREMODES_CUBEMAP: + if (ActiveTextureTypes[i] == xbox::X_D3DRTYPE_CUBETEXTURE) + RC.PSTextureModes[i] = PS_TEXTUREMODES_CUBEMAP; + else + if (ActiveTextureTypes[i] == xbox::X_D3DRTYPE_VOLUMETEXTURE) + // TODO : Also do this for DepthBuffers (but not EmuXBFormatIsLinear!) : + // || EmuXBFormatIsDepthBuffer(GetXboxPixelContainerFormat(g_pXbox_SetTexture[i])) in { X_D3DFMT_D24S8, X_D3DFMT_F24S8, X_D3DFMT_D16, X_D3DFMT_F16} + RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT3D; + else + RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT2D; + break; + case PS_TEXTUREMODES_DOT_STR_3D: + case PS_TEXTUREMODES_DOT_STR_CUBE: + if (ActiveTextureTypes[i] == xbox::X_D3DRTYPE_CUBETEXTURE) + RC.PSTextureModes[i] = PS_TEXTUREMODES_DOT_STR_CUBE; + else + RC.PSTextureModes[i] = PS_TEXTUREMODES_DOT_STR_3D; + break; + } +/* Was : + switch (ActiveTextureTypes[i]) { + case xbox::X_D3DRTYPE_CUBETEXTURE: + switch (RC.PSTextureModes[i]) { + case PS_TEXTUREMODES_PROJECT2D: RC.PSTextureModes[i] = PS_TEXTUREMODES_CUBEMAP; break; + case PS_TEXTUREMODES_PROJECT3D: RC.PSTextureModes[i] = PS_TEXTUREMODES_CUBEMAP; break; + case PS_TEXTUREMODES_DOT_STR_3D: RC.PSTextureModes[i] = PS_TEXTUREMODES_DOT_STR_CUBE; break; + } break; + case xbox::X_D3DRTYPE_VOLUMETEXTURE: + switch (RC.PSTextureModes[i]) { + case PS_TEXTUREMODES_PROJECT2D: RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT3D; break; + case PS_TEXTUREMODES_CUBEMAP: RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT3D; break; + case PS_TEXTUREMODES_DOT_STR_CUBE: RC.PSTextureModes[i] = PS_TEXTUREMODES_DOT_STR_3D; break; + } break; + case xbox::X_D3DRTYPE_TEXTURE: + switch (RC.PSTextureModes[i]) { + case PS_TEXTUREMODES_PROJECT3D: RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT2D; break; + case PS_TEXTUREMODES_CUBEMAP: RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT2D; 
break; + } break; + case xbox::X_D3DRTYPE_NONE: + RC.PSTextureModes[i] = PS_TEXTUREMODES_NONE; + break; + } +*/ + } + } + + void AdjustFinalCombiner(DecodedRegisterCombiner &RC) + { + if (RC.hasFinalCombiner) return; + + // Since we're HLE'ing Xbox D3D, mimick how it configures the final combiner when PSDef doesn't : + // TODO : Use the same final combiner when no pixel shader is set! Possibly by generating a DecodedRegisterCombiner with PSCombinerCount zero? + // (This forms the entire Xbox fixed function pixel pipeline, which uses only two renderstates : X_D3DRS_SPECULARENABLE and X_D3DRS_SPECULARENABLE.) + // + // If X_D3DRS_FOGENABLE, configure final combiner to perform this operation : + // if (X_D3DRS_SPECULARENABLE) r0.rgb = lerp(fog.rgb, r0.rgb, fog.a) + v1.rgb; + // else r0.rgb = lerp(fog.rgb, r0.rgb, fog.a); + // r0.a = abs(r0.a); + // Otherwise, if not X_D3DRS_FOGENABLE, configure final combiner to perform this operation : + // if (X_D3DRS_SPECULARENABLE) r0.rgb = r0.rgb + v1.rgb; + // else r0.rgb = r0.rgb; + // Remember : + // xfc.rgb = lerp(C, B, A) + D + // xfc.a = G.b + // Whereby A, B, C and G can use the two xfc-special purpose registers : + // V1R0 = V1 + R0 + // EFPROD = E * F + // ( Or in shorthand : sum=r0+v1, prod=s4*s5, r0.rgb=s0*s1+{1-s0}*s2+s3, r0.a=s6.b ) + RC.FinalCombiner.Input[0/*A*/].Channel = PS_CHANNEL_ALPHA; + RC.FinalCombiner.Input[0/*A*/].Reg = PS_REGISTER_FOG; + RC.FinalCombiner.Input[1/*B*/].Reg = PS_REGISTER_R0; + // If fog is disabled, blend R0 with itself + RC.FinalCombiner.Input[2/*C*/].Reg = RenderStateFogEnable ? PS_REGISTER_FOG : PS_REGISTER_R0; + RC.FinalCombiner.Input[3/*D*/].Reg = RenderStateSpecularEnable ? 
PS_REGISTER_V1 : PS_REGISTER_ZERO; + RC.FinalCombiner.Input[4/*E*/].Reg = PS_REGISTER_ZERO; // Note : Not really needed, should be 0 already + RC.FinalCombiner.Input[5/*F*/].Reg = PS_REGISTER_ZERO; // Note : Not really needed, should be 0 already + RC.FinalCombiner.Input[6/*G*/].Reg = PS_REGISTER_R0; + RC.FinalCombiner.Input[6/*G*/].Channel = PS_CHANNEL_ALPHA; + } + + void PerformRuntimeAdjustments(DecodedRegisterCombiner &RC) + { + RC.AlphaKill[0] = AlphaKill[0]; + RC.AlphaKill[1] = AlphaKill[1]; + RC.AlphaKill[2] = AlphaKill[2]; + RC.AlphaKill[3] = AlphaKill[3]; + AdjustTextureModes(RC); + AdjustFinalCombiner(RC); + } +} +CxbxPSDef; + +typedef struct _PSH_RECOMPILED_SHADER { + CxbxPSDef CompletePSDef; + IDirect3DPixelShader* ConvertedPixelShader; +} PSH_RECOMPILED_SHADER; + +PSH_RECOMPILED_SHADER CxbxRecompilePixelShader(CxbxPSDef &CompletePSDef) +{ + DecodedRegisterCombiner RC = {}; + RC.Decode(&(CompletePSDef.PSDef)); + CompletePSDef.PerformRuntimeAdjustments(RC); + + ID3DBlob *pShader = nullptr; + EmuCompilePixelShader(&RC, &pShader); + + PSH_RECOMPILED_SHADER Result; + Result.CompletePSDef = CompletePSDef; + Result.ConvertedPixelShader = nullptr; + if (pShader) { + DWORD *pFunction = (DWORD*)pShader->GetBufferPointer(); + if (pFunction) { + DWORD hRet = g_pD3DDevice->CreatePixelShader(pFunction, &(Result.ConvertedPixelShader)); + if (hRet != D3D_OK) { + printf(D3DErrorString(hRet)); + } + } + pShader->Release(); + } + + return Result; +} // CxbxRecompilePixelShader std::vector g_RecompiledPixelShaders; -bool ArePSDefsIdentical(const xbox::X_D3DPIXELSHADERDEF &PSDef1, const xbox::X_D3DPIXELSHADERDEF &PSDef2) -{ - // Only compare the [*]-marked members, which forms the unique shader declaration (ignore the constants and Xbox Direct3D8 run-time fields) : - // [*] DWORD PSAlphaInputs[8]; // X_D3DRS_PSALPHAINPUTS0..X_D3DRS_PSALPHAINPUTS7 : Alpha inputs for each stage - // [*] DWORD PSFinalCombinerInputsABCD; // X_D3DRS_PSFINALCOMBINERINPUTSABCD : Final combiner 
inputs - // [*] DWORD PSFinalCombinerInputsEFG; // X_D3DRS_PSFINALCOMBINERINPUTSEFG : Final combiner inputs (continued) - if (memcmp(&(PSDef1.PSAlphaInputs[0]), &(PSDef2.PSAlphaInputs[0]), (8 + 1 + 1) * sizeof(DWORD)) != 0) - return false; +// Mapping indices of Xbox register combiner constants to host pixel shader constants; +// The first 16 are identity-mapped (C0_1 .. C0_7 are C0 .. C7 on host, C1_0 .. C1_7 are C8 .. C15 on host) : +constexpr int PSH_XBOX_CONSTANT_C0 = 0; // = 0..15 +// Then two final combiner constants : +constexpr int PSH_XBOX_CONSTANT_FC0 = PSH_XBOX_CONSTANT_C0 + PSH_XBOX_MAX_C_REGISTER_COUNT; // = 16 +constexpr int PSH_XBOX_CONSTANT_FC1 = PSH_XBOX_CONSTANT_FC0 + 1; // = 17 +// Fog requires a constant (as host PS1.4 doesn't support the FOG register) +constexpr int PSH_XBOX_CONSTANT_FOG = PSH_XBOX_CONSTANT_FC1 + 1; // = 18 +// Bump Environment Material registers +constexpr int PSH_XBOX_CONSTANT_BEM = PSH_XBOX_CONSTANT_FOG + 1; // = 19..22 +// Bump map Luminance registers +constexpr int PSH_XBOX_CONSTANT_LUM = PSH_XBOX_CONSTANT_BEM + 4; // = 23..26 +// Which winding order to consider as the front face +constexpr int PSH_XBOX_CONSTANT_FRONTFACE_FACTOR = PSH_XBOX_CONSTANT_LUM + 4; // = 27 +// This concludes the set of constants that need to be set on host : +constexpr int PSH_XBOX_CONSTANT_MAX = PSH_XBOX_CONSTANT_FRONTFACE_FACTOR + 1; // = 28 - // [-] DWORD PSConstant0[8]; // X_D3DRS_PSCONSTANT0_0..X_D3DRS_PSCONSTANT0_7 : C0 for each stage - // [-] DWORD PSConstant1[8]; // X_D3DRS_PSCONSTANT1_0..X_D3DRS_PSCONSTANT1_7 : C1 for each stage - // [*] DWORD PSAlphaOutputs[8]; // X_D3DRS_PSALPHAOUTPUTS0..X_D3DRS_PSALPHAOUTPUTS7 : Alpha output for each stage - // [*] DWORD PSRGBInputs[8]; // X_D3DRS_PSRGBINPUTS0..X_D3DRS_PSRGBINPUTS7 : RGB inputs for each stage - // [*] DWORD PSCompareMode; // X_D3DRS_PSCOMPAREMODE : Compare modes for clipplane texture mode - if (memcmp(&(PSDef1.PSAlphaOutputs[0]), &(PSDef2.PSAlphaOutputs[0]), (8 + 8 + 1) * 
sizeof(DWORD)) != 0) - return false; +std::string GetFixedFunctionShaderTemplate() { + static bool loaded = false; + static std::string hlslString; - // [-] DWORD PSFinalCombinerConstant0; // X_D3DRS_PSFINALCOMBINERCONSTANT0 : C0 in final combiner - // [-] DWORD PSFinalCombinerConstant1; // X_D3DRS_PSFINALCOMBINERCONSTANT1 : C1 in final combiner - // [*] DWORD PSRGBOutputs[8]; // X_D3DRS_PSRGBOUTPUTS0..X_D3DRS_PSRGBOUTPUTS7 : Stage 0 RGB outputs - // [*] DWORD PSCombinerCount; // X_D3DRS_PSCOMBINERCOUNT : Active combiner count (Stages 0-7) - // [*] DWORD PSTextureModes; // X_D3DRS_PS_RESERVED (copied from out-of-range X_D3DRS_PSTEXTUREMODES) : Texture addressing modes - // [*] DWORD PSDotMapping; // X_D3DRS_PSDOTMAPPING : Input mapping for dot product modes - // [*] DWORD PSInputTexture; // X_D3DRS_PSINPUTTEXTURE : Texture source for some texture modes - if (memcmp(&(PSDef1.PSRGBOutputs[0]), &(PSDef2.PSRGBOutputs[0]), (8 + 1 + 1 + 1 + 1) * sizeof(DWORD)) != 0) - return false; + // TODO does this need to be thread safe? 
+ if (!loaded) { + loaded = true; - // [-] DWORD PSC0Mapping; // Mapping of c0 regs to D3D constants - // [-] DWORD PSC1Mapping; // Mapping of c1 regs to D3D constants - // [-] DWORD PSFinalCombinerConstants; // Final combiner constant mapping - return true; + // Determine the filename and directory for the fixed function shader + // TODO make this a relative path so we guarantee an LPCSTR for D3DCompile + auto hlslDir = std::filesystem::path(szFilePath_CxbxReloaded_Exe) + .parent_path() + .append("hlsl"); + + auto sourceFile = hlslDir.append("FixedFunctionPixelShader.hlsl").string(); + + // Load the shader into a string + std::ifstream hlslStream(sourceFile); + std::stringstream hlsl; + hlsl << hlslStream.rdbuf(); + + hlslString = hlsl.str(); + } + + return hlslString; } +std::string_view GetD3DTOPString(int d3dtop) { + static constexpr std::string_view opToString[] = { +#ifdef ENABLE_FF_ALPHAKILL + "X_D3DTOP_DISABLE", // 0 (Was UNDEFINED, but that doesn't compile) +#else + "UNDEFINED", // 0 +#endif + "X_D3DTOP_DISABLE", // 1 + "X_D3DTOP_SELECTARG1", // 2 + "X_D3DTOP_SELECTARG2", // 3 + "X_D3DTOP_MODULATE", // 4 + "X_D3DTOP_MODULATE2X", // 5 + "X_D3DTOP_MODULATE4X", // 6 + "X_D3DTOP_ADD", // 7 + "X_D3DTOP_ADDSIGNED", // 8 + "X_D3DTOP_ADDSIGNED2X", // 9 + "X_D3DTOP_SUBTRACT", // 10 + "X_D3DTOP_ADDSMOOTH", // 11 + "X_D3DTOP_BLENDDIFFUSEALPHA", // 12 + "X_D3DTOP_BLENDCURRENTALPHA", // 13 + "X_D3DTOP_BLENDTEXTUREALPHA", // 14 + "X_D3DTOP_BLENDFACTORALPHA", // 15 + "X_D3DTOP_BLENDTEXTUREALPHAPM", // 16 + "X_D3DTOP_PREMODULATE", // 17 + "X_D3DTOP_MODULATEALPHA_ADDCOLOR", // 18 + "X_D3DTOP_MODULATECOLOR_ADDALPHA", // 19 + "X_D3DTOP_MODULATEINVALPHA_ADDCOLOR", // 20 + "X_D3DTOP_MODULATEINVCOLOR_ADDALPHA", // 21 + "X_D3DTOP_DOTPRODUCT3", // 22 + "X_D3DTOP_MULTIPLYADD", // 23 + "X_D3DTOP_LERP", // 24 + "X_D3DTOP_BUMPENVMAP", // 25 + "X_D3DTOP_BUMPENVMAPLUMINANCE", // 26 + }; + +#ifdef ENABLE_FF_ALPHAKILL + if (d3dtop < 0 || d3dtop > 26) { +#else + if (d3dtop < 1 || d3dtop > 
26) { +#endif + EmuLog(LOG_LEVEL::ERROR2, "Unmapped texture operation %d", d3dtop); + d3dtop = 0; // undefined + } + + return opToString[d3dtop]; +} + +// Get a string equivalent of ' + ' +std::string GetD3DTASumString(int d3dta, bool allowModifier = true) { + using namespace FixedFunctionPixelShader; + + static const std::string argToString[] = { + "X_D3DTA_DIFFUSE", // 0 + "X_D3DTA_CURRENT", // 1 + "X_D3DTA_TEXTURE", // 2 + "X_D3DTA_TFACTOR", // 3 + "X_D3DTA_SPECULAR", // 4 + "X_D3DTA_TEMP", // 5 + "X_D3DTA_CONSTANT", // 6 + "UNDEFINED", // 7 + }; + + // Write a texture argument + const int flagMask = 0x30; + int iFlags = d3dta & flagMask; + int i = d3dta & ~flagMask; + + if (i < 0 || i > 6) { + EmuLog(LOG_LEVEL::ERROR2, "Unmapped texture argument %d on texture arg", i); + i = 7; // undefined + } + + auto str = argToString[i]; + if (iFlags) { + if (!allowModifier) { + EmuLog(LOG_LEVEL::ERROR2, "Modifier not expected on texture argument"); + } + + if (iFlags == X_D3DTA_COMPLEMENT) + str += " + X_D3DTA_COMPLEMENT"; + else if (iFlags == X_D3DTA_ALPHAREPLICATE) + str += " + X_D3DTA_ALPHAREPLICATE"; + else { + EmuLog(LOG_LEVEL::ERROR2, "Unmapped texture modifier %d", iFlags); + str += " /* + UNKNOWN MODIFIER */"; + } + } + + return str; +} + +// TODO we have to create and cache shaders over and over and over and over +// Deduplicate this resource management +IDirect3DPixelShader9* GetFixedFunctionShader() +{ + using namespace FixedFunctionPixelShader; + + // TODO move this cache elsewhere - and flush it when the device is released! 
+ static std::unordered_map ffPsCache = {}; + + // Create a key from state that will be baked in to the shader + PsTextureHardcodedState states[4] = {}; + int sampleType[4] = { SAMPLE_NONE, SAMPLE_NONE, SAMPLE_NONE, SAMPLE_NONE }; + bool pointSpriteEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); + + bool previousStageDisabled = false; + for (int i = 0; i < 4; i++) { + // Determine COLOROP + // This controls both the texture operation for the colour of the stage + // and when to stop processing + // Under certain circumstances we force it to be DISABLE + auto colorOp = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLOROP); + + // Usually we execute stages up to the first disabled stage + // However, if point sprites are enabled, we just execute stage 3 + bool forceDisable = + (!pointSpriteEnable && previousStageDisabled) || + (pointSpriteEnable && i < 3); + + // When a texture stage has D3DTSS_COLORARG1 equal to D3DTA_TEXTURE + // and the texture pointer for the stage is NULL, this stage + // and all stages after it are not processed. + // Test cases: Red Dead Revolver, JSRF + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/texture-blending + // Don't follow the D3D9 docs if SELECTARG2 is in use (PC D3D9 behaviour, nvidia quirk?) + // Test case: Crash Nitro Kart (engine speed UI) + if (!g_pXbox_SetTexture[i] + && (XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG1) & 0x7) == X_D3DTA_TEXTURE + && colorOp != xbox::X_D3DTOP_SELECTARG2) + { + forceDisable = true; + } + + // Set the final COLOROP value + states[i].COLOROP = forceDisable ? 
X_D3DTOP_DISABLE : colorOp; + + // If the stage is disabled we don't want its configuration to affect the key + // Move on to the next stage + if (colorOp == X_D3DTOP_DISABLE) { + previousStageDisabled = true; + continue; + } + + // Get sample type + // TODO move XD3D8 resource query functions out of Direct3D9.cpp so we can use them here + if (g_pXbox_SetTexture[i]) { + auto format = g_pXbox_SetTexture[i]->Format; + if (format & X_D3DFORMAT_CUBEMAP) + sampleType[i] = SAMPLE_CUBE; + else if (((format & X_D3DFORMAT_DIMENSION_MASK) >> X_D3DFORMAT_DIMENSION_SHIFT) > 2) + sampleType[i] = SAMPLE_3D; + else + sampleType[i] = SAMPLE_2D; + } + + states[i].COLORARG0 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG0); + states[i].COLORARG1 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG1); + states[i].COLORARG2 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG2); + + auto alphaOp = XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAOP); + if (alphaOp == X_D3DTOP_DISABLE) LOG_TEST_CASE("Alpha stage disabled when colour stage is enabled"); + + states[i].ALPHAOP = (float)alphaOp; + states[i].ALPHAARG0 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG0); + states[i].ALPHAARG1 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG1); + states[i].ALPHAARG2 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG2); + + states[i].RESULTARG = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_RESULTARG); + } + + // Create a key from the shader state + // Note currently this is padded since it's what we send to the GPU + auto key = 3 * ComputeHash(states, sizeof(states)) + + ComputeHash(sampleType, sizeof(sampleType)); + + auto got = ffPsCache.find(key); + if (got != ffPsCache.end()) { + // We have a shader. Great! 
+ return got->second; + } + + // Build and compile a new shader + auto hlslTemplate = GetFixedFunctionShaderTemplate(); + + // In D3D9 it seems we need to know hardcode if we're doing a 2D or 3D lookup + const std::string sampleTypePattern = "TEXTURE_SAMPLE_TYPE;"; + auto sampleTypeReplace = hlslTemplate.find(sampleTypePattern); + + static constexpr std::string_view typeToString[] = { + "SAMPLE_NONE", + "SAMPLE_2D", + "SAMPLE_3D", + "SAMPLE_CUBE" + }; + + std::stringstream sampleTypeString; + sampleTypeString << "{" + << typeToString[sampleType[0]] << ", " + << typeToString[sampleType[1]] << ", " + << typeToString[sampleType[2]] << ", " + << typeToString[sampleType[3]] << "};"; + + auto finalShader = hlslTemplate.replace(sampleTypeReplace, sampleTypePattern.size(), sampleTypeString.str()); + + // Hardcode the texture stage operations and arguments + // So the shader handles exactly one combination of values + const std::string stageDef = "// STAGE DEFINITIONS"; + auto stageDefInsert = finalShader.find(stageDef) + stageDef.size(); + + std::stringstream stageSetup; + stageSetup << '\n'; + + for (int i = 0; i < 4; i++) { +#ifdef ENABLE_FF_ALPHAKILL + // Even when a stage is disabled, we still have to fully initialize it's values, to prevent + // "error X4000: variable 'stages' used without having been completely initialized" +#else + // The stage is initialized to be disabled + // We don't have to output anything + if (states[i].COLOROP == X_D3DTOP_DISABLE) + continue; + +#endif + std::string target = "stages[" + std::to_string(i) + "]."; + + auto s = states[i]; + stageSetup << target << "COLOROP = " << GetD3DTOPString(s.COLOROP) << ";\n"; + + stageSetup << target << "COLORARG0 = " << GetD3DTASumString(s.COLORARG0) << ";\n"; + stageSetup << target << "COLORARG1 = " << GetD3DTASumString(s.COLORARG1) << ";\n"; + stageSetup << target << "COLORARG2 = " << GetD3DTASumString(s.COLORARG2) << ";\n"; + + stageSetup << target << "ALPHAOP = " << GetD3DTOPString(s.ALPHAOP) << 
";\n"; + + stageSetup << target << "ALPHAARG0 = " << GetD3DTASumString(s.ALPHAARG0) << ";\n"; + stageSetup << target << "ALPHAARG1 = " << GetD3DTASumString(s.ALPHAARG1) << ";\n"; + stageSetup << target << "ALPHAARG2 = " << GetD3DTASumString(s.ALPHAARG2) << ";\n"; + + stageSetup << target << "RESULTARG = " << GetD3DTASumString(s.RESULTARG, false) << ";\n"; + stageSetup << '\n'; + } + + finalShader = finalShader.insert(stageDefInsert, stageSetup.str()); + + // Compile the shader + ID3DBlob* pShaderBlob; + + auto hlslDir = std::filesystem::path(szFilePath_CxbxReloaded_Exe) + .parent_path() + .append("hlsl"); + + auto pseudoFileName = "FixedFunctionPixelShader-" + std::to_string(key) + ".hlsl"; + auto pseudoSourceFile = hlslDir.append(pseudoFileName).string(); + EmuCompileShader(finalShader, "ps_3_0", &pShaderBlob, pseudoSourceFile.c_str()); + + // Create shader object for the device + IDirect3DPixelShader9* pShader = nullptr; + auto hRet = g_pD3DDevice->CreatePixelShader((DWORD*)pShaderBlob->GetBufferPointer(), &pShader); + if (hRet != S_OK) + CxbxKrnlCleanup("Failed to compile fixed function pixel shader"); + pShaderBlob->Release(); + + // Insert the shader into the cache + ffPsCache[key] = pShader; + + return pShader; +}; + +float AsFloat(uint32_t value) { + auto v = value; + return *(float*)&v; +} + +// Set constant state for the fixed function pixel shader +void UpdateFixedFunctionPixelShaderState() +{ + using namespace FixedFunctionPixelShader; + + FixedFunctionPixelShaderState ffPsState; + ffPsState.TextureFactor = (D3DXVECTOR4)((D3DXCOLOR)(XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_TEXTUREFACTOR))); + ffPsState.SpecularEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_SPECULARENABLE); + ffPsState.FogEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGENABLE); + ffPsState.FogColor = (D3DXVECTOR3)((D3DXCOLOR)XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR)); + + // Texture state + for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; 
i++) { + + auto stage = &ffPsState.stages[i]; + + stage->COLORKEYOP = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORKEYOP); + stage->COLORSIGN = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORSIGN); + stage->ALPHAKILL = XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAKILL); + stage->BUMPENVMAT00 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT00)); + stage->BUMPENVMAT01 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT01)); + stage->BUMPENVMAT10 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT10)); + stage->BUMPENVMAT11 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT11)); + stage->BUMPENVLSCALE = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVLSCALE)); + stage->BUMPENVLOFFSET = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVLOFFSET)); + stage->COLORKEYCOLOR = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORKEYCOLOR); + } + + const int size = (sizeof(FixedFunctionPixelShaderState) + 16 - 1) / 16; + g_pD3DDevice->SetPixelShaderConstantF(0, (float*)&ffPsState, size); +} + +bool g_UseFixedFunctionPixelShader = true; void DxbxUpdateActivePixelShader() // NOPATCH { // The first RenderState is PSAlpha, @@ -5902,2220 +992,149 @@ void DxbxUpdateActivePixelShader() // NOPATCH // See D3DDevice_SetPixelShaderCommon which implements this const xbox::X_D3DPIXELSHADERDEF *pPSDef = g_pXbox_PixelShader != nullptr ? 
(xbox::X_D3DPIXELSHADERDEF*)(XboxRenderStates.GetPixelShaderRenderStatePointer()) : nullptr; - - if (pPSDef != nullptr) - { - // Create a copy of the pixel shader definition, as it is residing in render state register slots : - xbox::X_D3DPIXELSHADERDEF PSDefCopy = *pPSDef; - // Copy-in the PSTextureModes value which is stored outside the range of Xbox pixel shader render state slots : - PSDefCopy.PSTextureModes = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES); - - const PSH_RECOMPILED_SHADER* RecompiledPixelShader = nullptr; - - // Now, see if we already have a shader compiled for this declaration : - // TODO : Change g_RecompiledPixelShaders into an unordered_map, hash just the identifying PSDef members, and add cache eviction (clearing host resources when pruning) - for (const auto& it : g_RecompiledPixelShaders) { - if (ArePSDefsIdentical(it.PSDef, PSDefCopy)) { - RecompiledPixelShader = ⁢ - break; - } + if (pPSDef == nullptr) { + IDirect3DPixelShader9* pShader = nullptr; + if (g_UseFixedFunctionPixelShader) { + pShader = GetFixedFunctionShader(); + UpdateFixedFunctionPixelShaderState(); } - // If none was found, recompile this shader and remember it : - if (RecompiledPixelShader == nullptr) { - // Recompile this pixel shader : - g_RecompiledPixelShaders.push_back(DxbxRecompilePixelShader(&PSDefCopy)); - RecompiledPixelShader = &g_RecompiledPixelShaders.back(); + + g_pD3DDevice->SetPixelShader(pShader); + return; + } + + // Create a copy of the pixel shader definition, as it is residing in render state register slots : + CxbxPSDef CompletePSDef; + CompletePSDef.PSDef = *pPSDef; + // Copy-in the PSTextureModes value which is stored outside the range of Xbox pixel shader render state slots : + CompletePSDef.PSDef.PSTextureModes = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES); + // Fetch all other values that are used in the IsEquivalent check : + CompletePSDef.SnapshotRuntimeVariables(); + + // Now, see if we already have a 
shader compiled for this definition : + // TODO : Change g_RecompiledPixelShaders into an unordered_map, hash just the identifying PSDef members, and add cache eviction (clearing host resources when pruning) + const PSH_RECOMPILED_SHADER* RecompiledPixelShader = nullptr; + for (const auto& it : g_RecompiledPixelShaders) { + if (CompletePSDef.IsEquivalent(it.CompletePSDef)) { + RecompiledPixelShader = ⁢ + break; } + } - // Switch to the converted pixel shader (if it's any different from our currently active - // pixel shader, to avoid many unnecessary state changes on the local side). - IDirect3DPixelShader* ConvertedPixelShaderHandle = RecompiledPixelShader->ConvertedHandle; + // If none was found, recompile this shader and remember it : + if (RecompiledPixelShader == nullptr) { + // Recompile this pixel shader : + g_RecompiledPixelShaders.push_back(CxbxRecompilePixelShader(CompletePSDef)); + RecompiledPixelShader = &g_RecompiledPixelShaders.back(); + } - Microsoft::WRL::ComPtr CurrentPixelShader; - g_pD3DDevice->GetPixelShader(/*out*/CurrentPixelShader.GetAddressOf()); - if (CurrentPixelShader.Get() != ConvertedPixelShaderHandle) - g_pD3DDevice->SetPixelShader(ConvertedPixelShaderHandle); + // Switch to the converted pixel shader (if it's any different from our currently active + // pixel shader, to avoid many unnecessary state changes on the local side). + Microsoft::WRL::ComPtr CurrentPixelShader; + g_pD3DDevice->GetPixelShader(/*out*/CurrentPixelShader.GetAddressOf()); + if (CurrentPixelShader.Get() != RecompiledPixelShader->ConvertedPixelShader) { + g_pD3DDevice->SetPixelShader(RecompiledPixelShader->ConvertedPixelShader); + } - // TODO: Figure out a method to forward the vertex-shader oFog output to the pixel shader FOG input register : - // We could use the unused oT4.x to output fog from the vertex shader, and read it with 'texcoord t4' in pixel shader! 
- // For now, we still disable native fog if pixel shader is said to handle it, this prevents black screen issues in titles using pixel shader fog. - // NOTE: Disabled: This breaks fog in XDK samples such as DolphinClassic. -#if-0 - if ((RecompiledPixelShader->PSDef.PSFinalCombinerInputsABCD > 0) || (RecompiledPixelShader->PSDef.PSFinalCombinerInputsEFG > 0)) { - g_pD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE); - } -#endif + //PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]; + //PSH_XBOX_SHADER::GetPSTextureModes(pPSDef, psTextureModes); + // + //for (i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) + //{ + // switch (psTextureModes[i]) + // { + // default: + // break; + // } + //} - //PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]; - //PSH_XBOX_SHADER::GetPSTextureModes(pPSDef, psTextureModes); - // - //for (i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) - //{ - // switch (psTextureModes[i]) - // { - // case PS_TEXTUREMODES_BUMPENVMAP: - // g_pD3DDevice->SetTextureStageState(i, D3DTSS_COLOROP, D3DTOP_BUMPENVMAP); - // break; - // case PS_TEXTUREMODES_BUMPENVMAP_LUM: - // g_pD3DDevice->SetTextureStageState(i, D3DTSS_COLOROP, D3DTOP_BUMPENVMAPLUMINANCE); - // break; - // default: - // break; - // } - //} + // Set constants, not based on g_PixelShaderConstants, but based on + // the render state slots containing the pixel shader constants, + // as these could have been updated via SetRenderState or otherwise : + D3DXCOLOR fColor[PSH_XBOX_CONSTANT_MAX]; - // Set constants, not based on g_PixelShaderConstants, but based on - // the render state slots containing the pixel shader constants, - // as these could have been updated via SetRenderState or otherwise : - D3DXCOLOR fColor[PSH_XBOX_CONSTANT_MAX]; - for (int i = 0; i < PSH_XBOX_CONSTANT_MAX; i++) - { - // Assume all constants are in use (this is much easier than tracking them for no other purpose than to skip a few here) - // Read the color from the corresponding render state slot : - switch (i) { - 
case PSH_XBOX_CONSTANT_FOG: - // Note : FOG.RGB is correct like this, but FOG.a should be coming - // from the vertex shader (oFog) - however, D3D8 does not forward this... - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR); + // PSH_XBOX_CONSTANT_C0..C15 are stored as-is in (and should thus be read from) the Xbox render state pixel shader constant slots + for (unsigned constant_nr = 0; constant_nr < 16; constant_nr++) { + fColor[PSH_XBOX_CONSTANT_C0 + constant_nr] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSCONSTANT0_0 + constant_nr); // Note : 0xAARRGGBB format + } + + fColor[PSH_XBOX_CONSTANT_FC0] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT0); + fColor[PSH_XBOX_CONSTANT_FC1] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT1); + + // Fog requires a constant (as host PS1.4 doesn't support the FOG register) + // Note : FOG.RGB is correct like this, but FOG.a should be coming + // from the vertex shader (oFog) - however, D3D8 does not forward this... 
+ fColor[PSH_XBOX_CONSTANT_FOG] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR); +#if 0 // New, doesn't work yet + // Bump Environment Material registers + for (int stage_nr = 0; stage_nr < xbox::X_D3DTS_STAGECOUNT; stage_nr++) { + // Note : No loop, because X_D3DTSS_BUMPENVMAT11 and X_D3DTSS_BUMPENVMAT10 are swapped + fColor[PSH_XBOX_CONSTANT_BEM + stage_nr].r = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVMAT00); // Maps to BEM[stage].x + fColor[PSH_XBOX_CONSTANT_BEM + stage_nr].g = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVMAT01); // Maps to BEM[stage].y + fColor[PSH_XBOX_CONSTANT_BEM + stage_nr].b = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVMAT10); // Maps to BEM[stage].z + fColor[PSH_XBOX_CONSTANT_BEM + stage_nr].a = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVMAT11); // Maps to BEM[stage].w + } + + // Bump map Luminance registers + for (int stage_nr = 0; stage_nr < xbox::X_D3DTS_STAGECOUNT; stage_nr++) { + fColor[PSH_XBOX_CONSTANT_LUM + stage_nr].r = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVLSCALE); // Maps to LUM[stage].x + fColor[PSH_XBOX_CONSTANT_LUM + stage_nr].g = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVLOFFSET); // Maps to LUM[stage].y + fColor[PSH_XBOX_CONSTANT_LUM + stage_nr].b = 0; + fColor[PSH_XBOX_CONSTANT_LUM + stage_nr].a = 0; + } +#else + for (int i = 0; i < PSH_XBOX_CONSTANT_MAX; i++) { + switch (i) { + case PSH_XBOX_CONSTANT_BEM + 0: + case PSH_XBOX_CONSTANT_BEM + 1: + case PSH_XBOX_CONSTANT_BEM + 2: + case PSH_XBOX_CONSTANT_BEM + 3: + { + int stage_nr = i - PSH_XBOX_CONSTANT_BEM; + DWORD* value = (DWORD*)&fColor[i];; // Note : This overlays D3DXCOLOR's FLOAT r, g, b, a + + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT00, &value[0]); // Maps to BEM[stage].x + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT01, &value[1]); // Maps to BEM[stage].y + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT10, &value[2]); // 
Maps to BEM[stage].z + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT11, &value[3]); // Maps to BEM[stage].w + // Note : The TSS values being read here, have been transfered from Xbox to host in XboxTextureStateConverter::Apply() break; - case PSH_XBOX_CONSTANT_FC0: - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT0); - break; - case PSH_XBOX_CONSTANT_FC1: - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT1); - break; - case PSH_XBOX_CONSTANT_BEM + 0: - case PSH_XBOX_CONSTANT_BEM + 1: - case PSH_XBOX_CONSTANT_BEM + 2: - case PSH_XBOX_CONSTANT_BEM + 3: - { - int stage = i - PSH_XBOX_CONSTANT_BEM; - DWORD* value = (DWORD*)&fColor[i]; + } + case PSH_XBOX_CONSTANT_LUM + 0: + case PSH_XBOX_CONSTANT_LUM + 1: + case PSH_XBOX_CONSTANT_LUM + 2: + case PSH_XBOX_CONSTANT_LUM + 3: + { + int stage_nr = i - PSH_XBOX_CONSTANT_LUM; + DWORD* value = (DWORD*)&fColor[i]; // Note : This overlays D3DXCOLOR's FLOAT r, g, b, a - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVMAT00, &value[0]); - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVMAT01, &value[1]); - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVMAT11, &value[2]); - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVMAT10, &value[3]); - break; - } - case PSH_XBOX_CONSTANT_LUM + 0: - case PSH_XBOX_CONSTANT_LUM + 1: - case PSH_XBOX_CONSTANT_LUM + 2: - case PSH_XBOX_CONSTANT_LUM + 3: - { - int stage = i - PSH_XBOX_CONSTANT_LUM; - DWORD* value = (DWORD*)&fColor[i]; - - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVLSCALE, &value[0]); - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVLOFFSET, &value[1]); - value[2] = 0; - value[3] = 0; - break; - } - default: // PSH_XBOX_CONSTANT_C0..C15 are stored as-is in (and should thus be read from) the Xbox render state pixel shader constant slots - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSCONSTANT0_0 + i - PSH_XBOX_CONSTANT_C0); 
+ g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVLSCALE, &value[0]); // Maps to LUM[stage].x + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVLOFFSET, &value[1]); // Maps to LUM[stage].y + value[2] = 0; + value[3] = 0; break; } } - - // Set all host constant values using a single call: - g_pD3DDevice->SetPixelShaderConstantF(0, reinterpret_cast(fColor), PSH_XBOX_CONSTANT_MAX); - // Note PSH_XBOX_CONSTANT_MUL0 and PSH_XBOX_CONSTANT_MUL1 fall outside PSH_XBOX_CONSTANT_MAX - // and have already been 'PO_DEF'ined at the start of ConvertConstantsToNative } - else - { - g_pD3DDevice->SetPixelShader(nullptr); +#endif + + // Control whether to use front or back diffuse/specular colours + // This factor should be multipled with VFACE + // Test cases: + // Amped (snowboard trails should use front colours, but use both CW and CCW winding) + // TwoSidedLighting sample + float frontfaceFactor = 0; // 0 == always use the front colours + if (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_TWOSIDEDLIGHTING)) { + LOG_TEST_CASE("Two sided lighting"); + // VFACE is positive for clockwise faces + // If Xbox designates counter-clockwise as front-facing, we invert VFACE + auto cwFrontface = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FRONTFACE) == 0x900; // clockwise; = NV097_SET_FRONT_FACE_V_CW = NV2A_FRONT_FACE_CW + frontfaceFactor = cwFrontface ? 1.0 : -1.0; } -} - -// End of Dxbx code - -#define REVEL8N_PIXEL_SHADER_CHANGES - -// help functions -char *pCodeBuffer=nullptr; - -void WriteCode(const char *str, ...) 
-{ - char szBuffer[256]; - va_list argp; - - va_start(argp, str); - vsprintf(szBuffer, str, argp); - va_end(argp); - - //printf("%s", szBuffer); - if(pCodeBuffer) - strcat(pCodeBuffer, szBuffer); -} - -void InsertString(char *szStr, int iOffset, char *szInsert, int iInsertLen, int iRemoveLen); - -inline void HandleInputOutput -( - DWORD dwInput, - DWORD dwOutput, - BOOL bAlpha, - int iCStage, - BOOL bUniqueC0, - BOOL bUniqueC1, - int *iPSC0, - int *iPSC1, - - BOOL bGlobalRGBA, - - BOOL bFinalCombiner -); - -inline void GetRegister -( - WORD wRegister, - char *szRegister, - BOOL bUniqueC0, - BOOL bUniqueC1, - int iCStage, - int *pPSC0, - int *pPSC1 -); - -inline void GetInputMapping(WORD wInputMapping, char *szInputMapping, char *szInputMappingAfter, char *szConst); -inline void GetChannel(WORD wInputChannel, char *szInput, BOOL bAlpha, BOOL bGlobalRGBA); - -inline void GetOutputFlags -( - WORD wOutputFlags, - char *szInstMod, - char *szABOp, - char *szCDOp, - char *szABCDOp, - - BOOL *bAB_BA, - BOOL *bCD_BA, - - BOOL *bShl1Bias, - BOOL *bBias -); - -//inline BOOL CheckOpForMov(char *szOp, char *szInputs1, char *szInput2, char *szRegInput); -inline BOOL OptimizeOperation -( - char *szOp, - char *szOp1, - - char *szOp2, - char *szMod, - - char *szInputAB1, - char *szInputAB2, - - char *szInputCD1, - char *szInputCD2, - - char *szConstRegAB1, - char *szConstRegAB2, - char *szConstRegCD1, - char *szConstRegCD2, - - char *szOutAB, - char *szOutCD, - char *szABCDOutput, - - char *szCommand -); - -inline void ClearConstRegVars(); -inline void CorrectConstToReg(char *szConst, int *pPSC0, int *pPSC1); - -int iPreRunLen=0; - -// This is set to true if an operation tries to read from r0 -// before r0 was written, in that case we do the same as the xbox -// we write the value of t0.a to r0 ;-) -BOOL bR0WAccess=FALSE; -BOOL bR0Written=FALSE; -BOOL bR0AWritten=FALSE; -/* -BOOL bR1WAccess=FALSE; -BOOL bR1AWAccess=FALSE; -BOOL bR1RGBWAccess=FALSE; - -BOOL bR1AWritten=FALSE; -BOOL 
bR1RGBWritten=FALSE; -BOOL bR1Written=FALSE; -*/ -BOOL bR0AlphaOutput = FALSE; - -BOOL bLastOpRGB = FALSE; - -BOOL bEFProduct = FALSE; -BOOL bV1R0Reg = FALSE; - -#define DEF_VAR_TABLE_LEN 7 -char szVar[][10] = -{ - "r0", - "r1", - "t0", - "t1", - "t2", - "t3", - "t4" -}; - -inline void HandleInputOutput -( - DWORD dwInput, - DWORD dwOutput, - BOOL bAlpha, - int iCStage, - BOOL bUniqueC0, - BOOL bUniqueC1, - int *iPSC0, - int *iPSC1, - - BOOL bGlobalRGBA, - - BOOL bFinalCombiner -) -{ - // INPUTS - if(bFinalCombiner) printf("\npPSD.PSFinalCombinerInputsABCD = PS_COMBINERINPUTS(\n"); - else if(bAlpha) printf("\npPSD.PSAlphaInputs[%d] = PS_COMBINERINPUTS(\n", iCStage); - else printf("\npPSD.PSRGBInputs[%d] = PS_COMBINERINPUTS(\n", iCStage); - - WORD wCombinerInputs[4]; // 0=a, 1=b, 2=c, 3=d - wCombinerInputs[0] = (WORD) ((dwInput>>24) & 0xFF); - wCombinerInputs[1] = (WORD) ((dwInput>>16) & 0xFF); - wCombinerInputs[2] = (WORD) ((dwInput>> 8) & 0xFF); - wCombinerInputs[3] = (WORD) ( dwInput & 0xFF); - - char szInput[4][20] = {0}; - char szConst[4][20] = {0}; - char szInputMapping[4][20] = {0}; - char szInputMappingAfter[4][20] = {0}; - char szChannels[4][5] = {0}; - - // Go through inputs - int i=0; - for(i=0; i<4; i++) - { - szInput[i][0]=0x00; // Fast way to zero a string ;-) - szConst[i][0]=0x00; - szInputMapping[i][0]=0x00; - szInputMappingAfter[i][0]=0x00; - szChannels[i][0]=0x00; - - GetRegister(wCombinerInputs[i] & 0xF, szInput[i], bUniqueC0, bUniqueC1, iCStage, iPSC0, iPSC1); - - if(strcmp(szInput[i], "r0")==0) - { - if(!bR0AWritten) - strcpy(szInput[i], "t0"); - - if(!bR0Written) { - strcpy(szInput[i], "t0"); - //bR0WAccess=TRUE; - } - } - - printf(" | "); - GetInputMapping(wCombinerInputs[i] & 0x1E0, szInputMapping[i], szInputMappingAfter[i], szConst[i]); - printf(" | "); - GetChannel(wCombinerInputs[i] & 0x10, szChannels[i], bAlpha, bGlobalRGBA); - printf(",\n"); - - if((wCombinerInputs[i] & 0xF)==0x00) - szInput[i][0]=0x00; - - // 6928: check this as I doubt 
whether it works really like that - /*if(strcmp(szInput[i], "r1")==0) - { - // EmuLog(LOG_LEVEL::DEBUG, "channel: %s", szChannels[i]); - // Sleep(3000); - - if((strcmp(szChannels[i], ".a")==0) && (!bR1AWritten)) { - bR1AWAccess=TRUE; - - strcpy(szInput[i], " t1"); - } else if((strcmp(szChannels[i], ".rgb")==0) && (!bR1RGBWritten)) { - bR1RGBWAccess=TRUE; - - strcpy(szInput[i], " t1"); - } else if(!bR1Written) { - bR1WAccess=TRUE; - - strcpy(szInput[i], " t1"); - } - - if(bR1AWAccess && bR1RGBWAccess) - bR1WAccess=TRUE; - - //if(bR1AWAccess || bR1RGBWAccess) - // strcpy(szInput[i], "t1"); - }*/ - - //printf("\n*** szInput[%d]: %s\n", i, szInput[i]); - } - - // Input stuff - BOOL bInput[4] = {0, 0, 0, 0}; - if(szInput[0][0]) bInput[0]=TRUE; - if(szInput[1][0]) bInput[1]=TRUE; - if(szInput[2][0]) bInput[2]=TRUE; - if(szInput[3][0]) bInput[3]=TRUE; - -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - // Correct param if a constant is used! - if(!bInput[0]) - CorrectConstToReg(szConst[0], iPSC0, iPSC1); - if(!bInput[1]) - CorrectConstToReg(szConst[1], iPSC0, iPSC1); - if(!bInput[2]) - CorrectConstToReg(szConst[2], iPSC0, iPSC1); - if(!bInput[3]) - CorrectConstToReg(szConst[3], iPSC0, iPSC1); - - bool bEmptyChannel = false; -#endif - - char szCompleteInput[4][20] = {0}; - for(i=0; i<4; i++) - { - strcpy(szCompleteInput[i], szInputMapping[i]); -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - if(bInput[i]) - { -#endif - strcat(szCompleteInput[i], szInput[i]); -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - bEmptyChannel = bEmptyChannel || (szChannels[i][0] == 0); - } - else - strcat(szCompleteInput[i], &szConst[i][4]); -#endif - strcat(szCompleteInput[i], szInputMappingAfter[i]); - strcat(szCompleteInput[i], szChannels[i]); - } - - printf(");\n"); - - if(!bFinalCombiner) - { - // OUTPUTS - if(bAlpha) printf("\npPSD.PSAlphaOutputs[%d] = PS_COMBINEROUTPUTS(\n", iCStage); - else printf("\npPSD.PSRGBOutputs[%d] = PS_COMBINEROUTPUTS(\n", iCStage); - - WORD wCombinerOutputs[3]; // 0=d0 (ab), 1=d1 (cd), 2=d2 
(mux_sum) - wCombinerOutputs[0] = (WORD) ((dwOutput>> 4) & 0xF); - wCombinerOutputs[1] = (WORD) ( dwOutput & 0xF); - wCombinerOutputs[2] = (WORD) ((dwOutput>> 8) & 0xF); - WORD wCombinerOutputFlags = (WORD) ((dwOutput>>12) & 0xFF); - - char szOutput[3][10] = {0}; - char szOutputMod[10]="\0"; - - char szABOp[10]="\0"; - char szCDOp[10]="\0"; - char szABCDOp[10]="\0"; - - BOOL bAB_B2A; - BOOL bCD_B2A; - - BOOL bR0Now = FALSE; - BOOL bR0ANow = FALSE; - BOOL bVAccess[3] = {0,0,0}; - - BOOL bOpRGB_Current = FALSE; - BOOL bCurrOpRealAlpha = FALSE; - - // Go through outputs - for(i=0; i<3; i++) - { - szOutput[i][0]=0x00; // Fast way to zero a string ;-) - - GetRegister(wCombinerOutputs[i], szOutput[i], bUniqueC0, bUniqueC1, iCStage, iPSC0, iPSC1); - if(strcmp(szOutput[i], "r0")==0) - { - bR0Now=TRUE; - - // this checks for output to r0.a - if(bGlobalRGBA || (!bGlobalRGBA && bAlpha)) - bR0AlphaOutput=TRUE; - } - - if((strcmp(szOutput[i], "v0")==0) || (strcmp(szOutput[i], "v1")==0)) { bVAccess[i] = TRUE; } - - /*BOOL bR1_Written = FALSE; - if(strcmp(szOutput[i], "r1")==0) - bR1_Written=TRUE;*/ - - // check channel! 
- if(!bGlobalRGBA && bAlpha) - { - strcat(szOutput[i], ".a"); - bCurrOpRealAlpha = TRUE; - - if(bR0Now) - bR0ANow=TRUE; - - /*if(bR1_Written) - bR1AWritten=TRUE;*/ - } - else if(!bGlobalRGBA && !bAlpha -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - && !bEmptyChannel -#endif - ) - { - strcat(szOutput[i], ".rgb"); - - if(wCombinerOutputs[i]) - bOpRGB_Current = TRUE; - - /*if(bR1_Written) - bR1RGBWritten=TRUE;*/ - } - else - { - /*if(bR1_Written) - bR1Written=TRUE;*/ - - if(bR0Now) - bR0ANow=TRUE; - } - - printf(",\n"); - - if(wCombinerOutputs[i]==0x00) - szOutput[i][0]=0x00; - - //printf("\n*** szOutput[%d]: %s\n", i, szOutput[i]); - } - - BOOL bBias=FALSE; - BOOL bSh1Bias=FALSE; - - GetOutputFlags( - wCombinerOutputFlags, - szOutputMod, - - szABOp, - szCDOp, - szABCDOp, - - &bAB_B2A, - &bCD_B2A, - - &bSh1Bias, - &bBias); - - if(bR0Now) - bR0Written=TRUE; - - if(bR0ANow) - bR0AWritten=TRUE; - - printf(");\n"); - - // Find output for the operations - char szOut[10]="\0"; - char szOut1[10]="\0"; - - //printf("|****| %s |****|\n", szOutput[1]); - - if(szOutput[0][0]) - strcpy(szOut, szOutput[0]); - if(szOutput[1][0]) - strcpy(szOut1, szOutput[1]); - -#ifndef REVEL8N_PIXEL_SHADER_CHANGES - if(szOutput[2][0]) - { - /* - //EmuWarningMsg("THIS IS WRONG, FIX ME!"); - //if(!szOutput[1][0]) - // strcpy(szOut1, szOutput[2]); - EmuLog(LOG_LEVEL::DEBUG, "(!szOutput[0][0] || !szOutput[1][0]) && szOutput[2][0] = TRUE!"); - - BOOL bUsable=TRUE; - for(i=2; i<4; i++) - { - if((strcmp(szOutput[2], szInput[i])==0) || (strcmp(szOutput[2], szOut1)==0)) { - bUsable=FALSE; - } - } - if(bUsable && !szOutput[0][0]) - { - - strcpy(szOut, szOutput[2]); - - EmuLog(LOG_LEVEL::DEBUG, "BUsable = TRUE, new output: %s", szOut); - - } - else { - printf("!WARNING!: The operation uses the output register also as input!" - "Trying to find a free output register. 
It is possible that the pixel shader " - "will generate garbage because the new free one contains data used " - "in an other comming operation!\n\n"); - - for(int j=0; j> 24) & 0xFF); - wEFG[1] = (WORD) ((dwOutput >> 16) & 0xFF); - wEFG[2] = (WORD) ((dwOutput >> 8) & 0xFF); - - BOOL bInputEFG[3] = {0, 0, 0}; - char szCompleteInputEFG[3][10]; - - char szInputEFG[3][10]; - char szInputMappingEFG[3][10]; - char szInputMappingAfterEFG[3][10]; - char szConstEFG[3][10]; - - for(i=0; i<3; i++) - { - szInputEFG[i][0]=0x00; - szInputMappingEFG[i][0]=0x00; - szInputMappingAfterEFG[i][0]=0x00; - szConstEFG[i][0]=0x00; - - GetRegister(wEFG[i] & 0xF, szInputEFG[i], bUniqueC0, bUniqueC1, 0, iPSC0, iPSC1); - printf(" | "); - GetInputMapping(wEFG[i] & 0x1E0, szInputMappingEFG[i], szInputMappingAfterEFG[i], szConstEFG[i]); - printf(" | "); - GetChannel(wEFG[i] & 0x10, szInputEFG[i], bAlpha, FALSE); - printf(", \n"); - - strcpy(szCompleteInputEFG[i], szInputMappingEFG[i]); - strcat(szCompleteInputEFG[i], szInputEFG[i]); - strcat(szCompleteInputEFG[i], szInputMappingAfterEFG[i]); - - if(szInputEFG[i][0]) - bInputEFG[i]=TRUE; - else - { - // add that constant as a reg - CorrectConstToReg(szConstEFG[i], iPSC0, iPSC1); - } - } - - if(dwV1R0_EFProd_Flags & 0x20) - printf("PS_FINALCOMBINERSETTINGS_COMPLEMENT_R0"); - else if(dwV1R0_EFProd_Flags & 0x40) - printf("PS_FINALCOMBINERSETTINGS_COMPLEMENT_V1"); - else if(dwV1R0_EFProd_Flags & 0x80) - printf("PS_FINALCOMBINERSETTINGS_CLAMP_SUM"); - else - printf("0"); - - printf(");\n"); - - if (bV1R0Reg) - { - char sMod[10] = {0}; - char sV1[10] = {0}; - char sR0[10] = {0}; - if(dwV1R0_EFProd_Flags & 0x20) - strcpy(sR0, "1-"); - else if(dwV1R0_EFProd_Flags & 0x40) - strcpy(sV1, "1-"); - else if(dwV1R0_EFProd_Flags & 0x80) - strcpy(sMod, "_sat"); - - if (bEFProduct) - { - EmuLog(LOG_LEVEL::WARNING, "EF Product and V1R0 register used at the same time!"); - } - else - { - WriteCode("; (v1 + r0)\nadd%s r0, %sr0, %sv1\n\n", sMod, sR0, sV1); - } - } - - 
// only we we will use this later in final combiner stuff!! - // all inputs are known now, so check: - if(bEFProduct) { - - // r0 = E * F (E or F must be the r0 calculated before otherwise the stage results - // are lost, problem??? - if(! - ((!bInputEFG[0] && szConstEFG[0][0]=='0') && - (!bInputEFG[1] && szConstEFG[1][0]=='0'))) { - WriteCode(";E * F\nmul r0, %s, %s\n\n", bInputEFG[0] ? szCompleteInputEFG[0] : &szConstEFG[0][4], - bInputEFG[1] ? szCompleteInputEFG[1] : &szConstEFG[1][4]); - } - - } - - // Now the result: - - // What is done by the final combiner: - // final color = s0*s1 + (1-s0)*s2 + s3 - - // lrp r0, s0, s1, s2 - // add r0, r0, s3 - // s0 = szInput[0] - // s1 = szInput[1] - // s2 = szInput[2] - // s3 = szInput[3] - - // Check whether it is a mov r0, r0 - // for example: lrp r0, 1, r0, 0 - // r0 = 1*r0 + (1-1)*r0 + 0 - // --> r0 = r0 - - for(i=0; i<4; i++) - { - if(!bInput[i]) - CorrectConstToReg(szConst[i], iPSC0, iPSC1); - } - - if(!((!bInput[0]) && (szConst[0][0] == '1') && (strncmp(szCompleteInput[1], "r0", 2)==0))) - { - // cases for s2 - // s2 == 0 --> final color = s0*s1 + s3 - if((!bInput[2]) && (szConst[2][0] == '0')) - { - WriteCode("mul r0.rgb, %s, %s\n", - bInput[0] ? szCompleteInput[0] : &szConst[0][4], - bInput[1] ? szCompleteInput[1] : &szConst[1][4]); - } - // s0 == 0 --> final color = s2 + s3 - else if((!bInput[0]) && (szConst[0][0] == '0')) { - // Check whether s2 is r0!!! - if(!(bInput[2] && (strncmp(szCompleteInput[2], "r0", 2)==0))) - WriteCode("mov r0.rgb, %s\n", - bInput[2] ? szCompleteInput[2] : &szConst[2][4]); - } - // s0 == 1 --> final color = s1 + s3 - else if((!bInput[0]) && (szConst[0][0] == '1')) { - // Check whether s1 is r0!!! - if(!(bInput[1] && (strncmp(szCompleteInput[1], "r0", 2)==0))) - WriteCode("mov r0.rgb, %s\n", - bInput[1] ? szCompleteInput[1] : &szConst[1][4]); - } - // no special cases - else if(bInput[2] || bInput[0]) - { - WriteCode("lrp r0.rgb, %s, %s, %s\n", - bInput[0] ? 
szCompleteInput[0] : &szConst[0][4], - bInput[1] ? szCompleteInput[1] : &szConst[1][4], - bInput[2] ? szCompleteInput[2] : &szConst[2][4]); - } - } - // case for s3 - if(bInput[3] || (szConst[3][0] != '0')) - WriteCode("add r0.rgb, r0, %s\n", bInput[3] ? szCompleteInput[3] : &szConst[3][4]); - - // Alpha ouput (G) - if(bInputEFG[2] && (strncmp(szInputEFG[2], "r0", 2)!=0)) - { - bR0AlphaOutput=TRUE; - - WriteCode("mov r0.a, %s\n", - bInputEFG[2] ? szCompleteInputEFG[2] : &szConstEFG[2][4]); - } - - //else - // WriteCode("mov r0.a, v0.a\n"); - //*/ - //Sleep(3000); - } -} - -inline void GetRegister(WORD wRegister, char *szRegister, BOOL bUniqueC0, BOOL bUniqueC1, int iCStage, int *iPSC0, int *iPSC1) -{ - // Determine register - switch(wRegister) - { - case 0x00: - printf("PS_REGISTER_ZERO"); - break; - case 0x01: // read - printf("PS_REGISTER_C0"); - if(bUniqueC0) - sprintf(szRegister, "c%d", iPSC0[iCStage]); - else - strcpy(szRegister, "c0"); - break; - case 0x02: // read - printf("PS_REGISTER_C1"); - if(bUniqueC0) - sprintf(szRegister, "c%d", iPSC1[iCStage]); - else - strcpy(szRegister, "c1"); - break; - case 0x03: // read - { - printf("PS_REGISTER_FOG"); - - char szOneHalf[40] = "0.5\0"; - CorrectConstToReg(szOneHalf, iPSC0, iPSC1); - - strcpy(szRegister, &szOneHalf[4]); // Unsupported - break; - } - case 0x04: // read/(write ???) - printf("PS_REGISTER_V0"); - strcpy(szRegister, "v0"); - break; - case 0x05: // read/(write ???) 
- printf("PS_REGISTER_V1"); - strcpy(szRegister, "v1"); - break; - case 0x08: // read/write - printf("PS_REGISTER_T0"); - strcpy(szRegister, "t0"); - //strcpy(szRegister, "r2"); - break; - case 0x09: // read/write - printf("PS_REGISTER_T1"); - strcpy(szRegister, "t1"); - //strcpy(szRegister, "r3"); - break; - case 0x0A: // read/write - printf("PS_REGISTER_T2"); - strcpy(szRegister, "t2"); - //strcpy(szRegister, "r4"); - break; - case 0x0B: // read/write - printf("PS_REGISTER_T3"); - strcpy(szRegister, "t3"); - //strcpy(szRegister, "r5"); - break; - case 0x0C: // read/write - printf("PS_REGISTER_R0"); - strcpy(szRegister, "r0"); - break; - case 0x0D: // read/write - printf("PS_REGISTER_R1"); - strcpy(szRegister, "r1"); - break; - case 0x0E: // read - printf("PS_REGISTER_V1R0_SUM"); - - bV1R0Reg = TRUE; - strcpy(szRegister, "r0"); //"V1R0");//(v1+r0)"); - break; - case 0x0F: - printf("PS_REGISTER_EF_PROD"); - - // we save it in r0 - bEFProduct = TRUE; - strcpy(szRegister, "r0");/* e * f --> combiner input */ - break; - default: - printf("/*Unknown register %d*/", wRegister); - break; - } -} - -inline void GetInputMapping(WORD wInputMapping, char *szInputMapping, char *szInputMappingAfter, char *szConst) -{ - strcpy(szConst, "0"); - switch(wInputMapping) - { - case 0x00: // max(0,x) [ok for final combiner] - printf("PS_INPUTMAPPING_UNSIGNED_IDENTITY"); - break; - case 0x20: // 1 - max(0,x) [ok for final combiner] - printf("PS_INPUTMAPPING_UNSIGNED_INVERT"); - strcpy(szInputMapping, "1-"); - strcpy(szConst, "1"); - break; - case 0x40: // 2*max(0,x) - 1 [invalid for final combiner] - printf("PS_INPUTMAPPING_EXPAND_NORMAL"); - strcpy(szInputMappingAfter, "_bx2"); // right??? 
- strcpy(szConst, "-1"); - break; - case 0x60: // 1 - 2*max(0,x) [invalid for final combiner] - printf("PS_INPUTMAPPING_EXPAND_NEGATE"); - - strcpy(szInputMapping, "-"); - strcpy(szInputMappingAfter, "_bx2"); - strcpy(szConst, "1"); - break; - case 0x80: // max(0,x) - 1/2 [invalid for final combiner] - printf("PS_INPUTMAPPING_HALFBIAS_NORMAL"); - strcpy(szInputMappingAfter, "_bias"); - - strcpy(szConst, "-0.5"); - break; - case 0xA0: // 1/2 - max(0,x) [invalid for final combiner] - printf("PS_INPUTMAPPING_HALFBIAS_NEGATE"); - - strcpy(szConst, "0.5"); - - // Negate is run last if combined with bias - strcpy(szInputMapping, "-"); - strcpy(szInputMappingAfter, "_bias"); - break; - case 0xC0: // x [invalid for final combiner] - printf("PS_INPUTMAPPING_SIGNED_IDENTITY"); - break; - case 0xE0: // -x [invalid for final combiner] - printf("PS_INPUTMAPPING_SIGNED_NEGATE"); - strcpy(szInputMapping, "-"); - break; - default: - printf("/*Unknown input mapping %d!*/", wInputMapping); - break; - } -} - -inline void GetChannel(WORD wInputChannel, char *szInput, BOOL bAlpha, BOOL bGlobalRGBA) -{ - switch(wInputChannel) - { - case 0x00: - if(bAlpha) { - printf("PS_CHANNEL_BLUE"); - strcat(szInput, ".b"); - } else { - printf("PS_CHANNEL_RGB"); - - //if (!bGlobalRGBA) - // strcat(szInput, ".rgb"); - } - break; - case 0x10: - printf("PS_CHANNEL_ALPHA"); - - // TODO: check this || !bAlpha, it should mean that alpha channel - // is detected in a RGB register, then it must be set also - // if both commands the same are (in that case it has to be RGB!) 
- if (!bGlobalRGBA || !bAlpha) - strcat(szInput, ".a"); - break; - default: - printf("/*Unknown channel %d!*/", wInputChannel); - break; - } -} - -inline void GetOutputFlags -( - WORD wOutputFlags, - char *szInstMod, - char *szABOp, - char *szCDOp, - char *szABCDOp, - - BOOL *bAB_BA, - BOOL *bCD_BA, - - BOOL *bShl1Bias, - BOOL *bBias -) -{ - // Output mapping - switch (wOutputFlags & 0x38) { - case PS_COMBINEROUTPUT_BIAS: - { - printf("PS_COMBINEROUTPUT_BIAS"); // y = x - 0.5 - //strcpy(szInstMod, "_bias"); - - // Only over this: - // mov y, y_bias - (*bBias)=TRUE; - break; - } - case PS_COMBINEROUTPUT_SHIFTLEFT_1: // 0x10L - { - printf("PS_COMBINEROUTPUT_SHIFTLEFT_1"); // y = x*2 - strcpy(szInstMod, "_x2"); - break; - } - case PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS: // 0x18L - { - LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS"); - printf("PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS"); // y = (x - 0.5)*2 - - //strcpy(szInstMod, "_x2"); - // what is missing is a subtraction of 1 - // --> 2 * (x - 0.5) = 2x - 1 - - // But this won't work because we would have to do 2 movs - // to subtract 1 - // Let's do this: mov_x2 y, y_bias - (*bShl1Bias)=TRUE; - break; - } - case PS_COMBINEROUTPUT_SHIFTLEFT_2: // 0x20L - { - LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTLEFT_2"); - printf("PS_COMBINEROUTPUT_SHIFTLEFT_2"); // y = x*4 - strcpy(szInstMod, "_x4"); - break; - } - // case PS_COMBINEROUTPUT_SHIFTLEFT_2_BIAS: // 0x28L, // y = (x - 0.5)*4 - case PS_COMBINEROUTPUT_SHIFTRIGHT_1: // 0x30L - { - LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTRIGHT_1"); - printf("PS_COMBINEROUTPUT_SHIFTRIGHT_1"); // y = x/2 - strcpy(szInstMod, "_d2"); - break; - } - // case PS_COMBINEROUTPUT_SHIFTRIGHT_1_BIAS: // 0x38L, // y = (x - 0.5)/2 - default: - printf("PS_COMBINEROUTPUT_IDENTITY"); - } - - printf(" | "); - - // MUX operation - if(wOutputFlags & 0x04) { - printf("PS_COMBINEROUTPUT_AB_CD_MUX"); - strcpy(szABCDOp, "cnd"); - - if((!bR0Written) || (!bR0AWritten)) - bR0WAccess=TRUE; - } - else - { - 
printf("PS_COMBINEROUTPUT_AB_CD_SUM"); // 3rd output is AB+CD - strcpy(szABCDOp, "add"); - } - - printf(" | "); - - // Function for ab side - if(wOutputFlags & 0x02) - { - printf("PS_COMBINEROUTPUT_AB_DOT_PRODUCT"); // RGB only - strcpy(szABOp, "dp3"); - } else { - printf("PS_COMBINEROUTPUT_AB_MULTIPLY"); - strcpy(szABOp, "mul"); - } - - printf(" | "); - - // Functiomn for cd side - if(wOutputFlags & 0x01) - { - printf("!!!PS_COMBINEROUTPUT_CD_DOT_PRODUCT!!!"); // RGB only - strcpy(szCDOp, "dp3"); - } else { - printf("PS_COMBINEROUTPUT_CD_MULTIPLY"); - strcpy(szCDOp, "mul"); - } - - // Blue to alpha for ab side - if(wOutputFlags & 0x80) { - printf(" | PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA"); // RGB only - (*bAB_BA)=TRUE; - } else (*bAB_BA)=FALSE; - - // Blue to alpha for cd side - if(wOutputFlags & 0x40) { - printf(" | PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA"); // RGB only - (*bCD_BA)=TRUE; - } else (*bCD_BA)=FALSE; -} - -enum OpType -{ - OPTYPE_NOP = -1, - OPTYPE_MOV = 0, - OPTYPE_ADD, - OPTYPE_MUL, - OPTYPE_DP3, - OPTYPE_CND, -}; - -inline BOOL OptimizeOperation -( - char *szOp, - char *szOp1, - - char *szOp2, - char *szMod, - - char *szInputAB1, - char *szInputAB2, - - char *szInputCD1, - char *szInputCD2, - - char *szConstRegAB1, - char *szConstRegAB2, - char *szConstRegCD1, - char *szConstRegCD2, - - char *szOutAB, - char *szOutCD, - char *szABCDOutput, - - char *szCommand) -{ - printf("----------\nszOp: |%s|\nszOp1: |%s|\nszOp2: |%s|\nszMod: |%s|\n" - "szInputAB1: |%s|\nszInputAB2: |%s|\nszInputCD1: |%s|\nszInputCD2: |%s|\n" - "szOutAB: |%s|\nszOutCD: |%s|\nszABCDOutput: |%s|\n", - szOp, szOp1, szOp2, szMod, szInputAB1, szInputAB2, szInputCD1, szInputCD2, - szOutAB, szOutCD, szABCDOutput); - - char szABCDInput[2][10]; - szABCDInput[0][0]=0x00; - szABCDInput[1][0]=0x00; - - szCommand[0]=0x00; - - char *szOps[3]; - szOps[0] = szOp; - szOps[1] = szOp1; - szOps[2] = szOp2; - - char *szInputs[4]; - szInputs[0] = szInputAB1; - szInputs[1] = szInputAB2; - szInputs[2] = 
szInputCD1; - szInputs[3] = szInputCD2; - - char *szRealInputs[4]; - szRealInputs[0] = szConstRegAB1; - szRealInputs[1] = szConstRegAB2; - szRealInputs[2] = szConstRegCD1; - szRealInputs[3] = szConstRegCD2; - -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - char *szOutputs[3]; - szOutputs[0] = szOutAB; - szOutputs[1] = szOutCD; - szOutputs[2] = szABCDOutput; -#endif - - // TODO: check mov: other operations like lrp - // are ignored because of a shitty mul with 1 - BOOL bMov[3]={0, 0, 0}; - - int i=0; - for(i=0; i<2; i++) - { - //printf("szOps[i]: %s\n", szOps[i]); - //printf("szInputs[i*2+1]: %s\n", szInputs[i*2+1]); - if(strcmp(szOps[i], "mul")==0) - { - // If it is a mul, it can also be only a mov - if(strcmp(szInputs[i*2], "1")==0) { - //strcpy(szABCDInput[i], szInputs[i*2+1]); -#ifndef REVEL8N_PIXEL_SHADER_CHANGES - strcpy(szABCDInput[i], szRealInputs[i*2+1]); -#endif - - strcpy(szOps[i], "mov"); - - strcpy(szInputs[i*2], szInputs[i*2+1]); - strcpy(szRealInputs[i*2], szRealInputs[i*2+1]); - - strcpy(szInputs[i*2+1], ""); - strcpy(szRealInputs[i*2+1], ""); - - bMov[i]=TRUE; - - } else if(strcmp(szInputs[i*2+1], "1")==0) { - //strcpy(szABCDInput[i], szInputs[i*2]); -#ifndef REVEL8N_PIXEL_SHADER_CHANGES - strcpy(szABCDInput[i], szRealInputs[i*2]); -#endif - - strcpy(szOps[i], "mov"); - - strcpy(szInputs[i*2+1], ""); - strcpy(szRealInputs[i*2+1], ""); - - bMov[i]=TRUE; - } - } - } - -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - OpType eOpTypes[3] = {OPTYPE_NOP, OPTYPE_NOP, OPTYPE_NOP}; - for (i = 0; i < 3; ++i) - { - if (strcmp(szOps[i], "mov") == 0) - eOpTypes[i] = OPTYPE_MOV; - else if (strcmp(szOps[i], "add") == 0) - eOpTypes[i] = OPTYPE_ADD; - else if (strcmp(szOps[i], "mul") == 0) - eOpTypes[i] = OPTYPE_MUL; - else if (strcmp(szOps[i], "dp3") == 0) - eOpTypes[i] = OPTYPE_DP3; - else if (strcmp(szOps[i], "cnd") == 0) - eOpTypes[i] = OPTYPE_CND; - else - eOpTypes[i] = OPTYPE_NOP; - } - - bool bHandled = false; - int iOffset = 0; - int iOpCount = 0; - if (szOps[2][0] && 
szOutputs[2][0] && szOutputs[2][0] != 'v') - { - if (!szOutputs[0][0] && - !szOutputs[1][0]) - { - if (szMod[0]) - { - EmuLog(LOG_LEVEL::WARNING, "Destination modifier present!"); - } - switch (eOpTypes[2]) - { - case OPTYPE_ADD: - { - if (eOpTypes[0] == OPTYPE_MOV && - eOpTypes[1] == OPTYPE_MOV) - { - iOffset += sprintf(szCommand + iOffset, "add%s %s, %s, %s\n", - szMod, szOutputs[2], szRealInputs[0], szRealInputs[2]); - ++iOpCount; - bHandled = true; - } - else if (eOpTypes[0] == OPTYPE_MOV && - eOpTypes[1] == OPTYPE_MUL) - { - iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", - szMod, szOutputs[2], szRealInputs[2], szRealInputs[3], szRealInputs[0]); - bHandled = true; - ++iOpCount; - } - else if (eOpTypes[0] == OPTYPE_MUL && - eOpTypes[1] == OPTYPE_MOV) - { - iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", - szMod, szOutputs[2], szRealInputs[0], szRealInputs[1], szRealInputs[2]); - bHandled = true; - ++iOpCount; - } - else if (eOpTypes[0] == OPTYPE_MUL && - eOpTypes[1] == OPTYPE_MUL) - { - // nice, mul, mul, add can be converted to lrp - // lrp r0, t0, t1, c2 - // --> r0 = t0 * t1 + (1-t0) * c2 - // or r0 = c2 + t0 * (t1 - c2), but that would mean we have to mul in the ABCD op - // and that is not possible - - for(i=0; i<2; i++) - { - // To match the first option, the first input of the AB/CD op must inverted - BOOL bInvert[2] = {0, 0}; - if((szRealInputs[2*i][0] == '1') && (szRealInputs[2*i][1] == '-')) - //if((szInputs[2*i][0] == '1') && (szInputs[2*i][1] == '-')) - bInvert[0]=TRUE; - - if((szRealInputs[2*i+1][0] == '1') && (szRealInputs[2*i+1][1] == '-')) - //if((szInputs[2*i+1][0] == '1') && (szInputs[2*i+1][1] == '-')) - bInvert[1]=TRUE; - - //printf("szInputs[2*i]: %s\nszInputs[2*i+1]: %s\n", szInputs[2*i], szInputs[2*i+1]); - //printf("bInvert[0]: %d\nbInvert[1]: %d\n", bInvert[0], bInvert[1]); - - if((bInvert[0] || bInvert[1]) && (!(bInvert[0] && bInvert[1]))) - { - char szParam[3][10] = {0}; - char szRealParam0[10] = 
{0}; - if(bInvert[0]) - { - // copy over the not inverted param - strcpy(szParam[i+1], /*szInputs*/szRealInputs[2*i+1]); - - // and the inverted - strcpy(szParam[0], &szInputs[2*i][2]); - strcpy(szRealParam0, &szRealInputs[2*i][2]); - } - else if(bInvert[1]) - { - // copy over the not inverted param - strcpy(szParam[i+1], /*szInputs*/szRealInputs[2*i]); - - // and the inverted - strcpy(szParam[0], &szInputs[2*i+1][2]); - strcpy(szRealParam0, &szRealInputs[2*i+1][2]); - } - int iOtherOp = i == 0 ? 1 : 0; - - bHandled = true; - if (strcmp(szRealInputs[2*iOtherOp], szRealParam0/*szParam[0]*/)==0) - strcpy(szParam[iOtherOp+1], /*szInputs*/szRealInputs[2*iOtherOp+1]); - else if (strcmp(szRealInputs[2*iOtherOp+1], szRealParam0/*szParam[0]*/)==0) - strcpy(szParam[iOtherOp+1], /*szInputs*/szRealInputs[2*iOtherOp]); - else - bHandled = false; - if (bHandled) - { - // ok, we have it - iOffset += sprintf(szCommand, "lrp%s %s, %s, %s, %s\n", - szMod, szABCDOutput, szRealParam0/*szParam[0]*/, szParam[1], szParam[2]); - ++iOpCount; - break; - } - } - } - - if (!bHandled) - { - iOffset += sprintf(szCommand + iOffset, "mul r1, %s, %s\n", - szRealInputs[0], szRealInputs[1]); - ++iOpCount; - iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, r1\n", - szMod, szOutputs[2], szRealInputs[2], szRealInputs[3]); - ++iOpCount; - - bHandled = true; - } - } - } - break; - case OPTYPE_CND: - { - if (eOpTypes[0] == OPTYPE_MOV && - eOpTypes[1] == OPTYPE_MOV) - { - iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, %s, %s\n", - szMod, szOutputs[2], szRealInputs[2], szRealInputs[0]); - ++iOpCount; - bHandled = true; - } - else if (eOpTypes[0] == OPTYPE_MUL && - eOpTypes[1] == OPTYPE_MUL) - { - if (szOutputs[2][0] != 'r') - { - EmuLog(LOG_LEVEL::WARNING, "Destination not temporary register!"); - } - // ab input - iOffset += sprintf(szCommand + iOffset, "mul%s r1, %s, %s\n", - szMod, szRealInputs[0], szRealInputs[1]); - ++iOpCount; - // cd input - iOffset += sprintf(szCommand + 
iOffset, "mul%s r0, %s, %s\n", - szMod, szRealInputs[2], szRealInputs[3]); - ++iOpCount; - // abcd output - iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, r0, r1\n", - szMod, szOutputs[2]); - ++iOpCount; - bHandled = true; - } - } - break; - } - if (!bHandled && strcmp(szOps[2], "add") == 0) - { - if ((strcmp(szOps[0], "mov")==0)) - { - if ((strcmp(szOps[1], "mul")==0)) - { - char szParam[10]="\0"; - - if(strcmp(szInputCD1, "-1")==0) - strcpy(szParam, szInputCD2); - else if(strcmp(szInputCD2, "-1")==0) - strcpy(szParam, szInputCD1); - - if(szParam[0] && szConstRegAB1[0] && szABCDOutput[0]) - { - iOffset += sprintf(szCommand, "sub%s %s, %s, %s\n", - szMod, szABCDOutput, szConstRegAB1, szParam); - bHandled = true; - ++iOpCount; - } -// else -// { -// iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", -// szMod, szOutputs[2], szRealInputs[2], szRealInputs[3], szRealInputs[0]); -// bHandled = true; -// ++iOpCount; -// } - } - } -// else if ((strcmp(szOps[0], "mul")==0)) -// { -// if ((strcmp(szOps[1], "mov")==0)) -// { -// iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", -// szMod, szOutputs[2], szRealInputs[0], szRealInputs[1], szRealInputs[2]); -// bHandled = true; -// ++iOpCount; -// } -// else if ((strcmp(szOps[1], "mul")==0)) -// { -// // nice, mul, mul, add can be converted to lrp -// // lrp r0, t0, t1, c2 -// // --> r0 = t0 * t1 + (1-t0) * c2 -// // or r0 = c2 + t0 * (t1 - c2), but that would mean we have to mul in the ABCD op -// // and that is not possible -// -// for(i=0; i<2; i++) -// { -// // To match the first option, the first input of the AB/CD op must inverted -// BOOL bInvert[2] = {0, 0}; -// if((szRealInputs[2*i][0] == '1') && (szRealInputs[2*i][1] == '-')) -// //if((szInputs[2*i][0] == '1') && (szInputs[2*i][1] == '-')) -// bInvert[0]=TRUE; -// -// if((szRealInputs[2*i+1][0] == '1') && (szRealInputs[2*i+1][1] == '-')) -// //if((szInputs[2*i+1][0] == '1') && (szInputs[2*i+1][1] == '-')) -// bInvert[1]=TRUE; 
-// -// //printf("szInputs[2*i]: %s\nszInputs[2*i+1]: %s\n", szInputs[2*i], szInputs[2*i+1]); -// //printf("bInvert[0]: %d\nbInvert[1]: %d\n", bInvert[0], bInvert[1]); -// -// if((bInvert[0] || bInvert[1]) && (!(bInvert[0] && bInvert[1]))) -// { -// char szParam[3][10]; -// char szRealParam0[10]; -// if(bInvert[0]) -// { -// // copy over the not inverted param -// strcpy(szParam[2], /*szInputs*/szRealInputs[2*i+1]); -// -// // and the inverted -// strcpy(szParam[0], &szInputs[2*i][2]); -// strcpy(szRealParam0, &szRealInputs[2*i][2]); -// } -// else if(bInvert[1]) -// { -// // copy over the not inverted param -// strcpy(szParam[2], /*szInputs*/szRealInputs[2*i]); -// -// // and the inverted -// strcpy(szParam[0], &szInputs[2*i+1][2]); -// strcpy(szRealParam0, &szRealInputs[2*i+1][2]); -// } -// int iOtherOp = i == 0 ? 1 : 0; -// -// bHandled = true; -// if (strcmp(szRealInputs[2*iOtherOp], szRealParam0/*szParam[0]*/)==0) -// strcpy(szParam[1], /*szInputs*/szRealInputs[2*iOtherOp+1]); -// else if (strcmp(szRealInputs[2*iOtherOp+1], szRealParam0/*szParam[0]*/)==0) -// strcpy(szParam[1], /*szInputs*/szRealInputs[2*iOtherOp]); -// else -// bHandled = false; -// if (bHandled) -// { -// // ok, we have it -// iOffset += sprintf(szCommand, "lrp%s %s, %s, %s, %s\n", -// szMod, szABCDOutput, szRealParam0/*szParam[0]*/, szParam[1], szParam[2]); -// ++iOpCount; -// break; -// } -// } -// } -// -// if (!bHandled) -// { -// iOffset += sprintf(szCommand + iOffset, "mul r1, %s, %s\n", -// szRealInputs[0], szRealInputs[1]); -// ++iOpCount; -// iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, r1\n", -// szMod, szOutputs[2], szRealInputs[2], szRealInputs[3]); -// ++iOpCount; -// -// bHandled = true; -// } -// } -// } - } - } - } - - if (!bHandled) - { - for (i = 0; i < 2; ++i) - { - if (szOps[i][0] && szOutputs[i][0] && szOutputs[i][0] != 'v') - { - ++iOpCount; - // copy output value to final input - strcpy(szABCDInput[i], szOutputs[i]); - // insert command - iOffset += 
sprintf(szCommand + iOffset, "%s%s %s, %s\n", szOps[i], szMod, szOutputs[i], szRealInputs[i * 2 + 0]); - - // if there are more parameters... - if (szRealInputs[i * 2 + 1][0]) - { - // backspace of the newline character - --iOffset; - // insert remaining parameters - iOffset += sprintf(szCommand + iOffset, ", %s\n", szRealInputs[i * 2 + 1]); - } - bHandled = true; - } - } - -// if (szOutputs[2][0]) -// { -// if(!szOutputs[1][0]) -// strcpy(szOutputs[1], "r0"); -// if(!szOutputs[0][0]) -// strcpy(szOutputs[0], "r1"); -// } - - if (szOps[2][0] && szOutputs[2][0] && szOutputs[2][0] != 'v') - { - switch (eOpTypes[2]) - { - case OPTYPE_ADD: - { - if (szABCDInput[0][0] && - szABCDInput[1][0]) - { - iOffset += sprintf(szCommand + iOffset, "add%s %s, %s, %s\n", - szMod, szOutputs[2], szABCDInput[0], szABCDInput[1]); - ++iOpCount; - bHandled = true; - } - else if (szABCDInput[0][0] && - !szABCDInput[1][0]) - { - switch (eOpTypes[1]) - { - case OPTYPE_MUL: - { - iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", - szMod, szOutputs[2], szRealInputs[2], szRealInputs[3], szABCDInput[0]); - ++iOpCount; - bHandled = true; - } - break; - case OPTYPE_DP3: - { - { - ++iOpCount; - // insert command - iOffset += sprintf(szCommand + iOffset, "%s%s r1, %s\n", szOps[1], szMod, szRealInputs[2]); - - // if there are more parameters... 
- if (szRealInputs[3][0]) - { - // backspace of the newline character - --iOffset; - // insert remaining parameters - iOffset += sprintf(szCommand + iOffset, ", %s\n", szRealInputs[3]); - } - } - { - iOffset += sprintf(szCommand + iOffset, "add%s %s, %s, r1\n", - szMod, szOutputs[2], szABCDInput[0]); - ++iOpCount; - bHandled = true; - } - } - break; - default: - break; - } - } - else if (!szABCDInput[0][0] && - szABCDInput[1][0]) - { - switch (eOpTypes[0]) - { - case OPTYPE_MUL: - { - iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", - szMod, szOutputs[2], szRealInputs[0], szRealInputs[1], szABCDInput[1]); - ++iOpCount; - bHandled = true; - } - break; - case OPTYPE_DP3: - { - { - ++iOpCount; - // insert command - iOffset += sprintf(szCommand + iOffset, "%s%s r1, %s\n", szOps[0], szMod, szRealInputs[0]); - - // if there are more parameters... - if (szRealInputs[1][0]) - { - // backspace of the newline character - --iOffset; - // insert remaining parameters - iOffset += sprintf(szCommand + iOffset, ", %s\n", szRealInputs[1]); - } - } - { - iOffset += sprintf(szCommand + iOffset, "add%s %s, r1, %s\n", - szMod, szOutputs[2], szABCDInput[1]); - ++iOpCount; - bHandled = true; - } - } - break; - default: - break; - } - } - } - break; - case OPTYPE_CND: - { - if (szABCDInput[0][0] && - szABCDInput[1][0]) - { - iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, %s, %s\n", - szMod, szOutputs[2], szABCDInput[1], szABCDInput[0]); - ++iOpCount; - bHandled = true; - } - else if (szABCDInput[0][0] && - !szABCDInput[1][0]) - { - { - ++iOpCount; - // insert command - iOffset += sprintf(szCommand + iOffset, "%s%s r1, %s\n", szOps[1], szMod, szRealInputs[2]); - - // if there are more parameters... 
- if (szRealInputs[3][0]) - { - // backspace of the newline character - --iOffset; - // insert remaining parameters - iOffset += sprintf(szCommand + iOffset, ", %s\n", szRealInputs[3]); - } - } - { - iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, r1, %s\n", - szMod, szOutputs[2], szABCDInput[0]); - ++iOpCount; - bHandled = true; - } - } - else if (!szABCDInput[0][0] && - szABCDInput[1][0]) - { - { - ++iOpCount; - // insert command - iOffset += sprintf(szCommand + iOffset, "%s%s r1, %s\n", szOps[0], szMod, szRealInputs[0]); - - // if there are more parameters... - if (szRealInputs[1][0]) - { - // backspace of the newline character - --iOffset; - // insert remaining parameters - iOffset += sprintf(szCommand + iOffset, ", %s\n", szRealInputs[1]); - } - } - { - iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, %s, r1\n", - szMod, szOutputs[2], szABCDInput[1]); - ++iOpCount; - bHandled = true; - } - } - } - break; - } - if (!bHandled) - { - EmuLog(LOG_LEVEL::WARNING, "Unhandled pixel shader instruction!"); - } -// if (strcmp(szOps[2], "add") == 0) -// { -// if (szABCDInput[0][0] && -// szABCDInput[1][0]) -// { -// iOffset += sprintf(szCommand + iOffset, "add%s %s, %s, %s\n", -// szMod, szOutputs[2], szABCDInput[1], szABCDInput[0]); -// ++iOpCount; -// bHandled = true; -// } -// else -// { -// EmuLog(LOG_LEVEL::WARNING, "Unhandled pixel shader instruction!"); -// } -// } -// else if (strcmp(szOps[2], "cnd") == 0) -// { -// if (szABCDInput[0][0] && -// szABCDInput[1][0]) -// { -// iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, %s, %s\n", -// szMod, szOutputs[2], szABCDInput[1], szABCDInput[0]); -// ++iOpCount; -// bHandled = true; -// } -// else -// { -// EmuLog(LOG_LEVEL::WARNING, "Unhandled pixel shader instruction!"); -// } -// } -// else -// { -// EmuLog(LOG_LEVEL::WARNING, "Unhandled pixel shader instruction!"); -// } - } - } - - if(szCommand[0]) - printf("new command:\n%s\n", szCommand); - return (bHandled && (iOpCount == 1)) ? 
(TRUE) : (FALSE); -#endif - - if( - (strcmp(szOp, "mul")==0) && - (strcmp(szOp1, "mov")==0) && //bMov[1] && - (strcmp(szOp2, "add")==0) && - szABCDOutput[0]) - { - sprintf(szCommand, "mad%s %s, %s, %s, %s\n", - szMod, szABCDOutput, - /*szInput*/szConstRegAB1, - /*szInput*/szConstRegAB2, - /*szInput*/szConstRegCD1 /*because it's a mov now*/); - } - else if( - (strcmp(szOp, "mul")==0) && - (strcmp(szOp1, "mul")==0) && - (strcmp(szOp2, "add")==0) && - szABCDOutput[0]) // TODO: check that strange lrp/ABCDOutput[0]=0 case - { - // nice, mul, mul, add can be converted to lrp - // lrp r0, t0, t1, c2 - // --> r0 = t0 * t1 + (1-t0) * c2 - // or r0 = c2 + t0 * (t1 - c2), but that would mean we have to mul in the ABCD op - // and that is not possible - - for(i=0; i<2; i++) - { - // To match the first option, the first input of the AB/CD op must inverted - BOOL bInvert[2] = {0, 0}; - if((szInputs[2*i][0] == '1') && (szInputs[2*i][1] == '-')) - bInvert[0]=TRUE; - - if((szInputs[2*i+1][0] == '1') && (szInputs[2*i+1][1] == '-')) - bInvert[1]=TRUE; - - //printf("szInputs[2*i]: %s\nszInputs[2*i+1]: %s\n", szInputs[2*i], szInputs[2*i+1]); - //printf("bInvert[0]: %d\nbInvert[1]: %d\n", bInvert[0], bInvert[1]); - - if((bInvert[0] || bInvert[1]) && (!(bInvert[0] && bInvert[1]))) - { - char szParam[3][10]; - char szRealParam0[10]; - if(bInvert[0]) - { - // copy over the not inverted param - strcpy(szParam[2], /*szInputs*/szRealInputs[2*i+1]); - - // and the inverted - strcpy(szParam[0], &szInputs[2*i][2]); - strcpy(szRealParam0, &szRealInputs[2*i][2]); - } - else if(bInvert[1]) - { - // copy over the not inverted param - strcpy(szParam[2], /*szInputs*/szRealInputs[2*i]); - - // and the inverted - strcpy(szParam[0], &szInputs[2*i+1][2]); - strcpy(szRealParam0, &szRealInputs[2*i+1][2]); - } - int iOtherOp = i == 0 ? 
1 : 0; - - if(strcmp(szInputs[2*iOtherOp], szParam[0])==0) - strcpy(szParam[1], /*szInputs*/szRealInputs[2*iOtherOp+1]); - else - strcpy(szParam[1], /*szInputs*/szRealInputs[2*iOtherOp]); - // ok, we have it - sprintf(szCommand, "lrp%s %s, %s, %s, %s\n", - szMod, szABCDOutput, szRealParam0/*szParam[0]*/, szParam[1], szParam[2]); - - break; - } - } - } else if(strcmp(szOp2, "cnd")==0) { -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - iOffset = 0; - i = 0; - for (i = 0; i < 2; ++i) - { - if (strcmp(szOps[i], "mul")==0) - { - strcpy(szABCDInput[i], szOutputs[i]); - iOffset += sprintf(szCommand + iOffset, "mul %s, %s, %s\n", szOutputs[i], szRealInputs[i * 2 + 0], szRealInputs[i * 2 + 1]); - } - } - sprintf(szCommand + iOffset, "cnd%s %s, %s, %s, %s\n", - szMod, szABCDOutput, "r0.a", szABCDInput[1], szABCDInput[0]); -#else - sprintf(szCommand, "cnd%s %s, %s, %s, %s\n", - szMod, szABCDOutput, "r0.a", szABCDInput[1], szABCDInput[0]); -#endif - - bMov[1]=0; - bMov[0]=0; - } else if( - (strcmp(szOp, "mov")==0) && - (strcmp(szOp1, "mul")==0) && - (strcmp(szOp2, "add")==0)) - { - char szParam[10]="\0"; - - if(strcmp(szInputCD1, "-1")==0) - strcpy(szParam, szInputCD2); - else if(strcmp(szInputCD2, "-1")==0) - strcpy(szParam, szInputCD1); - - if(szParam[0] && szConstRegAB1[0] && szABCDOutput[0]) - { - sprintf(szCommand, "sub%s %s, %s, %s\n", - szMod, szABCDOutput, szConstRegAB1, szParam); - } - - } -//do_operation_with_new_input: - - if(bMov[0] && bMov[1] && szABCDOutput[0]) { - sprintf(szCommand, "%s%s %s, %s, %s\n", szOp2, szMod, szABCDOutput, szABCDInput[0], szABCDInput[1]); - } - - if(szCommand[0]) - printf("new command: %s", szCommand); - return TRUE; -} - -float fConstants[20] = {0.0f}; -int iConstants[20] = {0}; -int iConstCount=0; - -inline void ClearConstRegVars() -{ - iConstCount=0; - memset(fConstants, 0x00, 20*sizeof(float)); - memset(iConstants, 0x00, 20*sizeof(int)); -} - -inline void CorrectConstToReg(char *szConst, int *pPSC0, int *pPSC1) -{ - printf("Looking for %s\n", 
szConst); - float fConst = (float)atof(szConst); - - // check whether we already saved it - int i=0; - for(i=0; iPSAlphaInputs[0], pPSDef->PSAlphaInputs[1], pPSDef->PSAlphaInputs[2], pPSDef->PSAlphaInputs[3], - pPSDef->PSAlphaInputs[4], pPSDef->PSAlphaInputs[5], pPSDef->PSAlphaInputs[6], pPSDef->PSAlphaInputs[7], - pPSDef->PSFinalCombinerInputsABCD, - pPSDef->PSFinalCombinerInputsEFG, - pPSDef->PSConstant0[0], pPSDef->PSConstant0[1], pPSDef->PSConstant0[2], pPSDef->PSConstant0[3], - pPSDef->PSConstant0[4], pPSDef->PSConstant0[5], pPSDef->PSConstant0[6], pPSDef->PSConstant0[7], - pPSDef->PSConstant1[0], pPSDef->PSConstant1[1], pPSDef->PSConstant1[2], pPSDef->PSConstant1[3], - pPSDef->PSConstant1[4], pPSDef->PSConstant1[5], pPSDef->PSConstant1[6], pPSDef->PSConstant1[7], - pPSDef->PSAlphaOutputs[0], pPSDef->PSAlphaOutputs[1], pPSDef->PSAlphaOutputs[2], pPSDef->PSAlphaOutputs[3], - pPSDef->PSAlphaOutputs[4], pPSDef->PSAlphaOutputs[5], pPSDef->PSAlphaOutputs[6], pPSDef->PSAlphaOutputs[7], - pPSDef->PSRGBInputs[0], pPSDef->PSRGBInputs[1], pPSDef->PSRGBInputs[2], pPSDef->PSRGBInputs[3], - pPSDef->PSRGBInputs[4], pPSDef->PSRGBInputs[5], pPSDef->PSRGBInputs[6], pPSDef->PSRGBInputs[7], - pPSDef->PSCompareMode, - pPSDef->PSFinalCombinerConstant0, - pPSDef->PSFinalCombinerConstant1, - pPSDef->PSRGBOutputs[0], pPSDef->PSRGBOutputs[1], pPSDef->PSRGBOutputs[2], pPSDef->PSRGBOutputs[3], - pPSDef->PSRGBOutputs[4], pPSDef->PSRGBOutputs[5], pPSDef->PSRGBOutputs[6], pPSDef->PSRGBOutputs[7], - pPSDef->PSCombinerCount, - XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES), /* pPSDef->PSTextureModes is stored in a different place than pPSDef*/ - pPSDef->PSDotMapping, - pPSDef->PSInputTexture, - pPSDef->PSC0Mapping, - pPSDef->PSC1Mapping, - pPSDef->PSFinalCombinerConstants ); - if (pszCode) - { - fprintf(out, "\n\n%s\n", pszCode); - } - - fclose( out ); - } -} - -// print relevant contents to the debug console -void PrintPixelShaderDefContents(xbox::X_D3DPIXELSHADERDEF* 
pPSDef ) -{ - // Show the contents to the user - if( pPSDef ) - { - DbgPshPrintf( "\n-----PixelShader Def Contents-----\n" ); - - if(XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES)) - { - DWORD dwPSTexMode0 = (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) >> 0 ) & 0x1F; - DWORD dwPSTexMode1 = (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) >> 5 ) & 0x1F; - DWORD dwPSTexMode2 = (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) >> 10 ) & 0x1F; - DWORD dwPSTexMode3 = (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) >> 15 ) & 0x1F; - - DbgPshPrintf( "PSTextureModes ->\n" ); - DbgPshPrintf( "Stage 0: %s\n", PS_TextureModesStr[dwPSTexMode0] ); - DbgPshPrintf( "Stage 1: %s\n", PS_TextureModesStr[dwPSTexMode1] ); - DbgPshPrintf( "Stage 2: %s\n", PS_TextureModesStr[dwPSTexMode2] ); - DbgPshPrintf( "Stage 3: %s\n", PS_TextureModesStr[dwPSTexMode3] ); - } - - if( pPSDef->PSDotMapping ) - { - DWORD dwPSDMStage1 = ( pPSDef->PSDotMapping >> 0 ) & 0x7; - DWORD dwPSDMStage2 = ( pPSDef->PSDotMapping >> 4 ) & 0x7; - DWORD dwPSDMStage3 = ( pPSDef->PSDotMapping >> 8 ) & 0x7; - - DbgPshPrintf( "PSDotMapping ->\n" ); - DbgPshPrintf( "Stage 1: %s\n", PS_DotMappingStr[dwPSDMStage1] ); - DbgPshPrintf( "Stage 2: %s\n", PS_DotMappingStr[dwPSDMStage2] ); - DbgPshPrintf( "Stage 3: %s\n", PS_DotMappingStr[dwPSDMStage3] ); - } - - if( pPSDef->PSCompareMode ) - { - DWORD dwPSCMStage0 = ( pPSDef->PSCompareMode >> 0 ) & 0xF; - DWORD dwPSCMStage1 = ( pPSDef->PSCompareMode >> 4 ) & 0xF; - DWORD dwPSCMStage2 = ( pPSDef->PSCompareMode >> 8 ) & 0xF; - DWORD dwPSCMStage3 = ( pPSDef->PSCompareMode >> 12 ) & 0xF; - - DbgPshPrintf( "PSCompareMode ->\n" ); - DbgPshPrintf( "Stage 0: %s\n", PS_TextureModesStr[dwPSCMStage0 == 0 ? 0 : 1] ); - DbgPshPrintf( "Stage 1: %s\n", PS_TextureModesStr[dwPSCMStage1 == 0 ? 2 : 3] ); - DbgPshPrintf( "Stage 2: %s\n", PS_TextureModesStr[dwPSCMStage2 == 0 ? 
4 : 5] ); - DbgPshPrintf( "Stage 3: %s\n", PS_TextureModesStr[dwPSCMStage3 == 0 ? 6 : 7] ); - } - - if( pPSDef->PSInputTexture ) - { - DWORD dwPSITStage2 = ( pPSDef->PSInputTexture >> 16 ) & 0x1; - DWORD dwPSITStage3 = ( pPSDef->PSInputTexture >> 20 ) & 0x3; - - DbgPshPrintf( "PSInputTexture ->\n" ); - DbgPshPrintf( "Stage 2: %s\n", PS_TextureModesStr[dwPSITStage2] ); - DbgPshPrintf( "Stage 3: %s\n", PS_TextureModesStr[dwPSITStage3] ); - } - - if( pPSDef->PSCombinerCount ) - { - DWORD dwPSCCNumCombiners = ( pPSDef->PSCombinerCount >> 0 ) & 0xF; - DWORD dwPSCCMux = ( pPSDef->PSCombinerCount >> 8 ) & 0x1; - DWORD dwPSCCC0 = ( pPSDef->PSCombinerCount >> 12 ) & 0x1; - DWORD dwPSCCC1 = ( pPSDef->PSCombinerCount >> 16 ) & 0x1; - - DbgPshPrintf( "PSCombinerCount ->\n" ); - DbgPshPrintf( "Combiners: %d\n", dwPSCCNumCombiners ); - DbgPshPrintf( "Mux: %s\n", PS_CombinerCountFlagsStr[dwPSCCMux] ); - DbgPshPrintf( "C0: %s\n", PS_CombinerCountFlagsStr[dwPSCCC0 == 0 ? 2 : 3] ); - DbgPshPrintf( "C1: %s\n", PS_CombinerCountFlagsStr[dwPSCCC1 == 0 ? 
4 : 5] ); - } - - /*for( int i = 0; i > 7; i++ ) - { - if( pPSDef->PSRGBInputs[i] ) - {*/ - } + fColor[PSH_XBOX_CONSTANT_FRONTFACE_FACTOR].r = frontfaceFactor; + + // Assume all constants are in use (this is much easier than tracking them for no other purpose than to skip a few here) + // Read the color from the corresponding render state slot : + // Set all host constant values using a single call: + g_pD3DDevice->SetPixelShaderConstantF(0, reinterpret_cast(fColor), PSH_XBOX_CONSTANT_MAX); } diff --git a/src/core/hle/D3D8/XbPixelShader.h b/src/core/hle/D3D8/XbPixelShader.h index f5c0edc36..737ba9da1 100644 --- a/src/core/hle/D3D8/XbPixelShader.h +++ b/src/core/hle/D3D8/XbPixelShader.h @@ -29,10 +29,501 @@ #include "core\hle\D3D8\XbD3D8Types.h" -// dump pixel shader definition to file -void DumpPixelShaderDefToFile( xbox::X_D3DPIXELSHADERDEF* pPSDef, const char* pszCode ); -// print relevant contents to the debug console -void PrintPixelShaderDefContents(xbox::X_D3DPIXELSHADERDEF* pDSDef ); +/*---------------------------------------------------------------------------*/ +/* Texture configuration - The following members of the D3DPixelShaderDef */ +/* structure define the addressing modes of each of the four texture stages:*/ +/* PSTextureModes */ +/* PSDotMapping */ +/* PSInputTexture */ +/* PSCompareMode */ +/*---------------------------------------------------------------------------*/ + +// ========================================================================================================= +// PSTextureModes +// --------.--------.--------.---xxxxx stage 0 PS_TEXTUREMODES +// --------.--------.------xx.xxx----- stage 1 PS_TEXTUREMODES +// --------.--------.-xxxxx--.-------- stage 2 PS_TEXTUREMODES +// --------.----xxxx.x-------.-------- stage 3 PS_TEXTUREMODES + +#define PS_TEXTUREMODES(t0,t1,t2,t3) (((t3)<<15)|((t2)<<10)|((t1)<<5)|(t0)) + +/* +Texture modes: +NONE :stage inactive +PROJECT2D :argb = texture(s/q, t/q) +PROJECT3D :argb = texture(s/q, t/q, r/q) 
+CUBEMAP :argb = cubemap(s,t,r) +PASSTHRU :argb = s,t,r,q +CLIPPLANE :pixel not drawn if s,t,r, or q < 0. PSCompareMode affects comparison +BUMPENVMAP :argb=texture(s+mat00*src.r+mat01*src.g, + t+mat10*src.r+mat11*src.g) + mat00 set via D3DTSS_BUMPENVMAT00, etc. +BUMPENVMAP_LUM :argb=texture(s+mat00*src.r+mat01*src.g, + t+mat10*src.r+mat11*src.g); + rgb *= (lum_scale*src.b + lum_bias); (a is not affected) + lum_scale set by D3DTSS_BUMPENVLSCALE + lum_bias set by D3DTSS_BUMPENVLOFFSET + mat00 set via D3DTSS_BUMPENVMAT00, etc. +BRDF :argb = texture(eyeSigma, lightSigma, dPhi) + eyeSigma = Sigma of eye vector in spherical coordinates, read from stage-2 as (16 bit phi,sigma) + lightSigma = Sigma of light vector in spherical coordinates, read from stage-1 as (16 bit phi,sigma) + dPhi = Phi of eye - Phi of light +DOT_ST :argb = texture(, (s,t,r).(src.r,src.g,src.b)) +DOT_ZW :frag depth = (/((s,t,r).(src.r,src.g,src.b)) +DOT_RFLCT_DIFF :n = (,(s,t,r).(src.r,src.g,src.b),) + argb = cubemap(n) +DOT_RFLCT_SPEC :n = (,,(s,t,r).(src.r,src.g,src.b)) + r = 2*n*(n.e)/(n.n) - e where e is eye vector built from q texture coordinate of each stage + argb = cubemap(r) +DOT_STR_3D :argb=texture((,,(s,t,r).(src.r,src.g,src.b))) +DOT_STR_CUBE :argb=cubemap((,,(s,t,r).(src.r,src.g,src.b))) +DEPENDENT_AR :argb = texture(src.a, src.r) +DEPENDENT_GB :argb = texture(src.g, src.b) +DOTPRODUCT :argb = (s,t,r).(src.r,src.g,src.b) +DOT_RFLCT_SPEC_CONST :n = (,,(s,t,r).(src.r,src.g,src.b)) + r = 2*n*(n.e)/(n.n) - e where e is eye vector set via SetEyeVector() into c0 + argb = cubemap(r) +*/ + +enum PS_TEXTUREMODES +{ // valid in stage 0 1 2 3 Uses + PS_TEXTUREMODES_NONE= 0x00L, // * * * * + PS_TEXTUREMODES_PROJECT2D= 0x01L, // * * * * Sample + PS_TEXTUREMODES_PROJECT3D= 0x02L, // * * * * Sample + PS_TEXTUREMODES_CUBEMAP= 0x03L, // * * * * Sample + PS_TEXTUREMODES_PASSTHRU= 0x04L, // * * * * + PS_TEXTUREMODES_CLIPPLANE= 0x05L, // * * * * PSCompareMode + PS_TEXTUREMODES_BUMPENVMAP= 0x06L, // - * * * 
Sample, PSInputTexture + PS_TEXTUREMODES_BUMPENVMAP_LUM= 0x07L, // - * * * Sample, PSInputTexture + PS_TEXTUREMODES_BRDF= 0x08L, // - - * * + PS_TEXTUREMODES_DOT_ST= 0x09L, // - - * * Sample, PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DOT_ZW= 0x0aL, // - - * * PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DOT_RFLCT_DIFF= 0x0bL, // - - * - Sample, PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DOT_RFLCT_SPEC= 0x0cL, // - - - * Sample, PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DOT_STR_3D= 0x0dL, // - - - * Sample, PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DOT_STR_CUBE= 0x0eL, // - - - * Sample, PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DPNDNT_AR= 0x0fL, // - * * * Sample, PSInputTexture + PS_TEXTUREMODES_DPNDNT_GB= 0x10L, // - * * * Sample, PSInputTexture + PS_TEXTUREMODES_DOTPRODUCT= 0x11L, // - * * - PSInputTexture + PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST= 0x12L, // - - - * Sample, PSInputTexture, PSDotMapping + // 0x13-0x1f reserved + + PS_TEXTUREMODES_MASK= 0x1fL +}; + +// ========================================================================================================= +// PSDotMapping +// --------.--------.--------.-----xxx // stage 1 PS_DOTMAPPING +// --------.--------.--------.-xxx---- // stage 2 PS_DOTMAPPING +// --------.--------.-----xxx.-------- // stage 3 PS_DOTMAPPING + +#define PS_DOTMAPPING(t0,t1,t2,t3) (((t3)<<8)|((t2)<<4)|(t1)) + +// Dot mappings over the output value of a (4 component 8 bit unsigned) texture stage register into a (3 component float) vector value, for use in a dot product calculation: +// PS_DOTMAPPING_ZERO_TO_ONE :r8g8b8a8->(r,g,b): 0x00=>0, 0xff=>1 thus : output = (input / 0xff ) +// PS_DOTMAPPING_MINUS1_TO_1_D3D :r8g8b8a8->(r,g,b): 0x00=>-128/127, 0x01=>-1, 0x80=>0, 0xff=>1 thus : output = ((input - 0x100 ) / 0x7f ) +// PS_DOTMAPPING_MINUS1_TO_1_GL :r8g8b8a8->(r,g,b): 0x80=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? 
(input / 0x7f ) : ((input - 0x100 ) / 0x80 ) (see https://en.wikipedia.org/wiki/Two's_complement) +// PS_DOTMAPPING_MINUS1_TO_1 :r8g8b8a8->(r,g,b): 0x80=>-128/127, ?0x81=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? (input / 0x7f ) : ((input - 0x100 ) / 0x7f ) (see https://en.wikipedia.org/wiki/Two's_complement) +// PS_DOTMAPPING_HILO_1 :H16L16 ->(H,L,1): 0x0000=>0, 0xffff=>1 thus : output = (input / 0xffff) +// PS_DOTMAPPING_HILO_HEMISPHERE_D3D :H16L16 ->(H,L,sqrt(1-H*H-L*L)):? 0x8000=>-1, 0x0000=>0, 0x7fff=32767/32768 thus : output = ((input - 0x10000) / 0x7fff) +// PS_DOTMAPPING_HILO_HEMISPHERE_GL :H16L16 ->(H,L,sqrt(1-H*H-L*L)):? 0x8000=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? (input / 0x7fff) : ((input - 0x10000) / 0x8000) +// PS_DOTMAPPING_HILO_HEMISPHERE :H16L16 ->(H,L,sqrt(1-H*H-L*L)): 0x8000=>-32768/32767, 0x8001=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? (input / 0x7fff) : ((input - 0x10000) / 0x7fff) + +enum PS_DOTMAPPING +{ // valid in stage 0 1 2 3 + PS_DOTMAPPING_ZERO_TO_ONE= 0x00L, // - * * * + PS_DOTMAPPING_MINUS1_TO_1_D3D= 0x01L, // - * * * + PS_DOTMAPPING_MINUS1_TO_1_GL= 0x02L, // - * * * + PS_DOTMAPPING_MINUS1_TO_1= 0x03L, // - * * * + PS_DOTMAPPING_HILO_1= 0x04L, // - * * * + PS_DOTMAPPING_HILO_HEMISPHERE_D3D= 0x05L, // - * * * + PS_DOTMAPPING_HILO_HEMISPHERE_GL= 0x06L, // - * * * + PS_DOTMAPPING_HILO_HEMISPHERE= 0x07L, // - * * * + + PS_DOTMAPPING_MASK= 0x07L +}; + +// ========================================================================================================= +// PSCompareMode +// --------.--------.--------.----xxxx // stage 0 PS_COMPAREMODE +// --------.--------.--------.xxxx---- // stage 1 PS_COMPAREMODE +// --------.--------.----xxxx.-------- // stage 2 PS_COMPAREMODE +// --------.--------.xxxx----.-------- // stage 3 PS_COMPAREMODE + +#define PS_COMPAREMODE(t0,t1,t2,t3) (((t3)<<12)|((t2)<<8)|((t1)<<4)|(t0)) + +enum PS_COMPAREMODE +{ + PS_COMPAREMODE_S_LT= 0x00L, + 
PS_COMPAREMODE_S_GE= 0x01L, + + PS_COMPAREMODE_T_LT= 0x00L, + PS_COMPAREMODE_T_GE= 0x02L, + + PS_COMPAREMODE_R_LT= 0x00L, + PS_COMPAREMODE_R_GE= 0x04L, + + PS_COMPAREMODE_Q_LT= 0x00L, + PS_COMPAREMODE_Q_GE= 0x08L, + + PS_COMPAREMODE_MASK= 0x0fL +}; + +// ========================================================================================================= +// PSInputTexture +// --------.-------x.--------.-------- // stage 2 +// --------.--xx----.--------.-------- // stage 3 +// +// Selects the other texture to use as an input in the following texture modes: +// DOT_ST, DOT_STR_3D, DOT_STR_CUBE, DOT_ZW, DOT_RFLCT_SPEC, +// DOT_RFLCT_DIFF, DPNDNT_AR, DPNDNT_GB, BUMPENVMAP, +// BUMPENVMAP_LUM, DOTPRODUCT + +#define PS_INPUTTEXTURE(t0,t1,t2,t3) (((t3)<<20)|((t2)<<16)) // t0 and t1 are ignored: only stages 2 and 3 have an input-texture field + + +/*---------------------------------------------------------------------------------*/ +/* Color combiners - The following members of the D3DPixelShaderDef structure */ +/* define the state for the eight stages of color combiners: */ +/* PSCombinerCount - Number of stages */ +/* PSAlphaInputs[8] - Inputs for alpha portion of each stage */ +/* PSRGBInputs[8] - Inputs for RGB portion of each stage */ +/* PSConstant0[8] - Constant 0 for each stage */ +/* PSConstant1[8] - Constant 1 for each stage */ +/* PSFinalCombinerConstant0 - Constant 0 for final combiner */ +/* PSFinalCombinerConstant1 - Constant 1 for final combiner */ +/* PSAlphaOutputs[8] - Outputs for alpha portion of each stage */ +/* PSRGBOutputs[8] - Outputs for RGB portion of each stage */ +/*---------------------------------------------------------------------------------*/ + +// ========================================================================================================= +// PSCombinerCount +// --------.--------.--------.----xxxx // number of combiners (1-8) +// --------.--------.-------x.-------- // PS_COMBINERCOUNT_MUX_MSB bit (0= LSB, 1= MSB) +// --------.--------.---x----.-------- // PS_COMBINERCOUNT_UNIQUE_C0 +// 
--------.-------x.--------.-------- // PS_COMBINERCOUNT_UNIQUE_C1 + +#define PS_COMBINERCOUNT(count, flags) (((flags)<<8)|(count)) +// count is 1-8, flags contains one or more values from PS_COMBINERCOUNTFLAGS + +enum PS_COMBINERCOUNTFLAGS +{ + PS_COMBINERCOUNT_MUX_LSB= 0x0000L, // mux on r0.a lsb + PS_COMBINERCOUNT_MUX_MSB= 0x0001L, // mux on r0.a msb + + PS_COMBINERCOUNT_SAME_C0= 0x0000L, // c0 same in each stage + PS_COMBINERCOUNT_UNIQUE_C0= 0x0010L, // c0 unique in each stage + + PS_COMBINERCOUNT_SAME_C1= 0x0000L, // c1 same in each stage + PS_COMBINERCOUNT_UNIQUE_C1= 0x0100L // c1 unique in each stage +}; + +// ========================================================================================================= +// PSRGBInputs[0-7] +// PSAlphaInputs[0-7] +// PSFinalCombinerInputsABCD +// PSFinalCombinerInputsEFG +// --------.--------.--------.----xxxx // D PS_REGISTER +// --------.--------.--------.---x---- // D PS_CHANNEL (0= RGB/BLUE, 1= ALPHA) +// --------.--------.--------.xxx----- // D PS_INPUTMAPPING +// --------.--------.----xxxx.-------- // C PS_REGISTER +// --------.--------.---x----.-------- // C PS_CHANNEL (0= RGB/BLUE, 1= ALPHA) +// --------.--------.xxx-----.-------- // C PS_INPUTMAPPING +// --------.----xxxx.--------.-------- // B PS_REGISTER +// --------.---x----.--------.-------- // B PS_CHANNEL (0= RGB/BLUE, 1= ALPHA) +// --------.xxx-----.--------.-------- // B PS_INPUTMAPPING +// ----xxxx.--------.--------.-------- // A PS_REGISTER +// ---x----.--------.--------.-------- // A PS_CHANNEL (0= RGB/BLUE, 1= ALPHA) +// xxx-----.--------.--------.-------- // A PS_INPUTMAPPING + +// examples: +// +// shader.PSRGBInputs[3]= PS_COMBINERINPUTS( +// PS_REGISTER_T0 | PS_INPUTMAPPING_EXPAND_NORMAL | PS_CHANNEL_RGB, +// PS_REGISTER_C0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_ALPHA, +// PS_REGISTER_ZERO, +// PS_REGISTER_ZERO); +// +// shader.PSFinalCombinerInputsABCD= PS_COMBINERINPUTS( +// PS_REGISTER_T0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | 
PS_CHANNEL_ALPHA, +// PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL | PS_CHANNEL_RGB, +// PS_REGISTER_EF_PROD | PS_INPUTMAPPING_UNSIGNED_INVERT | PS_CHANNEL_RGB, +// PS_REGISTER_ZERO); +// +// PS_FINALCOMBINERSETTING is set in 4th field of PSFinalCombinerInputsEFG with PS_COMBINERINPUTS +// example: +// +// shader.PSFinalCombinerInputsEFG= PS_COMBINERINPUTS( +// PS_REGISTER_R0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_RGB, +// PS_REGISTER_R1 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_RGB, +// PS_REGISTER_R1 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_BLUE, +// PS_FINALCOMBINERSETTING_CLAMP_SUM | PS_FINALCOMBINERSETTING_COMPLEMENT_R0); + +#define PS_COMBINERINPUTS(a,b,c,d) (((a)<<24)|((b)<<16)|((c)<<8)|(d)) +// For PSFinalCombinerInputsEFG, +// a,b,c contain a value from PS_REGISTER, PS_CHANNEL, and PS_INPUTMAPPING for input E,F, and G +// d contains values from PS_FINALCOMBINERSETTING +// For all other inputs, +// a,b,c,d each contain a value from PS_REGISTER, PS_CHANNEL, and PS_INPUTMAPPING + +// The input has PS_INPUTMAPPING applied +// (Note : I don't know for sure if the max() operation mentioned in the PS_INPUTMAPPING enum comments is indeed what happens, +// as there's no further documentation available on this. Native Direct3D can clamp with the +// '_sat' instruction modifier, but that's not really the same as these Xbox1 input mappings.) +// +// When the input register is PS_REGISTER_ZERO, the PS_INPUTMAPPING mappings result in the following constants: +// +// PS_REGISTER_NEGATIVE_ONE (PS_INPUTMAPPING_EXPAND_NORMAL on zero) : y = -1.0 +// PS_REGISTER_NEGATIVE_ONE_HALF (PS_INPUTMAPPING_HALFBIAS_NORMAL on zero) : y = -0.5 +// PS_REGISTER_ZERO itself : y = 0.0 +// PS_REGISTER_ONE_HALF (PS_INPUTMAPPING_HALFBIAS_NEGATE on zero) : y = 0.5 +// PS_REGISTER_ONE (PS_INPUTMAPPING_UNSIGNED_INVERT on zero) : y = 1.0 +// (Note : It has no define, but PS_INPUTMAPPING_EXPAND_NEGATE on zero results in ONE too!) 
+ +enum PS_INPUTMAPPING +{ + PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // max(0,x) = 1*max(0,x) + 0.0 OK for final combiner: y = abs(x) + PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // 1 - max(0,x) = -1*max(0,x) + 1.0 OK for final combiner: y = 1 - x + PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // 2 * max(0,x) - 1 = 2*max(0,x) - 1.0 invalid for final combiner + PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // 1 - 2 * max(0,x) = -2*max(0,x) + 1.0 invalid for final combiner + PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // max(0,x) - 1/2 = 1*max(0,x) - 0.5 invalid for final combiner + PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // 1/2 - max(0,x) = -1*max(0,x) + 0.5 invalid for final combiner + PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // x = 1* x + 0.0 invalid for final combiner + PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // -x = -1* x + 0.0 invalid for final combiner + + PS_INPUTMAPPING_MASK= 0xe0L +}; + +enum PS_REGISTER +{ + PS_REGISTER_ZERO= 0x00L, // r A.k.a. _REG_0 + PS_REGISTER_DISCARD= 0x00L, // w A.k.a. _REG_0 + PS_REGISTER_C0= 0x01L, // r A.k.a. _REG_1 + PS_REGISTER_C1= 0x02L, // r A.k.a. _REG_2 + PS_REGISTER_FOG= 0x03L, // r A.k.a. _REG_3 + PS_REGISTER_V0= 0x04L, // r/w A.k.a. _REG_4 + PS_REGISTER_V1= 0x05L, // r/w A.k.a. _REG_5 + PS_REGISTER_T0= 0x08L, // r/w A.k.a. _REG_8 + PS_REGISTER_T1= 0x09L, // r/w A.k.a. _REG_9 + PS_REGISTER_T2= 0x0aL, // r/w A.k.a. _REG_A + PS_REGISTER_T3= 0x0bL, // r/w A.k.a. _REG_B + PS_REGISTER_R0= 0x0cL, // r/w A.k.a. _REG_C + PS_REGISTER_R1= 0x0dL, // r/w A.k.a. _REG_D + PS_REGISTER_V1R0_SUM= 0x0eL, // r A.k.a. _REG_SPECLIT + PS_REGISTER_EF_PROD= 0x0fL, // r A.k.a. 
_REG_EF_PROD + + PS_REGISTER_MASK= 0x0fL, + + // These constant values can be represented as a combination of 0, and an input modifier + // But they're not registers + // PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // 0x20 r OK for final combiner + // PS_REGISTER_NEGATIVE_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // 0x40 r invalid for final combiner + // PS_REGISTER_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // 0xa0 r invalid for final combiner + // PS_REGISTER_NEGATIVE_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // 0x80 r invalid for final combiner + + // Cxbx extension; Separate final combiner constant registers (values not encoded on NV2A, as outside of available bits range) : + PS_REGISTER_FC0= 0x10, + PS_REGISTER_FC1= 0x11, +}; + +// FOG ALPHA is only available in final combiner +// V1R0_SUM and EF_PROD are only available in final combiner (A,B,C,D inputs only) +// V1R0_SUM_ALPHA and EF_PROD_ALPHA are not available +// R0_ALPHA is initialized to T0_ALPHA in stage0 + +enum PS_CHANNEL +{ + PS_CHANNEL_RGB= 0x00, // used as RGB source + PS_CHANNEL_BLUE= 0x00, // used as ALPHA source + PS_CHANNEL_ALPHA= 0x10, // used as RGB or ALPHA source + + PS_CHANNEL_MASK= 0x10 +}; + +enum PS_FINALCOMBINERSETTING +{ + PS_FINALCOMBINERSETTING_CLAMP_SUM= 0x80, // V1+R0 sum clamped to [0,1] A.k.a. NV097_SET_COMBINER_SPECULAR_FOG_CW1_SPECULAR_CLAMP_TRUE + PS_FINALCOMBINERSETTING_COMPLEMENT_V1= 0x40, // unsigned invert mapping (1 - v1) is used as an input to the sum rather than v1 A.k.a. NV097_SET_COMBINER_SPECULAR_FOG_CW1_SPECULAR_ADD_INVERT_R5_TRUE + PS_FINALCOMBINERSETTING_COMPLEMENT_R0= 0x20, // unsigned invert mapping (1 - r0) is used as an input to the sum rather than r0 A.k.a. 
NV097_SET_COMBINER_SPECULAR_FOG_CW1_SPECULAR_ADD_INVERT_R12_TRUE +}; + +// ========================================================================================================= +// PSRGBOutputs[0-7] +// PSAlphaOutputs[0-7] +// --------.--------.--------.----xxxx // CD output PS_REGISTER +// --------.--------.--------.xxxx---- // AB output PS_REGISTER +// --------.--------.----xxxx.-------- // AB_CD output PS_REGISTER Note : Must be PS_REGISTER_DISCARD if either PS_COMBINEROUTPUT_AB_DOT_PRODUCT or PS_COMBINEROUTPUT_CD_DOT_PRODUCT are set +// --------.--------.---x----.-------- // PS_COMBINEROUTPUT_CD_DOT_PRODUCT (CD output 0= multiply, 1= dot product) +// --------.--------.--x-----.-------- // PS_COMBINEROUTPUT_AB_DOT_PRODUCT (AB output 0= multiply, 1= dot product) +// --------.--------.-x------.-------- // PS_COMBINEROUTPUT_AB_CD_MUX (AB_CD mux/sum select 0= sum, 1= mux) +// --------.------xx.x-------.-------- // PS_COMBINEROUTPUT_OUTPUTMAPPING +// --------.-----x--.--------.-------- // PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA +// --------.----x---.--------.-------- // PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA + +#define PS_COMBINEROUTPUTS(ab,cd,mux_sum,flags) (((flags)<<12)|((mux_sum)<<8)|((ab)<<4)|(cd)) +// ab,cd,mux_sum contain a value from PS_REGISTER +// flags contains values from PS_COMBINEROUTPUT + +enum PS_COMBINEROUTPUT_OUTPUTMAPPING +{ + PS_COMBINEROUTPUT_OUTPUTMAPPING_IDENTITY= 0x00L, // y = x + PS_COMBINEROUTPUT_OUTPUTMAPPING_BIAS= 0x08L, // y = (x - 0.5) + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_1= 0x10L, // y = x * 2 + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_1_BIAS= 0x18L, // y = (x - 0.5) * 2 + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2= 0x20L, // y = x * 4 + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2_BIAS= 0x28L, // y = (x - 0.5) * 4 Note : Cxbx inferred method; May not be supported on NV2A + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1= 0x30L, // y = x / 2 + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS= 0x38L, // y = (x - 0.5) / 2 Note : Cxbx 
inferred method; May not be supported on NV2A + + PS_COMBINEROUTPUT_OUTPUTMAPPING_MASK= 0x38L +}; + +enum PS_COMBINEROUTPUT +{ + PS_COMBINEROUTPUT_CD_MULTIPLY= 0x00L, + PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L, // RGB only + + PS_COMBINEROUTPUT_AB_MULTIPLY= 0x00L, + PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only + + PS_COMBINEROUTPUT_AB_CD_SUM= 0x00L, // 3rd output is AB+CD + PS_COMBINEROUTPUT_AB_CD_MUX= 0x04L, // 3rd output is MUX(AB,CD) based on R0.a + + PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA= 0x40L, // RGB only + + PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA= 0x80L, // RGB only +}; + +// ========================================================================================================= +// PSC0Mapping +// PSC1Mapping +// --------.--------.--------.----xxxx // offset of D3D constant for stage 0 +// --------.--------.--------.xxxx---- // offset of D3D constant for stage 1 +// --------.--------.----xxxx.-------- // offset of D3D constant for stage 2 +// --------.--------.xxxx----.-------- // offset of D3D constant for stage 3 +// --------.----xxxx.--------.-------- // offset of D3D constant for stage 4 +// --------.xxxx----.--------.-------- // offset of D3D constant for stage 5 +// ----xxxx.--------.--------.-------- // offset of D3D constant for stage 6 +// xxxx----.--------.--------.-------- // offset of D3D constant for stage 7 + +#define PS_CONSTANTMAPPING(s0,s1,s2,s3,s4,s5,s6,s7) \ + ((((DWORD)(s0)&0xf)<< 0) | (((DWORD)(s1)&0xf)<< 4) | \ + (((DWORD)(s2)&0xf)<< 8) | (((DWORD)(s3)&0xf)<<12) | \ + (((DWORD)(s4)&0xf)<<16) | (((DWORD)(s5)&0xf)<<20) | \ + (((DWORD)(s6)&0xf)<<24) | (((DWORD)(s7)&0xf)<<28)) +// s0-s7 contain the offset (only the low 4 bits of each are used) of the D3D constant that corresponds to the +// c0 or c1 constant in stages 0 through 7. These mappings are only used in +// SetPixelShaderConstant(). The expansion is fully parenthesized, so it can be used inside larger expressions. 
+ +// ========================================================================================================= +// PSFinalCombinerConstants +// --------.--------.--------.----xxxx // offset of D3D constant for C0 +// --------.--------.--------.xxxx---- // offset of D3D constant for C1 +// --------.--------.-------x.-------- // Adjust texture flag + +#define PS_FINALCOMBINERCONSTANTS(c0,c1,flags) ((((DWORD)(flags)) << 8) | (((DWORD)(c0)&0xf)<< 0) | (((DWORD)(c1)&0xf)<< 4)) +// c0 and c1 contain the offset (only the low 4 bits of each are used) of the D3D constant that corresponds to the +// constants in the final combiner. These mappings are only used in +// SetPixelShaderConstant(). Flags contains values from PS_GLOBALFLAGS (placed at bit 8, matching PS_GLOBALFLAGS_SHIFT) + +enum PS_GLOBALFLAGS +{ + // if this flag is set, the texture mode for each texture stage is adjusted as follows: + // if set texture is a cubemap, + // change PS_TEXTUREMODES_PROJECT2D to PS_TEXTUREMODES_CUBEMAP + // change PS_TEXTUREMODES_PROJECT3D to PS_TEXTUREMODES_CUBEMAP + // change PS_TEXTUREMODES_DOT_STR_3D to PS_TEXTUREMODES_DOT_STR_CUBE + // if set texture is a volume texture, + // change PS_TEXTUREMODES_PROJECT2D to PS_TEXTUREMODES_PROJECT3D + // change PS_TEXTUREMODES_CUBEMAP to PS_TEXTUREMODES_PROJECT3D + // change PS_TEXTUREMODES_DOT_STR_CUBE to PS_TEXTUREMODES_DOT_STR_3D + // if set texture is neither cubemap or volume texture, + // change PS_TEXTUREMODES_PROJECT3D to PS_TEXTUREMODES_PROJECT2D + // change PS_TEXTUREMODES_CUBEMAP to PS_TEXTUREMODES_PROJECT2D + + PS_GLOBALFLAGS_NO_TEXMODE_ADJUST= 0x0000L, // don't adjust texture modes + PS_GLOBALFLAGS_TEXMODE_ADJUST= 0x0001L, // adjust texture modes according to set texture + + PS_GLOBALFLAGS_SHIFT= 8 +}; + + +constexpr int PSH_XBOX_MAX_C_REGISTER_COUNT = 16; +constexpr int PSH_XBOX_MAX_R_REGISTER_COUNT = 2; +constexpr int PSH_XBOX_MAX_T_REGISTER_COUNT = 4; +constexpr int PSH_XBOX_MAX_V_REGISTER_COUNT = 2; + + +struct RPSRegisterObject { + PS_REGISTER Reg; + + void Decode(uint8_t Value); +}; + +struct RPSInputRegister : 
RPSRegisterObject { + PS_CHANNEL Channel; + PS_INPUTMAPPING InputMapping; + + void Decode(uint8_t Value, unsigned stage_nr, bool isRGB); +}; + +struct RPSCombinerOutput : RPSRegisterObject { + RPSInputRegister Input[2]; // Called Input A and B, or C and D (depending if it's inside the AB or CD combiner) + bool DotProduct; // False=Multiply, True=DotProduct + unsigned BlueToAlpha; // 0=Alpha-to-Alpha, 1=Blue-to-Alpha + + void Decode(uint8_t Value, uint16_t PSInputs, unsigned stage_nr, bool isRGB); +}; + +struct RPSCombinerStageChannel { + RPSCombinerOutput OutputCD; // Contains InputC and InputD (as Input1 and Input2) + RPSCombinerOutput OutputAB; // Contains InputA and InputB (as Input1 and Input2) + RPSRegisterObject OutputMUX_SUM; + bool AB_CD_MUX; // False=AB+CD, True=MUX(AB,CD) based on R0.a + PS_COMBINEROUTPUT_OUTPUTMAPPING CombinerOutputMapping; + + void Decode(uint32_t PSInputs, uint32_t PSOutputs, unsigned stage_nr, bool isRGB); +}; + +struct RPSCombinerStage { + RPSCombinerStageChannel RGB; + RPSCombinerStageChannel Alpha; +}; + +struct RPSFinalCombiner { + RPSInputRegister Input[7]; + bool ComplementV1; + bool ComplementR0; + bool ClampSum; + + void Decode(const uint32_t PSFinalCombinerInputsABCD, const uint32_t PSFinalCombinerInputsEFG); +}; + +struct DecodedRegisterCombiner { + PS_TEXTUREMODES PSTextureModes[xbox::X_D3DTS_STAGECOUNT]; + PS_DOTMAPPING PSDotMapping[xbox::X_D3DTS_STAGECOUNT]; + bool PSCompareMode[xbox::X_D3DTS_STAGECOUNT][4]; // True in [0] = PS_COMPAREMODE_S_GE, [1] = PS_COMPAREMODE_T_GE, [2] = PS_COMPAREMODE_R_GE, [3] PS_COMPAREMODE_Q_GE (so, STRQ>0, otherwise <0) + int PSInputTexture[xbox::X_D3DTS_STAGECOUNT]; + + bool CombinerMuxesOnMsb; + bool CombinerHasUniqueC0; + bool CombinerHasUniqueC1; + unsigned NumberOfCombiners; + RPSCombinerStage Combiners[xbox::X_PSH_COMBINECOUNT]; + bool hasFinalCombiner; + RPSFinalCombiner FinalCombiner; + bool TexModeAdjust; + // Variables + bool AlphaKill[4]; // X_D3DTSS_ALPHAKILL + + static void 
GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]); + static void GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_DOTMAPPING psDotMapping[xbox::X_D3DTS_STAGECOUNT]); + static void GetPSCompareModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, bool psCompareModes[xbox::X_D3DTS_STAGECOUNT][4]); + static void GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDef, int psInputTexture[xbox::X_D3DTS_STAGECOUNT]); + void Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef); +}; + +extern bool g_UseFixedFunctionPixelShader; // PatrickvL's Dxbx pixel shader translation void DxbxUpdateActivePixelShader(); // NOPATCH diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index fd47d2cbf..9adc3e3b7 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -38,6 +38,7 @@ #include "core\hle\D3D8\XbVertexBuffer.h" // For CxbxImpl_SetVertexData4f #include "core\hle\D3D8\XbVertexShader.h" #include "core\hle\D3D8\XbD3D8Logging.h" // For DEBUG_D3DRESULT +#include "devices\xbox.h" #include "core\hle\D3D8\XbConvert.h" // For NV2A_VP_UPLOAD_INST, NV2A_VP_UPLOAD_CONST_ID, NV2A_VP_UPLOAD_CONST #include "devices\video\nv2a.h" // For D3DPUSH_DECODE #include "common\Logging.h" // For LOG_INIT @@ -98,7 +99,7 @@ void CxbxVertexShaderSetFlags() // Note : Temporary, until we reliably locate the Xbox internal state for this // See D3DXDeclaratorFromFVF docs https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dxdeclaratorfromfvf // and https://github.com/reactos/wine/blob/2e8dfbb1ad71f24c41e8485a39df01bb9304127f/dlls/d3dx9_36/mesh.c#L2041 -static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) +static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) // TODO : Rename CxbxFVFToXboxVertexAttributeFormat? 
{ using namespace xbox; @@ -127,17 +128,14 @@ static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) unsigned offset = 0; DWORD position = (xboxFvf & X_D3DFVF_POSITION_MASK); switch (position) { - case 0: nrPositionFloats = 0; LOG_TEST_CASE("FVF without position"); break; // Note : Remove logging if this occurs often - case X_D3DFVF_XYZ: /*nrPositionFloats is set to 3 by default*/ break; - case X_D3DFVF_XYZRHW: - g_Xbox_VertexShader_ForFVF.Flags |= X_VERTEXSHADER_FLAG_PASSTHROUGH; - nrPositionFloats = 4; - break; - case X_D3DFVF_XYZB1: nrBlendWeights = 1; break; - case X_D3DFVF_XYZB2: nrBlendWeights = 2; break; - case X_D3DFVF_XYZB3: nrBlendWeights = 3; break; - case X_D3DFVF_XYZB4: nrBlendWeights = 4; break; - case X_D3DFVF_POSITION_MASK: /*Keep nrPositionFloats set to 3*/ LOG_TEST_CASE("FVF invalid (5th blendweight?)"); break; + case 0: nrPositionFloats = 0; LOG_TEST_CASE("FVF without position"); break; // Note : Remove logging if this occurs often + case X_D3DFVF_XYZ: /*nrPositionFloats is set to 3 by default*/ break; + case X_D3DFVF_XYZRHW: nrPositionFloats = 4; g_Xbox_VertexShader_ForFVF.Flags |= X_VERTEXSHADER_FLAG_PASSTHROUGH; break; + case X_D3DFVF_XYZB1: nrBlendWeights = 1; break; + case X_D3DFVF_XYZB2: nrBlendWeights = 2; break; + case X_D3DFVF_XYZB3: nrBlendWeights = 3; break; + case X_D3DFVF_XYZB4: nrBlendWeights = 4; break; + case X_D3DFVF_POSITION_MASK: /*Keep nrPositionFloats set to 3*/ LOG_TEST_CASE("FVF invalid (5th blendweight?)"); break; DEFAULT_UNREACHABLE; } @@ -158,6 +156,7 @@ static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) offset += sizeof(float) * nrBlendWeights; } } + else if (nrBlendWeights > 0) LOG_TEST_CASE("BlendWeights given without position?"); // Write Normal, Diffuse, and Specular if (xboxFvf & X_D3DFVF_NORMAL) { @@ -193,18 +192,23 @@ static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) LOG_TEST_CASE("Limiting FVF to 4 textures"); textureCount = 4; // Safeguard, since the 
X_D3DFVF_TEXCOUNT bitfield could contain invalid values (5 up to 15) } + for (int i = 0; i < textureCount; i++) { - int numberOfCoordinates = 0; auto FVFTextureFormat = (xboxFvf >> X_D3DFVF_TEXCOORDSIZE_SHIFT(i)) & 0x003; - switch (FVFTextureFormat) { - case X_D3DFVF_TEXTUREFORMAT1: numberOfCoordinates = 1; break; - case X_D3DFVF_TEXTUREFORMAT2: numberOfCoordinates = 2; break; - case X_D3DFVF_TEXTUREFORMAT3: numberOfCoordinates = 3; break; - case X_D3DFVF_TEXTUREFORMAT4: numberOfCoordinates = 4; break; - DEFAULT_UNREACHABLE; +#if 1 + int numberOfCoordinates = ((FVFTextureFormat + 1) & 3) + 1; +#else + int numberOfCoordinates = 0; + switch (FVFTextureFormat) { // Note : Below enums are not ordered; In a math expression mapped as : + case X_D3DFVF_TEXTUREFORMAT1: numberOfCoordinates = 1; break; // input = 3 -> 4 -> 0 -> 1 = output + case X_D3DFVF_TEXTUREFORMAT2: numberOfCoordinates = 2; break; // input = 0 -> 1 -> 1 -> 2 = output + case X_D3DFVF_TEXTUREFORMAT3: numberOfCoordinates = 3; break; // input = 1 -> 2 -> 2 -> 3 = output + case X_D3DFVF_TEXTUREFORMAT4: numberOfCoordinates = 4; break; // input = 2 -> 3 -> 3 -> 4 = output + DEFAULT_UNREACHABLE; // ((input +1 ) &3 ) +1 ) = output } assert(numberOfCoordinates > 0); +#endif pSlot = &declaration.Slots[X_D3DVSDE_TEXCOORD0 + i]; pSlot->Format = X_D3DVSDT_FLOAT[numberOfCoordinates]; pSlot->Offset = offset; diff --git a/src/core/hle/D3D8/XbVertexShader.h b/src/core/hle/D3D8/XbVertexShader.h index 5b82c28a3..aed15821c 100644 --- a/src/core/hle/D3D8/XbVertexShader.h +++ b/src/core/hle/D3D8/XbVertexShader.h @@ -209,6 +209,10 @@ extern void EmuParseVshFunction extern size_t GetVshFunctionSize(const xbox::dword_xt* pXboxFunction); inline boolean VshHandleIsVertexShader(DWORD Handle) { return (Handle & X_D3DFVF_RESERVED0) ? 
TRUE : FALSE; } +inline boolean VshHandleIsFVF(DWORD Handle) { return !VshHandleIsVertexShader(Handle); } +inline boolean VshHandleIsPassthrough(DWORD Handle) { + return VshHandleIsFVF(Handle) && ((Handle & X_D3DFVF_POSITION_MASK) == X_D3DFVF_XYZRHW); +} inline xbox::X_D3DVertexShader *VshHandleToXboxVertexShader(DWORD Handle) { return (xbox::X_D3DVertexShader *)(Handle & ~X_D3DFVF_RESERVED0);} // Get the number of components represented by the given xbox vertex data type @@ -228,5 +232,4 @@ extern void CxbxImpl_SetVertexShaderInput(DWORD Handle, UINT StreamCount, xbox:: extern void CxbxImpl_SetVertexShaderConstant(INT Register, PVOID pConstantData, DWORD ConstantCount); extern void CxbxImpl_DeleteVertexShader(DWORD Handle); extern void CxbxVertexShaderSetFlags(); -extern HRESULT SetVertexShader(IDirect3DVertexShader* pShader); #endif