Merge pull request #2136 from Cxbx-Reloaded/hlsl_ps

Pixel shader hlsl
This commit is contained in:
PatrickvL 2021-06-07 16:04:18 +02:00 committed by GitHub
commit 8b9b26b56e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 3229 additions and 8227 deletions

View File

@ -131,10 +131,15 @@ file (GLOB CXBXR_HEADER_EMU
"${CXBXR_ROOT_DIR}/src/core/common/imgui/settings.h"
"${CXBXR_ROOT_DIR}/src/core/common/imgui/video.hpp"
"${CXBXR_ROOT_DIR}/src/core/common/video/RenderBase.hpp"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.h"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/PixelShader.h"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Shader.h"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShader.h"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/WalkIndexBuffer.h"
@ -294,7 +299,9 @@ file (GLOB CXBXR_SOURCE_EMU
"${CXBXR_ROOT_DIR}/src/core/common/imgui/video.cpp"
"${CXBXR_ROOT_DIR}/src/core/common/video/RenderBase.cpp"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/PixelShader.cpp"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/RenderStates.cpp"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Shader.cpp"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/TextureStates.cpp"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShader.cpp"
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp"
@ -442,6 +449,9 @@ install(FILES ${cxbxr_INSTALL_files}
)
install(FILES
"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl"
"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl"
"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli"
"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli"
"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl"
DESTINATION bin/hlsl

View File

@ -32,6 +32,8 @@ file(COPY ${CXBXR_GLEW_DLL} DESTINATION ${TargetRunTimeDir})
# HLSL sources of the fixed function vertex and pixel shaders (with their shared .hlsli includes).
# NOTE(review): presumably these are copied into ${HlslOutputDir} by a command following this excerpt - confirm.
set(CXBXR_HLSL_FILES
"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli"
"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl"
"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli"
"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl"
)
# Directory for the HLSL files, placed below the runtime directory and created at configure time
set(HlslOutputDir ${TargetRunTimeDir}/hlsl)
file(MAKE_DIRECTORY ${HlslOutputDir})

View File

@ -0,0 +1,374 @@
// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) :
R"DELIMITER(
// Pixel shader input registers; main() reads these through its xIn argument
struct PS_INPUT // Declared identical to vertex shader output (see VS_OUTPUT)
{
float2 iPos : VPOS; // Screen space x,y pixel location
float4 iD0 : COLOR0; // Front-facing primary (diffuse) vertex color (clamped to 0..1)
float4 iD1 : COLOR1; // Front-facing secondary (specular) vertex color (clamped to 0..1)
float iFog : FOG; // Fog value from the vertex shader; main() stores it in fog.a
float iPts : PSIZE; // Point size; not read anywhere in this template
float4 iB0 : TEXCOORD4; // Back-facing primary (diffuse) vertex color (clamped to 0..1)
float4 iB1 : TEXCOORD5; // Back-facing secondary (specular) vertex color (clamped to 0..1)
float4 iT0 : TEXCOORD0; // Texture Coord 0
float4 iT1 : TEXCOORD1; // Texture Coord 1
float4 iT2 : TEXCOORD2; // Texture Coord 2
float4 iT3 : TEXCOORD3; // Texture Coord 3
float iFF : VFACE; // Face register; main() treats the pixel as front-facing when (iFF * FRONTFACE_FACTOR) >= 0
};
// Pixel shader output; the footer of main() copies the final r0 register into oR0
struct PS_OUTPUT
{
float4 oR0 : COLOR;
};
// Source register modifier macros, based on enum PS_INPUTMAPPING :
// Every macro either passes its argument straight into an intrinsic call or wraps it in parentheses, so compound arguments (like 'a - b') expand with the intended precedence.
// TODO : Should the remaining 'max(0, x)' actually be 'saturate(x)'? This, because the operation may actually clamp the register value to the range [0..1]
#define s_sat(x) saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // OK for final combiner // Clamps x to [0..1] // Was : max(0, x), then abs(x) (Test case: Scaler)
#define s_comp(x) (1 - saturate(x)) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // OK for final combiner // Complements x (1-x) // Was : 1- min(max(0, x), 1)
#define s_bx2(x) (( 2 * max(0, x)) - 1) // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // invalid for final combiner // Shifts range from [0..1] to [-1..1]
#define s_negbx2(x) ((-2 * max(0, x)) + 1) // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates
#define s_bias(x) (max(0, x) - 0.5) // PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // invalid for final combiner // Clamps negative x to 0 and then subtracts 0.5
#define s_negbias(x) (-max(0, x) + 0.5) // PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // invalid for final combiner // Clamps negative x to 0, subtracts 0.5, and then negates
#define s_ident(x) (x) // PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // invalid for final combiner // No modifier, x is passed without alteration
#define s_neg(x) (-(x)) // PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // invalid for final combiner // Negate
// Destination register modifier macros, based on enum PS_COMBINEROUTPUT :
// The argument is parenthesized in every expansion, so a compound argument (like 'a + b') is modified as a whole.
#define d_ident(x) (x) // PS_COMBINEROUTPUT_IDENTITY= 0x00L, //
#define d_bias(x) ((x) - 0.5) // PS_COMBINEROUTPUT_BIAS= 0x08L, // Subtracts 0.5 from outputs
#define d_x2(x) ((x) * 2) // PS_COMBINEROUTPUT_SHIFTLEFT_1= 0x10L, // Scales outputs by 2
#define d_bx2(x) (((x) - 0.5) * 2) // PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS= 0x18L, // Subtracts 0.5 from outputs and scales by 2
#define d_x4(x) ((x) * 4) // PS_COMBINEROUTPUT_SHIFTLEFT_2= 0x20L, // Scales outputs by 4
#define d_bx4(x) (((x) - 0.5) * 4) // PS_COMBINEROUTPUT_SHIFTLEFT_2_BIAS= 0x28L, // Subtracts 0.5 from outputs and scales by 4
#define d_d2(x) ((x) / 2) // PS_COMBINEROUTPUT_SHIFTRIGHT_1= 0x30L, // Divides outputs by 2
#define d_bd2(x) (((x) - 0.5) / 2) // PS_COMBINEROUTPUT_SHIFTRIGHT_1_BIAS= 0x38L, // Subtracts 0.5 from outputs and divides by 2
// Constant registers
// c0_ and c1_ each hold 8 entries (registers c0..c7 and c8..c15) and are accessed through the C0 and C1 macros
uniform const float4 c0_[8] : register(c0);
uniform const float4 c1_[8] : register(c8);
uniform const float4 c_fog : register(c18); // Note : Maps to PSH_XBOX_CONSTANT_FOG, assigned to fog.rgb
// Constant registers used only in final combiner stage (xfc 'opcode') :
uniform const float4 FC0 : register(c16); // Note : Maps to PSH_XBOX_CONSTANT_FC0, must be generated as argument to xfc instead of C0
uniform const float4 FC1 : register(c17); // Note : Maps to PSH_XBOX_CONSTANT_FC1, must be generated as argument to xfc instead of C1
// Constant registers used by the texture addressing macros and by main() :
uniform const float4 BEM[4] : register(c19); // Note : PSH_XBOX_CONSTANT_BEM for 4 texture stages
uniform const float4 LUM[4] : register(c23); // Note : PSH_XBOX_CONSTANT_LUM for 4 texture stages
uniform const float FRONTFACE_FACTOR : register(c27); // Note : Multiplied with VFACE in main() to decide between front and back face colors
// Compare mode helpers : discard the pixel, via clip(-1), when component c satisfies the comparison.
// Each expands to a complete statement including its ';', so several can be chained without separators (as the PS_COMPAREMODE_n samples below do).
#define CM_LT(c) if(c < 0) clip(-1); // = PS_COMPAREMODE_[RSTQ]_LT
#define CM_GE(c) if(c >= 0) clip(-1); // = PS_COMPAREMODE_[RSTQ]_GE
#if 0
// Compiler-defines/symbols which must be defined when their bit/value is set in the corresponding register :
// Generated by PixelShader.cpp::BuildShader()
// Data from X_D3DTSS_ALPHAKILL :
#define ALPHAKILL {false, false, false, false}
// Bits from PSCombinerCount (a.k.a. PSCombinerCountFlags) :
#define PS_COMBINERCOUNT 2
#define PS_COMBINERCOUNT_UNIQUE_C0
#define PS_COMBINERCOUNT_UNIQUE_C1
#define PS_COMBINERCOUNT_MUX_MSB
// Generate defines like this, based on actual values :
#define PS_COMPAREMODE_0(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w)
#define PS_COMPAREMODE_1(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w)
#define PS_COMPAREMODE_2(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w)
#define PS_COMPAREMODE_3(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w)
// Input texture register mappings for texture stage 1, 2 and 3 (stage 0 has no input-texture)
static const int PS_INPUTTEXTURE_[4] = { -1, 0, 0, 0 };
// Dot mappings for texture stage 1, 2 and 3 (stage 0 performs no dot product)
#define PS_DOTMAPPING_1 PS_DOTMAPPING_MINUS1_TO_1_D3D
#define PS_DOTMAPPING_2 PS_DOTMAPPING_MINUS1_TO_1_D3D
#define PS_DOTMAPPING_3 PS_DOTMAPPING_MINUS1_TO_1_D3D
// Bits from FinalCombinerFlags (the 4th byte in PSFinalCombinerInputsEFG) :
#define PS_FINALCOMBINERSETTING_COMPLEMENT_V1
#define PS_FINALCOMBINERSETTING_COMPLEMENT_R0
#define PS_FINALCOMBINERSETTING_CLAMP_SUM
#endif
)DELIMITER", /* This terminates the 1st raw string within the 16380 single-byte characters limit. // */
// See https://docs.microsoft.com/en-us/cpp/error-messages/compiler-errors-1/compiler-error-c2026?f1url=%3FappId%3DDev15IDEF1%26l%3DEN-US%26k%3Dk(C2026)%26rd%3Dtrue&view=vs-2019
// Second raw string :
R"DELIMITER(
// PS_COMBINERCOUNT_UNIQUE_C0 steers whether for C0 to use combiner stage-specific constants c0_[0] .. c0_[7], or c0_[0] for all stages
// Note : the stage-specific variant reads the 'stage' variable that must be in scope where C0 is expanded (main() declares it)
#ifdef PS_COMBINERCOUNT_UNIQUE_C0
#define C0 c0_[stage] // index by the current stage, giving c0_[0] .. c0_[7]
#else // PS_COMBINERCOUNT_SAME_C0
#define C0 c0_[0] // always resolve to c0_[0]
#endif
// PS_COMBINERCOUNT_UNIQUE_C1 steers whether for C1 to use combiner stage-specific constants c1_[0] .. c1_[7], or c1_[0] for all stages
#ifdef PS_COMBINERCOUNT_UNIQUE_C1
#define C1 c1_[stage] // index by the current stage, giving c1_[0] .. c1_[7]
#else // PS_COMBINERCOUNT_SAME_C1
#define C1 c1_[0] // always resolve to c1_[0]
#endif
// PS_COMBINERCOUNT_MUX_MSB steers the 'muxing' operation in the XMMC opcode,
// checking either the Most Significant Bit (MSB) or Least (LSB) of the alpha component of the r0 register.
// (In practice, LSB is seldom encountered, we have zero known test-cases.)
#ifdef PS_COMBINERCOUNT_MUX_MSB
#define FCS_MUX (r0.a >= 0.5) // Check r0.a MSB; Having range upto 1 this should be equal to : (((r0.a * 255) /*mod 256*/) >= 128)
#else // PS_COMBINERCOUNT_MUX_LSB
// Note : HLSL has no 'mod' operator, so fmod is used; rounding first keeps a byte value like 3/255 from landing just below 3
#define FCS_MUX (fmod(round(r0.a * 255), 2) >= 1) // Check r0.a LSB; Get LSB by converting 1 into 255 (highest 8-bit value) and using modulo 2. TODO : Verify correctness
#endif
// The three FCS_* macros below each resolve to the name of a source modifier macro, which xfc_sum then applies
// PS_FINALCOMBINERSETTING_COMPLEMENT_V1, when defined, applies a modifier to the v1 input when calculating the sum register
#ifdef PS_FINALCOMBINERSETTING_COMPLEMENT_V1
#define FCS_V1 s_comp // making it use 1-complement,
#else
#define FCS_V1 s_ident // otherwise identity mapping.
#endif
// PS_FINALCOMBINERSETTING_COMPLEMENT_R0, when defined, applies a modifier to the r0 input when calculating the sum register
#ifdef PS_FINALCOMBINERSETTING_COMPLEMENT_R0
#define FCS_R0 s_comp // making it use 1-complement,
#else
#define FCS_R0 s_ident // otherwise identity mapping.
#endif
// PS_FINALCOMBINERSETTING_CLAMP_SUM, when defined, applies a modifier to the sum register
#ifdef PS_FINALCOMBINERSETTING_CLAMP_SUM
#define FCS_SUM s_sat // making it clamp to the range [0..1] (s_sat is saturate),
#else
#define FCS_SUM s_ident // otherwise identity mapping. TODO : Confirm correctness
#endif
// Xbox supports only one 'pixel shader' opcode, but bit flags tune its function;
// Here are effectively all 5 Xbox opcode variants, each taking a destination modifier macro (m) that is applied to every product or dot result :
// Note : Since both d0 AND d1 could be the same output register, calculation of d2 can re-use only one (d0 or d1)
// Note : Each macro expands to several statements without a trailing ';', so it must be used as a statement of its own
#define xmma(d0, d1, d2, s0, s1, s2, s3, m, tmp) tmp = d0 = m(s0 * s1); d1 = m(s2 * s3); d2 = d1 + tmp // PS_COMBINEROUTPUT_AB_CD_SUM= 0x00L, // 3rd output is AB+CD
#define xmmc(d0, d1, d2, s0, s1, s2, s3, m, tmp) tmp = d0 = m(s0 * s1); d1 = m(s2 * s3); d2 = FCS_MUX ? d1 : tmp // PS_COMBINEROUTPUT_AB_CD_MUX= 0x04L, // 3rd output is MUX(AB,CD) based on R0.a
#define xdm(d0, d1, s0, s1, s2, s3, m) d0 = m(dot(s0 , s1)); d1 = m( s2 * s3 ) // PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only // PS_COMBINEROUTPUT_CD_MULTIPLY= 0x00L,
#define xdd(d0, d1, s0, s1, s2, s3, m) d0 = m(dot(s0 , s1)); d1 = m(dot(s2 , s3)) // PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only // PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L,
#define xmd(d0, d1, s0, s1, s2, s3, m) d0 = m( s0 * s1 ); d1 = m(dot(s2 , s3)) // PS_COMBINEROUTPUT_AB_MULTIPLY= 0x00L, // RGB only // PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L,
// After the register combiner stages, there's one (optional) final combiner step, consisting of 4 parts;
// All the 7 final combiner inputs (a..g) pass through fcin, which clamps them to the range [0..1]:
#define fcin(r) saturate(r)
// Special purpose registers prod and sum operate on rgb only, and have alpha set to zero
#define xfc_sum sum = FCS_SUM(float4(FCS_V1(fcin(v1.rgb)) + FCS_R0(fcin(r0.rgb)), 0)) // Note : perform sum first, so prod can use its result
#define xfc_prod(e, f) prod = float4(fcin(e) * fcin(f), 0) // Note : prod can't have modifiers
// Color and Alpha calculations are performed, potentially using sum and/or prod and/or fog registers
#define xfc_rgb(a, b, c, d) r0.rgb = lerp(fcin(c), fcin(b), fcin(a)) + fcin(d) // Note : perform rgb and alpha last, so prod and sum can be used as inputs
#define xfc_alpha(g) r0.a = fcin(g)
// Glue them all together, so we can generate a one-liner closing off the stages :
#define xfc(a, b, c, d, e, f, g) xfc_sum; xfc_prod(e, f); xfc_rgb(a, b, c, d); xfc_alpha(g)
// Note : If xfc is not generated (when PSFinalCombinerInputsABCD and PSFinalCombinerInputsEFG are both 0), r0.rgba is still returned as pixel shader output
// GLSL : https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/mix.xhtml
// mix(x, y, a ) x*(1-a ) + y*a
//
// HLSL : https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-lerp
// lerp(x, y, s ) x*(1-s ) + y*s == x + s(y-x)
// lerp(s2, s1, s0) s2*(1-s0) + s1*s0
)DELIMITER", /* This terminates the 2nd raw string within the 16380 single-byte characters limit. // */
// See https://docs.microsoft.com/en-us/cpp/error-messages/compiler-errors-1/compiler-error-c2026?f1url=%3FappId%3DDev15IDEF1%26l%3DEN-US%26k%3Dk(C2026)%26rd%3Dtrue&view=vs-2019
// Third raw string :
R"DELIMITER(
// D3D style signed mapping : byte 0x80 gives 0, byte 0xff gives 1
float m21d(const float input)
{
    int tmp = (int)round(input * 255); // Convert float 0..1 into nearest byte 0..255 (truncating could turn 128/255 into 127)
    tmp -= 128; // 0 lowers to -128, 128 lowers to 0, 255 lowers to 127
    return (float)tmp / 127; // -128 scales to -1.007874016, 0 scales to 0.0, 127 scales to 1.0
}
// GL style signed mapping : byte 0x80 gives -1, byte 0x7f gives 1
float m21g(const float input)
{
    int tmp = (int)round(input * 255); // Convert float 0..1 into nearest byte 0..255 (truncating could turn 128/255 into 127)
    if (tmp >= 128) {
        tmp -= 256; // 128 lowers to -128, 255 lowers to -1
    } // 0 stays 0, 127 stays 127
    return ((float)tmp + 0.5) / 127.5;
}
// Two's complement signed mapping : byte 0x00 gives 0, byte 0x7f gives 1, byte 0x81 gives -1
float m21(const float input)
{
    int tmp = (int)round(input * 255); // Convert float 0..1 into nearest byte 0..255 (truncating could turn 128/255 into 127)
    if (tmp >= 128) {
        tmp -= 256; // 128 lowers to -128, 255 lowers to -1
    } // 0 stays 0, 127 stays 127
    return (float)tmp / 127; // -128 scales to -1.007874016, 0 scales to 0.0, 127 scales to 1.0
}
// Note : each component seems already in range [0..1], but two must be combined into one (a supplies the high byte, b the low byte)
// The 16 bit value is divided by 0xffff, so 0x0000 gives 0 and 0xffff gives 1 as the HILO dot mappings below expect (dividing by 255 gave up to 257)
#define TwoIntoOne(a,b) ((((a) * 255) * 256 + ((b) * 255)) / 65535.0) // TODO : Verify whether this works at all !
#define CalcHiLo(in) H = TwoIntoOne(in.x, in.y); L = TwoIntoOne(in.z, in.w) // TODO : Verify whether this works at all !
// Dot mappings over the output value of a (4 component 8 bit unsigned) texture stage register into a (3 component float) vector value, for use in a dot product calculation:
// Each mapping assigns its result to dm (the HILO ones also to H and L), which must be in scope where it is expanded
// NOTE : the three HILO_HEMISPHERE variants expand identically; CalcHiLo does not apply the signed conversions their comments describe
#define PS_DOTMAPPING_ZERO_TO_ONE(in) dm = in.rgb // :r8g8b8a8->(r,g,b): 0x00=>0, 0xff=>1 thus : output = (input / 0xff )
#define PS_DOTMAPPING_MINUS1_TO_1_D3D(in) dm = float3(m21d(in.x), m21d(in.y), m21d(in.z)) // :r8g8b8a8->(r,g,b): 0x00=>-128/127, 0x01=>-1, 0x80=>0, 0xff=>1 thus : output = ((input - 0x80 ) / 0x7f )
#define PS_DOTMAPPING_MINUS1_TO_1_GL(in) dm = float3(m21g(in.x), m21g(in.y), m21g(in.z)) // :r8g8b8a8->(r,g,b): 0x80=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? (input / 0x7f ) : ((input - 0x100 ) / 0x80 ) (see https://en.wikipedia.org/wiki/Two's_complement)
#define PS_DOTMAPPING_MINUS1_TO_1(in) dm = float3(m21(in.x), m21(in.y), m21(in.z)) // :r8g8b8a8->(r,g,b): 0x80=>-128/127, ?0x81=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? (input / 0x7f ) : ((input - 0x100 ) / 0x7f ) (see https://en.wikipedia.org/wiki/Two's_complement)
#define PS_DOTMAPPING_HILO_1(in) CalcHiLo(in); dm = float3(H, L, 1) // :H16L16 ->(H,L,1): 0x0000=>0, 0xffff=>1 thus : output = (input / 0xffff)
#define PS_DOTMAPPING_HILO_HEMISPHERE_D3D(in) CalcHiLo(in); dm = float3(H, L, sqrt(1-(H*H)-(L*L))) // :H16L16 ->(H,L,sqrt(1-H^2-L^2)):? 0x8000=>-1, 0x0000=>0, 0x7fff=32767/32768 thus : output = ((input - 0x10000) / 0x7fff)
#define PS_DOTMAPPING_HILO_HEMISPHERE_GL(in) CalcHiLo(in); dm = float3(H, L, sqrt(1-(H*H)-(L*L))) // :H16L16 ->(H,L,sqrt(1-H^2-L^2)):? 0x8000=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? (input / 0x7fff) : ((input - 0x10000) / 0x8000)
#define PS_DOTMAPPING_HILO_HEMISPHERE(in) CalcHiLo(in); dm = float3(H, L, sqrt(1-(H*H)-(L*L))) // :H16L16 ->(H,L,sqrt(1-H^2-L^2)): 0x8000=>-32768/32767, 0x8001=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? (input / 0x7fff) : ((input - 0x10000) / 0x7fff)
// Declare one sampler per texture stage (4 in total, starting at register s0)
// TODO : Generate sampler status?
sampler samplers[4] : register(s0);
// Declare alphakill as a variable (avoiding a constant, to allow false's to be optimized away) :
// ALPHAKILL may be defined before this point; otherwise alpha kill is off for all 4 stages
#ifndef ALPHAKILL
#define ALPHAKILL {false, false, false, false}
#endif
static bool alphakill[4] = ALPHAKILL;
// Discards the pixel when stage ts has alpha kill set and texel t has zero alpha; returns t unchanged otherwise
float4 PostProcessTexel(const int ts, float4 t)
{
    const bool kill = alphakill[ts] && (t.a == 0);
    if (kill) {
        discard;
    }
    return t;
}
// Actual texture sampling per texture stage (ts), using the sampling vector (s) as input,
// abstracting away the specifics of accessing above sampler declarations (useful for future Direct3D 10+ sampler arrays)
float4 Sample2D(int ts, float3 s)
{
    // Only s.xy is used; s.z is ignored (whatever it's set to will be optimized away by the compiler, see [1] below)
    return PostProcessTexel(ts, tex2D(samplers[ts], s.xy));
}
// Volume texture variant of Sample2D : samples stage ts with all three coordinates of s
float4 Sample3D(int ts, float3 s)
{
    return PostProcessTexel(ts, tex3D(samplers[ts], s.xyz));
}
// Cube map (6 faces) variant of Sample2D : samples stage ts using s as direction vector
float4 Sample6F(int ts, float3 s)
{
    return PostProcessTexel(ts, texCUBE(samplers[ts], s.xyz));
}
// Offsets TexCoord.xy by the bump map's r and g, weighted by the 2x2 BumpEnvMat. Test-case JSRF (boost-dash effect).
float3 DoBumpEnv(const float4 TexCoord, const float4 BumpEnvMat, const float4 src)
{
    // Expand the bump map value range from (0, +1) to (-1, +1) using s_bx2
    // Note : medieval discovered s_bias improved JSRF, PatrickvL changed it into s_bx2 thanks to http://www.rastertek.com/dx11tut20.html
    // TODO : Replace this by some form of COLORSIGN handling, which may not be possible inside this pixel shader, because filtering-during-sampling would cause artifacts.
    const float4 BumpMap = s_bx2(src);
    // u = x + mat.x * r + mat.z * g ; v = y + mat.y * r + mat.w * g
    const float2 uv = TexCoord.xy + (BumpEnvMat.xy * BumpMap.r) + (BumpEnvMat.zw * BumpMap.g);
    return float3(uv, 0);
}
// Map texture registers to their array elements. Having texture registers in an array allows indexed access to them
#define t0 t[0]
#define t1 t[1]
#define t2 t[2]
#define t3 t[3]
// Resolve a stage number via 'input texture (index) mapping' to its corresponding output texture register (rgba?)
#define src(ts) t[PS_INPUTTEXTURE_[ts]]
// Calculate the dot result for a given texture stage. Since any given stage is input-mapped to always be less than or equal the stage it appears in, this won't cause read-ahead issues
// Expands to two statements : the stage's PS_DOTMAPPING_<ts> macro (which sets dm), then the dot product stored in dot_[ts]
// Test case: BumpDemo demo
#define CalcDot(ts) PS_DOTMAPPING_ ## ts(src(ts)); dot_[ts] = dot(iT[ts].xyz, dm)
// Addressing operations
// Note : Reflect and LSO are wrapped in parentheses, so they keep their meaning when used inside a larger expression
// Clamps input texture coordinates to the range [0..1]
// Note alpha is passed through rather than set to one like ps_1_3 'texcoord'
// Test case: Metal Arms (menu skybox clouds, alpha is specifically set in the VS)
#define Passthru(ts) float4(saturate(iT[ts]))
#define Brdf(ts) float3(t[ts-2].y, t[ts-1].y, t[ts-2].x - t[ts-1].x) // TODO : Complete 16 bit phi/sigma retrieval from float4 texture register. Perhaps use CalcHiLo?
#define Normal2(ts) float3(dot_[ts-1], dot_[ts], 0) // Preceding and current stage dot result. Will be input for Sample2D.
#define Normal3(ts) float3(dot_[ts-2], dot_[ts-1], dot_[ts]) // Two preceding and current stage dot result.
#define Eye float3(iT[1].w, iT[2].w, iT[3].w) // 4th (q) component of input texture coordinates 1, 2 and 3. Only used by texm3x3vspec/PS_TEXTUREMODES_DOT_RFLCT_SPEC, always at stage 3. TODO : Map iT[1/2/3] through PS_INPUTTEXTURE_[]?
#define Reflect(n, e) (2 * (dot(n, e) / dot(n, n)) * (n) - (e)) // https://documentation.help/directx8_c/texm3x3vspec.htm
#define BumpEnv(ts) DoBumpEnv(iT[ts], BEM[ts], src(ts)) // Will be input for Sample2D.
#define LSO(ts) ((LUM[ts].x * src(ts).b) + LUM[ts].y) // Uses PSH_XBOX_CONSTANT_LUM .x = D3DTSS_BUMPENVLSCALE .y = D3DTSS_BUMPENVLOFFSET
// Implementations for all possible texture modes, with stage as argument (prefixed with valid stages and corresponding pixel shader 1.3 assembly texture addressing instructions)
// For ease of understanding, all follow this plan : Optional specifics, or dot calculation (some with normal selection) and sampling vector determination. All end by deriving a value and assigning this to the stage's texture register.
// Note : All expand to statements using main()'s locals (s, v, n, t, iT, black), so they can only be used inside main()
/*0123 tex */ #define PS_TEXTUREMODES_NONE(ts) v = black; t[ts] = v // Seems to work
/*0123 tex */ #define PS_TEXTUREMODES_PROJECT2D(ts) s = iT[ts].xyz; v = Sample2D(ts, s); t[ts] = v // Seems to work (are x/w and y/w implicit?) [1]
/*0123 tex */ #define PS_TEXTUREMODES_PROJECT3D(ts) s = iT[ts].xyz; v = Sample3D(ts, s); t[ts] = v // Seems to work (is z/w implicit?)
/*0123 tex */ #define PS_TEXTUREMODES_CUBEMAP(ts) s = iT[ts].xyz; v = Sample6F(ts, s); t[ts] = v // TODO : Test
/*0123 texcoord */ #define PS_TEXTUREMODES_PASSTHRU(ts) v = Passthru(ts); t[ts] = v // Seems to work
/*0123 texkill */ #define PS_TEXTUREMODES_CLIPPLANE(ts) PS_COMPAREMODE_ ## ts(iT[ts]); v = black; t[ts] = v // Seems to work (setting black to texture register, in case it gets read)
/*-123 texbem */ #define PS_TEXTUREMODES_BUMPENVMAP(ts) s = BumpEnv(ts); v = Sample2D(ts, s); t[ts] = v // Seems to work
/*-123 texbeml */ #define PS_TEXTUREMODES_BUMPENVMAP_LUM(ts) PS_TEXTUREMODES_BUMPENVMAP(ts); v.rgb *= LSO(ts); t[ts] = v // TODO : Test
/*--23 texbrdf */ #define PS_TEXTUREMODES_BRDF(ts) s = Brdf(ts); v = Sample3D(ts, s); t[ts] = v // TODO : Test (t[ts-2] is 16 bit eyePhi,eyeSigma; t[ts-1] is lightPhi,lightSigma)
/*--23 texm3x2tex */ #define PS_TEXTUREMODES_DOT_ST(ts) CalcDot(ts); n = Normal2(ts); s = n; v = Sample2D(ts, s); t[ts] = v // TODO : Test
/*--23 texm3x2depth */ #define PS_TEXTUREMODES_DOT_ZW(ts) CalcDot(ts); n = Normal2(ts); if (n.y==0) v=1;else v = n.x / n.y; t[ts] = v // TODO : Make depth-check use result of division, but how?
/*--2- texm3x3diff */ #define PS_TEXTUREMODES_DOT_RFLCT_DIFF(ts) CalcDot(ts); n = Normal3(ts); s = n; v = Sample6F(ts, s); t[ts] = v // TODO : Test
/*---3 texm3x3vspec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC(ts) CalcDot(ts); n = Normal3(ts); s = Reflect(n, Eye); v = Sample6F(ts, s); t[ts] = v // TODO : Test
/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_3D(ts) CalcDot(ts); n = Normal3(ts); s = n; v = Sample3D(ts, s); t[ts] = v // TODO : Test
/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_CUBE(ts) CalcDot(ts); n = Normal3(ts); s = n; v = Sample6F(ts, s); t[ts] = v // TODO : Test
/*-123 texreg2ar */ #define PS_TEXTUREMODES_DPNDNT_AR(ts) s = src(ts).arg; v = Sample2D(ts, s); t[ts] = v // TODO : Test [1]
/*-123 texreg2gb */ #define PS_TEXTUREMODES_DPNDNT_GB(ts) s = src(ts).gba; v = Sample2D(ts, s); t[ts] = v // TODO : Test [1]
// TODO replace dm with dot_[ts]? Confirm BumpDemo 'Cubemap only' modes
/*-12- texm3x2pad */ #define PS_TEXTUREMODES_DOTPRODUCT(ts) CalcDot(ts); v = float4(dm, 0); t[ts] = v // TODO : Test all dot mapping (setting texture register, in case it gets read - test-case : BumpDemo)
/*---3 texm3x3spec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST(ts) CalcDot(ts); n = Normal3(ts); s = Reflect(n, C0); v = Sample6F(ts, s); t[ts] = v // TODO : Test
// [1] Note : 3rd component set to s.z is just an (ignored) placeholder to produce a float3 (made unique, to avoid the potential complexity of repeated components)
// Pixel shader entry point. This header declares and initializes the Xbox registers; the footer (last raw string) returns r0.
// NOTE(review): presumably the generated Xbox shader program is inserted between header and footer - confirm in PixelShader.cpp
PS_OUTPUT main(const PS_INPUT xIn)
{
// Local constants
const float4 zero = 0;
const float4 half = 0.5; // = s_negbias(zero)
const float4 one = 1; // = s_comp(zero)
const float4 black = float4(0, 0, 0, 1); // opaque black
const float4 iT[4] = { xIn.iT0, xIn.iT1, xIn.iT2, xIn.iT3 }; // Map input texture coordinates to an array, for indexing purposes
// Xbox register variables
float4 r0, r1; // Temporary registers
float4 t[4]; // Texture coordinate registers
float4 v0, v1; // Vertex color registers
float4 _discard; // Write-only discard 'register' (we assume the HLSL compilers' optimization pass will remove assignments to this)
float4 fog; // Read-only fog register, reading alpha is only allowed in final combiner
float4 sum, prod; // Special purpose registers for xfc (final combiner) operation
// Helper variables
int stage = 0; // Combiner stage number, read by the C0 and C1 macro's; initialized to zero for use of C0 in PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST
float4 tmp;
float H, L; // HILO (high/low) temps
float dot_[4];
float3 dm; // Dot mapping temporary
float3 n; // Normal vector (based on preceding dot_[] values)
float3 s; // Actual texture coordinate sampling coordinates (temporary)
float4 v; // Texture value (temporary)
// Determine if this is a front face or backface
bool isFrontFace = (xIn.iFF * FRONTFACE_FACTOR) >= 0;
// Initialize variables
r0 = r1 = black; // Note : r0.a/r1.a will be overwritten by t0.a/t1.a (opaque_black will be retained for PS_TEXTUREMODES_NONE)
// Note : VFACE/FrontFace has been unreliable, investigate again if some test-case shows bland colors
v0 = isFrontFace ? xIn.iD0 : xIn.iB0; // Diffuse front/back
v1 = isFrontFace ? xIn.iD1 : xIn.iB1; // Specular front/back
fog = float4(c_fog.rgb, xIn.iFog); // color from PSH_XBOX_CONSTANT_FOG, alpha from vertex shader output / pixel shader input
// Xbox shader program
)DELIMITER", /* This terminates the 3rd raw string within the 16380 single-byte characters limit. // */
// Fourth and last raw string, the footer :
R"DELIMITER(
// Copy r0.rgba to output
PS_OUTPUT xOut;
xOut.oR0 = r0;
return xOut;
}
// End of pixel shader footer)DELIMITER" /* This terminates the footer raw string" // */

View File

@ -7,13 +7,13 @@ struct VS_INPUT
};
// Output registers
struct VS_OUTPUT
struct VS_OUTPUT // Declared identical to pixel shader input (see PS_INPUT)
{
float4 oPos : POSITION; // Homogeneous clip space position
float4 oD0 : COLOR0; // Primary color (front-facing)
float4 oD1 : COLOR1; // Secondary color (front-facing)
float oFog : FOG; // Fog coordinate
float oPts : PSIZE; // Point size
float oPts : PSIZE; // Point size
float4 oB0 : TEXCOORD4; // Back-facing primary color
float4 oB1 : TEXCOORD5; // Back-facing secondary color
float4 oT0 : TEXCOORD0; // Texture coordinate set 0
@ -37,6 +37,9 @@ uniform float4 xboxScreenspaceOffset : register(c213);
uniform float4 xboxTextureScale[4] : register(c214);
// Parameters for mapping the shader's fog output value to a fog factor
uniform float4 CxbxFogInfo: register(c218); // = CXBX_D3DVS_CONSTREG_FOGINFO
// Overloaded casts, assuring all inputs are treated as float4
float4 _tof4(float src) { return float4(src, src, src, src); }
float4 _tof4(float2 src) { return src.xyyy; }
@ -294,7 +297,8 @@ VS_OUTPUT main(const VS_INPUT xIn)
// Single component outputs
float4 oFog, oPts; // x is write-only on Xbox. Use float4 as some games use incorrect masks
oFog = oPts = 0;
oFog = 1; // Default to no fog. Test case: Lego Star Wars II
oPts = 0;
// Address (index) register
int1 a0 = 0;
@ -324,12 +328,35 @@ VS_OUTPUT main(const VS_INPUT xIn)
R"DELIMITER(
// Copy variables to output struct
VS_OUTPUT xOut;
VS_OUTPUT xOut;
// Fogging
// TODO deduplicate
const float fogDepth = oFog.x; // Don't abs this value! Test-case : DolphinClassic xdk sample
const float fogTableMode = CxbxFogInfo.x;
const float fogDensity = CxbxFogInfo.y;
const float fogStart = CxbxFogInfo.z;
const float fogEnd = CxbxFogInfo.w;
const float FOG_TABLE_NONE = 0;
const float FOG_TABLE_EXP = 1;
const float FOG_TABLE_EXP2 = 2;
const float FOG_TABLE_LINEAR = 3;
float fogFactor;
if(fogTableMode == FOG_TABLE_NONE)
fogFactor = fogDepth;
if(fogTableMode == FOG_TABLE_EXP)
fogFactor = 1 / exp(fogDepth * fogDensity); /* / 1 / e^(d * density)*/
if(fogTableMode == FOG_TABLE_EXP2)
fogFactor = 1 / exp(pow(fogDepth * fogDensity, 2)); /* / 1 / e^((d * density)^2)*/
if(fogTableMode == FOG_TABLE_LINEAR)
fogFactor = (fogEnd - fogDepth) / (fogEnd - fogStart);
xOut.oPos = reverseScreenspaceTransform(oPos);
xOut.oD0 = saturate(oD0);
xOut.oD1 = saturate(oD1);
xOut.oFog = oFog.x; // Note : Xbox clamps fog in pixel shader
xOut.oFog = fogFactor; // Note : Xbox clamps fog in pixel shader -> *NEEDS TESTING* /was oFog.x
xOut.oPts = oPts.x;
xOut.oB0 = saturate(oB0);
xOut.oB1 = saturate(oB1);

View File

@ -297,7 +297,7 @@ g_EmuCDPD;
#define XB_TRAMPOLINES(XB_MACRO) \
XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_CreateVertexShader, (CONST xbox::dword_xt*, CONST xbox::dword_xt*, xbox::dword_xt*, xbox::dword_xt) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader, (xbox::dword_xt) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader_0, () ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader_0, () ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_GetBackBuffer, (xbox::int_xt, D3DBACKBUFFER_TYPE, xbox::X_D3DSurface**) ); \
XB_MACRO(xbox::X_D3DSurface*, WINAPI, D3DDevice_GetBackBuffer2, (xbox::int_xt) ); \
XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_GetDepthStencilSurface, (xbox::X_D3DSurface**) ); \
@ -337,9 +337,9 @@ g_EmuCDPD;
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetVertexShader_0, () ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetVertexShaderInput, (xbox::dword_xt, xbox::uint_xt, xbox::X_STREAMINPUT*) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetViewport, (CONST xbox::X_D3DVIEWPORT8*) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform_0, () ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_MultiplyTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_MultiplyTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3D_DestroyResource, (xbox::X_D3DResource*) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3D_DestroyResource__LTCG, (xbox::void_xt) ); \
XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice, (xbox::uint_xt, D3DDEVTYPE, HWND, xbox::dword_xt, xbox::X_D3DPRESENT_PARAMETERS*, xbox::X_D3DDevice**)); \
@ -348,7 +348,7 @@ g_EmuCDPD;
XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice_4, (xbox::X_D3DPRESENT_PARAMETERS*) ); \
XB_MACRO(xbox::void_xt, WINAPI, Lock2DSurface, (xbox::X_D3DPixelContainer*, D3DCUBEMAP_FACES, xbox::uint_xt, D3DLOCKED_RECT*, RECT*, xbox::dword_xt) ); \
XB_MACRO(xbox::void_xt, WINAPI, Lock3DSurface, (xbox::X_D3DPixelContainer*, xbox::uint_xt, D3DLOCKED_BOX*, D3DBOX*, xbox::dword_xt) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3D_CommonSetRenderTarget, (xbox::X_D3DSurface*, xbox::X_D3DSurface*, void*) ); \
XB_MACRO(xbox::void_xt, WINAPI, D3D_CommonSetRenderTarget, (xbox::X_D3DSurface*, xbox::X_D3DSurface*, void*) ); \
XB_TRAMPOLINES(XB_trampoline_declare);
@ -1980,7 +1980,7 @@ static LRESULT WINAPI EmuMsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lPar
}
else if (wParam == VK_F2)
{
g_UseFixedFunctionVertexShader = !g_UseFixedFunctionVertexShader;
g_UseFixedFunctionPixelShader = !g_UseFixedFunctionPixelShader;
}
else if (wParam == VK_F3)
{
@ -4184,7 +4184,7 @@ void ValidateRenderTargetDimensions(DWORD HostRenderTarget_Width, DWORD HostRend
}
}
float GetZScaleForSurface(xbox::X_D3DSurface* pSurface)
float GetZScaleForPixelContainer(xbox::X_D3DPixelContainer* pSurface)
{
// If no surface was present, fallback to 1
if (pSurface == xbox::zeroptr) {
@ -5012,15 +5012,15 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_Clear)
// Scale the fill based on our scale factor and MSAA scale
float aaX, aaY;
GetMultiSampleScaleRaw(aaX, aaY);
aaX *= g_RenderUpscaleFactor;
aaY *= g_RenderUpscaleFactor;
float Xscale = aaX * g_RenderUpscaleFactor;
float Yscale = aaY * g_RenderUpscaleFactor;
std::vector<D3DRECT> rects(Count);
for (DWORD i = 0; i < Count; i++) {
rects[i].x1 = static_cast<LONG>(pRects[i].x1 * aaX);
rects[i].x2 = static_cast<LONG>(pRects[i].x2 * aaX);
rects[i].y1 = static_cast<LONG>(pRects[i].y1 * aaY);
rects[i].y2 = static_cast<LONG>(pRects[i].y2 * aaY);
rects[i].x1 = static_cast<LONG>(pRects[i].x1 * Xscale);
rects[i].x2 = static_cast<LONG>(pRects[i].x2 * Xscale);
rects[i].y1 = static_cast<LONG>(pRects[i].y1 * Yscale);
rects[i].y2 = static_cast<LONG>(pRects[i].y2 * Yscale);
}
hRet = g_pD3DDevice->Clear(Count, rects.data(), HostFlags, Color, Z, Stencil);
} else {
@ -6406,13 +6406,9 @@ void UpdateFixedFunctionShaderLight(int d3dLightIndex, Light* pShaderLight, D3DX
pShaderLight->SpotIntensityDivisor = cos(d3dLight->Theta / 2) - cos(d3dLight->Phi / 2);
}
float AsFloat(uint32_t value) {
auto v = value;
return *(float*)&v;
}
void UpdateFixedFunctionVertexShaderState()
{
extern xbox::X_VERTEXATTRIBUTEFORMAT* GetXboxVertexAttributeFormat(); // TMP glue
using namespace xbox;
// Vertex blending
@ -6452,7 +6448,11 @@ void UpdateFixedFunctionVertexShaderState()
}
// Lighting
ffShaderState.Modes.Lighting = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_LIGHTING);
// Point sprites aren't lit - 'each point is always rendered with constant colors.'
// https://docs.microsoft.com/en-us/windows/win32/direct3d9/point-sprites
bool PointSpriteEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSPRITEENABLE);
bool LightingEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_LIGHTING);
ffShaderState.Modes.Lighting = LightingEnable && !PointSpriteEnable;
ffShaderState.Modes.TwoSidedLighting = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_TWOSIDEDLIGHTING);
ffShaderState.Modes.LocalViewer = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_LOCALVIEWER);
@ -6467,28 +6467,39 @@ void UpdateFixedFunctionVertexShaderState()
ffShaderState.Modes.BackSpecularMaterialSource = (float)(ColorVertex ? XboxRenderStates.GetXboxRenderState(X_D3DRS_BACKSPECULARMATERIALSOURCE) : D3DMCS_MATERIAL);
ffShaderState.Modes.BackEmissiveMaterialSource = (float)(ColorVertex ? XboxRenderStates.GetXboxRenderState(X_D3DRS_BACKEMISSIVEMATERIALSOURCE) : D3DMCS_MATERIAL);
// Point sprites
auto pointSize = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE);
auto pointSizeMin = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE_MIN);
auto pointSizeMax = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE_MAX);
ffShaderState.PointSprite.PointSize = *reinterpret_cast<float*>(&pointSize);
ffShaderState.PointSprite.PointSizeMin = *reinterpret_cast<float*>(&pointSizeMin);
ffShaderState.PointSprite.PointSizeMax = *reinterpret_cast<float*>(&pointSizeMax);
// Point sprites; Fetch required variables
float pointSize = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE);
float pointSize_Min = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MIN);
float pointSize_Max = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MAX);
bool PointScaleEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALEENABLE);
auto scaleA = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_A);
auto scaleB = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_B);
auto scaleC = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_C);
ffShaderState.PointSprite.ScaleABC.x = PointScaleEnable ? *reinterpret_cast<float*>(&scaleA) : 1.0f;
ffShaderState.PointSprite.ScaleABC.y = PointScaleEnable ? *reinterpret_cast<float*>(&scaleB) : 0.0f;
ffShaderState.PointSprite.ScaleABC.z = PointScaleEnable ? *reinterpret_cast<float*>(&scaleC) : 0.0f;
ffShaderState.PointSprite.XboxRenderTargetHeight = PointScaleEnable ? (float)GetPixelContainerHeight(g_pXbox_RenderTarget) : 1.0f;
float pointScale_A = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_A);
float pointScale_B = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_B);
float pointScale_C = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_C);
float renderTargetHeight = (float)GetPixelContainerHeight(g_pXbox_RenderTarget);
// Make sure to disable point scaling when point sprites are not enabled
PointScaleEnable &= PointSpriteEnable;
// Set variables in shader state
ffShaderState.PointSprite.PointSize = PointSpriteEnable ? pointSize : 1.0f;
ffShaderState.PointSprite.PointSize_Min = PointSpriteEnable ? pointSize_Min : 1.0f;
ffShaderState.PointSprite.PointSize_Max = PointSpriteEnable ? pointSize_Max : 1.0f;
ffShaderState.PointSprite.PointScaleABC.x = PointScaleEnable ? pointScale_A : 1.0f;
ffShaderState.PointSprite.PointScaleABC.y = PointScaleEnable ? pointScale_B : 0.0f;
ffShaderState.PointSprite.PointScaleABC.z = PointScaleEnable ? pointScale_C : 0.0f;
ffShaderState.PointSprite.XboxRenderTargetHeight = PointScaleEnable ? renderTargetHeight : 1.0f;
ffShaderState.PointSprite.RenderUpscaleFactor = g_RenderUpscaleFactor;
// Fog
// Determine how the fog depth is transformed into the fog factor
auto fogEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGENABLE);
auto fogTableMode = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGTABLEMODE);
ffShaderState.Fog.Enable = fogEnable;
// FIXME remove when fixed function PS is implemented
// Note if we are using the fixed function pixel shader
// We only want to produce the fog depth value in the VS, not the fog factor
ffShaderState.Fog.TableMode = !g_UseFixedFunctionPixelShader ? D3DFOG_NONE : fogTableMode;
// Determine how fog depth is calculated
if (XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGENABLE) &&
XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGTABLEMODE) != D3DFOG_NONE) {
if (fogEnable && fogTableMode != D3DFOG_NONE) {
auto proj = &ffShaderState.Transforms.Projection;
if (XboxRenderStates.GetXboxRenderState(X_D3DRS_RANGEFOGENABLE)) {
@ -6508,13 +6519,20 @@ void UpdateFixedFunctionVertexShaderState()
// JSRF (non-compliant projection matrix)
ffShaderState.Fog.DepthMode = FixedFunctionVertexShader::FOG_DEPTH_W;
}
auto density = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGDENSITY);
auto fogStart = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGSTART);
auto fogEnd = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGEND);
ffShaderState.Fog.Density = *reinterpret_cast<float*>(&density);
ffShaderState.Fog.Start = *reinterpret_cast<float*>(&fogStart);
ffShaderState.Fog.End = *reinterpret_cast<float*>(&fogEnd);
}
else {
ffShaderState.Fog.DepthMode = FixedFunctionVertexShader::FOG_DEPTH_NONE;
}
// Texture state
for (int i = 0; i < 4; i++) {
for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) {
auto transformFlags = XboxTextureStates.Get(i, X_D3DTSS_TEXTURETRANSFORMFLAGS);
ffShaderState.TextureStates[i].TextureTransformFlagsCount = (float)(transformFlags & ~D3DTTFF_PROJECTED);
ffShaderState.TextureStates[i].TextureTransformFlagsProjected = (float)(transformFlags & D3DTTFF_PROJECTED);
@ -6524,9 +6542,14 @@ void UpdateFixedFunctionVertexShaderState()
ffShaderState.TextureStates[i].TexCoordIndexGen = (float)(texCoordIndex >> 16); // D3DTSS_TCI flags
}
// TexCoord component counts
extern xbox::X_VERTEXATTRIBUTEFORMAT* GetXboxVertexAttributeFormat(); // TMP glue
// Read current TexCoord component counts
xbox::X_VERTEXATTRIBUTEFORMAT* pXboxVertexAttributeFormat = GetXboxVertexAttributeFormat();
// Note : There seem to be other ways to access this, but we can use only this one;
// This, because CxbxGetVertexDeclaration() can't be used, since it doesn't track VertexAttributes
// (plus, it contains the overhead of shader lookup).
// Another, GetXboxVertexShader(), can't be used, because it doesn't honor vertex attribute overrides
// like those that apply for g_InlineVertexBuffer_DeclarationOverride and active SetVertexShaderInput.
// Also, the xbox::X_D3DVertexShader.Dimensionality[] field contains somewhat strange values.
for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) {
auto vertexDataFormat = pXboxVertexAttributeFormat->Slots[xbox::X_D3DVSDE_TEXCOORD0 + i].Format;
ffShaderState.TexCoordComponentCount[i] = (float)GetXboxVertexDataComponentCount(vertexDataFormat);
@ -7522,10 +7545,22 @@ void CxbxUpdateHostTextureScaling()
*texCoordScale = {
width,
height,
(float)CxbxGetPixelContainerDepth(pXboxBaseTexture),
1.0f, // TODO should this be mip levels for volume textures?
1.0f
};
}
// When a depth buffer is used as a texture
// We do 'Native Shadow Mapping'
// https://aras-p.info/texts/D3D9GPUHacks.html
// The z texture coordinate component holds a depth value, which needs to be normalized
// TODO implement handling for
// - X_D3DRS_SHADOWFUNC
// - X_D3DRS_POLYGONOFFSETZSLOPESCALE
// - X_D3DRS_POLYGONOFFSETZOFFSET
if (EmuXBFormatIsDepthBuffer(XboxFormat)) {
(*texCoordScale)[2] = (float)GetZScaleForPixelContainer(pXboxBaseTexture);
}
}
// Pass above determined texture scaling factors to our HLSL shader.
// Note : CxbxVertexShaderTemplate.hlsl applies texture scaling on
@ -7612,6 +7647,14 @@ void CxbxUpdateHostVertexShaderConstants()
// Need for Speed: Hot Pursuit 2 (car select)
CxbxUpdateHostViewPortOffsetAndScaleConstants();
}
// Placed this here until we find a better place
const uint32_t fogTableMode = XboxRenderStates.GetXboxRenderState(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGTABLEMODE);
const float fogDensity = XboxRenderStates.GetXboxRenderStateAsFloat(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGDENSITY);
const float fogStart = XboxRenderStates.GetXboxRenderStateAsFloat(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGSTART);
const float fogEnd = XboxRenderStates.GetXboxRenderStateAsFloat(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGEND);
float fogStuff[4] = { (float)fogTableMode, fogDensity, fogStart, fogEnd };
g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_FOGINFO, fogStuff, 1);
}
void CxbxUpdateHostViewport() {
@ -7628,16 +7671,16 @@ void CxbxUpdateHostViewport() {
LOG_TEST_CASE("Could not get rendertarget dimensions while setting the viewport");
}
aaScaleX *= g_RenderUpscaleFactor;
aaScaleY *= g_RenderUpscaleFactor;
float Xscale = aaScaleX * g_RenderUpscaleFactor;
float Yscale = aaScaleY * g_RenderUpscaleFactor;
if (g_Xbox_VertexShaderMode == VertexShaderMode::FixedFunction) {
// Set viewport
D3DVIEWPORT hostViewport = g_Xbox_Viewport;
hostViewport.X *= aaScaleX;
hostViewport.Y *= aaScaleY;
hostViewport.Width *= aaScaleX;
hostViewport.Height *= aaScaleY;
hostViewport.X *= Xscale;
hostViewport.Y *= Yscale;
hostViewport.Width *= Xscale;
hostViewport.Height *= Yscale;
g_pD3DDevice->SetViewport(&hostViewport);
// Reset scissor rect
@ -7669,10 +7712,10 @@ void CxbxUpdateHostViewport() {
// Scissor to viewport
g_pD3DDevice->SetRenderState(D3DRS_SCISSORTESTENABLE, TRUE);
RECT viewportRect;
viewportRect.left = g_Xbox_Viewport.X * aaScaleX;
viewportRect.top = g_Xbox_Viewport.Y * aaScaleY;
viewportRect.right = viewportRect.left + g_Xbox_Viewport.Width * aaScaleX;
viewportRect.bottom = viewportRect.top + g_Xbox_Viewport.Height * aaScaleY;
viewportRect.left = g_Xbox_Viewport.X * Xscale;
viewportRect.top = g_Xbox_Viewport.Y * Yscale;
viewportRect.right = viewportRect.left + (g_Xbox_Viewport.Width * Xscale);
viewportRect.bottom = viewportRect.top + (g_Xbox_Viewport.Height * Yscale);
g_pD3DDevice->SetScissorRect(&viewportRect);
}
}
@ -7848,9 +7891,13 @@ xbox::void_xt CxbxImpl_SetPixelShader(xbox::dword_xt Handle)
// Cache the active shader handle
g_pXbox_PixelShader = (xbox::X_PixelShader*)Handle;
// Copy the Pixel Shader data to our RenderState handler
// Copy the Pixel Shader data to our RenderState handler (this includes values for pixel shader constants)
// This mirrors the fact that unpatched SetPixelShader does the same thing!
// This shouldn't be necessary anymore, but shaders still break if we don't do this
// This breakage might be caused by our push-buffer processing "trailing behind" what our patches do;
// By writing to render state during this patch, we avoid missing out on updates that push buffer commands would perform.
// However, any updates that occur mid-way can overwrite what we store here, and still cause problems!
// The only viable solution for that would be to draw entirely based on push-buffer handling (which might require removing possibly all D3D patches!)
if (g_pXbox_PixelShader != nullptr) {
// TODO : If D3DDevice_SetPixelShader() in XDKs don't overwrite the X_D3DRS_PS_RESERVED slot with PSDef.PSTextureModes,
// store it here and restore after memcpy, or alternatively, perform two separate memcpy's (the halves before, and after the reserved slot).
@ -8397,7 +8444,7 @@ static void CxbxImpl_SetRenderTarget
// The current depth stencil is always replaced by what's passed in here (even a null)
g_pXbox_DepthStencil = pNewZStencil;
g_ZScale = GetZScaleForSurface(g_pXbox_DepthStencil); // TODO : Discern between Xbox and host and do this in UpdateDepthStencilFlags?
g_ZScale = GetZScaleForPixelContainer(g_pXbox_DepthStencil); // TODO : Discern between Xbox and host and do this in UpdateDepthStencilFlags?
pHostDepthStencil = GetHostSurface(g_pXbox_DepthStencil, D3DUSAGE_DEPTHSTENCIL);
HRESULT hRet;

View File

@ -0,0 +1,297 @@
#include "FixedFunctionPixelShader.hlsli"
// Runtime fixed function state, bound to the constant registers starting at c0
uniform FixedFunctionPixelShaderState state : register(c0);
// One sampler per texture stage (indexed by stage number), bound from s0 onwards
sampler samplers[4] : register(s0);
// Interpolated per-pixel inputs of the fixed function pixel shader.
struct PS_INPUT // Declared identical to vertex shader output (see VS_OUTPUT)
{
float2 iPos : VPOS; // Screen space x,y pixel location
float4 iD0 : COLOR0; // Front-facing primary (diffuse) vertex color (clamped to 0..1)
float4 iD1 : COLOR1; // Front-facing secondary (specular) vertex color (clamped to 0..1)
float iFog : FOG; // Fog value; main() saturates it and uses it to blend between the fog colour (0) and the pixel colour (1)
float iPts : PSIZE; // Point size; not read anywhere in this shader
float4 iB0 : TEXCOORD4; // Back-facing primary (diffuse) vertex color (clamped to 0..1)
float4 iB1 : TEXCOORD5; // Back-facing secondary (specular) vertex color (clamped to 0..1)
float4 iT[4] : TEXCOORD0; // Texture coordinates for stages 0..3 (TEXCOORD0 up to TEXCOORD3)
float iFF : VFACE; // Front facing if > 0
};
// These 'D3DTA' texture argument values
// may be used during each texture stage
// The whole set is passed into, and returned from, every stage
struct TextureArgs {
float4 CURRENT; // Result of the previous stage; starts out as the diffuse colour
float4 TEXTURE; // Texture sample of the stage being executed (written by ExecuteTextureStage)
float4 DIFFUSE; // Interpolated primary vertex colour
float4 SPECULAR; // Interpolated secondary vertex colour
float4 TEMP; // Temporary register; starts out as 0
float4 TFACTOR; // Texture factor taken from the shader state
};
// Copy of the interpolated texture coordinates
// Written once by main() and read per stage by ExecuteTextureStage
static float4 TexCoords[4];
// When creating an instance of the fixed function shader
// we string-replace the assignment below with a value
// The define keeps the shader compilable without the replacement
// NOTE(review): since the host code matches on this text, the two lines below
// presumably have to stay exactly as written - confirm against the C++ side before editing
#define TEXTURE_SAMPLE_TYPE {SAMPLE_2D, SAMPLE_2D, SAMPLE_2D, SAMPLE_2D};
static int TextureSampleType[4] = TEXTURE_SAMPLE_TYPE;
// Tests whether a single-bit flag is set in a bit field that is stored in a float.
//   value - the bit field, holding a non-negative integral value
//   flag  - the value of the bit to test (a power of two, e.g. 0x10 or 0x20)
// Returns true when that bit is set in 'value'.
bool HasFlag(float value, float flag) {
    // http://theinstructionlimit.com/encoding-boolean-flags-into-a-float-in-hlsl
    // Taking the remainder by (flag * 2) strips every bit above 'flag';
    // what is left is >= flag exactly when the 'flag' bit itself is set.
    // (Using fmod(value, flag) >= flag / 2 would test the bit *below* 'flag' instead.)
    return fmod(value, flag * 2) >= flag;
}
// Resolves a D3DTA texture argument to its colour value.
//   arg - argument selector (low 4 bits) optionally combined with the
//         X_D3DTA_COMPLEMENT and / or X_D3DTA_ALPHAREPLICATE read modifiers
//   ctx - the current contents of the texture argument registers
// Returns the selected register with the requested modifiers applied.
float4 GetArg(float arg, TextureArgs ctx) {
    // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dta
    bool alphaReplicate = HasFlag(arg, X_D3DTA_ALPHAREPLICATE);
    bool complement = HasFlag(arg, X_D3DTA_COMPLEMENT);

    // Strip the modifier bits, leaving only the register selector
    arg = arg % 16;

    // Selectors that are not handled below (e.g. X_D3DTA_CONSTANT) yield 0
    // rather than an uninitialised value
    float4 o = float4(0, 0, 0, 0);

    if (arg == X_D3DTA_DIFFUSE)
        o = ctx.DIFFUSE;
    if (arg == X_D3DTA_CURRENT)
        o = ctx.CURRENT;
    if (arg == X_D3DTA_TEXTURE)
        o = ctx.TEXTURE;
    if (arg == X_D3DTA_TFACTOR)
        o = ctx.TFACTOR;
    if (arg == X_D3DTA_SPECULAR)
        o = ctx.SPECULAR;
    if (arg == X_D3DTA_TEMP)
        o = ctx.TEMP;

    // Both read modifiers may be set at the same time, so apply each independently
    // (the order is irrelevant: (1 - o).aaaa equals 1 - o.aaaa)
    if (complement)
        o = 1 - o;
    if (alphaReplicate)
        o = o.aaaa;

    return o;
}
// Applies a single texture operation (a D3DTOP value) to its arguments.
//   op         - the X_D3DTOP_* operation to perform
//   arg1, arg2 - the two regular arguments of the operation
//   arg0       - the extra argument, only read by MULTIPLYADD and LERP
//   ctx        - current texture argument registers; supplies the alpha of the BLEND*ALPHA operations
//   stage      - runtime state of the stage; supplies the bump environment matrix, luminance scale and offset
// Returns the result of the operation; the caller takes .rgb for a colour op and .a for an alpha op.
// X_D3DTOP_DISABLE is not handled here (the caller checks for it before calling),
// so it, like any unknown value, produces the cyan marker colour at the end.
float4 ExecuteTextureOp(float op, float4 arg1, float4 arg2, float4 arg0, TextureArgs ctx, PsTextureStageState stage) {
// https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtextureop
// Note if we use ifs here instead of else if
// D3DCompile may stackoverflow at runtime
if (op == X_D3DTOP_SELECTARG1)
return arg1;
else if (op == X_D3DTOP_SELECTARG2)
return arg2;
else if (op == X_D3DTOP_MODULATE)
return arg1 * arg2;
else if (op == X_D3DTOP_MODULATE2X)
return 2 * (arg1 * arg2);
else if (op == X_D3DTOP_MODULATE4X)
return 4 * (arg1 * arg2);
else if (op == X_D3DTOP_ADD)
return arg1 + arg2;
else if (op == X_D3DTOP_ADDSIGNED)
return arg1 + arg2 - 0.5;
else if (op == X_D3DTOP_ADDSIGNED2X)
return 2 * (arg1 + arg2 - 0.5);
else if (op == X_D3DTOP_SUBTRACT)
return arg1 - arg2;
else if (op == X_D3DTOP_ADDSMOOTH)
return arg1 + arg2 * (1 - arg1);
// The BLEND*ALPHA operations interpolate between arg1 and arg2
// using the alpha of one of the argument registers
else if (op == X_D3DTOP_BLENDDIFFUSEALPHA)
return arg1 * ctx.DIFFUSE.a + arg2 * (1 - ctx.DIFFUSE.a);
else if (op == X_D3DTOP_BLENDCURRENTALPHA)
return arg1 * ctx.CURRENT.a + arg2 * (1 - ctx.CURRENT.a);
else if (op == X_D3DTOP_BLENDTEXTUREALPHA)
return arg1 * ctx.TEXTURE.a + arg2 * (1 - ctx.TEXTURE.a);
else if (op == X_D3DTOP_BLENDFACTORALPHA)
return arg1 * ctx.TFACTOR.a + arg2 * (1 - ctx.TFACTOR.a);
else if (op == X_D3DTOP_BLENDTEXTUREALPHAPM)
return arg1 + arg2 * (1 - ctx.TEXTURE.a);
else if (op == X_D3DTOP_PREMODULATE)
return arg1; // Note this also multiplies the next stage's CURRENT by its texture
// The four MODULATE*_ADD* operations below always produce an alpha of 1
else if (op == X_D3DTOP_MODULATEALPHA_ADDCOLOR)
return float4(arg1.rgb + arg1.a * arg2.rgb, 1);
else if (op == X_D3DTOP_MODULATECOLOR_ADDALPHA)
return float4(arg1.rgb * arg2.rgb + arg1.a, 1);
else if (op == X_D3DTOP_MODULATEINVALPHA_ADDCOLOR)
return float4((1 - arg1.a) * arg2.rgb + arg1.rgb, 1);
else if (op == X_D3DTOP_MODULATEINVCOLOR_ADDALPHA)
return float4((1 - arg1.rgb) * arg2.rgb + arg1.a, 1);
else if (op == X_D3DTOP_DOTPRODUCT3)
// Test case: PerPixelLighting
// Both arguments are expanded from 0..1 to -1..1 before the dot product
// The clamped scalar result is replicated to all four components
return saturate(dot(
(arg1.rgb - 0.5) * 2,
(arg2.rgb - 0.5) * 2
));
// Note arg0 below is arg1 in D3D docs
// since it becomes the first argument for operations supporting 3 arguments...
else if (op == X_D3DTOP_MULTIPLYADD)
return arg0 + arg1 * arg2;
else if (op == X_D3DTOP_LERP)
return arg0 * arg1 + (1 - arg0) * arg2;
// The bump map operations output perturbation values (U', V' and optionally L')
// which the next stage picks up from CURRENT (see ExecuteTextureStage)
else if (op == X_D3DTOP_BUMPENVMAP)
return float4(
arg1.x * stage.BUMPENVMAT00 + arg1.y * stage.BUMPENVMAT10,
arg1.x * stage.BUMPENVMAT01 + arg1.y * stage.BUMPENVMAT11,
1, 1);
else if (op == X_D3DTOP_BUMPENVMAPLUMINANCE)
return float4(
arg1.x * stage.BUMPENVMAT00 + arg1.y * stage.BUMPENVMAT10,
arg1.x * stage.BUMPENVMAT01 + arg1.y * stage.BUMPENVMAT11,
arg1.z * stage.BUMPENVLSCALE + stage.BUMPENVLOFFSET,
1);
// Something is amiss... we should have returned by now!
// Return a bright colour
return float4(0, 1, 1, 1);
}
// Executes one texture stage: samples its texture, applies the colour and alpha
// operations and stores the result in CURRENT or TEMP.
//   i          - index of the stage (selects the sampler, texture coordinates and runtime state)
//   ctx        - texture argument registers as left by the previous stage
//   s          - the state of this stage that is compiled into the shader
//   previousOp - COLOROP of the previous stage (-1 for the first stage),
//                needed for the bump map and premodulate special cases
// Returns the updated texture argument registers (unchanged if the stage is disabled).
TextureArgs ExecuteTextureStage(
int i,
TextureArgs ctx,
PsTextureHardcodedState s,
int previousOp
)
{
// Early exit if this stage is disabled (and therefore all further stages are too)
// NOTE(review): this function only skips the current stage; whether later stages
// are skipped as well depends on the caller and on the stage definitions it supplies
if (s.COLOROP == X_D3DTOP_DISABLE)
return ctx;
PsTextureStageState stage = state.stages[i];
// Determine the texture for this stage
float3 offset = float3(0, 0, 0);
float4 factor = float4(1, 1, 1, 1);
// Bumpmap special case
if (previousOp == X_D3DTOP_BUMPENVMAP ||
previousOp == X_D3DTOP_BUMPENVMAPLUMINANCE) {
// Assume U, V, L is in CURRENT
// Add U', V', to the texture coordinates
// And multiply by L'
// https://docs.microsoft.com/en-us/windows/win32/direct3d9/bump-mapping-formulas
offset.xy = ctx.CURRENT.xy;
factor.rgb = ctx.CURRENT.z;
}
// Sample the texture
// Note 't' is only assigned for the four known sample types
float4 t;
int type = TextureSampleType[i];
if (type == SAMPLE_NONE)
t = 1; // Test case JSRF
else if (type == SAMPLE_2D)
t = tex2D(samplers[i], TexCoords[i].xy + offset.xy);
else if (type == SAMPLE_3D)
t = tex3D(samplers[i], TexCoords[i].xyz + offset.xyz);
else if (type == SAMPLE_CUBE)
t = texCUBE(samplers[i], TexCoords[i].xyz + offset.xyz);
// When enabled, drop the pixel entirely if the sampled alpha is zero
#ifdef ENABLE_FF_ALPHAKILL
if (stage.ALPHAKILL)
if (t.a == 0)
discard;
#endif
// Assign the final value for TEXTURE
ctx.TEXTURE = t * factor;
// Premodulate special case
if (previousOp == X_D3DTOP_PREMODULATE) {
ctx.CURRENT *= ctx.TEXTURE;
}
// Get arguments for the texture operation
// Almost all operate on 2 arguments, Arg1 and Arg2
// Arg0 is a third argument that seems to have been tacked on
// for MULTIPLYADD and LERP
// Colour operation arguments
float4 cArg1 = GetArg(s.COLORARG1, ctx);
float4 cArg2 = GetArg(s.COLORARG2, ctx);
float4 cArg0 = GetArg(s.COLORARG0, ctx);
// Alpha operation arguments
float4 aArg1 = GetArg(s.ALPHAARG1, ctx);
float4 aArg2 = GetArg(s.ALPHAARG2, ctx);
float4 aArg0 = GetArg(s.ALPHAARG0, ctx);
// Execute texture operation
// ALPHAOP == X_D3DTOP_DISABLE is undefined behaviour
// Using an intermediate value matches known cases...
// Test case: DoA:Xtreme (menu water), GTA III (logos), Crash Wrath of Cortex (relics UI)
// NOTE(review): 'value' is a static local, so it presumably keeps its contents between
// the calls for successive stages; with ALPHAOP disabled, value.a would then still hold
// the alpha written by an earlier stage (or the initial 1) - confirm this is the intent
static float4 value = 1;
value.rgb = ExecuteTextureOp(s.COLOROP, cArg1, cArg2, cArg0, ctx, stage).rgb;
if (s.ALPHAOP != X_D3DTOP_DISABLE) {
value.a = ExecuteTextureOp(s.ALPHAOP, aArg1, aArg2, aArg0, ctx, stage).a;
}
// Save the result
// Note RESULTARG should either be CURRENT or TEMP
// But some titles seem to set it to DIFFUSE
// Use CURRENT for anything other than TEMP
// Test case: DoA 3
if (s.RESULTARG == X_D3DTA_TEMP)
ctx.TEMP = value;
else
ctx.CURRENT = value;
return ctx;
}
// Pixel shader entry point; emulates the fixed function texture stage cascade.
// Runs the four texture stages in order, each one updating the texture argument
// registers, then adds the specular colour, applies fog and outputs CURRENT.
//   input - the interpolated vertex shader outputs for this pixel
// Returns the final pixel colour.
float4 main(const PS_INPUT input) : COLOR {
    // Make the texture coordinates available to ExecuteTextureStage
    TexCoords = input.iT;

    // Each stage is passed and returns
    // a set of texture arguments
    // And will usually update the CURRENT value
    TextureArgs ctx;

    // The CURRENT register
    // Default to the diffuse value
    // TODO determine whether to use the front or back colours
    // and set them here
    ctx.CURRENT = input.iD0;
    ctx.DIFFUSE = input.iD0;
    ctx.SPECULAR = input.iD1;
    // The TEMP register
    // Default to 0
    ctx.TEMP = float4(0, 0, 0, 0);
    ctx.TFACTOR = state.TextureFactor;

    // All stages start out disabled, so the template works without any inserted definitions
    PsTextureHardcodedState stages[4];
    stages[0].COLOROP = X_D3DTOP_DISABLE;
    stages[1].COLOROP = X_D3DTOP_DISABLE;
    stages[2].COLOROP = X_D3DTOP_DISABLE;
    stages[3].COLOROP = X_D3DTOP_DISABLE;

    // Define stages
    // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype
    // We'll find comment below and insert the definitions after it
    // STAGE DEFINITIONS
    // END STAGE DEFINITIONS

    // Run each stage
    int previousOp = -1;
    for (int i = 0; i < 4; i++) {
        ctx = ExecuteTextureStage(
            i,
            ctx,
            stages[i],
            previousOp
        );
        previousOp = stages[i].COLOROP;
    }

    // Add specular if enabled
    // This has to happen before fogging, so that the highlights are fogged
    // along with the rest of the pixel (a fully fogged pixel ends up as the fog colour)
    if (state.SpecularEnable) {
        ctx.CURRENT.rgb += ctx.SPECULAR.rgb;
    }

    // Add fog if enabled
    // A fog value of 1 leaves the pixel untouched, 0 replaces it with the fog colour
    if (state.FogEnable) {
        ctx.CURRENT.rgb = lerp(state.FogColor.rgb, ctx.CURRENT.rgb, saturate(input.iFog));
    }

    // Output whatever is in current at the end
    return ctx.CURRENT;
}

View File

@ -0,0 +1,143 @@
// C++ / HLSL shared state block for fixed function support
// The declarations in this file are written once and compiled as both C++ and HLSL.
// The macros below paper over the syntax differences between the two languages.
#ifdef __cplusplus
#pragma once
#include <d3d9.h>
#include <d3d9types.h> // for D3DFORMAT, D3DLIGHT9, etc
#include <d3dx9math.h> // for D3DXVECTOR4, etc
#include <array>
// Map the HLSL vector and matrix type names onto their Direct3D C++ counterparts
#define float4x4 D3DMATRIX
#define float4 D3DXVECTOR4
#define float3 D3DVECTOR
#define float2 D3DXVECTOR2
// Declares a fixed-size array member; a std::array on the C++ side
#define arr(name, type, length) std::array<type, length> name
#else
// HLSL
// Declares a fixed-size array member; a plain array on the HLSL side
#define arr(name, type, length) type name[length]
// alignas() is C++ only, so it expands to nothing in HLSL
#define alignas(x)
// Turns the 'const' declarations below into 'static' ones when compiled as HLSL
#define const static
#endif // __cplusplus
#ifdef __cplusplus
namespace FixedFunctionPixelShader {
#endif
// From X_D3DTOP
// Texture operations, as compared against the COLOROP and ALPHAOP of a stage
// Declared as float, the same type as the state fields they are compared with
const float X_D3DTOP_DISABLE = 1;
const float X_D3DTOP_SELECTARG1 = 2;
const float X_D3DTOP_SELECTARG2 = 3;
const float X_D3DTOP_MODULATE = 4;
const float X_D3DTOP_MODULATE2X = 5;
const float X_D3DTOP_MODULATE4X = 6;
const float X_D3DTOP_ADD = 7;
const float X_D3DTOP_ADDSIGNED = 8;
const float X_D3DTOP_ADDSIGNED2X = 9;
const float X_D3DTOP_SUBTRACT = 10;
const float X_D3DTOP_ADDSMOOTH = 11;
const float X_D3DTOP_BLENDDIFFUSEALPHA = 12;
const float X_D3DTOP_BLENDCURRENTALPHA = 13;
const float X_D3DTOP_BLENDTEXTUREALPHA = 14;
const float X_D3DTOP_BLENDFACTORALPHA = 15;
const float X_D3DTOP_BLENDTEXTUREALPHAPM = 16;
const float X_D3DTOP_PREMODULATE = 17;
const float X_D3DTOP_MODULATEALPHA_ADDCOLOR = 18;
const float X_D3DTOP_MODULATECOLOR_ADDALPHA = 19;
const float X_D3DTOP_MODULATEINVALPHA_ADDCOLOR = 20;
const float X_D3DTOP_MODULATEINVCOLOR_ADDALPHA = 21;
const float X_D3DTOP_DOTPRODUCT3 = 22;
const float X_D3DTOP_MULTIPLYADD = 23;
const float X_D3DTOP_LERP = 24;
const float X_D3DTOP_BUMPENVMAP = 25;
const float X_D3DTOP_BUMPENVMAPLUMINANCE = 26;
// D3DTA taken from D3D9 - we don't have Xbox definitions
// for these so I guess they're the same?
// The low 4 bits select a register; COMPLEMENT and ALPHAREPLICATE are
// modifier bits that are combined with that selector
const float X_D3DTA_DIFFUSE = 0x00000000; // select diffuse color (read only)
const float X_D3DTA_CURRENT = 0x00000001; // select stage destination register (read/write)
const float X_D3DTA_TEXTURE = 0x00000002; // select texture color (read only)
const float X_D3DTA_TFACTOR = 0x00000003; // select D3DRS_TEXTUREFACTOR (read only)
const float X_D3DTA_SPECULAR = 0x00000004; // select specular color (read only)
const float X_D3DTA_TEMP = 0x00000005; // select temporary register color (read/write)
const float X_D3DTA_CONSTANT = 0x00000006; // select texture stage constant
const float X_D3DTA_COMPLEMENT = 0x00000010; // take 1.0 - x (read modifier)
const float X_D3DTA_ALPHAREPLICATE = 0x00000020; // replicate alpha to color components (read modifier)
// How the texture of a stage is sampled
const int SAMPLE_NONE = 0; // No texture lookup
const int SAMPLE_2D = 1; // 2D texture lookup
const int SAMPLE_3D = 2; // Volume texture lookup
const int SAMPLE_CUBE = 3; // Cube map lookup
// This state is passed to the shader
// It holds the per-stage values that are read at runtime instead of being compiled in
// Every member is declared alignas(16) on the C++ side (the macro expands to nothing in HLSL)
// NOTE(review): presumably so each member occupies a full constant register, like in HLSL - confirm
struct PsTextureStageState {
// Values correspond to XD3D8 version of D3DTEXTURESTAGESTATETYPE
// https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype
/* Samplers for now are configured elsewhere already
constexpr DWORD X_D3DTSS_ADDRESSU = 0;
constexpr DWORD X_D3DTSS_ADDRESSV = 1;
constexpr DWORD X_D3DTSS_ADDRESSW = 2;
constexpr DWORD X_D3DTSS_MAGFILTER = 3;
constexpr DWORD X_D3DTSS_MINFILTER = 4;
constexpr DWORD X_D3DTSS_MIPFILTER = 5;
constexpr DWORD X_D3DTSS_MIPMAPLODBIAS = 6;
constexpr DWORD X_D3DTSS_MAXMIPLEVEL = 7;
constexpr DWORD X_D3DTSS_MAXANISOTROPY = 8;
*/
alignas(16) float COLORKEYOP; // Unimplemented Xbox extension!
alignas(16) float COLORSIGN; // Unimplemented Xbox extension!
// The member is declared either way, so the layout does not depend on ENABLE_FF_ALPHAKILL
#ifdef ENABLE_FF_ALPHAKILL
alignas(16) float ALPHAKILL; // Xbox extension!
#else
alignas(16) float ALPHAKILL; // Unimplemented Xbox extension!
#endif
// TEXTURETRANSFORMFLAGS handled by the VS
// Bump environment matrix
// NOTE(review): BUMPENVMAT11 is declared before BUMPENVMAT10; presumably this
// mirrors the order of the Xbox texture stage states - confirm before reordering
alignas(16) float BUMPENVMAT00;
alignas(16) float BUMPENVMAT01;
alignas(16) float BUMPENVMAT11;
alignas(16) float BUMPENVMAT10;
// Scale and offset applied to the luminance by X_D3DTOP_BUMPENVMAPLUMINANCE
alignas(16) float BUMPENVLSCALE;
alignas(16) float BUMPENVLOFFSET;
// TEXCOORDINDEX handled by the VS
// BORDERCOLOR set on sampler
alignas(16) float COLORKEYCOLOR; // Unimplemented Xbox extension!
};
// This state is compiled into the shader
// Values correspond to XD3D8 version of D3DTEXTURESTAGESTATETYPE
// https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype
struct PsTextureHardcodedState {
alignas(16) float COLOROP; // X_D3DTOP_* operation producing the colour; DISABLE turns the stage off
alignas(16) float COLORARG0; // X_D3DTA_* extra colour argument (MULTIPLYADD and LERP only)
alignas(16) float COLORARG1; // X_D3DTA_* first colour argument
alignas(16) float COLORARG2; // X_D3DTA_* second colour argument
alignas(16) float ALPHAOP; // X_D3DTOP_* operation producing the alpha
alignas(16) float ALPHAARG0; // X_D3DTA_* extra alpha argument (MULTIPLYADD and LERP only)
alignas(16) float ALPHAARG1; // X_D3DTA_* first alpha argument
alignas(16) float ALPHAARG2; // X_D3DTA_* second alpha argument
alignas(16) float RESULTARG; // X_D3DTA_TEMP stores the result in TEMP; anything else in CURRENT
};
// The complete runtime state of the fixed function pixel shader
// Declared by the shader as a uniform bound to constant register c0
struct FixedFunctionPixelShaderState {
alignas(16) arr(stages, PsTextureStageState, 4); // Runtime state for each of the 4 texture stages
alignas(16) float4 TextureFactor; // Value of the TFACTOR texture argument
alignas(16) float SpecularEnable; // Non-zero: add the specular colour to the final colour
alignas(16) float FogEnable; // Non-zero: blend the final colour with FogColor
alignas(16) float3 FogColor; // Colour that fogged pixels are blended towards
};
#ifdef __cplusplus
} // FixedFunctionPixelShader namespace
#endif
// Remove the helper macros again, so they don't leak into whatever includes this file
#ifdef __cplusplus
#undef float4x4
#undef float4
#undef float3
#undef float2
#undef arr
#else // HLSL
#undef arr
#undef alignas
#undef const
#endif // __cplusplus

View File

@ -19,10 +19,13 @@ struct VS_INPUT
#else
float4 pos : POSITION;
float4 bw : BLENDWEIGHT;
float4 color[2] : COLOR;
float4 normal : NORMAL;
float4 color[2] : COLOR;
float1 fogCoord : FOG;
float1 pointSize : PSIZE;
float4 backColor[2] : TEXCOORD4;
float4 normal : NORMAL;
float4 texcoord[4] : TEXCOORD;
float4 reserved[3] : TEXCOORD6;
#endif
};
@ -59,12 +62,17 @@ float4 Get(const VS_INPUT xIn, const uint index)
if(index == normal) return xIn.normal;
if(index == diffuse) return xIn.color[0];
if(index == specular) return xIn.color[1];
if(index == fogCoord) return xIn.fogCoord;
if(index == pointSize) return xIn.pointSize;
if(index == backDiffuse) return xIn.backColor[0];
if(index == backSpecular) return xIn.backColor[1];
if(index == texcoord0) return xIn.texcoord[0];
if(index == texcoord1) return xIn.texcoord[1];
if(index == texcoord2) return xIn.texcoord[2];
if(index == texcoord3) return xIn.texcoord[3];
if(index == reserved0) return xIn.reserved[0];
if(index == reserved1) return xIn.reserved[1];
if(index == reserved2) return xIn.reserved[2];
return 1;
#endif
}
@ -265,11 +273,11 @@ Material DoMaterial(const uint index, const uint diffuseReg, const uint specular
float DoFog(const VS_INPUT xIn)
{
// TODO implement properly
// Until we have pixel shader HLSL we are still leaning on D3D renderstates for fogging
// So we are not doing any fog density calculations here
if (!state.Fog.Enable)
return 1; // No fog!
// http://developer.download.nvidia.com/assets/gamedev/docs/Fog2.pdf
// Obtain the fog depth value 'd'
float fogDepth;
if (state.Fog.DepthMode == FixedFunctionVertexShader::FOG_DEPTH_NONE)
@ -281,7 +289,19 @@ float DoFog(const VS_INPUT xIn)
if (state.Fog.DepthMode == FixedFunctionVertexShader::FOG_DEPTH_W)
fogDepth = Projection.Position.w;
return fogDepth;
// Calculate the fog factor
// Some of this might be better done in the pixel shader?
float fogFactor;
if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_NONE)
fogFactor = fogDepth;
if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_EXP)
fogFactor = 1 / exp(fogDepth * state.Fog.Density); // 1 / e^(d * density)
if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_EXP2)
fogFactor = 1 / exp(pow(fogDepth * state.Fog.Density, 2)); // 1 / e^((d * density)^2)
if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_LINEAR)
fogFactor = (state.Fog.End - fogDepth) / (state.Fog.End - state.Fog.Start); // (end - d) / (end - start)
return fogFactor;
}
float4 DoTexCoord(const uint stage, const VS_INPUT xIn)
@ -389,12 +409,13 @@ float4 DoTexCoord(const uint stage, const VS_INPUT xIn)
float DoPointSpriteSize()
{
const PointSprite ps = state.PointSprite;
float pointSize = ps.PointSize;
float A = ps.ScaleABC.x;
float B = ps.ScaleABC.y;
float C = ps.ScaleABC.z;
// Note : if (ps.PointScaleEnable) not required because when disabled, CPU sets RenderTargetHeight and ScaleA to 1, and ScaleB and ScaleC to 0
float pointSize = ps.PointSize;
const float A = ps.PointScaleABC.x;
const float B = ps.PointScaleABC.y;
const float C = ps.PointScaleABC.z;
// Note : if (ps.PointScaleEnable) not required because when disabled, CPU sets RenderTargetHeight and PointScale _A to 1, and _B and _C to 0
{
const float eyeDistance = length(View.Position);
const float factor = A + (B * eyeDistance) + (C * (eyeDistance * eyeDistance));
@ -402,7 +423,7 @@ float DoPointSpriteSize()
pointSize *= ps.XboxRenderTargetHeight * sqrt(1 / factor);
}
return clamp(pointSize, ps.PointSizeMin, ps.PointSizeMax) * ps.RenderUpscaleFactor;
return clamp(pointSize, ps.PointSize_Min, ps.PointSize_Max) * ps.RenderUpscaleFactor;
}
VS_INPUT InitializeInputRegisters(const VS_INPUT xInput)
@ -423,16 +444,21 @@ VS_INPUT InitializeInputRegisters(const VS_INPUT xInput)
if(i == normal) xIn.normal = value;
if(i == diffuse) xIn.color[0] = value;
if(i == specular) xIn.color[1] = value;
if(i == fogCoord) xIn.fogCoord = value.x; // Note : Untested
if(i == pointSize) xIn.pointSize = value.x; // Note : Untested
if(i == backDiffuse) xIn.backColor[0] = value;
if(i == backSpecular) xIn.backColor[1] = value;
if(i == texcoord0) xIn.texcoord[0] = value;
if(i == texcoord1) xIn.texcoord[1] = value;
if(i == texcoord2) xIn.texcoord[2] = value;
if(i == texcoord3) xIn.texcoord[3] = value;
if(i == reserved0) xIn.reserved[0] = value; // Note : Untested
if(i == reserved1) xIn.reserved[1] = value; // Note : Untested
if(i == reserved2) xIn.reserved[2] = value; // Note : Untested
#endif
}
return xIn;
return xIn; // Note : Untested setters are required to avoid "variable 'xIn' used without having been completely initialized" here
}
VS_OUTPUT main(const VS_INPUT xInput)

View File

@ -29,6 +29,12 @@ namespace FixedFunctionVertexShader {
const float FOG_DEPTH_W = 2;
// Fog depth is based distance of the vertex from the eye position
const float FOG_DEPTH_RANGE = 3;
// https://docs.microsoft.com/en-us/windows/win32/direct3d9/fog-formulas
const float FOG_TABLE_NONE = 0;
const float FOG_TABLE_EXP = 1;
const float FOG_TABLE_EXP2 = 2;
const float FOG_TABLE_LINEAR = 3;
}
// Shared HLSL structures
@ -105,11 +111,11 @@ struct Modes {
struct PointSprite {
alignas(16) float PointSize;
alignas(16) float PointSizeMin;
alignas(16) float PointSizeMax;
alignas(16) float PointSize_Min;
alignas(16) float PointSize_Max;
// alignas(16) float PointScaleEnable;
alignas(16) float XboxRenderTargetHeight;
alignas(16) float3 ScaleABC;
alignas(16) float3 PointScaleABC;
alignas(16) float RenderUpscaleFactor;
};
@ -121,7 +127,12 @@ struct TextureState {
};
struct Fog {
alignas(16) float Enable;
alignas(16) float DepthMode;
alignas(16) float TableMode;
alignas(16) float Density; // EXP fog density
alignas(16) float Start; // LINEAR fog start
alignas(16) float End; // LINEAR fog end
};
// Vertex lighting

View File

@ -0,0 +1,408 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check it.
// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
// ******************************************************************
// *
// * This file is part of the Cxbx project.
// *
// * Cxbx and Cxbe are free software; you can redistribute them
// * and/or modify them under the terms of the GNU General Public
// * License as published by the Free Software Foundation; either
// * version 2 of the license, or (at your option) any later version.
// *
// * This program is distributed in the hope that it will be useful,
// * but WITHOUT ANY WARRANTY; without even the implied warranty of
// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// * GNU General Public License for more details.
// *
// * You should have received a copy of the GNU General Public License
// * along with this program; see the file COPYING.
// * If not, write to the Free Software Foundation, Inc.,
// * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
// *
// * 2020 PatrickvL
// *
// * All rights reserved
// *
// ******************************************************************
#define LOG_PREFIX CXBXR_MODULE::PXSH
#include <sstream> // std::stringstream
#include "Shader.h" // EmuCompileShader
#include "PixelShader.h" // EmuCompilePixelShader
//#include "core\kernel\init\CxbxKrnl.h"
//#include "core\kernel\support\Emu.h"
extern const char* g_ps_model = ps_model_3_0;
// HLSL pixel shader generation
// Register names as they are written into the generated HLSL, indexed by the
// PS_REGISTER_* value noted next to each entry (0x00 up to and including 0x11).
// Indices 0x06 and 0x07 have no named register here and only carry placeholder
// text, so that any accidental use stands out in the generated shader source.
// NOTE(review): the trailing "r", "w" and "r/w" markers presumably denote
// read/write accessibility of each register - confirm against the Xbox docs.
static const std::string register_str[16+2] = {
"_discard", // PS_REGISTER_DISCARD = 0x00L, // w
"C0", // PS_REGISTER_C0 = 0x01L, // r
"C1", // PS_REGISTER_C1 = 0x02L, // r
"fog", // PS_REGISTER_FOG = 0x03L, // r
"v0", // PS_REGISTER_V0 = 0x04L, // r/w
"v1", // PS_REGISTER_V1 = 0x05L, // r/w
"?r6?",
"?r7?",
"t0", // PS_REGISTER_T0 = 0x08L, // r/w
"t1", // PS_REGISTER_T1 = 0x09L, // r/w
"t2", // PS_REGISTER_T2 = 0x0aL, // r/w
"t3", // PS_REGISTER_T3 = 0x0bL, // r/w
"r0", // PS_REGISTER_R0 = 0x0cL, // r/w
"r1", // PS_REGISTER_R1 = 0x0dL, // r/w
"sum", // PS_REGISTER_V1R0_SUM = 0x0eL, // r
"prod", // PS_REGISTER_EF_PROD = 0x0fL, // r
// Cxbx extension; Separate final combiner constant registers :
"FC0", // PS_REGISTER_FC0 = 0x10L, // r
"FC1", // PS_REGISTER_FC1 = 0x11L, // r
};
static const unsigned channel_index_Alpha = 0;
static const unsigned channel_index_RGB = 1;
static const unsigned channel_index_BlueToAlpha = 2; // Note : RGB pipeline (sometimes referred to as "portion") can (besides reading .rgb) expand blue to alpha as well
// Appends one combiner input argument to the HLSL stream.
//   hlsl            : stream receiving the generated text
//   input           : decoded input register (register, channel and input mapping)
//   channel_index   : destination pipeline (channel_index_Alpha / _RGB / _BlueToAlpha)
//   isLast          : when false, a ',' separator is appended after the argument
//   isFinalCombiner : 1 selects the final combiner column of the mapping table, 0 the regular one
void InputRegisterHLSL(std::stringstream& hlsl, RPSInputRegister &input, unsigned channel_index, bool isLast = false, int isFinalCombiner = 0)
{
	// Swizzle per [destination pipeline][source channel], where the source channel
	// index is 0 for PS_CHANNEL_BLUE / PS_CHANNEL_RGB and 1 for PS_CHANNEL_ALPHA >> 4
	static const std::string swizzle_table[3][2] = {
		{ ".b",    ".a"    }, // dest Alpha
		{ ".rgb",  ".aaa"  }, // dest RGB
		{ ".rgbb", ".aaaa" }, // dest RGB+BlueToAlpha (test-case : TechCertGame)
		                      // TODO : Verify .aaaa is indeed unreachable (BlueToAlpha being forbidden for the Alpha channel)
	};

	// One row per PS_INPUTMAPPING value (>> 5). Columns :
	//   [0] constant to emit when the source is PS_REGISTER_ZERO
	//   [1] source register modifier macro for regular combiner stages
	//   [2] source register modifier macro for the final combiner ("N/A" = invalid there)
	static const std::string mapping_table[8][3] = {
		{ "zero",  "s_sat",     "s_sat"  }, // 0x00 PS_INPUTMAPPING_UNSIGNED_IDENTITY : saturate(x)
		{ "one",   "s_comp",    "s_comp" }, // 0x20 PS_INPUTMAPPING_UNSIGNED_INVERT   : 1.0 - saturate(x)
		{ "-one",  "s_bx2",     "N/A"    }, // 0x40 PS_INPUTMAPPING_EXPAND_NORMAL     : 2.0 * max(0.0, x) - 1.0
		{ "one",   "s_negbx2",  "N/A"    }, // 0x60 PS_INPUTMAPPING_EXPAND_NEGATE     : -2.0 * max(0.0, x) + 1.0
		{ "-half", "s_bias",    "N/A"    }, // 0x80 PS_INPUTMAPPING_HALFBIAS_NORMAL   : max(0.0, x) - 0.5
		{ "half",  "s_negbias", "N/A"    }, // 0xa0 PS_INPUTMAPPING_HALFBIAS_NEGATE   : -max(0.0, x) + 0.5
		{ "zero",  "s_ident",   "N/A"    }, // 0xc0 PS_INPUTMAPPING_SIGNED_IDENTITY   : x
		{ "zero",  "s_neg",     "N/A"    }, // 0xe0 PS_INPUTMAPPING_SIGNED_NEGATE     : -x
	};

	const std::string& swizzle = swizzle_table[channel_index][input.Channel >> 4];
	// Converts PS_INPUTMAPPING to a row index into mapping_table
	const unsigned mapping_row = (input.InputMapping >> 5) & 0x07;

	if (input.Reg == PS_REGISTER_ZERO) { // = PS_REGISTER_DISCARD
		// The zero register becomes a constant that already has the input mapping applied
		hlsl << mapping_table[mapping_row][0] << swizzle;
	}
	else {
		const std::string operand = register_str[input.Reg] + swizzle;
		const unsigned signed_identity_row = PS_INPUTMAPPING_SIGNED_IDENTITY >> 5;
		const unsigned signed_negate_row = PS_INPUTMAPPING_SIGNED_NEGATE >> 5;

		if (mapping_row == signed_identity_row) {
			// Identity leaves the operand untouched, so no macro is emitted at all
			hlsl << operand;
		}
		else if (mapping_row == signed_negate_row) {
			// Negation is written in short-hand with a leading minus sign
			hlsl << '-' << operand;
		}
		else {
			// Every other mapping wraps the operand in its modifier macro
			hlsl << mapping_table[mapping_row][1 + isFinalCombiner] << '(' << operand << ')';
		}
	}

	if (!isLast) {
		hlsl << ',';
	}
}
// One row per combiner opcode; column [0] is the opcode name that is emitted as
// the macro 'call' in the generated HLSL, column [1] is a description that gets
// appended to that call as a trailing comment.
// Rows 0..4 are selected by CombinerStageHlsl, row 5 (xfc) by FinalCombinerStageHlsl.
static const std::string opcode_comment[6][2] = {
"xdd", "d0=s0 dot s1, d1=s2 dot s3", // dot/dot/discard > calculating AB=A.B and CD=C.D
"xdm", "d0=s0 dot s1, d1=s2*s3", // dot/mul/discard > calculating AB=A.B and CD=C*D
"xmd", "d0=s0*s1, d1=s2 dot s3", // mul/dot/discard > calculating AB=A*B and CD=C.D
"xmma", "d0=s0*s1, d1=s2*s3, d2={s2*s3}+{s0*s1}", // mul/mul/sum > calculating AB=A*B and CD=C*D and Sum=CD+AB
"xmmc", "d0=s0*s1, d1=s2*s3, d2={r0.a>0.5}?{s2*s3}:{s0*s1}", // mul/mul/mux > calculating AB=A*B and CD=C*D and Mux=CD?AB
"xfc", "sum=r0+v1, prod=s4*s5, r0.rgb=s0*s1+{1-s0}*s2+s3, r0.a=s6.b"
};
// Emits the HLSL macro 'call' for one channel (RGB or Alpha) of a combiner stage.
//   hlsl          : stream receiving the generated text
//   stage         : decoded combiner stage channel (outputs, inputs and flags)
//   channel_index : channel_index_RGB or channel_index_Alpha
// When every output of the stage is discarded, only a "// discarded" comment is written.
void CombinerStageHlsl(std::stringstream& hlsl, RPSCombinerStageChannel& stage, unsigned channel_index)
{
	// Opcode numbers double as the row index into opcode_comment
	enum : unsigned { OP_XDD = 0, OP_XDM = 1, OP_XMD = 2, OP_XMMA = 3, OP_XMMC = 4 };

	// Determine which opcode we're dealing with (xdd, xdm, xmd, xmma or xmmc)
	// Note : DotProduct can only be set for RGB (see RPSCombinerStageChannel::Decode)
	const bool ab_is_dot = stage.OutputAB.DotProduct ? true : false;
	const bool cd_is_dot = stage.OutputCD.DotProduct ? true : false;
	unsigned opcode;
	if (ab_is_dot) {
		opcode = cd_is_dot ? OP_XDD : OP_XDM;
	}
	else if (cd_is_dot) {
		opcode = OP_XMD; // TODO : Verify
	}
	else {
		opcode = stage.AB_CD_MUX ? OP_XMMC : OP_XMMA;
	}

	// Only xmma and xmmc produce a third (sum or mux) output
	const bool has_third_output = (opcode >= OP_XMMA);

	// Early exit when all outputs are discarded; the third output only counts for xmma and xmmc
	const bool ab_cd_discarded =
		(stage.OutputAB.Reg == PS_REGISTER_DISCARD) && (stage.OutputCD.Reg == PS_REGISTER_DISCARD);
	if (ab_cd_discarded && (!has_third_output || (stage.OutputMUX_SUM.Reg == PS_REGISTER_DISCARD))) {
		hlsl << "// discarded";
		return;
	}

	// Output channels (only channel_index_RGB can increase to channel_index_BlueToAlpha)
	// Note : BlueToAlpha can only be set for RGB (see RPSCombinerStageChannel::Decode)
	static const std::string dst_channels[3] = { ".a", ".rgb", ".rgba" };
	const unsigned AB_channel_index = channel_index + stage.OutputAB.BlueToAlpha;
	const unsigned CD_channel_index = channel_index + stage.OutputCD.BlueToAlpha;

	// Output arguments come first : AB, CD and (for xmma/xmmc) the sum/mux destination
	std::stringstream arguments;
	arguments << register_str[stage.OutputAB.Reg] << dst_channels[AB_channel_index]
	          << ',' << register_str[stage.OutputCD.Reg] << dst_channels[CD_channel_index];
	if (has_third_output) {
		// The third output doesn't support the BlueToAlpha flag
		// TODO : Figure out how to support BlueToAlpha source to MUX_SUM.rgb scenario
		// If the xmma_m and xmmc_m macro's can't handle this, we may need to drop
		// those macro's, and generate the HLSL here (alas, as we try to avoid that).
		arguments << ',' << register_str[stage.OutputMUX_SUM.Reg] << dst_channels[channel_index];
	}

	// A visual separation between the output arguments and the four input arguments
	arguments << ", ";

	// Input arguments : A and B feed the AB output, C and D feed the CD output
	InputRegisterHLSL(arguments, stage.OutputAB.Input[0], AB_channel_index);
	InputRegisterHLSL(arguments, stage.OutputAB.Input[1], AB_channel_index);
	InputRegisterHLSL(arguments, stage.OutputCD.Input[0], CD_channel_index);
	InputRegisterHLSL(arguments, stage.OutputCD.Input[1], CD_channel_index);

	// Combiner output modifier macro names, indexed by PS_COMBINEROUTPUT_OUTPUTMAPPING_* >> 3
	static const std::string output_modifier_str[8] = {
		"d_ident", // 0x00 IDENTITY          : y = x
		"d_bias",  // 0x08 BIAS              : y = (x - 0.5)
		"d_x2",    // 0x10 SHIFTLEFT_1       : y = x * 2
		"d_bx2",   // 0x18 SHIFTLEFT_1_BIAS  : y = (x - 0.5) * 2
		"d_x4",    // 0x20 SHIFTLEFT_2       : y = x * 4
		"d_bx4",   // 0x28 SHIFTLEFT_2_BIAS  : y = (x - 0.5) * 4
		"d_d2",    // 0x30 SHIFTRIGHT_1      : y = x / 2
		"d_bd2"    // 0x38 SHIFTRIGHT_1_BIAS : y = (x - 0.5) / 2
	};
	const std::string& output_modifier = output_modifier_str[stage.CombinerOutputMapping >> 3];

	// Concatenate it all together into an opcode 'call' (which resolves into macro expressions)
	hlsl << opcode_comment[opcode][0] << '(' << arguments.str() << ' ' << output_modifier;
	if (has_third_output) {
		// xmma and xmmc require a temporary register with channel designation
		hlsl << ",tmp" << dst_channels[AB_channel_index]; // TODO :
	}
	hlsl << "); // " << opcode_comment[opcode][1];
}
void FinalCombinerStageHlsl(std::stringstream& hlsl, RPSFinalCombiner& fc, bool hasFinalCombiner)
{
std::stringstream arguments;
if (hasFinalCombiner) {
hlsl << "\n // Final combiner xfc was defined in the shader";
}
else {
hlsl << "\n // Final combiner xfc generated from XD3D8 renderstates";
}
for (unsigned i = 0; i < 7; i++) { // Generate A, B, C, D, E, F, G input arguments
// Note : Most final combiner inputs are treated as RGB, but G is single-channel (.a or .b)
bool isLast = (i == 6);
unsigned channel_index = isLast ? channel_index_Alpha : channel_index_RGB;
InputRegisterHLSL(arguments, fc.Input[i], channel_index, isLast, /*isFinalCombiner=*/1);
}
// Concatenate it all together into the xfc opcode 'call' (which resolves into macro expressions)
// Note : The xfc opcode macro does not have an output modifier argument
hlsl << "\n " << opcode_comment[5][0] << "(" << arguments.str() << "); // " << opcode_comment[5][1];
}
// Writes a preprocessor line that reflects a boolean flag.
//   hlsl            : stream receiving the generated text
//   enabled         : the flag value
//   define_enabled  : symbol that represents the flag being set
//   define_disabled : optional symbol that represents the flag being cleared
// With a disabled-symbol, exactly one of the two symbols is #define'd and the
// other one is mentioned in a trailing comment. Without it, the enabled-symbol
// is either #define'd or #undef'ed.
void OutputDefineFlag(std::stringstream& hlsl, bool enabled, std::string_view define_enabled, std::string_view define_disabled = "")
{
	if (define_disabled.empty()) {
		// Single symbol : define it when set, undefine it when cleared
		hlsl << (enabled ? "\n#define " : "\n#undef ") << define_enabled;
		return;
	}

	// Two symbols : define the one that applies, and name the other in a comment
	const std::string_view chosen = enabled ? define_enabled : define_disabled;
	const std::string_view other = enabled ? define_disabled : define_enabled;
	hlsl << "\n#define " << chosen << " // not " << other;
}
/* Disabled, until BumpDemo is fixed (which with this code, inadvertently skips stage 1 and 2 dotproducts) :
bool IsTextureSampled(DecodedRegisterCombiner* pShader, int reg)
{
// TODO : Instead searching like this afterwards, simply set a boolean for each texture-read detected during decoding
// TODO : Extend detection if textures can also be used indirectly thru PSInputTexture (without mention in actual combiner stages)
for (unsigned i = 0; i < pShader->NumberOfCombiners; i++) {
// Is an output calculated, and does any of the inputs read from the given (texture) register?
if (pShader->Combiners[i].RGB.OutputAB.Reg != PS_REGISTER_DISCARD) {
if (pShader->Combiners[i].RGB.OutputAB.Input[0].Reg == reg) return true;
if (pShader->Combiners[i].RGB.OutputAB.Input[1].Reg == reg) return true;
}
if (pShader->Combiners[i].RGB.OutputCD.Reg != PS_REGISTER_DISCARD) {
if (pShader->Combiners[i].RGB.OutputCD.Input[0].Reg == reg) return true;
if (pShader->Combiners[i].RGB.OutputCD.Input[1].Reg == reg) return true;
}
if (pShader->Combiners[i].Alpha.OutputAB.Reg != PS_REGISTER_DISCARD) {
if (pShader->Combiners[i].Alpha.OutputAB.Input[0].Reg == reg) return true;
if (pShader->Combiners[i].Alpha.OutputAB.Input[1].Reg == reg) return true;
}
if (pShader->Combiners[i].Alpha.OutputCD.Reg != PS_REGISTER_DISCARD) {
if (pShader->Combiners[i].Alpha.OutputCD.Input[0].Reg == reg) return true;
if (pShader->Combiners[i].Alpha.OutputCD.Input[1].Reg == reg) return true;
}
// Is the given register writen to? Then no sampling took place
if (pShader->Combiners[i].RGB.OutputAB.Reg == reg) return false;
if (pShader->Combiners[i].RGB.OutputCD.Reg == reg) return false;
if (pShader->Combiners[i].RGB.OutputMUX_SUM.Reg == reg) return false;
if (pShader->Combiners[i].Alpha.OutputAB.Reg == reg) return false;
if (pShader->Combiners[i].Alpha.OutputCD.Reg == reg) return false;
if (pShader->Combiners[i].Alpha.OutputMUX_SUM.Reg == reg) return false;
}
if (pShader->hasFinalCombiner) {
for (unsigned i = 0; i < 7; i++) {
if (pShader->FinalCombiner.Input[i].Reg == reg) return true;
}
}
return false;
} */
// Generates the complete HLSL source for a decoded Xbox register combiner setup.
//   pShader : decoded pixel shader state (texture modes, combiner stages, final combiner, flags)
//   hlsl    : stream receiving the generated source
// The output is assembled in this fixed order : template part [0], a series of
// generated #define's and constants, template parts [1] and [2], one texture mode
// 'call' per texture stage, one RGB plus one Alpha combiner 'call' per combiner
// stage, the final combiner 'call', and lastly template part [3].
void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl)
{
// Include HLSL header and footer as raw strings :
// (the included file has to expand to the four string literals used as [0]..[3] below)
static const std::string hlsl_template[4] = {
#include "core\hle\D3D8\Direct3D9\CxbxPixelShaderTemplate.hlsl"
};
hlsl << hlsl_template[0]; // Start with the HLSL template header
// ALPHAKILL becomes a brace-enclosed list of four booleans, one per texture stage
hlsl << "\n#define ALPHAKILL {"
<< (pShader->AlphaKill[0] ? "true, " : "false, ")
<< (pShader->AlphaKill[1] ? "true, " : "false, ")
<< (pShader->AlphaKill[2] ? "true, " : "false, ")
<< (pShader->AlphaKill[3] ? "true}" : "false}");
hlsl << "\n#define PS_COMBINERCOUNT " << pShader->NumberOfCombiners;
// The combiner count flags are only emitted when there is at least one combiner stage
if (pShader->NumberOfCombiners > 0) {
OutputDefineFlag(hlsl, pShader->CombinerHasUniqueC0, "PS_COMBINERCOUNT_UNIQUE_C0", "PS_COMBINERCOUNT_SAME_C0");
OutputDefineFlag(hlsl, pShader->CombinerHasUniqueC1, "PS_COMBINERCOUNT_UNIQUE_C1", "PS_COMBINERCOUNT_SAME_C1");
OutputDefineFlag(hlsl, pShader->CombinerMuxesOnMsb, "PS_COMBINERCOUNT_MUX_MSB", "PS_COMBINERCOUNT_MUX_LSB");
}
// Per texture stage, a function-like macro that applies a GE or LT comparison macro to each of the four components of its argument
for (unsigned i = 0; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) {
hlsl << "\n#define PS_COMPAREMODE_" << i << "(in)"
<< (pShader->PSCompareMode[i][0] ? " CM_GE(in.x)" : " CM_LT(in.x)") // PS_COMPAREMODE_S_[GE|LT]
<< (pShader->PSCompareMode[i][1] ? " CM_GE(in.y)" : " CM_LT(in.y)") // PS_COMPAREMODE_T_[GE|LT]
<< (pShader->PSCompareMode[i][2] ? " CM_GE(in.z)" : " CM_LT(in.z)") // PS_COMPAREMODE_R_[GE|LT]
<< (pShader->PSCompareMode[i][3] ? " CM_GE(in.w)" : " CM_LT(in.w)");// PS_COMPAREMODE_Q_[GE|LT]
}
// The entry for stage 0 is always written as -1; only stages 1..3 take their value from the shader
hlsl << "\nstatic const int PS_INPUTTEXTURE_[4] = { -1, "
<< pShader->PSInputTexture[1] << ", "
<< pShader->PSInputTexture[2] << ", "
<< pShader->PSInputTexture[3] << " };";
// Generate #defines required by CxbxPixelShaderTemplate.hlsl :
// Note : this loop starts at stage 1, so no PS_DOTMAPPING_0 is ever emitted
for (unsigned i = 1; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) {
static const std::string dotmapping_str[8] = {
"PS_DOTMAPPING_ZERO_TO_ONE", // = 0x00L, // - * * *
"PS_DOTMAPPING_MINUS1_TO_1_D3D", // = 0x01L, // - * * *
"PS_DOTMAPPING_MINUS1_TO_1_GL", // = 0x02L, // - * * *
"PS_DOTMAPPING_MINUS1_TO_1", // = 0x03L, // - * * *
"PS_DOTMAPPING_HILO_1", // = 0x04L, // - * * *
"PS_DOTMAPPING_HILO_HEMISPHERE_D3D", // = 0x05L, // - * * *
"PS_DOTMAPPING_HILO_HEMISPHERE_GL", // = 0x06L, // - * * *
"PS_DOTMAPPING_HILO_HEMISPHERE" // = 0x07L, // - * * *
};
hlsl << "\n#define PS_DOTMAPPING_" << i << " " << dotmapping_str[(unsigned)pShader->PSDotMapping[i]];
}
// Without a disabled-symbol argument, these each result in either a #define or an #undef
OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementV1, "PS_FINALCOMBINERSETTING_COMPLEMENT_V1");
OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementR0, "PS_FINALCOMBINERSETTING_COMPLEMENT_R0");
OutputDefineFlag(hlsl, pShader->FinalCombiner.ClampSum, "PS_FINALCOMBINERSETTING_CLAMP_SUM");
hlsl << hlsl_template[1];
hlsl << hlsl_template[2];
// Generate all four texture stages
for (unsigned i = 0; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) {
// Macro names indexed by the PS_TEXTUREMODES value of the stage (0x00 up to and including 0x12)
// NOTE(review): the index below is not range-checked here - presumably decoding guarantees a value below 19; confirm
static const std::string texturemode_str[19] = {
"PS_TEXTUREMODES_NONE", // = 0x00L, // * * * *
"PS_TEXTUREMODES_PROJECT2D", // = 0x01L, // * * * *
"PS_TEXTUREMODES_PROJECT3D", // = 0x02L, // * * * *
"PS_TEXTUREMODES_CUBEMAP", // = 0x03L, // * * * *
"PS_TEXTUREMODES_PASSTHRU", // = 0x04L, // * * * *
"PS_TEXTUREMODES_CLIPPLANE", // = 0x05L, // * * * *
"PS_TEXTUREMODES_BUMPENVMAP", // = 0x06L, // - * * *
"PS_TEXTUREMODES_BUMPENVMAP_LUM", // = 0x07L, // - * * *
"PS_TEXTUREMODES_BRDF", // = 0x08L, // - - * *
"PS_TEXTUREMODES_DOT_ST", // = 0x09L, // - - * *
"PS_TEXTUREMODES_DOT_ZW", // = 0x0aL, // - - * *
"PS_TEXTUREMODES_DOT_RFLCT_DIFF", // = 0x0bL, // - - * -
"PS_TEXTUREMODES_DOT_RFLCT_SPEC", // = 0x0cL, // - - - *
"PS_TEXTUREMODES_DOT_STR_3D", // = 0x0dL, // - - - *
"PS_TEXTUREMODES_DOT_STR_CUBE", // = 0x0eL, // - - - *
"PS_TEXTUREMODES_DPNDNT_AR", // = 0x0fL, // - * * *
"PS_TEXTUREMODES_DPNDNT_GB", // = 0x10L, // - * * *
"PS_TEXTUREMODES_DOTPRODUCT", // = 0x11L, // - * * -
"PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST", // = 0x12L, // - - - *
};
/* Disabled, until BumpDemo is fixed (which with this code, inadvertently skips stage 1 and 2 dotproducts) :
// Skip stages never read, to avoid compilation overhead
if (!IsTextureSampled(pShader, PS_REGISTER_T0 + i))
continue; */
hlsl << "\n " << texturemode_str[pShader->PSTextureModes[i]] << "(" << i << ");";
// On Xbox, r0.a is initialized to t0.a (and r1.a to t1.a ?) :
if (i == 0) hlsl << " r0.a = t0.a;";
if (i == 1) hlsl << " r1.a = t1.a;";
}
// Generate all combiners (rgb and alpha)
// Each stage first assigns its number to 'stage', then emits the RGB 'call', and the Alpha 'call' on the next line
for (unsigned i = 0; i < pShader->NumberOfCombiners; i++) {
hlsl << "\n stage = " << i << "; ";
CombinerStageHlsl(hlsl, pShader->Combiners[i].RGB, channel_index_RGB);
hlsl << "\n /* + */ ";
CombinerStageHlsl(hlsl, pShader->Combiners[i].Alpha, channel_index_Alpha);
}
// The final combiner 'call' is always emitted; hasFinalCombiner only changes the comment written above it
FinalCombinerStageHlsl(hlsl, pShader->FinalCombiner, pShader->hasFinalCombiner);
hlsl << hlsl_template[3]; // Finish with the HLSL template footer
}
// recompile xbox pixel shader function
// Converts a decoded Xbox register combiner setup into HLSL source, and compiles
// that for the shader model named by g_ps_model.
//   pIntermediateShader : decoded pixel shader state to generate HLSL from
//   ppHostShader        : receives the compiled shader blob
// Returns the HRESULT produced by EmuCompileShader.
extern HRESULT EmuCompilePixelShader
(
	DecodedRegisterCombiner* pIntermediateShader,
	ID3DBlob** ppHostShader
)
{
	std::stringstream generated_hlsl;
	BuildShader(pIntermediateShader, generated_hlsl);

	return EmuCompileShader(generated_hlsl.str(), g_ps_model, ppHostShader);
}

View File

@ -0,0 +1,14 @@
#pragma once
#include "Shader.h" // ID3DBlob (via d3dcompiler.h > d3d11shader.h > d3dcommon.h)
#include "core\hle\D3D8\XbPixelShader.h" // DecodedRegisterCombiner
static const char* ps_model_2_a = "ps_2_a";
static const char* ps_model_3_0 = "ps_3_0";
extern const char* g_ps_model;
extern HRESULT EmuCompilePixelShader
(
DecodedRegisterCombiner* pIntermediateShader,
ID3DBlob** ppHostShader
);

View File

@ -186,6 +186,16 @@ uint32_t XboxRenderStateConverter::GetXboxRenderState(uint32_t State)
return D3D__RenderState[XboxRenderStateOffsets[State]];
}
// Reads an Xbox render state and reinterprets its stored bits as a float
// (a bit-for-bit reinterpretation, not a numeric conversion).
// A render state that doesn't exist in the current D3D8 XDK version is logged
// as a warning and reads as 0.
float XboxRenderStateConverter::GetXboxRenderStateAsFloat(uint32_t State)
{
	if (XboxRenderStateExists(State)) {
		auto* pRawValue = &(D3D__RenderState[XboxRenderStateOffsets[State]]);
		return *reinterpret_cast<float*>(pRawValue);
	}

	EmuLog(LOG_LEVEL::WARNING, "Attempt to read a Renderstate (%s) that does not exist in the current D3D8 XDK Version (%d)", GetDxbxRenderStateInfo(State).S, g_LibVersion_D3D8);
	return 0;
}
void XboxRenderStateConverter::StoreInitialValues()
{
for (unsigned int RenderState = xbox::X_D3DRS_FIRST; RenderState <= xbox::X_D3DRS_LAST; RenderState++) {

View File

@ -46,6 +46,7 @@ public:
void SetWireFrameMode(int mode);
void SetDirty();
uint32_t GetXboxRenderState(uint32_t State);
float GetXboxRenderStateAsFloat(uint32_t State);
private:
void VerifyAndFixDeferredRenderStateOffset();
void DeriveRenderStateOffsetFromDeferredRenderStateOffset();

View File

@ -0,0 +1,142 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check it.
// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
// ******************************************************************
// *
// * This file is part of the Cxbx project.
// *
// * Cxbx and Cxbe are free software; you can redistribute them
// * and/or modify them under the terms of the GNU General Public
// * License as published by the Free Software Foundation; either
// * version 2 of the license, or (at your option) any later version.
// *
// * This program is distributed in the hope that it will be useful,
// * but WITHOUT ANY WARRANTY; without even the implied warranty of
// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// * GNU General Public License for more details.
// *
// * You should have received a copy of the GNU General Public License
// * along with this program; see the file COPYING.
// * If not, write to the Free Software Foundation, Inc.,
// * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
// *
// * 2020 PatrickvL
// *
// * All rights reserved
// *
// ******************************************************************
#define LOG_PREFIX CXBXR_MODULE::VTXSH // TODO : Introduce generic HLSL logging
#include <d3dcompiler.h>
#include "Shader.h"
#include "core\kernel\init\CxbxKrnl.h" // LOG_TEST_CASE
#include "core\kernel\support\Emu.h" // EmuLog
//#include <sstream>
// Returns a copy of shaderString in which every line is prefixed with its
// 1-based line number wrapped in a /* */ comment, so that compiler messages
// (which refer to line numbers) can be matched against the logged source.
// Line numbers are right-aligned to at least 3 characters; wider numbers are
// written in full. Every output line ends in "\n", also when the last input
// line did not. An empty input gives an empty result.
std::string DebugPrependLineNumbers(std::string shaderString) {
	const std::string::size_type minimum_width = 3;

	std::stringstream shader(shaderString);
	std::stringstream debugShader;
	int i = 1;
	for (std::string line; std::getline(shader, line); ) {
		std::string lineNumber = std::to_string(i++);
		// Pad only when the number is narrower than the column. The subtraction is
		// unsigned, so without this check it wraps around from line 1000 onwards,
		// making std::string::insert throw std::length_error.
		if (lineNumber.size() < minimum_width) {
			lineNumber.insert(0, minimum_width - lineNumber.size(), ' ');
		}
		debugShader << "/* " << lineNumber << " */ " << line << "\n";
	}

	return debugShader.str();
}
// Compiles HLSL source into a host shader blob.
//   hlsl_str       : the HLSL source text
//   shader_profile : target profile passed to D3DCompile (for example "ps_3_0")
//   ppHostShader   : receives the compiled shader blob
//   pSourceName    : optional source name passed to D3DCompile
// A first attempt is made at optimization level 3. When that fails, a second
// attempt is made with backwards compatibility enabled and flow control avoided.
// Returns the HRESULT of the last compile attempt. The disassembly that is logged
// at debug level is purely diagnostic and never influences the returned value.
extern HRESULT EmuCompileShader
(
	std::string hlsl_str,
	const char* shader_profile,
	ID3DBlob** ppHostShader,
	const char* pSourceName
)
{
	ID3DBlob* pErrors = nullptr;
	ID3DBlob* pErrorsCompatibility = nullptr;

	EmuLog(LOG_LEVEL::DEBUG, "--- HLSL conversion ---");
	// The shader source is data, not a format string : it may well contain '%' characters
	EmuLog(LOG_LEVEL::DEBUG, "%s", DebugPrependLineNumbers(hlsl_str).c_str());
	EmuLog(LOG_LEVEL::DEBUG, "-----------------------");

	// Both compile attempts only differ in their flags and in where messages end up
	auto compile = [&](UINT flags, ID3DBlob** ppMessages) -> HRESULT {
		return D3DCompile(
			hlsl_str.c_str(),
			hlsl_str.length(),
			pSourceName,
			nullptr, // pDefines
			D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions?
			"main", // shader entry point
			shader_profile,
			flags, // flags1
			0, // flags2
			ppHostShader, // out
			ppMessages // ppErrorMsgs out
		);
	};

	UINT flags1 = D3DCOMPILE_OPTIMIZATION_LEVEL3;
	HRESULT hRet = compile(flags1, &pErrors);
	if (FAILED(hRet)) {
		EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Recompiling in compatibility mode");
		// Attempt to retry in compatibility mode, this allows some vertex-state shaders to compile
		// Test Case: Spy vs Spy
		flags1 |= D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_AVOID_FLOW_CONTROL;
		hRet = compile(flags1, &pErrorsCompatibility);
		if (FAILED(hRet)) {
			LOG_TEST_CASE("Couldn't assemble recompiled shader");
			//EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled shader");
		}
	}

	// Messages are errors when compilation ultimately failed, and merely informative otherwise
	auto hlslErrorLogLevel = FAILED(hRet) ? LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG;
	if (pErrors) {
		// Log errors from the initial compilation
		EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer()));
		pErrors->Release();
		pErrors = nullptr;
	}

	// Failure to recompile in compatibility mode ignored for now
	if (pErrorsCompatibility != nullptr) {
		pErrorsCompatibility->Release();
		pErrorsCompatibility = nullptr;
	}

	LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) {
		if (g_bPrintfOn && SUCCEEDED(hRet) && (*ppHostShader != nullptr)) {
			// Log disassembly. This uses its own result variable, so that a failing
			// disassembly can't turn a successful compilation into a reported failure.
			ID3DBlob* pDisassembly = nullptr;
			HRESULT hDisassemble = D3DDisassemble(
				(*ppHostShader)->GetBufferPointer(),
				(*ppHostShader)->GetBufferSize(),
				D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING,
				NULL,
				&pDisassembly
			);
			if (FAILED(hDisassemble)) {
				EmuLog(LOG_LEVEL::DEBUG, "Shader disassembly failed (0x%08X)", (unsigned)hDisassemble);
			}
			if (pDisassembly) {
				EmuLog(hlslErrorLogLevel, "%s", (char*)(pDisassembly->GetBufferPointer()));
				pDisassembly->Release();
			}
		}
	}

	return hRet;
}

View File

@ -0,0 +1,12 @@
#pragma once
#include <string> // std::string
#include <d3dcompiler.h> // ID3DBlob (via d3d9.h > d3d11shader.h > d3dcommon.h)
extern HRESULT EmuCompileShader
(
std::string hlsl_str,
const char* shader_profile,
ID3DBlob** ppHostShader,
const char* pSourceName = nullptr
);

View File

@ -33,6 +33,7 @@
#include "EmuShared.h"
#include "core/hle/Intercept.hpp"
#include "RenderStates.h"
#include "core/hle/D3D8/XbVertexShader.h" // For g_UseFixedFunctionVertexShader, g_Xbox_VertexShaderMode and VertexShaderMode::FixedFunction
#include "core/hle/D3D8/Direct3D9/Direct3D9.h" // For g_pD3DDevice
#include <optional>
@ -123,7 +124,6 @@ void XboxTextureStateConverter::BuildTextureStateMappingTable()
DWORD XboxTextureStateConverter::GetHostTextureOpValue(DWORD Value)
{
bool bOldOrder = g_LibVersion_D3D8 <= 3948; // Verified old order in 3944, new order in 4039
switch (Value) {
case xbox::X_D3DTOP_DISABLE: return D3DTOP_DISABLE;
case xbox::X_D3DTOP_SELECTARG1: return D3DTOP_SELECTARG1;
@ -137,10 +137,10 @@ DWORD XboxTextureStateConverter::GetHostTextureOpValue(DWORD Value)
case xbox::X_D3DTOP_SUBTRACT: return D3DTOP_SUBTRACT;
case xbox::X_D3DTOP_ADDSMOOTH: return D3DTOP_ADDSMOOTH;
case xbox::X_D3DTOP_BLENDDIFFUSEALPHA: return D3DTOP_BLENDDIFFUSEALPHA;
case 0x0D/*xbox::X_D3DTOP_BLENDCURRENTALPHA */: return bOldOrder ? D3DTOP_BLENDTEXTUREALPHA : D3DTOP_BLENDCURRENTALPHA;
case 0x0E/*xbox::X_D3DTOP_BLENDTEXTUREALPHA */: return bOldOrder ? D3DTOP_BLENDFACTORALPHA : D3DTOP_BLENDTEXTUREALPHA;
case 0x0F/*xbox::X_D3DTOP_BLENDFACTORALPHA */: return bOldOrder ? D3DTOP_BLENDTEXTUREALPHAPM : D3DTOP_BLENDFACTORALPHA;
case 0x10/*xbox::X_D3DTOP_BLENDTEXTUREALPHAPM*/: return bOldOrder ? D3DTOP_BLENDCURRENTALPHA : D3DTOP_BLENDTEXTUREALPHAPM;
case xbox::X_D3DTOP_BLENDCURRENTALPHA: return D3DTOP_BLENDCURRENTALPHA;
case xbox::X_D3DTOP_BLENDTEXTUREALPHA: return D3DTOP_BLENDTEXTUREALPHA;
case xbox::X_D3DTOP_BLENDFACTORALPHA: return D3DTOP_BLENDFACTORALPHA;
case xbox::X_D3DTOP_BLENDTEXTUREALPHAPM: return D3DTOP_BLENDTEXTUREALPHAPM;
case xbox::X_D3DTOP_PREMODULATE: return D3DTOP_PREMODULATE;
case xbox::X_D3DTOP_MODULATEALPHA_ADDCOLOR: return D3DTOP_MODULATEALPHA_ADDCOLOR;
case xbox::X_D3DTOP_MODULATECOLOR_ADDALPHA: return D3DTOP_MODULATECOLOR_ADDALPHA;
@ -165,8 +165,10 @@ void XboxTextureStateConverter::Apply()
// Track if we need to overwrite state 0 with 3 because of Point Sprites
// The Xbox NV2A uses only Stage 3 for point-sprites, so we emulate this
// by mapping Stage 3 to Stage 0, and disabling all stages > 0
// TODO use stage 3 when we roll our own point sprites after moving off D3D9
bool pointSpriteOverride = false;
bool pointSpritesEnabled = pXboxRenderStates->GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE);
bool pointSpritesEnabled = false;
pointSpritesEnabled = pXboxRenderStates->GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE);
if (pointSpritesEnabled) {
pointSpriteOverride = true;
}
@ -177,7 +179,7 @@ void XboxTextureStateConverter::Apply()
for (int State = xbox::X_D3DTSS_FIRST; State <= xbox::X_D3DTSS_LAST; State++) {
// Read the value of the current stage/state from the Xbox data structure
DWORD XboxValue = Get(XboxStage, State); // OR D3D__TextureState[(XboxStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[State]];
DWORD XboxValue = Get(XboxStage, State);
DWORD PcValue = XboxValue;
// If the state hasn't changed, skip setting it
@ -275,9 +277,13 @@ void XboxTextureStateConverter::Apply()
case xbox::X_D3DTSS_COLORARG0: case xbox::X_D3DTSS_COLORARG1: case xbox::X_D3DTSS_COLORARG2:
case xbox::X_D3DTSS_ALPHAARG0: case xbox::X_D3DTSS_ALPHAARG1: case xbox::X_D3DTSS_ALPHAARG2:
case xbox::X_D3DTSS_RESULTARG: case xbox::X_D3DTSS_TEXTURETRANSFORMFLAGS:
break;
case xbox::X_D3DTSS_BUMPENVMAT00: case xbox::X_D3DTSS_BUMPENVMAT01:
case xbox::X_D3DTSS_BUMPENVMAT11: case xbox::X_D3DTSS_BUMPENVMAT10:
case xbox::X_D3DTSS_BUMPENVLSCALE: case xbox::X_D3DTSS_BUMPENVLOFFSET:
#if 0 // New, doesn't work yet
continue; // Note : Since DxbxUpdateActivePixelShader() reads these too, you'd expect here we could skip, but alas. TODO: Fix PS HLSL to not depend on host D3D TSS
#endif
case xbox::X_D3DTSS_BORDERCOLOR: case xbox::X_D3DTSS_MIPMAPLODBIAS:
case xbox::X_D3DTSS_MAXMIPLEVEL: case xbox::X_D3DTSS_MAXANISOTROPY:
break;
@ -330,11 +336,37 @@ void XboxTextureStateConverter::Apply()
}
}
// Normalize values which may have different mappings per XDK version
// Translates a raw texture stage state value to the numbering used by newer XDKs.
//   xboxState : the texture stage state (X_D3DTSS_*) that the value belongs to
//   value     : the raw value as stored by the title
// Only the COLOROP and ALPHAOP values 13..16 are remapped, and only for D3D8
// library versions up to and including 3948; anything else is returned as-is.
DWORD NormalizeValue(DWORD xboxState, DWORD value) {
	// D3DTOP verified old order in 3948, new order in 4039
	if (g_LibVersion_D3D8 > 3948) {
		return value;
	}

	// Only the two texture operation states hold D3DTOP values
	const bool isTextureOp =
		(xboxState == xbox::X_D3DTSS_COLOROP) || (xboxState == xbox::X_D3DTSS_ALPHAOP);
	if (!isTextureOp) {
		return value;
	}

	switch (value) {
	case 13: return xbox::X_D3DTOP_BLENDTEXTUREALPHA;
	case 14: return xbox::X_D3DTOP_BLENDFACTORALPHA;
	case 15: return xbox::X_D3DTOP_BLENDTEXTUREALPHAPM;
	case 16: return xbox::X_D3DTOP_BLENDCURRENTALPHA;
	default: return value;
	}
}
uint32_t XboxTextureStateConverter::Get(int textureStage, DWORD xboxState) {
if (textureStage < 0 || textureStage > 3)
CxbxKrnlCleanup("Requested texture stage was out of range: %d", textureStage);
if (xboxState < xbox::X_D3DTSS_FIRST || xboxState > xbox::X_D3DTSS_LAST)
CxbxKrnlCleanup("Requested texture state was out of range: %d", xboxState);
return D3D__TextureState[(textureStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[xboxState]];
// Read the value of the current stage/state from the Xbox data structure
DWORD rawValue = D3D__TextureState[(textureStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[xboxState]];
return NormalizeValue(xboxState, rawValue);
}

View File

@ -45,6 +45,8 @@ private:
void BuildTextureStateMappingTable();
DWORD GetHostTextureOpValue(DWORD XboxTextureOp);
// Pointer to Xbox texture states
// Note mappings may change between XDK versions
uint32_t* D3D__TextureState = nullptr;
std::array<int, xbox::X_D3DTSS_LAST + 1> XboxTextureStateOffsets;
XboxRenderStateConverter* pXboxRenderStates;

View File

@ -1,13 +1,14 @@
#define LOG_PREFIX CXBXR_MODULE::VTXSH
#include "VertexShader.h"
#include "core\kernel\init\CxbxKrnl.h"
#include "core\kernel\support\Emu.h"
#include "Shader.h" // EmuCompileShader
#include "VertexShader.h" // EmuCompileVertexShader
#include "core\kernel\init\CxbxKrnl.h" // implicit CxbxKrnl_Xbe used in LOG_TEST_CASE
#include "core\kernel\support\Emu.h" // LOG_TEST_CASE (via Logging.h)
#include <fstream>
#include <sstream>
#include <sstream> // std::stringstream
extern const char* g_vs_model = vs_model_2_a;
extern const char* g_vs_model = vs_model_3_0;
// HLSL generation
void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest)
@ -56,6 +57,20 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest)
// Write the mask as a separate argument to the opcode defines
// (No space, so that "dest,mask, ..." looks close to "dest.mask, ...")
hlsl << ",";
// Detect oFog masks other than x
// Test case: Lego Star Wars II (menu)
if (dest.Type == IMD_OUTPUT_O &&
dest.Address == OREG_OFOG &&
dest.Mask != MASK_X)
{
LOG_TEST_CASE("Vertex shader uses oFog mask other than x");
EmuLog(LOG_LEVEL::WARNING, "oFog mask was %#x", dest.Mask);
hlsl << "x"; // write to x instead
return;
}
// Write the mask
if (dest.Mask & MASK_X) hlsl << "x";
if (dest.Mask & MASK_Y) hlsl << "y";
if (dest.Mask & MASK_Z) hlsl << "z";
@ -181,133 +196,8 @@ void BuildShader(IntermediateVertexShader* pShader, std::stringstream& hlsl)
}
}
// Returns a copy of shaderString in which every line is prefixed with a
// "/* NNN */ " comment holding its 1-based line number, right-aligned to a
// minimum width of three characters. Every emitted line ends with "\n".
std::string DebugPrependLineNumbers(std::string shaderString) {
    const std::size_t minWidth = 3;
    std::stringstream shader(shaderString);
    std::stringstream debugShader;
    int i = 1;
    for (std::string line; std::getline(shader, line); ) {
        std::string lineNumber = std::to_string(i++);
        // Pad only when the number is narrower than the minimum width.
        // An unconditional "3 - size()" wraps around (unsigned arithmetic) for
        // line 1000 and beyond, which makes insert() throw std::length_error.
        if (lineNumber.size() < minWidth) {
            lineNumber.insert(0, minWidth - lineNumber.size(), ' ');
        }
        debugShader << "/* " << lineNumber << " */ " << line << "\n";
    }
    return debugShader.str();
}
// Compiles HLSL source text to host shader bytecode using D3DCompile.
//   hlsl         - complete HLSL source to compile (entry point "main")
//   ppHostShader - receives the compiled shader blob
//   pSourceName  - source name handed to D3DCompile
// Returns the HRESULT of the last compile attempt.
// Up to three attempts are made: first with g_vs_model, then with
// vs_model_3_0, and finally with g_vs_model plus backwards-compatibility flags.
HRESULT CompileHlsl(const std::string& hlsl, ID3DBlob** ppHostShader, const char* pSourceName)
{
    // TODO include header in vertex shader
    //xbox::X_VSH_SHADER_HEADER* pXboxVertexShaderHeader = (xbox::X_VSH_SHADER_HEADER*)pXboxFunction;
    ID3DBlob* pErrors = nullptr;
    ID3DBlob* pErrorsCompatibility = nullptr;
    UINT flags1 = D3DCOMPILE_OPTIMIZATION_LEVEL3;

    HRESULT hRet = D3DCompile(
        hlsl.c_str(),
        hlsl.length(),
        pSourceName, // pSourceName
        nullptr, // pDefines
        D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions?
        "main", // shader entry point
        g_vs_model, // shader profile
        flags1, // flags1
        0, // flags2
        ppHostShader, // out
        &pErrors // ppErrorMsgs out
    );

    // If the shader failed in the default vertex shader model, retry in vs_model_3_0
    // This allows shaders too large for 2_a to be compiled (Test Case: Shenmue 2)
    if (FAILED(hRet)) {
        if (pErrors) {
            // This attempt is known to have failed, so report the compiler
            // output together with the retry warning below
            EmuLog(LOG_LEVEL::WARNING, "%s", (char*)(pErrors->GetBufferPointer()));
            pErrors->Release();
            pErrors = nullptr;
        }
        EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Retrying with shader model 3.0");
        hRet = D3DCompile(
            hlsl.c_str(),
            hlsl.length(),
            pSourceName, // pSourceName
            nullptr, // pDefines
            D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions?
            "main", // shader entry point
            vs_model_3_0, // shader profile
            flags1, // flags1
            0, // flags2
            ppHostShader, // out
            &pErrors // ppErrorMsgs out
        );
    }

    // If the shader failed again, retry in compatibility mode
    if (FAILED(hRet)) {
        EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Recompiling in compatibility mode");
        // Attempt to retry in compatibility mode, this allows some vertex-state shaders to compile
        // Test Case: Spy vs Spy
        flags1 |= D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_AVOID_FLOW_CONTROL;
        hRet = D3DCompile(
            hlsl.c_str(),
            hlsl.length(),
            pSourceName, // pSourceName
            nullptr, // pDefines
            D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions?
            "main", // shader entry point
            g_vs_model, // shader profile
            flags1, // flags1
            0, // flags2
            ppHostShader, // out
            &pErrorsCompatibility // ppErrorMsgs out
        );
        if (FAILED(hRet)) {
            LOG_TEST_CASE("Couldn't assemble vertex shader");
        }
    }

    // Determine the log level only now that the final result is known;
    // evaluating FAILED(hRet) before the first compile always selects DEBUG
    const auto hlslErrorLogLevel = FAILED(hRet) ? LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG;
    if (pErrors) {
        // Log the messages of the last non-compatibility compilation
        EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer()));
        pErrors->Release();
        pErrors = nullptr;
    }

    // Failure to recompile in compatibility mode ignored for now
    if (pErrorsCompatibility != nullptr) {
        pErrorsCompatibility->Release();
        pErrorsCompatibility = nullptr;
    }

    LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) {
        if (g_bPrintfOn) {
            if (!FAILED(hRet) && *ppHostShader != nullptr) {
                // Log disassembly. The disassembly result is deliberately kept
                // out of hRet, so a failed disassembly cannot turn a
                // successful compile into a reported failure.
                ID3DBlob* pDisassembly = nullptr;
                D3DDisassemble(
                    (*ppHostShader)->GetBufferPointer(),
                    (*ppHostShader)->GetBufferSize(),
                    D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING,
                    NULL,
                    &pDisassembly
                );
                if (pDisassembly) {
                    EmuLog(LOG_LEVEL::DEBUG, "%s", (char*)(pDisassembly->GetBufferPointer()));
                    pDisassembly->Release();
                }
            }
        }
    }

    return hRet;
}
// recompile xbox vertex shader function
extern HRESULT EmuCompileShader
extern HRESULT EmuCompileVertexShader
(
IntermediateVertexShader* pIntermediateShader,
ID3DBlob** ppHostShader
@ -326,11 +216,16 @@ extern HRESULT EmuCompileShader
hlsl_stream << hlsl_template[1]; // Finish with the HLSL template footer
std::string hlsl_str = hlsl_stream.str();
EmuLog(LOG_LEVEL::DEBUG, "--- HLSL conversion ---");
EmuLog(LOG_LEVEL::DEBUG, DebugPrependLineNumbers(hlsl_str).c_str());
EmuLog(LOG_LEVEL::DEBUG, "-----------------------");
return CompileHlsl(hlsl_str, ppHostShader, "CxbxVertexShaderTemplate.hlsl");
HRESULT hRet = EmuCompileShader(hlsl_str, g_vs_model, ppHostShader, "CxbxVertexShaderTemplate.hlsl");
if (FAILED(hRet) && (g_vs_model != vs_model_3_0)) {
// If the shader failed in the default vertex shader model, retry in vs_model_3_0
// This allows shaders too large for 2_a to be compiled (Test Case: Shenmue 2)
EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Retrying with shader model 3.0");
hRet = EmuCompileShader(hlsl_str, vs_model_3_0, ppHostShader, "CxbxVertexShaderTemplate.hlsl");
}
return hRet;
}
extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader)
@ -352,7 +247,7 @@ extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader)
hlsl << hlslStream.rdbuf();
// Compile the shader
CompileHlsl(hlsl.str(), &pShader, sourceFile.c_str());
EmuCompileShader(hlsl.str(), g_vs_model, &pShader, sourceFile.c_str());
}
*ppHostShader = pShader;
@ -378,6 +273,9 @@ uniform float4 xboxScreenspaceOffset : register(c213);
uniform float4 xboxTextureScale[4] : register(c214);
// Parameters for mapping the shader's fog output value to a fog factor
uniform float4 CxbxFogInfo: register(c218); // = CXBX_D3DVS_CONSTREG_FOGINFO
struct VS_INPUT
{
float4 v[16] : TEXCOORD;
@ -456,10 +354,33 @@ VS_OUTPUT main(const VS_INPUT xIn)
// Copy variables to output struct
VS_OUTPUT xOut;
// Fogging
// TODO deduplicate
const float fogDepth = abs(oFog.x);
const float fogTableMode = CxbxFogInfo.x;
const float fogDensity = CxbxFogInfo.y;
const float fogStart = CxbxFogInfo.z;
const float fogEnd = CxbxFogInfo.w;
const float FOG_TABLE_NONE = 0;
const float FOG_TABLE_EXP = 1;
const float FOG_TABLE_EXP2 = 2;
const float FOG_TABLE_LINEAR = 3;
float fogFactor;
if(fogTableMode == FOG_TABLE_NONE)
fogFactor = fogDepth;
if(fogTableMode == FOG_TABLE_EXP)
fogFactor = 1 / exp(fogDepth * fogDensity); /* / 1 / e^(d * density)*/
if(fogTableMode == FOG_TABLE_EXP2)
fogFactor = 1 / exp(pow(fogDepth * fogDensity, 2)); /* / 1 / e^((d * density)^2)*/
if(fogTableMode == FOG_TABLE_LINEAR)
fogFactor = (fogEnd - fogDepth) / (fogEnd - fogStart);
xOut.oPos = reverseScreenspaceTransform(oPos);
xOut.oD0 = saturate(oD0);
xOut.oD1 = saturate(oD1);
xOut.oFog = oFog.x; // Note : Xbox clamps fog in pixel shader
xOut.oFog = fogFactor; // Note : Xbox clamps fog in pixel shader
xOut.oPts = oPts.x;
xOut.oB0 = saturate(oB0);
xOut.oB1 = saturate(oB1);
@ -473,7 +394,7 @@ VS_OUTPUT main(const VS_INPUT xIn)
}
)";
CompileHlsl(hlsl, &pPassthroughShader, "passthrough.hlsl");
EmuCompileShader(hlsl, g_vs_model, &pPassthroughShader, "passthrough.hlsl");
}
*ppHostShader = pPassthroughShader;

View File

@ -1,6 +1,4 @@
#ifndef DIRECT3D9VERTEXSHADER_H
#define DIRECT3D9VERTEXSHADER_H
#pragma once
#include "core\hle\D3D8\XbVertexShader.h"
#include "FixedFunctionVertexShaderState.hlsli"
@ -15,7 +13,7 @@ static const char* vs_model_2_a = "vs_2_a";
static const char* vs_model_3_0 = "vs_3_0";
extern const char* g_vs_model;
extern HRESULT EmuCompileShader
extern HRESULT EmuCompileVertexShader
(
IntermediateVertexShader* pIntermediateShader,
ID3DBlob** ppHostShader
@ -25,4 +23,3 @@ extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader);
extern HRESULT EmuCompileXboxPassthrough(ID3DBlob** ppHostShader);
#endif

View File

@ -13,7 +13,7 @@ VertexShaderSource g_VertexShaderSource = VertexShaderSource();
ID3DBlob* AsyncCreateVertexShader(IntermediateVertexShader intermediateShader, ShaderKey key) {
ID3DBlob* pCompiledShader;
auto hRet = EmuCompileShader(
auto hRet = EmuCompileVertexShader(
&intermediateShader,
&pCompiledShader
);

View File

@ -1339,14 +1339,23 @@ void EmuUnswizzleBox
} // EmuUnswizzleBox NOPATCH
// Notes :
// * most renderstates were introduced in the (lowest known) XDK version : 3424
// * additional renderstates were introduced between 3434 and 4627
// * we MUST list exact versions for each of those, since their inserts impacts mapping!
// * renderstates were finalized in 4627 (so no change after that version)
// * renderstates after D3DRS_MULTISAMPLEMASK have no host mapping, thus no impact
// * D3DRS_MULTISAMPLETYPE seems the only renderstate that got removed (after 3944, before 4039)
// * all renderstates marked 3424 are also verified present in 3944
const RenderStateInfo DxbxRenderStateInfo[] = {
// * Most renderstates were introduced in the (lowest known) XDK version : 3424
// * Some titles use XDK version 3911
// * The lowest XDK version that has been verified is : 3944
// * All renderstates marked 3424 are also verified to be present in 3944
// * Twenty-three additional renderstates were introduced after 3944 and up to 4627;
// * D3DRS_DEPTHCLIPCONTROL, D3DRS_STIPPLEENABLE, D3DRS_SIMPLE_UNUSED8..D3DRS_SIMPLE_UNUSED1,
// * D3DRS_SWAPFILTER, D3DRS_PRESENTATIONINTERVAL, D3DRS_DEFERRED_UNUSED8..D3DRS_DEFERRED_UNUSED1,
// * D3DRS_MULTISAMPLEMODE, D3DRS_MULTISAMPLERENDERTARGETMODE, and D3DRS_SAMPLEALPHA
// * One renderstate, D3DRS_MULTISAMPLETYPE, was removed (after 3944, before 4039, perhaps even 4034)
// * Around when D3DRS_MULTISAMPLETYPE was removed, D3DRS_MULTISAMPLEMODE was introduced (after 3944, before or at 4039, perhaps even 4034)
// * We MUST list exact versions for all above-mentioned renderstates, since their insertion impacts the mapping!
// * Renderstates verified to be introduced at 4039 or earlier, may have been introduced at 4034 or earlier
// * Renderstates were finalized in 4627 (so no change after that version)
// * XDK versions that have been verified : 3944, 4039, 4134, 4242, 4361, 4432, 4531, 4627, 4721, 4831, 4928, 5028, 5120, 5233, 5344, 5455, 5558, 5659, 5788, 5849, 5933
// * Renderstates with uncertain validity are marked "Verified absent in #XDK#" and/or "present in #XDK#". Some have "Might be introduced "... "in between" or "around #XDK#"
// * Renderstates after D3DRS_MULTISAMPLEMASK have no host DX9 D3DRS mapping, thus no impact
const RenderStateInfo DxbxRenderStateInfo[1+xbox::X_D3DRS_DONOTCULLUNCOMPRESSED] = {
// String Ord Version Type Method Native
{ "D3DRS_PSALPHAINPUTS0" /*= 0*/, 3424, xtDWORD, NV2A_RC_IN_ALPHA(0) },
@ -1495,8 +1504,8 @@ const RenderStateInfo DxbxRenderStateInfo[] = {
{ "D3DRS_VERTEXBLEND" /*= 137*/, 3424, xtD3DVERTEXBLENDFLAGS, NV2A_SKIN_MODE, D3DRS_VERTEXBLEND },
{ "D3DRS_FOGCOLOR" /*= 138*/, 3424, xtD3DCOLOR, NV2A_FOG_COLOR, D3DRS_FOGCOLOR }, // SwapRgb
{ "D3DRS_FILLMODE" /*= 139*/, 3424, xtD3DFILLMODE, NV2A_POLYGON_MODE_FRONT, D3DRS_FILLMODE },
{ "D3DRS_BACKFILLMODE" /*= 140*/, 3424, xtD3DFILLMODE, 0 }, // nsp.
{ "D3DRS_TWOSIDEDLIGHTING" /*= 141*/, 3424, xtBOOL, NV2A_POLYGON_MODE_BACK }, // nsp.
{ "D3DRS_BACKFILLMODE" /*= 140*/, 3424, xtD3DFILLMODE, NV2A_POLYGON_MODE_BACK }, // nsp.
{ "D3DRS_TWOSIDEDLIGHTING" /*= 141*/, 3424, xtBOOL, 0 }, // nsp. // FIXME map from NV2A_LIGHT_MODEL
{ "D3DRS_NORMALIZENORMALS" /*= 142*/, 3424, xtBOOL, NV2A_NORMALIZE_ENABLE, D3DRS_NORMALIZENORMALS },
{ "D3DRS_ZENABLE" /*= 143*/, 3424, xtBOOL, NV2A_DEPTH_TEST_ENABLE, D3DRS_ZENABLE }, // D3DZBUFFERTYPE?
{ "D3DRS_STENCILENABLE" /*= 144*/, 3424, xtBOOL, NV2A_STENCIL_ENABLE, D3DRS_STENCILENABLE },

View File

@ -1039,10 +1039,8 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE;
// TODO co-locate shader workaround constants with shader code
#define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE (X_D3DVS_CONSTREG_COUNT)
#define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_SIZE 16
#define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE (CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE + CXBX_D3DVS_CONSTREG_VREGDEFAULTS_SIZE)
#define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_SIZE 4
#define CXBX_D3DVS_SCREENSPACE_SCALE_BASE (CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE + CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_SIZE)
#define CXBX_D3DVS_NORMALIZE_SCALE_SIZE 1
@ -1052,6 +1050,8 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE;
#define CXBX_D3DVS_TEXTURES_SCALE_BASE (CXBX_D3DVS_SCREENSPACE_OFFSET_BASE + CXBX_D3DVS_NORMALIZE_OFFSET_SIZE)
#define CXBX_D3DVS_TEXTURES_SCALE_SIZE 4
#define CXBX_D3DVS_CONSTREG_FOGINFO (CXBX_D3DVS_TEXTURES_SCALE_BASE + CXBX_D3DVS_TEXTURES_SCALE_SIZE)
#define X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION)
#define X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_OFFSET + X_D3DSCM_CORRECTION)

File diff suppressed because it is too large Load Diff

View File

@ -29,10 +29,501 @@
#include "core\hle\D3D8\XbD3D8Types.h"
// dump pixel shader definition to file
void DumpPixelShaderDefToFile( xbox::X_D3DPIXELSHADERDEF* pPSDef, const char* pszCode );
// print relevant contents to the debug console
void PrintPixelShaderDefContents(xbox::X_D3DPIXELSHADERDEF* pDSDef );
/*---------------------------------------------------------------------------*/
/* Texture configuration - The following members of the D3DPixelShaderDef */
/* structure define the addressing modes of each of the four texture stages:*/
/* PSTextureModes */
/* PSDotMapping */
/* PSInputTexture */
/* PSCompareMode */
/*---------------------------------------------------------------------------*/
// =========================================================================================================
// PSTextureModes
// --------.--------.--------.---xxxxx stage 0 PS_TEXTUREMODES
// --------.--------.------xx.xxx----- stage 1 PS_TEXTUREMODES
// --------.--------.-xxxxx--.-------- stage 2 PS_TEXTUREMODES
// --------.----xxxx.x-------.-------- stage 3 PS_TEXTUREMODES
#define PS_TEXTUREMODES(t0,t1,t2,t3) (((t3)<<15)|((t2)<<10)|((t1)<<5)|(t0))
/*
Texture modes:
NONE :stage inactive
PROJECT2D :argb = texture(s/q, t/q)
PROJECT3D :argb = texture(s/q, t/q, r/q)
CUBEMAP :argb = cubemap(s,t,r)
PASSTHRU :argb = s,t,r,q
CLIPPLANE :pixel not drawn if s,t,r, or q < 0. PSCompareMode affects comparison
BUMPENVMAP :argb=texture(s+mat00*src.r+mat01*src.g,
t+mat10*src.r+mat11*src.g)
mat00 set via D3DTSS_BUMPENVMAT00, etc.
BUMPENVMAP_LUM :argb=texture(s+mat00*src.r+mat01*src.g,
t+mat10*src.r+mat11*src.g);
rgb *= (lum_scale*src.b + lum_bias); (a is not affected)
lum_scale set by D3DTSS_BUMPENVLSCALE
lum_bias set by D3DTSS_BUMPENVLOFFSET
mat00 set via D3DTSS_BUMPENVMAT00, etc.
BRDF :argb = texture(eyeSigma, lightSigma, dPhi)
eyeSigma = Sigma of eye vector in spherical coordinates, read from stage-2 as (16 bit phi,sigma)
lightSigma = Sigma of light vector in spherical coordinates, read from stage-1 as (16 bit phi,sigma)
dPhi = Phi of eye - Phi of light
DOT_ST :argb = texture(<DotResult of stage-1>, (s,t,r).(src.r,src.g,src.b))
DOT_ZW :frag depth = <DotResult of stage-1>/((s,t,r).(src.r,src.g,src.b))
DOT_RFLCT_DIFF :n = (<DotResult of stage-1>,(s,t,r).(src.r,src.g,src.b),<DotResult of stage+1>)
argb = cubemap(n)
DOT_RFLCT_SPEC :n = (<DotResult of stage-2>,<DotResult of stage-1>,(s,t,r).(src.r,src.g,src.b))
r = 2*n*(n.e)/(n.n) - e where e is eye vector built from q texture coordinate of each stage
argb = cubemap(r)
DOT_STR_3D :argb=texture((<DotResult of stage-2>,<DotResult of stage-1>,(s,t,r).(src.r,src.g,src.b)))
DOT_STR_CUBE :argb=cubemap((<DotResult of stage-2>,<DotResult of stage-1>,(s,t,r).(src.r,src.g,src.b)))
DEPENDENT_AR :argb = texture(src.a, src.r)
DEPENDENT_GB :argb = texture(src.g, src.b)
DOTPRODUCT :argb = (s,t,r).(src.r,src.g,src.b)
DOT_RFLCT_SPEC_CONST :n = (<DotResult of stage-2>,<DotResult of stage-1>,(s,t,r).(src.r,src.g,src.b))
r = 2*n*(n.e)/(n.n) - e where e is eye vector set via SetEyeVector() into c0
argb = cubemap(r)
*/
// Texture addressing mode of a single texture stage.
// Each value fits in 5 bits (see PS_TEXTUREMODES_MASK); the PS_TEXTUREMODES(t0,t1,t2,t3)
// macro packs one value per stage at bit offsets 0, 5, 10 and 15.
// The four columns after each value follow the "valid in stage 0 1 2 3" header
// ('*' presumably meaning valid and '-' not valid in that stage); "Uses" names
// the other pixel shader definition fields that the mode depends on.
enum PS_TEXTUREMODES
{ // valid in stage 0 1 2 3 Uses
PS_TEXTUREMODES_NONE= 0x00L, // * * * *
PS_TEXTUREMODES_PROJECT2D= 0x01L, // * * * * Sample
PS_TEXTUREMODES_PROJECT3D= 0x02L, // * * * * Sample
PS_TEXTUREMODES_CUBEMAP= 0x03L, // * * * * Sample
PS_TEXTUREMODES_PASSTHRU= 0x04L, // * * * *
PS_TEXTUREMODES_CLIPPLANE= 0x05L, // * * * * PSCompareMode
PS_TEXTUREMODES_BUMPENVMAP= 0x06L, // - * * * Sample, PSInputTexture
PS_TEXTUREMODES_BUMPENVMAP_LUM= 0x07L, // - * * * Sample, PSInputTexture
PS_TEXTUREMODES_BRDF= 0x08L, // - - * *
PS_TEXTUREMODES_DOT_ST= 0x09L, // - - * * Sample, PSInputTexture, PSDotMapping
PS_TEXTUREMODES_DOT_ZW= 0x0aL, // - - * * PSInputTexture, PSDotMapping
PS_TEXTUREMODES_DOT_RFLCT_DIFF= 0x0bL, // - - * - Sample, PSInputTexture, PSDotMapping
PS_TEXTUREMODES_DOT_RFLCT_SPEC= 0x0cL, // - - - * Sample, PSInputTexture, PSDotMapping
PS_TEXTUREMODES_DOT_STR_3D= 0x0dL, // - - - * Sample, PSInputTexture, PSDotMapping
PS_TEXTUREMODES_DOT_STR_CUBE= 0x0eL, // - - - * Sample, PSInputTexture, PSDotMapping
PS_TEXTUREMODES_DPNDNT_AR= 0x0fL, // - * * * Sample, PSInputTexture
PS_TEXTUREMODES_DPNDNT_GB= 0x10L, // - * * * Sample, PSInputTexture
PS_TEXTUREMODES_DOTPRODUCT= 0x11L, // - * * - PSInputTexture
PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST= 0x12L, // - - - * Sample, PSInputTexture, PSDotMapping
// 0x13-0x1f reserved
PS_TEXTUREMODES_MASK= 0x1fL // all 5 bits of one stage's mode
};
// =========================================================================================================
// PSDotMapping
// --------.--------.--------.-----xxx // stage 1 PS_DOTMAPPING
// --------.--------.--------.-xxx---- // stage 2 PS_DOTMAPPING
// --------.--------.-----xxx.-------- // stage 3 PS_DOTMAPPING
#define PS_DOTMAPPING(t0,t1,t2,t3) (((t3)<<8)|((t2)<<4)|(t1))
// Dot mappings over the output value of a (4 component 8 bit unsigned) texture stage register into a (3 component float) vector value, for use in a dot product calculation:
// PS_DOTMAPPING_ZERO_TO_ONE :r8g8b8a8->(r,g,b): 0x00=>0, 0xff=>1 thus : output = (input / 0xff )
// PS_DOTMAPPING_MINUS1_TO_1_D3D :r8g8b8a8->(r,g,b): 0x00=>-128/127, 0x01=>-1, 0x80=>0, 0xff=>1 thus : output = ((input - 0x80 ) / 0x7f )
// PS_DOTMAPPING_MINUS1_TO_1_GL :r8g8b8a8->(r,g,b): 0x80=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? (input / 0x7f ) : ((input - 0x100 ) / 0x80 ) (see https://en.wikipedia.org/wiki/Two's_complement)
// PS_DOTMAPPING_MINUS1_TO_1 :r8g8b8a8->(r,g,b): 0x80=>-128/127, ?0x81=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? (input / 0x7f ) : ((input - 0x100 ) / 0x7f ) (see https://en.wikipedia.org/wiki/Two's_complement)
// PS_DOTMAPPING_HILO_1 :H16L16 ->(H,L,1): 0x0000=>0, 0xffff=>1 thus : output = (input / 0xffff)
// PS_DOTMAPPING_HILO_HEMISPHERE_D3D :H16L16 ->(H,L,sqrt(1-H*H-L*L)):? 0x8000=>-1, 0x0000=>0, 0x7fff=32767/32768 thus : output = ((input - 0x10000) / 0x7fff)
// PS_DOTMAPPING_HILO_HEMISPHERE_GL :H16L16 ->(H,L,sqrt(1-H*H-L*L)):? 0x8000=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? (input / 0x7fff) : ((input - 0x10000) / 0x8000)
// PS_DOTMAPPING_HILO_HEMISPHERE :H16L16 ->(H,L,sqrt(1-H*H-L*L)): 0x8000=>-32768/32767, 0x8001=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? (input / 0x7fff) : ((input - 0x10000) / 0x7fff)
// Selects how a texture stage's output register is converted into a vector
// before it is used in a dot product.
// Each value fits in 3 bits (see PS_DOTMAPPING_MASK); the PS_DOTMAPPING(t0,t1,t2,t3)
// macro packs the values for stages 1, 2 and 3 at bit offsets 0, 4 and 8 and
// ignores its t0 argument, matching the '-' in the stage 0 column below.
enum PS_DOTMAPPING
{ // valid in stage 0 1 2 3
PS_DOTMAPPING_ZERO_TO_ONE= 0x00L, // - * * *
PS_DOTMAPPING_MINUS1_TO_1_D3D= 0x01L, // - * * *
PS_DOTMAPPING_MINUS1_TO_1_GL= 0x02L, // - * * *
PS_DOTMAPPING_MINUS1_TO_1= 0x03L, // - * * *
PS_DOTMAPPING_HILO_1= 0x04L, // - * * *
PS_DOTMAPPING_HILO_HEMISPHERE_D3D= 0x05L, // - * * *
PS_DOTMAPPING_HILO_HEMISPHERE_GL= 0x06L, // - * * *
PS_DOTMAPPING_HILO_HEMISPHERE= 0x07L, // - * * *
PS_DOTMAPPING_MASK= 0x07L // all 3 bits of one stage's mapping
};
// =========================================================================================================
// PSCompareMode
// --------.--------.--------.----xxxx // stage 0 PS_COMPAREMODE
// --------.--------.--------.xxxx---- // stage 1 PS_COMPAREMODE
// --------.--------.----xxxx.-------- // stage 2 PS_COMPAREMODE
// --------.--------.xxxx----.-------- // stage 3 PS_COMPAREMODE
#define PS_COMPAREMODE(t0,t1,t2,t3) (((t3)<<12)|((t2)<<8)|((t1)<<4)|(t0))
// Per-coordinate comparison flags of one texture stage (referenced by the
// CLIPPLANE texture mode).
// One bit per texture coordinate: S = 0x1, T = 0x2, R = 0x4, Q = 0x8.
// A set bit selects the *_GE variant; every *_LT variant is the value 0, so
// the flags of the four coordinates are combined with bitwise OR.
// The PS_COMPAREMODE(t0,t1,t2,t3) macro packs one such 4-bit group per stage
// at bit offsets 0, 4, 8 and 12.
enum PS_COMPAREMODE
{
PS_COMPAREMODE_S_LT= 0x00L,
PS_COMPAREMODE_S_GE= 0x01L,
PS_COMPAREMODE_T_LT= 0x00L,
PS_COMPAREMODE_T_GE= 0x02L,
PS_COMPAREMODE_R_LT= 0x00L,
PS_COMPAREMODE_R_GE= 0x04L,
PS_COMPAREMODE_Q_LT= 0x00L,
PS_COMPAREMODE_Q_GE= 0x08L,
PS_COMPAREMODE_MASK= 0x0fL // all 4 comparison bits of one stage
};
// =========================================================================================================
// PSInputTexture
// --------.-------x.--------.-------- // stage 2
// --------.--xx----.--------.-------- // stage 3
//
// Selects the other texture to use as an input in the following texture modes:
// DOT_ST, DOT_STR_3D, DOT_STR_CUBE, DOT_ZW, DOT_RFLCT_SPEC,
// DOT_RFLCT_DIFF, DPNDNT_AR, DPNDNT_GB, BUMPENVMAP,
// BUMPENVMAP_LUM, DOTPRODUCT
#define PS_INPUTTEXTURE(t0,t1,t2,t3) (((t3)<<20)|((t2)<<16))
/*---------------------------------------------------------------------------------*/
/* Color combiners - The following members of the D3DPixelShaderDef structure */
/* define the state for the eight stages of color combiners: */
/* PSCombinerCount - Number of stages */
/* PSAlphaInputs[8] - Inputs for alpha portion of each stage */
/* PSRGBInputs[8] - Inputs for RGB portion of each stage */
/* PSConstant0[8] - Constant 0 for each stage */
/* PSConstant1[8] - Constant 1 for each stage */
/* PSFinalCombinerConstant0 - Constant 0 for final combiner */
/* PSFinalCombinerConstant1 - Constant 1 for final combiner */
/* PSAlphaOutputs[8] - Outputs for alpha portion of each stage */
/* PSRGBOutputs[8] - Outputs for RGB portion of each stage */
/*---------------------------------------------------------------------------------*/
// =========================================================================================================
// PSCombinerCount
// --------.--------.--------.----xxxx // number of combiners (1-8)
// --------.--------.-------x.-------- // PS_COMBINERCOUNT_MUX_MSB bit (0= LSB, 1= MSB)
// --------.--------.---x----.-------- // PS_COMBINERCOUNT_UNIQUE_C0
// --------.-------x.--------.-------- // PS_COMBINERCOUNT_UNIQUE_C1
#define PS_COMBINERCOUNT(count, flags) (((flags)<<8)|(count))
// count is 1-8, flags contains one or more values from PS_COMBINERCOUNTFLAGS
// Values for the 'flags' argument of PS_COMBINERCOUNT(count, flags).
// The macro shifts the flags left by 8, so the values here are expressed
// relative to bit 8 of PSCombinerCount: 0x0001 lands on bit 8, 0x0010 on
// bit 12 and 0x0100 on bit 16.
// The zero-valued names exist only to spell out the default choice.
enum PS_COMBINERCOUNTFLAGS
{
PS_COMBINERCOUNT_MUX_LSB= 0x0000L, // mux on r0.a lsb
PS_COMBINERCOUNT_MUX_MSB= 0x0001L, // mux on r0.a msb
PS_COMBINERCOUNT_SAME_C0= 0x0000L, // c0 same in each stage
PS_COMBINERCOUNT_UNIQUE_C0= 0x0010L, // c0 unique in each stage
PS_COMBINERCOUNT_SAME_C1= 0x0000L, // c1 same in each stage
PS_COMBINERCOUNT_UNIQUE_C1= 0x0100L // c1 unique in each stage
};
// =========================================================================================================
// PSRGBInputs[0-7]
// PSAlphaInputs[0-7]
// PSFinalCombinerInputsABCD
// PSFinalCombinerInputsEFG
// --------.--------.--------.----xxxx // D PS_REGISTER
// --------.--------.--------.---x---- // D PS_CHANNEL (0= RGB/BLUE, 1= ALPHA)
// --------.--------.--------.xxx----- // D PS_INPUTMAPPING
// --------.--------.----xxxx.-------- // C PS_REGISTER
// --------.--------.---x----.-------- // C PS_CHANNEL (0= RGB/BLUE, 1= ALPHA)
// --------.--------.xxx-----.-------- // C PS_INPUTMAPPING
// --------.----xxxx.--------.-------- // B PS_REGISTER
// --------.---x----.--------.-------- // B PS_CHANNEL (0= RGB/BLUE, 1= ALPHA)
// --------.xxx-----.--------.-------- // B PS_INPUTMAPPING
// ----xxxx.--------.--------.-------- // A PS_REGISTER
// ---x----.--------.--------.-------- // A PS_CHANNEL (0= RGB/BLUE, 1= ALPHA)
// xxx-----.--------.--------.-------- // A PS_INPUTMAPPING
// examples:
//
// shader.PSRGBInputs[3]= PS_COMBINERINPUTS(
// PS_REGISTER_T0 | PS_INPUTMAPPING_EXPAND_NORMAL | PS_CHANNEL_RGB,
// PS_REGISTER_C0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_ALPHA,
// PS_REGISTER_ZERO,
// PS_REGISTER_ZERO);
//
// shader.PSFinalCombinerInputsABCD= PS_COMBINERINPUTS(
// PS_REGISTER_T0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_ALPHA,
// PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL | PS_CHANNEL_RGB,
// PS_REGISTER_EFPROD | PS_INPUTMAPPING_UNSIGNED_INVERT | PS_CHANNEL_RGB,
// PS_REGISTER_ZERO);
//
// PS_FINALCOMBINERSETTING is set in 4th field of PSFinalCombinerInputsEFG with PS_COMBINERINPUTS
// example:
//
// shader.PSFinalCombinerInputsEFG= PS_COMBINERINPUTS(
// PS_REGISTER_R0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_RGB,
// PS_REGISTER_R1 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_RGB,
// PS_REGISTER_R1 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_BLUE,
// PS_FINALCOMBINERSETTING_CLAMP_SUM | PS_FINALCOMBINERSETTING_COMPLEMENT_R0);
#define PS_COMBINERINPUTS(a,b,c,d) (((a)<<24)|((b)<<16)|((c)<<8)|(d))
// For PSFinalCombinerInputsEFG,
// a,b,c contain a value from PS_REGISTER, PS_CHANNEL, and PS_INPUTMAPPING for input E,F, and G
// d contains values from PS_FINALCOMBINERSETTING
// For all other inputs,
// a,b,c,d each contain a value from PS_REGISTER, PS_CHANNEL, and PS_INPUTMAPPING
// The input has PS_INPUTMAPPING applied
// (Note : I don't know for sure if the max() operation mentioned above is indeed what happens,
// as there's no further documentation available on this. Native Direct3D can clamp with the
// '_sat' instruction modifier, but that's not really the same as these Xbox1 input mappings.)
//
// When the input register is PS_REGISTER_ZERO, the input mappings result in the following constants:
//
// PS_REGISTER_NEGATIVE_ONE (PS_INPUTMAPPING_EXPAND_NORMAL on zero) : y = -1.0
// PS_REGISTER_NEGATIVE_ONE_HALF (PS_INPUTMAPPING_HALFBIAS_NORMAL on zero) : y = -0.5
// PS_REGISTER_ZERO itself : y = 0.0
// PS_REGISTER_ONE_HALF (PS_INPUTMAPPING_HALFBIAS_NEGATE on zero) : y = 0.5
// PS_REGISTER_ONE (PS_INPUTMAPPING_UNSIGNED_INVERT on zero) : y = 1.0
// (Note : It has no define, but PS_INPUTMAPPING_EXPAND_NEGATE on zero results in ONE too!)
// Mapping applied to a combiner input before it is used.
// Occupies the upper three bits (0xe0, see PS_INPUTMAPPING_MASK) of the 8-bit
// input descriptor, next to PS_CHANNEL (0x10) and PS_REGISTER (0x0f).
// Each trailing comment gives the mapping formula and whether the mapping may
// be used in the final combiner.
enum PS_INPUTMAPPING
{
PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // max(0,x) = 1*max(0,x) + 0.0 OK for final combiner: y = abs(x)
PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // 1 - max(0,x) = -1*max(0,x) + 1.0 OK for final combiner: y = 1 - x
PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // 2 * max(0,x) - 1 = 2*max(0,x) - 1.0 invalid for final combiner
PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // 1 - 2 * max(0,x) = -2*max(0,x) + 1.0 invalid for final combiner
PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // max(0,x) - 1/2 = 1*max(0,x) - 0.5 invalid for final combiner
PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // 1/2 - max(0,x) = -1*max(0,x) + 0.5 invalid for final combiner
PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // x = 1* x + 0.0 invalid for final combiner
PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // -x = -1* x + 0.0 invalid for final combiner
PS_INPUTMAPPING_MASK= 0xe0L // the three mapping bits of an input descriptor
};
// Register selector used in combiner inputs and outputs.
// Hardware encodings fit in the low four bits (0x0f, see PS_REGISTER_MASK).
// The trailing comment on each value gives the access mode (r = readable,
// w = writable) and an alternative name.
// Value 0 has two names: PS_REGISTER_ZERO when read, PS_REGISTER_DISCARD when
// written.
enum PS_REGISTER
{
PS_REGISTER_ZERO= 0x00L, // r A.k.a. _REG_0
PS_REGISTER_DISCARD= 0x00L, // w A.k.a. _REG_0
PS_REGISTER_C0= 0x01L, // r A.k.a. _REG_1
PS_REGISTER_C1= 0x02L, // r A.k.a. _REG_2
PS_REGISTER_FOG= 0x03L, // r A.k.a. _REG_3
PS_REGISTER_V0= 0x04L, // r/w A.k.a. _REG_4
PS_REGISTER_V1= 0x05L, // r/w A.k.a. _REG_5
PS_REGISTER_T0= 0x08L, // r/w A.k.a. _REG_8
PS_REGISTER_T1= 0x09L, // r/w A.k.a. _REG_9
PS_REGISTER_T2= 0x0aL, // r/w A.k.a. _REG_A
PS_REGISTER_T3= 0x0bL, // r/w A.k.a. _REG_B
PS_REGISTER_R0= 0x0cL, // r/w A.k.a. _REG_C
PS_REGISTER_R1= 0x0dL, // r/w A.k.a. _REG_D
PS_REGISTER_V1R0_SUM= 0x0eL, // r A.k.a. _REG_SPECLIT
PS_REGISTER_EF_PROD= 0x0fL, // r A.k.a. _REG_EF_PROD
PS_REGISTER_MASK= 0x0fL,
// These constant values can be represented as a combination of 0, and an input modifier
// But they're not registers
// PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // 0x20 r OK for final combiner
// PS_REGISTER_NEGATIVE_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // 0x40 r invalid for final combiner
// PS_REGISTER_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // 0xa0 r invalid for final combiner
// PS_REGISTER_NEGATIVE_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // 0x80 r invalid for final combiner
// Cxbx extension; Separate final combiner constant registers (values not encoded on NV2A, as outside of available bits range) :
// Note that both values lie above PS_REGISTER_MASK, so masking with it would alias them onto 0x00 and 0x01.
PS_REGISTER_FC0= 0x10,
PS_REGISTER_FC1= 0x11,
};
// FOG ALPHA is only available in final combiner
// V1R0_SUM and EF_PROD are only available in final combiner (A,B,C,D inputs only)
// V1R0_SUM_ALPHA and EF_PROD_ALPHA are not available
// R0_ALPHA is initialized to T0_ALPHA in stage0
// Channel selector of a combiner input: a single bit (0x10, see
// PS_CHANNEL_MASK) in the 8-bit input descriptor.
// A cleared bit has two names because its meaning depends on where the input
// is used: RGB for an RGB source, BLUE for an alpha source.
enum PS_CHANNEL
{
PS_CHANNEL_RGB= 0x00, // used as RGB source
PS_CHANNEL_BLUE= 0x00, // used as ALPHA source
PS_CHANNEL_ALPHA= 0x10, // used as RGB or ALPHA source
PS_CHANNEL_MASK= 0x10
};
// Flags for the final combiner's V1+R0 sum.
// They are passed as the fourth field ('d') of PS_COMBINERINPUTS when building
// PSFinalCombinerInputsEFG, so they occupy the lowest byte of that value.
enum PS_FINALCOMBINERSETTING
{
PS_FINALCOMBINERSETTING_CLAMP_SUM= 0x80, // V1+R0 sum clamped to [0,1] A.k.a. NV097_SET_COMBINER_SPECULAR_FOG_CW1_SPECULAR_CLAMP_TRUE
PS_FINALCOMBINERSETTING_COMPLEMENT_V1= 0x40, // unsigned invert mapping (1 - v1) is used as an input to the sum rather than v1 A.k.a. NV097_SET_COMBINER_SPECULAR_FOG_CW1_SPECULAR_ADD_INVERT_R5_TRUE
PS_FINALCOMBINERSETTING_COMPLEMENT_R0= 0x20, // unsigned invert mapping (1 - r0) is used as an input to the sum rather than r0 A.k.a. NV097_SET_COMBINER_SPECULAR_FOG_CW1_SPECULAR_ADD_INVERT_R12_TRUE
};
// =========================================================================================================
// PSRGBOutputs[0-7]
// PSAlphaOutputs[0-7]
// --------.--------.--------.----xxxx // CD output PS_REGISTER
// --------.--------.--------.xxxx---- // AB output PS_REGISTER
// --------.--------.----xxxx.-------- // AB_CD output PS_REGISTER Note : Must be PS_REGISTER_DISCARD if either PS_COMBINEROUTPUT_AB_DOT_PRODUCT or PS_COMBINEROUTPUT_CD_DOT_PRODUCT are set
// --------.--------.---x----.-------- // PS_COMBINEROUTPUT_CD_DOT_PRODUCT (CD output 0= multiply, 1= dot product)
// --------.--------.--x-----.-------- // PS_COMBINEROUTPUT_AB_DOT_PRODUCT (AB output 0= multiply, 1= dot product)
// --------.--------.-x------.-------- // PS_COMBINEROUTPUT_AB_CD_MUX (AB_CD mux/sum select 0= sum, 1= mux)
// --------.------xx.x-------.-------- // PS_COMBINEROUTPUT_OUTPUTMAPPING
// --------.-----x--.--------.-------- // PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA
// --------.----x---.--------.-------- // PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA
#define PS_COMBINEROUTPUTS(ab,cd,mux_sum,flags) (((flags)<<12)|((mux_sum)<<8)|((ab)<<4)|(cd))
// ab,cd,mux_sum contain a value from PS_REGISTER
// flags contains values from PS_COMBINEROUTPUT
// Scale/bias applied to a combiner stage's outputs.
// These values are part of the 'flags' argument of PS_COMBINEROUTPUTS(), which
// shifts flags left by 12; the mask 0x38 therefore ends up on bits 15..17 of
// PSRGBOutputs / PSAlphaOutputs.
enum PS_COMBINEROUTPUT_OUTPUTMAPPING
{
PS_COMBINEROUTPUT_OUTPUTMAPPING_IDENTITY= 0x00L, // y = x
PS_COMBINEROUTPUT_OUTPUTMAPPING_BIAS= 0x08L, // y = (x - 0.5)
PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_1= 0x10L, // y = x * 2
PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_1_BIAS= 0x18L, // y = (x - 0.5) * 2
PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2= 0x20L, // y = x * 4
PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2_BIAS= 0x28L, // y = (x - 0.5) * 4 Note : Cxbx inferred method; May not be supported on NV2A
PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1= 0x30L, // y = x / 2
PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS= 0x38L, // y = (x - 0.5) / 2 Note : Cxbx inferred method; May not be supported on NV2A
PS_COMBINEROUTPUT_OUTPUTMAPPING_MASK= 0x38L // all three output mapping bits
};
// Flag values for the 'flags' argument of PS_COMBINEROUTPUTS.
// Each pair below selects between two behaviours; the 0x00L member of a pair is the
// "flag not set" spelling, so several enumerators intentionally share the value zero.
enum PS_COMBINEROUTPUT
{
    // CD output : multiply or dot product
    PS_COMBINEROUTPUT_CD_MULTIPLY      = 0x00L,
    PS_COMBINEROUTPUT_CD_DOT_PRODUCT   = 0x01L, // RGB only

    // AB output : multiply or dot product
    PS_COMBINEROUTPUT_AB_MULTIPLY      = 0x00L,
    PS_COMBINEROUTPUT_AB_DOT_PRODUCT   = 0x02L, // RGB only

    // Third (AB_CD) output : sum or mux
    PS_COMBINEROUTPUT_AB_CD_SUM        = 0x00L, // 3rd output is AB+CD
    PS_COMBINEROUTPUT_AB_CD_MUX        = 0x04L, // 3rd output is MUX(AB,CD) based on R0.a

    // Blue-to-alpha selection
    PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA = 0x40L, // RGB only
    PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA = 0x80L, // RGB only
};
// =========================================================================================================
// PSC0Mapping
// PSC1Mapping
// --------.--------.--------.----xxxx // offset of D3D constant for stage 0
// --------.--------.--------.xxxx---- // offset of D3D constant for stage 1
// --------.--------.----xxxx.-------- // offset of D3D constant for stage 2
// --------.--------.xxxx----.-------- // offset of D3D constant for stage 3
// --------.----xxxx.--------.-------- // offset of D3D constant for stage 4
// --------.xxxx----.--------.-------- // offset of D3D constant for stage 5
// ----xxxx.--------.--------.-------- // offset of D3D constant for stage 6
// xxxx----.--------.--------.-------- // offset of D3D constant for stage 7
//
// PS_CONSTANTMAPPING packs eight 4-bit offsets into one DWORD, stage 0 in the lowest nibble.
// s0-s7 contain the offset of the D3D constant that corresponds to the
// c0 or c1 constant in stages 0 through 7. These mappings are only used in
// SetPixelShaderConstant().
// Each argument is cast to DWORD and masked to its low 4 bits before being shifted.
// The complete expansion is wrapped in parentheses, so that the macro behaves as a single
// operand when used next to operators that bind tighter than '|' (such as '&', '==' or '+').
#define PS_CONSTANTMAPPING(s0,s1,s2,s3,s4,s5,s6,s7) \
    ((((DWORD)(s0)&0xf)<< 0) | (((DWORD)(s1)&0xf)<< 4) | \
     (((DWORD)(s2)&0xf)<< 8) | (((DWORD)(s3)&0xf)<<12) | \
     (((DWORD)(s4)&0xf)<<16) | (((DWORD)(s5)&0xf)<<20) | \
     (((DWORD)(s6)&0xf)<<24) | (((DWORD)(s7)&0xf)<<28))
// =========================================================================================================
// PSFinalCombinerConstants
// --------.--------.--------.----xxxx // offset of D3D constant for C0
// --------.--------.--------.xxxx---- // offset of D3D constant for C1
// --------.--------.-------x.-------- // Adjust texture flag
//
// PS_FINALCOMBINERCONSTANTS packs the two 4-bit offsets and the flags into one DWORD.
// c0 and c1 contain the offset of the D3D constant that corresponds to the
// constants in the final combiner. These mappings are only used in
// SetPixelShaderConstant(). Flags contains values from PS_GLOBALFLAGS
// c0 and c1 are masked to their low 4 bits; flags is shifted left by 8 without masking.
// The complete expansion is wrapped in parentheses, so that the macro behaves as a single
// operand when used next to operators that bind tighter than '|' (such as '&', '==' or '+').
#define PS_FINALCOMBINERCONSTANTS(c0,c1,flags) \
    (((DWORD)(flags) << 8) | (((DWORD)(c0)&0xf)<< 0) | (((DWORD)(c1)&0xf)<< 4))
// Flag values for the 'flags' argument of PS_FINALCOMBINERCONSTANTS.
enum PS_GLOBALFLAGS
{
    // When PS_GLOBALFLAGS_TEXMODE_ADJUST is set, the texture mode for each texture stage
    // is adjusted as follows:
    //
    //   if the set texture is a cubemap,
    //     change PS_TEXTUREMODES_PROJECT2D    to PS_TEXTUREMODES_CUBEMAP
    //     change PS_TEXTUREMODES_PROJECT3D    to PS_TEXTUREMODES_CUBEMAP
    //     change PS_TEXTUREMODES_DOT_STR_3D   to PS_TEXTUREMODES_DOT_STR_CUBE
    //   if the set texture is a volume texture,
    //     change PS_TEXTUREMODES_PROJECT2D    to PS_TEXTUREMODES_PROJECT3D
    //     change PS_TEXTUREMODES_CUBEMAP      to PS_TEXTUREMODES_PROJECT3D
    //     change PS_TEXTUREMODES_DOT_STR_CUBE to PS_TEXTUREMODES_DOT_STR_3D
    //   if the set texture is neither a cubemap nor a volume texture,
    //     change PS_TEXTUREMODES_PROJECT3D    to PS_TEXTUREMODES_PROJECT2D
    //     change PS_TEXTUREMODES_CUBEMAP      to PS_TEXTUREMODES_PROJECT2D

    PS_GLOBALFLAGS_NO_TEXMODE_ADJUST = 0x0000L, // don't adjust texture modes
    PS_GLOBALFLAGS_TEXMODE_ADJUST    = 0x0001L, // adjust texture modes according to set texture

    // Bit position of the flags inside the packed final combiner constants value
    PS_GLOBALFLAGS_SHIFT             = 8
};
// Upper bounds on the number of Xbox pixel shader registers, one constant per register
// class letter (C, R, T, V) as used in the constant names.
// NOTE(review): presumably C = constant, R = temporary, T = texture and V = vertex colour
// registers - confirm against the code that uses these limits.
constexpr int PSH_XBOX_MAX_C_REGISTER_COUNT = 16;
constexpr int PSH_XBOX_MAX_R_REGISTER_COUNT =  2;
constexpr int PSH_XBOX_MAX_T_REGISTER_COUNT =  4;
constexpr int PSH_XBOX_MAX_V_REGISTER_COUNT =  2;
// Base of all decoded register references : holds just the PS_REGISTER that is referred to.
struct RPSRegisterObject
{
    PS_REGISTER Reg;

    // Declared here, defined elsewhere.
    // NOTE(review): presumably sets Reg from the encoded Value - confirm at the definition.
    void Decode(uint8_t Value);
};
// A register used as a combiner input : the register itself (inherited Reg), plus the
// channel that is read from it and the input mapping that is applied.
struct RPSInputRegister : RPSRegisterObject
{
    PS_CHANNEL      Channel;
    PS_INPUTMAPPING InputMapping;

    // Declared here, defined elsewhere; note this overload hides RPSRegisterObject::Decode.
    // stage_nr and isRGB identify the combiner stage and the RGB/alpha channel being decoded.
    void Decode(uint8_t Value, unsigned stage_nr, bool isRGB);
};
// One half (AB or CD) of a combiner stage channel : the output register (inherited Reg),
// the two inputs that feed it, and how they are combined.
struct RPSCombinerOutput : RPSRegisterObject
{
    // Called Input A and B, or C and D (depending if it's inside the AB or CD combiner)
    RPSInputRegister Input[2];

    // False=Multiply, True=DotProduct
    bool DotProduct;

    // 0=Alpha-to-Alpha, 1=Blue-to-Alpha
    unsigned BlueToAlpha;

    // Declared here, defined elsewhere; note this overload hides RPSRegisterObject::Decode.
    void Decode(uint8_t Value, uint16_t PSInputs, unsigned stage_nr, bool isRGB);
};
// Everything decoded for one channel (RGB or alpha) of a single combiner stage.
struct RPSCombinerStageChannel
{
    // Contains InputC and InputD (as Input[0] and Input[1])
    RPSCombinerOutput OutputCD;

    // Contains InputA and InputB (as Input[0] and Input[1])
    RPSCombinerOutput OutputAB;

    // Register receiving the third (AB_CD) output
    RPSRegisterObject OutputMUX_SUM;

    // False=AB+CD, True=MUX(AB,CD) based on R0.a
    bool AB_CD_MUX;

    PS_COMBINEROUTPUT_OUTPUTMAPPING CombinerOutputMapping;

    // Declared here, defined elsewhere.
    void Decode(uint32_t PSInputs, uint32_t PSOutputs, unsigned stage_nr, bool isRGB);
};
// A complete combiner stage : one decoded channel for RGB and one for alpha.
struct RPSCombinerStage
{
    RPSCombinerStageChannel RGB;
    RPSCombinerStageChannel Alpha;
};
// Decoded final combiner : its seven inputs plus three boolean settings.
// NOTE(review): the seven inputs presumably correspond to A..G, given that Decode takes
// an "ABCD" and an "EFG" value - confirm at the definition.
struct RPSFinalCombiner
{
    RPSInputRegister Input[7];

    bool ComplementV1;
    bool ComplementR0;
    bool ClampSum;

    // Declared here, defined elsewhere.
    void Decode(const uint32_t PSFinalCombinerInputsABCD, const uint32_t PSFinalCombinerInputsEFG);
};
// The fully decoded form of an Xbox pixel shader definition (xbox::X_D3DPIXELSHADERDEF) :
// per-texture-stage settings, the combiner stages, and the optional final combiner.
struct DecodedRegisterCombiner
{
    // --- Per texture stage ---
    PS_TEXTUREMODES PSTextureModes[xbox::X_D3DTS_STAGECOUNT];
    PS_DOTMAPPING PSDotMapping[xbox::X_D3DTS_STAGECOUNT];
    // True in [0] = PS_COMPAREMODE_S_GE, [1] = PS_COMPAREMODE_T_GE, [2] = PS_COMPAREMODE_R_GE, [3] = PS_COMPAREMODE_Q_GE (so, STRQ>0, otherwise <0)
    bool PSCompareMode[xbox::X_D3DTS_STAGECOUNT][4];
    int PSInputTexture[xbox::X_D3DTS_STAGECOUNT];

    // --- Combiner-wide flags ---
    bool CombinerMuxesOnMsb;
    bool CombinerHasUniqueC0;
    bool CombinerHasUniqueC1;

    // --- Combiner stages ---
    unsigned NumberOfCombiners;
    RPSCombinerStage Combiners[xbox::X_PSH_COMBINECOUNT];

    // --- Final combiner ---
    bool hasFinalCombiner;
    RPSFinalCombiner FinalCombiner;

    bool TexModeAdjust;

    // Variables
    bool AlphaKill[4]; // X_D3DTSS_ALPHAKILL

    // Static helpers, declared here and defined elsewhere : each takes a pixel shader
    // definition plus an output array with one entry per texture stage.
    static void GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]);
    static void GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_DOTMAPPING psDotMapping[xbox::X_D3DTS_STAGECOUNT]);
    static void GetPSCompareModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, bool psCompareModes[xbox::X_D3DTS_STAGECOUNT][4]);
    static void GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDef, int psInputTexture[xbox::X_D3DTS_STAGECOUNT]);

    // Declared here, defined elsewhere.
    void Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef);
};
// Global flag, declared here and defined elsewhere.
// NOTE(review): judging by the name, this selects the fixed function pixel shader path - confirm at the definition.
extern bool g_UseFixedFunctionPixelShader;
// PatrickvL's Dxbx pixel shader translation
// Declared here, defined elsewhere; takes no arguments and returns nothing.
void DxbxUpdateActivePixelShader(); // NOPATCH

View File

@ -38,6 +38,7 @@
#include "core\hle\D3D8\XbVertexBuffer.h" // For CxbxImpl_SetVertexData4f
#include "core\hle\D3D8\XbVertexShader.h"
#include "core\hle\D3D8\XbD3D8Logging.h" // For DEBUG_D3DRESULT
#include "devices\xbox.h"
#include "core\hle\D3D8\XbConvert.h" // For NV2A_VP_UPLOAD_INST, NV2A_VP_UPLOAD_CONST_ID, NV2A_VP_UPLOAD_CONST
#include "devices\video\nv2a.h" // For D3DPUSH_DECODE
#include "common\Logging.h" // For LOG_INIT
@ -98,7 +99,7 @@ void CxbxVertexShaderSetFlags()
// Note : Temporary, until we reliably locate the Xbox internal state for this
// See D3DXDeclaratorFromFVF docs https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dxdeclaratorfromfvf
// and https://github.com/reactos/wine/blob/2e8dfbb1ad71f24c41e8485a39df01bb9304127f/dlls/d3dx9_36/mesh.c#L2041
static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf)
static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) // TODO : Rename CxbxFVFToXboxVertexAttributeFormat?
{
using namespace xbox;
@ -127,17 +128,14 @@ static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf)
unsigned offset = 0;
DWORD position = (xboxFvf & X_D3DFVF_POSITION_MASK);
switch (position) {
case 0: nrPositionFloats = 0; LOG_TEST_CASE("FVF without position"); break; // Note : Remove logging if this occurs often
case X_D3DFVF_XYZ: /*nrPositionFloats is set to 3 by default*/ break;
case X_D3DFVF_XYZRHW:
g_Xbox_VertexShader_ForFVF.Flags |= X_VERTEXSHADER_FLAG_PASSTHROUGH;
nrPositionFloats = 4;
break;
case X_D3DFVF_XYZB1: nrBlendWeights = 1; break;
case X_D3DFVF_XYZB2: nrBlendWeights = 2; break;
case X_D3DFVF_XYZB3: nrBlendWeights = 3; break;
case X_D3DFVF_XYZB4: nrBlendWeights = 4; break;
case X_D3DFVF_POSITION_MASK: /*Keep nrPositionFloats set to 3*/ LOG_TEST_CASE("FVF invalid (5th blendweight?)"); break;
case 0: nrPositionFloats = 0; LOG_TEST_CASE("FVF without position"); break; // Note : Remove logging if this occurs often
case X_D3DFVF_XYZ: /*nrPositionFloats is set to 3 by default*/ break;
case X_D3DFVF_XYZRHW: nrPositionFloats = 4; g_Xbox_VertexShader_ForFVF.Flags |= X_VERTEXSHADER_FLAG_PASSTHROUGH; break;
case X_D3DFVF_XYZB1: nrBlendWeights = 1; break;
case X_D3DFVF_XYZB2: nrBlendWeights = 2; break;
case X_D3DFVF_XYZB3: nrBlendWeights = 3; break;
case X_D3DFVF_XYZB4: nrBlendWeights = 4; break;
case X_D3DFVF_POSITION_MASK: /*Keep nrPositionFloats set to 3*/ LOG_TEST_CASE("FVF invalid (5th blendweight?)"); break;
DEFAULT_UNREACHABLE;
}
@ -158,6 +156,7 @@ static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf)
offset += sizeof(float) * nrBlendWeights;
}
}
else if (nrBlendWeights > 0) LOG_TEST_CASE("BlendWeights given without position?");
// Write Normal, Diffuse, and Specular
if (xboxFvf & X_D3DFVF_NORMAL) {
@ -193,18 +192,23 @@ static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf)
LOG_TEST_CASE("Limiting FVF to 4 textures");
textureCount = 4; // Safeguard, since the X_D3DFVF_TEXCOUNT bitfield could contain invalid values (5 up to 15)
}
for (int i = 0; i < textureCount; i++) {
int numberOfCoordinates = 0;
auto FVFTextureFormat = (xboxFvf >> X_D3DFVF_TEXCOORDSIZE_SHIFT(i)) & 0x003;
switch (FVFTextureFormat) {
case X_D3DFVF_TEXTUREFORMAT1: numberOfCoordinates = 1; break;
case X_D3DFVF_TEXTUREFORMAT2: numberOfCoordinates = 2; break;
case X_D3DFVF_TEXTUREFORMAT3: numberOfCoordinates = 3; break;
case X_D3DFVF_TEXTUREFORMAT4: numberOfCoordinates = 4; break;
DEFAULT_UNREACHABLE;
#if 1
int numberOfCoordinates = ((FVFTextureFormat + 1) & 3) + 1;
#else
int numberOfCoordinates = 0;
switch (FVFTextureFormat) { // Note : Below enums are not ordered; In a math expression mapped as :
case X_D3DFVF_TEXTUREFORMAT1: numberOfCoordinates = 1; break; // input = 3 -> 4 -> 0 -> 1 = output
case X_D3DFVF_TEXTUREFORMAT2: numberOfCoordinates = 2; break; // input = 0 -> 1 -> 1 -> 2 = output
case X_D3DFVF_TEXTUREFORMAT3: numberOfCoordinates = 3; break; // input = 1 -> 2 -> 2 -> 3 = output
case X_D3DFVF_TEXTUREFORMAT4: numberOfCoordinates = 4; break; // input = 2 -> 3 -> 3 -> 4 = output
DEFAULT_UNREACHABLE; // ((input +1 ) &3 ) +1 ) = output
}
assert(numberOfCoordinates > 0);
#endif
pSlot = &declaration.Slots[X_D3DVSDE_TEXCOORD0 + i];
pSlot->Format = X_D3DVSDT_FLOAT[numberOfCoordinates];
pSlot->Offset = offset;

View File

@ -209,6 +209,10 @@ extern void EmuParseVshFunction
extern size_t GetVshFunctionSize(const xbox::dword_xt* pXboxFunction);
// Returns TRUE when the X_D3DFVF_RESERVED0 bit is set in Handle, FALSE otherwise.
inline boolean VshHandleIsVertexShader(DWORD Handle)
{
    if (Handle & X_D3DFVF_RESERVED0) {
        return TRUE;
    }

    return FALSE;
}
// Returns the logical negation of VshHandleIsVertexShader : a handle that is not a
// vertex shader handle is treated as an FVF.
inline boolean VshHandleIsFVF(DWORD Handle)
{
    const boolean isVertexShader = VshHandleIsVertexShader(Handle);
    return !isVertexShader;
}
// Returns true when Handle is an FVF (see VshHandleIsFVF) whose position bits
// (selected with X_D3DFVF_POSITION_MASK) are exactly X_D3DFVF_XYZRHW.
inline boolean VshHandleIsPassthrough(DWORD Handle)
{
    // Vertex shader handles are never reported as passthrough
    if (!VshHandleIsFVF(Handle)) {
        return false;
    }

    const auto positionBits = Handle & X_D3DFVF_POSITION_MASK;
    return positionBits == X_D3DFVF_XYZRHW;
}
// Clears the X_D3DFVF_RESERVED0 bit from Handle and reinterprets the remaining value
// as a pointer to an Xbox vertex shader. No check is made that Handle actually is a
// vertex shader handle.
inline xbox::X_D3DVertexShader *VshHandleToXboxVertexShader(DWORD Handle)
{
    const auto address = Handle & ~X_D3DFVF_RESERVED0;
    return (xbox::X_D3DVertexShader *)address;
}
// Get the number of components represented by the given xbox vertex data type
@ -228,5 +232,4 @@ extern void CxbxImpl_SetVertexShaderInput(DWORD Handle, UINT StreamCount, xbox::
extern void CxbxImpl_SetVertexShaderConstant(INT Register, PVOID pConstantData, DWORD ConstantCount);
extern void CxbxImpl_DeleteVertexShader(DWORD Handle);
extern void CxbxVertexShaderSetFlags();
extern HRESULT SetVertexShader(IDirect3DVertexShader* pShader);
#endif