Merge pull request #1801 from PatrickvL/vsh_hlsl_pvl
Port to HLSL (High Level Shader Language) vertex shaders
This commit is contained in:
commit
76d45fdd63
|
@ -160,6 +160,7 @@ endif()
|
|||
set(WINS_LIB
|
||||
legacy_stdio_definitions
|
||||
d3d9
|
||||
d3dcompiler
|
||||
dinput8
|
||||
dxguid
|
||||
odbc32
|
||||
|
|
|
@ -0,0 +1,323 @@
|
|||
// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) :
|
||||
R"DELIMITER(// Xbox HLSL vertex shader (template populated at runtime)
|
||||
|
||||
// Vertex shader input: sixteen float4 attribute slots (v[0]..v[15]), all
// declared with the TEXCOORD semantic. main() reads these slots when it
// initializes the Xbox input registers v0..v15.
struct VS_INPUT
|
||||
{
|
||||
float4 v[16] : TEXCOORD;
|
||||
};
|
||||
|
||||
// Output registers
// One field per Xbox vertex shader output register; main() fills this struct
// from its local o* variables just before returning. The back-facing colors
// have no dedicated semantic here and are carried in TEXCOORD4 / TEXCOORD5.
|
||||
struct VS_OUTPUT
|
||||
{
|
||||
float4 oPos : POSITION; // Homogeneous clip space position
|
||||
float4 oD0 : COLOR0; // Primary color (front-facing)
|
||||
float4 oD1 : COLOR1; // Secondary color (front-facing)
|
||||
float oFog : FOG; // Fog coordinate
|
||||
float oPts : PSIZE; // Point size
|
||||
float4 oB0 : TEXCOORD4; // Back-facing primary color
|
||||
float4 oB1 : TEXCOORD5; // Back-facing secondary color
|
||||
float4 oT0 : TEXCOORD0; // Texture coordinate set 0
|
||||
float4 oT1 : TEXCOORD1; // Texture coordinate set 1
|
||||
float4 oT2 : TEXCOORD2; // Texture coordinate set 2
|
||||
float4 oT3 : TEXCOORD3; // Texture coordinate set 3
|
||||
};
|
||||
|
||||
#define X_D3DSCM_CORRECTION 96 // Add 96 to arrive at the range 0..191 (instead of -96..95)
|
||||
#define X_D3DVS_CONSTREG_COUNT 192
|
||||
|
||||
// Xbox constant registers
|
||||
uniform float4 C[X_D3DVS_CONSTREG_COUNT] : register(c0);
|
||||
|
||||
// Vertex input overrides for SetVertexData4f support
|
||||
uniform float4 vOverrideValue[16] : register(c192);
|
||||
uniform float4 vOverridePacked[4] : register(c208);
|
||||
|
||||
uniform float4 xboxViewportScale : register(c212);
|
||||
uniform float4 xboxViewportOffset : register(c213);
|
||||
|
||||
// Widening helpers: every operand handed to an opcode macro is promoted to a
// float4 first, so the macros never have to care about the operand's width.
// Narrower inputs repeat their last component into the missing lanes.
float4 _tof4(float src)  { return src.xxxx; }
float4 _tof4(float2 src) { return float4(src.x, src.y, src.y, src.y); }
float4 _tof4(float3 src) { return float4(src.x, src.y, src.z, src.z); }
float4 _tof4(float4 src) { return src; }

// Broadcast a scalar result into all four lanes, so a write mask can pick any of them
float4 _ssss(float s) { return s.xxxx; }

// Read an operand as a scalar: the x lane of its float4 promotion
#define _scalar(src) _tof4(src).x /* a scalar input */
|
||||
|
||||
// Fetch an Xbox constant register by its Xbox-side index.
// Xbox indexes run over [-96, 95]; adding X_D3DSCM_CORRECTION maps them onto
// the host array C[0..191]. Any index that lands outside the array reads as zero.
float4 c(int register_number)
{
	int host_index = register_number + X_D3DSCM_CORRECTION;

	bool below_range = (host_index < 0);
	bool above_range = (host_index >= X_D3DVS_CONSTREG_COUNT);
	if (below_range || above_range)
		return float4(0, 0, 0, 0);

	return C[host_index];
}
|
||||
|
||||
// Due to rounding differences with the Xbox (and increased precision on PC?)
|
||||
// some titles produce values just below the threshold of the next integer.
|
||||
// We can add a small bias to make sure it's bumped over the threshold
|
||||
// Test Case: Azurik (divides indexes by 755, then scales them back in the vertex shader)
|
||||
#define BIAS 0.0001
|
||||
// TODO : Use 0.001 like xqemu?
|
||||
|
||||
// 2.14.1.11 Vertex Program Floating Point Requirements
|
||||
// The floor operations used by the ARL and EXP instructions must
|
||||
// operate identically. Specifically, the EXP instruction's floor(t.x)
|
||||
// intermediate result must exactly match the integer stored in the
|
||||
// address register by the ARL instruction.
|
||||
// Floor with a small positive bias (BIAS) applied first, so that values
// sitting just below an integer are bumped over the threshold before flooring.
// Shared by ARL and EXP so both produce the same integer for the same input.
float x_floor(float src)
{
	float biased = src + BIAS;
	return floor(biased);
}
|
||||
|
||||
// http://xboxdevwiki.net/NV2A/Vertex_Shader
|
||||
// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program.txt
|
||||
// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_vertex_program1_1.txt
|
||||
|
||||
// Functions for MAC ('Multiply And Accumulate') opcodes
|
||||
|
||||
// Each macro below expands to one masked assignment: the operands are widened
// with _tof4, the operation is evaluated on full float4 values, and `mask`
// selects which lanes of the result are stored into `dest`.

// 2.14.1.10.1 ARL: Address Register Load
// Stores the (biased) floor of the operand's x lane
#define x_arl(dest, mask, src0) dest.mask = x_floor(_tof4(src0).x).mask

// 2.14.1.10.2 MOV: Move
#define x_mov(dest, mask, src0) dest.mask = (_tof4(src0)).mask

// 2.14.1.10.3 MUL: Multiply (component-wise)
#define x_mul(dest, mask, src0, src1) dest.mask = (_tof4(src0) * _tof4(src1)).mask

// 2.14.1.10.4 ADD: Add (component-wise)
#define x_add(dest, mask, src0, src1) dest.mask = (_tof4(src0) + _tof4(src1)).mask

// 2.14.1.10.5 MAD: Multiply and Add (src0 * src1 + src2, component-wise)
#define x_mad(dest, mask, src0, src1, src2) dest.mask = (_tof4(src0) * _tof4(src1) + _tof4(src2)).mask

// 2.14.1.10.8 DP3: Three-Component Dot Product (scalar result replicated to all lanes)
#define x_dp3(dest, mask, src0, src1) dest.mask = _ssss(dot(_tof4(src0).xyz, _tof4(src1).xyz)).mask

// 2.14.1.10.9 DP4: Four-Component Dot Product (scalar result replicated to all lanes)
#define x_dp4(dest, mask, src0, src1) dest.mask = _ssss(dot(_tof4(src0), _tof4(src1))).mask

// 2.14.1.10.10 DST: Distance Vector
#define x_dst(dest, mask, src0, src1) dest.mask = dst(_tof4(src0), _tof4(src1)).mask /* equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; } */

// 2.14.1.10.11 MIN: Minimum (component-wise)
#define x_min(dest, mask, src0, src1) dest.mask = min(_tof4(src0), _tof4(src1)).mask

// 2.14.1.10.12 MAX: Maximum (component-wise)
#define x_max(dest, mask, src0, src1) dest.mask = max(_tof4(src0), _tof4(src1)).mask
|
||||
|
||||
// 2.14.1.10.13 SLT: Set On Less Than
#define x_slt(dest, mask, src0, src1) dest.mask = _slt(_tof4(src0), _tof4(src1)).mask

// Per-lane comparison: a lane is 1 where src0 < src1, otherwise 0
float4 _slt(float4 src0, float4 src1)
{
	return float4(
		(src0.x < src1.x) ? 1 : 0,
		(src0.y < src1.y) ? 1 : 0,
		(src0.z < src1.z) ? 1 : 0,
		(src0.w < src1.w) ? 1 : 0);
}
|
||||
|
||||
// 2.14.1.10.14 SGE: Set On Greater or Equal Than
#define x_sge(dest, mask, src0, src1) dest.mask = _sge(_tof4(src0), _tof4(src1)).mask

// Per-lane comparison: a lane is 1 where src0 >= src1, otherwise 0
float4 _sge(float4 src0, float4 src1)
{
	return float4(
		(src0.x >= src1.x) ? 1 : 0,
		(src0.y >= src1.y) ? 1 : 0,
		(src0.z >= src1.z) ? 1 : 0,
		(src0.w >= src1.w) ? 1 : 0);
}
|
||||
|
||||
// 2.14.1.10.18 DPH: Homogeneous Dot Product
#define x_dph(dest, mask, src0, src1) dest.mask = _ssss(_dph(_tof4(src0), _tof4(src1))).mask

// Three-component dot product of the operands, plus the w lane of the second operand
float _dph(float4 src0, float4 src1)
{
	float xyz_dot = dot(src0.xyz, src1.xyz);
	return xyz_dot + src1.w;
}
|
||||
|
||||
// Xbox ILU Functions
|
||||
|
||||
// 2.14.1.10.6 RCP: Reciprocal
#define x_rcp(dest, mask, src0) dest.mask = _ssss(_rcp(_scalar(src0))).mask

// Plain reciprocal of the scalar operand.
// The exact-result special cases for 1 and 0 are present but compiled out.
float _rcp(float src)
{
#if 0 // TODO : Enable
	if (src == 1) return 1;
	if (src == 0) return 1.#INF;
#endif
	float reciprocal = 1 / src;
	return reciprocal;
}
|
||||
|
||||
// 2.14.1.10.7 RSQ: Reciprocal Square Root
#define x_rsq(dest, mask, src0) dest.mask = _ssss(_rsq(_scalar(src0))).mask

// Reciprocal square root of the operand's magnitude (the sign is discarded).
// The exact-result special cases for 1 and 0 are present but compiled out.
float _rsq(float src)
{
	float magnitude = abs(src);
#if 0 // TODO : Enable
	if (magnitude == 1) return 1;
	if (magnitude == 0) return 1.#INF;
#endif
	return rsqrt(magnitude);
}
|
||||
|
||||
// 2.14.1.10.15 EXP: Exponential Base 2
#define x_expp(dest, mask, src0) dest.mask = _expp(_scalar(src0)).mask

// Result lanes:
//   x = 2^floor(src)   (floor taken with x_floor, the same floor ARL uses)
//   y = src - floor(src)
//   z = 2^src
//   w = 1
float4 _expp(float src)
{
	float whole = x_floor(src);
	float fraction = src - whole;

	return float4(exp2(whole), fraction, exp2(src), 1);
}
|
||||
|
||||
// 2.14.1.10.16 LOG: Logarithm Base 2
#define x_logp(dest, mask, src0) dest.mask = _logp(_scalar(src0)).mask

// Result lanes, all computed from t = |src|:
//   x = floor(log2(t))        (the exponent)
//   y = t / 2^floor(log2(t))  (the mantissa, in [1, 2) for finite non-zero t)
//   z = log2(t)
//   w = 1
// The special cases for t == 0 and t == infinity are present but compiled out.
float4 _logp(float src)
{
	float4 dest;

	// LOG operates on the magnitude of its operand; without abs() a negative
	// input would make log2() return NaN
	float t = abs(src);
#if 0 // TODO : Enable
	if (t != 0) {
		if (t == 1.#INF) {
			dest.x = 1.#INF;
			dest.y = 1;
			dest.z = 1.#INF;
		} else {
#endif
			float z = log2(t);
			float exponent = floor(z); // TODO : x_floor
			// The mantissa is the input scaled back by its power of two
			// (it was previously 1 / 2^exponent, which ignored the input value)
			float mantissa = t / exp2(exponent);
			// TODO : float exponent = frexp(src + BIAS, /*out*/mantissa);
			dest.x = exponent;
			dest.y = mantissa;
			dest.z = z;
#if 0
		}
	} else {
		dest.x = -1.#INF;
		dest.y = 1;
		dest.z = -1.#INF;
	}
#endif
	dest.w = 1;
	return dest;
}
|
||||
|
||||
// 2.14.1.10.17 LIT: Light Coefficients
#define x_lit(dest, mask, src) dest.mask = _lit(_tof4(src)).mask

// Input lanes : x = diffuse term, y = specular (Blinn) term, w = specular power.
// Result lanes:
//   x = 1
//   y = max(0, diffuse)
//   z = blinn ^ specPower when both diffuse and blinn are positive, else 0
//   w = 1
float4 _lit(float4 src0)
{
	const float epsilon = 1.0f / 256.0f;

	float diffuse = src0.x;
	float blinn = src0.y;
	// The specular power is limited to just inside (-128, 128)
	float specPower = clamp(src0.w, -(128 - epsilon), (128 - epsilon));

	float4 dest;
	dest.x = 1;
	dest.y = max(0, diffuse);
	// b^p == 2^(p * log2(b)) : the logarithm has to be base 2 to pair with exp2()
	// (the natural log() used before produced a different power).
	// The blinn > 0 guard keeps log2() of a non-positive value out of the result.
	dest.z = (diffuse > 0 && blinn > 0) ? exp2(specPower * log2(blinn)) : 0;
	dest.w = 1;

	return dest;
}
|
||||
|
||||
// 2.14.1.10.19 RCC: Reciprocal Clamped
#define x_rcc(dest, mask, src0) dest.mask = _ssss(_rcc(_scalar(src0))).mask

// Reciprocal whose magnitude is kept inside [5.42101e-20, 1.84467e+19],
// preserving the sign of the unclamped reciprocal.
float _rcc(float src)
{
	float r = 1 / src;

	if (r >= 0) {
		// the IEEE 32-bit binary values 0x1F800000 and 0x5F800000
		return clamp(r, 5.42101e-020f, 1.84467e+019f);
	}

	// the IEEE 32-bit binary values 0xDF800000 and 0x9F800000
	return clamp(r, -1.84467e+019f, -5.42101e-020f);
}
|
||||
|
||||
// Undo the screenspace transformation an Xbox vertex shader applies to oPos.
// Xbox shaders conventionally end with instructions like
//     mul oPos.xyz, r12, c-38
//     +rcc r1.x, r12.w
//     mad oPos.xyz, r12, r1.x, c-37
// (c-38 and c-37 being the reserved scale and offset constants), leaving oPos
// in screenspace. The steps are reversed here in the opposite order, using
// the mirrored viewport constants; w is passed through untouched.
float4 reverseScreenspaceTransform(float4 oPos)
{
	float3 without_offset = oPos.xyz - xboxViewportOffset.xyz; // reverse offset
	float3 without_divide = without_offset * oPos.w;           // reverse perspective divide
	float3 without_scale = without_divide / xboxViewportScale.xyz; // reverse scale

	return float4(without_scale, oPos.w);
}
|
||||
|
||||
// Shader entry point. The header raw string ends right after the register
// set-up; the translated Xbox shader program is inserted between the header
// and the footer raw strings; the footer copies the results into VS_OUTPUT.
VS_OUTPUT main(const VS_INPUT xIn)
{
	// Output variables
	float4 oPos, oD0, oD1, oB0, oB1, oT0, oT1, oT2, oT3;
	oPos = oD0 = oD1 = oB0 = oB1 = oT0 = oT1 = oT2 = oT3 = float4(0, 0, 0, 1); // Pre-initialize w component of outputs to 1

	// Single component outputs
	float4 oFog, oPts; // x is write-only on Xbox. Use float4 as some games use incorrect masks
	oFog = oPts = 0;

	// Address (index) register
	int1 a0 = 0;

	// Temporary registers
	float4 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
	r0 = r1 = r2 = r3 = r4 = r5 = r6 = r7 = r8 = r9 = r10 = r11 = float4(0, 0, 0, 0);
	#define r12 oPos // oPos and r12 are two ways of accessing the same register on Xbox

	// Input registers
	float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15;

	// View the 4 packed flag vectors as an array of 16 floats.
	// The host writes 1 for a register that is present in the vertex
	// declaration and 0 for one that is not.
	float vOverride[16] = (float[16])vOverridePacked;

	// Initialize input registers from the vertex buffer
	// Or use an override value set with SetVertexData4f
	// lerp(a, b, s) yields a for s == 0 and b for s == 1, so the 0/1 flag must
	// be the blend weight: flag 1 selects the vertex buffer value, flag 0 the
	// SetVertexData4f value. (Previously the flag and the value were swapped,
	// blending with the override value as the weight.)
	#define init_v(i) v##i = lerp(vOverrideValue[i], xIn.v[i], vOverride[i]);
	// Note : unroll manually instead of for-loop, because of the ## concatenation
	init_v( 0); init_v( 1); init_v( 2); init_v( 3);
	init_v( 4); init_v( 5); init_v( 6); init_v( 7);
	init_v( 8); init_v( 9); init_v(10); init_v(11);
	init_v(12); init_v(13); init_v(14); init_v(15);

	// Xbox shader program)DELIMITER", /* This terminates the header raw string" // */

R"DELIMITER(

	// Copy variables to output struct
	VS_OUTPUT xOut;

	xOut.oPos = reverseScreenspaceTransform(oPos);
	xOut.oD0 = oD0;
	xOut.oD1 = oD1;
	xOut.oFog = oFog.x;
	xOut.oPts = oPts.x;
	xOut.oB0 = oB0;
	xOut.oB1 = oB1;
	xOut.oT0 = oT0;
	xOut.oT1 = oT1;
	xOut.oT2 = oT2;
	xOut.oT3 = oT3;

	return xOut;
}
|
||||
|
||||
// End of vertex shader footer)DELIMITER" /* This terminates the footer raw string" // */
|
|
@ -489,6 +489,17 @@ const char *CxbxGetErrorDescription(HRESULT hResult)
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// TODO move to shader file. Needs to be called whenever a shader or declaration is set
|
||||
void SetOverrideFlags(CxbxVertexShader* pCxbxVertexShader) {
|
||||
if (pCxbxVertexShader != nullptr && pCxbxVertexShader->pHostVertexShader != nullptr) {
|
||||
float overrideFlags[16];
|
||||
for (int i = 0; i < 16; i++) {
|
||||
overrideFlags[i] = pCxbxVertexShader->VertexShaderInfo.vRegisterInDeclaration[i] ? 1.0f : 0.0f;
|
||||
}
|
||||
g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE, overrideFlags, 4);
|
||||
}
|
||||
}
|
||||
|
||||
const char *D3DErrorString(HRESULT hResult)
|
||||
{
|
||||
static char buffer[1024];
|
||||
|
@ -3452,6 +3463,8 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SelectVertexShader)
|
|||
pHostVertexDeclaration = pCxbxVertexShader->pHostVertexDeclaration;
|
||||
pHostVertexShader = pCxbxVertexShader->pHostVertexShader;
|
||||
HostFVF = pCxbxVertexShader->HostFVF;
|
||||
|
||||
SetOverrideFlags(pCxbxVertexShader);
|
||||
}
|
||||
|
||||
hRet = g_pD3DDevice->SetVertexDeclaration(pHostVertexDeclaration);
|
||||
|
@ -3742,20 +3755,134 @@ void ValidateRenderTargetDimensions(DWORD HostRenderTarget_Width, DWORD HostRend
|
|||
}
|
||||
}
|
||||
|
||||
// Returns the factor that maps a normalized depth value onto the value range
// of the given Xbox depth surface's format. Falls back to 1 when no surface
// is given, and to 1 (with a logged test case) for an unrecognized format.
float GetZScaleForSurface(XTL::X_D3DSurface* pSurface)
{
	if (pSurface != xbnullptr) {
		switch (GetXboxPixelContainerFormat(pSurface)) {
			case XTL::X_D3DFMT_D16:
			case XTL::X_D3DFMT_LIN_D16:
				return 65535.0f;

			case XTL::X_D3DFMT_D24S8:
			case XTL::X_D3DFMT_LIN_D24S8:
				return 16777215.0f;

			case XTL::X_D3DFMT_F16:
			case XTL::X_D3DFMT_LIN_F16:
				return 511.9375f;

			case XTL::X_D3DFMT_F24S8:
			case XTL::X_D3DFMT_LIN_F24S8:
				// 24bit floating point is close to precision maximum, so a lower value is used
				// We can't use a double here since the vertex shader is only at float precision
				return 1.0e30f;

			default:
				// Not a depth format we know a scale for
				LOG_TEST_CASE("GetZScaleForSurface: Unknown Xbox Depth Format");
				break;
		}
	}

	// No surface, or unknown depth format
	return 1;
}
|
||||
|
||||
void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4])
|
||||
{
|
||||
// Store viewport offset and scale in constant registers
|
||||
// used in shaders to transform back from screen space (Xbox Shader Output) to Clip space (Host Shader Output)
|
||||
D3DVIEWPORT ViewPort;
|
||||
g_pD3DDevice->GetViewport(&ViewPort);
|
||||
|
||||
// Calculate Width/Height scale & offset
|
||||
float scaleWidth = (2.0f / ViewPort.Width) * g_RenderScaleFactor;
|
||||
float scaleHeight = (2.0f / ViewPort.Height) * g_RenderScaleFactor;
|
||||
float offsetWidth = scaleWidth;
|
||||
float offsetHeight = scaleHeight;
|
||||
|
||||
// Calculate Z scale & offset
|
||||
float zScale = GetZScaleForSurface(g_pXbox_DepthStencil);
|
||||
float scaleZ = zScale * (ViewPort.MaxZ - ViewPort.MinZ);
|
||||
float offsetZ = zScale * ViewPort.MinZ;
|
||||
|
||||
// TODO will we need to do something here to support upscaling?
|
||||
// TODO remove the code above as required
|
||||
|
||||
// Default scale and offset.
|
||||
// Multisample state will affect these
|
||||
float xScale = 1.0f;
|
||||
float yScale = 1.0f;
|
||||
float xOffset = 0.5f;
|
||||
float yOffset = 0.5f;
|
||||
|
||||
// MULTISAMPLE options have offset of 0
|
||||
// Various sample sizes have various x and y scales
|
||||
switch (g_EmuCDPD.XboxPresentationParameters.MultiSampleType)
|
||||
{
|
||||
case XTL::X_D3DMULTISAMPLE_2_SAMPLES_MULTISAMPLE_LINEAR:
|
||||
case XTL::X_D3DMULTISAMPLE_2_SAMPLES_MULTISAMPLE_QUINCUNX:
|
||||
case XTL::X_D3DMULTISAMPLE_4_SAMPLES_MULTISAMPLE_LINEAR:
|
||||
case XTL::X_D3DMULTISAMPLE_4_SAMPLES_MULTISAMPLE_GAUSSIAN:
|
||||
xOffset = yOffset = 0.0f;
|
||||
break;
|
||||
case XTL::X_D3DMULTISAMPLE_2_SAMPLES_SUPERSAMPLE_HORIZONTAL_LINEAR:
|
||||
xScale = 2.0f;
|
||||
break;
|
||||
case XTL::X_D3DMULTISAMPLE_2_SAMPLES_SUPERSAMPLE_VERTICAL_LINEAR:
|
||||
yScale = 2.0f;
|
||||
break;
|
||||
case XTL::X_D3DMULTISAMPLE_4_SAMPLES_SUPERSAMPLE_LINEAR:
|
||||
case XTL::X_D3DMULTISAMPLE_4_SAMPLES_SUPERSAMPLE_GAUSSIAN:
|
||||
xScale = yScale = 2.0f;
|
||||
break;
|
||||
case XTL::X_D3DMULTISAMPLE_9_SAMPLES_MULTISAMPLE_GAUSSIAN:
|
||||
xScale = yScale = 1.5f;
|
||||
xOffset = yOffset = 0.0f;
|
||||
break;
|
||||
case XTL::X_D3DMULTISAMPLE_9_SAMPLES_SUPERSAMPLE_GAUSSIAN:
|
||||
xScale = yScale = 3.0f;
|
||||
break;
|
||||
}
|
||||
|
||||
// Xbox correct values?
|
||||
xOffset = xOffset + (1.0f / 32.0f);
|
||||
yOffset = yOffset + (1.0f / 32.0f);
|
||||
xScale = xScale * ViewPort.Width;
|
||||
yScale = yScale * ViewPort.Height;
|
||||
|
||||
// HACK: Add a host correction factor to these values
|
||||
// So that after we reverse the screenspace transformation
|
||||
// Pre-transformed 2d geometry is in the same space as the 3d geometry...?
|
||||
|
||||
// Offset with a host correction
|
||||
vOffset[0] = xOffset + (0.5f * (float)ViewPort.Width / (float)g_RenderScaleFactor);
|
||||
vOffset[1] = yOffset + (0.5f * (float)ViewPort.Height / (float)g_RenderScaleFactor);
|
||||
vOffset[2] = 0.0f; //offsetZ;
|
||||
vOffset[3] = 0.0f;
|
||||
|
||||
// Scale with a host correction
|
||||
vScale[0] = xScale * (1.0f / ( 2.0f * (float)g_RenderScaleFactor));
|
||||
vScale[1] = yScale * (1.0f / (-2.0f * (float)g_RenderScaleFactor));
|
||||
vScale[2] = scaleZ; // ?
|
||||
vScale[3] = 1.0f; // ?
|
||||
}
|
||||
|
||||
void UpdateViewPortOffsetAndScaleConstants()
|
||||
{
|
||||
float vOffset[4], vScale[4];
|
||||
GetViewPortOffsetAndScale(vOffset, vScale);
|
||||
|
||||
g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_VIEWPORT_SCALE_MIRROR, vScale, 1);
|
||||
g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR, vOffset, 1);
|
||||
|
||||
// Store viewport offset and scale in constant registers 58 (c-38) and
|
||||
// 59 (c-37) used for screen space transformation.
|
||||
// We only do this if X_D3DSCM_NORESERVEDCONSTANTS is not set, since enabling this flag frees up these registers for shader used
|
||||
if (g_Xbox_VertexShaderConstantMode != X_D3DSCM_NORESERVEDCONSTANTS)
|
||||
{
|
||||
D3DVIEWPORT ViewPort;
|
||||
g_pD3DDevice->GetViewport(&ViewPort);
|
||||
|
||||
float vScale[] = { (2.0f / ViewPort.Width) * g_RenderScaleFactor, (-2.0f / ViewPort.Height) * g_RenderScaleFactor, 0.0f, 0.0f };
|
||||
static float vOffset[] = { -1.0f, 1.0f, 0.0f, 1.0f };
|
||||
|
||||
g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_RESERVED_CONSTANT1_CORRECTED, vScale, 1);
|
||||
g_pD3DDevice->SetVertexShaderConstantF(X_D3DVS_RESERVED_CONSTANT2_CORRECTED, vOffset, 1);
|
||||
g_pD3DDevice->SetVertexShaderConstantF(X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION, vScale, 1);
|
||||
g_pD3DDevice->SetVertexShaderConstantF(X_D3DSCM_RESERVED_CONSTANT_OFFSET + X_D3DSCM_CORRECTION, vOffset, 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3892,37 +4019,20 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_GetViewportOffsetAndScale)
|
|||
// Test case : TMNT(R)2
|
||||
// Test case : TMNT(R)3
|
||||
|
||||
#if 0
|
||||
float fScaleX = 1.0f;
|
||||
float fScaleY = 1.0f;
|
||||
float fScaleZ = 1.0f;
|
||||
float fOffsetX = 0.5 + 1.0/32;
|
||||
float fOffsetY = 0.5 + 1.0/32;
|
||||
X_D3DVIEWPORT8 Viewport;
|
||||
float vOffset[4], vScale[4];
|
||||
GetViewPortOffsetAndScale(vOffset, vScale);
|
||||
|
||||
EMUPATCH(D3DDevice_GetViewport)(&Viewport);
|
||||
pOffset->x = vOffset[0];
|
||||
pOffset->y = vOffset[1];
|
||||
pOffset->z = vOffset[2];
|
||||
pOffset->w = vOffset[3];
|
||||
|
||||
pScale->x = (float)Viewport.Width * 0.5f * fScaleX;
|
||||
pScale->y = (float)Viewport.Height * -0.5f * fScaleY;
|
||||
pScale->z = (Viewport.MaxZ - Viewport.MinZ) * fScaleZ;
|
||||
pScale->w = 0;
|
||||
|
||||
pOffset->x = (float)Viewport.Width * fScaleX * 0.5f + (float)Viewport.X * fScaleX + fOffsetX;
|
||||
pOffset->y = (float)Viewport.Height * fScaleY * 0.5f + (float)Viewport.Y * fScaleY + fOffsetY;
|
||||
pOffset->z = Viewport.MinZ * fScaleZ;
|
||||
pOffset->w = 0;
|
||||
#else
|
||||
pScale->x = 1.0f;
|
||||
pScale->y = 1.0f;
|
||||
pScale->z = 1.0f;
|
||||
pScale->w = 1.0f;
|
||||
|
||||
pOffset->x = 0.0f;
|
||||
pOffset->y = 0.0f;
|
||||
pOffset->z = 0.0f;
|
||||
pOffset->w = 0.0f;
|
||||
#endif
|
||||
pScale->x = vScale[0];
|
||||
pScale->y = vScale[1];
|
||||
pScale->z = vScale[2];
|
||||
pScale->w = vScale[3];
|
||||
}
|
||||
|
||||
// LTCG specific D3DDevice_SetShaderConstantMode function...
|
||||
// This uses a custom calling convention where parameter is passed in EAX
|
||||
VOID __stdcall XTL::EMUPATCH(D3DDevice_SetShaderConstantMode_0)
|
||||
|
@ -4012,14 +4122,12 @@ HRESULT WINAPI XTL::EMUPATCH(D3DDevice_CreateVertexShader)
|
|||
|
||||
// Now, we can create the host vertex shader
|
||||
DWORD XboxDeclarationCount = 0;
|
||||
DWORD HostDeclarationSize = 0;
|
||||
CxbxVertexShader* pCxbxVertexShader = (CxbxVertexShader*)calloc(1, sizeof(CxbxVertexShader));
|
||||
D3DVERTEXELEMENT *pRecompiledDeclaration = nullptr;
|
||||
|
||||
pRecompiledDeclaration = EmuRecompileVshDeclaration((DWORD*)pDeclaration,
|
||||
/*bIsFixedFunction=*/pFunction == xbnullptr,
|
||||
&XboxDeclarationCount,
|
||||
&HostDeclarationSize,
|
||||
&pCxbxVertexShader->VertexShaderInfo);
|
||||
|
||||
// Create the vertex declaration
|
||||
|
@ -4033,7 +4141,7 @@ HRESULT WINAPI XTL::EMUPATCH(D3DDevice_CreateVertexShader)
|
|||
g_pD3DDevice->SetVertexDeclaration(pCxbxVertexShader->pHostVertexDeclaration);
|
||||
DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexDeclaration");
|
||||
|
||||
LPD3DXBUFFER pRecompiledBuffer = nullptr;
|
||||
ID3DBlob *pRecompiledBuffer = nullptr;
|
||||
DWORD XboxFunctionSize = 0;
|
||||
DWORD *pRecompiledFunction = nullptr;
|
||||
if (SUCCEEDED(hRet) && pFunction)
|
||||
|
@ -4073,38 +4181,6 @@ HRESULT WINAPI XTL::EMUPATCH(D3DDevice_CreateVertexShader)
|
|||
DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateVertexShader");
|
||||
}
|
||||
|
||||
//* Fallback to dummy shader.
|
||||
if (FAILED(hRet))
|
||||
{
|
||||
static const char dummy[] =
|
||||
"vs.1.1\n"
|
||||
"dcl_position v0\n"
|
||||
"dp4 oPos.x, v0, c96\n"
|
||||
"dp4 oPos.y, v0, c97\n"
|
||||
"dp4 oPos.z, v0, c98\n"
|
||||
"dp4 oPos.w, v0, c99\n";
|
||||
|
||||
EmuLog(LOG_LEVEL::WARNING, "Trying fallback:\n%s", dummy);
|
||||
|
||||
hRet = D3DXAssembleShader(
|
||||
dummy,
|
||||
strlen(dummy),
|
||||
/*pDefines=*/nullptr,
|
||||
/*pInclude=*/nullptr,
|
||||
/*Flags=*/0, // Was D3DXASM_SKIPVALIDATION
|
||||
/*ppCompiledShader=*/&pRecompiledBuffer,
|
||||
/*ppCompilationErrors*/nullptr);
|
||||
|
||||
DEBUG_D3DRESULT(hRet, "D3DXAssembleShader");
|
||||
|
||||
hRet = g_pD3DDevice->CreateVertexShader
|
||||
(
|
||||
(DWORD*)pRecompiledBuffer->GetBufferPointer(),
|
||||
&pHostVertexShader
|
||||
);
|
||||
DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateVertexShader(fallback)");
|
||||
}
|
||||
|
||||
if (pRecompiledBuffer != nullptr)
|
||||
{
|
||||
pRecompiledBuffer->Release();
|
||||
|
@ -4223,8 +4299,10 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SetVertexShaderConstant)
|
|||
|
||||
// Xbox vertex shader constants range from -96 to 95
|
||||
// The host does not support negative, so we adjust to 0..191
|
||||
Register += X_D3DVS_CONSTREG_BIAS;
|
||||
Register += X_D3DSCM_CORRECTION;
|
||||
|
||||
if (Register < 0) LOG_TEST_CASE("Register < 0");
|
||||
if (Register + ConstantCount > X_D3DVS_CONSTREG_COUNT) LOG_TEST_CASE("Register + ConstantCount > X_D3DVS_CONSTREG_COUNT");
|
||||
HRESULT hRet;
|
||||
hRet = g_pD3DDevice->SetVertexShaderConstantF(
|
||||
Register,
|
||||
|
@ -4254,7 +4332,7 @@ VOID __fastcall XTL::EMUPATCH(D3DDevice_SetVertexShaderConstant1)
|
|||
// The XDK uses a macro to automatically adjust to 0..191 range
|
||||
// but D3DDevice_SetVertexShaderConstant expects -96..95 range
|
||||
// so we adjust before forwarding
|
||||
EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DVS_CONSTREG_BIAS, pConstantData, 1);
|
||||
EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DSCM_CORRECTION, pConstantData, 1);
|
||||
}
|
||||
|
||||
// ******************************************************************
|
||||
|
@ -4271,7 +4349,7 @@ VOID __fastcall XTL::EMUPATCH(D3DDevice_SetVertexShaderConstant1Fast)
|
|||
// The XDK uses a macro to automatically adjust to 0..191 range
|
||||
// but D3DDevice_SetVertexShaderConstant expects -96..95 range
|
||||
// so we adjust before forwarding
|
||||
EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DVS_CONSTREG_BIAS, pConstantData, 1);
|
||||
EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DSCM_CORRECTION, pConstantData, 1);
|
||||
}
|
||||
|
||||
// ******************************************************************
|
||||
|
@ -4288,7 +4366,7 @@ VOID __fastcall XTL::EMUPATCH(D3DDevice_SetVertexShaderConstant4)
|
|||
// The XDK uses a macro to automatically adjust to 0..191 range
|
||||
// but D3DDevice_SetVertexShaderConstant expects -96..95 range
|
||||
// so we adjust before forwarding
|
||||
EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DVS_CONSTREG_BIAS, pConstantData, 4);
|
||||
EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DSCM_CORRECTION, pConstantData, 4);
|
||||
}
|
||||
|
||||
// ******************************************************************
|
||||
|
@ -4306,7 +4384,7 @@ VOID __fastcall XTL::EMUPATCH(D3DDevice_SetVertexShaderConstantNotInline)
|
|||
// The XDK uses a macro to automatically adjust to 0..191 range
|
||||
// but D3DDevice_SetVertexShaderConstant expects -96..95 range
|
||||
// so we adjust before forwarding
|
||||
EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DVS_CONSTREG_BIAS, pConstantData, ConstantCount / 4);
|
||||
EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DSCM_CORRECTION, pConstantData, ConstantCount / 4);
|
||||
}
|
||||
|
||||
// ******************************************************************
|
||||
|
@ -4324,7 +4402,7 @@ VOID __fastcall XTL::EMUPATCH(D3DDevice_SetVertexShaderConstantNotInlineFast)
|
|||
// The XDK uses a macro to automatically adjust to 0..191 range
|
||||
// but D3DDevice_SetVertexShaderConstant expects -96..95 range
|
||||
// so we adjust before forwarding
|
||||
EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DVS_CONSTREG_BIAS, pConstantData, ConstantCount / 4);
|
||||
EMUPATCH(D3DDevice_SetVertexShaderConstant)(Register - X_D3DSCM_CORRECTION, pConstantData, ConstantCount / 4);
|
||||
}
|
||||
|
||||
// LTCG specific D3DDevice_SetTexture function...
|
||||
|
@ -4569,6 +4647,8 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SetVertexData4f)
|
|||
// not present in the vertex declaration.
|
||||
// We use range 192 and up to store these values, as Xbox shaders stop at c191!
|
||||
FLOAT values[] = {a,b,c,d};
|
||||
if (Register < 0) LOG_TEST_CASE("Register < 0");
|
||||
if (Register >= 16) LOG_TEST_CASE("Register >= 16");
|
||||
g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + Register, values, 1);
|
||||
}
|
||||
|
||||
|
@ -6623,18 +6703,12 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SetVertexShader)
|
|||
}
|
||||
else
|
||||
{
|
||||
SetOverrideFlags(pCxbxVertexShader);
|
||||
|
||||
hRet = g_pD3DDevice->SetVertexShader(pCxbxVertexShader->pHostVertexShader);
|
||||
DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader(VshHandleIsVertexShader)");
|
||||
}
|
||||
|
||||
// Set default constant values for specular, diffuse, etc
|
||||
static const float ColorBlack[4] = { 0,0,0,0 };
|
||||
static const float ColorWhite[4] = { 1,1,1,1 };
|
||||
|
||||
g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + XTL::X_D3DVSDE_DIFFUSE, ColorWhite, 1);
|
||||
g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + XTL::X_D3DVSDE_BACKDIFFUSE, ColorWhite, 1);
|
||||
g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + XTL::X_D3DVSDE_SPECULAR, ColorBlack, 1);
|
||||
g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + XTL::X_D3DVSDE_BACKSPECULAR, ColorBlack, 1);
|
||||
} else {
|
||||
hRet = g_pD3DDevice->SetVertexShader(nullptr);
|
||||
DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader");
|
||||
|
@ -7028,7 +7102,7 @@ void CxbxUpdateNativeD3DResources()
|
|||
auto nv2a = g_NV2A->GetDeviceState();
|
||||
for(int i = 0; i < X_D3DVS_CONSTREG_COUNT; i++) {
|
||||
// Skip vOffset and vScale constants, we don't want our values to be overwritten by accident
|
||||
if (i == X_D3DVS_RESERVED_CONSTANT1_CORRECTED || i == X_D3DVS_RESERVED_CONSTANT2_CORRECTED) {
|
||||
if (i == X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED || i == X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -7721,6 +7795,8 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SetRenderTarget)
|
|||
DWORD XboxRenderTarget_Height = GetPixelContainerHeight(g_pXbox_RenderTarget);
|
||||
ValidateRenderTargetDimensions(HostRenderTarget_Width, HostRenderTarget_Height, XboxRenderTarget_Width, XboxRenderTarget_Height);
|
||||
}
|
||||
|
||||
UpdateViewPortOffsetAndScaleConstants();
|
||||
}
|
||||
|
||||
// LTCG specific D3DDevice_SetPalette function...
|
||||
|
@ -7967,7 +8043,7 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_GetVertexShaderConstant)
|
|||
|
||||
// Xbox vertex shader constants range from -96 to 95
|
||||
// The host does not support negative, so we adjust to 0..191
|
||||
Register += X_D3DVS_CONSTREG_BIAS;
|
||||
Register += X_D3DSCM_CORRECTION;
|
||||
|
||||
HRESULT hRet = g_pD3DDevice->GetVertexShaderConstantF
|
||||
(
|
||||
|
|
|
@ -983,13 +983,21 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE;
|
|||
#define X_D3DSCM_192CONSTANTSANDFIXEDPIPELINE 0x02 // Unsupported?
|
||||
#define X_D3DSCM_NORESERVEDCONSTANTS 0x10 // Do not reserve constant -38 and -37
|
||||
|
||||
// Xbox vertex shader constants
|
||||
#define X_D3DVS_CONSTREG_BIAS 96 // Add 96 to arrive at the range 0..191 (instead of -96..95)
|
||||
#define X_D3DVS_CONSTREG_COUNT 192
|
||||
#define X_D3DVS_RESERVED_CONSTANT1 -38 // Becomes 58 after correction, contains Scale v
|
||||
#define X_D3DVS_RESERVED_CONSTANT2 -37 // Becomes 59 after correction, contains Offset
|
||||
#define X_D3DVS_RESERVED_CONSTANT1_CORRECTED (X_D3DVS_RESERVED_CONSTANT1 + X_D3DVS_CONSTREG_BIAS)
|
||||
#define X_D3DVS_RESERVED_CONSTANT2_CORRECTED (X_D3DVS_RESERVED_CONSTANT2 + X_D3DVS_CONSTREG_BIAS)
|
||||
#define X_D3DSCM_RESERVED_CONSTANT_SCALE -38 // Becomes 58 after correction, contains Scale v
|
||||
#define X_D3DSCM_RESERVED_CONSTANT_OFFSET -37 // Becomes 59 after correction, contains Offset
|
||||
|
||||
#define X_D3DSCM_CORRECTION 96 // Add 96 to arrive at the range 0..191 (instead of -96..95)
|
||||
#define X_D3DVS_CONSTREG_COUNT 192
|
||||
|
||||
// Special Registers, used to pass additional information to the shaders
|
||||
// TODO co-locate shader workaround constants with shader code
|
||||
#define CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE (X_D3DVS_CONSTREG_COUNT)
|
||||
#define CXBX_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE (CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE + 16)
|
||||
#define CXBX_D3DVS_VIEWPORT_SCALE_MIRROR (CXBX_D3DVS_CONSTREG_VERTEXDATA4F_FLAG_BASE + 4)
|
||||
#define CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR (CXBX_D3DVS_VIEWPORT_SCALE_MIRROR + 1)
|
||||
|
||||
#define X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION)
|
||||
#define X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_OFFSET + X_D3DSCM_CORRECTION)
|
||||
|
||||
// Xbox vertex declaration token bit masks
|
||||
#define X_D3DVSD_MASK_TESSUV 0x10000000
|
||||
|
@ -1209,9 +1217,6 @@ typedef DWORD NV2AMETHOD;
|
|||
// Below declarations are used by Cxbx, not by the Xbox!!!
|
||||
//
|
||||
|
||||
// Host vertex shader counts
|
||||
#define CXBX_D3DVS_CONSTREG_VERTEXDATA4F_BASE X_D3DVS_CONSTREG_COUNT
|
||||
|
||||
} // end of namespace XTL
|
||||
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -25,15 +25,16 @@
|
|||
#ifndef XBVERTEXSHADER_H
|
||||
#define XBVERTEXSHADER_H
|
||||
|
||||
#include <d3dcompiler.h>
|
||||
|
||||
#include "core\hle\D3D8\XbD3D8Types.h" // for X_VSH_MAX_ATTRIBUTES
|
||||
|
||||
// Host vertex shader counts
|
||||
#define VSH_MIN_TEMPORARY_REGISTERS 12 // Equal to D3DCAPS9.VS20Caps.NumTemps (at least 12 for vs_2_x) - https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx9-graphics-reference-asm-vs-registers-vs-2-x
|
||||
#define VSH_MAX_TEMPORARY_REGISTERS 32
|
||||
#define VSH_MAX_INTERMEDIATE_COUNT 1024 // The maximum number of intermediate format slots
|
||||
#define VSH_VS11_MAX_INSTRUCTION_COUNT 128
|
||||
#define VSH_VS2X_MAX_INSTRUCTION_COUNT 256
|
||||
#define VSH_VS30_MAX_INSTRUCTION_COUNT 512
|
||||
#define VSH_VS30_MAX_INSTRUCTION_COUNT 512
|
||||
|
||||
#define VSH_MAX_INTERMEDIATE_COUNT (X_VSH_MAX_INSTRUCTION_COUNT * 3) // The maximum number of shader function slots
|
||||
|
||||
typedef struct _CxbxVertexShaderStreamElement
|
||||
{
|
||||
|
@ -69,6 +70,7 @@ typedef struct _CxbxVertexShaderInfo
|
|||
{
|
||||
UINT NumberOfVertexStreams; // The number of streams the vertex shader uses
|
||||
CxbxVertexShaderStreamInfo VertexStreams[X_VSH_MAX_STREAMS];
|
||||
bool vRegisterInDeclaration[16];
|
||||
}
|
||||
CxbxVertexShaderInfo;
|
||||
|
||||
|
@ -100,7 +102,6 @@ extern D3DVERTEXELEMENT *EmuRecompileVshDeclaration
|
|||
DWORD *pXboxDeclaration,
|
||||
bool bIsFixedFunction,
|
||||
DWORD *pXboxDeclarationCount,
|
||||
DWORD *pHostDeclarationSize,
|
||||
CxbxVertexShaderInfo *pCxbxVertexShaderInfo
|
||||
);
|
||||
|
||||
|
@ -112,7 +113,7 @@ extern HRESULT EmuRecompileVshFunction
|
|||
D3DVERTEXELEMENT *pRecompiledDeclaration,
|
||||
bool *pbUseDeclarationOnly,
|
||||
DWORD *pXboxFunctionSize,
|
||||
LPD3DXBUFFER *ppRecompiledShader
|
||||
ID3DBlob **ppRecompiledShader
|
||||
);
|
||||
|
||||
extern void FreeVertexDynamicPatch(CxbxVertexShader *pVertexShader);
|
||||
|
|
Loading…
Reference in New Issue