Wrap (or replace) HLSL functions with defines, so that destination swizzles work as expected without too much syntax deviation.

Also adjusted a few HLSL functions to be more accurate.
This commit is contained in:
PatrickvL 2019-12-09 15:33:57 +01:00 committed by patrickvl
parent 46fbfad52d
commit 127e51302e
2 changed files with 97 additions and 150 deletions

View File

@ -1,5 +1,5 @@
// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : // This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) :
R"DELIMITER( //R"DELIMITER(
// Xbox HLSL vertex shader (template populated at runtime) // Xbox HLSL vertex shader (template populated at runtime)
struct VS_INPUT struct VS_INPUT
{ {
@ -26,67 +26,35 @@ extern uniform float4 c[192] : register(c0);
// Functions for MAC ('Multiply And Accumulate') opcodes // Functions for MAC ('Multiply And Accumulate') opcodes
float4 x_mov(float4 src0) #define x_mov(dest, src0) dest = src0
{
return src0;
}
float4 x_mul(float4 src0, float4 src1) #define x_mul(dest, src0, src1) dest = src0 * src1
{
return src0 * src1;
}
float4 x_add(float4 src0, float4 src1) #define x_add(dest, src0, src1) dest = src0 + src1
{
return src0 + src1;
}
float4 x_dst(float4 src0, float4 src1) #define x_dst(dest, src0, src1) dest = dst(src0, src1) // equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; }
{
return dst(src0, src1);
}
float4 x_min(float4 src0, float4 src1) #define x_min(dest, src0, src1) dest = min(src0, src1)
{
return min(src0, src1);
}
float4 x_max(float4 src0, float4 src1) #define x_max(dest, src0, src1) dest = max(src0, src1)
{
return max(src0, src1);
}
float4 x_mad(float4 src0, float4 src1, float4 src2) #define x_mad(dest, src0, src1, src2) dest = (src0 * src1) + src2
{
return (src0 * src1) + src2;
}
int x_arl(float src0) // The address register should be floored
{ // Due to rounding differences with the Xbox (and increased precision on PC?)
// The address register should be floored // some titles produce values just below the threshold of the next integer.
// Due to rounding differences with the Xbox (and increased precision on PC?) // We can add a small bias to make sure it's bumped over the threshold
// some titles produce values just below the threshold of the next integer. // Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader)
// We can add a small bias to make sure it's bumped over the threshold #define x_arl(dest, src0) dest = floor(src0 + 0.0001)
// Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader)
return floor(src0 + 0.0001);
}
float x_dp3(float4 src0, float4 src1) #define x_dp3(dest, src0, src1) dest = dot((float3)src0, (float3)src1)
{
return dot(src0.xyz, src1.xyz);
}
float x_dph(float4 src0, float4 src1) #define x_dph(dest, src0, src1) dest = dot((float3)src0, (float3)src1) + src1.w // 3-component dot product plus src1.w; spelled out because the x_dp3 macro assigns its own dest and cannot be nested
{
return x_dp3(src0, src1) + src1.w;
}
float x_dp4(float4 src0, float4 src1) #define x_dp4(dest, src0, src1) dest = dot(src0, src1)
{
return dot(src0, src1);
}
float4 x_sge(float4 src0, float4 src1) #define x_sge(dest, src0, src1) dest = _sge(src0, src1) // forwards both operands; _sge compares src0 >= src1 per component
float4 _sge(float4 src0, float4 src1)
{ {
float4 dest; float4 dest;
dest.x = (src0.x >= src1.x) ? 1 : 0; dest.x = (src0.x >= src1.x) ? 1 : 0;
@ -96,7 +64,8 @@ float4 x_sge(float4 src0, float4 src1)
return dest; return dest;
} }
float4 x_slt(float4 src0, float4 src1) #define x_slt(dest, src0, src1) dest = _slt(src0, src1) // forwards both operands; _slt compares src0 < src1 per component
float4 _slt(float4 src0, float4 src1)
{ {
float4 dest; float4 dest;
dest.x = (src0.x < src1.x) ? 1 : 0; dest.x = (src0.x < src1.x) ? 1 : 0;
@ -108,17 +77,13 @@ float4 x_slt(float4 src0, float4 src1)
// Xbox ILU Functions // Xbox ILU Functions
float scalar_component(float4 src0) #define scalar_component(src0) src0.x
{
return src0.w; // use w component by default
}
float x_rcp(float4 src0) #define x_rcp(dest, src0) dest = 1 / scalar_component(src0)
{ // TODO : #define x_rcp(dest, src0) dest = (scalar_component(src0) == 0) ? 1.#INF : (1 / scalar_component(src0))
return 1 / scalar_component(src0);
}
float x_rcc(float4 src0) #define x_rcc(dest, src0) dest = _rcc(src0)
float _rcc(float4 src0)
{ {
float input = scalar_component(src0); float input = scalar_component(src0);
@ -131,42 +96,54 @@ float x_rcc(float4 src0)
: clamp(r, -1.84467e+019f, -5.42101e-020f); : clamp(r, -1.84467e+019f, -5.42101e-020f);
} }
float x_rsq(float4 src0) #define x_rsq(dest, src0) dest = rsqrt(abs(scalar_component(src0)))
#define x_expp(dest, src0) dest = _expp(src0) // must call the _expp helper; a macro naming itself is not re-expanded and would leave an unresolved x_expp(src0) call
float4 _expp(float4 src0)
{ {
return rsqrt(scalar_component(src0)); float input = scalar_component(src0);
float base = floor(input);
float4 dest;
dest.x = exp2(base);
dest.y = input - base; // Was : frac(input)
dest.z = exp2(input);
dest.w = 1;
return dest;
} }
float4 x_exp(float4 src0) #define x_logp(dest, src0) dest = _logp(src0)
float4 _logp(float4 src0)
{ {
float input = scalar_component(src0); float input = abs(scalar_component(src0));
float x = exp2(floor(input));
float fractional = frac(input);
float power = exp2(input);
return float4(x, fractional, power, 1);
}
float4 x_log(float4 src0)
{
float input = scalar_component(src0);
float exponent = floor(log2(input)); float exponent = floor(log2(input));
float mantissa = 1 / exp2(exponent);
float logResult = log2(input); float4 dest;
return float4(exponent, mantissa, logResult, 1); dest.x = exponent;
dest.y = 1 / exp2(exponent); // mantissa
dest.z = exponent + log2(input); // logResult
dest.w = 1;
return dest;
} }
float4 x_lit(float4 src0) #define x_lit(dest, src) dest = _lit(src)
float4 _lit(float4 src0)
{ {
const float epsilon = 1.0f / 256.0f; const float epsilon = 1.0f / 256.0f;
float diffuse = src0.x; float diffuse = src0.x;
float blinn = src0.y; float blinn = src0.y;
float specPower = clamp(src0.w, -(128 - epsilon), (128 - epsilon)); float specPower = clamp(src0.w, -(128 - epsilon), (128 - epsilon));
float4 dest; float4 dest;
dest.x = 1; dest.x = 1;
dest.y = max(diffuse, 0); dest.y = max(0, diffuse);
dest.z = diffuse > 0 ? pow(2, specPower * log(blinn)) : 0; dest.z = diffuse > 0 ? pow(2, specPower * log(blinn)) : 0; // TODO : Use exp2(#) instead of pow(2, #) ?
// TODO : Use dest.z = (diffuse > 0) && (blinn > 0) ? pow(blinn, specPower) : 0;
dest.w = 1; dest.w = 1;
return dest; return dest;
} }

View File

@ -155,10 +155,10 @@ VSH_OUTPUT_TYPE;
typedef enum _VSH_ARGUMENT_TYPE typedef enum _VSH_ARGUMENT_TYPE
{ {
PARAM_UNKNOWN = 0, PARAM_UNKNOWN = 0,
PARAM_R, // Temporary registers PARAM_R, // Temporary (scRatch) registers
PARAM_V, // Vertex registers PARAM_V, // Vertex registers
PARAM_C, // Constant registers, set by SetVertexShaderConstant PARAM_C, // Constant registers, set by SetVertexShaderConstant
PARAM_O PARAM_O // = 0??
} }
VSH_ARGUMENT_TYPE; VSH_ARGUMENT_TYPE;
@ -246,10 +246,10 @@ typedef struct _VSH_OUTPUT
int16_t OutputAddress; int16_t OutputAddress;
// MAC output R register // MAC output R register
boolean MACRMask[4]; boolean MACRMask[4];
boolean MACRAddress; int16_t MACRAddress;
// ILU output R register // ILU output R register
boolean ILURMask[4]; boolean ILURMask[4];
boolean ILURAddress; int16_t ILURAddress;
} }
VSH_OUTPUT; VSH_OUTPUT;
@ -262,7 +262,8 @@ typedef struct _VSH_SHADER_INSTRUCTION
VSH_PARAMETER A; VSH_PARAMETER A;
VSH_PARAMETER B; VSH_PARAMETER B;
VSH_PARAMETER C; VSH_PARAMETER C;
boolean a0x; boolean a0x;
boolean Final;
} }
VSH_SHADER_INSTRUCTION; VSH_SHADER_INSTRUCTION;
@ -378,7 +379,7 @@ static const VSH_FIELDMAPPING g_FieldMapping[] =
// Final instruction // Final instruction
{ FLD_FINAL, 3, 0, 1 } { FLD_FINAL, 3, 0, 1 }
}; };
static const VSH_OPCODE_PARAMS g_OpCodeParams_ILU[] = static const VSH_OPCODE_PARAMS g_OpCodeParams_ILU[] =
{ {
// ILU OP MAC OP ParamA ParamB ParamC // ILU OP MAC OP ParamA ParamB ParamC
@ -516,7 +517,7 @@ static VSH_OPCODE_PARAMS* VshGetOpCodeParams(VSH_ILU ILU,
static void VshParseInstruction(uint32_t *pShaderToken, static void VshParseInstruction(uint32_t *pShaderToken,
VSH_SHADER_INSTRUCTION *pInstruction) VSH_SHADER_INSTRUCTION *pInstruction)
{ {
// First get the instruction(s). // First get the instruction(s).
pInstruction->ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU); pInstruction->ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU);
pInstruction->MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC); pInstruction->MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC);
@ -589,14 +590,14 @@ static void VshParseInstruction(uint32_t *pShaderToken,
pInstruction->C.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_C_SWZ_W); pInstruction->C.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, FLD_C_SWZ_W);
// Get output // Get output
// Output register // Output register
pInstruction->Output.OutputType = (VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB); pInstruction->Output.OutputType = (VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB);
switch(pInstruction->Output.OutputType) switch(pInstruction->Output.OutputType)
{ {
case OUTPUT_C: case OUTPUT_C:
pInstruction->Output.OutputAddress = ConvertCRegister(VshGetField(pShaderToken, FLD_OUT_ADDRESS)); pInstruction->Output.OutputAddress = ConvertCRegister(VshGetField(pShaderToken, FLD_OUT_ADDRESS));
break; break;
case OUTPUT_O: case OUTPUT_O:
pInstruction->Output.OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS) & 0xF; pInstruction->Output.OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS) & 0xF;
break; break;
} }
pInstruction->Output.OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX); pInstruction->Output.OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX);
@ -617,7 +618,8 @@ static void VshParseInstruction(uint32_t *pShaderToken,
pInstruction->Output.ILURMask[3] = VshGetField(pShaderToken, FLD_OUT_ILU_MASK_W); pInstruction->Output.ILURMask[3] = VshGetField(pShaderToken, FLD_OUT_ILU_MASK_W);
pInstruction->Output.ILURAddress = VshGetField(pShaderToken, FLD_OUT_R); pInstruction->Output.ILURAddress = VshGetField(pShaderToken, FLD_OUT_R);
// Finally, get a0.x indirect constant addressing // Finally, get a0.x indirect constant addressing
pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X); pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X);
pInstruction->Final = VshGetField(pShaderToken, FLD_FINAL);
} }
// Print functions // Print functions
@ -802,30 +804,6 @@ static VSH_INTERMEDIATE_FORMAT *VshNewIntermediate(VSH_XBOX_SHADER *pShader)
return &pShader->Intermediate[pShader->IntermediateCount++]; return &pShader->Intermediate[pShader->IntermediateCount++];
} }
// Inserts a copy of *pIntermediate at index Pos, moving the entries that were
// at [Pos, IntermediateCount) one slot towards the end, and increments
// IntermediateCount.
static void VshInsertIntermediate(VSH_XBOX_SHADER *pShader,
                                  VSH_INTERMEDIATE_FORMAT *pIntermediate,
                                  uint16_t Pos)
{
    VshVerifyBufferBounds(pShader);

    // Walk backwards from the last valid entry so nothing is overwritten
    // before it has been moved. Starting at IntermediateCount - 1 (instead of
    // IntermediateCount) avoids copying the unused slot past the end into
    // index IntermediateCount + 1.
    for (int i = pShader->IntermediateCount - 1; i >= Pos; i--)
    {
        pShader->Intermediate[i + 1] = pShader->Intermediate[i];
    }
    pShader->Intermediate[Pos] = *pIntermediate;
    pShader->IntermediateCount++;
}
// Removes the entry at index Pos: every later entry is copied one slot
// towards the front, then IntermediateCount is decremented.
static void VshDeleteIntermediate(VSH_XBOX_SHADER *pShader,
                                  uint16_t Pos)
{
    // Index of the final entry; the count is not modified while shifting.
    const auto last = pShader->IntermediateCount - 1;

    int slot = Pos;
    while (slot < last)
    {
        pShader->Intermediate[slot] = pShader->Intermediate[slot + 1];
        ++slot;
    }

    pShader->IntermediateCount--;
}
static boolean VshAddInstructionMAC_R(VSH_SHADER_INSTRUCTION *pInstruction, static boolean VshAddInstructionMAC_R(VSH_SHADER_INSTRUCTION *pInstruction,
VSH_XBOX_SHADER *pShader, VSH_XBOX_SHADER *pShader,
boolean IsCombined) boolean IsCombined)
@ -1834,7 +1812,7 @@ D3DVERTEXELEMENT *EmuRecompileVshDeclaration
return pHostVertexElements; return pHostVertexElements;
} }
extern std::string BuildShader(VSH_XBOX_SHADER* pShader); extern void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader);
std::string DebugPrependLineNumbers(std::string shaderString) { std::string DebugPrependLineNumbers(std::string shaderString) {
std::stringstream shader(shaderString); std::stringstream shader(shaderString);
@ -1901,12 +1879,18 @@ extern HRESULT EmuRecompileVshFunction
} }
if(SUCCEEDED(hRet)) { if(SUCCEEDED(hRet)) {
static std::string hlsl_template =
#include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string
;
auto hlsl_stream = std::stringstream();
for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) { for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) {
VSH_SHADER_INSTRUCTION Inst; VSH_SHADER_INSTRUCTION Inst;
VshParseInstruction((uint32_t*)pToken, &Inst); VshParseInstruction((uint32_t*)pToken, &Inst);
VshConvertToIntermediate(&Inst, pShader); VshConvertToIntermediate(&Inst, pShader);
EOI = (boolean)VshGetField((uint32_t*)pToken, FLD_FINAL); EOI = Inst.Final;
} }
// The size of the shader is // The size of the shader is
@ -1919,20 +1903,17 @@ extern HRESULT EmuRecompileVshFunction
return D3D_OK; return D3D_OK;
} }
static std::string hlslTemplate = BuildShader(hlsl_stream, pShader);
#include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string std::string hlsl_str = hlsl_stream.str();
; hlsl_str = std::regex_replace(hlsl_template, std::regex("// <Xbox Shader>"), hlsl_str);
auto hlslTest = BuildShader(pShader);
hlslTest = std::regex_replace(hlslTemplate, std::regex("// <Xbox Shader>"), hlslTest);
DbgVshPrintf("--- HLSL conversion ---\n"); DbgVshPrintf("--- HLSL conversion ---\n");
DbgVshPrintf(DebugPrependLineNumbers(hlslTest).c_str()); DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str());
DbgVshPrintf("-----------------------\n"); DbgVshPrintf("-----------------------\n");
hRet = D3DCompile( hRet = D3DCompile(
hlslTest.c_str(), hlsl_str.c_str(),
hlslTest.length(), hlsl_str.length(),
nullptr, // pSourceName nullptr, // pSourceName
nullptr, // pDefines nullptr, // pDefines
nullptr, // pInclude // TODO precompile x_* HLSL functions? nullptr, // pInclude // TODO precompile x_* HLSL functions?
@ -2095,14 +2076,10 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest)
if (!(dest.Mask[0] && dest.Mask[1] && dest.Mask[2] && dest.Mask[3])) if (!(dest.Mask[0] && dest.Mask[1] && dest.Mask[2] && dest.Mask[3]))
{ {
hlsl << "."; hlsl << ".";
unsigned vector_size = 0; if (dest.Mask[0]) hlsl << "x";
if (dest.Mask[0]) { hlsl << "x"; vector_size++; } if (dest.Mask[1]) hlsl << "y";
if (dest.Mask[1]) { hlsl << "y"; vector_size++; } if (dest.Mask[2]) hlsl << "z";
if (dest.Mask[2]) { hlsl << "z"; vector_size++; } if (dest.Mask[3]) hlsl << "w";
if (dest.Mask[3]) { hlsl << "w"; vector_size++; }
hlsl << " = (float" << vector_size << ")";
} else {
hlsl << " = ";
} }
} }
@ -2161,7 +2138,7 @@ void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta)
} }
} }
std::string BuildShader(VSH_XBOX_SHADER* pShader) void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader)
{ {
// HLSL strings for all MAC opcodes, indexed with VSH_MAC // HLSL strings for all MAC opcodes, indexed with VSH_MAC
static std::string VSH_MAC_HLSL[] = { static std::string VSH_MAC_HLSL[] = {
@ -2178,7 +2155,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader)
/*MAC_MAX:*/"x_max", /*MAC_MAX:*/"x_max",
/*MAC_SLT:*/"x_slt", /*MAC_SLT:*/"x_slt",
/*MAC_SGE:*/"x_sge", /*MAC_SGE:*/"x_sge",
/*MAC_ARL:*/"x_arl", // Note : For this MAC_ARL case, ToHlsl would always replace 'dest' with 'a', so we optimized this upfront /*MAC_ARL:*/"x_arl",
"", "",
"" // VSH_MAC 2 final values of the 4 bits are undefined/unknown TODO : Investigate their effect (if any) and emulate that as well "" // VSH_MAC 2 final values of the 4 bits are undefined/unknown TODO : Investigate their effect (if any) and emulate that as well
}; };
@ -2190,13 +2167,11 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader)
/*ILU_RCP:*/"x_rcp", /*ILU_RCP:*/"x_rcp",
/*ILU_RCC:*/"x_rcc", /*ILU_RCC:*/"x_rcc",
/*ILU_RSQ:*/"x_rsq", /*ILU_RSQ:*/"x_rsq",
/*ILU_EXP:*/"x_exp", /*ILU_EXP:*/"x_expp",
/*ILU_LOG:*/"x_log", /*ILU_LOG:*/"x_logp",
/*ILU_LIT:*/"x_lit" // = 7 - all values of the 3 bits are used /*ILU_LIT:*/"x_lit" // = 7 - all values of the 3 bits are used
}; };
auto hlsl = std::stringstream();
for (int i = 0; i < pShader->IntermediateCount; i++) { for (int i = 0; i < pShader->IntermediateCount; i++) {
VSH_INTERMEDIATE_FORMAT& xboxInstruction = pShader->Intermediate[i]; VSH_INTERMEDIATE_FORMAT& xboxInstruction = pShader->Intermediate[i];
@ -2212,20 +2187,15 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader)
} }
if (!str.empty()) { if (!str.empty()) {
hlsl << "\n "; hlsl << "\n " << str << "("; // opcode
OutputHlsl(hlsl, xboxInstruction.Output); OutputHlsl(hlsl, xboxInstruction.Output);
hlsl << str; // opcode for (int i = 0; i < 3; i++) {
str = "(";
for (int i = 0; i < 3; i++) { // TODO remove magic number
if (xboxInstruction.Parameters[i].Active) { if (xboxInstruction.Parameters[i].Active) {
hlsl << str; // separator hlsl << ", ";
ParameterHlsl(hlsl, xboxInstruction.Parameters[i]); ParameterHlsl(hlsl, xboxInstruction.Parameters[i]);
str = ", ";
} }
} }
hlsl << ");"; hlsl << ");";
} }
} }
return hlsl.str();
} }