Wrap (or replace) HLSL functions with defines, so that destination swizzles work as expected without too much syntax deviation.

Also adjusted a few HLSL functions to be more accurate.
This commit is contained in:
PatrickvL 2019-12-09 15:33:57 +01:00 committed by patrickvl
parent 46fbfad52d
commit 127e51302e
2 changed files with 97 additions and 150 deletions

View File

@ -1,5 +1,5 @@
// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) :
R"DELIMITER(
//R"DELIMITER(
// Xbox HLSL vertex shader (template populated at runtime)
struct VS_INPUT
{
@ -26,67 +26,35 @@ extern uniform float4 c[192] : register(c0);
// Functions for MAC ('Multiply And Accumulate') opcodes
float4 x_mov(float4 src0)
{
return src0;
}
#define x_mov(dest, src0) dest = src0
float4 x_mul(float4 src0, float4 src1)
{
return src0 * src1;
}
#define x_mul(dest, src0, src1) dest = src0 * src1
float4 x_add(float4 src0, float4 src1)
{
return src0 + src1;
}
#define x_add(dest, src0, src1) dest = src0 + src1
float4 x_dst(float4 src0, float4 src1)
{
return dst(src0, src1);
}
#define x_dst(dest, src0, src1) dest = dst(src0, src1) // equals { dest.x = 1; dest.y = src0.y * src1.y; dest.z = src0.z; dest.w = src1.w; }
float4 x_min(float4 src0, float4 src1)
{
return min(src0, src1);
}
#define x_min(dest, src0, src1) dest = min(src0, src1)
float4 x_max(float4 src0, float4 src1)
{
return max(src0, src1);
}
#define x_max(dest, src0, src1) dest = max(src0, src1)
float4 x_mad(float4 src0, float4 src1, float4 src2)
{
return (src0 * src1) + src2;
}
#define x_mad(dest, src0, src1, src2) dest = (src0 * src1) + src2
int x_arl(float src0)
{
// The address register should be floored
// Due to rounding differences with the Xbox (and increased precision on PC?)
// some titles produce values just below the threshold of the next integer.
// We can add a small bias to make sure it's bumped over the threshold
// Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader)
return floor(src0 + 0.0001);
}
// The address register should be floored
// Due to rounding differences with the Xbox (and increased precision on PC?)
// some titles produce values just below the threshold of the next integer.
// We can add a small bias to make sure it's bumped over the threshold
// Test Case: Azurik (divides indexes 755, then scales them back in the vertex shader)
#define x_arl(dest, src0) dest = floor(src0 + 0.0001)
float x_dp3(float4 src0, float4 src1)
{
return dot(src0.xyz, src1.xyz);
}
#define x_dp3(dest, src0, src1) dest = dot((float3)src0, (float3)src1)
float x_dph(float4 src0, float4 src1)
{
return x_dp3(src0, src1) + src1.w;
}
// DPH : homogeneous dot product. Writes dot(src0.xyz, src1.xyz) + src1.w to dest.
// Spelled out here rather than routed through x_dp3, because x_dp3 is a
// three-argument statement macro (it assigns its own dest) and cannot be used as an expression.
#define x_dph(dest, src0, src1) dest = dot((float3)(src0), (float3)(src1)) + (src1).w
float x_dp4(float4 src0, float4 src1)
{
return dot(src0, src1);
}
#define x_dp4(dest, src0, src1) dest = dot(src0, src1)
float4 x_sge(float4 src0, float4 src1)
// SGE : per-component "set on greater or equal". Takes two sources, matching the _sge helper it wraps.
#define x_sge(dest, src0, src1) dest = _sge(src0, src1)
float4 _sge(float4 src0, float4 src1)
{
float4 dest;
dest.x = (src0.x >= src1.x) ? 1 : 0;
@ -96,7 +64,8 @@ float4 x_sge(float4 src0, float4 src1)
return dest;
}
float4 x_slt(float4 src0, float4 src1)
// SLT : per-component "set on less than". Takes two sources, matching the _slt helper it wraps.
#define x_slt(dest, src0, src1) dest = _slt(src0, src1)
float4 _slt(float4 src0, float4 src1)
{
float4 dest;
dest.x = (src0.x < src1.x) ? 1 : 0;
@ -108,17 +77,13 @@ float4 x_slt(float4 src0, float4 src1)
// Xbox ILU Functions
float scalar_component(float4 src0)
{
return src0.w; // use w component by default
}
#define scalar_component(src0) src0.x
float x_rcp(float4 src0)
{
return 1 / scalar_component(src0);
}
#define x_rcp(dest, src0) dest = 1 / scalar_component(src0)
// TODO : #define x_rcp(dest, src0) dest = (scalar_component(src0) == 0) ? 1.#INF : (1 / scalar_component(src0))
float x_rcc(float4 src0)
#define x_rcc(dest, src0) dest = _rcc(src0)
float _rcc(float4 src0)
{
float input = scalar_component(src0);
@ -131,40 +96,52 @@ float x_rcc(float4 src0)
: clamp(r, -1.84467e+019f, -5.42101e-020f);
}
float x_rsq(float4 src0)
#define x_rsq(dest, src0) dest = rsqrt(abs(scalar_component(src0)))
// EXPP : partial-precision exponent. Forwards to the _expp helper function defined right below.
// (A macro is not re-expanded inside its own expansion, so the call must name _expp, not x_expp.)
#define x_expp(dest, src0) dest = _expp(src0)
float4 _expp(float4 src0)
{
return rsqrt(scalar_component(src0));
float input = scalar_component(src0);
float base = floor(input);
float4 dest;
dest.x = exp2(base);
dest.y = input - base; // Was : frac(input)
dest.z = exp2(input);
dest.w = 1;
return dest;
}
float4 x_exp(float4 src0)
#define x_logp(dest, src0) dest = _logp(src0)
float4 _logp(float4 src0)
{
float input = scalar_component(src0);
float x = exp2(floor(input));
float fractional = frac(input);
float power = exp2(input);
return float4(x, fractional, power, 1);
}
float4 x_log(float4 src0)
{
float input = scalar_component(src0);
float input = abs(scalar_component(src0));
float exponent = floor(log2(input));
float mantissa = 1 / exp2(exponent);
float logResult = log2(input);
return float4(exponent, mantissa, logResult, 1);
float4 dest;
dest.x = exponent;
dest.y = 1 / exp2(exponent); // mantissa
dest.z = exponent + log2(input); // logResult
dest.w = 1;
return dest;
}
float4 x_lit(float4 src0)
#define x_lit(dest, src) dest = _lit(src)
float4 _lit(float4 src0)
{
const float epsilon = 1.0f / 256.0f;
float diffuse = src0.x;
float blinn = src0.y;
float specPower = clamp(src0.w, -(128 - epsilon), (128 - epsilon));
float4 dest;
dest.x = 1;
dest.y = max(diffuse, 0);
dest.z = diffuse > 0 ? pow(2, specPower * log(blinn)) : 0;
dest.y = max(0, diffuse);
dest.z = diffuse > 0 ? pow(2, specPower * log(blinn)) : 0; // TODO : Use exp2(#) instead of pow(2, #) ?
// TODO : Use dest.z = (diffuse > 0) && (blinn > 0) ? pow(blinn, specPower) : 0;
dest.w = 1;
return dest;

View File

@ -155,10 +155,10 @@ VSH_OUTPUT_TYPE;
typedef enum _VSH_ARGUMENT_TYPE
{
PARAM_UNKNOWN = 0,
PARAM_R, // Temporary registers
PARAM_R, // Temporary (scRatch) registers
PARAM_V, // Vertex registers
PARAM_C, // Constant registers, set by SetVertexShaderConstant
PARAM_O
PARAM_O // = 0??
}
VSH_ARGUMENT_TYPE;
@ -246,10 +246,10 @@ typedef struct _VSH_OUTPUT
int16_t OutputAddress;
// MAC output R register
boolean MACRMask[4];
boolean MACRAddress;
int16_t MACRAddress;
// ILU output R register
boolean ILURMask[4];
boolean ILURAddress;
int16_t ILURAddress;
}
VSH_OUTPUT;
@ -263,6 +263,7 @@ typedef struct _VSH_SHADER_INSTRUCTION
VSH_PARAMETER B;
VSH_PARAMETER C;
boolean a0x;
boolean Final;
}
VSH_SHADER_INSTRUCTION;
@ -618,6 +619,7 @@ static void VshParseInstruction(uint32_t *pShaderToken,
pInstruction->Output.ILURAddress = VshGetField(pShaderToken, FLD_OUT_R);
// Finally, get a0.x indirect constant addressing
pInstruction->a0x = VshGetField(pShaderToken, FLD_A0X);
pInstruction->Final = VshGetField(pShaderToken, FLD_FINAL);
}
// Print functions
@ -802,30 +804,6 @@ static VSH_INTERMEDIATE_FORMAT *VshNewIntermediate(VSH_XBOX_SHADER *pShader)
return &pShader->Intermediate[pShader->IntermediateCount++];
}
// Inserts a copy of *pIntermediate at index Pos of pShader->Intermediate,
// moving the entries currently at [Pos, IntermediateCount) one slot up.
//
// pShader       - shader whose intermediate list is modified
// pIntermediate - instruction to copy into the list
// Pos           - destination index
//
// NOTE(review): VshVerifyBufferBounds is presumably what guarantees room for
// one more entry - confirm against its definition.
static void VshInsertIntermediate(VSH_XBOX_SHADER *pShader,
                                  VSH_INTERMEDIATE_FORMAT *pIntermediate,
                                  uint16_t Pos)
{
    VshVerifyBufferBounds(pShader);

    // Start at the last valid entry (IntermediateCount - 1). Starting at
    // IntermediateCount would read one element past the valid range and
    // write to index IntermediateCount + 1.
    for (int i = static_cast<int>(pShader->IntermediateCount) - 1; i >= Pos; i--)
    {
        pShader->Intermediate[i + 1] = pShader->Intermediate[i];
    }
    pShader->Intermediate[Pos] = *pIntermediate;
    pShader->IntermediateCount++;
}
// Removes the entry at index Pos from pShader->Intermediate by moving every
// later entry one slot down, then decrements IntermediateCount.
static void VshDeleteIntermediate(VSH_XBOX_SHADER *pShader,
                                  uint16_t Pos)
{
    // Index of the last entry; the shift stops just before it.
    const auto lastIndex = pShader->IntermediateCount - 1;

    int slot = Pos;
    while (slot < lastIndex)
    {
        pShader->Intermediate[slot] = pShader->Intermediate[slot + 1];
        ++slot;
    }

    pShader->IntermediateCount--;
}
static boolean VshAddInstructionMAC_R(VSH_SHADER_INSTRUCTION *pInstruction,
VSH_XBOX_SHADER *pShader,
boolean IsCombined)
@ -1834,7 +1812,7 @@ D3DVERTEXELEMENT *EmuRecompileVshDeclaration
return pHostVertexElements;
}
extern std::string BuildShader(VSH_XBOX_SHADER* pShader);
extern void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader);
std::string DebugPrependLineNumbers(std::string shaderString) {
std::stringstream shader(shaderString);
@ -1901,12 +1879,18 @@ extern HRESULT EmuRecompileVshFunction
}
if(SUCCEEDED(hRet)) {
static std::string hlsl_template =
#include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string
;
auto hlsl_stream = std::stringstream();
for (pToken = (DWORD*)((uint8_t*)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER)); !EOI; pToken += X_VSH_INSTRUCTION_SIZE) {
VSH_SHADER_INSTRUCTION Inst;
VshParseInstruction((uint32_t*)pToken, &Inst);
VshConvertToIntermediate(&Inst, pShader);
EOI = (boolean)VshGetField((uint32_t*)pToken, FLD_FINAL);
EOI = Inst.Final;
}
// The size of the shader is
@ -1919,20 +1903,17 @@ extern HRESULT EmuRecompileVshFunction
return D3D_OK;
}
static std::string hlslTemplate =
#include "core\hle\D3D8\Direct3D9\Xb.hlsl" // Note : This included .hlsl defines a raw string
;
auto hlslTest = BuildShader(pShader);
hlslTest = std::regex_replace(hlslTemplate, std::regex("// <Xbox Shader>"), hlslTest);
BuildShader(hlsl_stream, pShader);
std::string hlsl_str = hlsl_stream.str();
hlsl_str = std::regex_replace(hlsl_template, std::regex("// <Xbox Shader>"), hlsl_str);
DbgVshPrintf("--- HLSL conversion ---\n");
DbgVshPrintf(DebugPrependLineNumbers(hlslTest).c_str());
DbgVshPrintf(DebugPrependLineNumbers(hlsl_str).c_str());
DbgVshPrintf("-----------------------\n");
hRet = D3DCompile(
hlslTest.c_str(),
hlslTest.length(),
hlsl_str.c_str(),
hlsl_str.length(),
nullptr, // pSourceName
nullptr, // pDefines
nullptr, // pInclude // TODO precompile x_* HLSL functions?
@ -2095,14 +2076,10 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest)
if (!(dest.Mask[0] && dest.Mask[1] && dest.Mask[2] && dest.Mask[3]))
{
hlsl << ".";
unsigned vector_size = 0;
if (dest.Mask[0]) { hlsl << "x"; vector_size++; }
if (dest.Mask[1]) { hlsl << "y"; vector_size++; }
if (dest.Mask[2]) { hlsl << "z"; vector_size++; }
if (dest.Mask[3]) { hlsl << "w"; vector_size++; }
hlsl << " = (float" << vector_size << ")";
} else {
hlsl << " = ";
if (dest.Mask[0]) hlsl << "x";
if (dest.Mask[1]) hlsl << "y";
if (dest.Mask[2]) hlsl << "z";
if (dest.Mask[3]) hlsl << "w";
}
}
@ -2161,7 +2138,7 @@ void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& paramMeta)
}
}
std::string BuildShader(VSH_XBOX_SHADER* pShader)
void BuildShader(std::stringstream& hlsl, VSH_XBOX_SHADER* pShader)
{
// HLSL strings for all MAC opcodes, indexed with VSH_MAC
static std::string VSH_MAC_HLSL[] = {
@ -2178,7 +2155,7 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader)
/*MAC_MAX:*/"x_max",
/*MAC_SLT:*/"x_slt",
/*MAC_SGE:*/"x_sge",
/*MAC_ARL:*/"x_arl", // Note : For this MAC_ARL case, ToHlsl would always replace 'dest' with 'a', so we optimized this upfront
/*MAC_ARL:*/"x_arl",
"",
"" // VSH_MAC 2 final values of the 4 bits are undefined/unknown TODO : Investigate their effect (if any) and emulate that as well
};
@ -2190,13 +2167,11 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader)
/*ILU_RCP:*/"x_rcp",
/*ILU_RCC:*/"x_rcc",
/*ILU_RSQ:*/"x_rsq",
/*ILU_EXP:*/"x_exp",
/*ILU_LOG:*/"x_log",
/*ILU_EXP:*/"x_expp",
/*ILU_LOG:*/"x_logp",
/*ILU_LIT:*/"x_lit" // = 7 - all values of the 3 bits are used
};
auto hlsl = std::stringstream();
for (int i = 0; i < pShader->IntermediateCount; i++) {
VSH_INTERMEDIATE_FORMAT& xboxInstruction = pShader->Intermediate[i];
@ -2212,20 +2187,15 @@ std::string BuildShader(VSH_XBOX_SHADER* pShader)
}
if (!str.empty()) {
hlsl << "\n ";
hlsl << "\n " << str << "("; // opcode
OutputHlsl(hlsl, xboxInstruction.Output);
hlsl << str; // opcode
str = "(";
for (int i = 0; i < 3; i++) { // TODO remove magic number
for (int i = 0; i < 3; i++) {
if (xboxInstruction.Parameters[i].Active) {
hlsl << str; // separator
hlsl << ", ";
ParameterHlsl(hlsl, xboxInstruction.Parameters[i]);
str = ", ";
}
}
hlsl << ");";
}
}
return hlsl.str();
}