Fix vertex shader op independence

Ensure the MAC op does not interfere with the input of the ILU op.
- Use a temp register to hold the input of the ILU op when necessary
- Reorganize vertex shader decoding to better reflect the data.
Decode one vsh instruction to one intermediate instruction, rather than to multiple independent instructions.
Test case:
KOTOR II (menu)
GTA III (lighting)
This commit is contained in:
Anthony 2022-06-18 00:22:39 +12:00
parent b43f6bbcdf
commit 46b1f24153
4 changed files with 235 additions and 141 deletions

View File

@ -323,6 +323,9 @@ VS_OUTPUT main(const VS_INPUT xIn)
init_v( 8); init_v( 9); init_v(10); init_v(11);
init_v(12); init_v(13); init_v(14); init_v(15);
// Temp variable for paired VS instruction
float4 temp;
// Xbox shader program)DELIMITER", /* This terminates the header raw string" // */
R"DELIMITER(

View File

@ -11,9 +11,7 @@
extern const char* g_vs_model = vs_model_3_0;
// HLSL generation
void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest)
{
void DestRegisterHlsl(std::stringstream& hlsl, VSH_IMD_DEST& dest) {
static const char* OReg_Name[/*VSH_OREG_NAME*/] = {
"oPos",
"???",
@ -34,34 +32,37 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest)
};
switch (dest.Type) {
case IMD_OUTPUT_C:
case IMD_DEST_C:
// Access the HLSL capital C[] constants array, with the index bias applied :
// TODO : Avoid out-of-bound writes (perhaps writing to a reserved index?)
hlsl << "C[" << dest.Address + X_D3DSCM_CORRECTION << "]";
LOG_TEST_CASE("Vertex shader writes to constant table");
break;
case IMD_OUTPUT_R:
case IMD_DEST_R:
hlsl << "r" << dest.Address;
break;
case IMD_OUTPUT_O:
case IMD_DEST_O:
assert(dest.Address < OREG_A0X);
hlsl << OReg_Name[dest.Address];
break;
case IMD_OUTPUT_A0X:
case IMD_DEST_A0X:
hlsl << "a0";
break;
default:
assert(false);
break;
}
}
void DestMaskHlsl(std::stringstream& hlsl, VSH_IMD_DEST& dest)
{
// Write the mask as a separate argument to the opcode defines
// (No space, so that "dest,mask, ..." looks close to "dest.mask, ...")
hlsl << ",";
// Detect oFog masks other than x
// Test case: Lego Star Wars II (menu)
if (dest.Type == IMD_OUTPUT_O &&
if (dest.Type == IMD_DEST_O &&
dest.Address == OREG_OFOG &&
dest.Mask != MASK_X)
{
@ -78,7 +79,7 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest)
if (dest.Mask & MASK_W) hlsl << "w";
}
void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& param, bool IndexesWithA0_X)
void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& param, bool IndexesWithA0_X, bool useTemp)
{
static const char* RegisterName[/*VSH_PARAMETER_TYPE*/] = {
"?", // PARAM_UNKNOWN = 0,
@ -92,7 +93,10 @@ void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& param, bool Index
hlsl << "-";
}
if (param.ParameterType == PARAM_C) {
if (useTemp) {
hlsl << "temp";
}
else if (param.Type == PARAM_C) {
// Access constant registers through our HLSL c() function,
// which allows dumping negative indices (like Xbox shaders),
// and which returns zero when out-of-bounds indices are passed in:
@ -112,7 +116,7 @@ void ParameterHlsl(std::stringstream& hlsl, VSH_IMD_PARAMETER& param, bool Index
}
}
else {
hlsl << RegisterName[param.ParameterType] << param.Address;
hlsl << RegisterName[param.Type] << param.Address;
}
// Write the swizzle if we need to
@ -175,25 +179,107 @@ void BuildShader(IntermediateVertexShader* pShader, std::stringstream& hlsl)
/*ILU_LIT:*/"x_lit" // = 7 - all values of the 3 bits are used
};
for (size_t i = 0; i < pShader->Instructions.size(); i++) {
VSH_INTERMEDIATE_FORMAT& IntermediateInstruction = pShader->Instructions[i];
auto WriteOp = [&](
const std::string& opcode,
VSH_IMD_DEST dest,
int paramCount, VSH_IMD_PARAMETER* params,
bool indexesWithA0_X,
bool iluUseTempParam
) {
// opcode(dest, a, b, c);
hlsl << "\n " << opcode << "(";
std::string str;
if (IntermediateInstruction.MAC > MAC_NOP) {
str = VSH_MAC_HLSL[IntermediateInstruction.MAC];
}
else {
str = VSH_ILU_HLSL[IntermediateInstruction.ILU];
}
DestRegisterHlsl(hlsl, dest);
DestMaskHlsl(hlsl, dest);
hlsl << "\n " << str << "("; // opcode
OutputHlsl(hlsl, IntermediateInstruction.Output);
for (unsigned i = 0; i < IntermediateInstruction.ParamCount; i++) {
for (int i = 0; i < paramCount; i++) {
hlsl << ", ";
ParameterHlsl(hlsl, IntermediateInstruction.Parameters[i], IntermediateInstruction.IndexesWithA0_X);
ParameterHlsl(hlsl, params[i], indexesWithA0_X, iluUseTempParam);
}
hlsl << ");";
};
for (size_t i = 0; i < pShader->Instructions.size(); i++) {
VSH_IMD_INSTR& in = pShader->Instructions[i];
// Paired if both MAC and ILU write to a dest register
bool isPaired =
in.MAC.Opcode != MAC_NOP &&
in.ILU.Opcode != ILU_NOP &&
(in.MAC.Dest.Mask || in.ORegSource == SRC_MAC) &&
(in.ILU.Dest.Mask || in.ORegSource == SRC_ILU);
// If there are two "paired" instructions that need to run "simultaneously",
// we need to prevent the output of the first instruction interfering
// with the input of the second instruction
// If the MAC output is the same as the ILU input
// we will use a temp variable to hold the ILU input
VSH_IMD_DEST* iluTemp = nullptr;
if (isPaired) {
if (in.MAC.Dest.Address == in.ILU.Parameter.Address &&
(in.MAC.Dest.Type == IMD_DEST_C && in.ILU.Parameter.Type == PARAM_C ||
in.MAC.Dest.Type == IMD_DEST_R && in.ILU.Parameter.Type == PARAM_R ||
in.MAC.Dest.Type == IMD_DEST_A0X && in.ILU.Parameter.Type == PARAM_C && in.IndexesWithA0_X)) {
// Normal MAC output matches ILU input
iluTemp = &in.MAC.Dest;
}
else if (in.ORegSource == SRC_MAC &&
in.ORegDest.Type == IMD_DEST_O && in.ORegDest.Address == 0 &&
in.ILU.Parameter.Type == PARAM_R && in.ILU.Parameter.Address == 12) {
// OReg MAC output matches ILU input
// Note oPos is the same as r12
iluTemp = &in.ORegDest;
}
if (iluTemp) {
// MAC and ILU use the same register.
// This is fine unless the ILU op uses a component written to by the MAC op
bool conflict = false;
for (int s = 0; s < 4; s++) {
auto swizzle = in.ILU.Parameter.Swizzle[s];
if (iluTemp->Mask & MASK_X && swizzle == SWIZZLE_X ||
iluTemp->Mask & MASK_Y && swizzle == SWIZZLE_Y ||
iluTemp->Mask & MASK_Z && swizzle == SWIZZLE_Z ||
iluTemp->Mask & MASK_W && swizzle == SWIZZLE_W) {
conflict = true;
break;
}
}
if (!conflict) {
iluTemp = nullptr; // We don't need a temp after all
}
}
}
if (iluTemp) {
// Write the ILU input to a temp
hlsl << "\n " << "temp = ";
DestRegisterHlsl(hlsl, *iluTemp);
hlsl << ";";
}
// Write MAC op
if (in.MAC.Opcode != MAC_NOP) {
if (in.MAC.Dest.Mask) {
WriteOp(VSH_MAC_HLSL[in.MAC.Opcode], in.MAC.Dest, in.MAC.ParamCount, in.MAC.Parameters, in.IndexesWithA0_X, false);
}
if (in.ORegSource == SRC_MAC && in.ORegDest.Mask) {
WriteOp(VSH_MAC_HLSL[in.MAC.Opcode], in.ORegDest, in.MAC.ParamCount, in.MAC.Parameters, in.IndexesWithA0_X, false);
}
}
// Write ILU op
if (in.ILU.Opcode != ILU_NOP) {
if (in.ILU.Dest.Mask) {
WriteOp(VSH_ILU_HLSL[in.ILU.Opcode], in.ILU.Dest, 1, &in.ILU.Parameter, in.IndexesWithA0_X, iluTemp);
}
if (in.ORegSource == SRC_ILU && in.ORegDest.Mask) {
WriteOp(VSH_ILU_HLSL[in.ILU.Opcode], in.ORegDest, 1, &in.ILU.Parameter, in.IndexesWithA0_X, iluTemp);
}
}
hlsl << "\n"; // Group operations by instruction
}
}

View File

@ -528,7 +528,7 @@ namespace XboxVertexShaderDecoder
return ((((CReg >> 5) & 7) - 3) * 32) + (CReg & 31);
}
static void VshConvertIntermediateParam(VSH_IMD_PARAMETER& Param,
static VSH_IMD_PARAMETER VshGetIntermediateParam(
uint32_t* pShaderToken,
VSH_FIELD_NAME FLD_MUX,
VSH_FIELD_NAME FLD_NEG,
@ -536,80 +536,30 @@ namespace XboxVertexShaderDecoder
uint16_t V,
uint16_t C)
{
Param.ParameterType = (VSH_PARAMETER_TYPE)VshGetField(pShaderToken, FLD_MUX);
switch (Param.ParameterType) {
VSH_IMD_PARAMETER param{};
param.Type = (VSH_IMD_PARAMETER_TYPE)VshGetField(pShaderToken, FLD_MUX);
switch (param.Type) {
case PARAM_R:
Param.Address = R;
param.Address = R;
break;
case PARAM_V:
Param.Address = V;
param.Address = V;
break;
case PARAM_C:
Param.Address = C;
param.Address = C;
break;
default:
LOG_TEST_CASE("parameter type unknown");
}
int d = FLD_NEG - FLD_A_NEG;
Param.Neg = VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_NEG)) > 0;
Param.Swizzle[0] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_X));
Param.Swizzle[1] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Y));
Param.Swizzle[2] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Z));
Param.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_W));
}
param.Neg = VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_NEG)) > 0;
param.Swizzle[0] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_X));
param.Swizzle[1] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Y));
param.Swizzle[2] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_Z));
param.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_W));
static void VshAddIntermediateInstruction(
uint32_t* pShaderToken,
IntermediateVertexShader* pShader,
VSH_MAC MAC,
VSH_ILU ILU,
VSH_IMD_OUTPUT_TYPE output_type,
int16_t output_address,
int8_t output_mask)
{
// Is the output mask set?
if (output_mask == 0) {
return;
}
if (pShader->Instructions.size() >= VSH_MAX_INTERMEDIATE_COUNT) {
CxbxrAbort("Shader exceeds conversion buffer!");
}
VSH_INTERMEDIATE_FORMAT intermediate;
intermediate.MAC = MAC;
intermediate.ILU = ILU;
intermediate.Output.Type = output_type;
intermediate.Output.Address = output_address;
intermediate.Output.Mask = output_mask;
// Get a0.x indirect constant addressing
intermediate.IndexesWithA0_X = VshGetField(pShaderToken, FLD_A0X) > 0; // Applies to PARAM_C parameter reads
int16_t R;
int16_t V = VshGetField(pShaderToken, FLD_V);
int16_t C = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST));
intermediate.ParamCount = 0;
if (MAC >= MAC_MOV) {
// Get parameter A
R = VshGetField(pShaderToken, FLD_A_R);
VshConvertIntermediateParam(intermediate.Parameters[intermediate.ParamCount++], pShaderToken, FLD_A_MUX, FLD_A_NEG, R, V, C);
}
if ((MAC == MAC_MUL) || ((MAC >= MAC_MAD) && (MAC <= MAC_SGE))) {
// Get parameter B
R = VshGetField(pShaderToken, FLD_B_R);
VshConvertIntermediateParam(intermediate.Parameters[intermediate.ParamCount++], pShaderToken, FLD_B_MUX, FLD_B_NEG, R, V, C);
}
if ((ILU >= ILU_MOV) || (MAC == MAC_ADD) || (MAC == MAC_MAD)) {
// Get parameter C
R = VshGetField(pShaderToken, FLD_C_R_HIGH) << 2 | VshGetField(pShaderToken, FLD_C_R_LOW);
VshConvertIntermediateParam(intermediate.Parameters[intermediate.ParamCount++], pShaderToken, FLD_C_MUX, FLD_C_NEG, R, V, C);
}
// Add the instruction to the shader
pShader->Instructions.push_back(intermediate);
return param;
}
static bool VshConvertToIntermediate(uint32_t* pShaderToken, IntermediateVertexShader* pShader)
@ -619,52 +569,86 @@ namespace XboxVertexShaderDecoder
VSH_MAC MAC = (VSH_MAC)VshGetField(pShaderToken, FLD_MAC);
if (MAC > MAC_ARL) LOG_TEST_CASE("Unknown MAC");
// Output register
VSH_OUTPUT_MUX OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX);
int16_t OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS);
VSH_IMD_OUTPUT_TYPE OutputType;
if ((VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB) == OUTPUT_C) {
OutputType = IMD_OUTPUT_C;
OutputAddress = ConvertCRegister(OutputAddress);
} else { // OUTPUT_O:
OutputType = IMD_OUTPUT_O;
OutputAddress = OutputAddress & 0xF;
}
// MAC,ILU output R register
int16_t RAddress = VshGetField(pShaderToken, FLD_OUT_R);
// Test for paired opcodes
bool bIsPaired = (MAC != MAC_NOP) && (ILU != ILU_NOP);
VSH_IMD_MAC_OP MacOp{};
VSH_IMD_ILU_OP IluOp{};
// Set up input registers
int16_t AR = VshGetField(pShaderToken, FLD_A_R);
int16_t BR = VshGetField(pShaderToken, FLD_B_R);
int16_t CR = VshGetField(pShaderToken, FLD_C_R_HIGH) << 2 | VshGetField(pShaderToken, FLD_C_R_LOW);
int16_t V = VshGetField(pShaderToken, FLD_V);
int16_t C = ConvertCRegister(VshGetField(pShaderToken, FLD_CONST));
// Check if there's a MAC opcode
if (MAC > MAC_NOP && MAC <= MAC_ARL) {
if (MAC != MAC_NOP && MAC <= MAC_ARL) {
MacOp.Opcode = MAC;
if (bIsPaired && RAddress == 1) {
// Ignore paired MAC opcodes that write to R1
} else {
if (MAC == MAC_ARL) {
VshAddIntermediateInstruction(pShaderToken, pShader, MAC, ILU_NOP, IMD_OUTPUT_A0X, 0, MASK_X);
} else {
VshAddIntermediateInstruction(pShaderToken, pShader, MAC, ILU_NOP, IMD_OUTPUT_R, RAddress, VshGetField(pShaderToken, FLD_OUT_MAC_MASK));
}
}
else if (MAC == MAC_ARL) {
MacOp.Dest.Type = IMD_DEST_A0X;
MacOp.Dest.Mask = MASK_X;
}
else {
MacOp.Dest.Type = IMD_DEST_R;
MacOp.Dest.Address = RAddress;
MacOp.Dest.Mask = VshGetField(pShaderToken, FLD_OUT_MAC_MASK);
}
// Check if we must add a muxed MAC opcode as well
if (OutputMux == OMUX_MAC) {
VshAddIntermediateInstruction(pShaderToken, pShader, MAC, ILU_NOP, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK));
if (MAC >= MAC_MOV) {
MacOp.Parameters[MacOp.ParamCount++] = VshGetIntermediateParam(pShaderToken, FLD_A_MUX, FLD_A_NEG, AR, V, C);
}
if (MAC == MAC_MUL || (MAC >= MAC_MAD && MAC <= MAC_SGE)) {
MacOp.Parameters[MacOp.ParamCount++] = VshGetIntermediateParam(pShaderToken, FLD_B_MUX, FLD_B_NEG, BR, V, C);
}
if (MAC == MAC_ADD || MAC == MAC_MAD) {
MacOp.Parameters[MacOp.ParamCount++] = VshGetIntermediateParam(pShaderToken, FLD_C_MUX, FLD_C_NEG, CR, V, C);
}
}
// Check if there's an ILU opcode
if (ILU != ILU_NOP) {
// Paired ILU opcodes will only write to R1
VshAddIntermediateInstruction(pShaderToken, pShader, MAC_NOP, ILU, IMD_OUTPUT_R, bIsPaired ? 1 : RAddress, VshGetField(pShaderToken, FLD_OUT_ILU_MASK));
// Check if we must add a muxed ILU opcode as well
if (OutputMux == OMUX_ILU) {
VshAddIntermediateInstruction(pShaderToken, pShader, MAC_NOP, ILU, OutputType, OutputAddress, VshGetField(pShaderToken, FLD_OUT_O_MASK));
}
IluOp.Opcode = ILU;
IluOp.Dest.Type = IMD_DEST_R;
IluOp.Dest.Address = bIsPaired ? 1 : RAddress;
IluOp.Dest.Mask = VshGetField(pShaderToken, FLD_OUT_ILU_MASK);
IluOp.Parameter = VshGetIntermediateParam(pShaderToken, FLD_C_MUX, FLD_C_NEG, CR, V, C);
}
// Output register
VSH_OUTPUT_MUX OutputMux = (VSH_OUTPUT_MUX)VshGetField(pShaderToken, FLD_OUT_MUX);
int16_t OutputAddress = VshGetField(pShaderToken, FLD_OUT_ADDRESS);
VSH_IMD_DEST_TYPE OutputType;
if ((VSH_OUTPUT_TYPE)VshGetField(pShaderToken, FLD_OUT_ORB) == OUTPUT_C) {
OutputType = IMD_DEST_C;
OutputAddress = ConvertCRegister(OutputAddress);
}
else { // OUTPUT_O:
OutputType = IMD_DEST_O;
OutputAddress = OutputAddress & 0xF;
}
VSH_IMD_INSTR imd{};
imd.MAC = MacOp;
imd.ILU = IluOp;
imd.IndexesWithA0_X = VshGetField(pShaderToken, FLD_A0X) > 0;
imd.ORegSource = OutputMux == OMUX_MAC ? SRC_MAC : SRC_ILU;
imd.ORegDest.Type = OutputType;
imd.ORegDest.Address = OutputAddress;
imd.ORegDest.Mask = VshGetField(pShaderToken, FLD_OUT_O_MASK);
pShader->Instructions.push_back(imd);
return VshGetField(pShaderToken, FLD_FINAL) == 0;
}
};

View File

@ -147,18 +147,18 @@ enum VSH_MAC { // Dxbx note : MAC stands for 'Multiply And Accumulate' opcodes
// ??? 15 - 2 values of the 4 bits are undefined
};
enum VSH_IMD_OUTPUT_TYPE {
IMD_OUTPUT_C,
IMD_OUTPUT_R,
IMD_OUTPUT_O,
IMD_OUTPUT_A0X
enum VSH_IMD_DEST_TYPE {
IMD_DEST_C,
IMD_DEST_R,
IMD_DEST_O,
IMD_DEST_A0X
};
typedef struct _VSH_IMD_OUTPUT {
VSH_IMD_OUTPUT_TYPE Type;
int16_t Address;
int8_t Mask;
} VSH_IMD_OUTPUT;
typedef struct {
VSH_IMD_DEST_TYPE Type;
int16_t Address;
int8_t Mask; // If 0 skip writing to this output
} VSH_IMD_DEST;
enum VSH_SWIZZLE {
SWIZZLE_X = 0,
@ -167,7 +167,7 @@ enum VSH_SWIZZLE {
SWIZZLE_W
};
enum VSH_PARAMETER_TYPE {
enum VSH_IMD_PARAMETER_TYPE {
PARAM_UNKNOWN = 0,
PARAM_R, // Temporary (scRatch) registers
PARAM_V, // Vertex registers
@ -175,28 +175,49 @@ enum VSH_PARAMETER_TYPE {
PARAM_O // = 0??
};
typedef struct _VSH_IMD_PARAMETER {
VSH_PARAMETER_TYPE ParameterType; // Parameter type, R, V or C
typedef struct {
VSH_IMD_PARAMETER_TYPE Type; // Parameter type, R, V or C
bool Neg; // true if negated, false if not
VSH_SWIZZLE Swizzle[4]; // The four swizzles
int16_t Address; // Register address
} VSH_IMD_PARAMETER;
typedef struct _VSH_INTERMEDIATE_FORMAT {
VSH_MAC MAC;
VSH_ILU ILU;
VSH_IMD_OUTPUT Output;
unsigned ParamCount;
VSH_IMD_PARAMETER Parameters[3];
// There is only a single address register in Microsoft DirectX 8.0.
// The address register, designated as a0.x, may be used as signed
// integer offset in relative addressing into the constant register file.
// c[a0.x + n]
bool IndexesWithA0_X;
} VSH_INTERMEDIATE_FORMAT;
typedef struct {
VSH_ILU Opcode;
VSH_IMD_DEST Dest;
VSH_IMD_PARAMETER Parameter;
} VSH_IMD_ILU_OP;
typedef struct _IntermediateVertexShader {
std::vector<VSH_INTERMEDIATE_FORMAT> Instructions;
typedef struct {
VSH_MAC Opcode;
VSH_IMD_DEST Dest;
uint8_t ParamCount;
VSH_IMD_PARAMETER Parameters[3];
} VSH_IMD_MAC_OP;
enum VSH_IMD_OREG_SOURCE {
SRC_MAC,
SRC_ILU,
};
// Intermediate decoded VSH instruction
// Up to two operations (MAC and ILU)
// Writes to up to 3 destination registers
// One dest per op + one output register
typedef struct {
VSH_IMD_MAC_OP MAC;
VSH_IMD_ILU_OP ILU;
VSH_IMD_OREG_SOURCE ORegSource;
VSH_IMD_DEST ORegDest;
// True if the constant input C should use the index register a0
// c[a0.x + n]
bool IndexesWithA0_X;
} VSH_IMD_INSTR;
typedef struct {
std::vector<VSH_IMD_INSTR> Instructions;
} IntermediateVertexShader;
// parse xbox vertex shader function into an intermediate format