[D3D12] DXBC: Fix indexable temp usage

This commit is contained in:
Triang3l 2018-09-07 22:04:57 +03:00
parent 04544945d0
commit bc9b95ac57
1 changed files with 114 additions and 40 deletions

View File

@ -410,49 +410,69 @@ void DxbcShaderTranslator::StartVertexShader() {
void DxbcShaderTranslator::StartPixelShader() { void DxbcShaderTranslator::StartPixelShader() {
// Copy interpolants to GPRs. // Copy interpolants to GPRs.
uint32_t interpolator_count = std::min(kInterpolatorCount, register_count()); uint32_t interpolator_count = std::min(kInterpolatorCount, register_count());
for (uint32_t i = 0; i < interpolator_count; ++i) { if (uses_register_dynamic_addressing()) {
++stat_.instruction_count; // Copy through r# to x0[#].
if (uses_register_dynamic_addressing()) { uint32_t interpolator_temp_register = PushSystemTemp();
++stat_.array_instruction_count; for (uint32_t i = 0; i < interpolator_count; ++i) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(interpolator_temp_register);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1));
shader_code_.push_back(kPSInInterpolatorRegister + i);
++stat_.instruction_count;
++stat_.mov_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b1111, 2)); D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b1111, 2));
shader_code_.push_back(0); shader_code_.push_back(0);
} else { shader_code_.push_back(i);
++stat_.mov_instruction_count; shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(interpolator_temp_register);
++stat_.instruction_count;
++stat_.array_instruction_count;
}
PopSystemTemp();
} else {
// Copy directly to r#.
for (uint32_t i = 0; i < interpolator_count; ++i) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(i);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1));
shader_code_.push_back(kPSInInterpolatorRegister + i);
++stat_.instruction_count;
++stat_.mov_instruction_count;
} }
shader_code_.push_back(i);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1));
shader_code_.push_back(kPSInInterpolatorRegister + i);
} }
// TODO(Triang3l): ps_param_gen. // TODO(Triang3l): ps_param_gen.
// Initialize color indexable temporary registers so they have a defined value // Initialize color indexable temporary registers so they have a defined value
// in case the shader doesn't write to all used outputs on all execution // in case the shader doesn't write to all used outputs on all execution
// paths. // paths. This must be done via r#.
uint32_t zero_temp_register = PushSystemTemp(true);
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b1111, 2)); D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b1111, 2));
shader_code_.push_back(GetColorIndexableTemp()); shader_code_.push_back(GetColorIndexableTemp());
shader_code_.push_back(i); shader_code_.push_back(i);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(0); shader_code_.push_back(zero_temp_register);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.array_instruction_count; ++stat_.array_instruction_count;
} }
PopSystemTemp();
} }
void DxbcShaderTranslator::StartTranslation() { void DxbcShaderTranslator::StartTranslation() {
@ -491,9 +511,10 @@ void DxbcShaderTranslator::CompleteVertexShader() {
void DxbcShaderTranslator::CompletePixelShader() { void DxbcShaderTranslator::CompletePixelShader() {
uint32_t color_indexable_temp = GetColorIndexableTemp(); uint32_t color_indexable_temp = GetColorIndexableTemp();
// Allocate a temporary register for alpha testing, color indexable temp // Allocate temporary registers for alpha testing, color indexable temp
// load/store and storing the color output map. // load/store and storing the color output map.
uint32_t temp_register = PushSystemTemp(); uint32_t temp1 = PushSystemTemp();
uint32_t temp2 = PushSystemTemp();
// TODO(Triang3l): Alpha testing. // TODO(Triang3l): Alpha testing.
@ -507,7 +528,7 @@ void DxbcShaderTranslator::CompletePixelShader() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(temp_register); shader_code_.push_back(temp1);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, kSwizzleXYZW, 2)); D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, kSwizzleXYZW, 2));
shader_code_.push_back(color_indexable_temp); shader_code_.push_back(color_indexable_temp);
@ -519,10 +540,10 @@ void DxbcShaderTranslator::CompletePixelShader() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(temp_register); shader_code_.push_back(temp1);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(temp_register); shader_code_.push_back(temp1);
shader_code_.push_back(EncodeVectorReplicatedOperand( shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3)); D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3));
shader_code_.push_back(uint32_t(RdefConstantBufferIndex::kSystemConstants)); shader_code_.push_back(uint32_t(RdefConstantBufferIndex::kSystemConstants));
@ -539,18 +560,22 @@ void DxbcShaderTranslator::CompletePixelShader() {
shader_code_.push_back(i); shader_code_.push_back(i);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(temp_register); shader_code_.push_back(temp1);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.array_instruction_count; ++stat_.array_instruction_count;
} }
// Remap guest render target indices to host since because on the host, the // Remap guest render target indices to host since because on the host, the
// indices of the bound render targets are consecutive. // indices of the bound render targets are consecutive.
// temp = xe_color_output_map // temp1 = xe_color_output_map
// SV_Target0 = oC[temp.x] // temp2 = oC[temp1.x]
// SV_Target1 = oC[temp.y] // SV_Target0 = temp2
// SV_Target2 = oC[temp.z] // temp2 = oC[temp1.y]
// SV_Target3 = oC[temp.w] // SV_Target1 = temp2
// temp2 = oC[temp1.w]
// SV_Target2 = temp2
// temp2 = oC[temp1.z]
// SV_Target3 = temp2
// //
// Load the constant to a temporary register so it can be used for relative // Load the constant to a temporary register so it can be used for relative
// addressing. // addressing.
@ -560,7 +585,7 @@ void DxbcShaderTranslator::CompletePixelShader() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(temp_register); shader_code_.push_back(temp1);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_code_.push_back(uint32_t(RdefConstantBufferIndex::kSystemConstants)); shader_code_.push_back(uint32_t(RdefConstantBufferIndex::kSystemConstants));
@ -570,11 +595,12 @@ void DxbcShaderTranslator::CompletePixelShader() {
++stat_.mov_instruction_count; ++stat_.mov_instruction_count;
// Do the remapping. // Do the remapping.
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
// Copy to r# because indexable temps must be loaded/stored via r#.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(i); shader_code_.push_back(temp2);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, kSwizzleXYZW, 2, D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, kSwizzleXYZW, 2,
D3D10_SB_OPERAND_INDEX_IMMEDIATE32, D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
@ -582,13 +608,24 @@ void DxbcShaderTranslator::CompletePixelShader() {
shader_code_.push_back(color_indexable_temp); shader_code_.push_back(color_indexable_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
shader_code_.push_back(temp_register); shader_code_.push_back(temp1);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.array_instruction_count; ++stat_.array_instruction_count;
// Write to o#.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
shader_code_.push_back(i);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(temp2);
++stat_.instruction_count;
++stat_.mov_instruction_count;
} }
// Free the temporary register. // Free the temporary registers.
PopSystemTemp(); PopSystemTemp(2);
} }
void DxbcShaderTranslator::CompleteShaderCode() { void DxbcShaderTranslator::CompleteShaderCode() {
@ -1387,6 +1424,31 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
} }
} }
// If writing to an indexable temp, the constant part must be written via r#.
uint32_t constant_temp = UINT32_MAX;
if (constant_mask != 0) {
if ((result.storage_target == InstructionStorageTarget::kRegister &&
uses_register_dynamic_addressing()) ||
result.storage_target == InstructionStorageTarget::kColorTarget) {
constant_temp = PushSystemTemp();
}
}
if (constant_temp != UINT32_MAX) {
// Load constants to r#.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, constant_mask, 1));
shader_code_.push_back(constant_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
for (uint32_t j = 0; j < 4; ++j) {
shader_code_.push_back((constant_values & (1 << j)) ? 0x3F800000 : 0);
}
++stat_.instruction_count;
++stat_.mov_instruction_count;
}
// Store both parts of the write (i == 0 - swizzled, i == 1 - constant). // Store both parts of the write (i == 0 - swizzled, i == 1 - constant).
for (uint32_t i = 0; i < 2; ++i) { for (uint32_t i = 0; i < 2; ++i) {
uint32_t mask = i == 0 ? swizzle_mask : constant_mask; uint32_t mask = i == 0 ? swizzle_mask : constant_mask;
@ -1395,7 +1457,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
} }
// r# for the swizzled part, 4-component imm32 for the constant part. // r# for the swizzled part, 4-component imm32 for the constant part.
uint32_t source_length = i == 0 ? 2 : 5; uint32_t source_length = (i == 1 && constant_temp == UINT32_MAX) ? 5 : 2;
switch (result.storage_target) { switch (result.storage_target) {
case InstructionStorageTarget::kRegister: case InstructionStorageTarget::kRegister:
if (uses_register_dynamic_addressing()) { if (uses_register_dynamic_addressing()) {
@ -1493,14 +1555,26 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
D3D10_SB_OPERAND_TYPE_TEMP, swizzle_components, 1)); D3D10_SB_OPERAND_TYPE_TEMP, swizzle_components, 1));
shader_code_.push_back(reg); shader_code_.push_back(reg);
} else { } else {
// Load constants. if (constant_temp != UINT32_MAX) {
shader_code_.push_back(EncodeVectorSwizzledOperand( // Load constants from the r#.
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); shader_code_.push_back(EncodeVectorSwizzledOperand(
for (uint32_t j = 0; j < 4; ++j) { D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back((constant_values & (1 << j)) ? 0x3F800000 : 0); shader_code_.push_back(constant_temp);
} else {
// Load constants from an immediate.
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
for (uint32_t j = 0; j < 4; ++j) {
shader_code_.push_back((constant_values & (1 << j)) ? 0x3F800000 : 0);
}
} }
} }
} }
// Free the r# with constants if used.
if (constant_temp != UINT32_MAX) {
PopSystemTemp();
}
} }
void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index, void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,