[D3D12] DXBC: eA and eM registers
This commit is contained in:
parent
e803ee84d5
commit
bd9aae016f
|
@ -126,6 +126,8 @@ void DxbcShaderTranslator::Reset() {
|
||||||
texture_srvs_.clear();
|
texture_srvs_.clear();
|
||||||
sampler_bindings_.clear();
|
sampler_bindings_.clear();
|
||||||
|
|
||||||
|
memexport_alloc_current_count_ = 0;
|
||||||
|
|
||||||
std::memset(&stat_, 0, sizeof(stat_));
|
std::memset(&stat_, 0, sizeof(stat_));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -967,6 +969,33 @@ void DxbcShaderTranslator::StartTranslation() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is_depth_only_pixel_shader_) {
|
if (!is_depth_only_pixel_shader_) {
|
||||||
|
// Allocate temporary registers for memexport addresses and data.
|
||||||
|
std::memset(system_temps_memexport_address_, 0xFF,
|
||||||
|
sizeof(system_temps_memexport_address_));
|
||||||
|
std::memset(system_temps_memexport_data_, 0xFF,
|
||||||
|
sizeof(system_temps_memexport_data_));
|
||||||
|
system_temp_memexport_written_ = UINT32_MAX;
|
||||||
|
const uint8_t* memexports_written = memexport_eM_written();
|
||||||
|
for (uint32_t i = 0; i < kMaxMemExports; ++i) {
|
||||||
|
uint32_t memexport_alloc_written = memexports_written[i];
|
||||||
|
if (memexport_alloc_written == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// If memexport is used at all, allocate a register tracking which eM#
|
||||||
|
// registers have actually been written to.
|
||||||
|
if (system_temp_memexport_written_ == UINT32_MAX) {
|
||||||
|
system_temp_memexport_written_ = PushSystemTemp(true);
|
||||||
|
}
|
||||||
|
system_temps_memexport_address_[i] = PushSystemTemp(true);
|
||||||
|
uint32_t memexport_data_index;
|
||||||
|
while (xe::bit_scan_forward(memexport_alloc_written,
|
||||||
|
&memexport_data_index)) {
|
||||||
|
memexport_alloc_written &= ~(1u << memexport_data_index);
|
||||||
|
system_temps_memexport_data_[i][memexport_data_index] =
|
||||||
|
PushSystemTemp();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Allocate system temporary variables for the translated code.
|
// Allocate system temporary variables for the translated code.
|
||||||
system_temp_pv_ = PushSystemTemp(true);
|
system_temp_pv_ = PushSystemTemp(true);
|
||||||
system_temp_ps_pc_p0_a0_ = PushSystemTemp(true);
|
system_temp_ps_pc_p0_a0_ = PushSystemTemp(true);
|
||||||
|
@ -1266,6 +1295,26 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
||||||
// - system_temp_grad_h_lod_.
|
// - system_temp_grad_h_lod_.
|
||||||
// - system_temp_grad_v_.
|
// - system_temp_grad_v_.
|
||||||
PopSystemTemp(6);
|
PopSystemTemp(6);
|
||||||
|
|
||||||
|
// TODO(Triang3l): Do memexport.
|
||||||
|
|
||||||
|
// Release memexport temporary registers.
|
||||||
|
for (int i = kMaxMemExports - 1; i >= 0; --i) {
|
||||||
|
if (system_temps_memexport_address_[i] == UINT32_MAX) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Release exported data registers.
|
||||||
|
for (int j = 4; j >= 0; --j) {
|
||||||
|
if (system_temps_memexport_data_[i][j] != UINT32_MAX) {
|
||||||
|
PopSystemTemp();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Release the address register.
|
||||||
|
PopSystemTemp();
|
||||||
|
}
|
||||||
|
if (system_temp_memexport_written_ != UINT32_MAX) {
|
||||||
|
PopSystemTemp();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write stage-specific epilogue.
|
// Write stage-specific epilogue.
|
||||||
|
@ -2009,12 +2058,30 @@ void DxbcShaderTranslator::UnloadDxbcSourceOperand(
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
||||||
uint32_t reg, bool replicate_x) {
|
uint32_t reg, bool replicate_x,
|
||||||
|
bool can_store_memexport_address) {
|
||||||
if (result.storage_target == InstructionStorageTarget::kNone ||
|
if (result.storage_target == InstructionStorageTarget::kNone ||
|
||||||
!result.has_any_writes()) {
|
!result.has_any_writes()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate memexport writes (Halo 3 has some weird invalid ones).
|
||||||
|
if (result.storage_target == InstructionStorageTarget::kExportAddress) {
|
||||||
|
if (!can_store_memexport_address || memexport_alloc_current_count_ == 0 ||
|
||||||
|
memexport_alloc_current_count_ > kMaxMemExports ||
|
||||||
|
system_temps_memexport_address_[memexport_alloc_current_count_ - 1] ==
|
||||||
|
UINT32_MAX) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else if (result.storage_target == InstructionStorageTarget::kExportData) {
|
||||||
|
if (memexport_alloc_current_count_ == 0 ||
|
||||||
|
memexport_alloc_current_count_ > kMaxMemExports ||
|
||||||
|
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
||||||
|
[result.storage_index] == UINT32_MAX) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t saturate_bit =
|
uint32_t saturate_bit =
|
||||||
ENCODE_D3D10_SB_INSTRUCTION_SATURATE(result.is_clamped);
|
ENCODE_D3D10_SB_INSTRUCTION_SATURATE(result.is_clamped);
|
||||||
|
|
||||||
|
@ -2187,6 +2254,34 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
||||||
shader_code_.push_back(system_temp_position_);
|
shader_code_.push_back(system_temp_position_);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case InstructionStorageTarget::kExportAddress:
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.mov_instruction_count;
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||||
|
saturate_bit);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
|
||||||
|
shader_code_.push_back(
|
||||||
|
system_temps_memexport_address_[memexport_alloc_current_count_ -
|
||||||
|
1]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case InstructionStorageTarget::kExportData:
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.mov_instruction_count;
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||||
|
saturate_bit);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
|
||||||
|
shader_code_.push_back(
|
||||||
|
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
||||||
|
[uint32_t(result.storage_index)]);
|
||||||
|
break;
|
||||||
|
|
||||||
case InstructionStorageTarget::kColorTarget:
|
case InstructionStorageTarget::kColorTarget:
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.mov_instruction_count;
|
++stat_.mov_instruction_count;
|
||||||
|
@ -2219,6 +2314,25 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (result.storage_target == InstructionStorageTarget::kExportData) {
|
||||||
|
// Mark that the eM# has been written to and needs to be exported.
|
||||||
|
uint32_t memexport_index = memexport_alloc_current_count_ - 1;
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 1 << (memexport_index >> 2), 1));
|
||||||
|
shader_code_.push_back(system_temp_memexport_written_);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP,
|
||||||
|
memexport_index >> 2, 1));
|
||||||
|
shader_code_.push_back(system_temp_memexport_written_);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(
|
||||||
|
1u << (uint32_t(result.storage_index) + ((memexport_index & 3) << 3)));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
}
|
||||||
|
|
||||||
if (edram_rov_used_ &&
|
if (edram_rov_used_ &&
|
||||||
result.storage_target == InstructionStorageTarget::kColorTarget) {
|
result.storage_target == InstructionStorageTarget::kColorTarget) {
|
||||||
// For ROV output, mark that the color has been written to.
|
// For ROV output, mark that the color has been written to.
|
||||||
|
@ -2862,6 +2976,19 @@ void DxbcShaderTranslator::ProcessJumpInstruction(
|
||||||
JumpToLabel(instr.target_address);
|
JumpToLabel(instr.target_address);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DxbcShaderTranslator::ProcessAllocInstruction(
|
||||||
|
const ParsedAllocInstruction& instr) {
|
||||||
|
if (FLAGS_dxbc_source_map) {
|
||||||
|
instruction_disassembly_buffer_.Reset();
|
||||||
|
instr.Disassemble(&instruction_disassembly_buffer_);
|
||||||
|
EmitInstructionDisassembly();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (instr.type == AllocType::kMemory) {
|
||||||
|
++memexport_alloc_current_count_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t DxbcShaderTranslator::AppendString(std::vector<uint32_t>& dest,
|
uint32_t DxbcShaderTranslator::AppendString(std::vector<uint32_t>& dest,
|
||||||
const char* source) {
|
const char* source) {
|
||||||
size_t size = std::strlen(source) + 1;
|
size_t size = std::strlen(source) + 1;
|
||||||
|
|
|
@ -521,6 +521,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
void ProcessLoopEndInstruction(
|
void ProcessLoopEndInstruction(
|
||||||
const ParsedLoopEndInstruction& instr) override;
|
const ParsedLoopEndInstruction& instr) override;
|
||||||
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
|
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
|
||||||
|
void ProcessAllocInstruction(const ParsedAllocInstruction& instr) override;
|
||||||
|
|
||||||
void ProcessVertexFetchInstruction(
|
void ProcessVertexFetchInstruction(
|
||||||
const ParsedVertexFetchInstruction& instr) override;
|
const ParsedVertexFetchInstruction& instr) override;
|
||||||
|
@ -965,8 +966,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
void UnloadDxbcSourceOperand(const DxbcSourceOperand& operand);
|
void UnloadDxbcSourceOperand(const DxbcSourceOperand& operand);
|
||||||
|
|
||||||
// Writes xyzw or xxxx of the specified r# to the destination.
|
// Writes xyzw or xxxx of the specified r# to the destination.
|
||||||
|
// can_store_memexport_address is for safety, to allow only proper MADs with
|
||||||
|
// a stream constant to write to eA.
|
||||||
void StoreResult(const InstructionResult& result, uint32_t reg,
|
void StoreResult(const InstructionResult& result, uint32_t reg,
|
||||||
bool replicate_x);
|
bool replicate_x, bool can_store_memexport_address = false);
|
||||||
|
|
||||||
// The nesting of `if` instructions is the following:
|
// The nesting of `if` instructions is the following:
|
||||||
// - pc checks (labels).
|
// - pc checks (labels).
|
||||||
|
@ -1149,20 +1152,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// translation (for the declaration).
|
// translation (for the declaration).
|
||||||
uint32_t system_temp_count_max_;
|
uint32_t system_temp_count_max_;
|
||||||
|
|
||||||
// Vector ALU result/scratch (since Xenos write masks can contain swizzles).
|
|
||||||
uint32_t system_temp_pv_;
|
|
||||||
// Temporary register ID for previous scalar result, program counter,
|
|
||||||
// predicate and absolute address register.
|
|
||||||
uint32_t system_temp_ps_pc_p0_a0_;
|
|
||||||
// Loop index stack - .x is the active loop, shifted right to .yzw on push.
|
|
||||||
uint32_t system_temp_aL_;
|
|
||||||
// Loop counter stack, .x is the active loop. Represents number of times
|
|
||||||
// remaining to loop.
|
|
||||||
uint32_t system_temp_loop_count_;
|
|
||||||
// Explicitly set texture gradients and LOD.
|
|
||||||
uint32_t system_temp_grad_h_lod_;
|
|
||||||
uint32_t system_temp_grad_v_;
|
|
||||||
|
|
||||||
// Position in vertex shaders (because viewport and W transformations can be
|
// Position in vertex shaders (because viewport and W transformations can be
|
||||||
// applied in the end of the shader).
|
// applied in the end of the shader).
|
||||||
uint32_t system_temp_position_;
|
uint32_t system_temp_position_;
|
||||||
|
@ -1182,6 +1171,29 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// - Z - depth Y derivative.
|
// - Z - depth Y derivative.
|
||||||
uint32_t system_temp_depth_;
|
uint32_t system_temp_depth_;
|
||||||
|
|
||||||
|
// Temporary register holding bits that tell whether each eM# has been
|
||||||
|
// written, for up to 16 streams, or UINT32_MAX if memexport is not used (no
|
||||||
|
// register allocated). 8 bits (5 used) for each stream, with 4 `alloc
// export`s per component.
|
||||||
|
uint32_t system_temp_memexport_written_;
|
||||||
|
// eA in each `alloc export`, or UINT32_MAX if not used.
|
||||||
|
uint32_t system_temps_memexport_address_[kMaxMemExports];
|
||||||
|
// eM# in each `alloc export`, or UINT32_MAX if not used.
|
||||||
|
uint32_t system_temps_memexport_data_[kMaxMemExports][5];
|
||||||
|
|
||||||
|
// Vector ALU result/scratch (since Xenos write masks can contain swizzles).
|
||||||
|
uint32_t system_temp_pv_;
|
||||||
|
// Temporary register ID for previous scalar result, program counter,
|
||||||
|
// predicate and absolute address register.
|
||||||
|
uint32_t system_temp_ps_pc_p0_a0_;
|
||||||
|
// Loop index stack - .x is the active loop, shifted right to .yzw on push.
|
||||||
|
uint32_t system_temp_aL_;
|
||||||
|
// Loop counter stack, .x is the active loop. Represents number of times
|
||||||
|
// remaining to loop.
|
||||||
|
uint32_t system_temp_loop_count_;
|
||||||
|
// Explicitly set texture gradients and LOD.
|
||||||
|
uint32_t system_temp_grad_h_lod_;
|
||||||
|
uint32_t system_temp_grad_v_;
|
||||||
|
|
||||||
// The bool constant number containing the condition for the currently
|
// The bool constant number containing the condition for the currently
|
||||||
// processed exec (or the last - unless a label has reset this), or
|
// processed exec (or the last - unless a label has reset this), or
|
||||||
// kCfExecBoolConstantNone if it's not checked.
|
// kCfExecBoolConstantNone if it's not checked.
|
||||||
|
@ -1209,6 +1221,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
std::vector<TextureSRV> texture_srvs_;
|
std::vector<TextureSRV> texture_srvs_;
|
||||||
std::vector<SamplerBinding> sampler_bindings_;
|
std::vector<SamplerBinding> sampler_bindings_;
|
||||||
|
|
||||||
|
// Number of `alloc export`s encountered so far in the translation. The index
|
||||||
|
// of the current eA/eM# temp register set is this minus 1, if it's not 0.
|
||||||
|
uint32_t memexport_alloc_current_count_;
|
||||||
|
|
||||||
// The STAT chunk (based on Wine d3dcompiler_parse_stat).
|
// The STAT chunk (based on Wine d3dcompiler_parse_stat).
|
||||||
struct Statistics {
|
struct Statistics {
|
||||||
uint32_t instruction_count;
|
uint32_t instruction_count;
|
||||||
|
|
|
@ -1289,7 +1289,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
StoreResult(instr.result, system_temp_pv_, replicate_result);
|
StoreResult(instr.result, system_temp_pv_, replicate_result,
|
||||||
|
instr.GetMemExportStreamConstant() != UINT32_MAX);
|
||||||
|
|
||||||
if (predicate_written) {
|
if (predicate_written) {
|
||||||
cf_exec_predicate_written_ = true;
|
cf_exec_predicate_written_ = true;
|
||||||
|
|
Loading…
Reference in New Issue