[D3D12] DXBC: eA and eM registers
This commit is contained in:
parent
e803ee84d5
commit
bd9aae016f
|
@ -126,6 +126,8 @@ void DxbcShaderTranslator::Reset() {
|
|||
texture_srvs_.clear();
|
||||
sampler_bindings_.clear();
|
||||
|
||||
memexport_alloc_current_count_ = 0;
|
||||
|
||||
std::memset(&stat_, 0, sizeof(stat_));
|
||||
}
|
||||
|
||||
|
@ -967,6 +969,33 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
}
|
||||
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// Allocate temporary registers for memexport addresses and data.
|
||||
std::memset(system_temps_memexport_address_, 0xFF,
|
||||
sizeof(system_temps_memexport_address_));
|
||||
std::memset(system_temps_memexport_data_, 0xFF,
|
||||
sizeof(system_temps_memexport_data_));
|
||||
system_temp_memexport_written_ = UINT32_MAX;
|
||||
const uint8_t* memexports_written = memexport_eM_written();
|
||||
for (uint32_t i = 0; i < kMaxMemExports; ++i) {
|
||||
uint32_t memexport_alloc_written = memexports_written[i];
|
||||
if (memexport_alloc_written == 0) {
|
||||
continue;
|
||||
}
|
||||
// If memexport is used at all, allocate a register containing whether eM#
|
||||
// have actually been written to.
|
||||
if (system_temp_memexport_written_ == UINT32_MAX) {
|
||||
system_temp_memexport_written_ = PushSystemTemp(true);
|
||||
}
|
||||
system_temps_memexport_address_[i] = PushSystemTemp(true);
|
||||
uint32_t memexport_data_index;
|
||||
while (xe::bit_scan_forward(memexport_alloc_written,
|
||||
&memexport_data_index)) {
|
||||
memexport_alloc_written &= ~(1u << memexport_data_index);
|
||||
system_temps_memexport_data_[i][memexport_data_index] =
|
||||
PushSystemTemp();
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate system temporary variables for the translated code.
|
||||
system_temp_pv_ = PushSystemTemp(true);
|
||||
system_temp_ps_pc_p0_a0_ = PushSystemTemp(true);
|
||||
|
@ -1266,6 +1295,26 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
|||
// - system_temp_grad_h_lod_.
|
||||
// - system_temp_grad_v_.
|
||||
PopSystemTemp(6);
|
||||
|
||||
// TODO(Triang3l): Do memexport.
|
||||
|
||||
// Release memexport temporary registers.
|
||||
for (int i = kMaxMemExports - 1; i >= 0; --i) {
|
||||
if (system_temps_memexport_address_[i] == UINT32_MAX) {
|
||||
continue;
|
||||
}
|
||||
// Release exported data registers.
|
||||
for (int j = 4; j >= 0; --j) {
|
||||
if (system_temps_memexport_data_[i][j] != UINT32_MAX) {
|
||||
PopSystemTemp();
|
||||
}
|
||||
}
|
||||
// Release the address register.
|
||||
PopSystemTemp();
|
||||
}
|
||||
if (system_temp_memexport_written_ != UINT32_MAX) {
|
||||
PopSystemTemp();
|
||||
}
|
||||
}
|
||||
|
||||
// Write stage-specific epilogue.
|
||||
|
@ -2009,12 +2058,30 @@ void DxbcShaderTranslator::UnloadDxbcSourceOperand(
|
|||
}
|
||||
|
||||
void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
||||
uint32_t reg, bool replicate_x) {
|
||||
uint32_t reg, bool replicate_x,
|
||||
bool can_store_memexport_address) {
|
||||
if (result.storage_target == InstructionStorageTarget::kNone ||
|
||||
!result.has_any_writes()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Validate memexport writes (Halo 3 has some weird invalid ones).
|
||||
if (result.storage_target == InstructionStorageTarget::kExportAddress) {
|
||||
if (!can_store_memexport_address || memexport_alloc_current_count_ == 0 ||
|
||||
memexport_alloc_current_count_ > kMaxMemExports ||
|
||||
system_temps_memexport_address_[memexport_alloc_current_count_ - 1] ==
|
||||
UINT32_MAX) {
|
||||
return;
|
||||
}
|
||||
} else if (result.storage_target == InstructionStorageTarget::kExportData) {
|
||||
if (memexport_alloc_current_count_ == 0 ||
|
||||
memexport_alloc_current_count_ > kMaxMemExports ||
|
||||
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
||||
[result.storage_index] == UINT32_MAX) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t saturate_bit =
|
||||
ENCODE_D3D10_SB_INSTRUCTION_SATURATE(result.is_clamped);
|
||||
|
||||
|
@ -2187,6 +2254,34 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
shader_code_.push_back(system_temp_position_);
|
||||
break;
|
||||
|
||||
case InstructionStorageTarget::kExportAddress:
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||
saturate_bit);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
|
||||
shader_code_.push_back(
|
||||
system_temps_memexport_address_[memexport_alloc_current_count_ -
|
||||
1]);
|
||||
break;
|
||||
|
||||
case InstructionStorageTarget::kExportData:
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||
saturate_bit);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
|
||||
shader_code_.push_back(
|
||||
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
||||
[uint32_t(result.storage_index)]);
|
||||
break;
|
||||
|
||||
case InstructionStorageTarget::kColorTarget:
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
|
@ -2219,6 +2314,25 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
}
|
||||
}
|
||||
|
||||
if (result.storage_target == InstructionStorageTarget::kExportData) {
|
||||
// Mark that the eM# has been written to and needs to be exported.
|
||||
uint32_t memexport_index = memexport_alloc_current_count_ - 1;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, 1 << (memexport_index >> 2), 1));
|
||||
shader_code_.push_back(system_temp_memexport_written_);
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP,
|
||||
memexport_index >> 2, 1));
|
||||
shader_code_.push_back(system_temp_memexport_written_);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(
|
||||
1u << (uint32_t(result.storage_index) + ((memexport_index & 3) << 3)));
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
}
|
||||
|
||||
if (edram_rov_used_ &&
|
||||
result.storage_target == InstructionStorageTarget::kColorTarget) {
|
||||
// For ROV output, mark that the color has been written to.
|
||||
|
@ -2862,6 +2976,19 @@ void DxbcShaderTranslator::ProcessJumpInstruction(
|
|||
JumpToLabel(instr.target_address);
|
||||
}
|
||||
|
||||
// Handles a parsed `alloc` instruction: optionally emits its disassembly, and
// keeps count of the memory allocations seen so far in the translation.
void DxbcShaderTranslator::ProcessAllocInstruction(
|
||||
const ParsedAllocInstruction& instr) {
|
||||
// With FLAGS_dxbc_source_map set, disassemble the instruction into the shared
// disassembly buffer (reset first) and hand it to EmitInstructionDisassembly().
if (FLAGS_dxbc_source_map) {
|
||||
instruction_disassembly_buffer_.Reset();
|
||||
instr.Disassemble(&instruction_disassembly_buffer_);
|
||||
EmitInstructionDisassembly();
|
||||
}
|
||||

||||
// Only allocations of type AllocType::kMemory are counted. StoreResult uses
// this count minus 1 to select the eA/eM# temporary register set, so the
// counter must be advanced before the exports that follow are stored.
if (instr.type == AllocType::kMemory) {
|
||||
++memexport_alloc_current_count_;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t DxbcShaderTranslator::AppendString(std::vector<uint32_t>& dest,
|
||||
const char* source) {
|
||||
size_t size = std::strlen(source) + 1;
|
||||
|
|
|
@ -521,6 +521,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
void ProcessLoopEndInstruction(
|
||||
const ParsedLoopEndInstruction& instr) override;
|
||||
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
|
||||
void ProcessAllocInstruction(const ParsedAllocInstruction& instr) override;
|
||||
|
||||
void ProcessVertexFetchInstruction(
|
||||
const ParsedVertexFetchInstruction& instr) override;
|
||||
|
@ -965,8 +966,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
void UnloadDxbcSourceOperand(const DxbcSourceOperand& operand);
|
||||
|
||||
// Writes xyzw or xxxx of the specified r# to the destination.
|
||||
// can_store_memexport_address is for safety, to allow only proper MADs with
|
||||
// a stream constant to write to eA.
|
||||
void StoreResult(const InstructionResult& result, uint32_t reg,
|
||||
bool replicate_x);
|
||||
bool replicate_x, bool can_store_memexport_address = false);
|
||||
|
||||
// The nesting of `if` instructions is the following:
|
||||
// - pc checks (labels).
|
||||
|
@ -1149,20 +1152,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// translation (for the declaration).
|
||||
uint32_t system_temp_count_max_;
|
||||
|
||||
// Vector ALU result/scratch (since Xenos write masks can contain swizzles).
|
||||
uint32_t system_temp_pv_;
|
||||
// Temporary register ID for previous scalar result, program counter,
|
||||
// predicate and absolute address register.
|
||||
uint32_t system_temp_ps_pc_p0_a0_;
|
||||
// Loop index stack - .x is the active loop, shifted right to .yzw on push.
|
||||
uint32_t system_temp_aL_;
|
||||
// Loop counter stack, .x is the active loop. Represents number of times
|
||||
// remaining to loop.
|
||||
uint32_t system_temp_loop_count_;
|
||||
// Explicitly set texture gradients and LOD.
|
||||
uint32_t system_temp_grad_h_lod_;
|
||||
uint32_t system_temp_grad_v_;
|
||||
|
||||
// Position in vertex shaders (because viewport and W transformations can be
|
||||
// applied in the end of the shader).
|
||||
uint32_t system_temp_position_;
|
||||
|
@ -1182,6 +1171,29 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// - Z - depth Y derivative.
|
||||
uint32_t system_temp_depth_;
|
||||
|
||||
// Bits containing whether each eM# has been written, for up to 16 streams, or
|
||||
// UINT32_MAX if memexport is not used. 8 bits (5 used) for each stream, with
|
||||
// 4 `alloc export`s per component.
|
||||
uint32_t system_temp_memexport_written_;
|
||||
// eA in each `alloc export`, or UINT32_MAX if not used.
|
||||
uint32_t system_temps_memexport_address_[kMaxMemExports];
|
||||
// eM# in each `alloc export`, or UINT32_MAX if not used.
|
||||
uint32_t system_temps_memexport_data_[kMaxMemExports][5];
|
||||
|
||||
// Vector ALU result/scratch (since Xenos write masks can contain swizzles).
|
||||
uint32_t system_temp_pv_;
|
||||
// Temporary register ID for previous scalar result, program counter,
|
||||
// predicate and absolute address register.
|
||||
uint32_t system_temp_ps_pc_p0_a0_;
|
||||
// Loop index stack - .x is the active loop, shifted right to .yzw on push.
|
||||
uint32_t system_temp_aL_;
|
||||
// Loop counter stack, .x is the active loop. Represents number of times
|
||||
// remaining to loop.
|
||||
uint32_t system_temp_loop_count_;
|
||||
// Explicitly set texture gradients and LOD.
|
||||
uint32_t system_temp_grad_h_lod_;
|
||||
uint32_t system_temp_grad_v_;
|
||||
|
||||
// The bool constant number containing the condition for the currently
|
||||
// processed exec (or the last - unless a label has reset this), or
|
||||
// kCfExecBoolConstantNone if it's not checked.
|
||||
|
@ -1209,6 +1221,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
std::vector<TextureSRV> texture_srvs_;
|
||||
std::vector<SamplerBinding> sampler_bindings_;
|
||||
|
||||
// Number of `alloc export`s encountered so far in the translation. The index
|
||||
// of the current eA/eM# temp register set is this minus 1, if it's not 0.
|
||||
uint32_t memexport_alloc_current_count_;
|
||||
|
||||
// The STAT chunk (based on Wine d3dcompiler_parse_stat).
|
||||
struct Statistics {
|
||||
uint32_t instruction_count;
|
||||
|
|
|
@ -1289,7 +1289,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
|
|||
}
|
||||
}
|
||||
|
||||
StoreResult(instr.result, system_temp_pv_, replicate_result);
|
||||
StoreResult(instr.result, system_temp_pv_, replicate_result,
|
||||
instr.GetMemExportStreamConstant() != UINT32_MAX);
|
||||
|
||||
if (predicate_written) {
|
||||
cf_exec_predicate_written_ = true;
|
||||
|
|
Loading…
Reference in New Issue