From b404f227b1e873cdb79d2a0260112b2cd2b35a6d Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Sat, 1 Sep 2018 22:19:29 +0300
Subject: [PATCH] [D3D12] DXBC internal r# stack allocation

---
Review notes (text between "---" and the diffstat is ignored by git am):

* The fixed TempRegister enum is replaced by a stack-style allocator for
  internal r# registers: PushSystemTemp() hands out the next index,
  PopSystemTemp(count) releases the most recently allocated ones, and
  system_temp_count_max_ records the peak for the dcl_temps declaration.
* When guest registers are not accessed through x0 (no register-relative
  addressing), internal registers are numbered after the register_count()
  guest registers.
* dcl_temps is now emitted only when the total temp register count is
  non-zero.
* Fix relative to the original patch: PushSystemTemp() is declared to return
  uint32_t and is documented in the header as returning the allocated index,
  but its body had no return statement. "return register_index;" was added;
  the hunk line counts and diffstat numbers below account for that one line.

 src/xenia/gpu/dxbc_shader_translator.cc | 37 +++++++++++++++++++------
 src/xenia/gpu/dxbc_shader_translator.h  | 18 ++++++++-----
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc
index 88af2e8b2..0cfd9ed77 100644
--- a/src/xenia/gpu/dxbc_shader_translator.cc
+++ b/src/xenia/gpu/dxbc_shader_translator.cc
@@ -35,11 +35,30 @@ void DxbcShaderTranslator::Reset() {
   shader_code_.clear();
 
   rdef_constants_used_ = 0;
+  system_temp_count_current_ = 0;
+  system_temp_count_max_ = 0;
   writes_depth_ = false;
 
   std::memset(&stat_, 0, sizeof(stat_));
 }
 
+uint32_t DxbcShaderTranslator::PushSystemTemp() {
+  uint32_t register_index = system_temp_count_current_;
+  if (!uses_register_relative_addressing()) {
+    // Guest shader registers first if they're not in x0.
+    register_index += register_count();
+  }
+  ++system_temp_count_current_;
+  system_temp_count_max_ =
+      std::max(system_temp_count_max_, system_temp_count_current_);
+  return register_index;
+}
+
+void DxbcShaderTranslator::PopSystemTemp(uint32_t count) {
+  assert_true(count <= system_temp_count_current_);
+  system_temp_count_current_ -= std::min(count, system_temp_count_current_);
+}
+
 void DxbcShaderTranslator::CompleteVertexShaderCode() {}
 
 void DxbcShaderTranslator::CompletePixelShaderCode() {}
@@ -869,15 +888,17 @@ void DxbcShaderTranslator::WriteShaderCode() {
     }
   }
 
-  // Temporary registers - system registers, and also Xbox 360 general-purpose
-  // registers if not using dynamic indexing.
+  // Temporary registers - guest general-purpose registers if not using dynamic
+  // indexing and Xenia internal registers.
   stat_.temp_register_count =
-      uint32_t(TempRegister::kCount) +
-      (uses_register_relative_addressing() ? 0 : register_count());
-  shader_object_.push_back(
-      ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_TEMPS) |
-      ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(2));
-  shader_object_.push_back(stat_.temp_register_count);
+      (uses_register_relative_addressing() ? 0 : register_count()) +
+      system_temp_count_max_;
+  if (stat_.temp_register_count != 0) {
+    shader_object_.push_back(
+        ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_TEMPS) |
+        ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(2));
+    shader_object_.push_back(stat_.temp_register_count);
+  }
 
   // General-purpose registers if using dynamic indexing (x0).
   if (uses_register_relative_addressing()) {
diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h
index 9b34a67ab..a011a7d14 100644
--- a/src/xenia/gpu/dxbc_shader_translator.h
+++ b/src/xenia/gpu/dxbc_shader_translator.h
@@ -129,12 +129,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
   static constexpr uint32_t kPSInPositionRegister =
       kPSInPointParametersRegister + 1;
 
-  enum class TempRegister {
-    // Color output map in pixel shader epilogue.
-    kColorOutputMap,
-
-    kCount,
-  };
+  // Allocates a new r# register for internal use and returns its index.
+  uint32_t PushSystemTemp();
+  // Frees the last allocated internal r# registers for later reuse.
+  void PopSystemTemp(uint32_t count = 1);
 
   // Writing the epilogue.
   void CompleteVertexShaderCode();
@@ -268,6 +266,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
   static const RdefConstantBufferIndex
       constant_buffer_dcl_order_[size_t(RdefConstantBufferIndex::kCount)];
 
+  // Number of currently allocated Xenia internal r# registers.
+  uint32_t system_temp_count_current_;
+  // Total maximum number of temporary registers ever used during this
+  // translation (for the declaration).
+  uint32_t system_temp_count_max_;
+
   bool writes_depth_;
 
   // The STAT chunk (based on Wine d3dcompiler_parse_stat).
@@ -294,7 +298,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
     uint32_t texture_comp_instructions;
     uint32_t texture_bias_instructions;
     uint32_t texture_gradient_instructions;
-    // Moving to outputs only.
+    // Not including indexable temp load/store.
     uint32_t mov_instruction_count;
     // Unknown in Wine.
     uint32_t movc_instruction_count;