From da9f153a29523eeddd0832824b0cdd431964434d Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Fri, 31 Aug 2018 20:28:29 +0300
Subject: [PATCH] [D3D12] DXBC: Don't use indexable temps unless needed

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message):
- CompletePixelShaderCode() becomes an empty function, and the x1 indexable
  temp declaration that its removed body read from is dropped.
- WriteShaderCode() now emits DCL_TEMPS for TempRegister::kCount system
  registers plus register_count() general-purpose registers, the latter only
  when uses_register_relative_addressing() is false; the x0 indexable temp is
  declared only when it is true.
- uses_register_relative_addressing_ is read by the new accessor but is not
  declared anywhere in this patch; it must already exist in ShaderTranslator.
- NOTE(review): the context line "const std::vector& vertex_bindings()" looks
  like it lost its template argument, and the From: line has no address -
  confirm both against the tree / original mail.
- Line breaks were re-created from the hunk line counts; indentation is a
  best-effort reconstruction, so use "git apply --ignore-whitespace" if a
  context line fails to match.

 src/xenia/gpu/dxbc_shader_translator.cc | 105 ++++--------------------
 src/xenia/gpu/dxbc_shader_translator.h  |   1 +
 src/xenia/gpu/shader_translator.h       |   5 ++
 3 files changed, 22 insertions(+), 89 deletions(-)

diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc
index e8974e238..88af2e8b2 100644
--- a/src/xenia/gpu/dxbc_shader_translator.cc
+++ b/src/xenia/gpu/dxbc_shader_translator.cc
@@ -42,81 +42,7 @@ void DxbcShaderTranslator::Reset() {
 
 void DxbcShaderTranslator::CompleteVertexShaderCode() {}
 
-void DxbcShaderTranslator::CompletePixelShaderCode() {
-  // Remap color outputs from Xbox 360 to Direct3D 12 indices.
-  // temp = xe_color_output_map;
-  // [unroll] for (uint i = 0; i < 4; ++i) {
-  //   o[i] = x1[temp[i]];
-  // }
-  shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
-                         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
-  shader_code_.push_back(
-      ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT) |
-      ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
-          D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE) |
-      D3D10_SB_OPERAND_4_COMPONENT_MASK_ALL |
-      ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_TEMP) |
-      ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_1D) |
-      ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
-          0, D3D10_SB_OPERAND_INDEX_IMMEDIATE32));
-  shader_code_.push_back(uint32_t(TempRegister::kColorOutputMap));
-  shader_code_.push_back(
-      ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT) |
-      ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
-          D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE) |
-      D3D10_SB_OPERAND_4_COMPONENT_NOSWIZZLE |
-      ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER) |
-      ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_3D) |
-      ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
-          0, D3D10_SB_OPERAND_INDEX_IMMEDIATE32) |
-      ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
-          1, D3D10_SB_OPERAND_INDEX_IMMEDIATE32) |
-      ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
-          2, D3D10_SB_OPERAND_INDEX_IMMEDIATE32));
-  shader_code_.push_back(uint32_t(RdefConstantBufferIndex::kSystemConstants));
-  shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
-  shader_code_.push_back(kSysConst_ColorOutputMap_Vec);
-  ++stat_.instruction_count;
-  for (uint32_t i = 0; i < 4; ++i) {
-    shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
-                           ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
-    shader_code_.push_back(
-        ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT) |
-        ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
-            D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE) |
-        D3D10_SB_OPERAND_4_COMPONENT_MASK_ALL |
-        ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_OUTPUT) |
-        ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_1D) |
-        ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
-            0, D3D10_SB_OPERAND_INDEX_IMMEDIATE32));
-    shader_code_.push_back(i);
-    shader_code_.push_back(
-        ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT) |
-        ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
-            D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE) |
-        D3D10_SB_OPERAND_4_COMPONENT_NOSWIZZLE |
-        ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP) |
-        ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_2D) |
-        ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
-            0, D3D10_SB_OPERAND_INDEX_IMMEDIATE32) |
-        ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
-            1, D3D10_SB_OPERAND_INDEX_RELATIVE));
-    shader_code_.push_back(1);
-    shader_code_.push_back(
-        ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT) |
-        ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
-            D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE) |
-        ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(i) |
-        ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_TEMP) |
-        ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_1D) |
-        ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
-            0, D3D10_SB_OPERAND_INDEX_IMMEDIATE32));
-    shader_code_.push_back(uint32_t(TempRegister::kColorOutputMap));
-    ++stat_.instruction_count;
-    ++stat_.array_instruction_count;
-    ++stat_.mov_instruction_count;
-  }
-}
+void DxbcShaderTranslator::CompletePixelShaderCode() {}
 
 void DxbcShaderTranslator::CompleteShaderCode() {
   // Write stage-specific epilogue.
@@ -943,26 +869,27 @@ void DxbcShaderTranslator::WriteShaderCode() {
     }
   }
 
-  // General-purpose registers (x0).
+  // Temporary registers - system registers, and also Xbox 360 general-purpose
+  // registers if not using dynamic indexing.
+  stat_.temp_register_count =
+      uint32_t(TempRegister::kCount) +
+      (uses_register_relative_addressing() ? 0 : register_count());
   shader_object_.push_back(
-      ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) |
-      ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
-  // x0.
-  shader_object_.push_back(0);
-  shader_object_.push_back(register_count());
-  // 4 components in each.
-  shader_object_.push_back(4);
-  stat_.temp_array_count += register_count();
+      ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_TEMPS) |
+      ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(2));
+  shader_object_.push_back(stat_.temp_register_count);
 
-  // Color outputs on the Xbox 360 side (x1), for remapping to D3D12 bindings.
-  if (is_pixel_shader()) {
+  // General-purpose registers if using dynamic indexing (x0).
+  if (uses_register_relative_addressing()) {
     shader_object_.push_back(
         ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) |
         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
-    shader_object_.push_back(1);
+    // x0.
+    shader_object_.push_back(0);
+    shader_object_.push_back(register_count());
+    // 4 components in each.
     shader_object_.push_back(4);
-    shader_object_.push_back(4);
-    stat_.temp_array_count += 4;
+    stat_.temp_array_count += register_count();
   }
 
   // Initialize the depth output if used, which must be initialized on every
diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h
index aa5c84396..9b34a67ab 100644
--- a/src/xenia/gpu/dxbc_shader_translator.h
+++ b/src/xenia/gpu/dxbc_shader_translator.h
@@ -130,6 +130,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
       kPSInPointParametersRegister + 1;
 
   enum class TempRegister {
+    // Color output map in pixel shader epilogue.
     kColorOutputMap,
 
     kCount,
diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h
index f717aa3a0..f97370ad5 100644
--- a/src/xenia/gpu/shader_translator.h
+++ b/src/xenia/gpu/shader_translator.h
@@ -45,6 +45,11 @@ class ShaderTranslator {
   bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; }
   // True if the current shader is a pixel shader.
   bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; }
+  // True if the current shader addresses general-purpose registers with dynamic
+  // indices.
+  bool uses_register_relative_addressing() const {
+    return uses_register_relative_addressing_;
+  }
   // A list of all vertex bindings, populated before translation occurs.
   const std::vector& vertex_bindings() const {
     return vertex_bindings_;