From 8c314072c884ac158544c452aac1f68c6815baca Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Fri, 31 Aug 2018 16:30:08 +0300
Subject: [PATCH] [D3D12] DXBC color output remapping

---
 src/xenia/gpu/dxbc_shader_translator.cc | 103 ++++++++++++++++++++++++
 src/xenia/gpu/dxbc_shader_translator.h  |  46 ++++++++++-
 2 files changed, 148 insertions(+), 1 deletion(-)

diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc
index b4a7eba21..e8974e238 100644
--- a/src/xenia/gpu/dxbc_shader_translator.cc
+++ b/src/xenia/gpu/dxbc_shader_translator.cc
@@ -40,7 +40,92 @@ void DxbcShaderTranslator::Reset() {
   std::memset(&stat_, 0, sizeof(stat_));
 }
 
+void DxbcShaderTranslator::CompleteVertexShaderCode() {}
+
+void DxbcShaderTranslator::CompletePixelShaderCode() {
+  // Remap color outputs from Xbox 360 to Direct3D 12 indices.
+  // temp = xe_color_output_map;
+  // [unroll] for (uint i = 0; i < 4; ++i) o[i] = x1[temp[i]];
+  // Every mov here is 7 tokens long: the opcode token, 2 tokens of the
+  // destination operand and 4 tokens of the source operand.
+  shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
+                         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
+  shader_code_.push_back(
+      ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT) |
+      ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+          D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE) |
+      D3D10_SB_OPERAND_4_COMPONENT_MASK_ALL |
+      ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_TEMP) |
+      ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_1D) |
+      ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
+          0, D3D10_SB_OPERAND_INDEX_IMMEDIATE32));
+  shader_code_.push_back(uint32_t(TempRegister::kColorOutputMap));
+  shader_code_.push_back(
+      ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT) |
+      ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+          D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE) |
+      D3D10_SB_OPERAND_4_COMPONENT_NOSWIZZLE |
+      ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER) |
+      ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_3D) |
+      ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
+          0, D3D10_SB_OPERAND_INDEX_IMMEDIATE32) |
+      ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
+          1, D3D10_SB_OPERAND_INDEX_IMMEDIATE32) |
+      ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
+          2, D3D10_SB_OPERAND_INDEX_IMMEDIATE32));
+  shader_code_.push_back(uint32_t(RdefConstantBufferIndex::kSystemConstants));
+  shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
+  shader_code_.push_back(kSysConst_ColorOutputMap_Vec);
+  ++stat_.instruction_count;  // Moving to a temp - not counted as a mov.
+  for (uint32_t i = 0; i < 4; ++i) {
+    shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
+                           ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
+    shader_code_.push_back(
+        ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT) |
+        ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+            D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE) |
+        D3D10_SB_OPERAND_4_COMPONENT_MASK_ALL |
+        ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_OUTPUT) |
+        ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_1D) |
+        ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
+            0, D3D10_SB_OPERAND_INDEX_IMMEDIATE32));
+    shader_code_.push_back(i);  // o[i].
+    shader_code_.push_back(
+        ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT) |
+        ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+            D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE) |
+        D3D10_SB_OPERAND_4_COMPONENT_NOSWIZZLE |
+        ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP) |
+        ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_2D) |
+        ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
+            0, D3D10_SB_OPERAND_INDEX_IMMEDIATE32) |
+        ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
+            1, D3D10_SB_OPERAND_INDEX_RELATIVE));
+    shader_code_.push_back(1);  // x1.
+    shader_code_.push_back(
+        ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_4_COMPONENT) |
+        ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(
+            D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE) |
+        ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(i) |
+        ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_TEMP) |
+        ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_1D) |
+        ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(
+            0, D3D10_SB_OPERAND_INDEX_IMMEDIATE32));
+    shader_code_.push_back(uint32_t(TempRegister::kColorOutputMap));
+    ++stat_.instruction_count;
+    ++stat_.array_instruction_count;
+    ++stat_.mov_instruction_count;
+  }
+}
+
 void DxbcShaderTranslator::CompleteShaderCode() {
+  // Write the stage-specific epilogue before returning.
+  if (is_vertex_shader()) {
+    CompleteVertexShaderCode();
+  } else if (is_pixel_shader()) {
+    CompletePixelShaderCode();
+  }
+
   // Return from `main`.
   shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RET) |
                          ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
@@ -880,6 +965,24 @@ void DxbcShaderTranslator::WriteShaderCode() {
     stat_.temp_array_count += 4;
   }
 
+  // Write 0 to the depth output if it's used, as it must be initialized on
+  // every execution path.
+  if (is_pixel_shader() && writes_depth_) {
+    shader_object_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
+                             ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
+    shader_object_.push_back(
+        ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_1_COMPONENT) |
+        ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) |
+        ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_0D));
+    shader_object_.push_back(
+        ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(D3D10_SB_OPERAND_1_COMPONENT) |
+        ENCODE_D3D10_SB_OPERAND_TYPE(D3D10_SB_OPERAND_TYPE_IMMEDIATE32) |
+        ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(D3D10_SB_OPERAND_INDEX_0D));
+    shader_object_.push_back(0);  // The immediate value.
+    ++stat_.instruction_count;
+    ++stat_.mov_instruction_count;
+  }
+
   // Write the translated shader code.
   size_t code_size_dwords = shader_code_.size();
   // So [] won't crash in case the size is zero somehow.
diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h
index 8c2a3b8f9..aa5c84396 100644
--- a/src/xenia/gpu/dxbc_shader_translator.h
+++ b/src/xenia/gpu/dxbc_shader_translator.h
@@ -24,6 +24,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
   ~DxbcShaderTranslator() override;
 
   // IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
+  // - kSysConst enum (_Vec: register in the cbuffer, _Comp: first component).
   // - rdef_constants_.
   // - rdef_constant_buffers_ system constant buffer size.
   // - d3d12/shaders/xenos_draw.hlsli (for geometry shaders).
@@ -76,6 +77,39 @@ class DxbcShaderTranslator : public ShaderTranslator {
     kFloatConstantsLast = kFloatConstantsFirst + kFloatConstantPageCount - 1,
   };
 
+  enum : uint32_t {  // One blank-line-separated group per cbuffer vector.
+    kSysConst_MulRcpW_Vec = 0,
+    kSysConst_MulRcpW_Comp = 0,
+    kSysConst_VertexBaseIndex_Vec = 0,
+    kSysConst_VertexBaseIndex_Comp = 3,
+
+    kSysConst_NDCScale_Vec = 1,
+    kSysConst_NDCScale_Comp = 0,
+    kSysConst_VertexIndexEndian_Vec = 1,
+    kSysConst_VertexIndexEndian_Comp = 3,
+
+    kSysConst_NDCOffset_Vec = 2,
+    kSysConst_NDCOffset_Comp = 0,
+    kSysConst_PixelHalfPixelOffset_Vec = 2,
+    kSysConst_PixelHalfPixelOffset_Comp = 3,
+
+    kSysConst_PointSize_Vec = 3,
+    kSysConst_PointSize_Comp = 0,
+    kSysConst_SSAAInvScale_Vec = 3,  // Was 0, which aliased vector 0.
+    kSysConst_SSAAInvScale_Comp = 2,
+
+    kSysConst_PixelPosReg_Vec = 4,
+    kSysConst_PixelPosReg_Comp = 0,
+    kSysConst_AlphaTest_Vec = 4,
+    kSysConst_AlphaTest_Comp = 1,
+    kSysConst_AlphaTestRange_Vec = 4,
+    kSysConst_AlphaTestRange_Comp = 2,
+
+    kSysConst_ColorExpBias_Vec = 5,
+
+    kSysConst_ColorOutputMap_Vec = 6,
+  };
+
   static constexpr uint32_t kInterpolatorCount = 16;
   static constexpr uint32_t kPointParametersTexCoord = kInterpolatorCount;
 
@@ -95,7 +129,15 @@ class DxbcShaderTranslator : public ShaderTranslator {
   static constexpr uint32_t kPSInPositionRegister =
       kPSInPointParametersRegister + 1;
 
-  // Writes the epilogue.
+  enum class TempRegister {
+    kColorOutputMap,  // Receives the color output map in the PS epilogue.
+
+    kCount,
+  };
+
+  // Writing the epilogue (stage-specific parts, then the common return).
+  void CompleteVertexShaderCode();
+  void CompletePixelShaderCode();
   void CompleteShaderCode();
 
   // Appends a string to a DWORD stream, returns the DWORD-aligned length.
@@ -233,6 +275,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
     uint32_t temp_register_count;
     // Unknown in Wine.
     uint32_t def_count;
+    // Only inputs and outputs.
     uint32_t dcl_count;
     uint32_t float_instruction_count;
     uint32_t int_instruction_count;
@@ -250,6 +293,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
     uint32_t texture_comp_instructions;
     uint32_t texture_bias_instructions;
     uint32_t texture_gradient_instructions;
+    // Moving to outputs only.
     uint32_t mov_instruction_count;
     // Unknown in Wine.
     uint32_t movc_instruction_count;