diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc
index 1c0f5e69d..efb4a0aac 100644
--- a/src/xenia/gpu/dxbc_shader_translator.cc
+++ b/src/xenia/gpu/dxbc_shader_translator.cc
@@ -65,7 +65,7 @@ void DxbcShaderTranslator::Reset() {
   std::memset(&stat_, 0, sizeof(stat_));
 }
 
-uint32_t DxbcShaderTranslator::PushSystemTemp() {
+uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) {
   uint32_t register_index = system_temp_count_current_;
   if (!uses_register_dynamic_addressing()) {
     // Guest shader registers first if they're not in x0.
@@ -74,6 +74,25 @@ uint32_t DxbcShaderTranslator::PushSystemTemp() {
   ++system_temp_count_current_;
   system_temp_count_max_ =
       std::max(system_temp_count_max_, system_temp_count_current_);
+
+  if (zero) {
+    // Clear all four components of the new register with a MOV from an
+    // immediate zero vector: 1 opcode + 2 destination + 5 source tokens = 8.
+    shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
+                           ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
+    shader_code_.push_back(
+        EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
+    shader_code_.push_back(register_index);
+    shader_code_.push_back(EncodeVectorSwizzledOperand(
+        D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
+    shader_code_.push_back(0);
+    shader_code_.push_back(0);
+    shader_code_.push_back(0);
+    shader_code_.push_back(0);
+    ++stat_.instruction_count;
+    ++stat_.mov_instruction_count;
+  }
+
   return register_index;
 }
 
@@ -371,6 +390,12 @@ void DxbcShaderTranslator::StartTranslation() {
   } else if (is_pixel_shader()) {
     StartPixelShader();
   }
+
+  // Request global system temporary variables, zeroed on allocation. Keep the
+  // number of pushes here in sync with PopSystemTemp(3) in CompleteShaderCode.
+  system_temp_ps_pc_p0_a0_ = PushSystemTemp(true);
+  system_temp_aL_ = PushSystemTemp(true);
+  system_temp_loop_count_ = PushSystemTemp(true);
 }
 
 void DxbcShaderTranslator::CompleteVertexShader() {}
@@ -378,6 +403,12 @@ void DxbcShaderTranslator::CompleteVertexShader() {}
 void DxbcShaderTranslator::CompletePixelShader() {}
 
 void DxbcShaderTranslator::CompleteShaderCode() {
+  // Release the following system temporary values so epilogue can reuse them:
+  // - system_temp_ps_pc_p0_a0_.
+  // - system_temp_aL_.
+  // - system_temp_loop_count_.
+  PopSystemTemp(3);
+
   // Write stage-specific epilogue.
   if (is_vertex_shader()) {
     CompleteVertexShader();
diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h
index 2d42b5649..bcfc70d9a 100644
--- a/src/xenia/gpu/dxbc_shader_translator.h
+++ b/src/xenia/gpu/dxbc_shader_translator.h
@@ -180,7 +180,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
   }
 
   // Allocates a new r# register for internal use and returns its index.
-  uint32_t PushSystemTemp();
+  // If zero is true, also emits a mov that clears all four components to 0.
+  uint32_t PushSystemTemp(bool zero = false);
   // Frees the last allocated internal r# registers for later reuse.
   void PopSystemTemp(uint32_t count = 1);
 
@@ -327,6 +328,15 @@ class DxbcShaderTranslator : public ShaderTranslator {
   // translation (for the declaration).
   uint32_t system_temp_count_max_;
 
+  // Temporary register ID for previous scalar result, program counter,
+  // predicate and absolute address register.
+  uint32_t system_temp_ps_pc_p0_a0_;
+  // Loop index stack - .x is the active loop, shifted right to .yzw on push.
+  uint32_t system_temp_aL_;
+  // Loop counter stack, .x is the active loop. Represents number of times
+  // remaining to loop.
+  uint32_t system_temp_loop_count_;
+
   bool writes_depth_;
 
   // The STAT chunk (based on Wine d3dcompiler_parse_stat).