diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index e6be3c805..7b05fb5f8 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -134,7 +134,7 @@ bool DxbcShaderTranslator::UseSwitchForControlFlow() const { return FLAGS_dxbc_switch && vendor_id_ != 0x8086; } -uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) { +uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero, uint32_t count) { uint32_t register_index = system_temp_count_current_; if (!uses_register_dynamic_addressing() && !is_depth_only_pixel_shader_) { // Guest shader registers first if they're not in x0. Depth-only pixel @@ -143,24 +143,26 @@ uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) { // loaded. register_index += register_count(); } - ++system_temp_count_current_; + system_temp_count_current_ += count; system_temp_count_max_ = std::max(system_temp_count_max_, system_temp_count_current_); if (zero) { - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(register_index); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); - shader_code_.push_back(0); - shader_code_.push_back(0); - shader_code_.push_back(0); - shader_code_.push_back(0); - ++stat_.instruction_count; - ++stat_.mov_instruction_count; + for (uint32_t i = 0; i < count; ++i) { + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(register_index + i); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0); + shader_code_.push_back(0); + shader_code_.push_back(0); + shader_code_.push_back(0); + ++stat_.instruction_count; + ++stat_.mov_instruction_count; + } } return register_index; @@ -948,11 +950,9 @@ void DxbcShaderTranslator::StartTranslation() { system_temp_position_ = PushSystemTemp(true); } else if (IsDxbcPixelShader()) { if (!is_depth_only_pixel_shader_) { - for (uint32_t i = 0; i < 4; ++i) { - // In the ROV path, no need to initialize the colors because original - // values will be kept for the unwritten components. - system_temp_color_[i] = PushSystemTemp(!edram_rov_used_); - } + // In the ROV path, no need to initialize the colors because original + // values will be kept for the unwritten components. + system_temps_color_ = PushSystemTemp(!edram_rov_used_, 4); } if (edram_rov_used_) { if (!is_depth_only_pixel_shader_) { @@ -1288,7 +1288,7 @@ void DxbcShaderTranslator::CompleteShaderCode() { } } if (!is_depth_only_pixel_shader_) { - // Release system_temp_color_. + // Release system_temps_color_. PopSystemTemp(4); } } @@ -2196,8 +2196,8 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, saturate_bit); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1)); - shader_code_.push_back( - system_temp_color_[uint32_t(result.storage_index)]); + shader_code_.push_back(system_temps_color_ + + uint32_t(result.storage_index)); break; default: diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index a5acc7e4d..8dbc2bdf8 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -799,8 +799,9 @@ class DxbcShaderTranslator : public ShaderTranslator { // Whether to use switch-case rather than if (pc >= label) for control flow. bool UseSwitchForControlFlow() const; - // Allocates a new r# register for internal use and returns its index. - uint32_t PushSystemTemp(bool zero = false); + // Allocates new consecutive r# registers for internal use and returns the + // index of the first. + uint32_t PushSystemTemp(bool zero = false, uint32_t count = 1); // Frees the last allocated internal r# registers for later reuse. void PopSystemTemp(uint32_t count = 1); @@ -1144,9 +1145,9 @@ class DxbcShaderTranslator : public ShaderTranslator { // applied in the end of the shader). uint32_t system_temp_position_; - // Color outputs in pixel shaders (because of exponent bias, alpha test and - // remapping). - uint32_t system_temp_color_[4]; + // 4 color outputs in pixel shaders (because of exponent bias, alpha test and + // remapping, and also for ROV writing). + uint32_t system_temps_color_; // Whether the color output has been written in the execution path (ROV only). uint32_t system_temp_color_written_; // Depth value (ROV only). The meaning depends on whether the shader writes to diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index 4a023da4b..404ff7e6a 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -899,7 +899,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() { shader_code_.push_back(gamma_temp); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; - CompletePixelShader_GammaCorrect(system_temp_color_[i], true); + CompletePixelShader_GammaCorrect(system_temps_color_ + i, true); shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ++stat_.instruction_count; @@ -948,7 +948,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() { shader_code_.push_back(remap_movc_mask_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(system_temp_color_[j]); + shader_code_.push_back(system_temps_color_ + j); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); shader_code_.push_back(remap_movc_target_temp); @@ -5139,7 +5139,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { shader_code_.push_back(src_color_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(system_temp_color_[i]); + shader_code_.push_back(system_temps_color_ + i); ++stat_.instruction_count; ++stat_.mov_instruction_count; @@ -5488,7 +5488,7 @@ void DxbcShaderTranslator::CompletePixelShader() { shader_code_.push_back(alpha_test_reg); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); - shader_code_.push_back(system_temp_color_[0]); + shader_code_.push_back(system_temps_color_); shader_code_.push_back(EncodeVectorSelectOperand( D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_AlphaTestRange_Comp, 3)); shader_code_.push_back(cbuffer_index_system_constants_); @@ -5510,7 +5510,7 @@ void DxbcShaderTranslator::CompletePixelShader() { shader_code_.push_back(kSysConst_AlphaTestRange_Vec); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); - shader_code_.push_back(system_temp_color_[0]); + shader_code_.push_back(system_temps_color_); ++stat_.instruction_count; ++stat_.float_instruction_count; // Check if both tests have passed and the alpha is in the range. @@ -5587,10 +5587,10 @@ void DxbcShaderTranslator::CompletePixelShader() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(system_temp_color_[i]); + shader_code_.push_back(system_temps_color_ + i); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(system_temp_color_[i]); + shader_code_.push_back(system_temps_color_ + i); shader_code_.push_back(EncodeVectorReplicatedOperand( D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3)); shader_code_.push_back(cbuffer_index_system_constants_);