diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index fcd8ff6c8..ad928b47a 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -59,6 +59,21 @@ ID3D12GraphicsCommandList1* D3D12CommandProcessor::GetCurrentCommandList1() return command_lists_[current_queue_frame_]->GetCommandList1(); } +/* Returns the low 16 bits of RB_COLOR_MASK (one 4-bit group per render target 0-3) with the group of every target the pixel shader does not write cleared; returns 0 when pixel_shader is null. NOTE(review): the IssueDraw code this replaces forced the mask to 0 unless the mode was kColorDepth; presumably callers now pass a null pixel_shader in that case - confirm. */ uint32_t D3D12CommandProcessor::GetCurrentColorMask( + const D3D12Shader* pixel_shader) const { + if (pixel_shader == nullptr) { + return 0; + } + auto& regs = *register_file_; + uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF; + for (uint32_t i = 0; i < 4; ++i) { + if (!pixel_shader->writes_color_target(i)) { + color_mask &= ~(0xF << (i * 4)); + } + } + return color_mask; +} + void D3D12CommandProcessor::PushTransitionBarrier( ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state, D3D12_RESOURCE_STATES new_state, UINT subresource) { @@ -848,12 +863,6 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Doesn't actually draw. return true; } - uint32_t color_mask = enable_mode == xenos::ModeControl::kColorDepth ? - regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF : 0; - if (!color_mask && !(regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x1 | 0x4))) { - // Not writing to color, depth or doing stencil test, so doesn't draw. - return true; - } if ((regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 && primitive_type != PrimitiveType::kPointList && primitive_type != PrimitiveType::kRectangleList) { @@ -901,11 +910,17 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, return false; } + uint32_t color_mask = GetCurrentColorMask(pixel_shader); + if (!color_mask && !(regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x1 | 0x4))) { + // Not writing to color, depth or doing stencil test, so doesn't draw.
+ return true; + } + bool new_frame = BeginFrame(); auto command_list = GetCurrentCommandList(); // Set up the render targets - this may bind pipelines. - if (!render_target_cache_->UpdateRenderTargets()) { + if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) { // Doesn't actually draw. return true; } diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 0784c48c5..54528eefb 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -48,6 +48,16 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12GraphicsCommandList* GetCurrentCommandList() const; ID3D12GraphicsCommandList1* GetCurrentCommandList1() const; + // Gets the current color write mask, taking the pixel shader's write mask + // into account. If a shader doesn't write to a render target, it shouldn't be + // written to and it shouldn't even be bound - otherwise, in Halo 3, one + // render target is being destroyed by a shader not writing anything, and in + // Banjo-Tooie, the result of clearing the top tile is being ignored because + // there are 4 render targets bound with the same EDRAM base (clearly not + // correct usage), but the shader only clears 1, and then EDRAM buffer stores + // conflict with each other. Returns 0 if pixel_shader is null.
+ uint32_t GetCurrentColorMask(const D3D12Shader* pixel_shader) const; + void PushTransitionBarrier( ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state, D3D12_RESOURCE_STATES new_state, diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 50c8a69c0..c9cc754ce 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -335,20 +335,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendStateAndRenderTargets( dirty |= regs.render_targets[i].format != render_targets[i].format; regs.render_targets[i].format = render_targets[i].format; } - uint32_t color_mask; - if (pixel_shader != nullptr) { - color_mask = register_file_->values[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF; - for (uint32_t i = 0; i < 4; ++i) { - // If the pixel shader doesn't write to a render target, writing to it is - // disabled in the blend state. Otherwise, in Halo 3, one important render - // target is destroyed by a shader not writing to one of the outputs. - if (!pixel_shader->writes_color_target(i)) { - color_mask &= ~(0xF << (i * 4)); - } - } - } else { - color_mask = 0; - } + uint32_t color_mask = command_processor_->GetCurrentColorMask(pixel_shader); dirty |= regs.color_mask != color_mask; regs.color_mask = color_mask; bool blend_enable = diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index aa657ec9d..9e87058dc 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -323,7 +323,7 @@ void RenderTargetCache::BeginFrame() { // stable D24F==D32F comparison. } -bool RenderTargetCache::UpdateRenderTargets() { +bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { // There are two kinds of render target binding updates in this implementation // in case something has been changed - full and partial.
// @@ -416,16 +416,12 @@ bool RenderTargetCache::UpdateRenderTargets() { uint32_t edram_bases[5]; uint32_t formats[5]; bool formats_are_64bpp[5]; - uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; - if (xenos::ModeControl(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7) != - xenos::ModeControl::kColorDepth) { - rb_color_mask = 0; - } + uint32_t color_mask = command_processor_->GetCurrentColorMask(pixel_shader); uint32_t rb_color_info[4] = { regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32}; for (uint32_t i = 0; i < 4; ++i) { - enabled[i] = (rb_color_mask & (0xF << (i * 4))) != 0; + enabled[i] = (color_mask & (0xF << (i * 4))) != 0; edram_bases[i] = std::min(rb_color_info[i] & 0xFFF, 2048u); formats[i] = (rb_color_info[i] >> 16) & 0xF; formats_are_64bpp[i] = @@ -2027,7 +2023,18 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() { if (current_bindings_[a].edram_base < current_bindings_[b].edram_base) { return true; } - return a < b; + // If EDRAM bases are the same (not really a valid usage, but happens in + // Banjo-Tooie - in case color writing was enabled for invalid render + // targets in some draw call), treat the render targets with the lowest + // index as more important (it's the primary one after all, while the + // rest are additional). Also treat the depth buffer as highest-priority + // (in the comparison, treat depth as 0 and color as 1-4). The index is + // only a tie-breaker: when the base of a is greater, a must not be + // ordered before b, or the comparison isn't a strict weak ordering. + if (current_bindings_[a].edram_base > current_bindings_[b].edram_base) { + return false; + } + return ((a + 1) % 5) > ((b + 1) % 5); }); // Calculate the dispatch width.
diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index 0a66a20c8..df4c12120 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -12,6 +12,7 @@ #include +#include "xenia/gpu/d3d12/d3d12_shader.h" #include "xenia/gpu/d3d12/shared_memory.h" #include "xenia/gpu/d3d12/texture_cache.h" #include "xenia/gpu/register_file.h" @@ -209,7 +210,7 @@ class RenderTargetCache { void BeginFrame(); // Called in the beginning of a draw call - may bind pipelines. - bool UpdateRenderTargets(); + /* pixel_shader is the shader whose color target writes limit which color render targets count as enabled. NOTE(review): presumably it may be null (no color targets enabled) - confirm against the implementation. */ bool UpdateRenderTargets(const D3D12Shader* pixel_shader); // Returns the host-to-guest mappings and host formats of currently bound // render targets for pipeline creation and remapping in shaders. They are // consecutive, and format DXGI_FORMAT_UNKNOWN terminates the list. Depth