[GPU] Cleanup RB_COLOR_MASK and RB_DEPTHCONTROL normalization

This commit is contained in:
Triang3l 2022-02-13 20:50:31 +03:00
parent 8ca67b8aa7
commit 8d07c79897
8 changed files with 114 additions and 96 deletions

View File

@ -103,22 +103,6 @@ void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) {
render_target_cache_->RestoreEdramSnapshot(snapshot);
}
// Returns the 16-bit guest color write mask (4 bits per render target, 4
// render targets) with the bits of every render target that the pixel shader
// does not write cleared. Returns 0 when the EDRAM mode is not color / depth.
uint32_t D3D12CommandProcessor::GetCurrentColorMask(
    uint32_t shader_writes_color_targets) const {
  auto& regs = *register_file_;
  const bool is_color_depth_mode =
      regs.Get<reg::RB_MODECONTROL>().edram_mode ==
      xenos::ModeControl::kColorDepth;
  if (!is_color_depth_mode) {
    return 0;
  }
  // Build a mask with all four component bits set for each render target the
  // shader writes to, then intersect it with the register value.
  uint32_t written_components = 0;
  for (uint32_t rt = 0; rt < 4; ++rt) {
    if (shader_writes_color_targets & (1 << rt)) {
      written_components |= 0xF << (rt * 4);
    }
  }
  return regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF & written_components;
}
void D3D12CommandProcessor::PushTransitionBarrier(
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
D3D12_RESOURCE_STATES new_state, UINT subresource) {
@ -2152,10 +2136,12 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
: DxbcShaderTranslator::Modification(0);
// Set up the render targets - this may perform dispatches and draws.
uint32_t pixel_shader_writes_color_targets =
pixel_shader ? pixel_shader->writes_color_targets() : 0;
uint32_t normalized_color_mask =
pixel_shader ? draw_util::GetNormalizedColorMask(
regs, pixel_shader->writes_color_targets())
: 0;
if (!render_target_cache_->Update(is_rasterization_done,
pixel_shader_writes_color_targets)) {
normalized_color_mask)) {
return false;
}
@ -2186,7 +2172,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
ID3D12RootSignature* root_signature;
if (!pipeline_cache_->ConfigurePipeline(
vertex_shader_translation, pixel_shader_translation,
primitive_processing_result, bound_depth_and_color_render_target_bits,
primitive_processing_result, normalized_color_mask,
bound_depth_and_color_render_target_bits,
bound_depth_and_color_render_target_formats, &pipeline_handle,
&root_signature)) {
return false;
@ -2241,9 +2228,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
memexport_used, primitive_polygonal,
primitive_processing_result.line_loop_closing_index,
primitive_processing_result.host_index_endian, viewport_info,
used_texture_mask,
pixel_shader ? GetCurrentColorMask(pixel_shader->writes_color_targets())
: 0);
used_texture_mask, normalized_color_mask);
// Update constant buffers, descriptors and root parameters.
if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) {
@ -3114,7 +3099,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
bool shared_memory_is_uav, bool primitive_polygonal,
uint32_t line_loop_closing_index, xenos::Endian index_endian,
const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask,
uint32_t color_mask) {
uint32_t normalized_color_mask) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
@ -3161,7 +3146,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// Get the mask for keeping previous color's components unmodified,
// or two UINT32_MAX if no colors actually existing in the RT are written.
DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
color_info.color_format, (color_mask >> (i * 4)) & 0b1111,
color_info.color_format, (normalized_color_mask >> (i * 4)) & 0b1111,
rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3],
rt_keep_masks[i][0], rt_keep_masks[i][1]);
}

View File

@ -83,16 +83,6 @@ class D3D12CommandProcessor : public CommandProcessor {
// Returns the value of frame_current_.
uint64_t GetCurrentFrame() const { return frame_current_; }
// Returns the value of frame_completed_.
uint64_t GetCompletedFrame() const { return frame_completed_; }
// Gets the current color write mask, taking the pixel shader's write mask
// into account. If a shader doesn't write to a render target, the render
// target shouldn't be written to and shouldn't even be bound - otherwise, in
// 4D5307E6, one render target is being destroyed by a shader not writing
// anything, and in 58410955, the result of clearing the top tile is being
// ignored because there are 4 render targets bound with the same EDRAM base
// (clearly not correct usage), but the shader only clears 1, and then EDRAM
// buffer stores conflict with each other.
uint32_t GetCurrentColorMask(uint32_t shader_writes_color_targets) const;
void PushTransitionBarrier(
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
D3D12_RESOURCE_STATES new_state,
@ -362,7 +352,7 @@ class D3D12CommandProcessor : public CommandProcessor {
xenos::Endian index_endian,
const draw_util::ViewportInfo& viewport_info,
uint32_t used_texture_mask,
uint32_t color_mask);
uint32_t normalized_color_mask);
bool UpdateBindings(const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader,
ID3D12RootSignature* root_signature);

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -934,6 +934,7 @@ bool PipelineCache::ConfigurePipeline(
D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader,
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
uint32_t normalized_color_mask,
uint32_t bound_depth_and_color_render_target_bits,
const uint32_t* bound_depth_and_color_render_target_formats,
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) {
@ -1005,7 +1006,7 @@ bool PipelineCache::ConfigurePipeline(
PipelineRuntimeDescription runtime_description;
if (!GetCurrentStateDescription(
vertex_shader, pixel_shader, primitive_processing_result,
bound_depth_and_color_render_target_bits,
normalized_color_mask, bound_depth_and_color_render_target_bits,
bound_depth_and_color_render_target_formats, runtime_description)) {
return false;
}
@ -1272,6 +1273,7 @@ bool PipelineCache::GetCurrentStateDescription(
D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader,
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
uint32_t normalized_color_mask,
uint32_t bound_depth_and_color_render_target_bits,
const uint32_t* bound_depth_and_color_render_target_formats,
PipelineRuntimeDescription& runtime_description_out) {
@ -1547,10 +1549,6 @@ bool PipelineCache::GetCurrentStateDescription(
// Render targets and blending state. 32 because of 0x1F mask, for safety
// (all unknown to zero).
uint32_t color_mask =
pixel_shader ? command_processor_.GetCurrentColorMask(
pixel_shader->shader().writes_color_targets())
: 0;
static const PipelineBlendFactor kBlendFactorMap[32] = {
/* 0 */ PipelineBlendFactor::kZero,
/* 1 */ PipelineBlendFactor::kOne,
@ -1622,8 +1620,7 @@ bool PipelineCache::GetCurrentStateDescription(
reg::RB_COLOR_INFO::rt_register_indices[i]);
rt.format = xenos::ColorRenderTargetFormat(
bound_depth_and_color_render_target_formats[1 + i]);
// TODO(Triang3l): Normalize unused bits of the color write mask.
rt.write_mask = (color_mask >> (i * 4)) & 0xF;
rt.write_mask = (normalized_color_mask >> (i * 4)) & 0xF;
if (rt.write_mask) {
auto blendcontrol = regs.Get<reg::RB_BLENDCONTROL>(
reg::RB_BLENDCONTROL::rt_register_indices[i]);
@ -2017,9 +2014,6 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
}
D3D12_RENDER_TARGET_BLEND_DESC& blend_desc =
state_desc.BlendState.RenderTarget[i];
// Treat 1 * src + 0 * dest as disabled blending (there are opaque
// surfaces drawn with blending enabled, but it's 1 * src + 0 * dest, in
// 415607E6) - GPU performance is better when not blending.
if (rt.src_blend != PipelineBlendFactor::kOne ||
rt.dest_blend != PipelineBlendFactor::kZero ||
rt.blend_op != xenos::BlendOp::kAdd ||

View File

@ -82,6 +82,7 @@ class PipelineCache {
D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader,
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
uint32_t normalized_color_mask,
uint32_t bound_depth_and_color_render_target_bits,
const uint32_t* bound_depth_and_color_render_targets_formats,
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
@ -247,6 +248,7 @@ class PipelineCache {
D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader,
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
uint32_t normalized_color_mask,
uint32_t bound_depth_and_color_render_target_bits,
const uint32_t* bound_depth_and_color_render_target_formats,
PipelineRuntimeDescription& runtime_description_out);

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -550,6 +550,49 @@ void GetScissor(const RegisterFile& regs, Scissor& scissor_out,
scissor_out.extent[1] = uint32_t(br_y - tl_y);
}
// Computes the per-draw color write mask (4 bits per render target) from
// RB_COLOR_MASK, dropping render targets the pixel shader doesn't write to or
// for which no component existing in the format is enabled, and forcing the
// bits of components missing from the format to 1 for the render targets that
// remain. Returns 0 when the EDRAM mode is not color / depth.
uint32_t GetNormalizedColorMask(const RegisterFile& regs,
                                uint32_t pixel_shader_writes_color_targets) {
  const bool is_color_depth_mode =
      regs.Get<reg::RB_MODECONTROL>().edram_mode ==
      xenos::ModeControl::kColorDepth;
  if (!is_color_depth_mode) {
    return 0;
  }
  const uint32_t guest_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
  uint32_t result = 0;
  for (uint32_t rt = 0; rt < xenos::kMaxColorRenderTargets; ++rt) {
    // Only render targets statically written to by the pixel shader are
    // considered. A render target the shader doesn't write to must not be
    // written, and no host ownership transfers should happen to it -
    // otherwise, in 4D5307E6, one render target is being destroyed by a shader
    // not writing anything, and in 58410955, the result of clearing the top
    // tile is being ignored because there are 4 render targets bound with the
    // same EDRAM base (clearly not correct usage), but the shader only clears
    // 1, and then ownership of EDRAM portions by host render targets is
    // conflicting.
    if (pixel_shader_writes_color_targets & (uint32_t(1) << rt)) {
      const uint32_t nibble_shift = 4 * rt;
      const auto color_format =
          regs.Get<reg::RB_COLOR_INFO>(
                  reg::RB_COLOR_INFO::rt_register_indices[rt])
              .color_format;
      // One bit per component that actually exists in the format.
      const uint32_t existing_components =
          (uint32_t(1)
           << xenos::GetColorRenderTargetFormatComponentCount(color_format)) -
          1;
      const uint32_t enabled_existing_components =
          (guest_color_mask >> nibble_shift) & existing_components;
      if (enabled_existing_components) {
        // Also set the bits of the components missing from the format, so a
        // host driver that doesn't do this check itself sees a full write mask
        // and has no reason to take a slow path that reads and merges
        // components.
        const uint32_t missing_components = 0b1111 & ~existing_components;
        result |= (enabled_existing_components | missing_components)
                  << nibble_shift;
      }
    }
  }
  return result;
}
xenos::CopySampleSelect SanitizeCopySampleSelect(
xenos::CopySampleSelect copy_sample_select, xenos::MsaaSamples msaa_samples,
bool is_depth) {

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -186,6 +186,17 @@ struct Scissor {
void GetScissor(const RegisterFile& regs, Scissor& scissor_out,
bool clamp_to_surface_pitch = true);
// Returns the color component write mask for the draw command, taking into
// account which color targets are written to by the pixel shader, as well as
// components that don't exist in the formats of the render targets. Render
// targets with only non-existent components written are skipped, but
// non-existent components are forced to be written if some existing components
// of the render target are actually used, to make sure the host driver doesn't
// try to take a slow path involving reading and mixing if there are any
// disabled components even if they don't actually exist.
uint32_t GetNormalizedColorMask(const RegisterFile& regs,
uint32_t pixel_shader_writes_color_targets);
// Scales, and shift amounts of the upper 32 bits of the 32x32=64-bit
// multiplication result, for fast division and multiplication by
// EDRAM-tile-related amounts.

View File

@ -366,7 +366,7 @@ void RenderTargetCache::ClearCache() {
// Begins a frame by calling ResetAccumulatedRenderTargets().
void RenderTargetCache::BeginFrame() { ResetAccumulatedRenderTargets(); }
bool RenderTargetCache::Update(bool is_rasterization_done,
uint32_t shader_writes_color_targets) {
uint32_t normalized_color_mask) {
const RegisterFile& regs = register_file();
bool interlock_barrier_only = GetPath() == Path::kPixelShaderInterlock;
@ -419,9 +419,6 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
}
}
uint32_t rts_remaining;
uint32_t rt_index;
// Get used render targets.
// [0] is depth / stencil where relevant, [1...4] is color.
// Depth / stencil testing / writing is before color in the pipeline.
@ -432,7 +429,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
uint32_t rts_are_64bpp = 0;
uint32_t color_rts_are_gamma = 0;
if (is_rasterization_done) {
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs);
if (rb_depthcontrol.z_enable || rb_depthcontrol.stencil_enable) {
depth_and_color_rts_used_bits |= 1;
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
@ -445,50 +442,46 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
resource_formats[0] =
interlock_barrier_only ? 0 : uint32_t(rb_depth_info.depth_format);
}
if (regs.Get<reg::RB_MODECONTROL>().edram_mode ==
xenos::ModeControl::kColorDepth) {
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
rts_remaining = shader_writes_color_targets;
while (xe::bit_scan_forward(rts_remaining, &rt_index)) {
rts_remaining &= ~(uint32_t(1) << rt_index);
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
reg::RB_COLOR_INFO::rt_register_indices[rt_index]);
xenos::ColorRenderTargetFormat color_format =
regs.Get<reg::RB_COLOR_INFO>(
reg::RB_COLOR_INFO::rt_register_indices[rt_index])
.color_format;
if ((rb_color_mask >> (rt_index * 4)) &
((uint32_t(1) << xenos::GetColorRenderTargetFormatComponentCount(
color_format)) -
1)) {
uint32_t rt_bit_index = 1 + rt_index;
depth_and_color_rts_used_bits |= uint32_t(1) << rt_bit_index;
edram_bases[rt_bit_index] =
std::min(color_info.color_base, xenos::kEdramTileCount);
bool is_64bpp = xenos::IsColorRenderTargetFormat64bpp(color_format);
if (is_64bpp) {
rts_are_64bpp |= uint32_t(1) << rt_bit_index;
}
if (color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
color_rts_are_gamma |= uint32_t(1) << rt_index;
}
xenos::ColorRenderTargetFormat color_resource_format;
if (interlock_barrier_only) {
// Only changes in mapping between coordinates and addresses are
// interesting (along with access overlap between draw calls), thus
// only pixel size is relevant.
color_resource_format =
is_64bpp ? xenos::ColorRenderTargetFormat::k_16_16_16_16
: xenos::ColorRenderTargetFormat::k_8_8_8_8;
} else {
color_resource_format = GetColorResourceFormat(
xenos::GetStorageColorFormat(color_format));
}
resource_formats[rt_bit_index] = uint32_t(color_resource_format);
}
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
if (!(normalized_color_mask & (uint32_t(0b1111) << (4 * i)))) {
continue;
}
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
reg::RB_COLOR_INFO::rt_register_indices[i]);
uint32_t rt_bit_index = 1 + i;
depth_and_color_rts_used_bits |= uint32_t(1) << rt_bit_index;
edram_bases[rt_bit_index] =
std::min(color_info.color_base, xenos::kEdramTileCount);
xenos::ColorRenderTargetFormat color_format =
regs.Get<reg::RB_COLOR_INFO>(
reg::RB_COLOR_INFO::rt_register_indices[i])
.color_format;
bool is_64bpp = xenos::IsColorRenderTargetFormat64bpp(color_format);
if (is_64bpp) {
rts_are_64bpp |= uint32_t(1) << rt_bit_index;
}
if (color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
color_rts_are_gamma |= uint32_t(1) << i;
}
xenos::ColorRenderTargetFormat color_resource_format;
if (interlock_barrier_only) {
// Only changes in mapping between coordinates and addresses are
// interesting (along with access overlap between draw calls), thus only
// pixel size is relevant.
color_resource_format =
is_64bpp ? xenos::ColorRenderTargetFormat::k_16_16_16_16
: xenos::ColorRenderTargetFormat::k_8_8_8_8;
} else {
color_resource_format =
GetColorResourceFormat(xenos::GetStorageColorFormat(color_format));
}
resource_formats[rt_bit_index] = uint32_t(color_resource_format);
}
}
uint32_t rts_remaining;
uint32_t rt_index;
// Eliminate other bound render targets if their EDRAM base conflicts with
// another render target - it's an error in most host implementations to bind
// the same render target into multiple slots, also the behavior would be

View File

@ -215,7 +215,7 @@ class RenderTargetCache {
virtual void BeginFrame();
virtual bool Update(bool is_rasterization_done,
uint32_t shader_writes_color_targets);
uint32_t normalized_color_mask);
// Returns bits where 0 is whether a depth render target is currently bound on
// the host and 1... are whether the same applies to color render targets, and