[GPU] Cleanup RB_COLOR_MASK and RB_DEPTHCONTROL normalization
This commit is contained in:
parent
8ca67b8aa7
commit
8d07c79897
|
@ -103,22 +103,6 @@ void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) {
|
|||
render_target_cache_->RestoreEdramSnapshot(snapshot);
|
||||
}
|
||||
|
||||
uint32_t D3D12CommandProcessor::GetCurrentColorMask(
|
||||
uint32_t shader_writes_color_targets) const {
|
||||
auto& regs = *register_file_;
|
||||
if (regs.Get<reg::RB_MODECONTROL>().edram_mode !=
|
||||
xenos::ModeControl::kColorDepth) {
|
||||
return 0;
|
||||
}
|
||||
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (!(shader_writes_color_targets & (1 << i))) {
|
||||
color_mask &= ~(0xF << (i * 4));
|
||||
}
|
||||
}
|
||||
return color_mask;
|
||||
}
|
||||
|
||||
void D3D12CommandProcessor::PushTransitionBarrier(
|
||||
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
|
||||
D3D12_RESOURCE_STATES new_state, UINT subresource) {
|
||||
|
@ -2152,10 +2136,12 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
: DxbcShaderTranslator::Modification(0);
|
||||
|
||||
// Set up the render targets - this may perform dispatches and draws.
|
||||
uint32_t pixel_shader_writes_color_targets =
|
||||
pixel_shader ? pixel_shader->writes_color_targets() : 0;
|
||||
uint32_t normalized_color_mask =
|
||||
pixel_shader ? draw_util::GetNormalizedColorMask(
|
||||
regs, pixel_shader->writes_color_targets())
|
||||
: 0;
|
||||
if (!render_target_cache_->Update(is_rasterization_done,
|
||||
pixel_shader_writes_color_targets)) {
|
||||
normalized_color_mask)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -2186,7 +2172,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
ID3D12RootSignature* root_signature;
|
||||
if (!pipeline_cache_->ConfigurePipeline(
|
||||
vertex_shader_translation, pixel_shader_translation,
|
||||
primitive_processing_result, bound_depth_and_color_render_target_bits,
|
||||
primitive_processing_result, normalized_color_mask,
|
||||
bound_depth_and_color_render_target_bits,
|
||||
bound_depth_and_color_render_target_formats, &pipeline_handle,
|
||||
&root_signature)) {
|
||||
return false;
|
||||
|
@ -2241,9 +2228,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
memexport_used, primitive_polygonal,
|
||||
primitive_processing_result.line_loop_closing_index,
|
||||
primitive_processing_result.host_index_endian, viewport_info,
|
||||
used_texture_mask,
|
||||
pixel_shader ? GetCurrentColorMask(pixel_shader->writes_color_targets())
|
||||
: 0);
|
||||
used_texture_mask, normalized_color_mask);
|
||||
|
||||
// Update constant buffers, descriptors and root parameters.
|
||||
if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) {
|
||||
|
@ -3114,7 +3099,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
bool shared_memory_is_uav, bool primitive_polygonal,
|
||||
uint32_t line_loop_closing_index, xenos::Endian index_endian,
|
||||
const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask,
|
||||
uint32_t color_mask) {
|
||||
uint32_t normalized_color_mask) {
|
||||
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
|
@ -3161,7 +3146,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
// Get the mask for keeping previous color's components unmodified,
|
||||
// or two UINT32_MAX if no colors actually existing in the RT are written.
|
||||
DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
|
||||
color_info.color_format, (color_mask >> (i * 4)) & 0b1111,
|
||||
color_info.color_format, (normalized_color_mask >> (i * 4)) & 0b1111,
|
||||
rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3],
|
||||
rt_keep_masks[i][0], rt_keep_masks[i][1]);
|
||||
}
|
||||
|
|
|
@ -83,16 +83,6 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
uint64_t GetCurrentFrame() const { return frame_current_; }
|
||||
uint64_t GetCompletedFrame() const { return frame_completed_; }
|
||||
|
||||
// Gets the current color write mask, taking the pixel shader's write mask
|
||||
// into account. If a shader doesn't write to a render target, it shouldn't be
|
||||
// written to and it shouldn't be even bound - otherwise, in 4D5307E6, one
|
||||
// render target is being destroyed by a shader not writing anything, and in
|
||||
// 58410955, the result of clearing the top tile is being ignored because
|
||||
// there are 4 render targets bound with the same EDRAM base (clearly not
|
||||
// correct usage), but the shader only clears 1, and then EDRAM buffer stores
|
||||
// conflict with each other.
|
||||
uint32_t GetCurrentColorMask(uint32_t shader_writes_color_targets) const;
|
||||
|
||||
void PushTransitionBarrier(
|
||||
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
|
||||
D3D12_RESOURCE_STATES new_state,
|
||||
|
@ -362,7 +352,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
xenos::Endian index_endian,
|
||||
const draw_util::ViewportInfo& viewport_info,
|
||||
uint32_t used_texture_mask,
|
||||
uint32_t color_mask);
|
||||
uint32_t normalized_color_mask);
|
||||
bool UpdateBindings(const D3D12Shader* vertex_shader,
|
||||
const D3D12Shader* pixel_shader,
|
||||
ID3D12RootSignature* root_signature);
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -934,6 +934,7 @@ bool PipelineCache::ConfigurePipeline(
|
|||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader,
|
||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||
uint32_t normalized_color_mask,
|
||||
uint32_t bound_depth_and_color_render_target_bits,
|
||||
const uint32_t* bound_depth_and_color_render_target_formats,
|
||||
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) {
|
||||
|
@ -1005,7 +1006,7 @@ bool PipelineCache::ConfigurePipeline(
|
|||
PipelineRuntimeDescription runtime_description;
|
||||
if (!GetCurrentStateDescription(
|
||||
vertex_shader, pixel_shader, primitive_processing_result,
|
||||
bound_depth_and_color_render_target_bits,
|
||||
normalized_color_mask, bound_depth_and_color_render_target_bits,
|
||||
bound_depth_and_color_render_target_formats, runtime_description)) {
|
||||
return false;
|
||||
}
|
||||
|
@ -1272,6 +1273,7 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader,
|
||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||
uint32_t normalized_color_mask,
|
||||
uint32_t bound_depth_and_color_render_target_bits,
|
||||
const uint32_t* bound_depth_and_color_render_target_formats,
|
||||
PipelineRuntimeDescription& runtime_description_out) {
|
||||
|
@ -1547,10 +1549,6 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
|
||||
// Render targets and blending state. 32 because of 0x1F mask, for safety
|
||||
// (all unknown to zero).
|
||||
uint32_t color_mask =
|
||||
pixel_shader ? command_processor_.GetCurrentColorMask(
|
||||
pixel_shader->shader().writes_color_targets())
|
||||
: 0;
|
||||
static const PipelineBlendFactor kBlendFactorMap[32] = {
|
||||
/* 0 */ PipelineBlendFactor::kZero,
|
||||
/* 1 */ PipelineBlendFactor::kOne,
|
||||
|
@ -1622,8 +1620,7 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
reg::RB_COLOR_INFO::rt_register_indices[i]);
|
||||
rt.format = xenos::ColorRenderTargetFormat(
|
||||
bound_depth_and_color_render_target_formats[1 + i]);
|
||||
// TODO(Triang3l): Normalize unused bits of the color write mask.
|
||||
rt.write_mask = (color_mask >> (i * 4)) & 0xF;
|
||||
rt.write_mask = (normalized_color_mask >> (i * 4)) & 0xF;
|
||||
if (rt.write_mask) {
|
||||
auto blendcontrol = regs.Get<reg::RB_BLENDCONTROL>(
|
||||
reg::RB_BLENDCONTROL::rt_register_indices[i]);
|
||||
|
@ -2017,9 +2014,6 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
|
|||
}
|
||||
D3D12_RENDER_TARGET_BLEND_DESC& blend_desc =
|
||||
state_desc.BlendState.RenderTarget[i];
|
||||
// Treat 1 * src + 0 * dest as disabled blending (there are opaque
|
||||
// surfaces drawn with blending enabled, but it's 1 * src + 0 * dest, in
|
||||
// 415607E6 - GPU performance is better when not blending.
|
||||
if (rt.src_blend != PipelineBlendFactor::kOne ||
|
||||
rt.dest_blend != PipelineBlendFactor::kZero ||
|
||||
rt.blend_op != xenos::BlendOp::kAdd ||
|
||||
|
|
|
@ -82,6 +82,7 @@ class PipelineCache {
|
|||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader,
|
||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||
uint32_t normalized_color_mask,
|
||||
uint32_t bound_depth_and_color_render_target_bits,
|
||||
const uint32_t* bound_depth_and_color_render_targets_formats,
|
||||
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
|
||||
|
@ -247,6 +248,7 @@ class PipelineCache {
|
|||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader,
|
||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||
uint32_t normalized_color_mask,
|
||||
uint32_t bound_depth_and_color_render_target_bits,
|
||||
const uint32_t* bound_depth_and_color_render_target_formats,
|
||||
PipelineRuntimeDescription& runtime_description_out);
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -550,6 +550,49 @@ void GetScissor(const RegisterFile& regs, Scissor& scissor_out,
|
|||
scissor_out.extent[1] = uint32_t(br_y - tl_y);
|
||||
}
|
||||
|
||||
uint32_t GetNormalizedColorMask(const RegisterFile& regs,
|
||||
uint32_t pixel_shader_writes_color_targets) {
|
||||
if (regs.Get<reg::RB_MODECONTROL>().edram_mode !=
|
||||
xenos::ModeControl::kColorDepth) {
|
||||
return 0;
|
||||
}
|
||||
uint32_t normalized_color_mask = 0;
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
|
||||
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
|
||||
// Exclude the render targets not statically written to by the pixel shader.
|
||||
// If the shader doesn't write to a render target, it shouldn't be written
|
||||
// to, and no ownership transfers should happen to it on the host even -
|
||||
// otherwise, in 4D5307E6, one render target is being destroyed by a shader
|
||||
// not writing anything, and in 58410955, the result of clearing the top
|
||||
// tile is being ignored because there are 4 render targets bound with the
|
||||
// same EDRAM base (clearly not correct usage), but the shader only clears
|
||||
// 1, and then ownership of EDRAM portions by host render targets is
|
||||
// conflicting.
|
||||
if (!(pixel_shader_writes_color_targets & (uint32_t(1) << i))) {
|
||||
continue;
|
||||
}
|
||||
// Check if any existing component is written to.
|
||||
uint32_t format_component_mask =
|
||||
(uint32_t(1) << xenos::GetColorRenderTargetFormatComponentCount(
|
||||
regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[i])
|
||||
.color_format)) -
|
||||
1;
|
||||
uint32_t rt_write_mask = (rb_color_mask >> (4 * i)) & format_component_mask;
|
||||
if (!rt_write_mask) {
|
||||
continue;
|
||||
}
|
||||
// Mark the non-existent components as written so in the host driver, no
|
||||
// slow path (involving reading and merging components) is taken if the
|
||||
// driver doesn't perform this check internally, and some components are not
|
||||
// included in the mask even though they actually don't exist in the format.
|
||||
rt_write_mask |= 0b1111 & ~format_component_mask;
|
||||
// Add to the normalized mask.
|
||||
normalized_color_mask |= rt_write_mask << (4 * i);
|
||||
}
|
||||
return normalized_color_mask;
|
||||
}
|
||||
|
||||
xenos::CopySampleSelect SanitizeCopySampleSelect(
|
||||
xenos::CopySampleSelect copy_sample_select, xenos::MsaaSamples msaa_samples,
|
||||
bool is_depth) {
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -186,6 +186,17 @@ struct Scissor {
|
|||
void GetScissor(const RegisterFile& regs, Scissor& scissor_out,
|
||||
bool clamp_to_surface_pitch = true);
|
||||
|
||||
// Returns the color component write mask for the draw command taking into
|
||||
// account which color targets are written to by the pixel shader, as well as
|
||||
// components that don't exist in the formats of the render targets (render
|
||||
// targets with only non-existent components written are skipped, but
|
||||
// non-existent components are forced to written if some existing components of
|
||||
// the render target are actually used to make sure the host driver doesn't try
|
||||
// to take a slow path involving reading and mixing if there are any disabled
|
||||
// components even if they don't actually exist).
|
||||
uint32_t GetNormalizedColorMask(const RegisterFile& regs,
|
||||
uint32_t pixel_shader_writes_color_targets);
|
||||
|
||||
// Scales, and shift amounts of the upper 32 bits of the 32x32=64-bit
|
||||
// multiplication result, for fast division and multiplication by
|
||||
// EDRAM-tile-related amounts.
|
||||
|
|
|
@ -366,7 +366,7 @@ void RenderTargetCache::ClearCache() {
|
|||
void RenderTargetCache::BeginFrame() { ResetAccumulatedRenderTargets(); }
|
||||
|
||||
bool RenderTargetCache::Update(bool is_rasterization_done,
|
||||
uint32_t shader_writes_color_targets) {
|
||||
uint32_t normalized_color_mask) {
|
||||
const RegisterFile& regs = register_file();
|
||||
bool interlock_barrier_only = GetPath() == Path::kPixelShaderInterlock;
|
||||
|
||||
|
@ -419,9 +419,6 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
|
|||
}
|
||||
}
|
||||
|
||||
uint32_t rts_remaining;
|
||||
uint32_t rt_index;
|
||||
|
||||
// Get used render targets.
|
||||
// [0] is depth / stencil where relevant, [1...4] is color.
|
||||
// Depth / stencil testing / writing is before color in the pipeline.
|
||||
|
@ -432,7 +429,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
|
|||
uint32_t rts_are_64bpp = 0;
|
||||
uint32_t color_rts_are_gamma = 0;
|
||||
if (is_rasterization_done) {
|
||||
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
|
||||
auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs);
|
||||
if (rb_depthcontrol.z_enable || rb_depthcontrol.stencil_enable) {
|
||||
depth_and_color_rts_used_bits |= 1;
|
||||
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
|
||||
|
@ -445,50 +442,46 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
|
|||
resource_formats[0] =
|
||||
interlock_barrier_only ? 0 : uint32_t(rb_depth_info.depth_format);
|
||||
}
|
||||
if (regs.Get<reg::RB_MODECONTROL>().edram_mode ==
|
||||
xenos::ModeControl::kColorDepth) {
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
|
||||
rts_remaining = shader_writes_color_targets;
|
||||
while (xe::bit_scan_forward(rts_remaining, &rt_index)) {
|
||||
rts_remaining &= ~(uint32_t(1) << rt_index);
|
||||
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[rt_index]);
|
||||
xenos::ColorRenderTargetFormat color_format =
|
||||
regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[rt_index])
|
||||
.color_format;
|
||||
if ((rb_color_mask >> (rt_index * 4)) &
|
||||
((uint32_t(1) << xenos::GetColorRenderTargetFormatComponentCount(
|
||||
color_format)) -
|
||||
1)) {
|
||||
uint32_t rt_bit_index = 1 + rt_index;
|
||||
depth_and_color_rts_used_bits |= uint32_t(1) << rt_bit_index;
|
||||
edram_bases[rt_bit_index] =
|
||||
std::min(color_info.color_base, xenos::kEdramTileCount);
|
||||
bool is_64bpp = xenos::IsColorRenderTargetFormat64bpp(color_format);
|
||||
if (is_64bpp) {
|
||||
rts_are_64bpp |= uint32_t(1) << rt_bit_index;
|
||||
}
|
||||
if (color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
|
||||
color_rts_are_gamma |= uint32_t(1) << rt_index;
|
||||
}
|
||||
xenos::ColorRenderTargetFormat color_resource_format;
|
||||
if (interlock_barrier_only) {
|
||||
// Only changes in mapping between coordinates and addresses are
|
||||
// interesting (along with access overlap between draw calls), thus
|
||||
// only pixel size is relevant.
|
||||
color_resource_format =
|
||||
is_64bpp ? xenos::ColorRenderTargetFormat::k_16_16_16_16
|
||||
: xenos::ColorRenderTargetFormat::k_8_8_8_8;
|
||||
} else {
|
||||
color_resource_format = GetColorResourceFormat(
|
||||
xenos::GetStorageColorFormat(color_format));
|
||||
}
|
||||
resource_formats[rt_bit_index] = uint32_t(color_resource_format);
|
||||
}
|
||||
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
|
||||
if (!(normalized_color_mask & (uint32_t(0b1111) << (4 * i)))) {
|
||||
continue;
|
||||
}
|
||||
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[i]);
|
||||
uint32_t rt_bit_index = 1 + i;
|
||||
depth_and_color_rts_used_bits |= uint32_t(1) << rt_bit_index;
|
||||
edram_bases[rt_bit_index] =
|
||||
std::min(color_info.color_base, xenos::kEdramTileCount);
|
||||
xenos::ColorRenderTargetFormat color_format =
|
||||
regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[i])
|
||||
.color_format;
|
||||
bool is_64bpp = xenos::IsColorRenderTargetFormat64bpp(color_format);
|
||||
if (is_64bpp) {
|
||||
rts_are_64bpp |= uint32_t(1) << rt_bit_index;
|
||||
}
|
||||
if (color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
|
||||
color_rts_are_gamma |= uint32_t(1) << i;
|
||||
}
|
||||
xenos::ColorRenderTargetFormat color_resource_format;
|
||||
if (interlock_barrier_only) {
|
||||
// Only changes in mapping between coordinates and addresses are
|
||||
// interesting (along with access overlap between draw calls), thus only
|
||||
// pixel size is relevant.
|
||||
color_resource_format =
|
||||
is_64bpp ? xenos::ColorRenderTargetFormat::k_16_16_16_16
|
||||
: xenos::ColorRenderTargetFormat::k_8_8_8_8;
|
||||
} else {
|
||||
color_resource_format =
|
||||
GetColorResourceFormat(xenos::GetStorageColorFormat(color_format));
|
||||
}
|
||||
resource_formats[rt_bit_index] = uint32_t(color_resource_format);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t rts_remaining;
|
||||
uint32_t rt_index;
|
||||
|
||||
// Eliminate other bound render targets if their EDRAM base conflicts with
|
||||
// another render target - it's an error in most host implementations to bind
|
||||
// the same render target into multiple slots, also the behavior would be
|
||||
|
|
|
@ -215,7 +215,7 @@ class RenderTargetCache {
|
|||
virtual void BeginFrame();
|
||||
|
||||
virtual bool Update(bool is_rasterization_done,
|
||||
uint32_t shader_writes_color_targets);
|
||||
uint32_t normalized_color_mask);
|
||||
|
||||
// Returns bits where 0 is whether a depth render target is currently bound on
|
||||
// the host and 1... are whether the same applies to color render targets, and
|
||||
|
|
Loading…
Reference in New Issue