[GPU] PS and rasterization disabling cleanup

This commit is contained in:
Triang3l 2020-12-24 23:40:38 +03:00
parent 733efa5ff8
commit dcde08a493
8 changed files with 318 additions and 129 deletions

View File

@ -101,6 +101,10 @@ void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) {
uint32_t D3D12CommandProcessor::GetCurrentColorMask( uint32_t D3D12CommandProcessor::GetCurrentColorMask(
uint32_t shader_writes_color_targets) const { uint32_t shader_writes_color_targets) const {
auto& regs = *register_file_; auto& regs = *register_file_;
if (regs.Get<reg::RB_MODECONTROL>().edram_mode !=
xenos::ModeControl::kColorDepth) {
return 0;
}
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF; uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
if (!(shader_writes_color_targets & (1 << i))) { if (!(shader_writes_color_targets & (1 << i))) {
@ -1801,12 +1805,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
xenos::ModeControl enable_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode; xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (enable_mode == xenos::ModeControl::kIgnore) { if (edram_mode == xenos::ModeControl::kCopy) {
// Ignored.
return true;
}
if (enable_mode == xenos::ModeControl::kCopy) {
// Special copy handling. // Special copy handling.
return IssueCopy(); return IssueCopy();
} }
@ -1818,64 +1818,60 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
return true; return true;
} }
// Shaders will have already been defined by previous loads. // Vertex shader.
// We need them to do just about anything so validate here.
auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader()); auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
auto pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
if (!vertex_shader) { if (!vertex_shader) {
// Always need a vertex shader. // Always need a vertex shader.
return false; return false;
} }
// Depth-only mode doesn't need a pixel shader. pipeline_cache_->AnalyzeShaderUcode(*vertex_shader);
if (enable_mode == xenos::ModeControl::kDepth) {
pixel_shader = nullptr;
} else if (!pixel_shader) {
// Need a pixel shader in normal color mode.
return false;
}
// Gather shader ucode information to get the color mask, which is needed by
// the render target cache, and memexport configuration, and also get the
// current shader modification bits.
DxbcShaderTranslator::Modification vertex_shader_modification;
DxbcShaderTranslator::Modification pixel_shader_modification;
if (!pipeline_cache_->AnalyzeShaderUcodeAndGetCurrentModifications(
vertex_shader, pixel_shader, vertex_shader_modification,
pixel_shader_modification)) {
return false;
}
D3D12Shader::D3D12Translation* vertex_shader_translation =
static_cast<D3D12Shader::D3D12Translation*>(
vertex_shader->GetOrCreateTranslation(
vertex_shader_modification.value));
D3D12Shader::D3D12Translation* pixel_shader_translation =
pixel_shader ? static_cast<D3D12Shader::D3D12Translation*>(
pixel_shader->GetOrCreateTranslation(
pixel_shader_modification.value))
: nullptr;
bool tessellated = vertex_shader_modification.host_vertex_shader_type !=
Shader::HostVertexShaderType::kVertex;
// Check if memexport is used. If it is, we can't skip draw calls that have no
// visual effect.
bool memexport_used_vertex = bool memexport_used_vertex =
!vertex_shader->memexport_stream_constants().empty(); !vertex_shader->memexport_stream_constants().empty();
bool memexport_used_pixel = DxbcShaderTranslator::Modification vertex_shader_modification;
pixel_shader != nullptr && pipeline_cache_->GetCurrentShaderModification(*vertex_shader,
!pixel_shader->memexport_stream_constants().empty(); vertex_shader_modification);
bool memexport_used = memexport_used_vertex || memexport_used_pixel; bool tessellated = vertex_shader_modification.host_vertex_shader_type !=
Shader::HostVertexShaderType::kVertex;
bool primitive_polygonal = bool primitive_polygonal =
xenos::IsPrimitivePolygonal(tessellated, primitive_type); xenos::IsPrimitivePolygonal(tessellated, primitive_type);
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>(); // Pixel shader.
if (!memexport_used_vertex && D3D12Shader* pixel_shader = nullptr;
(sq_program_cntl.vs_export_mode == if (draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal)) {
xenos::VertexShaderExportMode::kMultipass || // See xenos::ModeControl for explanation why the pixel shader is only used
(primitive_polygonal && pa_su_sc_mode_cntl.cull_front && // when it's kColorDepth here.
pa_su_sc_mode_cntl.cull_back))) { if (edram_mode == xenos::ModeControl::kColorDepth) {
// All faces are culled - can't be expressed in the pipeline. pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
return true; if (pixel_shader) {
pipeline_cache_->AnalyzeShaderUcode(*pixel_shader);
if (!draw_util::IsPixelShaderNeededWithRasterization(*pixel_shader,
regs)) {
pixel_shader = nullptr;
}
}
}
} else {
// Disabling pixel shader for this case is also required by the pipeline
// cache.
if (!memexport_used_vertex) {
// This draw has no effect.
return true;
}
} }
bool memexport_used_pixel;
DxbcShaderTranslator::Modification pixel_shader_modification;
if (pixel_shader) {
memexport_used_pixel = !pixel_shader->memexport_stream_constants().empty();
if (!pipeline_cache_->GetCurrentShaderModification(
*pixel_shader, pixel_shader_modification)) {
return false;
}
} else {
memexport_used_pixel = false;
pixel_shader_modification = DxbcShaderTranslator::Modification(0);
}
bool memexport_used = memexport_used_vertex || memexport_used_pixel;
BeginSubmission(true); BeginSubmission(true);
@ -1953,6 +1949,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
} }
// Translate the shaders and create the pipeline if needed. // Translate the shaders and create the pipeline if needed.
D3D12Shader::D3D12Translation* vertex_shader_translation =
static_cast<D3D12Shader::D3D12Translation*>(
vertex_shader->GetOrCreateTranslation(
vertex_shader_modification.value));
D3D12Shader::D3D12Translation* pixel_shader_translation =
pixel_shader ? static_cast<D3D12Shader::D3D12Translation*>(
pixel_shader->GetOrCreateTranslation(
pixel_shader_modification.value))
: nullptr;
void* pipeline_handle; void* pipeline_handle;
ID3D12RootSignature* root_signature; ID3D12RootSignature* root_signature;
if (!pipeline_cache_->ConfigurePipeline( if (!pipeline_cache_->ConfigurePipeline(
@ -2844,7 +2849,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
Register stencil_ref_mask_reg; Register stencil_ref_mask_reg;
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>(); auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
if (primitive_polygonal && if (primitive_polygonal &&
regs.Get<reg::RB_DEPTHCONTROL>().backface_enable && draw_util::GetDepthControlForCurrentEdramMode(regs).backface_enable &&
pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) { pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) {
stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF; stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
} else { } else {
@ -2880,7 +2885,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>(); auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>(); auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>(); auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs);
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>(); auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
auto rb_stencilrefmask_bf = auto rb_stencilrefmask_bf =
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF); regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
@ -3068,24 +3073,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
} }
// Conversion to Direct3D 12 normalized device coordinates. // Conversion to Direct3D 12 normalized device coordinates.
// Kill all primitives if multipass or both faces are culled, but still need for (uint32_t i = 0; i < 3; ++i) {
// to do memexport. dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i];
if (sq_program_cntl.vs_export_mode == dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i];
xenos::VertexShaderExportMode::kMultipass || system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i];
(primitive_polygonal && pa_su_sc_mode_cntl.cull_front && system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
pa_su_sc_mode_cntl.cull_back)) {
float nan_value = std::nanf("");
for (uint32_t i = 0; i < 3; ++i) {
dirty |= !std::isnan(system_constants_.ndc_scale[i]);
system_constants_.ndc_scale[i] = nan_value;
}
} else {
for (uint32_t i = 0; i < 3; ++i) {
dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i];
dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i];
system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i];
system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
}
} }
// Point size. // Point size.

View File

@ -33,6 +33,7 @@
#include "xenia/base/string_buffer.h" #include "xenia/base/string_buffer.h"
#include "xenia/base/xxhash.h" #include "xenia/base/xxhash.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/draw_util.h"
#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/gpu_flags.h"
#include "xenia/ui/d3d12/d3d12_util.h" #include "xenia/ui/d3d12/d3d12_util.h"
@ -857,32 +858,30 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
return shader; return shader;
} }
bool PipelineCache::AnalyzeShaderUcodeAndGetCurrentModifications( bool PipelineCache::GetCurrentShaderModification(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, const Shader& shader,
DxbcShaderTranslator::Modification& vertex_shader_modification_out, DxbcShaderTranslator::Modification& modification_out) const {
DxbcShaderTranslator::Modification& pixel_shader_modification_out) { assert_true(shader.is_ucode_analyzed());
Shader::HostVertexShaderType host_vertex_shader_type =
GetCurrentHostVertexShaderTypeIfValid();
if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) {
return false;
}
const auto& regs = register_file_; const auto& regs = register_file_;
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>(); auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
if (shader.type() == xenos::ShaderType::kVertex) {
vertex_shader->AnalyzeUcode(ucode_disasm_buffer_); Shader::HostVertexShaderType host_vertex_shader_type =
vertex_shader_modification_out = DxbcShaderTranslator::Modification( GetCurrentHostVertexShaderTypeIfValid();
shader_translator_->GetDefaultModification( if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) {
xenos::ShaderType::kVertex, return false;
vertex_shader->GetDynamicAddressableRegisterCount( }
sq_program_cntl.vs_num_reg), modification_out = DxbcShaderTranslator::Modification(
host_vertex_shader_type)); shader_translator_->GetDefaultModification(
xenos::ShaderType::kVertex,
if (pixel_shader) { shader.GetDynamicAddressableRegisterCount(
pixel_shader->AnalyzeUcode(ucode_disasm_buffer_); sq_program_cntl.vs_num_reg),
host_vertex_shader_type));
} else {
assert_true(shader.type() == xenos::ShaderType::kPixel);
DxbcShaderTranslator::Modification pixel_shader_modification( DxbcShaderTranslator::Modification pixel_shader_modification(
shader_translator_->GetDefaultModification( shader_translator_->GetDefaultModification(
xenos::ShaderType::kPixel, xenos::ShaderType::kPixel,
pixel_shader->GetDynamicAddressableRegisterCount( shader.GetDynamicAddressableRegisterCount(
sq_program_cntl.ps_num_reg))); sq_program_cntl.ps_num_reg)));
if (!edram_rov_used_) { if (!edram_rov_used_) {
using DepthStencilMode = using DepthStencilMode =
@ -891,7 +890,7 @@ bool PipelineCache::AnalyzeShaderUcodeAndGetCurrentModifications(
flags::DepthFloat24Conversion::kOnOutputTruncating || flags::DepthFloat24Conversion::kOnOutputTruncating ||
depth_float24_conversion_ == depth_float24_conversion_ ==
flags::DepthFloat24Conversion::kOnOutputRounding) && flags::DepthFloat24Conversion::kOnOutputRounding) &&
regs.Get<reg::RB_DEPTHCONTROL>().z_enable && draw_util::GetDepthControlForCurrentEdramMode(regs).z_enable &&
regs.Get<reg::RB_DEPTH_INFO>().depth_format == regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
xenos::DepthRenderTargetFormat::kD24FS8) { xenos::DepthRenderTargetFormat::kD24FS8) {
pixel_shader_modification.depth_stencil_mode = pixel_shader_modification.depth_stencil_mode =
@ -900,11 +899,10 @@ bool PipelineCache::AnalyzeShaderUcodeAndGetCurrentModifications(
? DepthStencilMode::kFloat24Truncating ? DepthStencilMode::kFloat24Truncating
: DepthStencilMode::kFloat24Rounding; : DepthStencilMode::kFloat24Rounding;
} else { } else {
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>(); if (shader.implicit_early_z_write_allowed() &&
if (pixel_shader->implicit_early_z_write_allowed() && (!shader.writes_color_target(0) ||
(!rb_colorcontrol.alpha_test_enable || !draw_util::DoesCoverageDependOnAlpha(
rb_colorcontrol.alpha_func == xenos::CompareFunction::kAlways) && regs.Get<reg::RB_COLORCONTROL>()))) {
!rb_colorcontrol.alpha_to_mask_enable) {
pixel_shader_modification.depth_stencil_mode = pixel_shader_modification.depth_stencil_mode =
DepthStencilMode::kEarlyHint; DepthStencilMode::kEarlyHint;
} else { } else {
@ -913,11 +911,7 @@ bool PipelineCache::AnalyzeShaderUcodeAndGetCurrentModifications(
} }
} }
} }
pixel_shader_modification_out = pixel_shader_modification; modification_out = pixel_shader_modification;
} else {
pixel_shader_modification_out = DxbcShaderTranslator::Modification(
shader_translator_->GetDefaultModification(xenos::ShaderType::kPixel,
0));
} }
return true; return true;
} }
@ -1336,6 +1330,21 @@ bool PipelineCache::GetCurrentStateDescription(
bool tessellated = bool tessellated =
DxbcShaderTranslator::Modification(vertex_shader->modification()) DxbcShaderTranslator::Modification(vertex_shader->modification())
.host_vertex_shader_type != Shader::HostVertexShaderType::kVertex; .host_vertex_shader_type != Shader::HostVertexShaderType::kVertex;
bool primitive_polygonal =
xenos::IsPrimitivePolygonal(tessellated, primitive_type);
bool rasterization_enabled =
draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal);
// In Direct3D, rasterization (along with pixel counting) is disabled by
// disabling the pixel shader and depth / stencil. However, if rasterization
// should be disabled, the pixel shader must be disabled externally, to ensure
// that things like the texture binding layout are correct for the shader
// actually being used (don't replace anything here).
if (!rasterization_enabled) {
assert_null(pixel_shader);
if (pixel_shader) {
return false;
}
}
// Root signature. // Root signature.
runtime_description_out.root_signature = command_processor_.GetRootSignature( runtime_description_out.root_signature = command_processor_.GetRootSignature(
@ -1347,17 +1356,11 @@ bool PipelineCache::GetCurrentStateDescription(
return false; return false;
} }
// Shaders. // Vertex shader.
runtime_description_out.vertex_shader = vertex_shader; runtime_description_out.vertex_shader = vertex_shader;
description_out.vertex_shader_hash = description_out.vertex_shader_hash =
vertex_shader->shader().ucode_data_hash(); vertex_shader->shader().ucode_data_hash();
description_out.vertex_shader_modification = vertex_shader->modification(); description_out.vertex_shader_modification = vertex_shader->modification();
if (pixel_shader) {
runtime_description_out.pixel_shader = pixel_shader;
description_out.pixel_shader_hash =
pixel_shader->shader().ucode_data_hash();
description_out.pixel_shader_modification = pixel_shader->modification();
}
// Index buffer strip cut value. // Index buffer strip cut value.
if (pa_su_sc_mode_cntl.multi_prim_ib_ena) { if (pa_su_sc_mode_cntl.multi_prim_ib_ena) {
@ -1411,8 +1414,20 @@ bool PipelineCache::GetCurrentStateDescription(
} }
} }
bool primitive_polygonal = // The rest doesn't matter when rasterization is disabled (thus no writing to
xenos::IsPrimitivePolygonal(tessellated, primitive_type); // anywhere from post-geometry stages and no samples are counted).
if (!rasterization_enabled) {
description_out.cull_mode = PipelineCullMode::kDisableRasterization;
return true;
}
// Pixel shader.
if (pixel_shader) {
runtime_description_out.pixel_shader = pixel_shader;
description_out.pixel_shader_hash =
pixel_shader->shader().ucode_data_hash();
description_out.pixel_shader_modification = pixel_shader->modification();
}
// Rasterizer state. // Rasterizer state.
// Because Direct3D 12 doesn't support per-side fill mode and depth bias, the // Because Direct3D 12 doesn't support per-side fill mode and depth bias, the
@ -1428,7 +1443,8 @@ bool PipelineCache::GetCurrentStateDescription(
// developer didn't want to fill the whole primitive and use wireframe (like // developer didn't want to fill the whole primitive and use wireframe (like
// Xenos fill mode 1). // Xenos fill mode 1).
// Here we also assume that only one side is culled - if two sides are culled, // Here we also assume that only one side is culled - if two sides are culled,
// the D3D12 command processor will drop such draw early. // rasterization will be disabled externally, or the draw call will be dropped
// early if the vertex shader doesn't export to memory.
bool cull_front, cull_back; bool cull_front, cull_back;
float poly_offset = 0.0f, poly_offset_scale = 0.0f; float poly_offset = 0.0f, poly_offset_scale = 0.0f;
if (primitive_polygonal) { if (primitive_polygonal) {
@ -1436,6 +1452,9 @@ bool PipelineCache::GetCurrentStateDescription(
cull_front = pa_su_sc_mode_cntl.cull_front != 0; cull_front = pa_su_sc_mode_cntl.cull_front != 0;
cull_back = pa_su_sc_mode_cntl.cull_back != 0; cull_back = pa_su_sc_mode_cntl.cull_back != 0;
if (cull_front) { if (cull_front) {
// The case when both faces are culled should be handled by disabling
// rasterization.
assert_false(cull_back);
description_out.cull_mode = PipelineCullMode::kFront; description_out.cull_mode = PipelineCullMode::kFront;
} else if (cull_back) { } else if (cull_back) {
description_out.cull_mode = PipelineCullMode::kBack; description_out.cull_mode = PipelineCullMode::kBack;
@ -1522,7 +1541,8 @@ bool PipelineCache::GetCurrentStateDescription(
// Depth/stencil. No stencil, always passing depth test and no depth writing // Depth/stencil. No stencil, always passing depth test and no depth writing
// means depth disabled. // means depth disabled.
if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) { if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) {
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>(); auto rb_depthcontrol =
draw_util::GetDepthControlForCurrentEdramMode(regs);
if (rb_depthcontrol.z_enable) { if (rb_depthcontrol.z_enable) {
description_out.depth_func = rb_depthcontrol.zfunc; description_out.depth_func = rb_depthcontrol.zfunc;
description_out.depth_write = rb_depthcontrol.z_write_enable; description_out.depth_write = rb_depthcontrol.z_write_enable;
@ -1864,6 +1884,9 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK;
break; break;
default: default:
assert_true(description.cull_mode == PipelineCullMode::kNone ||
description.cull_mode ==
PipelineCullMode::kDisableRasterization);
state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
break; break;
} }
@ -1990,6 +2013,23 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
} }
} }
// Disable rasterization if needed (parameter combinations that make no
// difference when rasterization is disabled have already been handled in
// GetCurrentStateDescription) the way it's disabled in Direct3D by design
// (disabling a pixel shader and depth / stencil).
// TODO(Triang3l): When it happens to be that a combination of parameters
// (no host pixel shader and depth / stencil without ROV) would disable
// rasterization when it's still needed (for occlusion query sample counting),
// ensure rasterization happens (by binding an empty pixel shader, or maybe
// via ForcedSampleCount when not using 2x MSAA - its requirements for
// OMSetRenderTargets need some investigation though).
if (description.cull_mode == PipelineCullMode::kDisableRasterization) {
state_desc.PS.pShaderBytecode = nullptr;
state_desc.PS.BytecodeLength = 0;
state_desc.DepthStencilState.DepthEnable = FALSE;
state_desc.DepthStencilState.StencilEnable = FALSE;
}
// Create the D3D12 pipeline state object. // Create the D3D12 pipeline state object.
auto device = auto device =
command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();

View File

@ -63,14 +63,19 @@ class PipelineCache {
D3D12Shader* LoadShader(xenos::ShaderType shader_type, D3D12Shader* LoadShader(xenos::ShaderType shader_type,
const uint32_t* host_address, uint32_t dword_count); const uint32_t* host_address, uint32_t dword_count);
// Analyze shader microcode on the translator thread. Forwards to
// Shader::AnalyzeUcode with this cache's ucode disassembly buffer. Must be
// done before GetCurrentShaderModification is called for the shader, as that
// function asserts that the microcode has already been analyzed.
void AnalyzeShaderUcode(Shader& shader) {
shader.AnalyzeUcode(ucode_disasm_buffer_);
}
// Ensures microcode is analyzed, retrieves the shader modifications for the // Retrieves the shader modification for the current state, and returns
// current state, and returns whether they are valid. // whether it is valid. The shader must have microcode analyzed.
bool AnalyzeShaderUcodeAndGetCurrentModifications( bool PipelineCache::GetCurrentShaderModification(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, const Shader& shader,
DxbcShaderTranslator::Modification& vertex_shader_modification_out, DxbcShaderTranslator::Modification& modification_out) const;
DxbcShaderTranslator::Modification& pixel_shader_modification_out);
// If draw_util::IsRasterizationPotentiallyDone is false, the pixel shader
// MUST be made nullptr BEFORE calling this!
bool ConfigurePipeline( bool ConfigurePipeline(
D3D12Shader::D3D12Translation* vertex_shader, D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader, D3D12Shader::D3D12Translation* pixel_shader,
@ -134,6 +139,8 @@ class PipelineCache {
kNone, kNone,
kFront, kFront,
kBack, kBack,
// Special case, handled via disabling the pixel shader and depth / stencil.
kDisableRasterization,
}; };
enum class PipelineBlendFactor : uint32_t { enum class PipelineBlendFactor : uint32_t {
@ -234,6 +241,8 @@ class PipelineCache {
IDxcUtils* dxc_utils = nullptr, IDxcUtils* dxc_utils = nullptr,
IDxcCompiler* dxc_compiler = nullptr); IDxcCompiler* dxc_compiler = nullptr);
// If draw_util::IsRasterizationPotentiallyDone is false, the pixel shader
// MUST be made nullptr BEFORE calling this!
bool GetCurrentStateDescription( bool GetCurrentStateDescription(
D3D12Shader::D3D12Translation* vertex_shader, D3D12Shader::D3D12Translation* vertex_shader,
D3D12Shader::D3D12Translation* pixel_shader, D3D12Shader::D3D12Translation* pixel_shader,

View File

@ -647,7 +647,7 @@ bool RenderTargetCache::UpdateRenderTargets(
formats_are_64bpp[i] = xenos::IsColorRenderTargetFormat64bpp( formats_are_64bpp[i] = xenos::IsColorRenderTargetFormat64bpp(
xenos::ColorRenderTargetFormat(formats[i])); xenos::ColorRenderTargetFormat(formats[i]));
} }
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>(); auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs);
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>(); auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
// 0x1 = stencil test, 0x2 = depth test. // 0x1 = stencil test, 0x2 = depth test.
enabled[4] = rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable; enabled[4] = rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;

View File

@ -111,6 +111,78 @@ int32_t FloatToD3D11Fixed16p8(float f32) {
return result.s; return result.s;
} }
bool IsRasterizationPotentiallyDone(const RegisterFile& regs,
bool primitive_polygonal) {
// TODO(Triang3l): Investigate ModeControl::kIgnore better, with respect to
// sample counting. Let's assume sample counting is a part of depth / stencil,
// thus disabled too.
xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode != xenos::ModeControl::kColorDepth &&
edram_mode != xenos::ModeControl::kDepth) {
return false;
}
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
if (sq_program_cntl.vs_export_mode ==
xenos::VertexShaderExportMode::kMultipass) {
return false;
}
if (primitive_polygonal) {
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
if (pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back) {
// Both faces are culled.
return false;
}
}
return true;
}
bool IsPixelShaderNeededWithRasterization(const Shader& shader,
const RegisterFile& regs) {
assert_true(shader.type() == xenos::ShaderType::kPixel);
assert_true(shader.is_ucode_analyzed());
// See xenos::ModeControl for explanation why the pixel shader is only used
// when it's kColorDepth here.
if (regs.Get<reg::RB_MODECONTROL>().edram_mode !=
xenos::ModeControl::kColorDepth) {
return false;
}
// Discarding (explicitly or through alphatest or alpha to coverage) has side
// effects on pixel counting.
//
// Depth output only really matters if depth test is active, but it's used
// extremely rarely, and pretty much always intentionally - for simplicity,
// consider it as always mattering.
//
// Memory export is an obvious intentional side effect.
if (shader.kills_pixels() || shader.writes_depth() ||
!shader.memexport_stream_constants().empty() ||
(shader.writes_color_target(0) &&
DoesCoverageDependOnAlpha(regs.Get<reg::RB_COLORCONTROL>()))) {
return true;
}
// Check if a color target is actually written.
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
uint32_t rts_remaining = shader.writes_color_targets();
uint32_t rt_index;
while (xe::bit_scan_forward(rts_remaining, &rt_index)) {
rts_remaining &= ~(uint32_t(1) << rt_index);
uint32_t format_component_count = GetColorRenderTargetFormatComponentCount(
regs.Get<reg::RB_COLOR_INFO>(
reg::RB_COLOR_INFO::rt_register_indices[rt_index])
.color_format);
if ((rb_color_mask >> (rt_index * 4)) &
((uint32_t(1) << format_component_count) - 1)) {
return true;
}
}
// Only depth / stencil passthrough potentially.
return false;
}
void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x, void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
float pixel_size_y, bool origin_bottom_left, float pixel_size_y, bool origin_bottom_left,
float x_max, float y_max, bool allow_reverse_z, float x_max, float y_max, bool allow_reverse_z,
@ -271,7 +343,8 @@ void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
ndc_scale_z = -ndc_scale_z; ndc_scale_z = -ndc_scale_z;
ndc_offset_z = 1.0f - ndc_offset_z; ndc_offset_z = 1.0f - ndc_offset_z;
} }
if (convert_z_to_float24 && regs.Get<reg::RB_DEPTHCONTROL>().z_enable && if (convert_z_to_float24 &&
GetDepthControlForCurrentEdramMode(regs).z_enable &&
regs.Get<reg::RB_DEPTH_INFO>().depth_format == regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
xenos::DepthRenderTargetFormat::kD24FS8) { xenos::DepthRenderTargetFormat::kD24FS8) {
// Need to adjust the bounds that the resulting depth values will be clamped // Need to adjust the bounds that the resulting depth values will be clamped

View File

@ -16,6 +16,7 @@
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/gpu/register_file.h" #include "xenia/gpu/register_file.h"
#include "xenia/gpu/registers.h" #include "xenia/gpu/registers.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/trace_writer.h" #include "xenia/gpu/trace_writer.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/memory.h" #include "xenia/memory.h"
@ -33,6 +34,45 @@ namespace draw_util {
// for use with the top-left rasterization rule later. // for use with the top-left rasterization rule later.
int32_t FloatToD3D11Fixed16p8(float f32); int32_t FloatToD3D11Fixed16p8(float f32);
// Whether, with the current register state, any samples to rasterize (for any
// reason, not only to write something to a render target, but also to do
// sample counting or pixel shader memexport) can be generated.
// Returns false if the EDRAM mode is neither kColorDepth nor kDepth, if the
// vertex shader export mode is kMultipass, or if the primitive is polygonal
// and both faces are culled.
// Even when this returns false, the draw call as a whole can only be dropped
// if the vertex shader doesn't memexport.
bool IsRasterizationPotentiallyDone(const RegisterFile& regs,
bool primitive_polygonal);
// Returns the RB_DEPTHCONTROL value that is effective for the current EDRAM
// mode: the actual register contents in the kColorDepth and kDepth modes, or
// an all-zero value (depth and stencil both disabled) in any other mode.
inline reg::RB_DEPTHCONTROL GetDepthControlForCurrentEdramMode(
    const RegisterFile& regs) {
  const xenos::ModeControl current_edram_mode =
      regs.Get<reg::RB_MODECONTROL>().edram_mode;
  switch (current_edram_mode) {
    case xenos::ModeControl::kColorDepth:
    case xenos::ModeControl::kDepth:
      return regs.Get<reg::RB_DEPTHCONTROL>();
    default:
      break;
  }
  // Both depth and stencil disabled (EDRAM depth and stencil ignored).
  reg::RB_DEPTHCONTROL depth_control_off;
  depth_control_off.value = 0;
  return depth_control_off;
}
// Whether the alpha output of the pixel shader can affect which samples are
// covered: true if alpha to mask is enabled, or if the alpha test is enabled
// with a comparison function other than kAlways.
inline bool DoesCoverageDependOnAlpha(reg::RB_COLORCONTROL rb_colorcontrol) {
  if (rb_colorcontrol.alpha_to_mask_enable) {
    return true;
  }
  if (!rb_colorcontrol.alpha_test_enable) {
    return false;
  }
  // An always-passing alpha test never rejects anything.
  return rb_colorcontrol.alpha_func != xenos::CompareFunction::kAlways;
}
// Whether the pixel shader is actually needed on the host for the current
// state. When this returns false, the pixel shader can be disabled on the host
// to speed up depth pre-passes and shadowmaps. The shader must have its ucode
// analyzed. If IsRasterizationPotentiallyDone is false, this shouldn't be
// called, and the result should be assumed to be false instead. Helps reject
// the pixel shader in some cases - memexport draws in Halo 3, and also most of
// some 1-point draws not covering anything done for some reason in different
// games with a leftover pixel shader from the previous draw, but with
// SQ_PROGRAM_CNTL destroyed, reducing the number of unpredictable unneeded
// translations of random shaders with different host modification bits, such
// as register count and depth format-related (though shaders with side effects
// on depth or memory export will still be preserved).
bool IsPixelShaderNeededWithRasterization(const Shader& shader,
const RegisterFile& regs);
struct ViewportInfo { struct ViewportInfo {
// The returned viewport will always be in the positive quarter-plane for // The returned viewport will always be in the positive quarter-plane for
// simplicity of clamping to the maximum size supported by the host, negative // simplicity of clamping to the maximum size supported by the host, negative

View File

@ -892,11 +892,11 @@ class Shader {
// TODO(Triang3l): Investigate what happens to memexport when the pixel // TODO(Triang3l): Investigate what happens to memexport when the pixel
// fails the depth/stencil test, but in Direct3D 11 UAV writes disable early // fails the depth/stencil test, but in Direct3D 11 UAV writes disable early
// depth/stencil. // depth/stencil.
return !writes_depth() && !kills_pixels() && return !kills_pixels() && !writes_depth() &&
memexport_stream_constants().empty(); memexport_stream_constants().empty();
} }
// Whether each color render target is written to on any exection path. // Whether each color render target is written to on any execution path.
uint32_t writes_color_targets() const { return writes_color_targets_; } uint32_t writes_color_targets() const { return writes_color_targets_; }
bool writes_color_target(uint32_t i) const { bool writes_color_target(uint32_t i) const {
return (writes_color_targets() & (uint32_t(1) << i)) != 0; return (writes_color_targets() & (uint32_t(1) << i)) != 0;
@ -954,8 +954,9 @@ class Shader {
// compiled when a new material appears in the game, and having the order of // compiled when a new material appears in the game, and having the order of
// draws also matter in such unpredictable way would break this rule; limit // draws also matter in such unpredictable way would break this rule; limit
// the effect to shaders with dynamic register addressing only, which are // the effect to shaders with dynamic register addressing only, which are
// extremely rare), also some info needed for drawing is collected during the // extremely rare; however care should be taken regarding depth format-related
// ucode analysis. // translation modifications in this case), also some info needed for drawing
// is collected during the ucode analysis.
bool is_ucode_analyzed_ = false; bool is_ucode_analyzed_ = false;
std::string ucode_disassembly_; std::string ucode_disassembly_;

View File

@ -297,6 +297,20 @@ constexpr bool IsColorRenderTargetFormat64bpp(ColorRenderTargetFormat format) {
format == ColorRenderTargetFormat::k_32_32_FLOAT; format == ColorRenderTargetFormat::k_32_32_FLOAT;
} }
// Returns the number of color components stored in a render target of the
// given format: 1 for k_32_FLOAT, 2 for the two-component 16_16 / 32_32
// formats, and 4 for every other format.
inline uint32_t GetColorRenderTargetFormatComponentCount(
    ColorRenderTargetFormat format) {
  if (format == ColorRenderTargetFormat::k_32_FLOAT) {
    return 1;
  }
  const bool has_two_components =
      format == ColorRenderTargetFormat::k_16_16 ||
      format == ColorRenderTargetFormat::k_16_16_FLOAT ||
      format == ColorRenderTargetFormat::k_32_32_FLOAT;
  return has_two_components ? 2 : 4;
}
enum class DepthRenderTargetFormat : uint32_t { enum class DepthRenderTargetFormat : uint32_t {
kD24S8 = 0, kD24S8 = 0,
// 20e4 [0, 2). // 20e4 [0, 2).
@ -749,6 +763,26 @@ enum class PolygonType : uint32_t {
enum class ModeControl : uint32_t { enum class ModeControl : uint32_t {
kIgnore = 0, kIgnore = 0,
kColorDepth = 4, kColorDepth = 4,
// TODO(Triang3l): Verify whether kDepth means the pixel shader is ignored
// completely even if it writes depth, exports to memory or kills pixels.
// Hints suggesting that it should be completely ignored (which is desirable
// on real hardware to avoid scheduling the pixel shader at all and waiting
// for it especially since the Xbox 360 doesn't have early per-sample depth /
// stencil, only early hi-Z / hi-stencil, and other registers possibly
// toggling pixel shader execution are yet to be found):
// - Most of depth pre-pass draws in Call of Duty 4 use the kDepth mode with
// a `oC0 = tfetch2D(tf0, r0.xy) * r1` shader, some use `oC0 = r0` though.
// However, when alphatested surfaces are drawn, kColorDepth is explicitly
// used with the same shader performing the texture fetch.
// - Red Dead Redemption has some kDepth draws with alphatest enabled, but the
// shader is `oC0 = r0`, which makes no sense (alphatest based on an
// interpolant from the vertex shader) as no texture alpha cutout is
// involved.
// - Red Dead Redemption also has kDepth draws with pretty complex shaders
// clearly for use only in the color pass - even fetching and filtering a
// shadowmap.
// For now, based on these, let's assume the pixel shader is never used with
// kDepth.
kDepth = 5, kDepth = 5,
kCopy = 6, kCopy = 6,
}; };