[GPU] PS and rasterization disabling cleanup
This commit is contained in:
parent
733efa5ff8
commit
dcde08a493
|
@ -101,6 +101,10 @@ void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) {
|
|||
uint32_t D3D12CommandProcessor::GetCurrentColorMask(
|
||||
uint32_t shader_writes_color_targets) const {
|
||||
auto& regs = *register_file_;
|
||||
if (regs.Get<reg::RB_MODECONTROL>().edram_mode !=
|
||||
xenos::ModeControl::kColorDepth) {
|
||||
return 0;
|
||||
}
|
||||
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (!(shader_writes_color_targets & (1 << i))) {
|
||||
|
@ -1801,12 +1805,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
xenos::ModeControl enable_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
|
||||
if (enable_mode == xenos::ModeControl::kIgnore) {
|
||||
// Ignored.
|
||||
return true;
|
||||
}
|
||||
if (enable_mode == xenos::ModeControl::kCopy) {
|
||||
xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
|
||||
if (edram_mode == xenos::ModeControl::kCopy) {
|
||||
// Special copy handling.
|
||||
return IssueCopy();
|
||||
}
|
||||
|
@ -1818,64 +1818,60 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Shaders will have already been defined by previous loads.
|
||||
// We need them to do just about anything so validate here.
|
||||
// Vertex shader.
|
||||
auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
|
||||
auto pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
|
||||
if (!vertex_shader) {
|
||||
// Always need a vertex shader.
|
||||
return false;
|
||||
}
|
||||
// Depth-only mode doesn't need a pixel shader.
|
||||
if (enable_mode == xenos::ModeControl::kDepth) {
|
||||
pixel_shader = nullptr;
|
||||
} else if (!pixel_shader) {
|
||||
// Need a pixel shader in normal color mode.
|
||||
return false;
|
||||
}
|
||||
// Gather shader ucode information to get the color mask, which is needed by
|
||||
// the render target cache, and memexport configuration, and also get the
|
||||
// current shader modification bits.
|
||||
DxbcShaderTranslator::Modification vertex_shader_modification;
|
||||
DxbcShaderTranslator::Modification pixel_shader_modification;
|
||||
if (!pipeline_cache_->AnalyzeShaderUcodeAndGetCurrentModifications(
|
||||
vertex_shader, pixel_shader, vertex_shader_modification,
|
||||
pixel_shader_modification)) {
|
||||
return false;
|
||||
}
|
||||
D3D12Shader::D3D12Translation* vertex_shader_translation =
|
||||
static_cast<D3D12Shader::D3D12Translation*>(
|
||||
vertex_shader->GetOrCreateTranslation(
|
||||
vertex_shader_modification.value));
|
||||
D3D12Shader::D3D12Translation* pixel_shader_translation =
|
||||
pixel_shader ? static_cast<D3D12Shader::D3D12Translation*>(
|
||||
pixel_shader->GetOrCreateTranslation(
|
||||
pixel_shader_modification.value))
|
||||
: nullptr;
|
||||
bool tessellated = vertex_shader_modification.host_vertex_shader_type !=
|
||||
Shader::HostVertexShaderType::kVertex;
|
||||
|
||||
// Check if memexport is used. If it is, we can't skip draw calls that have no
|
||||
// visual effect.
|
||||
pipeline_cache_->AnalyzeShaderUcode(*vertex_shader);
|
||||
bool memexport_used_vertex =
|
||||
!vertex_shader->memexport_stream_constants().empty();
|
||||
bool memexport_used_pixel =
|
||||
pixel_shader != nullptr &&
|
||||
!pixel_shader->memexport_stream_constants().empty();
|
||||
bool memexport_used = memexport_used_vertex || memexport_used_pixel;
|
||||
|
||||
DxbcShaderTranslator::Modification vertex_shader_modification;
|
||||
pipeline_cache_->GetCurrentShaderModification(*vertex_shader,
|
||||
vertex_shader_modification);
|
||||
bool tessellated = vertex_shader_modification.host_vertex_shader_type !=
|
||||
Shader::HostVertexShaderType::kVertex;
|
||||
bool primitive_polygonal =
|
||||
xenos::IsPrimitivePolygonal(tessellated, primitive_type);
|
||||
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
|
||||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
if (!memexport_used_vertex &&
|
||||
(sq_program_cntl.vs_export_mode ==
|
||||
xenos::VertexShaderExportMode::kMultipass ||
|
||||
(primitive_polygonal && pa_su_sc_mode_cntl.cull_front &&
|
||||
pa_su_sc_mode_cntl.cull_back))) {
|
||||
// All faces are culled - can't be expressed in the pipeline.
|
||||
|
||||
// Pixel shader.
|
||||
D3D12Shader* pixel_shader = nullptr;
|
||||
if (draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal)) {
|
||||
// See xenos::ModeControl for explanation why the pixel shader is only used
|
||||
// when it's kColorDepth here.
|
||||
if (edram_mode == xenos::ModeControl::kColorDepth) {
|
||||
pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
|
||||
if (pixel_shader) {
|
||||
pipeline_cache_->AnalyzeShaderUcode(*pixel_shader);
|
||||
if (!draw_util::IsPixelShaderNeededWithRasterization(*pixel_shader,
|
||||
regs)) {
|
||||
pixel_shader = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Disabling pixel shader for this case is also required by the pipeline
|
||||
// cache.
|
||||
if (!memexport_used_vertex) {
|
||||
// This draw has no effect.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
bool memexport_used_pixel;
|
||||
DxbcShaderTranslator::Modification pixel_shader_modification;
|
||||
if (pixel_shader) {
|
||||
memexport_used_pixel = !pixel_shader->memexport_stream_constants().empty();
|
||||
if (!pipeline_cache_->GetCurrentShaderModification(
|
||||
*pixel_shader, pixel_shader_modification)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
memexport_used_pixel = false;
|
||||
pixel_shader_modification = DxbcShaderTranslator::Modification(0);
|
||||
}
|
||||
|
||||
bool memexport_used = memexport_used_vertex || memexport_used_pixel;
|
||||
|
||||
BeginSubmission(true);
|
||||
|
||||
|
@ -1953,6 +1949,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
}
|
||||
|
||||
// Translate the shaders and create the pipeline if needed.
|
||||
D3D12Shader::D3D12Translation* vertex_shader_translation =
|
||||
static_cast<D3D12Shader::D3D12Translation*>(
|
||||
vertex_shader->GetOrCreateTranslation(
|
||||
vertex_shader_modification.value));
|
||||
D3D12Shader::D3D12Translation* pixel_shader_translation =
|
||||
pixel_shader ? static_cast<D3D12Shader::D3D12Translation*>(
|
||||
pixel_shader->GetOrCreateTranslation(
|
||||
pixel_shader_modification.value))
|
||||
: nullptr;
|
||||
void* pipeline_handle;
|
||||
ID3D12RootSignature* root_signature;
|
||||
if (!pipeline_cache_->ConfigurePipeline(
|
||||
|
@ -2844,7 +2849,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
|
|||
Register stencil_ref_mask_reg;
|
||||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
if (primitive_polygonal &&
|
||||
regs.Get<reg::RB_DEPTHCONTROL>().backface_enable &&
|
||||
draw_util::GetDepthControlForCurrentEdramMode(regs).backface_enable &&
|
||||
pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) {
|
||||
stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
|
||||
} else {
|
||||
|
@ -2880,7 +2885,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
|
||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
|
||||
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
|
||||
auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs);
|
||||
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
|
||||
auto rb_stencilrefmask_bf =
|
||||
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
|
||||
|
@ -3068,25 +3073,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
}
|
||||
|
||||
// Conversion to Direct3D 12 normalized device coordinates.
|
||||
// Kill all primitives if multipass or both faces are culled, but still need
|
||||
// to do memexport.
|
||||
if (sq_program_cntl.vs_export_mode ==
|
||||
xenos::VertexShaderExportMode::kMultipass ||
|
||||
(primitive_polygonal && pa_su_sc_mode_cntl.cull_front &&
|
||||
pa_su_sc_mode_cntl.cull_back)) {
|
||||
float nan_value = std::nanf("");
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
dirty |= !std::isnan(system_constants_.ndc_scale[i]);
|
||||
system_constants_.ndc_scale[i] = nan_value;
|
||||
}
|
||||
} else {
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i];
|
||||
dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i];
|
||||
system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i];
|
||||
system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Point size.
|
||||
float point_size_x = float(pa_su_point_size.width) * 0.125f;
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/ui/d3d12/d3d12_util.h"
|
||||
|
||||
|
@ -857,32 +858,30 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
return shader;
|
||||
}
|
||||
|
||||
bool PipelineCache::AnalyzeShaderUcodeAndGetCurrentModifications(
|
||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||
DxbcShaderTranslator::Modification& vertex_shader_modification_out,
|
||||
DxbcShaderTranslator::Modification& pixel_shader_modification_out) {
|
||||
bool PipelineCache::GetCurrentShaderModification(
|
||||
const Shader& shader,
|
||||
DxbcShaderTranslator::Modification& modification_out) const {
|
||||
assert_true(shader.is_ucode_analyzed());
|
||||
const auto& regs = register_file_;
|
||||
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
|
||||
if (shader.type() == xenos::ShaderType::kVertex) {
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
GetCurrentHostVertexShaderTypeIfValid();
|
||||
if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) {
|
||||
return false;
|
||||
}
|
||||
const auto& regs = register_file_;
|
||||
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
|
||||
|
||||
vertex_shader->AnalyzeUcode(ucode_disasm_buffer_);
|
||||
vertex_shader_modification_out = DxbcShaderTranslator::Modification(
|
||||
modification_out = DxbcShaderTranslator::Modification(
|
||||
shader_translator_->GetDefaultModification(
|
||||
xenos::ShaderType::kVertex,
|
||||
vertex_shader->GetDynamicAddressableRegisterCount(
|
||||
shader.GetDynamicAddressableRegisterCount(
|
||||
sq_program_cntl.vs_num_reg),
|
||||
host_vertex_shader_type));
|
||||
|
||||
if (pixel_shader) {
|
||||
pixel_shader->AnalyzeUcode(ucode_disasm_buffer_);
|
||||
} else {
|
||||
assert_true(shader.type() == xenos::ShaderType::kPixel);
|
||||
DxbcShaderTranslator::Modification pixel_shader_modification(
|
||||
shader_translator_->GetDefaultModification(
|
||||
xenos::ShaderType::kPixel,
|
||||
pixel_shader->GetDynamicAddressableRegisterCount(
|
||||
shader.GetDynamicAddressableRegisterCount(
|
||||
sq_program_cntl.ps_num_reg)));
|
||||
if (!edram_rov_used_) {
|
||||
using DepthStencilMode =
|
||||
|
@ -891,7 +890,7 @@ bool PipelineCache::AnalyzeShaderUcodeAndGetCurrentModifications(
|
|||
flags::DepthFloat24Conversion::kOnOutputTruncating ||
|
||||
depth_float24_conversion_ ==
|
||||
flags::DepthFloat24Conversion::kOnOutputRounding) &&
|
||||
regs.Get<reg::RB_DEPTHCONTROL>().z_enable &&
|
||||
draw_util::GetDepthControlForCurrentEdramMode(regs).z_enable &&
|
||||
regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
|
||||
xenos::DepthRenderTargetFormat::kD24FS8) {
|
||||
pixel_shader_modification.depth_stencil_mode =
|
||||
|
@ -900,11 +899,10 @@ bool PipelineCache::AnalyzeShaderUcodeAndGetCurrentModifications(
|
|||
? DepthStencilMode::kFloat24Truncating
|
||||
: DepthStencilMode::kFloat24Rounding;
|
||||
} else {
|
||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||
if (pixel_shader->implicit_early_z_write_allowed() &&
|
||||
(!rb_colorcontrol.alpha_test_enable ||
|
||||
rb_colorcontrol.alpha_func == xenos::CompareFunction::kAlways) &&
|
||||
!rb_colorcontrol.alpha_to_mask_enable) {
|
||||
if (shader.implicit_early_z_write_allowed() &&
|
||||
(!shader.writes_color_target(0) ||
|
||||
!draw_util::DoesCoverageDependOnAlpha(
|
||||
regs.Get<reg::RB_COLORCONTROL>()))) {
|
||||
pixel_shader_modification.depth_stencil_mode =
|
||||
DepthStencilMode::kEarlyHint;
|
||||
} else {
|
||||
|
@ -913,11 +911,7 @@ bool PipelineCache::AnalyzeShaderUcodeAndGetCurrentModifications(
|
|||
}
|
||||
}
|
||||
}
|
||||
pixel_shader_modification_out = pixel_shader_modification;
|
||||
} else {
|
||||
pixel_shader_modification_out = DxbcShaderTranslator::Modification(
|
||||
shader_translator_->GetDefaultModification(xenos::ShaderType::kPixel,
|
||||
0));
|
||||
modification_out = pixel_shader_modification;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -1336,6 +1330,21 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
bool tessellated =
|
||||
DxbcShaderTranslator::Modification(vertex_shader->modification())
|
||||
.host_vertex_shader_type != Shader::HostVertexShaderType::kVertex;
|
||||
bool primitive_polygonal =
|
||||
xenos::IsPrimitivePolygonal(tessellated, primitive_type);
|
||||
bool rasterization_enabled =
|
||||
draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal);
|
||||
// In Direct3D, rasterization (along with pixel counting) is disabled by
|
||||
// disabling the pixel shader and depth / stencil. However, if rasterization
|
||||
// should be disabled, the pixel shader must be disabled externally, to ensure
|
||||
// things like texture binding layout is correct for the shader actually being
|
||||
// used (don't replace anything here).
|
||||
if (!rasterization_enabled) {
|
||||
assert_null(pixel_shader);
|
||||
if (pixel_shader) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Root signature.
|
||||
runtime_description_out.root_signature = command_processor_.GetRootSignature(
|
||||
|
@ -1347,17 +1356,11 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
return false;
|
||||
}
|
||||
|
||||
// Shaders.
|
||||
// Vertex shader.
|
||||
runtime_description_out.vertex_shader = vertex_shader;
|
||||
description_out.vertex_shader_hash =
|
||||
vertex_shader->shader().ucode_data_hash();
|
||||
description_out.vertex_shader_modification = vertex_shader->modification();
|
||||
if (pixel_shader) {
|
||||
runtime_description_out.pixel_shader = pixel_shader;
|
||||
description_out.pixel_shader_hash =
|
||||
pixel_shader->shader().ucode_data_hash();
|
||||
description_out.pixel_shader_modification = pixel_shader->modification();
|
||||
}
|
||||
|
||||
// Index buffer strip cut value.
|
||||
if (pa_su_sc_mode_cntl.multi_prim_ib_ena) {
|
||||
|
@ -1411,8 +1414,20 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
}
|
||||
}
|
||||
|
||||
bool primitive_polygonal =
|
||||
xenos::IsPrimitivePolygonal(tessellated, primitive_type);
|
||||
// The rest doesn't matter when rasterization is disabled (thus no writing to
|
||||
// anywhere from post-geometry stages and no samples are counted).
|
||||
if (!rasterization_enabled) {
|
||||
description_out.cull_mode = PipelineCullMode::kDisableRasterization;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Pixel shader.
|
||||
if (pixel_shader) {
|
||||
runtime_description_out.pixel_shader = pixel_shader;
|
||||
description_out.pixel_shader_hash =
|
||||
pixel_shader->shader().ucode_data_hash();
|
||||
description_out.pixel_shader_modification = pixel_shader->modification();
|
||||
}
|
||||
|
||||
// Rasterizer state.
|
||||
// Because Direct3D 12 doesn't support per-side fill mode and depth bias, the
|
||||
|
@ -1428,7 +1443,8 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
// developer didn't want to fill the whole primitive and use wireframe (like
|
||||
// Xenos fill mode 1).
|
||||
// Here we also assume that only one side is culled - if two sides are culled,
|
||||
// the D3D12 command processor will drop such draw early.
|
||||
// rasterization will be disabled externally, or the draw call will be dropped
|
||||
// early if the vertex shader doesn't export to memory.
|
||||
bool cull_front, cull_back;
|
||||
float poly_offset = 0.0f, poly_offset_scale = 0.0f;
|
||||
if (primitive_polygonal) {
|
||||
|
@ -1436,6 +1452,9 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
cull_front = pa_su_sc_mode_cntl.cull_front != 0;
|
||||
cull_back = pa_su_sc_mode_cntl.cull_back != 0;
|
||||
if (cull_front) {
|
||||
// The case when both faces are culled should be handled by disabling
|
||||
// rasterization.
|
||||
assert_false(cull_back);
|
||||
description_out.cull_mode = PipelineCullMode::kFront;
|
||||
} else if (cull_back) {
|
||||
description_out.cull_mode = PipelineCullMode::kBack;
|
||||
|
@ -1522,7 +1541,8 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
// Depth/stencil. No stencil, always passing depth test and no depth writing
|
||||
// means depth disabled.
|
||||
if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) {
|
||||
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
|
||||
auto rb_depthcontrol =
|
||||
draw_util::GetDepthControlForCurrentEdramMode(regs);
|
||||
if (rb_depthcontrol.z_enable) {
|
||||
description_out.depth_func = rb_depthcontrol.zfunc;
|
||||
description_out.depth_write = rb_depthcontrol.z_write_enable;
|
||||
|
@ -1864,6 +1884,9 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
|
|||
state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK;
|
||||
break;
|
||||
default:
|
||||
assert_true(description.cull_mode == PipelineCullMode::kNone ||
|
||||
description.cull_mode ==
|
||||
PipelineCullMode::kDisableRasterization);
|
||||
state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
break;
|
||||
}
|
||||
|
@ -1990,6 +2013,23 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
|
|||
}
|
||||
}
|
||||
|
||||
// Disable rasterization if needed (parameter combinations that make no
|
||||
// difference when rasterization is disabled have already been handled in
|
||||
// GetCurrentStateDescription) the way it's disabled in Direct3D by design
|
||||
// (disabling a pixel shader and depth / stencil).
|
||||
// TODO(Triang3l): When it happens to be that a combination of parameters
|
||||
// (no host pixel shader and depth / stencil without ROV) would disable
|
||||
// rasterization when it's still needed (for occlusion query sample counting),
|
||||
// ensure rasterization happens (by binding an empty pixel shader, or maybe
|
||||
// via ForcedSampleCount when not using 2x MSAA - its requirements for
|
||||
// OMSetRenderTargets need some investigation though).
|
||||
if (description.cull_mode == PipelineCullMode::kDisableRasterization) {
|
||||
state_desc.PS.pShaderBytecode = nullptr;
|
||||
state_desc.PS.BytecodeLength = 0;
|
||||
state_desc.DepthStencilState.DepthEnable = FALSE;
|
||||
state_desc.DepthStencilState.StencilEnable = FALSE;
|
||||
}
|
||||
|
||||
// Create the D3D12 pipeline state object.
|
||||
auto device =
|
||||
command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
|
||||
|
|
|
@ -63,14 +63,19 @@ class PipelineCache {
|
|||
|
||||
D3D12Shader* LoadShader(xenos::ShaderType shader_type,
|
||||
const uint32_t* host_address, uint32_t dword_count);
|
||||
// Analyze shader microcode on the translator thread.
|
||||
void AnalyzeShaderUcode(Shader& shader) {
|
||||
shader.AnalyzeUcode(ucode_disasm_buffer_);
|
||||
}
|
||||
|
||||
// Ensures microcode is analyzed, retrieves the shader modifications for the
|
||||
// current state, and returns whether they are valid.
|
||||
bool AnalyzeShaderUcodeAndGetCurrentModifications(
|
||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||
DxbcShaderTranslator::Modification& vertex_shader_modification_out,
|
||||
DxbcShaderTranslator::Modification& pixel_shader_modification_out);
|
||||
// Retrieves the shader modification for the current state, and returns
|
||||
// whether it is valid. The shader must have microcode analyzed.
|
||||
bool PipelineCache::GetCurrentShaderModification(
|
||||
const Shader& shader,
|
||||
DxbcShaderTranslator::Modification& modification_out) const;
|
||||
|
||||
// If draw_util::IsRasterizationPotentiallyDone is false, the pixel shader
|
||||
// MUST be made nullptr BEFORE calling this!
|
||||
bool ConfigurePipeline(
|
||||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader,
|
||||
|
@ -134,6 +139,8 @@ class PipelineCache {
|
|||
kNone,
|
||||
kFront,
|
||||
kBack,
|
||||
// Special case, handled via disabling the pixel shader and depth / stencil.
|
||||
kDisableRasterization,
|
||||
};
|
||||
|
||||
enum class PipelineBlendFactor : uint32_t {
|
||||
|
@ -234,6 +241,8 @@ class PipelineCache {
|
|||
IDxcUtils* dxc_utils = nullptr,
|
||||
IDxcCompiler* dxc_compiler = nullptr);
|
||||
|
||||
// If draw_util::IsRasterizationPotentiallyDone is false, the pixel shader
|
||||
// MUST be made nullptr BEFORE calling this!
|
||||
bool GetCurrentStateDescription(
|
||||
D3D12Shader::D3D12Translation* vertex_shader,
|
||||
D3D12Shader::D3D12Translation* pixel_shader,
|
||||
|
|
|
@ -647,7 +647,7 @@ bool RenderTargetCache::UpdateRenderTargets(
|
|||
formats_are_64bpp[i] = xenos::IsColorRenderTargetFormat64bpp(
|
||||
xenos::ColorRenderTargetFormat(formats[i]));
|
||||
}
|
||||
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
|
||||
auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs);
|
||||
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
|
||||
// 0x1 = stencil test, 0x2 = depth test.
|
||||
enabled[4] = rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
|
||||
|
|
|
@ -111,6 +111,78 @@ int32_t FloatToD3D11Fixed16p8(float f32) {
|
|||
return result.s;
|
||||
}
|
||||
|
||||
bool IsRasterizationPotentiallyDone(const RegisterFile& regs,
|
||||
bool primitive_polygonal) {
|
||||
// TODO(Triang3l): Investigate ModeControl::kIgnore better, with respect to
|
||||
// sample counting. Let's assume sample counting is a part of depth / stencil,
|
||||
// thus disabled too.
|
||||
xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
|
||||
if (edram_mode != xenos::ModeControl::kColorDepth &&
|
||||
edram_mode != xenos::ModeControl::kDepth) {
|
||||
return false;
|
||||
}
|
||||
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
|
||||
if (sq_program_cntl.vs_export_mode ==
|
||||
xenos::VertexShaderExportMode::kMultipass) {
|
||||
return false;
|
||||
}
|
||||
if (primitive_polygonal) {
|
||||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
if (pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back) {
|
||||
// Both faces are culled.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IsPixelShaderNeededWithRasterization(const Shader& shader,
|
||||
const RegisterFile& regs) {
|
||||
assert_true(shader.type() == xenos::ShaderType::kPixel);
|
||||
assert_true(shader.is_ucode_analyzed());
|
||||
|
||||
// See xenos::ModeControl for explanation why the pixel shader is only used
|
||||
// when it's kColorDepth here.
|
||||
if (regs.Get<reg::RB_MODECONTROL>().edram_mode !=
|
||||
xenos::ModeControl::kColorDepth) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Discarding (explicitly or through alphatest or alpha to coverage) has side
|
||||
// effects on pixel counting.
|
||||
//
|
||||
// Depth output only really matters if depth test is active, but it's used
|
||||
// extremely rarely, and pretty much always intentionally - for simplicity,
|
||||
// consider it as always mattering.
|
||||
//
|
||||
// Memory export is an obvious intentional side effect.
|
||||
if (shader.kills_pixels() || shader.writes_depth() ||
|
||||
!shader.memexport_stream_constants().empty() ||
|
||||
(shader.writes_color_target(0) &&
|
||||
DoesCoverageDependOnAlpha(regs.Get<reg::RB_COLORCONTROL>()))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if a color target is actually written.
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
|
||||
uint32_t rts_remaining = shader.writes_color_targets();
|
||||
uint32_t rt_index;
|
||||
while (xe::bit_scan_forward(rts_remaining, &rt_index)) {
|
||||
rts_remaining &= ~(uint32_t(1) << rt_index);
|
||||
uint32_t format_component_count = GetColorRenderTargetFormatComponentCount(
|
||||
regs.Get<reg::RB_COLOR_INFO>(
|
||||
reg::RB_COLOR_INFO::rt_register_indices[rt_index])
|
||||
.color_format);
|
||||
if ((rb_color_mask >> (rt_index * 4)) &
|
||||
((uint32_t(1) << format_component_count) - 1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Only depth / stencil passthrough potentially.
|
||||
return false;
|
||||
}
|
||||
|
||||
void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
|
||||
float pixel_size_y, bool origin_bottom_left,
|
||||
float x_max, float y_max, bool allow_reverse_z,
|
||||
|
@ -271,7 +343,8 @@ void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x,
|
|||
ndc_scale_z = -ndc_scale_z;
|
||||
ndc_offset_z = 1.0f - ndc_offset_z;
|
||||
}
|
||||
if (convert_z_to_float24 && regs.Get<reg::RB_DEPTHCONTROL>().z_enable &&
|
||||
if (convert_z_to_float24 &&
|
||||
GetDepthControlForCurrentEdramMode(regs).z_enable &&
|
||||
regs.Get<reg::RB_DEPTH_INFO>().depth_format ==
|
||||
xenos::DepthRenderTargetFormat::kD24FS8) {
|
||||
// Need to adjust the bounds that the resulting depth values will be clamped
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "xenia/base/assert.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/trace_writer.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/memory.h"
|
||||
|
@ -33,6 +34,45 @@ namespace draw_util {
|
|||
// for use with the top-left rasterization rule later.
|
||||
int32_t FloatToD3D11Fixed16p8(float f32);
|
||||
|
||||
// Whether with the current state, any samples to rasterize (for any reason, not
|
||||
// only to write something to a render target, but also to do sample counting or
|
||||
// pixel shader memexport) can be generated. Finally dropping draw calls can
|
||||
// only be done if the vertex shader doesn't memexport.
|
||||
bool IsRasterizationPotentiallyDone(const RegisterFile& regs,
|
||||
bool primitive_polygonal);
|
||||
|
||||
// Returns the RB_DEPTHCONTROL register value to use for the current EDRAM
// mode: the actual register contents in kColorDepth and kDepth modes, and an
// all-zero value (depth and stencil both disabled) in any other mode.
inline reg::RB_DEPTHCONTROL GetDepthControlForCurrentEdramMode(
    const RegisterFile& regs) {
  const xenos::ModeControl mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
  const bool depth_stencil_in_use = mode == xenos::ModeControl::kColorDepth ||
                                    mode == xenos::ModeControl::kDepth;
  if (depth_stencil_in_use) {
    return regs.Get<reg::RB_DEPTHCONTROL>();
  }
  // Both depth and stencil disabled (EDRAM depth and stencil ignored).
  reg::RB_DEPTHCONTROL all_disabled;
  all_disabled.value = 0;
  return all_disabled;
}
|
||||
|
||||
// Returns true when the given RB_COLORCONTROL state makes sample coverage
// depend on alpha: either alpha to mask is enabled, or alpha test is enabled
// with a compare function other than kAlways.
inline bool DoesCoverageDependOnAlpha(reg::RB_COLORCONTROL rb_colorcontrol) {
  if (rb_colorcontrol.alpha_to_mask_enable) {
    return true;
  }
  if (!rb_colorcontrol.alpha_test_enable) {
    return false;
  }
  // An always-passing alpha test never rejects anything.
  return rb_colorcontrol.alpha_func != xenos::CompareFunction::kAlways;
}
|
||||
|
||||
// Whether the pixel shader can be disabled on the host to speed up depth
|
||||
// pre-passes and shadowmaps. The shader must have its ucode analyzed. If
|
||||
// IsRasterizationPotentiallyDone is false, this shouldn't be called, and assumed false
|
||||
// instead. Helps reject the pixel shader in some cases - memexport draws in
|
||||
// Halo 3, and also most of some 1-point draws not covering anything done for
|
||||
// some reason in different games with a leftover pixel shader from the previous
|
||||
// draw, but with SQ_PROGRAM_CNTL destroyed, reducing the number of
|
||||
// unpredictable unneeded translations of random shaders with different host
|
||||
// modification bits, such as register count and depth format-related (though
|
||||
// shaders with side effects on depth or memory export will still be preserved).
|
||||
bool IsPixelShaderNeededWithRasterization(const Shader& shader,
|
||||
const RegisterFile& regs);
|
||||
|
||||
struct ViewportInfo {
|
||||
// The returned viewport will always be in the positive quarter-plane for
|
||||
// simplicity of clamping to the maximum size supported by the host, negative
|
||||
|
|
|
@ -892,11 +892,11 @@ class Shader {
|
|||
// TODO(Triang3l): Investigate what happens to memexport when the pixel
|
||||
// fails the depth/stencil test, but in Direct3D 11 UAV writes disable early
|
||||
// depth/stencil.
|
||||
return !writes_depth() && !kills_pixels() &&
|
||||
return !kills_pixels() && !writes_depth() &&
|
||||
memexport_stream_constants().empty();
|
||||
}
|
||||
|
||||
// Whether each color render target is written to on any exection path.
|
||||
// Whether each color render target is written to on any execution path.
|
||||
uint32_t writes_color_targets() const { return writes_color_targets_; }
|
||||
bool writes_color_target(uint32_t i) const {
|
||||
return (writes_color_targets() & (uint32_t(1) << i)) != 0;
|
||||
|
@ -954,8 +954,9 @@ class Shader {
|
|||
// compiled when a new material appears in the game, and having the order of
|
||||
// draws also matter in such unpredictable way would break this rule; limit
|
||||
// the effect to shaders with dynamic register addressing only, which are
|
||||
// extremely rare), also some info needed for drawing is collected during the
|
||||
// ucode analysis.
|
||||
// extremely rare; however care should be taken regarding depth format-related
|
||||
// translation modifications in this case), also some info needed for drawing
|
||||
// is collected during the ucode analysis.
|
||||
bool is_ucode_analyzed_ = false;
|
||||
|
||||
std::string ucode_disassembly_;
|
||||
|
|
|
@ -297,6 +297,20 @@ constexpr bool IsColorRenderTargetFormat64bpp(ColorRenderTargetFormat format) {
|
|||
format == ColorRenderTargetFormat::k_32_32_FLOAT;
|
||||
}
|
||||
|
||||
// Returns the number of color components for a color render target format:
// 1 for k_32_FLOAT, 2 for k_16_16, k_16_16_FLOAT and k_32_32_FLOAT, and 4 for
// every other format.
inline uint32_t GetColorRenderTargetFormatComponentCount(
    ColorRenderTargetFormat format) {
  if (format == ColorRenderTargetFormat::k_32_FLOAT) {
    return 1;
  }
  const bool two_components =
      format == ColorRenderTargetFormat::k_16_16 ||
      format == ColorRenderTargetFormat::k_16_16_FLOAT ||
      format == ColorRenderTargetFormat::k_32_32_FLOAT;
  return two_components ? 2 : 4;
}
|
||||
|
||||
enum class DepthRenderTargetFormat : uint32_t {
|
||||
kD24S8 = 0,
|
||||
// 20e4 [0, 2).
|
||||
|
@ -749,6 +763,26 @@ enum class PolygonType : uint32_t {
|
|||
enum class ModeControl : uint32_t {
|
||||
kIgnore = 0,
|
||||
kColorDepth = 4,
|
||||
// TODO(Triang3l): Verify whether kDepth means the pixel shader is ignored
|
||||
// completely even if it writes depth, exports to memory or kills pixels.
|
||||
// Hints suggesting that it should be completely ignored (which is desirable
|
||||
// on real hardware to avoid scheduling the pixel shader at all and waiting
|
||||
// for it especially since the Xbox 360 doesn't have early per-sample depth /
|
||||
// stencil, only early hi-Z / hi-stencil, and other registers possibly
|
||||
// toggling pixel shader execution are yet to be found):
|
||||
// - Most of depth pre-pass draws in Call of Duty 4 use the kDepth mode with
|
||||
// a `oC0 = tfetch2D(tf0, r0.xy) * r1` shader, some use `oC0 = r0` though.
|
||||
// However, when alphatested surfaces are drawn, kColorDepth is explicitly
|
||||
// used with the same shader performing the texture fetch.
|
||||
// - Red Dead Redemption has some kDepth draws with alphatest enabled, but the
|
||||
// shader is `oC0 = r0`, which makes no sense (alphatest based on an
|
||||
// interpolant from the vertex shader) as no texture alpha cutout is
|
||||
// involved.
|
||||
// - Red Dead Redemption also has kDepth draws with pretty complex shaders
|
||||
// clearly for use only in the color pass - even fetching and filtering a
|
||||
// shadowmap.
|
||||
// For now, based on these, let's assume the pixel shader is never used with
|
||||
// kDepth.
|
||||
kDepth = 5,
|
||||
kCopy = 6,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue