[D3D12] Remove runtime check of cvars::d3d12_edram_rov

This commit is contained in:
Triang3l 2020-04-12 20:48:35 +03:00
parent 0f0ed0eb21
commit 5795d25afe
7 changed files with 62 additions and 68 deletions

View File

@ -106,14 +106,6 @@ void D3D12CommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {
render_target_cache_->RestoreEDRAMSnapshot(snapshot);
}
bool D3D12CommandProcessor::IsROVUsedForEDRAM() const {
if (!cvars::d3d12_edram_rov) {
return false;
}
auto provider = GetD3D12Context()->GetD3D12Provider();
return provider->AreRasterizerOrderedViewsSupported();
}
uint32_t D3D12CommandProcessor::GetCurrentColorMask(
const D3D12Shader* pixel_shader) const {
if (pixel_shader == nullptr) {
@ -330,7 +322,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory);
shared_memory_and_edram_ranges[1].RegisterSpace = 0;
shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1;
if (IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
++parameter.DescriptorTable.NumDescriptorRanges;
shared_memory_and_edram_ranges[2].RangeType =
D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
@ -572,7 +564,7 @@ void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) {
// for ROV output. There's hardly any difference between 2,6 (of 0 and 3 with
// 4x MSAA) and 4,4 anyway.
// https://docs.microsoft.com/en-us/windows/desktop/api/d3d12/nf-d3d12-id3d12graphicscommandlist1-setsamplepositions
if (cvars::d3d12_ssaa_custom_sample_positions && !IsROVUsedForEDRAM() &&
if (cvars::d3d12_ssaa_custom_sample_positions && !edram_rov_used_ &&
command_list_1_) {
auto provider = GetD3D12Context()->GetD3D12Provider();
auto tier = provider->GetProgrammableSamplePositionsTier();
@ -664,7 +656,10 @@ void D3D12CommandProcessor::SetExternalGraphicsPipeline(
}
std::string D3D12CommandProcessor::GetWindowTitleText() const {
if (IsROVUsedForEDRAM()) {
if (!render_target_cache_) {
return "Direct3D 12";
}
if (edram_rov_used_) {
// Currently scaling is only supported with ROV.
if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) {
return "Direct3D 12 - ROV 2x";
@ -804,22 +799,25 @@ bool D3D12CommandProcessor::SetupContext() {
return false;
}
edram_rov_used_ =
cvars::d3d12_edram_rov && provider->AreRasterizerOrderedViewsSupported();
texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
shared_memory_.get());
if (!texture_cache_->Initialize()) {
if (!texture_cache_->Initialize(edram_rov_used_)) {
XELOGE("Failed to initialize the texture cache");
return false;
}
render_target_cache_ =
std::make_unique<RenderTargetCache>(this, register_file_, &trace_writer_);
render_target_cache_ = std::make_unique<RenderTargetCache>(
this, register_file_, &trace_writer_, edram_rov_used_);
if (!render_target_cache_->Initialize(texture_cache_.get())) {
XELOGE("Failed to initialize the render target cache");
return false;
}
pipeline_cache_ = std::make_unique<PipelineCache>(
this, register_file_, IsROVUsedForEDRAM(),
this, register_file_, edram_rov_used_,
texture_cache_->IsResolutionScale2X() ? 2 : 1);
if (!pipeline_cache_->Initialize()) {
XELOGE("Failed to initialize the graphics pipeline state cache");
@ -2153,7 +2151,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
// EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also
// resolution scale.
uint32_t pixel_size_x, pixel_size_y;
if (IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
pixel_size_x = 1;
pixel_size_y = 1;
} else {
@ -2260,7 +2258,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
ff_scissor_update_needed_ = false;
}
if (!IsROVUsedForEDRAM()) {
if (!edram_rov_used_) {
// Blend factor.
ff_blend_factor_update_needed_ |=
ff_blend_factor_[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32;
@ -2342,7 +2340,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
reg::RB_COLOR_INFO::rt_register_indices[i]);
color_infos[i] = color_info;
if (IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
// Get the mask for keeping previous color's components unmodified,
// or two UINT32_MAX if no colors actually existing in the RT are written.
DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
@ -2372,7 +2370,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// already disabled there).
bool depth_stencil_enabled =
rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
if (IsROVUsedForEDRAM() && depth_stencil_enabled) {
if (edram_rov_used_ && depth_stencil_enabled) {
for (uint32_t i = 0; i < 4; ++i) {
if (rb_depth_info.depth_base == color_infos[i].color_base &&
(rt_keep_masks[i][0] != UINT32_MAX ||
@ -2448,7 +2446,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
flags |= DxbcShaderTranslator::kSysFlag_Color0Gamma << i;
}
}
if (IsROVUsedForEDRAM() && depth_stencil_enabled) {
if (edram_rov_used_ && depth_stencil_enabled) {
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil;
if (rb_depth_info.depth_format == DepthRenderTargetFormat::kD24FS8) {
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24;
@ -2661,7 +2659,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
system_constants_.alpha_test_reference = rb_alpha_ref;
// EDRAM pitch for ROV writing.
if (IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
uint32_t edram_pitch_tiles =
((std::min(rb_surface_info.surface_pitch, 2560u) *
(rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
@ -2685,7 +2683,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// be incorrect in this case, but there's no other way without using ROV,
// though there's an option to limit the range to -1...1).
// http://www.students.science.uu.nl/~3220516/advancedgraphics/papers/inferred_lighting.pdf
if (!IsROVUsedForEDRAM() && cvars::d3d12_16bit_rtv_full_range) {
if (!edram_rov_used_ && cvars::d3d12_16bit_rtv_full_range) {
color_exp_bias -= 5;
}
}
@ -2694,7 +2692,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
0x3F800000 + (color_exp_bias << 23);
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
system_constants_.color_exp_bias[i] = color_exp_bias_scale;
if (IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
dirty |=
system_constants_.edram_rt_keep_mask[i][0] != rt_keep_masks[i][0];
system_constants_.edram_rt_keep_mask[i][0] = rt_keep_masks[i][0];
@ -2736,7 +2734,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
}
// Resolution scale, depth/stencil testing and blend constant for ROV.
if (IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
uint32_t resolution_square_scale =
texture_cache_->IsResolutionScale2X() ? 4 : 1;
dirty |= system_constants_.edram_resolution_square_scale !=
@ -3136,7 +3134,7 @@ bool D3D12CommandProcessor::UpdateBindings(
// All the constants + shared memory SRV and UAV + textures.
uint32_t view_count_full_update =
7 + texture_count_vertex + texture_count_pixel;
if (IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
// + EDRAM UAV.
++view_count_full_update;
}
@ -3193,7 +3191,7 @@ bool D3D12CommandProcessor::UpdateBindings(
shared_memory_->WriteRawUAVDescriptor(view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;
if (IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
render_target_cache_->WriteEDRAMUint32UAVDescriptor(view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;

View File

@ -63,11 +63,6 @@ class D3D12CommandProcessor : public CommandProcessor {
return deferred_command_list_.get();
}
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
// and blending performed in pixel shaders be used instead of host render
// targets.
bool IsROVUsedForEDRAM() const;
uint64_t GetCurrentSubmission() const { return submission_current_; }
uint64_t GetCompletedSubmission() const { return submission_completed_; }
@ -311,6 +306,11 @@ class D3D12CommandProcessor : public CommandProcessor {
std::unique_ptr<PipelineCache> pipeline_cache_ = nullptr;
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
// and blending performed in pixel shaders be used instead of host render
// targets.
bool edram_rov_used_ = false;
std::unique_ptr<TextureCache> texture_cache_ = nullptr;
std::unique_ptr<RenderTargetCache> render_target_cache_ = nullptr;

View File

@ -233,8 +233,6 @@ class PipelineCache {
D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
// Whether the output merger is emulated in pixel shaders.
bool edram_rov_used_;
uint32_t resolution_scale_;

View File

@ -101,18 +101,19 @@ const RenderTargetCache::EDRAMLoadStoreModeInfo
RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file,
TraceWriter* trace_writer)
TraceWriter* trace_writer,
bool edram_rov_used)
: command_processor_(command_processor),
register_file_(register_file),
trace_writer_(trace_writer) {}
trace_writer_(trace_writer),
edram_rov_used_(edram_rov_used) {}
RenderTargetCache::~RenderTargetCache() { Shutdown(); }
bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
// EDRAM buffer size depends on this.
resolution_scale_2x_ = texture_cache->IsResolutionScale2X();
assert_false(resolution_scale_2x_ &&
!command_processor_->IsROVUsedForEDRAM());
assert_false(resolution_scale_2x_ && !edram_rov_used_);
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
@ -125,7 +126,7 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
edram_buffer_desc, GetEDRAMBufferSize(),
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
// The first operation will likely be drawing with ROV or a load without ROV.
edram_buffer_state_ = command_processor_->IsROVUsedForEDRAM()
edram_buffer_state_ = edram_rov_used_
? D3D12_RESOURCE_STATE_UNORDERED_ACCESS
: D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
if (FAILED(device->CreateCommittedResource(
@ -238,14 +239,13 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
}
// Create the pipelines.
bool rov_used = command_processor_->IsROVUsedForEDRAM();
// Load and store.
for (uint32_t i = 0; i < uint32_t(EDRAMLoadStoreMode::kCount); ++i) {
const EDRAMLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i];
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.load_shader, mode_info.load_shader_size,
edram_load_store_root_signature_);
if (!rov_used) {
if (!edram_rov_used_) {
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.store_shader, mode_info.store_shader_size,
edram_load_store_root_signature_);
@ -263,7 +263,7 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
edram_load_store_root_signature_);
}
if (edram_load_pipelines_[i] == nullptr ||
(!rov_used && edram_store_pipelines_[i] == nullptr) ||
(!edram_rov_used_ && edram_store_pipelines_[i] == nullptr) ||
(load_2x_resolve_pipeline_used &&
edram_load_2x_resolve_pipelines_[i] == nullptr)) {
XELOGE("Failed to create the EDRAM load/store pipelines for mode {}", i);
@ -563,8 +563,6 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
bool rov_used = command_processor_->IsROVUsedForEDRAM();
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u);
if (surface_pitch == 0) {
@ -652,7 +650,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// Check the following full update conditions:
// - Render target is disabled and another render target got more space than
// is currently available in the textures (RTV/DSV only).
if (!rov_used && edram_max_rows > current_edram_max_rows_) {
if (!edram_rov_used_ && edram_max_rows > current_edram_max_rows_) {
full_update = true;
}
@ -698,7 +696,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
full_update = true;
break;
}
if (rov_used) {
if (edram_rov_used_) {
if (i != 4) {
full_update |= IsColorFormat64bpp(binding.color_format) !=
formats_are_64bpp[i];
@ -780,7 +778,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
uint32_t heap_usage[5] = {};
#endif
if (full_update) {
if (rov_used) {
if (edram_rov_used_) {
// Place a UAV barrier because across draws, pixels with different
// SV_Positions or different sample counts (thus without interlocking
// between each other) may access the same data now.
@ -794,7 +792,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
ClearBindings();
current_surface_pitch_ = surface_pitch;
current_msaa_samples_ = rb_surface_info.msaa_samples;
if (!rov_used) {
if (!edram_rov_used_) {
current_edram_max_rows_ = edram_max_rows;
}
@ -807,7 +805,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
}
} else {
#if 0
if (!rov_used) {
if (!edram_rov_used_) {
// If updating partially, only need to attach new render targets.
for (uint32_t i = 0; i < 5; ++i) {
const RenderTargetBinding& binding = current_bindings_[i];
@ -845,7 +843,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
binding.format = formats[i];
binding.render_target = nullptr;
if (!rov_used) {
if (!edram_rov_used_) {
RenderTargetKey key;
key.width_ss_div_80 = edram_row_tiles_32bpp;
key.height_ss_div_16 = current_edram_max_rows_;
@ -912,7 +910,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
}
}
if (!rov_used) {
if (!edram_rov_used_) {
// Sample positions when loading depth must match sample positions when
// drawing.
command_processor_->SetSamplePositions(current_msaa_samples_);
@ -990,7 +988,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// Bind the render targets to the command list, either in case of an update or
// if asked to externally.
if (!rov_used && apply_to_command_list_) {
if (!edram_rov_used_ && apply_to_command_list_) {
apply_to_command_list_ = false;
if (!sample_positions_set) {
command_processor_->SetSamplePositions(current_msaa_samples_);
@ -1022,7 +1020,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
continue;
}
RenderTargetBinding& binding = current_bindings_[i];
if (!rov_used && binding.render_target == nullptr) {
if (!edram_rov_used_ && binding.render_target == nullptr) {
// Nothing to store to the EDRAM buffer if there was an error.
continue;
}
@ -1030,7 +1028,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
std::max(binding.edram_dirty_rows, edram_dirty_rows);
}
if (rov_used) {
if (edram_rov_used_) {
// The buffer will be used for ROV drawing now.
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
edram_buffer_modified_ = true;
@ -1045,7 +1043,7 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
uint32_t& written_length_out) {
written_address_out = written_length_out = 0;
if (!command_processor_->IsROVUsedForEDRAM()) {
if (!edram_rov_used_) {
// Save the currently bound render targets to the EDRAM buffer that will be
// used as the resolve source and clear bindings to allow render target
// resources to be reused as source textures for format conversion,
@ -1177,7 +1175,7 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
return true;
}
if (command_processor_->IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
// Commit ROV writes.
CommitEDRAMBufferUAVWrites(false);
}
@ -1331,8 +1329,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// sampling the host render target gives 1/32 of what is actually stored
// there on the guest side.
// http://www.students.science.uu.nl/~3220516/advancedgraphics/papers/inferred_lighting.pdf
if (command_processor_->IsROVUsedForEDRAM() ||
cvars::d3d12_16bit_rtv_full_range) {
if (edram_rov_used_ || cvars::d3d12_16bit_rtv_full_range) {
dest_exp_bias += 5;
}
}
@ -1798,7 +1795,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
command_list->D3DIASetPrimitiveTopology(
D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
command_list->D3DDrawInstanced(3, 1, 0, 0);
if (command_processor_->IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
// Clean up - the ROV path doesn't need render targets bound and has
// non-zero ForcedSampleCount.
command_list->D3DOMSetRenderTargets(0, nullptr, FALSE, nullptr);
@ -1907,7 +1904,7 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
(resolution_scale_2x_ ? (1 << 13) : 0) | (is_depth ? (1 << 15) : 0) |
(surface_pitch_tiles << 16);
// When ROV is used, there's no 32-bit depth buffer.
if (!command_processor_->IsROVUsedForEDRAM() && is_depth &&
if (!edram_rov_used_ && is_depth &&
DepthRenderTargetFormat(format) == DepthRenderTargetFormat::kD24FS8) {
root_constants.clear_depth24 = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
// 20e4 [0,2), based on CFloat24 from d3dref9.dll and on 6e4 in DirectXTex.
@ -2146,7 +2143,7 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
}
void RenderTargetCache::FlushAndUnbindRenderTargets() {
if (command_processor_->IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
return;
}
StoreRenderTargetsToEDRAM();
@ -2282,7 +2279,7 @@ void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) {
command_processor_->SubmitBarriers();
command_list->D3DCopyBufferRegion(edram_buffer_, 0, upload_buffer,
upload_buffer_offset, kEDRAMSize);
if (!command_processor_->IsROVUsedForEDRAM()) {
if (!edram_rov_used_) {
// Clear and ignore the old 32-bit float depth - the non-ROV path is
// inaccurate anyway, and this is backend-specific, not a part of a guest
// trace.
@ -2317,7 +2314,7 @@ void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) {
uint32_t RenderTargetCache::GetEDRAMBufferSize() const {
uint32_t size = 2048 * 5120;
if (!command_processor_->IsROVUsedForEDRAM()) {
if (!edram_rov_used_) {
// Two 10 MB pages, one containing color and integer depth data, another
// with 32-bit float depth when 20e4 depth is used to allow for multipass
// drawing without precision loss in case of EDRAM store/load.
@ -2686,7 +2683,7 @@ RenderTargetCache::EDRAMLoadStoreMode RenderTargetCache::GetLoadStoreMode(
}
void RenderTargetCache::StoreRenderTargetsToEDRAM() {
if (command_processor_->IsROVUsedForEDRAM()) {
if (edram_rov_used_) {
return;
}

View File

@ -250,7 +250,8 @@ class RenderTargetCache {
};
RenderTargetCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, TraceWriter* trace_writer);
RegisterFile* register_file, TraceWriter* trace_writer,
bool edram_rov_used);
~RenderTargetCache();
bool Initialize(const TextureCache* texture_cache);
@ -516,6 +517,7 @@ class RenderTargetCache {
D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
TraceWriter* trace_writer_;
bool edram_rov_used_;
// Whether 1 guest pixel is rendered as 2x2 host pixels (currently only
// supported with ROV).

View File

@ -912,15 +912,14 @@ TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
TextureCache::~TextureCache() { Shutdown(); }
bool TextureCache::Initialize() {
bool TextureCache::Initialize(bool edram_rov_used) {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
// Try to create the tiled buffer for 2x resolution scaling.
// Not currently supported with the RTV/DSV output path for various reasons.
// As of November 27th, 2018, PIX doesn't support tiled buffers.
if (cvars::d3d12_resolution_scale >= 2 &&
command_processor_->IsROVUsedForEDRAM() &&
if (cvars::d3d12_resolution_scale >= 2 && edram_rov_used &&
provider->GetTiledResourcesTier() >= 1 &&
provider->GetGraphicsAnalysis() == nullptr &&
provider->GetVirtualAddressBitsPerResource() >=

View File

@ -96,7 +96,7 @@ class TextureCache {
RegisterFile* register_file, SharedMemory* shared_memory);
~TextureCache();
bool Initialize();
bool Initialize(bool edram_rov_used);
void Shutdown();
void ClearCache();