[D3D12] Remove runtime check of cvars::d3d12_edram_rov

This commit is contained in:
Triang3l 2020-04-12 20:48:35 +03:00
parent 0f0ed0eb21
commit 5795d25afe
7 changed files with 62 additions and 68 deletions

View File

@ -106,14 +106,6 @@ void D3D12CommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {
render_target_cache_->RestoreEDRAMSnapshot(snapshot); render_target_cache_->RestoreEDRAMSnapshot(snapshot);
} }
// Tells whether a rasterizer-ordered view of the EDRAM buffer is used instead
// of host render targets: true only when the d3d12_edram_rov cvar is set and
// the D3D12 provider reports rasterizer-ordered view support.
bool D3D12CommandProcessor::IsROVUsedForEDRAM() const {
  // The cvar is tested first, so the provider is queried only when the ROV
  // path has been requested.
  return cvars::d3d12_edram_rov && GetD3D12Context()
                                       ->GetD3D12Provider()
                                       ->AreRasterizerOrderedViewsSupported();
}
uint32_t D3D12CommandProcessor::GetCurrentColorMask( uint32_t D3D12CommandProcessor::GetCurrentColorMask(
const D3D12Shader* pixel_shader) const { const D3D12Shader* pixel_shader) const {
if (pixel_shader == nullptr) { if (pixel_shader == nullptr) {
@ -330,7 +322,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory);
shared_memory_and_edram_ranges[1].RegisterSpace = 0; shared_memory_and_edram_ranges[1].RegisterSpace = 0;
shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1; shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1;
if (IsROVUsedForEDRAM()) { if (edram_rov_used_) {
++parameter.DescriptorTable.NumDescriptorRanges; ++parameter.DescriptorTable.NumDescriptorRanges;
shared_memory_and_edram_ranges[2].RangeType = shared_memory_and_edram_ranges[2].RangeType =
D3D12_DESCRIPTOR_RANGE_TYPE_UAV; D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
@ -572,7 +564,7 @@ void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) {
// for ROV output. There's hardly any difference between 2,6 (of 0 and 3 with // for ROV output. There's hardly any difference between 2,6 (of 0 and 3 with
// 4x MSAA) and 4,4 anyway. // 4x MSAA) and 4,4 anyway.
// https://docs.microsoft.com/en-us/windows/desktop/api/d3d12/nf-d3d12-id3d12graphicscommandlist1-setsamplepositions // https://docs.microsoft.com/en-us/windows/desktop/api/d3d12/nf-d3d12-id3d12graphicscommandlist1-setsamplepositions
if (cvars::d3d12_ssaa_custom_sample_positions && !IsROVUsedForEDRAM() && if (cvars::d3d12_ssaa_custom_sample_positions && !edram_rov_used_ &&
command_list_1_) { command_list_1_) {
auto provider = GetD3D12Context()->GetD3D12Provider(); auto provider = GetD3D12Context()->GetD3D12Provider();
auto tier = provider->GetProgrammableSamplePositionsTier(); auto tier = provider->GetProgrammableSamplePositionsTier();
@ -664,7 +656,10 @@ void D3D12CommandProcessor::SetExternalGraphicsPipeline(
} }
std::string D3D12CommandProcessor::GetWindowTitleText() const { std::string D3D12CommandProcessor::GetWindowTitleText() const {
if (IsROVUsedForEDRAM()) { if (!render_target_cache_) {
return "Direct3D 12";
}
if (edram_rov_used_) {
// Currently scaling is only supported with ROV. // Currently scaling is only supported with ROV.
if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) { if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) {
return "Direct3D 12 - ROV 2x"; return "Direct3D 12 - ROV 2x";
@ -804,22 +799,25 @@ bool D3D12CommandProcessor::SetupContext() {
return false; return false;
} }
edram_rov_used_ =
cvars::d3d12_edram_rov && provider->AreRasterizerOrderedViewsSupported();
texture_cache_ = std::make_unique<TextureCache>(this, register_file_, texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
shared_memory_.get()); shared_memory_.get());
if (!texture_cache_->Initialize()) { if (!texture_cache_->Initialize(edram_rov_used_)) {
XELOGE("Failed to initialize the texture cache"); XELOGE("Failed to initialize the texture cache");
return false; return false;
} }
render_target_cache_ = render_target_cache_ = std::make_unique<RenderTargetCache>(
std::make_unique<RenderTargetCache>(this, register_file_, &trace_writer_); this, register_file_, &trace_writer_, edram_rov_used_);
if (!render_target_cache_->Initialize(texture_cache_.get())) { if (!render_target_cache_->Initialize(texture_cache_.get())) {
XELOGE("Failed to initialize the render target cache"); XELOGE("Failed to initialize the render target cache");
return false; return false;
} }
pipeline_cache_ = std::make_unique<PipelineCache>( pipeline_cache_ = std::make_unique<PipelineCache>(
this, register_file_, IsROVUsedForEDRAM(), this, register_file_, edram_rov_used_,
texture_cache_->IsResolutionScale2X() ? 2 : 1); texture_cache_->IsResolutionScale2X() ? 2 : 1);
if (!pipeline_cache_->Initialize()) { if (!pipeline_cache_->Initialize()) {
XELOGE("Failed to initialize the graphics pipeline state cache"); XELOGE("Failed to initialize the graphics pipeline state cache");
@ -2153,7 +2151,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
// EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also // EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also
// resolution scale. // resolution scale.
uint32_t pixel_size_x, pixel_size_y; uint32_t pixel_size_x, pixel_size_y;
if (IsROVUsedForEDRAM()) { if (edram_rov_used_) {
pixel_size_x = 1; pixel_size_x = 1;
pixel_size_y = 1; pixel_size_y = 1;
} else { } else {
@ -2260,7 +2258,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
ff_scissor_update_needed_ = false; ff_scissor_update_needed_ = false;
} }
if (!IsROVUsedForEDRAM()) { if (!edram_rov_used_) {
// Blend factor. // Blend factor.
ff_blend_factor_update_needed_ |= ff_blend_factor_update_needed_ |=
ff_blend_factor_[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32; ff_blend_factor_[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32;
@ -2342,7 +2340,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
reg::RB_COLOR_INFO::rt_register_indices[i]); reg::RB_COLOR_INFO::rt_register_indices[i]);
color_infos[i] = color_info; color_infos[i] = color_info;
if (IsROVUsedForEDRAM()) { if (edram_rov_used_) {
// Get the mask for keeping previous color's components unmodified, // Get the mask for keeping previous color's components unmodified,
// or two UINT32_MAX if no colors actually existing in the RT are written. // or two UINT32_MAX if no colors actually existing in the RT are written.
DxbcShaderTranslator::ROV_GetColorFormatSystemConstants( DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
@ -2372,7 +2370,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// already disabled there). // already disabled there).
bool depth_stencil_enabled = bool depth_stencil_enabled =
rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable; rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
if (IsROVUsedForEDRAM() && depth_stencil_enabled) { if (edram_rov_used_ && depth_stencil_enabled) {
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
if (rb_depth_info.depth_base == color_infos[i].color_base && if (rb_depth_info.depth_base == color_infos[i].color_base &&
(rt_keep_masks[i][0] != UINT32_MAX || (rt_keep_masks[i][0] != UINT32_MAX ||
@ -2448,7 +2446,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
flags |= DxbcShaderTranslator::kSysFlag_Color0Gamma << i; flags |= DxbcShaderTranslator::kSysFlag_Color0Gamma << i;
} }
} }
if (IsROVUsedForEDRAM() && depth_stencil_enabled) { if (edram_rov_used_ && depth_stencil_enabled) {
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil; flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil;
if (rb_depth_info.depth_format == DepthRenderTargetFormat::kD24FS8) { if (rb_depth_info.depth_format == DepthRenderTargetFormat::kD24FS8) {
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24; flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24;
@ -2661,7 +2659,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
system_constants_.alpha_test_reference = rb_alpha_ref; system_constants_.alpha_test_reference = rb_alpha_ref;
// EDRAM pitch for ROV writing. // EDRAM pitch for ROV writing.
if (IsROVUsedForEDRAM()) { if (edram_rov_used_) {
uint32_t edram_pitch_tiles = uint32_t edram_pitch_tiles =
((std::min(rb_surface_info.surface_pitch, 2560u) * ((std::min(rb_surface_info.surface_pitch, 2560u) *
(rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1)) + (rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
@ -2685,7 +2683,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// be incorrect in this case, but there's no other way without using ROV, // be incorrect in this case, but there's no other way without using ROV,
// though there's an option to limit the range to -1...1). // though there's an option to limit the range to -1...1).
// http://www.students.science.uu.nl/~3220516/advancedgraphics/papers/inferred_lighting.pdf // http://www.students.science.uu.nl/~3220516/advancedgraphics/papers/inferred_lighting.pdf
if (!IsROVUsedForEDRAM() && cvars::d3d12_16bit_rtv_full_range) { if (!edram_rov_used_ && cvars::d3d12_16bit_rtv_full_range) {
color_exp_bias -= 5; color_exp_bias -= 5;
} }
} }
@ -2694,7 +2692,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
0x3F800000 + (color_exp_bias << 23); 0x3F800000 + (color_exp_bias << 23);
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale; dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
system_constants_.color_exp_bias[i] = color_exp_bias_scale; system_constants_.color_exp_bias[i] = color_exp_bias_scale;
if (IsROVUsedForEDRAM()) { if (edram_rov_used_) {
dirty |= dirty |=
system_constants_.edram_rt_keep_mask[i][0] != rt_keep_masks[i][0]; system_constants_.edram_rt_keep_mask[i][0] != rt_keep_masks[i][0];
system_constants_.edram_rt_keep_mask[i][0] = rt_keep_masks[i][0]; system_constants_.edram_rt_keep_mask[i][0] = rt_keep_masks[i][0];
@ -2736,7 +2734,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
} }
// Resolution scale, depth/stencil testing and blend constant for ROV. // Resolution scale, depth/stencil testing and blend constant for ROV.
if (IsROVUsedForEDRAM()) { if (edram_rov_used_) {
uint32_t resolution_square_scale = uint32_t resolution_square_scale =
texture_cache_->IsResolutionScale2X() ? 4 : 1; texture_cache_->IsResolutionScale2X() ? 4 : 1;
dirty |= system_constants_.edram_resolution_square_scale != dirty |= system_constants_.edram_resolution_square_scale !=
@ -3136,7 +3134,7 @@ bool D3D12CommandProcessor::UpdateBindings(
// All the constants + shared memory SRV and UAV + textures. // All the constants + shared memory SRV and UAV + textures.
uint32_t view_count_full_update = uint32_t view_count_full_update =
7 + texture_count_vertex + texture_count_pixel; 7 + texture_count_vertex + texture_count_pixel;
if (IsROVUsedForEDRAM()) { if (edram_rov_used_) {
// + EDRAM UAV. // + EDRAM UAV.
++view_count_full_update; ++view_count_full_update;
} }
@ -3193,7 +3191,7 @@ bool D3D12CommandProcessor::UpdateBindings(
shared_memory_->WriteRawUAVDescriptor(view_cpu_handle); shared_memory_->WriteRawUAVDescriptor(view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view; view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view;
if (IsROVUsedForEDRAM()) { if (edram_rov_used_) {
render_target_cache_->WriteEDRAMUint32UAVDescriptor(view_cpu_handle); render_target_cache_->WriteEDRAMUint32UAVDescriptor(view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view; view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view;

View File

@ -63,11 +63,6 @@ class D3D12CommandProcessor : public CommandProcessor {
return deferred_command_list_.get(); return deferred_command_list_.get();
} }
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
// and blending performed in pixel shaders be used instead of host render
// targets.
bool IsROVUsedForEDRAM() const;
uint64_t GetCurrentSubmission() const { return submission_current_; } uint64_t GetCurrentSubmission() const { return submission_current_; }
uint64_t GetCompletedSubmission() const { return submission_completed_; } uint64_t GetCompletedSubmission() const { return submission_completed_; }
@ -311,6 +306,11 @@ class D3D12CommandProcessor : public CommandProcessor {
std::unique_ptr<PipelineCache> pipeline_cache_ = nullptr; std::unique_ptr<PipelineCache> pipeline_cache_ = nullptr;
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
// and blending performed in pixel shaders be used instead of host render
// targets.
bool edram_rov_used_ = false;
std::unique_ptr<TextureCache> texture_cache_ = nullptr; std::unique_ptr<TextureCache> texture_cache_ = nullptr;
std::unique_ptr<RenderTargetCache> render_target_cache_ = nullptr; std::unique_ptr<RenderTargetCache> render_target_cache_ = nullptr;

View File

@ -233,8 +233,6 @@ class PipelineCache {
D3D12CommandProcessor* command_processor_; D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_; RegisterFile* register_file_;
// Whether the output merger is emulated in pixel shaders.
bool edram_rov_used_; bool edram_rov_used_;
uint32_t resolution_scale_; uint32_t resolution_scale_;

View File

@ -101,18 +101,19 @@ const RenderTargetCache::EDRAMLoadStoreModeInfo
RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor, RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, RegisterFile* register_file,
TraceWriter* trace_writer) TraceWriter* trace_writer,
bool edram_rov_used)
: command_processor_(command_processor), : command_processor_(command_processor),
register_file_(register_file), register_file_(register_file),
trace_writer_(trace_writer) {} trace_writer_(trace_writer),
edram_rov_used_(edram_rov_used) {}
RenderTargetCache::~RenderTargetCache() { Shutdown(); } RenderTargetCache::~RenderTargetCache() { Shutdown(); }
bool RenderTargetCache::Initialize(const TextureCache* texture_cache) { bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
// EDRAM buffer size depends on this. // EDRAM buffer size depends on this.
resolution_scale_2x_ = texture_cache->IsResolutionScale2X(); resolution_scale_2x_ = texture_cache->IsResolutionScale2X();
assert_false(resolution_scale_2x_ && assert_false(resolution_scale_2x_ && !edram_rov_used_);
!command_processor_->IsROVUsedForEDRAM());
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice(); auto device = provider->GetDevice();
@ -125,7 +126,7 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
edram_buffer_desc, GetEDRAMBufferSize(), edram_buffer_desc, GetEDRAMBufferSize(),
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
// The first operation will likely be drawing with ROV or a load without ROV. // The first operation will likely be drawing with ROV or a load without ROV.
edram_buffer_state_ = command_processor_->IsROVUsedForEDRAM() edram_buffer_state_ = edram_rov_used_
? D3D12_RESOURCE_STATE_UNORDERED_ACCESS ? D3D12_RESOURCE_STATE_UNORDERED_ACCESS
: D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; : D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
if (FAILED(device->CreateCommittedResource( if (FAILED(device->CreateCommittedResource(
@ -238,14 +239,13 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
} }
// Create the pipelines. // Create the pipelines.
bool rov_used = command_processor_->IsROVUsedForEDRAM();
// Load and store. // Load and store.
for (uint32_t i = 0; i < uint32_t(EDRAMLoadStoreMode::kCount); ++i) { for (uint32_t i = 0; i < uint32_t(EDRAMLoadStoreMode::kCount); ++i) {
const EDRAMLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i]; const EDRAMLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i];
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline( edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.load_shader, mode_info.load_shader_size, device, mode_info.load_shader, mode_info.load_shader_size,
edram_load_store_root_signature_); edram_load_store_root_signature_);
if (!rov_used) { if (!edram_rov_used_) {
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline( edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.store_shader, mode_info.store_shader_size, device, mode_info.store_shader, mode_info.store_shader_size,
edram_load_store_root_signature_); edram_load_store_root_signature_);
@ -263,7 +263,7 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
edram_load_store_root_signature_); edram_load_store_root_signature_);
} }
if (edram_load_pipelines_[i] == nullptr || if (edram_load_pipelines_[i] == nullptr ||
(!rov_used && edram_store_pipelines_[i] == nullptr) || (!edram_rov_used_ && edram_store_pipelines_[i] == nullptr) ||
(load_2x_resolve_pipeline_used && (load_2x_resolve_pipeline_used &&
edram_load_2x_resolve_pipelines_[i] == nullptr)) { edram_load_2x_resolve_pipelines_[i] == nullptr)) {
XELOGE("Failed to create the EDRAM load/store pipelines for mode {}", i); XELOGE("Failed to create the EDRAM load/store pipelines for mode {}", i);
@ -563,8 +563,6 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES #endif // FINE_GRAINED_DRAW_SCOPES
bool rov_used = command_processor_->IsROVUsedForEDRAM();
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>(); auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u); uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u);
if (surface_pitch == 0) { if (surface_pitch == 0) {
@ -652,7 +650,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// Check the following full update conditions: // Check the following full update conditions:
// - Render target is disabled and another render target got more space than // - Render target is disabled and another render target got more space than
// is currently available in the textures (RTV/DSV only). // is currently available in the textures (RTV/DSV only).
if (!rov_used && edram_max_rows > current_edram_max_rows_) { if (!edram_rov_used_ && edram_max_rows > current_edram_max_rows_) {
full_update = true; full_update = true;
} }
@ -698,7 +696,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
full_update = true; full_update = true;
break; break;
} }
if (rov_used) { if (edram_rov_used_) {
if (i != 4) { if (i != 4) {
full_update |= IsColorFormat64bpp(binding.color_format) != full_update |= IsColorFormat64bpp(binding.color_format) !=
formats_are_64bpp[i]; formats_are_64bpp[i];
@ -780,7 +778,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
uint32_t heap_usage[5] = {}; uint32_t heap_usage[5] = {};
#endif #endif
if (full_update) { if (full_update) {
if (rov_used) { if (edram_rov_used_) {
// Place a UAV barrier because across draws, pixels with different // Place a UAV barrier because across draws, pixels with different
// SV_Positions or different sample counts (thus without interlocking // SV_Positions or different sample counts (thus without interlocking
// between each other) may access the same data now. // between each other) may access the same data now.
@ -794,7 +792,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
ClearBindings(); ClearBindings();
current_surface_pitch_ = surface_pitch; current_surface_pitch_ = surface_pitch;
current_msaa_samples_ = rb_surface_info.msaa_samples; current_msaa_samples_ = rb_surface_info.msaa_samples;
if (!rov_used) { if (!edram_rov_used_) {
current_edram_max_rows_ = edram_max_rows; current_edram_max_rows_ = edram_max_rows;
} }
@ -807,7 +805,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
} }
} else { } else {
#if 0 #if 0
if (!rov_used) { if (!edram_rov_used_) {
// If updating partially, only need to attach new render targets. // If updating partially, only need to attach new render targets.
for (uint32_t i = 0; i < 5; ++i) { for (uint32_t i = 0; i < 5; ++i) {
const RenderTargetBinding& binding = current_bindings_[i]; const RenderTargetBinding& binding = current_bindings_[i];
@ -845,7 +843,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
binding.format = formats[i]; binding.format = formats[i];
binding.render_target = nullptr; binding.render_target = nullptr;
if (!rov_used) { if (!edram_rov_used_) {
RenderTargetKey key; RenderTargetKey key;
key.width_ss_div_80 = edram_row_tiles_32bpp; key.width_ss_div_80 = edram_row_tiles_32bpp;
key.height_ss_div_16 = current_edram_max_rows_; key.height_ss_div_16 = current_edram_max_rows_;
@ -912,7 +910,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
} }
} }
if (!rov_used) { if (!edram_rov_used_) {
// Sample positions when loading depth must match sample positions when // Sample positions when loading depth must match sample positions when
// drawing. // drawing.
command_processor_->SetSamplePositions(current_msaa_samples_); command_processor_->SetSamplePositions(current_msaa_samples_);
@ -990,7 +988,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// Bind the render targets to the command list, either in case of an update or // Bind the render targets to the command list, either in case of an update or
// if asked to externally. // if asked to externally.
if (!rov_used && apply_to_command_list_) { if (!edram_rov_used_ && apply_to_command_list_) {
apply_to_command_list_ = false; apply_to_command_list_ = false;
if (!sample_positions_set) { if (!sample_positions_set) {
command_processor_->SetSamplePositions(current_msaa_samples_); command_processor_->SetSamplePositions(current_msaa_samples_);
@ -1022,7 +1020,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
continue; continue;
} }
RenderTargetBinding& binding = current_bindings_[i]; RenderTargetBinding& binding = current_bindings_[i];
if (!rov_used && binding.render_target == nullptr) { if (!edram_rov_used_ && binding.render_target == nullptr) {
// Nothing to store to the EDRAM buffer if there was an error. // Nothing to store to the EDRAM buffer if there was an error.
continue; continue;
} }
@ -1030,7 +1028,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
std::max(binding.edram_dirty_rows, edram_dirty_rows); std::max(binding.edram_dirty_rows, edram_dirty_rows);
} }
if (rov_used) { if (edram_rov_used_) {
// The buffer will be used for ROV drawing now. // The buffer will be used for ROV drawing now.
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
edram_buffer_modified_ = true; edram_buffer_modified_ = true;
@ -1045,7 +1043,7 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
uint32_t& written_length_out) { uint32_t& written_length_out) {
written_address_out = written_length_out = 0; written_address_out = written_length_out = 0;
if (!command_processor_->IsROVUsedForEDRAM()) { if (!edram_rov_used_) {
// Save the currently bound render targets to the EDRAM buffer that will be // Save the currently bound render targets to the EDRAM buffer that will be
// used as the resolve source and clear bindings to allow render target // used as the resolve source and clear bindings to allow render target
// resources to be reused as source textures for format conversion, // resources to be reused as source textures for format conversion,
@ -1177,7 +1175,7 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
return true; return true;
} }
if (command_processor_->IsROVUsedForEDRAM()) { if (edram_rov_used_) {
// Commit ROV writes. // Commit ROV writes.
CommitEDRAMBufferUAVWrites(false); CommitEDRAMBufferUAVWrites(false);
} }
@ -1331,8 +1329,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// sampling the host render target gives 1/32 of what is actually stored // sampling the host render target gives 1/32 of what is actually stored
// there on the guest side. // there on the guest side.
// http://www.students.science.uu.nl/~3220516/advancedgraphics/papers/inferred_lighting.pdf // http://www.students.science.uu.nl/~3220516/advancedgraphics/papers/inferred_lighting.pdf
if (command_processor_->IsROVUsedForEDRAM() || if (edram_rov_used_ || cvars::d3d12_16bit_rtv_full_range) {
cvars::d3d12_16bit_rtv_full_range) {
dest_exp_bias += 5; dest_exp_bias += 5;
} }
} }
@ -1798,7 +1795,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
command_list->D3DIASetPrimitiveTopology( command_list->D3DIASetPrimitiveTopology(
D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
command_list->D3DDrawInstanced(3, 1, 0, 0); command_list->D3DDrawInstanced(3, 1, 0, 0);
if (command_processor_->IsROVUsedForEDRAM()) { if (edram_rov_used_) {
// Clean up - the ROV path doesn't need render targets bound and has // Clean up - the ROV path doesn't need render targets bound and has
// non-zero ForcedSampleCount. // non-zero ForcedSampleCount.
command_list->D3DOMSetRenderTargets(0, nullptr, FALSE, nullptr); command_list->D3DOMSetRenderTargets(0, nullptr, FALSE, nullptr);
@ -1907,7 +1904,7 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
(resolution_scale_2x_ ? (1 << 13) : 0) | (is_depth ? (1 << 15) : 0) | (resolution_scale_2x_ ? (1 << 13) : 0) | (is_depth ? (1 << 15) : 0) |
(surface_pitch_tiles << 16); (surface_pitch_tiles << 16);
// When ROV is used, there's no 32-bit depth buffer. // When ROV is used, there's no 32-bit depth buffer.
if (!command_processor_->IsROVUsedForEDRAM() && is_depth && if (!edram_rov_used_ && is_depth &&
DepthRenderTargetFormat(format) == DepthRenderTargetFormat::kD24FS8) { DepthRenderTargetFormat(format) == DepthRenderTargetFormat::kD24FS8) {
root_constants.clear_depth24 = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; root_constants.clear_depth24 = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
// 20e4 [0,2), based on CFloat24 from d3dref9.dll and on 6e4 in DirectXTex. // 20e4 [0,2), based on CFloat24 from d3dref9.dll and on 6e4 in DirectXTex.
@ -2146,7 +2143,7 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
} }
void RenderTargetCache::FlushAndUnbindRenderTargets() { void RenderTargetCache::FlushAndUnbindRenderTargets() {
if (command_processor_->IsROVUsedForEDRAM()) { if (edram_rov_used_) {
return; return;
} }
StoreRenderTargetsToEDRAM(); StoreRenderTargetsToEDRAM();
@ -2282,7 +2279,7 @@ void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) {
command_processor_->SubmitBarriers(); command_processor_->SubmitBarriers();
command_list->D3DCopyBufferRegion(edram_buffer_, 0, upload_buffer, command_list->D3DCopyBufferRegion(edram_buffer_, 0, upload_buffer,
upload_buffer_offset, kEDRAMSize); upload_buffer_offset, kEDRAMSize);
if (!command_processor_->IsROVUsedForEDRAM()) { if (!edram_rov_used_) {
// Clear and ignore the old 32-bit float depth - the non-ROV path is // Clear and ignore the old 32-bit float depth - the non-ROV path is
// inaccurate anyway, and this is backend-specific, not a part of a guest // inaccurate anyway, and this is backend-specific, not a part of a guest
// trace. // trace.
@ -2317,7 +2314,7 @@ void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) {
uint32_t RenderTargetCache::GetEDRAMBufferSize() const { uint32_t RenderTargetCache::GetEDRAMBufferSize() const {
uint32_t size = 2048 * 5120; uint32_t size = 2048 * 5120;
if (!command_processor_->IsROVUsedForEDRAM()) { if (!edram_rov_used_) {
// Two 10 MB pages, one containing color and integer depth data, another // Two 10 MB pages, one containing color and integer depth data, another
// with 32-bit float depth when 20e4 depth is used to allow for multipass // with 32-bit float depth when 20e4 depth is used to allow for multipass
// drawing without precision loss in case of EDRAM store/load. // drawing without precision loss in case of EDRAM store/load.
@ -2686,7 +2683,7 @@ RenderTargetCache::EDRAMLoadStoreMode RenderTargetCache::GetLoadStoreMode(
} }
void RenderTargetCache::StoreRenderTargetsToEDRAM() { void RenderTargetCache::StoreRenderTargetsToEDRAM() {
if (command_processor_->IsROVUsedForEDRAM()) { if (edram_rov_used_) {
return; return;
} }

View File

@ -250,7 +250,8 @@ class RenderTargetCache {
}; };
RenderTargetCache(D3D12CommandProcessor* command_processor, RenderTargetCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, TraceWriter* trace_writer); RegisterFile* register_file, TraceWriter* trace_writer,
bool edram_rov_used);
~RenderTargetCache(); ~RenderTargetCache();
bool Initialize(const TextureCache* texture_cache); bool Initialize(const TextureCache* texture_cache);
@ -516,6 +517,7 @@ class RenderTargetCache {
D3D12CommandProcessor* command_processor_; D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_; RegisterFile* register_file_;
TraceWriter* trace_writer_; TraceWriter* trace_writer_;
bool edram_rov_used_;
// Whether 1 guest pixel is rendered as 2x2 host pixels (currently only // Whether 1 guest pixel is rendered as 2x2 host pixels (currently only
// supported with ROV). // supported with ROV).

View File

@ -912,15 +912,14 @@ TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
TextureCache::~TextureCache() { Shutdown(); } TextureCache::~TextureCache() { Shutdown(); }
bool TextureCache::Initialize() { bool TextureCache::Initialize(bool edram_rov_used) {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice(); auto device = provider->GetDevice();
// Try to create the tiled buffer for 2x resolution scaling. // Try to create the tiled buffer for 2x resolution scaling.
// Not currently supported with the RTV/DSV output path for various reasons. // Not currently supported with the RTV/DSV output path for various reasons.
// As of November 27th, 2018, PIX doesn't support tiled buffers. // As of November 27th, 2018, PIX doesn't support tiled buffers.
if (cvars::d3d12_resolution_scale >= 2 && if (cvars::d3d12_resolution_scale >= 2 && edram_rov_used &&
command_processor_->IsROVUsedForEDRAM() &&
provider->GetTiledResourcesTier() >= 1 && provider->GetTiledResourcesTier() >= 1 &&
provider->GetGraphicsAnalysis() == nullptr && provider->GetGraphicsAnalysis() == nullptr &&
provider->GetVirtualAddressBitsPerResource() >= provider->GetVirtualAddressBitsPerResource() >=

View File

@ -96,7 +96,7 @@ class TextureCache {
RegisterFile* register_file, SharedMemory* shared_memory); RegisterFile* register_file, SharedMemory* shared_memory);
~TextureCache(); ~TextureCache();
bool Initialize(); bool Initialize(bool edram_rov_used);
void Shutdown(); void Shutdown();
void ClearCache(); void ClearCache();