[D3D12] ROV: Check if supported

This commit is contained in:
Triang3l 2018-10-10 14:30:29 +03:00
parent 90f700c785
commit 6d2e74325c
9 changed files with 83 additions and 16 deletions

View File

@ -628,8 +628,6 @@ bool D3D12CommandProcessor::SetupContext() {
return false;
}
pipeline_cache_ = std::make_unique<PipelineCache>(this, register_file_);
texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
shared_memory_.get());
if (!texture_cache_->Initialize()) {
@ -644,6 +642,9 @@ bool D3D12CommandProcessor::SetupContext() {
return false;
}
pipeline_cache_ = std::make_unique<PipelineCache>(
this, register_file_, render_target_cache_->IsROVUsedForEDRAM());
primitive_converter_ =
std::make_unique<PrimitiveConverter>(this, register_file_, memory_);
if (!primitive_converter_->Initialize()) {
@ -810,12 +811,12 @@ void D3D12CommandProcessor::ShutdownContext() {
primitive_converter_.reset();
pipeline_cache_.reset();
render_target_cache_.reset();
texture_cache_.reset();
pipeline_cache_.reset();
// Root signatures are used by pipelines, thus freed after the pipelines.
for (auto it : root_signatures_) {
it.second->Release();
@ -1021,12 +1022,12 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
primitive_converter_->ClearCache();
pipeline_cache_->ClearCache();
render_target_cache_->ClearCache();
texture_cache_->ClearCache();
pipeline_cache_->ClearCache();
for (auto it : root_signatures_) {
it.second->Release();
}

View File

@ -36,9 +36,11 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.h"
PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file)
: command_processor_(command_processor), register_file_(register_file) {
shader_translator_ = std::make_unique<DxbcShaderTranslator>();
RegisterFile* register_file, bool edram_rov_used)
: command_processor_(command_processor),
register_file_(register_file),
edram_rov_used_(edram_rov_used) {
shader_translator_ = std::make_unique<DxbcShaderTranslator>(edram_rov_used_);
// Set pipeline state description values we never change.
// Zero out tessellation, stream output, blend state and formats for render

View File

@ -35,7 +35,7 @@ class PipelineCache {
};
PipelineCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file);
RegisterFile* register_file, bool edram_rov_used);
~PipelineCache();
void Shutdown();
@ -85,6 +85,9 @@ class PipelineCache {
D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
// Whether the output merger is emulated in pixel shaders.
bool edram_rov_used_;
// Reusable shader translator.
std::unique_ptr<DxbcShaderTranslator> shader_translator_ = nullptr;
// All loaded shaders mapped by their guest hash key.

View File

@ -9,6 +9,8 @@
#include "xenia/gpu/d3d12/render_target_cache.h"
#include <gflags/gflags.h>
#include <algorithm>
#include <cmath>
#include <cstring>
@ -23,6 +25,10 @@
#include "xenia/gpu/texture_util.h"
#include "xenia/ui/d3d12/d3d12_util.h"
// Opt-in switch for the rasterizer-ordered view render target path. It is read
// by RenderTargetCache::IsROVUsedForEDRAM, which additionally requires the
// device to report ROV support. Off by default.
DEFINE_bool(d3d12_rov, false,
"Use rasterizer-ordered views for render target emulation where "
"available (experimental and currently largely unimplemented).");
namespace xe {
namespace gpu {
namespace d3d12 {
@ -330,6 +336,14 @@ void RenderTargetCache::ClearCache() {
}
}
// Tells whether the rasterizer-ordered view path is to be used for EDRAM
// emulation: true only when the d3d12_rov flag is set and the Direct3D 12
// provider reports that rasterizer-ordered views are supported.
bool RenderTargetCache::IsROVUsedForEDRAM() const {
  if (FLAGS_d3d12_rov) {
    // The provider is only queried once the user has opted in via the flag.
    return command_processor_->GetD3D12Context()
        ->GetD3D12Provider()
        ->AreRasterizerOrderedViewsSupported();
  }
  return false;
}
void RenderTargetCache::BeginFrame() {
ClearBindings();

View File

@ -185,6 +185,38 @@ class D3D12CommandProcessor;
// multisampled surface is the same as a single-sampled surface with 2x height
// and width - however, format size doesn't affect the dimensions. Surface pitch
// in the surface info register is single-sampled.
//
// =============================================================================
// Rasterizer-ordered view usage:
// =============================================================================
//
// There is a separate output merger emulation path currently in development,
// using rasterizer-ordered views for writing directly to the 10 MB EDRAM buffer
// instead of the host output merger for render target output.
//
// The conventional method of implementing Xenos render targets via host render
// targets has various flaws that may be impossible to fix:
// - k_16_16 and k_16_16_16_16 have -32...32 range on Xenos, but there's no
// equivalent format on PC APIs. They may be emulated using snorm16 (by
// dividing shader color output by 32) or float32, however, blending behaves
// incorrectly for both. In the former case, multiplicative blending may not
// work correctly - 1 becomes 1/32, and instead of 1 * 1 = 1, you get
// 1/32 * 1/32 = 1/1024. For 32-bit floats, additive blending result may go up
// to infinity.
// - k_2_10_10_10_FLOAT has similar blending issues, though less prominent, when
// emulated via float16 render targets. In addition to a greater range for
// RGB (values can go up to 65504 and infinity rather than 31.875), alpha is
// represented totally differently - in k_2_10_10_10_FLOAT, it may have only
// 4 values, and adding, for example, 0.1 to 0.333 will still result in 0.333,
// while with float16, it will be increasing, and the limit is infinity.
// - Due to simultaneously bound host render targets being independent from each
// other, and because the height is unknown (and the viewport and scissor are
// not always present - D3DPT_RECTLIST is used very commonly, especially for
// clearing (Direct3D 9 Clear is implemented this way on the Xbox 360) and
// copying, and it's usually drawn without a viewport and with 8192x8192
// scissor), there may be cases of simultaneously bound render targets
// overlapping each other in the EDRAM in a way that is difficult to resolve,
// and stores/loads may destroy data.
class RenderTargetCache {
public:
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors
@ -208,6 +240,11 @@ class RenderTargetCache {
void Shutdown();
void ClearCache();
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
// and blending performed in pixel shaders be used instead of host render
// targets.
bool IsROVUsedForEDRAM() const;
void BeginFrame();
// Called in the beginning of a draw call - may bind pipelines.
bool UpdateRenderTargets(const D3D12Shader* pixel_shader);

View File

@ -62,7 +62,8 @@ using namespace ucode;
// second buffer in the descriptor array at b2, which is assigned to CB1, the
// index would be CB1[3][0].
DxbcShaderTranslator::DxbcShaderTranslator() {
DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rovs_used)
: edram_rovs_used_(edram_rovs_used) {
// Don't allocate again and again for the first shader.
shader_code_.reserve(8192);
shader_object_.reserve(16384);

View File

@ -23,7 +23,7 @@ namespace gpu {
// Generates shader model 5_1 byte code (for Direct3D 12).
class DxbcShaderTranslator : public ShaderTranslator {
public:
DxbcShaderTranslator();
DxbcShaderTranslator(bool edram_rovs_used);
~DxbcShaderTranslator() override;
// Constant buffer bindings in space 0.
@ -431,6 +431,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
// generated in the end of translation.
std::vector<uint32_t> shader_object_;
// Whether the output merger should be emulated in pixel shaders.
bool edram_rovs_used_;
// Data types used in constant buffers. Listed in dependency order.
enum class RdefTypeIndex {
kFloat,

View File

@ -150,12 +150,13 @@ bool D3D12Provider::Initialize() {
descriptor_size_dsv_ =
device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
// Check if tiled resources and programmable sample positions (programmable
// sample positions added in Creators Update) are supported.
// Check if optional features are supported.
rasterizer_ordered_views_supported_ = false;
tiled_resources_tier_ = 0;
D3D12_FEATURE_DATA_D3D12_OPTIONS options;
if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS,
&options, sizeof(options)))) {
rasterizer_ordered_views_supported_ = options.ROVsSupported ? true : false;
tiled_resources_tier_ = uint32_t(options.TiledResourcesTier);
}
programmable_sample_positions_tier_ = 0;
@ -167,8 +168,9 @@ bool D3D12Provider::Initialize() {
}
XELOGD3D(
"Direct3D 12 device supports tiled resources tier %u, programmable "
"sample positions tier %u",
tiled_resources_tier_, programmable_sample_positions_tier_);
"sample positions tier %u; rasterizer-ordered views %ssupported",
tiled_resources_tier_, programmable_sample_positions_tier_,
rasterizer_ordered_views_supported_ ? "" : "un");
// Get the graphics analysis interface, will silently fail if PIX not
// attached.

View File

@ -62,6 +62,9 @@ class D3D12Provider : public GraphicsProvider {
return start;
}
// Whether the device reported support for rasterizer-ordered views when its
// D3D12_FEATURE_D3D12_OPTIONS were queried; false if that query failed.
bool AreRasterizerOrderedViewsSupported() const {
return rasterizer_ordered_views_supported_;
}
// Tiled resources tier reported by the device's D3D12_FEATURE_D3D12_OPTIONS
// query, or 0 if that query failed.
uint32_t GetTiledResourcesTier() const { return tiled_resources_tier_; }
uint32_t GetProgrammableSamplePositionsTier() const {
return programmable_sample_positions_tier_;
@ -82,6 +85,7 @@ class D3D12Provider : public GraphicsProvider {
uint32_t descriptor_size_rtv_;
uint32_t descriptor_size_dsv_;
bool rasterizer_ordered_views_supported_;
uint32_t tiled_resources_tier_;
uint32_t programmable_sample_positions_tier_;
};