[D3D12] ROV: Check if supported
This commit is contained in:
parent
90f700c785
commit
6d2e74325c
|
@ -628,8 +628,6 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
return false;
|
||||
}
|
||||
|
||||
pipeline_cache_ = std::make_unique<PipelineCache>(this, register_file_);
|
||||
|
||||
texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
|
||||
shared_memory_.get());
|
||||
if (!texture_cache_->Initialize()) {
|
||||
|
@ -644,6 +642,9 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
return false;
|
||||
}
|
||||
|
||||
pipeline_cache_ = std::make_unique<PipelineCache>(
|
||||
this, register_file_, render_target_cache_->IsROVUsedForEDRAM());
|
||||
|
||||
primitive_converter_ =
|
||||
std::make_unique<PrimitiveConverter>(this, register_file_, memory_);
|
||||
if (!primitive_converter_->Initialize()) {
|
||||
|
@ -810,12 +811,12 @@ void D3D12CommandProcessor::ShutdownContext() {
|
|||
|
||||
primitive_converter_.reset();
|
||||
|
||||
pipeline_cache_.reset();
|
||||
|
||||
render_target_cache_.reset();
|
||||
|
||||
texture_cache_.reset();
|
||||
|
||||
pipeline_cache_.reset();
|
||||
|
||||
// Root signatures are used by pipelines, thus freed after the pipelines.
|
||||
for (auto it : root_signatures_) {
|
||||
it.second->Release();
|
||||
|
@ -1021,12 +1022,12 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
|
|||
|
||||
primitive_converter_->ClearCache();
|
||||
|
||||
pipeline_cache_->ClearCache();
|
||||
|
||||
render_target_cache_->ClearCache();
|
||||
|
||||
texture_cache_->ClearCache();
|
||||
|
||||
pipeline_cache_->ClearCache();
|
||||
|
||||
for (auto it : root_signatures_) {
|
||||
it.second->Release();
|
||||
}
|
||||
|
|
|
@ -36,9 +36,11 @@ namespace d3d12 {
|
|||
#include "xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.h"
|
||||
|
||||
PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
|
||||
RegisterFile* register_file)
|
||||
: command_processor_(command_processor), register_file_(register_file) {
|
||||
shader_translator_ = std::make_unique<DxbcShaderTranslator>();
|
||||
RegisterFile* register_file, bool edram_rov_used)
|
||||
: command_processor_(command_processor),
|
||||
register_file_(register_file),
|
||||
edram_rov_used_(edram_rov_used) {
|
||||
shader_translator_ = std::make_unique<DxbcShaderTranslator>(edram_rov_used_);
|
||||
|
||||
// Set pipeline state description values we never change.
|
||||
// Zero out tessellation, stream output, blend state and formats for render
|
||||
|
|
|
@ -35,7 +35,7 @@ class PipelineCache {
|
|||
};
|
||||
|
||||
PipelineCache(D3D12CommandProcessor* command_processor,
|
||||
RegisterFile* register_file);
|
||||
RegisterFile* register_file, bool edram_rov_used);
|
||||
~PipelineCache();
|
||||
|
||||
void Shutdown();
|
||||
|
@ -85,6 +85,9 @@ class PipelineCache {
|
|||
D3D12CommandProcessor* command_processor_;
|
||||
RegisterFile* register_file_;
|
||||
|
||||
// Whether the output merger is emulated in pixel shaders.
|
||||
bool edram_rov_used_;
|
||||
|
||||
// Reusable shader translator.
|
||||
std::unique_ptr<DxbcShaderTranslator> shader_translator_ = nullptr;
|
||||
// All loaded shaders mapped by their guest hash key.
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
|
||||
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
@ -23,6 +25,10 @@
|
|||
#include "xenia/gpu/texture_util.h"
|
||||
#include "xenia/ui/d3d12/d3d12_util.h"
|
||||
|
||||
DEFINE_bool(d3d12_rov, false,
|
||||
"Use rasterizer-ordered views for render target emulation where "
|
||||
"available (experimental and currently largely unimplemented).");
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d12 {
|
||||
|
@ -330,6 +336,14 @@ void RenderTargetCache::ClearCache() {
|
|||
}
|
||||
}
|
||||
|
||||
bool RenderTargetCache::IsROVUsedForEDRAM() const {
|
||||
if (!FLAGS_d3d12_rov) {
|
||||
return false;
|
||||
}
|
||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
return provider->AreRasterizerOrderedViewsSupported();
|
||||
}
|
||||
|
||||
void RenderTargetCache::BeginFrame() {
|
||||
ClearBindings();
|
||||
|
||||
|
|
|
@ -185,6 +185,38 @@ class D3D12CommandProcessor;
|
|||
// multisampled surface is the same as a single-sampled surface with 2x height
|
||||
// and width - however, format size doesn't affect the dimensions. Surface pitch
|
||||
// in the surface info register is single-sampled.
|
||||
//
|
||||
// =============================================================================
|
||||
// Rasterizer-ordered view usage:
|
||||
// =============================================================================
|
||||
//
|
||||
// There is a separate output merger emulation path currently in development,
|
||||
// using rasterizer-ordered views for writing directly to the 10 MB EDRAM buffer
|
||||
// instead of the host output merger for render target output.
|
||||
//
|
||||
// The conventional method of implementing Xenos render targets via host render
|
||||
// targets has various flaws that may be impossible to fix:
|
||||
// - k_16_16 and k_16_16_16_16 have -32...32 range on Xenos, but there's no
|
||||
// equivalent format on PC APIs. They may be emulated using snorm16 (by
|
||||
// dividing shader color output by 32) or float32, however, blending behaves
|
||||
// incorrectly for both. In the former case, multiplicative blending may not
|
||||
// work correctly - 1 becomes 1/32, and instead of 1 * 1 = 1, you get
|
||||
// 1/32 * 1/32 = 1/1024. For 32-bit floats, additive blending result may go up
|
||||
// to infinity.
|
||||
// - k_2_10_10_10_FLOAT has similar blending issues, though less prominent, when
|
||||
// emulated via float16 render targets. In addition to a greater range for
|
||||
// RGB (values can go up to 65504 and infinity rather than 31.875), alpha is
|
||||
// represented totally differently - in k_2_10_10_10_FLOAT, it may have only
|
||||
// 4 values, and adding, for example, 0.1 to 0.333 will still result in 0.333,
|
||||
// while with float16, it will be increasing, and the limit is infinity.
|
||||
// - Due to simultaneously bound host render targets being independent from each
|
||||
// other, and because the height is unknown (and the viewport and scissor are
|
||||
// not always present - D3DPT_RECTLIST is used very commonly, especially for
|
||||
// clearing (Direct3D 9 Clear is implemented this way on the Xbox 360) and
|
||||
// copying, and it's usually drawn without a viewport and with 8192x8192
|
||||
// scissor), there may be cases of simultaneously bound render targets
|
||||
// overlapping each other in the EDRAM in a way that is difficult to resolve,
|
||||
// and stores/loads may destroy data.
|
||||
class RenderTargetCache {
|
||||
public:
|
||||
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors
|
||||
|
@ -208,6 +240,11 @@ class RenderTargetCache {
|
|||
void Shutdown();
|
||||
void ClearCache();
|
||||
|
||||
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
|
||||
// and blending performed in pixel shaders be used instead of host render
|
||||
// targets.
|
||||
bool IsROVUsedForEDRAM() const;
|
||||
|
||||
void BeginFrame();
|
||||
// Called in the beginning of a draw call - may bind pipelines.
|
||||
bool UpdateRenderTargets(const D3D12Shader* pixel_shader);
|
||||
|
|
|
@ -62,7 +62,8 @@ using namespace ucode;
|
|||
// second buffer in the descriptor array at b2, which is assigned to CB1, the
|
||||
// index would be CB1[3][0].
|
||||
|
||||
DxbcShaderTranslator::DxbcShaderTranslator() {
|
||||
DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rovs_used)
|
||||
: edram_rovs_used_(edram_rovs_used) {
|
||||
// Don't allocate again and again for the first shader.
|
||||
shader_code_.reserve(8192);
|
||||
shader_object_.reserve(16384);
|
||||
|
|
|
@ -23,7 +23,7 @@ namespace gpu {
|
|||
// Generates shader model 5_1 byte code (for Direct3D 12).
|
||||
class DxbcShaderTranslator : public ShaderTranslator {
|
||||
public:
|
||||
DxbcShaderTranslator();
|
||||
DxbcShaderTranslator(bool edram_rovs_used);
|
||||
~DxbcShaderTranslator() override;
|
||||
|
||||
// Constant buffer bindings in space 0.
|
||||
|
@ -431,6 +431,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// generated in the end of translation.
|
||||
std::vector<uint32_t> shader_object_;
|
||||
|
||||
// Whether the output merger should be emulated in pixel shaders.
|
||||
bool edram_rovs_used_;
|
||||
|
||||
// Data types used in constants buffers. Listed in dependency order.
|
||||
enum class RdefTypeIndex {
|
||||
kFloat,
|
||||
|
|
|
@ -150,12 +150,13 @@ bool D3D12Provider::Initialize() {
|
|||
descriptor_size_dsv_ =
|
||||
device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
|
||||
|
||||
// Check if tiled resources and programmable sample positions (programmable
|
||||
// sample positions added in Creators Update) are supported.
|
||||
// Check if optional features are supported.
|
||||
rasterizer_ordered_views_supported_ = false;
|
||||
tiled_resources_tier_ = 0;
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS options;
|
||||
if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS,
|
||||
&options, sizeof(options)))) {
|
||||
rasterizer_ordered_views_supported_ = options.ROVsSupported ? true : false;
|
||||
tiled_resources_tier_ = uint32_t(options.TiledResourcesTier);
|
||||
}
|
||||
programmable_sample_positions_tier_ = 0;
|
||||
|
@ -167,8 +168,9 @@ bool D3D12Provider::Initialize() {
|
|||
}
|
||||
XELOGD3D(
|
||||
"Direct3D 12 device supports tiled resources tier %u, programmable "
|
||||
"sample positions tier %u",
|
||||
tiled_resources_tier_, programmable_sample_positions_tier_);
|
||||
"sample positions tier %u; rasterizer-ordered views %ssupported",
|
||||
tiled_resources_tier_, programmable_sample_positions_tier_,
|
||||
rasterizer_ordered_views_supported_ ? "" : "un");
|
||||
|
||||
// Get the graphics analysis interface, will silently fail if PIX not
|
||||
// attached.
|
||||
|
|
|
@ -62,6 +62,9 @@ class D3D12Provider : public GraphicsProvider {
|
|||
return start;
|
||||
}
|
||||
|
||||
bool AreRasterizerOrderedViewsSupported() const {
|
||||
return rasterizer_ordered_views_supported_;
|
||||
}
|
||||
uint32_t GetTiledResourcesTier() const { return tiled_resources_tier_; }
|
||||
uint32_t GetProgrammableSamplePositionsTier() const {
|
||||
return programmable_sample_positions_tier_;
|
||||
|
@ -82,6 +85,7 @@ class D3D12Provider : public GraphicsProvider {
|
|||
uint32_t descriptor_size_rtv_;
|
||||
uint32_t descriptor_size_dsv_;
|
||||
|
||||
bool rasterizer_ordered_views_supported_;
|
||||
uint32_t tiled_resources_tier_;
|
||||
uint32_t programmable_sample_positions_tier_;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue