[D3D12] ROV: Check if supported
This commit is contained in:
parent
90f700c785
commit
6d2e74325c
|
@ -628,8 +628,6 @@ bool D3D12CommandProcessor::SetupContext() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
pipeline_cache_ = std::make_unique<PipelineCache>(this, register_file_);
|
|
||||||
|
|
||||||
texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
|
texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
|
||||||
shared_memory_.get());
|
shared_memory_.get());
|
||||||
if (!texture_cache_->Initialize()) {
|
if (!texture_cache_->Initialize()) {
|
||||||
|
@ -644,6 +642,9 @@ bool D3D12CommandProcessor::SetupContext() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pipeline_cache_ = std::make_unique<PipelineCache>(
|
||||||
|
this, register_file_, render_target_cache_->IsROVUsedForEDRAM());
|
||||||
|
|
||||||
primitive_converter_ =
|
primitive_converter_ =
|
||||||
std::make_unique<PrimitiveConverter>(this, register_file_, memory_);
|
std::make_unique<PrimitiveConverter>(this, register_file_, memory_);
|
||||||
if (!primitive_converter_->Initialize()) {
|
if (!primitive_converter_->Initialize()) {
|
||||||
|
@ -810,12 +811,12 @@ void D3D12CommandProcessor::ShutdownContext() {
|
||||||
|
|
||||||
primitive_converter_.reset();
|
primitive_converter_.reset();
|
||||||
|
|
||||||
|
pipeline_cache_.reset();
|
||||||
|
|
||||||
render_target_cache_.reset();
|
render_target_cache_.reset();
|
||||||
|
|
||||||
texture_cache_.reset();
|
texture_cache_.reset();
|
||||||
|
|
||||||
pipeline_cache_.reset();
|
|
||||||
|
|
||||||
// Root signatures are used by pipelines, thus freed after the pipelines.
|
// Root signatures are used by pipelines, thus freed after the pipelines.
|
||||||
for (auto it : root_signatures_) {
|
for (auto it : root_signatures_) {
|
||||||
it.second->Release();
|
it.second->Release();
|
||||||
|
@ -1021,12 +1022,12 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
|
||||||
|
|
||||||
primitive_converter_->ClearCache();
|
primitive_converter_->ClearCache();
|
||||||
|
|
||||||
|
pipeline_cache_->ClearCache();
|
||||||
|
|
||||||
render_target_cache_->ClearCache();
|
render_target_cache_->ClearCache();
|
||||||
|
|
||||||
texture_cache_->ClearCache();
|
texture_cache_->ClearCache();
|
||||||
|
|
||||||
pipeline_cache_->ClearCache();
|
|
||||||
|
|
||||||
for (auto it : root_signatures_) {
|
for (auto it : root_signatures_) {
|
||||||
it.second->Release();
|
it.second->Release();
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,9 +36,11 @@ namespace d3d12 {
|
||||||
#include "xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.h"
|
#include "xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.h"
|
||||||
|
|
||||||
PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
|
PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
|
||||||
RegisterFile* register_file)
|
RegisterFile* register_file, bool edram_rov_used)
|
||||||
: command_processor_(command_processor), register_file_(register_file) {
|
: command_processor_(command_processor),
|
||||||
shader_translator_ = std::make_unique<DxbcShaderTranslator>();
|
register_file_(register_file),
|
||||||
|
edram_rov_used_(edram_rov_used) {
|
||||||
|
shader_translator_ = std::make_unique<DxbcShaderTranslator>(edram_rov_used_);
|
||||||
|
|
||||||
// Set pipeline state description values we never change.
|
// Set pipeline state description values we never change.
|
||||||
// Zero out tessellation, stream output, blend state and formats for render
|
// Zero out tessellation, stream output, blend state and formats for render
|
||||||
|
|
|
@ -35,7 +35,7 @@ class PipelineCache {
|
||||||
};
|
};
|
||||||
|
|
||||||
PipelineCache(D3D12CommandProcessor* command_processor,
|
PipelineCache(D3D12CommandProcessor* command_processor,
|
||||||
RegisterFile* register_file);
|
RegisterFile* register_file, bool edram_rov_used);
|
||||||
~PipelineCache();
|
~PipelineCache();
|
||||||
|
|
||||||
void Shutdown();
|
void Shutdown();
|
||||||
|
@ -85,6 +85,9 @@ class PipelineCache {
|
||||||
D3D12CommandProcessor* command_processor_;
|
D3D12CommandProcessor* command_processor_;
|
||||||
RegisterFile* register_file_;
|
RegisterFile* register_file_;
|
||||||
|
|
||||||
|
// Whether the output merger is emulated in pixel shaders.
|
||||||
|
bool edram_rov_used_;
|
||||||
|
|
||||||
// Reusable shader translator.
|
// Reusable shader translator.
|
||||||
std::unique_ptr<DxbcShaderTranslator> shader_translator_ = nullptr;
|
std::unique_ptr<DxbcShaderTranslator> shader_translator_ = nullptr;
|
||||||
// All loaded shaders mapped by their guest hash key.
|
// All loaded shaders mapped by their guest hash key.
|
||||||
|
|
|
@ -9,6 +9,8 @@
|
||||||
|
|
||||||
#include "xenia/gpu/d3d12/render_target_cache.h"
|
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||||
|
|
||||||
|
#include <gflags/gflags.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
@ -23,6 +25,10 @@
|
||||||
#include "xenia/gpu/texture_util.h"
|
#include "xenia/gpu/texture_util.h"
|
||||||
#include "xenia/ui/d3d12/d3d12_util.h"
|
#include "xenia/ui/d3d12/d3d12_util.h"
|
||||||
|
|
||||||
|
DEFINE_bool(d3d12_rov, false,
|
||||||
|
"Use rasterizer-ordered views for render target emulation where "
|
||||||
|
"available (experimental and currently largely unimplemented).");
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace d3d12 {
|
namespace d3d12 {
|
||||||
|
@ -330,6 +336,14 @@ void RenderTargetCache::ClearCache() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool RenderTargetCache::IsROVUsedForEDRAM() const {
|
||||||
|
if (!FLAGS_d3d12_rov) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||||
|
return provider->AreRasterizerOrderedViewsSupported();
|
||||||
|
}
|
||||||
|
|
||||||
void RenderTargetCache::BeginFrame() {
|
void RenderTargetCache::BeginFrame() {
|
||||||
ClearBindings();
|
ClearBindings();
|
||||||
|
|
||||||
|
|
|
@ -185,6 +185,38 @@ class D3D12CommandProcessor;
|
||||||
// multisampled surface is the same as a single-sampled surface with 2x height
|
// multisampled surface is the same as a single-sampled surface with 2x height
|
||||||
// and width - however, format size doesn't affect the dimensions. Surface pitch
|
// and width - however, format size doesn't affect the dimensions. Surface pitch
|
||||||
// in the surface info register is single-sampled.
|
// in the surface info register is single-sampled.
|
||||||
|
//
|
||||||
|
// =============================================================================
|
||||||
|
// Rasterizer-ordered view usage:
|
||||||
|
// =============================================================================
|
||||||
|
//
|
||||||
|
// There is a separate output merger emulation path currently in development,
|
||||||
|
// using rasterizer-ordered views for writing directly to the 10 MB EDRAM buffer
|
||||||
|
// instead of the host output merger for render target output.
|
||||||
|
//
|
||||||
|
// The conventional method of implementing Xenos render targets via host render
|
||||||
|
// targets has various flaws that may be impossible to fix:
|
||||||
|
// - k_16_16 and k_16_16_16_16 have -32...32 range on Xenos, but there's no
|
||||||
|
// equivalent format on PC APIs. They may be emulated using snorm16 (by
|
||||||
|
// dividing shader color output by 32) or float32, however, blending behaves
|
||||||
|
// incorrectly for both. In the former case, multiplicative blending may not
|
||||||
|
// work correctly - 1 becomes 1/32, and instead of 1 * 1 = 1, you get
|
||||||
|
// 1/32 * 1/32 = 1/1024. For 32-bit floats, additive blending result may go up
|
||||||
|
// to infinity.
|
||||||
|
// - k_2_10_10_10_FLOAT has similar blending issues, though less prominent, when
|
||||||
|
// emulated via float16 render targets. In addition to a greater range for
|
||||||
|
// RGB (values can go up to 65504 and infinity rather than 31.875), alpha is
|
||||||
|
// represented totally differently - in k_2_10_10_10_FLOAT, it may have only
|
||||||
|
// 4 values, and adding, for example, 0.1 to 0.333 will still result in 0.333,
|
||||||
|
// while with float16, it will be increasing, and the limit is infinity.
|
||||||
|
// - Due to simultaneously bound host render targets being independent from each
|
||||||
|
// other, and because the height is unknown (and the viewport and scissor are
|
||||||
|
// not always present - D3DPT_RECTLIST is used very commonly, especially for
|
||||||
|
// clearing (Direct3D 9 Clear is implemented this way on the Xbox 360) and
|
||||||
|
// copying, and it's usually drawn without a viewport and with 8192x8192
|
||||||
|
// scissor), there may be cases of simultaneously bound render targets
|
||||||
|
// overlapping each other in the EDRAM in a way that is difficult to resolve,
|
||||||
|
// and stores/loads may destroy data.
|
||||||
class RenderTargetCache {
|
class RenderTargetCache {
|
||||||
public:
|
public:
|
||||||
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors
|
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors
|
||||||
|
@ -208,6 +240,11 @@ class RenderTargetCache {
|
||||||
void Shutdown();
|
void Shutdown();
|
||||||
void ClearCache();
|
void ClearCache();
|
||||||
|
|
||||||
|
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
|
||||||
|
// and blending performed in pixel shaders be used instead of host render
|
||||||
|
// targets.
|
||||||
|
bool IsROVUsedForEDRAM() const;
|
||||||
|
|
||||||
void BeginFrame();
|
void BeginFrame();
|
||||||
// Called in the beginning of a draw call - may bind pipelines.
|
// Called in the beginning of a draw call - may bind pipelines.
|
||||||
bool UpdateRenderTargets(const D3D12Shader* pixel_shader);
|
bool UpdateRenderTargets(const D3D12Shader* pixel_shader);
|
||||||
|
|
|
@ -62,7 +62,8 @@ using namespace ucode;
|
||||||
// second buffer in the descriptor array at b2, which is assigned to CB1, the
|
// second buffer in the descriptor array at b2, which is assigned to CB1, the
|
||||||
// index would be CB1[3][0].
|
// index would be CB1[3][0].
|
||||||
|
|
||||||
DxbcShaderTranslator::DxbcShaderTranslator() {
|
DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rovs_used)
|
||||||
|
: edram_rovs_used_(edram_rovs_used) {
|
||||||
// Don't allocate again and again for the first shader.
|
// Don't allocate again and again for the first shader.
|
||||||
shader_code_.reserve(8192);
|
shader_code_.reserve(8192);
|
||||||
shader_object_.reserve(16384);
|
shader_object_.reserve(16384);
|
||||||
|
|
|
@ -23,7 +23,7 @@ namespace gpu {
|
||||||
// Generates shader model 5_1 byte code (for Direct3D 12).
|
// Generates shader model 5_1 byte code (for Direct3D 12).
|
||||||
class DxbcShaderTranslator : public ShaderTranslator {
|
class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
public:
|
public:
|
||||||
DxbcShaderTranslator();
|
DxbcShaderTranslator(bool edram_rovs_used);
|
||||||
~DxbcShaderTranslator() override;
|
~DxbcShaderTranslator() override;
|
||||||
|
|
||||||
// Constant buffer bindings in space 0.
|
// Constant buffer bindings in space 0.
|
||||||
|
@ -431,6 +431,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// generated in the end of translation.
|
// generated in the end of translation.
|
||||||
std::vector<uint32_t> shader_object_;
|
std::vector<uint32_t> shader_object_;
|
||||||
|
|
||||||
|
// Whether the output merger should be emulated in pixel shaders.
|
||||||
|
bool edram_rovs_used_;
|
||||||
|
|
||||||
// Data types used in constants buffers. Listed in dependency order.
|
// Data types used in constants buffers. Listed in dependency order.
|
||||||
enum class RdefTypeIndex {
|
enum class RdefTypeIndex {
|
||||||
kFloat,
|
kFloat,
|
||||||
|
|
|
@ -150,12 +150,13 @@ bool D3D12Provider::Initialize() {
|
||||||
descriptor_size_dsv_ =
|
descriptor_size_dsv_ =
|
||||||
device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
|
device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
|
||||||
|
|
||||||
// Check if tiled resources and programmable sample positions (programmable
|
// Check if optional features are supported.
|
||||||
// sample positions added in Creators Update) are supported.
|
rasterizer_ordered_views_supported_ = false;
|
||||||
tiled_resources_tier_ = 0;
|
tiled_resources_tier_ = 0;
|
||||||
D3D12_FEATURE_DATA_D3D12_OPTIONS options;
|
D3D12_FEATURE_DATA_D3D12_OPTIONS options;
|
||||||
if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS,
|
if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS,
|
||||||
&options, sizeof(options)))) {
|
&options, sizeof(options)))) {
|
||||||
|
rasterizer_ordered_views_supported_ = options.ROVsSupported ? true : false;
|
||||||
tiled_resources_tier_ = uint32_t(options.TiledResourcesTier);
|
tiled_resources_tier_ = uint32_t(options.TiledResourcesTier);
|
||||||
}
|
}
|
||||||
programmable_sample_positions_tier_ = 0;
|
programmable_sample_positions_tier_ = 0;
|
||||||
|
@ -167,8 +168,9 @@ bool D3D12Provider::Initialize() {
|
||||||
}
|
}
|
||||||
XELOGD3D(
|
XELOGD3D(
|
||||||
"Direct3D 12 device supports tiled resources tier %u, programmable "
|
"Direct3D 12 device supports tiled resources tier %u, programmable "
|
||||||
"sample positions tier %u",
|
"sample positions tier %u; rasterizer-ordered views %ssupported",
|
||||||
tiled_resources_tier_, programmable_sample_positions_tier_);
|
tiled_resources_tier_, programmable_sample_positions_tier_,
|
||||||
|
rasterizer_ordered_views_supported_ ? "" : "un");
|
||||||
|
|
||||||
// Get the graphics analysis interface, will silently fail if PIX not
|
// Get the graphics analysis interface, will silently fail if PIX not
|
||||||
// attached.
|
// attached.
|
||||||
|
|
|
@ -62,6 +62,9 @@ class D3D12Provider : public GraphicsProvider {
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool AreRasterizerOrderedViewsSupported() const {
|
||||||
|
return rasterizer_ordered_views_supported_;
|
||||||
|
}
|
||||||
uint32_t GetTiledResourcesTier() const { return tiled_resources_tier_; }
|
uint32_t GetTiledResourcesTier() const { return tiled_resources_tier_; }
|
||||||
uint32_t GetProgrammableSamplePositionsTier() const {
|
uint32_t GetProgrammableSamplePositionsTier() const {
|
||||||
return programmable_sample_positions_tier_;
|
return programmable_sample_positions_tier_;
|
||||||
|
@ -82,6 +85,7 @@ class D3D12Provider : public GraphicsProvider {
|
||||||
uint32_t descriptor_size_rtv_;
|
uint32_t descriptor_size_rtv_;
|
||||||
uint32_t descriptor_size_dsv_;
|
uint32_t descriptor_size_dsv_;
|
||||||
|
|
||||||
|
bool rasterizer_ordered_views_supported_;
|
||||||
uint32_t tiled_resources_tier_;
|
uint32_t tiled_resources_tier_;
|
||||||
uint32_t programmable_sample_positions_tier_;
|
uint32_t programmable_sample_positions_tier_;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue