[D3D12] Resolve shader and draw
This commit is contained in:
parent
dd17cd3f9f
commit
4a747b3b81
|
@ -469,13 +469,39 @@ void D3D12CommandProcessor::ReleaseScratchGPUBuffer(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void D3D12CommandProcessor::SetPipeline(ID3D12PipelineState* pipeline) {
|
void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
|
||||||
if (current_pipeline_ != pipeline) {
|
if (current_pipeline_ != pipeline) {
|
||||||
GetCurrentCommandList()->SetPipelineState(pipeline);
|
GetCurrentCommandList()->SetPipelineState(pipeline);
|
||||||
current_pipeline_ = pipeline;
|
current_pipeline_ = pipeline;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void D3D12CommandProcessor::UnbindRenderTargets() {
|
||||||
|
render_target_cache_->UnbindRenderTargets();
|
||||||
|
}
|
||||||
|
|
||||||
|
void D3D12CommandProcessor::SetExternalGraphicsPipeline(
|
||||||
|
ID3D12PipelineState* pipeline, bool reset_viewport, bool reset_blend_factor,
|
||||||
|
bool reset_stencil_ref) {
|
||||||
|
if (current_pipeline_ != pipeline) {
|
||||||
|
GetCurrentCommandList()->SetPipelineState(pipeline);
|
||||||
|
current_pipeline_ = pipeline;
|
||||||
|
}
|
||||||
|
current_graphics_root_signature_ = nullptr;
|
||||||
|
current_graphics_root_up_to_date_ = 0;
|
||||||
|
primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
|
||||||
|
if (reset_viewport) {
|
||||||
|
ff_viewport_update_needed_ = true;
|
||||||
|
ff_scissor_update_needed_ = true;
|
||||||
|
}
|
||||||
|
if (reset_blend_factor) {
|
||||||
|
ff_blend_factor_update_needed_ = true;
|
||||||
|
}
|
||||||
|
if (reset_stencil_ref) {
|
||||||
|
ff_stencil_ref_update_needed_ = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool D3D12CommandProcessor::SetupContext() {
|
bool D3D12CommandProcessor::SetupContext() {
|
||||||
if (!CommandProcessor::SetupContext()) {
|
if (!CommandProcessor::SetupContext()) {
|
||||||
XELOGE("Failed to initialize base command processor context");
|
XELOGE("Failed to initialize base command processor context");
|
||||||
|
@ -898,7 +924,10 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
UpdateFixedFunctionState(command_list);
|
UpdateFixedFunctionState(command_list);
|
||||||
|
|
||||||
// Bind the pipeline.
|
// Bind the pipeline.
|
||||||
SetPipeline(pipeline);
|
if (current_pipeline_ != pipeline) {
|
||||||
|
GetCurrentCommandList()->SetPipelineState(pipeline);
|
||||||
|
current_pipeline_ = pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
// Update system constants before uploading them.
|
// Update system constants before uploading them.
|
||||||
UpdateSystemConstantValues(
|
UpdateSystemConstantValues(
|
||||||
|
@ -966,7 +995,8 @@ bool D3D12CommandProcessor::IssueCopy() {
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||||
BeginFrame();
|
BeginFrame();
|
||||||
return render_target_cache_->Resolve(shared_memory_.get(), memory_);
|
return render_target_cache_->Resolve(shared_memory_.get(),
|
||||||
|
texture_cache_.get(), memory_);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool D3D12CommandProcessor::BeginFrame() {
|
bool D3D12CommandProcessor::BeginFrame() {
|
||||||
|
|
|
@ -86,9 +86,23 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
void ReleaseScratchGPUBuffer(ID3D12Resource* buffer,
|
void ReleaseScratchGPUBuffer(ID3D12Resource* buffer,
|
||||||
D3D12_RESOURCE_STATES new_state);
|
D3D12_RESOURCE_STATES new_state);
|
||||||
|
|
||||||
// Sets the current pipeline state - may be called internally or externally.
|
// Sets the current pipeline state to a compute pipeline. This is for cache
|
||||||
// This is for cache invalidation primarily. A frame must be open.
|
// invalidation primarily. A frame must be open.
|
||||||
void SetPipeline(ID3D12PipelineState* pipeline);
|
void SetComputePipeline(ID3D12PipelineState* pipeline);
|
||||||
|
|
||||||
|
// Stores and unbinds render targets before binding changing render targets
|
||||||
|
// externally. This is separate from SetExternalGraphicsPipeline because it
|
||||||
|
// causes computations to be dispatched, and the scratch buffer may also be
|
||||||
|
// used.
|
||||||
|
void UnbindRenderTargets();
|
||||||
|
|
||||||
|
// Sets the current pipeline state to a special drawing pipeline, invalidating
|
||||||
|
// various cached state variables. UnbindRenderTargets may be needed before
|
||||||
|
// calling this. A frame must be open.
|
||||||
|
void SetExternalGraphicsPipeline(ID3D12PipelineState* pipeline,
|
||||||
|
bool reset_viewport = true,
|
||||||
|
bool reset_blend_factor = false,
|
||||||
|
bool reset_stencil_ref = false);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool SetupContext() override;
|
bool SetupContext() override;
|
||||||
|
|
|
@ -38,6 +38,8 @@ namespace d3d12 {
|
||||||
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_float_cs.h"
|
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_float_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_unorm_cs.h"
|
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_unorm_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/bin/edram_tile_sample_32bpp_cs.h"
|
#include "xenia/gpu/d3d12/shaders/bin/edram_tile_sample_32bpp_cs.h"
|
||||||
|
#include "xenia/gpu/d3d12/shaders/bin/resolve_ps.h"
|
||||||
|
#include "xenia/gpu/d3d12/shaders/bin/resolve_vs.h"
|
||||||
|
|
||||||
const RenderTargetCache::EDRAMLoadStoreModeInfo
|
const RenderTargetCache::EDRAMLoadStoreModeInfo
|
||||||
RenderTargetCache::edram_load_store_mode_info_[size_t(
|
RenderTargetCache::edram_load_store_mode_info_[size_t(
|
||||||
|
@ -98,63 +100,67 @@ bool RenderTargetCache::Initialize() {
|
||||||
edram_buffer_cleared_ = false;
|
edram_buffer_cleared_ = false;
|
||||||
|
|
||||||
// Create the root signature for EDRAM buffer load/store.
|
// Create the root signature for EDRAM buffer load/store.
|
||||||
D3D12_ROOT_PARAMETER root_parameters[2];
|
D3D12_ROOT_PARAMETER load_store_root_parameters[2];
|
||||||
// Parameter 0 is constants (changed for each render target binding).
|
// Parameter 0 is constants (changed for each render target binding).
|
||||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
load_store_root_parameters[0].ParameterType =
|
||||||
root_parameters[0].Constants.ShaderRegister = 0;
|
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
||||||
root_parameters[0].Constants.RegisterSpace = 0;
|
load_store_root_parameters[0].Constants.ShaderRegister = 0;
|
||||||
root_parameters[0].Constants.Num32BitValues =
|
load_store_root_parameters[0].Constants.RegisterSpace = 0;
|
||||||
|
load_store_root_parameters[0].Constants.Num32BitValues =
|
||||||
sizeof(EDRAMLoadStoreRootConstants) / sizeof(uint32_t);
|
sizeof(EDRAMLoadStoreRootConstants) / sizeof(uint32_t);
|
||||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
load_store_root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||||
// Parameter 1 is source and target.
|
// Parameter 1 is source and target.
|
||||||
D3D12_DESCRIPTOR_RANGE root_load_store_ranges[2];
|
D3D12_DESCRIPTOR_RANGE load_store_root_ranges[2];
|
||||||
root_load_store_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
load_store_root_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||||
root_load_store_ranges[0].NumDescriptors = 1;
|
load_store_root_ranges[0].NumDescriptors = 1;
|
||||||
root_load_store_ranges[0].BaseShaderRegister = 0;
|
load_store_root_ranges[0].BaseShaderRegister = 0;
|
||||||
root_load_store_ranges[0].RegisterSpace = 0;
|
load_store_root_ranges[0].RegisterSpace = 0;
|
||||||
root_load_store_ranges[0].OffsetInDescriptorsFromTableStart = 0;
|
load_store_root_ranges[0].OffsetInDescriptorsFromTableStart = 0;
|
||||||
root_load_store_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
load_store_root_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
||||||
root_load_store_ranges[1].NumDescriptors = 1;
|
load_store_root_ranges[1].NumDescriptors = 1;
|
||||||
root_load_store_ranges[1].BaseShaderRegister = 0;
|
load_store_root_ranges[1].BaseShaderRegister = 0;
|
||||||
root_load_store_ranges[1].RegisterSpace = 0;
|
load_store_root_ranges[1].RegisterSpace = 0;
|
||||||
root_load_store_ranges[1].OffsetInDescriptorsFromTableStart = 1;
|
load_store_root_ranges[1].OffsetInDescriptorsFromTableStart = 1;
|
||||||
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
load_store_root_parameters[1].ParameterType =
|
||||||
root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
|
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||||
root_parameters[1].DescriptorTable.pDescriptorRanges = root_load_store_ranges;
|
load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
|
||||||
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
load_store_root_parameters[1].DescriptorTable.pDescriptorRanges =
|
||||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
load_store_root_ranges;
|
||||||
root_signature_desc.NumParameters = UINT(xe::countof(root_parameters));
|
load_store_root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||||
root_signature_desc.pParameters = root_parameters;
|
D3D12_ROOT_SIGNATURE_DESC load_store_root_desc;
|
||||||
root_signature_desc.NumStaticSamplers = 0;
|
load_store_root_desc.NumParameters =
|
||||||
root_signature_desc.pStaticSamplers = nullptr;
|
UINT(xe::countof(load_store_root_parameters));
|
||||||
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
|
load_store_root_desc.pParameters = load_store_root_parameters;
|
||||||
ID3DBlob* root_signature_blob;
|
load_store_root_desc.NumStaticSamplers = 0;
|
||||||
ID3DBlob* root_signature_error_blob = nullptr;
|
load_store_root_desc.pStaticSamplers = nullptr;
|
||||||
|
load_store_root_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
|
||||||
|
ID3DBlob* load_store_root_blob;
|
||||||
|
ID3DBlob* load_store_root_error_blob = nullptr;
|
||||||
if (FAILED(D3D12SerializeRootSignature(
|
if (FAILED(D3D12SerializeRootSignature(
|
||||||
&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1,
|
&load_store_root_desc, D3D_ROOT_SIGNATURE_VERSION_1,
|
||||||
&root_signature_blob, &root_signature_error_blob))) {
|
&load_store_root_blob, &load_store_root_error_blob))) {
|
||||||
XELOGE("Failed to serialize the EDRAM buffer load/store root signature");
|
XELOGE("Failed to serialize the EDRAM buffer load/store root signature");
|
||||||
if (root_signature_error_blob != nullptr) {
|
if (load_store_root_error_blob != nullptr) {
|
||||||
XELOGE("%s", reinterpret_cast<const char*>(
|
XELOGE("%s", reinterpret_cast<const char*>(
|
||||||
root_signature_error_blob->GetBufferPointer()));
|
load_store_root_error_blob->GetBufferPointer()));
|
||||||
root_signature_error_blob->Release();
|
load_store_root_error_blob->Release();
|
||||||
}
|
}
|
||||||
Shutdown();
|
Shutdown();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (root_signature_error_blob != nullptr) {
|
if (load_store_root_error_blob != nullptr) {
|
||||||
root_signature_error_blob->Release();
|
load_store_root_error_blob->Release();
|
||||||
}
|
}
|
||||||
if (FAILED(device->CreateRootSignature(
|
if (FAILED(device->CreateRootSignature(
|
||||||
0, root_signature_blob->GetBufferPointer(),
|
0, load_store_root_blob->GetBufferPointer(),
|
||||||
root_signature_blob->GetBufferSize(),
|
load_store_root_blob->GetBufferSize(),
|
||||||
IID_PPV_ARGS(&edram_load_store_root_signature_)))) {
|
IID_PPV_ARGS(&edram_load_store_root_signature_)))) {
|
||||||
XELOGE("Failed to create the EDRAM buffer load/store root signature");
|
XELOGE("Failed to create the EDRAM buffer load/store root signature");
|
||||||
root_signature_blob->Release();
|
load_store_root_blob->Release();
|
||||||
Shutdown();
|
Shutdown();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
root_signature_blob->Release();
|
load_store_root_blob->Release();
|
||||||
|
|
||||||
// Create the load/store pipelines.
|
// Create the load/store pipelines.
|
||||||
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc;
|
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc;
|
||||||
|
@ -197,12 +203,94 @@ bool RenderTargetCache::Initialize() {
|
||||||
}
|
}
|
||||||
edram_tile_sample_32bpp_pipeline_->SetName(L"EDRAM Raw Resolve 32bpp");
|
edram_tile_sample_32bpp_pipeline_->SetName(L"EDRAM Raw Resolve 32bpp");
|
||||||
|
|
||||||
|
// Create the converting resolve root signature.
|
||||||
|
D3D12_ROOT_PARAMETER resolve_root_parameters[2];
|
||||||
|
// Parameter 0 is constants.
|
||||||
|
resolve_root_parameters[0].ParameterType =
|
||||||
|
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
||||||
|
resolve_root_parameters[0].Constants.ShaderRegister = 0;
|
||||||
|
resolve_root_parameters[0].Constants.RegisterSpace = 0;
|
||||||
|
resolve_root_parameters[0].Constants.Num32BitValues =
|
||||||
|
sizeof(ResolveRootConstants) / sizeof(uint32_t);
|
||||||
|
resolve_root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
|
||||||
|
// Parameter 1 is the source render target.
|
||||||
|
D3D12_DESCRIPTOR_RANGE resolve_root_srv_range;
|
||||||
|
resolve_root_srv_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||||
|
resolve_root_srv_range.NumDescriptors = 1;
|
||||||
|
resolve_root_srv_range.BaseShaderRegister = 0;
|
||||||
|
resolve_root_srv_range.RegisterSpace = 0;
|
||||||
|
resolve_root_srv_range.OffsetInDescriptorsFromTableStart = 0;
|
||||||
|
resolve_root_parameters[1].ParameterType =
|
||||||
|
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||||
|
resolve_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
|
||||||
|
resolve_root_parameters[1].DescriptorTable.pDescriptorRanges =
|
||||||
|
&resolve_root_srv_range;
|
||||||
|
resolve_root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
|
||||||
|
// Static sampler for resolving AA using bilinear filtering.
|
||||||
|
D3D12_STATIC_SAMPLER_DESC resolve_sampler_desc;
|
||||||
|
resolve_sampler_desc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
|
||||||
|
resolve_sampler_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
|
||||||
|
resolve_sampler_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
|
||||||
|
resolve_sampler_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
|
||||||
|
resolve_sampler_desc.MipLODBias = 0.0f;
|
||||||
|
resolve_sampler_desc.MaxAnisotropy = 1;
|
||||||
|
resolve_sampler_desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
|
||||||
|
resolve_sampler_desc.BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK;
|
||||||
|
resolve_sampler_desc.MinLOD = 0.0f;
|
||||||
|
resolve_sampler_desc.MaxLOD = 0.0f;
|
||||||
|
resolve_sampler_desc.ShaderRegister = 0;
|
||||||
|
resolve_sampler_desc.RegisterSpace = 0;
|
||||||
|
resolve_sampler_desc.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
|
||||||
|
D3D12_ROOT_SIGNATURE_DESC resolve_root_desc;
|
||||||
|
resolve_root_desc.NumParameters = UINT(xe::countof(resolve_root_parameters));
|
||||||
|
resolve_root_desc.pParameters = resolve_root_parameters;
|
||||||
|
resolve_root_desc.NumStaticSamplers = 1;
|
||||||
|
resolve_root_desc.pStaticSamplers = &resolve_sampler_desc;
|
||||||
|
resolve_root_desc.Flags =
|
||||||
|
D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS;
|
||||||
|
ID3DBlob* resolve_root_blob;
|
||||||
|
ID3DBlob* resolve_root_error_blob = nullptr;
|
||||||
|
if (FAILED(D3D12SerializeRootSignature(
|
||||||
|
&resolve_root_desc, D3D_ROOT_SIGNATURE_VERSION_1, &resolve_root_blob,
|
||||||
|
&resolve_root_error_blob))) {
|
||||||
|
XELOGE("Failed to serialize the converting resolve root signature");
|
||||||
|
if (resolve_root_error_blob != nullptr) {
|
||||||
|
XELOGE("%s", reinterpret_cast<const char*>(
|
||||||
|
resolve_root_error_blob->GetBufferPointer()));
|
||||||
|
resolve_root_error_blob->Release();
|
||||||
|
}
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (resolve_root_error_blob != nullptr) {
|
||||||
|
resolve_root_error_blob->Release();
|
||||||
|
}
|
||||||
|
if (FAILED(device->CreateRootSignature(
|
||||||
|
0, resolve_root_blob->GetBufferPointer(),
|
||||||
|
resolve_root_blob->GetBufferSize(),
|
||||||
|
IID_PPV_ARGS(&resolve_root_signature_)))) {
|
||||||
|
XELOGE("Failed to create the converting resolve root signature");
|
||||||
|
resolve_root_blob->Release();
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
resolve_root_blob->Release();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderTargetCache::Shutdown() {
|
void RenderTargetCache::Shutdown() {
|
||||||
ClearCache();
|
ClearCache();
|
||||||
|
|
||||||
|
for (auto& resolve_pipeline : resolve_pipelines_) {
|
||||||
|
resolve_pipeline.pipeline->Release();
|
||||||
|
}
|
||||||
|
resolve_pipelines_.clear();
|
||||||
|
if (resolve_root_signature_ != nullptr) {
|
||||||
|
resolve_root_signature_->Release();
|
||||||
|
resolve_root_signature_ = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
if (edram_tile_sample_32bpp_pipeline_ != nullptr) {
|
if (edram_tile_sample_32bpp_pipeline_ != nullptr) {
|
||||||
edram_tile_sample_32bpp_pipeline_->Release();
|
edram_tile_sample_32bpp_pipeline_->Release();
|
||||||
edram_tile_sample_32bpp_pipeline_ = nullptr;
|
edram_tile_sample_32bpp_pipeline_ = nullptr;
|
||||||
|
@ -229,11 +317,16 @@ void RenderTargetCache::Shutdown() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderTargetCache::ClearCache() {
|
void RenderTargetCache::ClearCache() {
|
||||||
|
for (auto resolve_target_pair : resolve_targets_) {
|
||||||
|
ResolveTarget* resolve_target = resolve_target_pair.second;
|
||||||
|
resolve_target->resource->Release();
|
||||||
|
delete resolve_target;
|
||||||
|
}
|
||||||
|
resolve_targets_.clear();
|
||||||
|
|
||||||
for (auto render_target_pair : render_targets_) {
|
for (auto render_target_pair : render_targets_) {
|
||||||
RenderTarget* render_target = render_target_pair.second;
|
RenderTarget* render_target = render_target_pair.second;
|
||||||
if (render_target->resource != nullptr) {
|
render_target->resource->Release();
|
||||||
render_target->resource->Release();
|
|
||||||
}
|
|
||||||
delete render_target;
|
delete render_target;
|
||||||
}
|
}
|
||||||
render_targets_.clear();
|
render_targets_.clear();
|
||||||
|
@ -721,7 +814,8 @@ bool RenderTargetCache::UpdateRenderTargets() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RenderTargetCache::Resolve(SharedMemory* shared_memory, Memory* memory) {
|
bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
||||||
|
TextureCache* texture_cache, Memory* memory) {
|
||||||
// Save the currently bound render targets to the EDRAM buffer that will be
|
// Save the currently bound render targets to the EDRAM buffer that will be
|
||||||
// used as the resolve source and clear bindings to allow render target
|
// used as the resolve source and clear bindings to allow render target
|
||||||
// resources to be reused as source textures for format conversion, resolving
|
// resources to be reused as source textures for format conversion, resolving
|
||||||
|
@ -823,14 +917,15 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, Memory* memory) {
|
||||||
msaa_samples != MsaaSamples::k1X ? "s" : "", surface_format,
|
msaa_samples != MsaaSamples::k1X ? "s" : "", surface_format,
|
||||||
surface_edram_base);
|
surface_edram_base);
|
||||||
|
|
||||||
bool copied =
|
bool copied = ResolveCopy(shared_memory, texture_cache, surface_edram_base,
|
||||||
ResolveCopy(shared_memory, surface_edram_base, surface_pitch,
|
surface_pitch, msaa_samples, surface_is_depth,
|
||||||
msaa_samples, surface_is_depth, surface_format, src_rect);
|
surface_format, src_rect);
|
||||||
// TODO(Triang3l): Clear.
|
// TODO(Triang3l): Clear.
|
||||||
return copied;
|
return copied;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
|
TextureCache* texture_cache,
|
||||||
uint32_t edram_base, uint32_t surface_pitch,
|
uint32_t edram_base, uint32_t surface_pitch,
|
||||||
MsaaSamples msaa_samples, bool is_depth,
|
MsaaSamples msaa_samples, bool is_depth,
|
||||||
uint32_t src_format,
|
uint32_t src_format,
|
||||||
|
@ -954,6 +1049,9 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
auto descriptor_size_view = provider->GetDescriptorSizeView();
|
auto descriptor_size_view = provider->GetDescriptorSizeView();
|
||||||
if (sample_select <= xenos::CopySampleSelect::k3 &&
|
if (sample_select <= xenos::CopySampleSelect::k3 &&
|
||||||
src_texture_format == dest_format && dest_exp_bias == 0) {
|
src_texture_format == dest_format && dest_exp_bias == 0) {
|
||||||
|
// *************************************************************************
|
||||||
|
// Raw copy
|
||||||
|
// *************************************************************************
|
||||||
XELOGGPU("Resolving a single sample without conversion");
|
XELOGGPU("Resolving a single sample without conversion");
|
||||||
if (src_64bpp) {
|
if (src_64bpp) {
|
||||||
// TODO(Triang3l): 64bpp sample copy shader.
|
// TODO(Triang3l): 64bpp sample copy shader.
|
||||||
|
@ -997,8 +1095,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
// Dispatch the computation.
|
// Dispatch the computation.
|
||||||
command_list->SetComputeRootSignature(edram_load_store_root_signature_);
|
command_list->SetComputeRootSignature(edram_load_store_root_signature_);
|
||||||
EDRAMLoadStoreRootConstants root_constants;
|
EDRAMLoadStoreRootConstants root_constants;
|
||||||
root_constants.tile_sample_rect_tl = copy_rect.left | (copy_rect.top << 16);
|
root_constants.tile_sample_rect_lt = copy_rect.left | (copy_rect.top << 16);
|
||||||
root_constants.tile_sample_rect_br =
|
root_constants.tile_sample_rect_rb =
|
||||||
copy_rect.right | (copy_rect.bottom << 16);
|
copy_rect.right | (copy_rect.bottom << 16);
|
||||||
root_constants.tile_sample_dest_base = dest_address;
|
root_constants.tile_sample_dest_base = dest_address;
|
||||||
assert_true(dest_pitch <= 8192);
|
assert_true(dest_pitch <= 8192);
|
||||||
|
@ -1036,7 +1134,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
|
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
|
||||||
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
||||||
// TODO(Triang3l): 64bpp pipeline.
|
// TODO(Triang3l): 64bpp pipeline.
|
||||||
command_processor_->SetPipeline(edram_tile_sample_32bpp_pipeline_);
|
command_processor_->SetComputePipeline(edram_tile_sample_32bpp_pipeline_);
|
||||||
// 1 group per destination 80x16 (32bpp) / 80x8 (64bpp) region.
|
// 1 group per destination 80x16 (32bpp) / 80x8 (64bpp) region.
|
||||||
uint32_t group_count_x = row_tiles, group_count_y = rows;
|
uint32_t group_count_x = row_tiles, group_count_y = rows;
|
||||||
if (msaa_samples >= MsaaSamples::k2X) {
|
if (msaa_samples >= MsaaSamples::k2X) {
|
||||||
|
@ -1053,14 +1151,426 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
// Make the texture cache refresh the data.
|
// Make the texture cache refresh the data.
|
||||||
shared_memory->RangeWrittenByGPU(dest_address, dest_size);
|
shared_memory->RangeWrittenByGPU(dest_address, dest_size);
|
||||||
} else {
|
} else {
|
||||||
|
// *************************************************************************
|
||||||
|
// Conversion and AA resolving
|
||||||
|
// *************************************************************************
|
||||||
XELOGGPU("Resolving with a pixel shader");
|
XELOGGPU("Resolving with a pixel shader");
|
||||||
// TODO(Triang3l): Conversion.
|
|
||||||
return false;
|
// Get everything we need for the conversion.
|
||||||
|
|
||||||
|
// DXGI format (also checking whether this resolve is possible).
|
||||||
|
DXGI_FORMAT dest_dxgi_format =
|
||||||
|
texture_cache->GetResolveDXGIFormat(dest_format);
|
||||||
|
if (dest_dxgi_format == DXGI_FORMAT_UNKNOWN) {
|
||||||
|
XELOGE(
|
||||||
|
"No resolve pipeline for destination format %s - tell Xenia "
|
||||||
|
"developers!",
|
||||||
|
FormatInfo::Get(dest_format)->name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Resolve pipeline.
|
||||||
|
ID3D12PipelineState* resolve_pipeline =
|
||||||
|
GetResolvePipeline(dest_dxgi_format);
|
||||||
|
if (resolve_pipeline == nullptr) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
RenderTargetKey render_target_key;
|
||||||
|
render_target_key.width_ss_div_80 = row_tiles >> (src_64bpp ? 1 : 0);
|
||||||
|
render_target_key.height_ss_div_16 = rows;
|
||||||
|
render_target_key.is_depth = false;
|
||||||
|
render_target_key.format = src_format;
|
||||||
|
// Render target for loading the EDRAM buffer contents as a texture.
|
||||||
|
RenderTarget* render_target =
|
||||||
|
FindOrCreateRenderTarget(render_target_key, 0);
|
||||||
|
if (render_target == nullptr) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint =
|
||||||
|
render_target->footprints[0];
|
||||||
|
// Size of the resolved area.
|
||||||
|
uint32_t copy_width = copy_rect.right - copy_rect.left;
|
||||||
|
uint32_t copy_height = copy_rect.bottom - copy_rect.top;
|
||||||
|
// Resolve target for output merger format conversion.
|
||||||
|
ResolveTarget* resolve_target =
|
||||||
|
FindOrCreateResolveTarget(copy_width, copy_height, dest_dxgi_format,
|
||||||
|
render_target->heap_page_count);
|
||||||
|
if (resolve_target == nullptr) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Descriptors. 2 for EDRAM load, 1 for conversion.
|
||||||
|
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
|
||||||
|
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
|
||||||
|
if (command_processor_->RequestViewDescriptors(
|
||||||
|
0, 3, 3, descriptor_cpu_start, descriptor_gpu_start) == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Buffer for copying.
|
||||||
|
D3D12_RESOURCE_STATES copy_buffer_state =
|
||||||
|
D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||||
|
ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer(
|
||||||
|
render_target->copy_buffer_size, copy_buffer_state);
|
||||||
|
if (copy_buffer == nullptr) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load the EDRAM buffer contents to the copy buffer.
|
||||||
|
|
||||||
|
command_processor_->PushTransitionBarrier(
|
||||||
|
edram_buffer_, edram_buffer_state_,
|
||||||
|
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
|
||||||
|
edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
|
|
||||||
|
command_list->SetComputeRootSignature(edram_load_store_root_signature_);
|
||||||
|
|
||||||
|
EDRAMLoadStoreRootConstants load_root_constants;
|
||||||
|
load_root_constants.rt_color_depth_offset = uint32_t(footprint.Offset);
|
||||||
|
load_root_constants.rt_color_depth_pitch =
|
||||||
|
uint32_t(footprint.Footprint.RowPitch);
|
||||||
|
load_root_constants.base_pitch_tiles =
|
||||||
|
edram_base | (surface_pitch_tiles << 11);
|
||||||
|
command_list->SetComputeRoot32BitConstants(
|
||||||
|
0, sizeof(load_root_constants) / sizeof(uint32_t), &load_root_constants,
|
||||||
|
0);
|
||||||
|
|
||||||
|
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
|
||||||
|
srv_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||||
|
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
|
||||||
|
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||||
|
srv_desc.Buffer.FirstElement = 0;
|
||||||
|
srv_desc.Buffer.NumElements = 2048 * 1280;
|
||||||
|
srv_desc.Buffer.StructureByteStride = 0;
|
||||||
|
srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
|
||||||
|
device->CreateShaderResourceView(edram_buffer_, &srv_desc,
|
||||||
|
descriptor_cpu_start);
|
||||||
|
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
||||||
|
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||||
|
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||||
|
uav_desc.Buffer.FirstElement = 0;
|
||||||
|
uav_desc.Buffer.NumElements = render_target->copy_buffer_size >> 2;
|
||||||
|
uav_desc.Buffer.StructureByteStride = 0;
|
||||||
|
uav_desc.Buffer.CounterOffsetInBytes = 0;
|
||||||
|
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
|
||||||
|
D3D12_CPU_DESCRIPTOR_HANDLE copy_buffer_cpu_handle;
|
||||||
|
copy_buffer_cpu_handle.ptr =
|
||||||
|
descriptor_cpu_start.ptr + descriptor_size_view;
|
||||||
|
device->CreateUnorderedAccessView(copy_buffer, nullptr, &uav_desc,
|
||||||
|
copy_buffer_cpu_handle);
|
||||||
|
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
||||||
|
|
||||||
|
command_processor_->SetComputePipeline(
|
||||||
|
edram_load_pipelines_[size_t(GetLoadStoreMode(false, src_format))]);
|
||||||
|
command_list->Dispatch(row_tiles, rows, 1);
|
||||||
|
command_processor_->PushUAVBarrier(copy_buffer);
|
||||||
|
|
||||||
|
// Go to the next descriptor set.
|
||||||
|
|
||||||
|
descriptor_cpu_start.ptr += 2 * descriptor_size_view;
|
||||||
|
descriptor_gpu_start.ptr += 2 * descriptor_size_view;
|
||||||
|
|
||||||
|
// Copy the EDRAM buffer contents to the source texture.
|
||||||
|
|
||||||
|
command_processor_->PushAliasingBarrier(nullptr, render_target->resource);
|
||||||
|
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
|
||||||
|
D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||||
|
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||||
|
command_processor_->PushTransitionBarrier(render_target->resource,
|
||||||
|
render_target->state,
|
||||||
|
D3D12_RESOURCE_STATE_COPY_DEST);
|
||||||
|
render_target->state = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
|
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
|
||||||
|
location_source.pResource = copy_buffer;
|
||||||
|
location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
||||||
|
location_source.PlacedFootprint = render_target->footprints[0];
|
||||||
|
location_dest.pResource = render_target->resource;
|
||||||
|
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
||||||
|
location_dest.SubresourceIndex = 0;
|
||||||
|
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source,
|
||||||
|
nullptr);
|
||||||
|
|
||||||
|
// Done with the copy buffer.
|
||||||
|
|
||||||
|
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
|
||||||
|
|
||||||
|
// Do the resolve. Render targets unbound already, safe to call
|
||||||
|
// OMSetRenderTargets.
|
||||||
|
|
||||||
|
command_processor_->PushAliasingBarrier(nullptr, resolve_target->resource);
|
||||||
|
command_processor_->PushTransitionBarrier(
|
||||||
|
render_target->resource, render_target->state,
|
||||||
|
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||||
|
render_target->state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
||||||
|
command_processor_->PushTransitionBarrier(
|
||||||
|
resolve_target->resource, resolve_target->state,
|
||||||
|
D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||||
|
resolve_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET;
|
||||||
|
|
||||||
|
command_list->SetGraphicsRootSignature(resolve_root_signature_);
|
||||||
|
|
||||||
|
ResolveRootConstants resolve_root_constants;
|
||||||
|
uint32_t samples_x_log2 = msaa_samples >= MsaaSamples::k4X ? 1 : 0;
|
||||||
|
uint32_t samples_y_log2 = msaa_samples >= MsaaSamples::k2X ? 1 : 0;
|
||||||
|
resolve_root_constants.rect_samples_lw =
|
||||||
|
(copy_rect.left << samples_x_log2) |
|
||||||
|
(copy_width << (16 + samples_x_log2));
|
||||||
|
resolve_root_constants.rect_samples_th =
|
||||||
|
(copy_rect.top << samples_y_log2) |
|
||||||
|
(copy_height << (16 + samples_y_log2));
|
||||||
|
resolve_root_constants.source_size =
|
||||||
|
(render_target_key.width_ss_div_80 * 80) |
|
||||||
|
(render_target_key.height_ss_div_16 << (4 + 16));
|
||||||
|
resolve_root_constants.resolve_info =
|
||||||
|
samples_y_log2 | (samples_x_log2 << 1) |
|
||||||
|
((uint32_t(dest_exp_bias) & 0x3F) << 6);
|
||||||
|
if (msaa_samples == MsaaSamples::k1X) {
|
||||||
|
// No offset.
|
||||||
|
resolve_root_constants.resolve_info |= (1 << 2) | (1 << 4);
|
||||||
|
} else if (msaa_samples == MsaaSamples::k2X) {
|
||||||
|
// -0.5 or +0.5 samples vertical offset if getting only one sample.
|
||||||
|
if (sample_select == xenos::CopySampleSelect::k0) {
|
||||||
|
resolve_root_constants.resolve_info |= (0 << 2) | (1 << 4);
|
||||||
|
} else if (sample_select == xenos::CopySampleSelect::k1) {
|
||||||
|
resolve_root_constants.resolve_info |= (2 << 2) | (1 << 4);
|
||||||
|
} else {
|
||||||
|
resolve_root_constants.resolve_info |= (1 << 2) | (1 << 4);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// -0.5 or +0.5 samples offsets if getting one or two samples.
|
||||||
|
switch (sample_select) {
|
||||||
|
case xenos::CopySampleSelect::k0:
|
||||||
|
resolve_root_constants.resolve_info |= (0 << 2) | (0 << 4);
|
||||||
|
break;
|
||||||
|
case xenos::CopySampleSelect::k1:
|
||||||
|
resolve_root_constants.resolve_info |= (2 << 2) | (0 << 4);
|
||||||
|
break;
|
||||||
|
case xenos::CopySampleSelect::k2:
|
||||||
|
resolve_root_constants.resolve_info |= (0 << 2) | (2 << 4);
|
||||||
|
break;
|
||||||
|
case xenos::CopySampleSelect::k3:
|
||||||
|
resolve_root_constants.resolve_info |= (2 << 2) | (2 << 4);
|
||||||
|
break;
|
||||||
|
case xenos::CopySampleSelect::k01:
|
||||||
|
resolve_root_constants.resolve_info |= (1 << 2) | (0 << 4);
|
||||||
|
break;
|
||||||
|
case xenos::CopySampleSelect::k23:
|
||||||
|
resolve_root_constants.resolve_info |= (1 << 2) | (2 << 4);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
resolve_root_constants.resolve_info |= (1 << 2) | (1 << 4);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
command_list->SetGraphicsRoot32BitConstants(
|
||||||
|
0, sizeof(resolve_root_constants) / sizeof(uint32_t),
|
||||||
|
&resolve_root_constants, 0);
|
||||||
|
|
||||||
|
srv_desc.Format = GetColorDXGIFormat(ColorRenderTargetFormat(src_format));
|
||||||
|
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
|
||||||
|
if (dest_swap) {
|
||||||
|
switch (ColorRenderTargetFormat(src_format)) {
|
||||||
|
case ColorRenderTargetFormat::k_8_8_8_8:
|
||||||
|
case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
|
||||||
|
case ColorRenderTargetFormat::k_2_10_10_10:
|
||||||
|
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
|
||||||
|
case ColorRenderTargetFormat::k_16_16_16_16:
|
||||||
|
case ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
|
||||||
|
case ColorRenderTargetFormat::k_2_10_10_10_AS_16_16_16_16:
|
||||||
|
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
|
||||||
|
srv_desc.Shader4ComponentMapping =
|
||||||
|
D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(2, 1, 0, 3);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
srv_desc.Shader4ComponentMapping =
|
||||||
|
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
srv_desc.Shader4ComponentMapping =
|
||||||
|
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||||
|
}
|
||||||
|
srv_desc.Texture2D.MostDetailedMip = 0;
|
||||||
|
srv_desc.Texture2D.MipLevels = 1;
|
||||||
|
srv_desc.Texture2D.PlaneSlice = 0;
|
||||||
|
srv_desc.Texture2D.ResourceMinLODClamp = 0.0f;
|
||||||
|
device->CreateShaderResourceView(render_target->resource, &srv_desc,
|
||||||
|
descriptor_cpu_start);
|
||||||
|
command_list->SetGraphicsRootDescriptorTable(1, descriptor_gpu_start);
|
||||||
|
|
||||||
|
command_processor_->SetExternalGraphicsPipeline(resolve_pipeline);
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
|
command_list->OMSetRenderTargets(1, &resolve_target->rtv_handle, TRUE,
|
||||||
|
nullptr);
|
||||||
|
D3D12_VIEWPORT viewport;
|
||||||
|
viewport.TopLeftX = 0.0f;
|
||||||
|
viewport.TopLeftY = 0.0f;
|
||||||
|
viewport.Width = float(copy_width);
|
||||||
|
viewport.Height = float(copy_height);
|
||||||
|
viewport.MinDepth = 0.0f;
|
||||||
|
viewport.MaxDepth = 1.0f;
|
||||||
|
command_list->RSSetViewports(1, &viewport);
|
||||||
|
D3D12_RECT scissor;
|
||||||
|
scissor.left = 0;
|
||||||
|
scissor.top = 0;
|
||||||
|
scissor.right = copy_width;
|
||||||
|
scissor.bottom = copy_height;
|
||||||
|
command_list->RSSetScissorRects(1, &scissor);
|
||||||
|
command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||||
|
command_list->DrawInstanced(3, 1, 0, 0);
|
||||||
|
|
||||||
|
// TODO(Triang3l): Tile the resolve target in the texture cache.
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ID3D12PipelineState* RenderTargetCache::GetResolvePipeline(
|
||||||
|
DXGI_FORMAT dest_format) {
|
||||||
|
// Try to find an existing pipeline.
|
||||||
|
for (auto& resolve_pipeline : resolve_pipelines_) {
|
||||||
|
if (resolve_pipeline.dest_format == dest_format) {
|
||||||
|
return resolve_pipeline.pipeline;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Create a new pipeline.
|
||||||
|
auto device =
|
||||||
|
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||||
|
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {};
|
||||||
|
pipeline_desc.pRootSignature = resolve_root_signature_;
|
||||||
|
pipeline_desc.VS.pShaderBytecode = resolve_vs;
|
||||||
|
pipeline_desc.VS.BytecodeLength = sizeof(resolve_vs);
|
||||||
|
pipeline_desc.PS.pShaderBytecode = resolve_ps;
|
||||||
|
pipeline_desc.PS.BytecodeLength = sizeof(resolve_ps);
|
||||||
|
pipeline_desc.BlendState.RenderTarget[0].RenderTargetWriteMask =
|
||||||
|
D3D12_COLOR_WRITE_ENABLE_ALL;
|
||||||
|
pipeline_desc.SampleMask = UINT_MAX;
|
||||||
|
pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
|
||||||
|
pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||||
|
pipeline_desc.RasterizerState.DepthClipEnable = TRUE;
|
||||||
|
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
|
||||||
|
pipeline_desc.NumRenderTargets = 1;
|
||||||
|
pipeline_desc.RTVFormats[0] = dest_format;
|
||||||
|
pipeline_desc.SampleDesc.Count = 1;
|
||||||
|
ID3D12PipelineState* pipeline;
|
||||||
|
if (FAILED(device->CreateGraphicsPipelineState(&pipeline_desc,
|
||||||
|
IID_PPV_ARGS(&pipeline)))) {
|
||||||
|
XELOGE("Failed to create the resolve pipeline for DXGI format %u",
|
||||||
|
dest_format);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
ResolvePipeline new_resolve_pipeline;
|
||||||
|
new_resolve_pipeline.pipeline = pipeline;
|
||||||
|
new_resolve_pipeline.dest_format = dest_format;
|
||||||
|
resolve_pipelines_.push_back(new_resolve_pipeline);
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
|
||||||
|
uint32_t width, uint32_t height, DXGI_FORMAT format,
|
||||||
|
uint32_t min_heap_page_first) {
|
||||||
|
assert_true(min_heap_page_first < kHeap4MBPages * 5);
|
||||||
|
if (width == 0 || height == 0 || width > 8192 || height > 8192) {
|
||||||
|
assert_always();
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
ResolveTargetKey key;
|
||||||
|
key.width_div_32 = (width + 31) >> 5;
|
||||||
|
key.height_div_32 = (height + 31) >> 5;
|
||||||
|
key.format = format;
|
||||||
|
|
||||||
|
// Try to find an existing target that isn't overlapping the resolve source.
|
||||||
|
auto found_range = resolve_targets_.equal_range(key.value);
|
||||||
|
for (auto iter = found_range.first; iter != found_range.second; ++iter) {
|
||||||
|
ResolveTarget* found_resolve_target = iter->second;
|
||||||
|
if (found_resolve_target->heap_page_first >= min_heap_page_first) {
|
||||||
|
return found_resolve_target;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure the new resolve target can get an RTV descriptor.
|
||||||
|
if (!EnsureRTVHeapAvailable(false)) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate a new resolve target.
|
||||||
|
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||||
|
auto device = provider->GetDevice();
|
||||||
|
D3D12_RESOURCE_DESC resource_desc;
|
||||||
|
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
||||||
|
resource_desc.Alignment = 0;
|
||||||
|
resource_desc.Width = key.width_div_32 << 5;
|
||||||
|
resource_desc.Height = key.height_div_32 << 5;
|
||||||
|
resource_desc.DepthOrArraySize = 1;
|
||||||
|
resource_desc.MipLevels = 1;
|
||||||
|
resource_desc.Format = format;
|
||||||
|
resource_desc.SampleDesc.Count = 1;
|
||||||
|
resource_desc.SampleDesc.Quality = 0;
|
||||||
|
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
||||||
|
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
|
||||||
|
D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
|
||||||
|
device->GetResourceAllocationInfo(0, 1, &resource_desc);
|
||||||
|
uint32_t heap_page_count =
|
||||||
|
(uint32_t(allocation_info.SizeInBytes) + ((4 << 20) - 1)) >> 22;
|
||||||
|
if (heap_page_count == 0 || heap_page_count > kHeap4MBPages) {
|
||||||
|
assert_always();
|
||||||
|
XELOGE(
|
||||||
|
"%ux%u resolve target with DXGI format %u can't fit in a heap, "
|
||||||
|
"needs %u bytes - tell Xenia developers to increase the heap size!",
|
||||||
|
uint32_t(resource_desc.Width), resource_desc.Height, format,
|
||||||
|
uint32_t(allocation_info.SizeInBytes));
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
if (kHeap4MBPages - (min_heap_page_first % kHeap4MBPages) < heap_page_count) {
|
||||||
|
// Go to the next heap if no free space in the current one.
|
||||||
|
min_heap_page_first = xe::round_up(min_heap_page_first, kHeap4MBPages);
|
||||||
|
assert_true(min_heap_page_first < kHeap4MBPages * 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the memory heap if it doesn't exist yet.
|
||||||
|
uint32_t heap_index = min_heap_page_first / kHeap4MBPages;
|
||||||
|
if (!MakeHeapResident(heap_index)) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create it.
|
||||||
|
// The first action likely to be done is resolve.
|
||||||
|
D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_RENDER_TARGET;
|
||||||
|
ID3D12Resource* resource;
|
||||||
|
if (FAILED(device->CreatePlacedResource(
|
||||||
|
heaps_[heap_index], (min_heap_page_first % kHeap4MBPages) << 22,
|
||||||
|
&resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) {
|
||||||
|
XELOGE(
|
||||||
|
"Failed to create a placed resource for %ux%u resolve target with DXGI "
|
||||||
|
"format %u at heap 4 MB pages %u:%u",
|
||||||
|
uint32_t(resource_desc.Width), resource_desc.Height, format,
|
||||||
|
min_heap_page_first, min_heap_page_first + heap_page_count - 1);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the RTV.
|
||||||
|
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
|
||||||
|
rtv_handle.ptr = descriptor_heaps_color_->start_handle.ptr +
|
||||||
|
descriptor_heaps_color_->descriptors_used *
|
||||||
|
provider->GetDescriptorSizeRTV();
|
||||||
|
D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
|
||||||
|
rtv_desc.Format = format;
|
||||||
|
rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
|
||||||
|
rtv_desc.Texture2D.MipSlice = 0;
|
||||||
|
rtv_desc.Texture2D.PlaneSlice = 0;
|
||||||
|
device->CreateRenderTargetView(resource, &rtv_desc, rtv_handle);
|
||||||
|
++descriptor_heaps_color_->descriptors_used;
|
||||||
|
|
||||||
|
// Add the new resolve target to the cache.
|
||||||
|
ResolveTarget* resolve_target = new ResolveTarget;
|
||||||
|
resolve_target->resource = resource;
|
||||||
|
resolve_target->state = state;
|
||||||
|
resolve_target->rtv_handle.ptr = rtv_handle.ptr;
|
||||||
|
resolve_target->key.value = key.value;
|
||||||
|
resolve_target->heap_page_first = min_heap_page_first;
|
||||||
|
resolve_targets_.insert(std::make_pair(key.value, resolve_target));
|
||||||
|
|
||||||
|
return resolve_target;
|
||||||
|
}
|
||||||
|
|
||||||
void RenderTargetCache::UnbindRenderTargets() {
|
void RenderTargetCache::UnbindRenderTargets() {
|
||||||
StoreRenderTargetsToEDRAM();
|
StoreRenderTargetsToEDRAM();
|
||||||
ClearBindings();
|
ClearBindings();
|
||||||
|
@ -1104,6 +1614,61 @@ void RenderTargetCache::ClearBindings() {
|
||||||
std::memset(current_bindings_, 0, sizeof(current_bindings_));
|
std::memset(current_bindings_, 0, sizeof(current_bindings_));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool RenderTargetCache::MakeHeapResident(uint32_t heap_index) {
|
||||||
|
if (heap_index >= 5) {
|
||||||
|
assert_always();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (heaps_[heap_index] != nullptr) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto device =
|
||||||
|
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||||
|
D3D12_HEAP_DESC heap_desc = {};
|
||||||
|
heap_desc.SizeInBytes = kHeap4MBPages << 22;
|
||||||
|
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||||
|
// TODO(Triang3l): If real MSAA is added, alignment must be 4 MB.
|
||||||
|
heap_desc.Alignment = 0;
|
||||||
|
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
|
||||||
|
if (FAILED(
|
||||||
|
device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[heap_index])))) {
|
||||||
|
XELOGE("Failed to create a %u MB heap for render targets",
|
||||||
|
kHeap4MBPages * 4);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool RenderTargetCache::EnsureRTVHeapAvailable(bool is_depth) {
|
||||||
|
auto& heap = is_depth ? descriptor_heaps_depth_ : descriptor_heaps_color_;
|
||||||
|
if (heap != nullptr &&
|
||||||
|
heap->descriptors_used < kRenderTargetDescriptorHeapSize) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto device =
|
||||||
|
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||||
|
D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
|
||||||
|
heap_desc.Type = is_depth ? D3D12_DESCRIPTOR_HEAP_TYPE_DSV
|
||||||
|
: D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
|
||||||
|
heap_desc.NumDescriptors = kRenderTargetDescriptorHeapSize;
|
||||||
|
heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
|
||||||
|
heap_desc.NodeMask = 0;
|
||||||
|
ID3D12DescriptorHeap* new_d3d_heap;
|
||||||
|
if (FAILED(device->CreateDescriptorHeap(&heap_desc,
|
||||||
|
IID_PPV_ARGS(&new_d3d_heap)))) {
|
||||||
|
XELOGE("Failed to create a heap for %u %s buffer descriptors",
|
||||||
|
kRenderTargetDescriptorHeapSize, is_depth ? "depth" : "color");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
RenderTargetDescriptorHeap* new_heap = new RenderTargetDescriptorHeap;
|
||||||
|
new_heap->heap = new_d3d_heap;
|
||||||
|
new_heap->start_handle = new_d3d_heap->GetCPUDescriptorHandleForHeapStart();
|
||||||
|
new_heap->descriptors_used = 0;
|
||||||
|
new_heap->previous = heap;
|
||||||
|
heap = new_heap;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool RenderTargetCache::GetResourceDesc(RenderTargetKey key,
|
bool RenderTargetCache::GetResourceDesc(RenderTargetKey key,
|
||||||
D3D12_RESOURCE_DESC& desc) {
|
D3D12_RESOURCE_DESC& desc) {
|
||||||
if (key.width_ss_div_80 == 0 || key.height_ss_div_16 == 0) {
|
if (key.width_ss_div_80 == 0 || key.height_ss_div_16 == 0) {
|
||||||
|
@ -1133,7 +1698,7 @@ bool RenderTargetCache::GetResourceDesc(RenderTargetKey key,
|
||||||
|
|
||||||
RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
||||||
RenderTargetKey key, uint32_t heap_page_first) {
|
RenderTargetKey key, uint32_t heap_page_first) {
|
||||||
assert_true(heap_page_first <= kHeap4MBPages * 5);
|
assert_true(heap_page_first < kHeap4MBPages * 5);
|
||||||
|
|
||||||
// Try to find an existing render target.
|
// Try to find an existing render target.
|
||||||
auto found_range = render_targets_.equal_range(key.value);
|
auto found_range = render_targets_.equal_range(key.value);
|
||||||
|
@ -1163,57 +1728,23 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a new descriptor heap if needed, and get a place for the descriptor.
|
// Ensure we can create a new descriptor in the render target heap.
|
||||||
auto& descriptor_heap =
|
if (!EnsureRTVHeapAvailable(key.is_depth)) {
|
||||||
key.is_depth ? descriptor_heaps_depth_ : descriptor_heaps_color_;
|
return nullptr;
|
||||||
if (descriptor_heap == nullptr ||
|
|
||||||
descriptor_heap->descriptors_used >= kRenderTargetDescriptorHeapSize) {
|
|
||||||
D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc;
|
|
||||||
descriptor_heap_desc.Type = key.is_depth ? D3D12_DESCRIPTOR_HEAP_TYPE_DSV
|
|
||||||
: D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
|
|
||||||
descriptor_heap_desc.NumDescriptors = kRenderTargetDescriptorHeapSize;
|
|
||||||
descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
|
|
||||||
descriptor_heap_desc.NodeMask = 0;
|
|
||||||
ID3D12DescriptorHeap* new_d3d_descriptor_heap;
|
|
||||||
if (FAILED(device->CreateDescriptorHeap(
|
|
||||||
&descriptor_heap_desc, IID_PPV_ARGS(&new_d3d_descriptor_heap)))) {
|
|
||||||
XELOGE("Failed to create a heap for %u %s buffer descriptors",
|
|
||||||
kRenderTargetDescriptorHeapSize, key.is_depth ? "depth" : "color");
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
RenderTargetDescriptorHeap* new_descriptor_heap =
|
|
||||||
new RenderTargetDescriptorHeap;
|
|
||||||
new_descriptor_heap->heap = new_d3d_descriptor_heap;
|
|
||||||
new_descriptor_heap->start_handle =
|
|
||||||
new_d3d_descriptor_heap->GetCPUDescriptorHandleForHeapStart();
|
|
||||||
new_descriptor_heap->descriptors_used = 0;
|
|
||||||
new_descriptor_heap->previous = descriptor_heap;
|
|
||||||
descriptor_heap = new_descriptor_heap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the memory heap if it doesn't exist yet.
|
// Create the memory heap if it doesn't exist yet.
|
||||||
ID3D12Heap* heap = heaps_[heap_page_first / kHeap4MBPages];
|
uint32_t heap_index = heap_page_first / kHeap4MBPages;
|
||||||
if (heap == nullptr) {
|
if (!MakeHeapResident(heap_index)) {
|
||||||
D3D12_HEAP_DESC heap_desc = {};
|
return nullptr;
|
||||||
heap_desc.SizeInBytes = kHeap4MBPages << 22;
|
|
||||||
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
|
||||||
// TODO(Triang3l): If real MSAA is added, alignment must be 4 MB.
|
|
||||||
heap_desc.Alignment = 0;
|
|
||||||
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
|
|
||||||
if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heap)))) {
|
|
||||||
XELOGE("Failed to create a %u MB heap for render targets",
|
|
||||||
kHeap4MBPages * 4);
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
heaps_[heap_page_first / kHeap4MBPages] = heap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The first action likely to be done is EDRAM buffer load.
|
// The first action likely to be done is EDRAM buffer load.
|
||||||
D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST;
|
D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||||
ID3D12Resource* resource;
|
ID3D12Resource* resource;
|
||||||
if (FAILED(device->CreatePlacedResource(
|
if (FAILED(device->CreatePlacedResource(
|
||||||
heap, (heap_page_first % kHeap4MBPages) << 22, &resource_desc, state,
|
heaps_[heap_index], (heap_page_first % kHeap4MBPages) << 22,
|
||||||
nullptr, IID_PPV_ARGS(&resource)))) {
|
&resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) {
|
||||||
XELOGE(
|
XELOGE(
|
||||||
"Failed to create a placed resource for %ux%u %s render target with "
|
"Failed to create a placed resource for %ux%u %s render target with "
|
||||||
"format %u at heap 4 MB pages %u:%u",
|
"format %u at heap 4 MB pages %u:%u",
|
||||||
|
@ -1226,27 +1757,28 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
||||||
// Create the descriptor for the render target.
|
// Create the descriptor for the render target.
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_handle;
|
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_handle;
|
||||||
if (key.is_depth) {
|
if (key.is_depth) {
|
||||||
descriptor_handle.ptr =
|
descriptor_handle.ptr = descriptor_heaps_depth_->start_handle.ptr +
|
||||||
descriptor_heap->start_handle.ptr +
|
descriptor_heaps_depth_->descriptors_used *
|
||||||
descriptor_heap->descriptors_used * provider->GetDescriptorSizeDSV();
|
provider->GetDescriptorSizeDSV();
|
||||||
D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc;
|
D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc;
|
||||||
dsv_desc.Format = resource_desc.Format;
|
dsv_desc.Format = resource_desc.Format;
|
||||||
dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
|
dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
|
||||||
dsv_desc.Flags = D3D12_DSV_FLAG_NONE;
|
dsv_desc.Flags = D3D12_DSV_FLAG_NONE;
|
||||||
dsv_desc.Texture2D.MipSlice = 0;
|
dsv_desc.Texture2D.MipSlice = 0;
|
||||||
device->CreateDepthStencilView(resource, &dsv_desc, descriptor_handle);
|
device->CreateDepthStencilView(resource, &dsv_desc, descriptor_handle);
|
||||||
|
++descriptor_heaps_depth_->descriptors_used;
|
||||||
} else {
|
} else {
|
||||||
descriptor_handle.ptr =
|
descriptor_handle.ptr = descriptor_heaps_color_->start_handle.ptr +
|
||||||
descriptor_heap->start_handle.ptr +
|
descriptor_heaps_color_->descriptors_used *
|
||||||
descriptor_heap->descriptors_used * provider->GetDescriptorSizeRTV();
|
provider->GetDescriptorSizeRTV();
|
||||||
D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
|
D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
|
||||||
rtv_desc.Format = resource_desc.Format;
|
rtv_desc.Format = resource_desc.Format;
|
||||||
rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
|
rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
|
||||||
rtv_desc.Texture2D.MipSlice = 0;
|
rtv_desc.Texture2D.MipSlice = 0;
|
||||||
rtv_desc.Texture2D.PlaneSlice = 0;
|
rtv_desc.Texture2D.PlaneSlice = 0;
|
||||||
device->CreateRenderTargetView(resource, &rtv_desc, descriptor_handle);
|
device->CreateRenderTargetView(resource, &rtv_desc, descriptor_handle);
|
||||||
|
++descriptor_heaps_color_->descriptors_used;
|
||||||
}
|
}
|
||||||
++descriptor_heap->descriptors_used;
|
|
||||||
|
|
||||||
// Get the layout for copying to the EDRAM buffer.
|
// Get the layout for copying to the EDRAM buffer.
|
||||||
RenderTarget* render_target = new RenderTarget;
|
RenderTarget* render_target = new RenderTarget;
|
||||||
|
@ -1511,7 +2043,8 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
|
||||||
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
|
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
|
||||||
EDRAMLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
|
EDRAMLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
|
||||||
render_target->key.format);
|
render_target->key.format);
|
||||||
command_processor_->SetPipeline(edram_store_pipelines_[size_t(mode)]);
|
command_processor_->SetComputePipeline(
|
||||||
|
edram_store_pipelines_[size_t(mode)]);
|
||||||
command_list->Dispatch(rt_pitch_tiles, binding.edram_dirty_rows, 1);
|
command_list->Dispatch(rt_pitch_tiles, binding.edram_dirty_rows, 1);
|
||||||
|
|
||||||
// Commit the UAV write.
|
// Commit the UAV write.
|
||||||
|
@ -1646,7 +2179,7 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
|
||||||
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
|
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
|
||||||
EDRAMLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
|
EDRAMLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
|
||||||
render_target->key.format);
|
render_target->key.format);
|
||||||
command_processor_->SetPipeline(edram_load_pipelines_[size_t(mode)]);
|
command_processor_->SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
|
||||||
command_list->Dispatch(edram_pitch_tiles, edram_rows, 1);
|
command_list->Dispatch(edram_pitch_tiles, edram_rows, 1);
|
||||||
|
|
||||||
// Commit the UAV write and transition the copy buffer to copy source now.
|
// Commit the UAV write and transition the copy buffer to copy source now.
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "xenia/gpu/d3d12/shared_memory.h"
|
#include "xenia/gpu/d3d12/shared_memory.h"
|
||||||
|
#include "xenia/gpu/d3d12/texture_cache.h"
|
||||||
#include "xenia/gpu/register_file.h"
|
#include "xenia/gpu/register_file.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
#include "xenia/memory.h"
|
#include "xenia/memory.h"
|
||||||
|
@ -219,7 +220,8 @@ class RenderTargetCache {
|
||||||
// Performs the resolve to a shared memory area according to the current
|
// Performs the resolve to a shared memory area according to the current
|
||||||
// register values, and also clears the EDRAM buffer if needed. Must be in a
|
// register values, and also clears the EDRAM buffer if needed. Must be in a
|
||||||
// frame for calling.
|
// frame for calling.
|
||||||
bool Resolve(SharedMemory* shared_memory, Memory* memory);
|
bool Resolve(SharedMemory* shared_memory, TextureCache* texture_cache,
|
||||||
|
Memory* memory);
|
||||||
// Flushes the render targets to EDRAM and unbinds them, for instance, when
|
// Flushes the render targets to EDRAM and unbinds them, for instance, when
|
||||||
// the command processor takes over framebuffer bindings to draw something
|
// the command processor takes over framebuffer bindings to draw something
|
||||||
// special.
|
// special.
|
||||||
|
@ -320,8 +322,40 @@ class RenderTargetCache {
|
||||||
RenderTarget* render_target;
|
RenderTarget* render_target;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Converting resolve pipeline.
|
||||||
|
struct ResolvePipeline {
|
||||||
|
ID3D12PipelineState* pipeline;
|
||||||
|
DXGI_FORMAT dest_format;
|
||||||
|
};
|
||||||
|
|
||||||
|
union ResolveTargetKey {
|
||||||
|
struct {
|
||||||
|
uint32_t width_div_32 : 9;
|
||||||
|
uint32_t height_div_32 : 9;
|
||||||
|
DXGI_FORMAT format : 14;
|
||||||
|
};
|
||||||
|
uint32_t value;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Target for converting resolves.
|
||||||
|
struct ResolveTarget {
|
||||||
|
ID3D12Resource* resource;
|
||||||
|
D3D12_RESOURCE_STATES state;
|
||||||
|
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
|
||||||
|
ResolveTargetKey key;
|
||||||
|
uint32_t heap_page_first;
|
||||||
|
};
|
||||||
|
|
||||||
void ClearBindings();
|
void ClearBindings();
|
||||||
|
|
||||||
|
// Checks if the heap for the render target exists and tries to create it if
|
||||||
|
// it's not.
|
||||||
|
bool MakeHeapResident(uint32_t heap_index);
|
||||||
|
|
||||||
|
// Creates a new RTV/DSV descriptor heap if needed to be able to allocate one
|
||||||
|
// descriptor in it.
|
||||||
|
bool EnsureRTVHeapAvailable(bool is_depth);
|
||||||
|
|
||||||
// Returns true if a render target with such key can be created.
|
// Returns true if a render target with such key can be created.
|
||||||
static bool GetResourceDesc(RenderTargetKey key, D3D12_RESOURCE_DESC& desc);
|
static bool GetResourceDesc(RenderTargetKey key, D3D12_RESOURCE_DESC& desc);
|
||||||
|
|
||||||
|
@ -357,11 +391,19 @@ class RenderTargetCache {
|
||||||
const uint32_t* edram_bases);
|
const uint32_t* edram_bases);
|
||||||
|
|
||||||
// Performs the copying part of a resolve.
|
// Performs the copying part of a resolve.
|
||||||
bool ResolveCopy(SharedMemory* shared_memory, uint32_t edram_base,
|
bool ResolveCopy(SharedMemory* shared_memory, TextureCache* texture_cache,
|
||||||
uint32_t surface_pitch, MsaaSamples msaa_samples,
|
uint32_t edram_base, uint32_t surface_pitch,
|
||||||
bool is_depth, uint32_t src_format,
|
MsaaSamples msaa_samples, bool is_depth, uint32_t src_format,
|
||||||
const D3D12_RECT& src_rect);
|
const D3D12_RECT& src_rect);
|
||||||
|
|
||||||
|
ID3D12PipelineState* GetResolvePipeline(DXGI_FORMAT dest_format);
|
||||||
|
// Returns any available resolve target placed at least at
|
||||||
|
// min_heap_first_page, or tries to place it at the specified position (if not
|
||||||
|
// possible, will place it in the next heap).
|
||||||
|
ResolveTarget* FindOrCreateResolveTarget(uint32_t width, uint32_t height,
|
||||||
|
DXGI_FORMAT format,
|
||||||
|
uint32_t min_heap_first_page);
|
||||||
|
|
||||||
D3D12CommandProcessor* command_processor_;
|
D3D12CommandProcessor* command_processor_;
|
||||||
RegisterFile* register_file_;
|
RegisterFile* register_file_;
|
||||||
|
|
||||||
|
@ -382,8 +424,8 @@ class RenderTargetCache {
|
||||||
};
|
};
|
||||||
struct {
|
struct {
|
||||||
// 16 bits for X, 16 bits for Y.
|
// 16 bits for X, 16 bits for Y.
|
||||||
uint32_t tile_sample_rect_tl;
|
uint32_t tile_sample_rect_lt;
|
||||||
uint32_t tile_sample_rect_br;
|
uint32_t tile_sample_rect_rb;
|
||||||
uint32_t tile_sample_dest_base;
|
uint32_t tile_sample_dest_base;
|
||||||
// 0:13 - destination pitch.
|
// 0:13 - destination pitch.
|
||||||
// 14 - log2(vertical sample count), 0 for 1x AA, 1 for 2x/4x AA.
|
// 14 - log2(vertical sample count), 0 for 1x AA, 1 for 2x/4x AA.
|
||||||
|
@ -439,6 +481,30 @@ class RenderTargetCache {
|
||||||
RenderTargetBinding current_bindings_[5] = {};
|
RenderTargetBinding current_bindings_[5] = {};
|
||||||
|
|
||||||
PipelineRenderTarget current_pipeline_render_targets_[5];
|
PipelineRenderTarget current_pipeline_render_targets_[5];
|
||||||
|
|
||||||
|
ID3D12RootSignature* resolve_root_signature_ = nullptr;
|
||||||
|
struct ResolveRootConstants {
|
||||||
|
// In samples.
|
||||||
|
// Left and top in the lower 16 bits, width and height in the upper.
|
||||||
|
uint32_t rect_samples_lw;
|
||||||
|
uint32_t rect_samples_th;
|
||||||
|
// In samples. Width in the lower 16 bits, height in the upper.
|
||||||
|
uint32_t source_size;
|
||||||
|
// 0 - log2(vertical sample count), 0 for 1x AA, 1 for 2x/4x AA.
|
||||||
|
// 1 - log2(horizontal sample count), 0 for 1x/2x AA, 1 for 4x AA.
|
||||||
|
// 2:3 - vertical sample position:
|
||||||
|
// 0 for the upper samples with 2x/4x AA.
|
||||||
|
// 1 for 1x AA or to mix samples with 2x/4x AA.
|
||||||
|
// 2 for the lower samples with 2x/4x AA.
|
||||||
|
// 4:5 - horizontal sample position:
|
||||||
|
// 0 for the left samples with 4x AA.
|
||||||
|
// 1 for 1x/2x AA or to mix samples with 4x AA.
|
||||||
|
// 2 for the right samples with 4x AA.
|
||||||
|
// 6:11 - exponent bias.
|
||||||
|
uint32_t resolve_info;
|
||||||
|
};
|
||||||
|
std::vector<ResolvePipeline> resolve_pipelines_;
|
||||||
|
std::unordered_multimap<uint32_t, ResolveTarget*> resolve_targets_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
cbuffer XeResolveCbuffer : register(b0) {
|
||||||
|
// In samples.
|
||||||
|
// Left and top in the lower 16 bits, width and height in the upper.
|
||||||
|
uint2 xe_resolve_rect_samples;
|
||||||
|
// In samples. Width in the lower 16 bits, height in the upper.
|
||||||
|
uint xe_resolve_source_size;
|
||||||
|
// 0 - log2(vertical sample count), 0 for 1x AA, 1 for 2x/4x AA.
|
||||||
|
// 1 - log2(horizontal sample count), 0 for 1x/2x AA, 1 for 4x AA.
|
||||||
|
// 2:3 - vertical sample position:
|
||||||
|
// 0 for the upper samples with 2x/4x AA.
|
||||||
|
// 1 for 1x AA or to mix samples with 2x/4x AA.
|
||||||
|
// 2 for the lower samples with 2x/4x AA.
|
||||||
|
// 4:5 - horizontal sample position:
|
||||||
|
// 0 for the left samples with 4x AA.
|
||||||
|
// 1 for 1x/2x AA or to mix samples with 4x AA.
|
||||||
|
// 2 for the right samples with 4x AA.
|
||||||
|
// 6:11 - exponent bias.
|
||||||
|
uint xe_resolve_info;
|
||||||
|
};
|
||||||
|
|
||||||
|
Texture2D<float4> xe_resolve_source : register(t0);
|
||||||
|
SamplerState xe_resolve_sampler : register(s0);
|
||||||
|
|
||||||
|
float4 main(float4 xe_position : SV_Position) : SV_Target {
|
||||||
|
// The source texture dimensions are snapped to 80x16 samples for ease of
|
||||||
|
// EDRAM loading, but resolving may be done from different regions within it.
|
||||||
|
// The viewport and the quad have the size of the needed region, but the
|
||||||
|
// viewport starts at (0,0), so texture sample positions need to be offset.
|
||||||
|
// Also because there may be excess texels in the source, clamping needs to be
|
||||||
|
// done manually so those pixels won't effect bilinear filtering. Resolving is
|
||||||
|
// done without stretching, but the resolve region on the source texture is 2
|
||||||
|
// or 4 times bigger than the destination - bilinear filtering is used to mix
|
||||||
|
// the samples (if exact samples are needed, the source texture is sampled at
|
||||||
|
// texel centers, if AA is resolved, it's sampled between texels).
|
||||||
|
|
||||||
|
// Go to sample coordinates and select the needed samples.
|
||||||
|
float2 resolve_position = xe_position.xy *
|
||||||
|
float2(((xe_resolve_info.xx >> uint2(1u, 0u)) & 1u) + 1u) +
|
||||||
|
(float2((xe_resolve_info.xx >> uint2(4u, 2u)) & 3u) * 0.5 - 0.5);
|
||||||
|
|
||||||
|
// Clamp, offset and normalize the position.
|
||||||
|
resolve_position = clamp(resolve_position, (0.5).xx,
|
||||||
|
float2(xe_resolve_rect_samples >> 16u) - 0.5) +
|
||||||
|
float2(xe_resolve_rect_samples & 0xFFFFu);
|
||||||
|
resolve_position /=
|
||||||
|
float2((xe_resolve_source_size >> uint2(0u, 16u)) & 0xFFFFu);
|
||||||
|
|
||||||
|
// Resolve the samples.
|
||||||
|
float4 pixel = xe_resolve_source.SampleLevel(xe_resolve_sampler,
|
||||||
|
resolve_position, 0.0);
|
||||||
|
|
||||||
|
// Bias the exponent.
|
||||||
|
pixel *= exp2(float(int(xe_resolve_info << (32u - 12u)) >> 26));
|
||||||
|
|
||||||
|
return pixel;
|
||||||
|
}
|
|
@ -0,0 +1,5 @@
|
||||||
|
// A triangle covering the whole viewport.
|
||||||
|
float4 main(uint xe_vertex_id : SV_VertexID) : SV_Position {
|
||||||
|
return float4(float2(uint2(xe_vertex_id, xe_vertex_id << 1u) & 2u) *
|
||||||
|
float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
|
||||||
|
}
|
|
@ -450,6 +450,17 @@ void TextureCache::WriteSampler(uint32_t fetch_constant,
|
||||||
device->CreateSampler(&desc, handle);
|
device->CreateSampler(&desc, handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DXGI_FORMAT TextureCache::GetResolveDXGIFormat(TextureFormat format) {
|
||||||
|
// TODO(Triang3l): Change this to a check whether there is a tiling pipeline.
|
||||||
|
switch (format) {
|
||||||
|
case TextureFormat::k_8_8_8_8:
|
||||||
|
return host_formats_[uint32_t(format)].dxgi_format;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return DXGI_FORMAT_UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||||
auto group = reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>(
|
auto group = reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>(
|
||||||
®ister_file_->values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]);
|
®ister_file_->values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]);
|
||||||
|
@ -467,6 +478,7 @@ bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||||
command_processor_->PushTransitionBarrier(
|
command_processor_->PushTransitionBarrier(
|
||||||
texture->resource, texture->state,
|
texture->resource, texture->state,
|
||||||
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||||
|
texture->state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
||||||
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
|
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
|
||||||
srv_desc.Format = host_formats_[uint32_t(key.format)].dxgi_format;
|
srv_desc.Format = host_formats_[uint32_t(key.format)].dxgi_format;
|
||||||
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
|
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
|
||||||
|
@ -826,7 +838,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
descriptor_cpu_start.ptr + provider->GetDescriptorSizeView();
|
descriptor_cpu_start.ptr + provider->GetDescriptorSizeView();
|
||||||
device->CreateUnorderedAccessView(copy_buffer, nullptr, &uav_desc,
|
device->CreateUnorderedAccessView(copy_buffer, nullptr, &uav_desc,
|
||||||
descriptor_cpu_uav);
|
descriptor_cpu_uav);
|
||||||
command_processor_->SetPipeline(pipeline);
|
command_processor_->SetComputePipeline(pipeline);
|
||||||
command_list->SetComputeRootSignature(copy_root_signature_);
|
command_list->SetComputeRootSignature(copy_root_signature_);
|
||||||
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
||||||
|
|
||||||
|
|
|
@ -78,6 +78,8 @@ class TextureCache {
|
||||||
void WriteSampler(uint32_t fetch_constant,
|
void WriteSampler(uint32_t fetch_constant,
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||||
|
|
||||||
|
static DXGI_FORMAT GetResolveDXGIFormat(TextureFormat format);
|
||||||
|
|
||||||
bool RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
bool RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
Loading…
Reference in New Issue