[D3D12] Resolve shader and draw

This commit is contained in:
Triang3l 2018-08-24 22:40:22 +03:00
parent dd17cd3f9f
commit 4a747b3b81
8 changed files with 835 additions and 117 deletions

View File

@ -469,13 +469,39 @@ void D3D12CommandProcessor::ReleaseScratchGPUBuffer(
}
}
void D3D12CommandProcessor::SetPipeline(ID3D12PipelineState* pipeline) {
// Binds a compute pipeline state object on the current command list, using
// current_pipeline_ as a one-entry cache so that binding the same pipeline
// again issues no command.
void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
  // Already bound - nothing to record.
  if (pipeline == current_pipeline_) {
    return;
  }
  GetCurrentCommandList()->SetPipelineState(pipeline);
  // Remember what was bound so later calls can be skipped.
  current_pipeline_ = pipeline;
}
// Forwards to RenderTargetCache::UnbindRenderTargets so that code outside the
// render target cache can have the currently bound render targets stored and
// unbound before it changes the bindings itself.
void D3D12CommandProcessor::UnbindRenderTargets() {
render_target_cache_->UnbindRenderTargets();
}
// Binds a graphics pipeline that is managed outside the regular draw path and
// invalidates the cached state that such an external draw may overwrite.
//
// pipeline           - pipeline state object to bind.
// reset_viewport     - mark the fixed-function viewport and scissor as needing
//                      an update.
// reset_blend_factor - mark the fixed-function blend factor as needing an
//                      update.
// reset_stencil_ref  - mark the fixed-function stencil reference as needing an
//                      update.
void D3D12CommandProcessor::SetExternalGraphicsPipeline(
    ID3D12PipelineState* pipeline, bool reset_viewport, bool reset_blend_factor,
    bool reset_stencil_ref) {
  // Only record a pipeline change when the pipeline actually differs from the
  // cached one.
  if (pipeline != current_pipeline_) {
    GetCurrentCommandList()->SetPipelineState(pipeline);
    current_pipeline_ = pipeline;
  }

  // The cached root signature, root parameter up-to-date mask and primitive
  // topology are always invalidated, regardless of the reset flags.
  primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
  current_graphics_root_up_to_date_ = 0;
  current_graphics_root_signature_ = nullptr;

  // Fixed-function state is invalidated only on request. The flags are only
  // ever raised here, never cleared.
  if (reset_viewport) {
    ff_viewport_update_needed_ = ff_scissor_update_needed_ = true;
  }
  ff_blend_factor_update_needed_ =
      ff_blend_factor_update_needed_ || reset_blend_factor;
  ff_stencil_ref_update_needed_ =
      ff_stencil_ref_update_needed_ || reset_stencil_ref;
}
bool D3D12CommandProcessor::SetupContext() {
if (!CommandProcessor::SetupContext()) {
XELOGE("Failed to initialize base command processor context");
@ -898,7 +924,10 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
UpdateFixedFunctionState(command_list);
// Bind the pipeline.
SetPipeline(pipeline);
if (current_pipeline_ != pipeline) {
GetCurrentCommandList()->SetPipelineState(pipeline);
current_pipeline_ = pipeline;
}
// Update system constants before uploading them.
UpdateSystemConstantValues(
@ -966,7 +995,8 @@ bool D3D12CommandProcessor::IssueCopy() {
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
BeginFrame();
return render_target_cache_->Resolve(shared_memory_.get(), memory_);
return render_target_cache_->Resolve(shared_memory_.get(),
texture_cache_.get(), memory_);
}
bool D3D12CommandProcessor::BeginFrame() {

View File

@ -86,9 +86,23 @@ class D3D12CommandProcessor : public CommandProcessor {
void ReleaseScratchGPUBuffer(ID3D12Resource* buffer,
D3D12_RESOURCE_STATES new_state);
// Sets the current pipeline state - may be called internally or externally.
// This is for cache invalidation primarily. A frame must be open.
void SetPipeline(ID3D12PipelineState* pipeline);
// Sets the current pipeline state to a compute pipeline. This is for cache
// invalidation primarily. A frame must be open.
void SetComputePipeline(ID3D12PipelineState* pipeline);
// Stores and unbinds the render targets before the render target bindings are
// changed externally. This is separate from SetExternalGraphicsPipeline
// because it causes computations to be dispatched, and the scratch buffer may
// also be used.
void UnbindRenderTargets();
// Sets the current pipeline state to a special drawing pipeline, invalidating
// various cached state variables. UnbindRenderTargets may be needed before
// calling this. A frame must be open.
void SetExternalGraphicsPipeline(ID3D12PipelineState* pipeline,
bool reset_viewport = true,
bool reset_blend_factor = false,
bool reset_stencil_ref = false);
protected:
bool SetupContext() override;

View File

@ -38,6 +38,8 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_float_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_unorm_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_tile_sample_32bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/resolve_ps.h"
#include "xenia/gpu/d3d12/shaders/bin/resolve_vs.h"
const RenderTargetCache::EDRAMLoadStoreModeInfo
RenderTargetCache::edram_load_store_mode_info_[size_t(
@ -98,63 +100,67 @@ bool RenderTargetCache::Initialize() {
edram_buffer_cleared_ = false;
// Create the root signature for EDRAM buffer load/store.
D3D12_ROOT_PARAMETER root_parameters[2];
D3D12_ROOT_PARAMETER load_store_root_parameters[2];
// Parameter 0 is constants (changed for each render target binding).
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
root_parameters[0].Constants.ShaderRegister = 0;
root_parameters[0].Constants.RegisterSpace = 0;
root_parameters[0].Constants.Num32BitValues =
load_store_root_parameters[0].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
load_store_root_parameters[0].Constants.ShaderRegister = 0;
load_store_root_parameters[0].Constants.RegisterSpace = 0;
load_store_root_parameters[0].Constants.Num32BitValues =
sizeof(EDRAMLoadStoreRootConstants) / sizeof(uint32_t);
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
load_store_root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 1 is source and target.
D3D12_DESCRIPTOR_RANGE root_load_store_ranges[2];
root_load_store_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
root_load_store_ranges[0].NumDescriptors = 1;
root_load_store_ranges[0].BaseShaderRegister = 0;
root_load_store_ranges[0].RegisterSpace = 0;
root_load_store_ranges[0].OffsetInDescriptorsFromTableStart = 0;
root_load_store_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
root_load_store_ranges[1].NumDescriptors = 1;
root_load_store_ranges[1].BaseShaderRegister = 0;
root_load_store_ranges[1].RegisterSpace = 0;
root_load_store_ranges[1].OffsetInDescriptorsFromTableStart = 1;
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
root_parameters[1].DescriptorTable.pDescriptorRanges = root_load_store_ranges;
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
root_signature_desc.NumParameters = UINT(xe::countof(root_parameters));
root_signature_desc.pParameters = root_parameters;
root_signature_desc.NumStaticSamplers = 0;
root_signature_desc.pStaticSamplers = nullptr;
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
ID3DBlob* root_signature_blob;
ID3DBlob* root_signature_error_blob = nullptr;
D3D12_DESCRIPTOR_RANGE load_store_root_ranges[2];
load_store_root_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
load_store_root_ranges[0].NumDescriptors = 1;
load_store_root_ranges[0].BaseShaderRegister = 0;
load_store_root_ranges[0].RegisterSpace = 0;
load_store_root_ranges[0].OffsetInDescriptorsFromTableStart = 0;
load_store_root_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
load_store_root_ranges[1].NumDescriptors = 1;
load_store_root_ranges[1].BaseShaderRegister = 0;
load_store_root_ranges[1].RegisterSpace = 0;
load_store_root_ranges[1].OffsetInDescriptorsFromTableStart = 1;
load_store_root_parameters[1].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
load_store_root_parameters[1].DescriptorTable.pDescriptorRanges =
load_store_root_ranges;
load_store_root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
D3D12_ROOT_SIGNATURE_DESC load_store_root_desc;
load_store_root_desc.NumParameters =
UINT(xe::countof(load_store_root_parameters));
load_store_root_desc.pParameters = load_store_root_parameters;
load_store_root_desc.NumStaticSamplers = 0;
load_store_root_desc.pStaticSamplers = nullptr;
load_store_root_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
ID3DBlob* load_store_root_blob;
ID3DBlob* load_store_root_error_blob = nullptr;
if (FAILED(D3D12SerializeRootSignature(
&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1,
&root_signature_blob, &root_signature_error_blob))) {
&load_store_root_desc, D3D_ROOT_SIGNATURE_VERSION_1,
&load_store_root_blob, &load_store_root_error_blob))) {
XELOGE("Failed to serialize the EDRAM buffer load/store root signature");
if (root_signature_error_blob != nullptr) {
if (load_store_root_error_blob != nullptr) {
XELOGE("%s", reinterpret_cast<const char*>(
root_signature_error_blob->GetBufferPointer()));
root_signature_error_blob->Release();
load_store_root_error_blob->GetBufferPointer()));
load_store_root_error_blob->Release();
}
Shutdown();
return false;
}
if (root_signature_error_blob != nullptr) {
root_signature_error_blob->Release();
if (load_store_root_error_blob != nullptr) {
load_store_root_error_blob->Release();
}
if (FAILED(device->CreateRootSignature(
0, root_signature_blob->GetBufferPointer(),
root_signature_blob->GetBufferSize(),
0, load_store_root_blob->GetBufferPointer(),
load_store_root_blob->GetBufferSize(),
IID_PPV_ARGS(&edram_load_store_root_signature_)))) {
XELOGE("Failed to create the EDRAM buffer load/store root signature");
root_signature_blob->Release();
load_store_root_blob->Release();
Shutdown();
return false;
}
root_signature_blob->Release();
load_store_root_blob->Release();
// Create the load/store pipelines.
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc;
@ -197,12 +203,94 @@ bool RenderTargetCache::Initialize() {
}
edram_tile_sample_32bpp_pipeline_->SetName(L"EDRAM Raw Resolve 32bpp");
// Create the converting resolve root signature.
D3D12_ROOT_PARAMETER resolve_root_parameters[2];
// Parameter 0 is constants.
resolve_root_parameters[0].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
resolve_root_parameters[0].Constants.ShaderRegister = 0;
resolve_root_parameters[0].Constants.RegisterSpace = 0;
resolve_root_parameters[0].Constants.Num32BitValues =
sizeof(ResolveRootConstants) / sizeof(uint32_t);
resolve_root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
// Parameter 1 is the source render target.
D3D12_DESCRIPTOR_RANGE resolve_root_srv_range;
resolve_root_srv_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
resolve_root_srv_range.NumDescriptors = 1;
resolve_root_srv_range.BaseShaderRegister = 0;
resolve_root_srv_range.RegisterSpace = 0;
resolve_root_srv_range.OffsetInDescriptorsFromTableStart = 0;
resolve_root_parameters[1].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
resolve_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
resolve_root_parameters[1].DescriptorTable.pDescriptorRanges =
&resolve_root_srv_range;
resolve_root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
// Static sampler for resolving AA using bilinear filtering.
D3D12_STATIC_SAMPLER_DESC resolve_sampler_desc;
resolve_sampler_desc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
resolve_sampler_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
resolve_sampler_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
resolve_sampler_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
resolve_sampler_desc.MipLODBias = 0.0f;
resolve_sampler_desc.MaxAnisotropy = 1;
resolve_sampler_desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
resolve_sampler_desc.BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK;
resolve_sampler_desc.MinLOD = 0.0f;
resolve_sampler_desc.MaxLOD = 0.0f;
resolve_sampler_desc.ShaderRegister = 0;
resolve_sampler_desc.RegisterSpace = 0;
resolve_sampler_desc.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
D3D12_ROOT_SIGNATURE_DESC resolve_root_desc;
resolve_root_desc.NumParameters = UINT(xe::countof(resolve_root_parameters));
resolve_root_desc.pParameters = resolve_root_parameters;
resolve_root_desc.NumStaticSamplers = 1;
resolve_root_desc.pStaticSamplers = &resolve_sampler_desc;
resolve_root_desc.Flags =
D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS;
ID3DBlob* resolve_root_blob;
ID3DBlob* resolve_root_error_blob = nullptr;
if (FAILED(D3D12SerializeRootSignature(
&resolve_root_desc, D3D_ROOT_SIGNATURE_VERSION_1, &resolve_root_blob,
&resolve_root_error_blob))) {
XELOGE("Failed to serialize the converting resolve root signature");
if (resolve_root_error_blob != nullptr) {
XELOGE("%s", reinterpret_cast<const char*>(
resolve_root_error_blob->GetBufferPointer()));
resolve_root_error_blob->Release();
}
Shutdown();
return false;
}
if (resolve_root_error_blob != nullptr) {
resolve_root_error_blob->Release();
}
if (FAILED(device->CreateRootSignature(
0, resolve_root_blob->GetBufferPointer(),
resolve_root_blob->GetBufferSize(),
IID_PPV_ARGS(&resolve_root_signature_)))) {
XELOGE("Failed to create the converting resolve root signature");
resolve_root_blob->Release();
Shutdown();
return false;
}
resolve_root_blob->Release();
return true;
}
void RenderTargetCache::Shutdown() {
ClearCache();
for (auto& resolve_pipeline : resolve_pipelines_) {
resolve_pipeline.pipeline->Release();
}
resolve_pipelines_.clear();
if (resolve_root_signature_ != nullptr) {
resolve_root_signature_->Release();
resolve_root_signature_ = nullptr;
}
if (edram_tile_sample_32bpp_pipeline_ != nullptr) {
edram_tile_sample_32bpp_pipeline_->Release();
edram_tile_sample_32bpp_pipeline_ = nullptr;
@ -229,11 +317,16 @@ void RenderTargetCache::Shutdown() {
}
void RenderTargetCache::ClearCache() {
for (auto resolve_target_pair : resolve_targets_) {
ResolveTarget* resolve_target = resolve_target_pair.second;
resolve_target->resource->Release();
delete resolve_target;
}
resolve_targets_.clear();
for (auto render_target_pair : render_targets_) {
RenderTarget* render_target = render_target_pair.second;
if (render_target->resource != nullptr) {
render_target->resource->Release();
}
render_target->resource->Release();
delete render_target;
}
render_targets_.clear();
@ -721,7 +814,8 @@ bool RenderTargetCache::UpdateRenderTargets() {
return true;
}
bool RenderTargetCache::Resolve(SharedMemory* shared_memory, Memory* memory) {
bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
TextureCache* texture_cache, Memory* memory) {
// Save the currently bound render targets to the EDRAM buffer that will be
// used as the resolve source and clear bindings to allow render target
// resources to be reused as source textures for format conversion, resolving
@ -823,14 +917,15 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, Memory* memory) {
msaa_samples != MsaaSamples::k1X ? "s" : "", surface_format,
surface_edram_base);
bool copied =
ResolveCopy(shared_memory, surface_edram_base, surface_pitch,
msaa_samples, surface_is_depth, surface_format, src_rect);
bool copied = ResolveCopy(shared_memory, texture_cache, surface_edram_base,
surface_pitch, msaa_samples, surface_is_depth,
surface_format, src_rect);
// TODO(Triang3l): Clear.
return copied;
}
bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
TextureCache* texture_cache,
uint32_t edram_base, uint32_t surface_pitch,
MsaaSamples msaa_samples, bool is_depth,
uint32_t src_format,
@ -954,6 +1049,9 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
auto descriptor_size_view = provider->GetDescriptorSizeView();
if (sample_select <= xenos::CopySampleSelect::k3 &&
src_texture_format == dest_format && dest_exp_bias == 0) {
// *************************************************************************
// Raw copy
// *************************************************************************
XELOGGPU("Resolving a single sample without conversion");
if (src_64bpp) {
// TODO(Triang3l): 64bpp sample copy shader.
@ -997,8 +1095,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// Dispatch the computation.
command_list->SetComputeRootSignature(edram_load_store_root_signature_);
EDRAMLoadStoreRootConstants root_constants;
root_constants.tile_sample_rect_tl = copy_rect.left | (copy_rect.top << 16);
root_constants.tile_sample_rect_br =
root_constants.tile_sample_rect_lt = copy_rect.left | (copy_rect.top << 16);
root_constants.tile_sample_rect_rb =
copy_rect.right | (copy_rect.bottom << 16);
root_constants.tile_sample_dest_base = dest_address;
assert_true(dest_pitch <= 8192);
@ -1036,7 +1134,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
// TODO(Triang3l): 64bpp pipeline.
command_processor_->SetPipeline(edram_tile_sample_32bpp_pipeline_);
command_processor_->SetComputePipeline(edram_tile_sample_32bpp_pipeline_);
// 1 group per destination 80x16 (32bpp) / 80x8 (64bpp) region.
uint32_t group_count_x = row_tiles, group_count_y = rows;
if (msaa_samples >= MsaaSamples::k2X) {
@ -1053,14 +1151,426 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// Make the texture cache refresh the data.
shared_memory->RangeWrittenByGPU(dest_address, dest_size);
} else {
// *************************************************************************
// Conversion and AA resolving
// *************************************************************************
XELOGGPU("Resolving with a pixel shader");
// TODO(Triang3l): Conversion.
return false;
// Get everything we need for the conversion.
// DXGI format (also checking whether this resolve is possible).
DXGI_FORMAT dest_dxgi_format =
texture_cache->GetResolveDXGIFormat(dest_format);
if (dest_dxgi_format == DXGI_FORMAT_UNKNOWN) {
XELOGE(
"No resolve pipeline for destination format %s - tell Xenia "
"developers!",
FormatInfo::Get(dest_format)->name);
return false;
}
// Resolve pipeline.
ID3D12PipelineState* resolve_pipeline =
GetResolvePipeline(dest_dxgi_format);
if (resolve_pipeline == nullptr) {
return false;
}
RenderTargetKey render_target_key;
render_target_key.width_ss_div_80 = row_tiles >> (src_64bpp ? 1 : 0);
render_target_key.height_ss_div_16 = rows;
render_target_key.is_depth = false;
render_target_key.format = src_format;
// Render target for loading the EDRAM buffer contents as a texture.
RenderTarget* render_target =
FindOrCreateRenderTarget(render_target_key, 0);
if (render_target == nullptr) {
return false;
}
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint =
render_target->footprints[0];
// Size of the resolved area.
uint32_t copy_width = copy_rect.right - copy_rect.left;
uint32_t copy_height = copy_rect.bottom - copy_rect.top;
// Resolve target for output merger format conversion.
ResolveTarget* resolve_target =
FindOrCreateResolveTarget(copy_width, copy_height, dest_dxgi_format,
render_target->heap_page_count);
if (resolve_target == nullptr) {
return false;
}
// Descriptors. 2 for EDRAM load, 1 for conversion.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
0, 3, 3, descriptor_cpu_start, descriptor_gpu_start) == 0) {
return false;
}
// Buffer for copying.
D3D12_RESOURCE_STATES copy_buffer_state =
D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer(
render_target->copy_buffer_size, copy_buffer_state);
if (copy_buffer == nullptr) {
return false;
}
// Load the EDRAM buffer contents to the copy buffer.
command_processor_->PushTransitionBarrier(
edram_buffer_, edram_buffer_state_,
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
command_processor_->SubmitBarriers();
command_list->SetComputeRootSignature(edram_load_store_root_signature_);
EDRAMLoadStoreRootConstants load_root_constants;
load_root_constants.rt_color_depth_offset = uint32_t(footprint.Offset);
load_root_constants.rt_color_depth_pitch =
uint32_t(footprint.Footprint.RowPitch);
load_root_constants.base_pitch_tiles =
edram_base | (surface_pitch_tiles << 11);
command_list->SetComputeRoot32BitConstants(
0, sizeof(load_root_constants) / sizeof(uint32_t), &load_root_constants,
0);
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
srv_desc.Format = DXGI_FORMAT_R32_TYPELESS;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv_desc.Buffer.FirstElement = 0;
srv_desc.Buffer.NumElements = 2048 * 1280;
srv_desc.Buffer.StructureByteStride = 0;
srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
device->CreateShaderResourceView(edram_buffer_, &srv_desc,
descriptor_cpu_start);
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uav_desc.Buffer.FirstElement = 0;
uav_desc.Buffer.NumElements = render_target->copy_buffer_size >> 2;
uav_desc.Buffer.StructureByteStride = 0;
uav_desc.Buffer.CounterOffsetInBytes = 0;
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
D3D12_CPU_DESCRIPTOR_HANDLE copy_buffer_cpu_handle;
copy_buffer_cpu_handle.ptr =
descriptor_cpu_start.ptr + descriptor_size_view;
device->CreateUnorderedAccessView(copy_buffer, nullptr, &uav_desc,
copy_buffer_cpu_handle);
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
command_processor_->SetComputePipeline(
edram_load_pipelines_[size_t(GetLoadStoreMode(false, src_format))]);
command_list->Dispatch(row_tiles, rows, 1);
command_processor_->PushUAVBarrier(copy_buffer);
// Go to the next descriptor set.
descriptor_cpu_start.ptr += 2 * descriptor_size_view;
descriptor_gpu_start.ptr += 2 * descriptor_size_view;
// Copy the EDRAM buffer contents to the source texture.
command_processor_->PushAliasingBarrier(nullptr, render_target->resource);
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
D3D12_RESOURCE_STATE_COPY_SOURCE);
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
command_processor_->PushTransitionBarrier(render_target->resource,
render_target->state,
D3D12_RESOURCE_STATE_COPY_DEST);
render_target->state = D3D12_RESOURCE_STATE_COPY_DEST;
command_processor_->SubmitBarriers();
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
location_source.pResource = copy_buffer;
location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
location_source.PlacedFootprint = render_target->footprints[0];
location_dest.pResource = render_target->resource;
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
location_dest.SubresourceIndex = 0;
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source,
nullptr);
// Done with the copy buffer.
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
// Do the resolve. Render targets unbound already, safe to call
// OMSetRenderTargets.
command_processor_->PushAliasingBarrier(nullptr, resolve_target->resource);
command_processor_->PushTransitionBarrier(
render_target->resource, render_target->state,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
render_target->state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
command_processor_->PushTransitionBarrier(
resolve_target->resource, resolve_target->state,
D3D12_RESOURCE_STATE_RENDER_TARGET);
resolve_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET;
command_list->SetGraphicsRootSignature(resolve_root_signature_);
ResolveRootConstants resolve_root_constants;
uint32_t samples_x_log2 = msaa_samples >= MsaaSamples::k4X ? 1 : 0;
uint32_t samples_y_log2 = msaa_samples >= MsaaSamples::k2X ? 1 : 0;
resolve_root_constants.rect_samples_lw =
(copy_rect.left << samples_x_log2) |
(copy_width << (16 + samples_x_log2));
resolve_root_constants.rect_samples_th =
(copy_rect.top << samples_y_log2) |
(copy_height << (16 + samples_y_log2));
resolve_root_constants.source_size =
(render_target_key.width_ss_div_80 * 80) |
(render_target_key.height_ss_div_16 << (4 + 16));
resolve_root_constants.resolve_info =
samples_y_log2 | (samples_x_log2 << 1) |
((uint32_t(dest_exp_bias) & 0x3F) << 6);
if (msaa_samples == MsaaSamples::k1X) {
// No offset.
resolve_root_constants.resolve_info |= (1 << 2) | (1 << 4);
} else if (msaa_samples == MsaaSamples::k2X) {
// -0.5 or +0.5 samples vertical offset if getting only one sample.
if (sample_select == xenos::CopySampleSelect::k0) {
resolve_root_constants.resolve_info |= (0 << 2) | (1 << 4);
} else if (sample_select == xenos::CopySampleSelect::k1) {
resolve_root_constants.resolve_info |= (2 << 2) | (1 << 4);
} else {
resolve_root_constants.resolve_info |= (1 << 2) | (1 << 4);
}
} else {
// -0.5 or +0.5 samples offsets if getting one or two samples.
switch (sample_select) {
case xenos::CopySampleSelect::k0:
resolve_root_constants.resolve_info |= (0 << 2) | (0 << 4);
break;
case xenos::CopySampleSelect::k1:
resolve_root_constants.resolve_info |= (2 << 2) | (0 << 4);
break;
case xenos::CopySampleSelect::k2:
resolve_root_constants.resolve_info |= (0 << 2) | (2 << 4);
break;
case xenos::CopySampleSelect::k3:
resolve_root_constants.resolve_info |= (2 << 2) | (2 << 4);
break;
case xenos::CopySampleSelect::k01:
resolve_root_constants.resolve_info |= (1 << 2) | (0 << 4);
break;
case xenos::CopySampleSelect::k23:
resolve_root_constants.resolve_info |= (1 << 2) | (2 << 4);
break;
default:
resolve_root_constants.resolve_info |= (1 << 2) | (1 << 4);
break;
}
}
command_list->SetGraphicsRoot32BitConstants(
0, sizeof(resolve_root_constants) / sizeof(uint32_t),
&resolve_root_constants, 0);
srv_desc.Format = GetColorDXGIFormat(ColorRenderTargetFormat(src_format));
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
if (dest_swap) {
switch (ColorRenderTargetFormat(src_format)) {
case ColorRenderTargetFormat::k_8_8_8_8:
case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
case ColorRenderTargetFormat::k_2_10_10_10:
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
case ColorRenderTargetFormat::k_16_16_16_16:
case ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
case ColorRenderTargetFormat::k_2_10_10_10_AS_16_16_16_16:
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
srv_desc.Shader4ComponentMapping =
D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(2, 1, 0, 3);
break;
default:
srv_desc.Shader4ComponentMapping =
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
}
} else {
srv_desc.Shader4ComponentMapping =
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
}
srv_desc.Texture2D.MostDetailedMip = 0;
srv_desc.Texture2D.MipLevels = 1;
srv_desc.Texture2D.PlaneSlice = 0;
srv_desc.Texture2D.ResourceMinLODClamp = 0.0f;
device->CreateShaderResourceView(render_target->resource, &srv_desc,
descriptor_cpu_start);
command_list->SetGraphicsRootDescriptorTable(1, descriptor_gpu_start);
command_processor_->SetExternalGraphicsPipeline(resolve_pipeline);
command_processor_->SubmitBarriers();
command_list->OMSetRenderTargets(1, &resolve_target->rtv_handle, TRUE,
nullptr);
D3D12_VIEWPORT viewport;
viewport.TopLeftX = 0.0f;
viewport.TopLeftY = 0.0f;
viewport.Width = float(copy_width);
viewport.Height = float(copy_height);
viewport.MinDepth = 0.0f;
viewport.MaxDepth = 1.0f;
command_list->RSSetViewports(1, &viewport);
D3D12_RECT scissor;
scissor.left = 0;
scissor.top = 0;
scissor.right = copy_width;
scissor.bottom = copy_height;
command_list->RSSetScissorRects(1, &scissor);
command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
command_list->DrawInstanced(3, 1, 0, 0);
// TODO(Triang3l): Tile the resolve target in the texture cache.
}
return true;
}
// Returns the resolve graphics pipeline that renders to a target of the given
// DXGI format, creating and caching it in resolve_pipelines_ on first use.
// Returns nullptr if the pipeline state object could not be created.
ID3D12PipelineState* RenderTargetCache::GetResolvePipeline(
    DXGI_FORMAT dest_format) {
  // Look through the pipelines created so far - one per destination format.
  for (auto it = resolve_pipelines_.begin(); it != resolve_pipelines_.end();
       ++it) {
    if (it->dest_format == dest_format) {
      return it->pipeline;
    }
  }

  // Not cached yet - describe a new pipeline. The description is
  // zero-initialized, so only the fields that differ from zero are set.
  D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {};
  desc.pRootSignature = resolve_root_signature_;
  // Precompiled resolve shaders.
  desc.VS.pShaderBytecode = resolve_vs;
  desc.VS.BytecodeLength = sizeof(resolve_vs);
  desc.PS.pShaderBytecode = resolve_ps;
  desc.PS.BytecodeLength = sizeof(resolve_ps);
  // Single render target of the requested format, all channels written.
  desc.NumRenderTargets = 1;
  desc.RTVFormats[0] = dest_format;
  desc.BlendState.RenderTarget[0].RenderTargetWriteMask =
      D3D12_COLOR_WRITE_ENABLE_ALL;
  desc.SampleMask = UINT_MAX;
  desc.SampleDesc.Count = 1;
  // Solid, uncolled triangles.
  desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
  desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
  desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
  desc.RasterizerState.DepthClipEnable = TRUE;

  auto device =
      command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
  ID3D12PipelineState* created_pipeline;
  if (FAILED(device->CreateGraphicsPipelineState(
          &desc, IID_PPV_ARGS(&created_pipeline)))) {
    XELOGE("Failed to create the resolve pipeline for DXGI format %u",
           dest_format);
    return nullptr;
  }

  // Remember the pipeline so the next resolve to this format reuses it.
  ResolvePipeline cache_entry;
  cache_entry.dest_format = dest_format;
  cache_entry.pipeline = created_pipeline;
  resolve_pipelines_.push_back(cache_entry);
  return created_pipeline;
}
// Returns a render-target texture used as the destination of a converting
// resolve, reusing a cached one when possible and creating a new placed
// resource otherwise.
//
// width, height       - size of the resolved area in pixels (1 to 8192 each).
//                       The resource itself is rounded up to a multiple of 32
//                       in both dimensions, and that rounded size is part of
//                       the cache key.
// format              - DXGI format of the resolve target.
// min_heap_page_first - first 4 MB heap page the target may start at, so that
//                       it doesn't overlap the resolve source.
//
// Returns nullptr if the size is invalid, if no RTV descriptor or heap is
// available, if the resource doesn't fit in a single heap, or if creating the
// placed resource fails.
RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
    uint32_t width, uint32_t height, DXGI_FORMAT format,
    uint32_t min_heap_page_first) {
  assert_true(min_heap_page_first < kHeap4MBPages * 5);
  if (width == 0 || height == 0 || width > 8192 || height > 8192) {
    assert_always();
    return nullptr;
  }
  ResolveTargetKey key;
  key.width_div_32 = (width + 31) >> 5;
  key.height_div_32 = (height + 31) >> 5;
  key.format = format;

  // Try to find an existing target that isn't overlapping the resolve source.
  auto found_range = resolve_targets_.equal_range(key.value);
  for (auto iter = found_range.first; iter != found_range.second; ++iter) {
    ResolveTarget* found_resolve_target = iter->second;
    if (found_resolve_target->heap_page_first >= min_heap_page_first) {
      return found_resolve_target;
    }
  }

  // Ensure the new resolve target can get an RTV descriptor.
  if (!EnsureRTVHeapAvailable(false)) {
    return nullptr;
  }

  // Allocate a new resolve target.
  auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
  auto device = provider->GetDevice();
  D3D12_RESOURCE_DESC resource_desc;
  resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  resource_desc.Alignment = 0;
  resource_desc.Width = key.width_div_32 << 5;
  resource_desc.Height = key.height_div_32 << 5;
  resource_desc.DepthOrArraySize = 1;
  resource_desc.MipLevels = 1;
  resource_desc.Format = format;
  resource_desc.SampleDesc.Count = 1;
  resource_desc.SampleDesc.Quality = 0;
  resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
  D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
      device->GetResourceAllocationInfo(0, 1, &resource_desc);
  // Round the allocation size up to whole 4 MB pages using 64-bit arithmetic.
  // SizeInBytes is 64-bit (and is UINT64_MAX when the query fails), so
  // truncating it to 32 bits before rounding could turn an oversized or
  // invalid size into a small, seemingly valid page count.
  const uint64_t allocation_size = allocation_info.SizeInBytes;
  const uint64_t heap_page_count_64 =
      (allocation_size >> 22) +
      ((allocation_size & ((uint64_t(1) << 22) - 1)) != 0 ? 1 : 0);
  if (heap_page_count_64 == 0 || heap_page_count_64 > kHeap4MBPages) {
    assert_always();
    XELOGE(
        "%ux%u resolve target with DXGI format %u can't fit in a heap, "
        "needs %u bytes - tell Xenia developers to increase the heap size!",
        uint32_t(resource_desc.Width), resource_desc.Height, format,
        uint32_t(allocation_info.SizeInBytes));
    return nullptr;
  }
  // Validated above to be no larger than kHeap4MBPages, so this is lossless.
  uint32_t heap_page_count = uint32_t(heap_page_count_64);
  if (kHeap4MBPages - (min_heap_page_first % kHeap4MBPages) < heap_page_count) {
    // Go to the next heap if no free space in the current one.
    min_heap_page_first = xe::round_up(min_heap_page_first, kHeap4MBPages);
    assert_true(min_heap_page_first < kHeap4MBPages * 5);
  }

  // Create the memory heap if it doesn't exist yet.
  uint32_t heap_index = min_heap_page_first / kHeap4MBPages;
  if (!MakeHeapResident(heap_index)) {
    return nullptr;
  }

  // Create it.
  // The first action likely to be done is resolve.
  D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_RENDER_TARGET;
  ID3D12Resource* resource;
  if (FAILED(device->CreatePlacedResource(
          heaps_[heap_index], (min_heap_page_first % kHeap4MBPages) << 22,
          &resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) {
    XELOGE(
        "Failed to create a placed resource for %ux%u resolve target with DXGI "
        "format %u at heap 4 MB pages %u:%u",
        uint32_t(resource_desc.Width), resource_desc.Height, format,
        min_heap_page_first, min_heap_page_first + heap_page_count - 1);
    return nullptr;
  }

  // Create the RTV in the next free slot of the current color descriptor heap
  // (availability was ensured above).
  D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
  rtv_handle.ptr = descriptor_heaps_color_->start_handle.ptr +
                   descriptor_heaps_color_->descriptors_used *
                       provider->GetDescriptorSizeRTV();
  D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
  rtv_desc.Format = format;
  rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
  rtv_desc.Texture2D.MipSlice = 0;
  rtv_desc.Texture2D.PlaneSlice = 0;
  device->CreateRenderTargetView(resource, &rtv_desc, rtv_handle);
  ++descriptor_heaps_color_->descriptors_used;

  // Add the new resolve target to the cache.
  ResolveTarget* resolve_target = new ResolveTarget;
  resolve_target->resource = resource;
  resolve_target->state = state;
  resolve_target->rtv_handle.ptr = rtv_handle.ptr;
  resolve_target->key.value = key.value;
  resolve_target->heap_page_first = min_heap_page_first;
  resolve_targets_.insert(std::make_pair(key.value, resolve_target));
  return resolve_target;
}
void RenderTargetCache::UnbindRenderTargets() {
StoreRenderTargetsToEDRAM();
ClearBindings();
@ -1104,6 +1614,61 @@ void RenderTargetCache::ClearBindings() {
std::memset(current_bindings_, 0, sizeof(current_bindings_));
}
bool RenderTargetCache::MakeHeapResident(uint32_t heap_index) {
if (heap_index >= 5) {
assert_always();
return false;
}
if (heaps_[heap_index] != nullptr) {
return true;
}
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_HEAP_DESC heap_desc = {};
heap_desc.SizeInBytes = kHeap4MBPages << 22;
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
// TODO(Triang3l): If real MSAA is added, alignment must be 4 MB.
heap_desc.Alignment = 0;
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
if (FAILED(
device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[heap_index])))) {
XELOGE("Failed to create a %u MB heap for render targets",
kHeap4MBPages * 4);
return false;
}
return true;
}
bool RenderTargetCache::EnsureRTVHeapAvailable(bool is_depth) {
auto& heap = is_depth ? descriptor_heaps_depth_ : descriptor_heaps_color_;
if (heap != nullptr &&
heap->descriptors_used < kRenderTargetDescriptorHeapSize) {
return true;
}
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
heap_desc.Type = is_depth ? D3D12_DESCRIPTOR_HEAP_TYPE_DSV
: D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
heap_desc.NumDescriptors = kRenderTargetDescriptorHeapSize;
heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
heap_desc.NodeMask = 0;
ID3D12DescriptorHeap* new_d3d_heap;
if (FAILED(device->CreateDescriptorHeap(&heap_desc,
IID_PPV_ARGS(&new_d3d_heap)))) {
XELOGE("Failed to create a heap for %u %s buffer descriptors",
kRenderTargetDescriptorHeapSize, is_depth ? "depth" : "color");
return false;
}
RenderTargetDescriptorHeap* new_heap = new RenderTargetDescriptorHeap;
new_heap->heap = new_d3d_heap;
new_heap->start_handle = new_d3d_heap->GetCPUDescriptorHandleForHeapStart();
new_heap->descriptors_used = 0;
new_heap->previous = heap;
heap = new_heap;
return true;
}
bool RenderTargetCache::GetResourceDesc(RenderTargetKey key,
D3D12_RESOURCE_DESC& desc) {
if (key.width_ss_div_80 == 0 || key.height_ss_div_16 == 0) {
@ -1133,7 +1698,7 @@ bool RenderTargetCache::GetResourceDesc(RenderTargetKey key,
RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
RenderTargetKey key, uint32_t heap_page_first) {
assert_true(heap_page_first <= kHeap4MBPages * 5);
assert_true(heap_page_first < kHeap4MBPages * 5);
// Try to find an existing render target.
auto found_range = render_targets_.equal_range(key.value);
@ -1163,57 +1728,23 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
return nullptr;
}
// Create a new descriptor heap if needed, and get a place for the descriptor.
auto& descriptor_heap =
key.is_depth ? descriptor_heaps_depth_ : descriptor_heaps_color_;
if (descriptor_heap == nullptr ||
descriptor_heap->descriptors_used >= kRenderTargetDescriptorHeapSize) {
D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc;
descriptor_heap_desc.Type = key.is_depth ? D3D12_DESCRIPTOR_HEAP_TYPE_DSV
: D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
descriptor_heap_desc.NumDescriptors = kRenderTargetDescriptorHeapSize;
descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
descriptor_heap_desc.NodeMask = 0;
ID3D12DescriptorHeap* new_d3d_descriptor_heap;
if (FAILED(device->CreateDescriptorHeap(
&descriptor_heap_desc, IID_PPV_ARGS(&new_d3d_descriptor_heap)))) {
XELOGE("Failed to create a heap for %u %s buffer descriptors",
kRenderTargetDescriptorHeapSize, key.is_depth ? "depth" : "color");
return nullptr;
}
RenderTargetDescriptorHeap* new_descriptor_heap =
new RenderTargetDescriptorHeap;
new_descriptor_heap->heap = new_d3d_descriptor_heap;
new_descriptor_heap->start_handle =
new_d3d_descriptor_heap->GetCPUDescriptorHandleForHeapStart();
new_descriptor_heap->descriptors_used = 0;
new_descriptor_heap->previous = descriptor_heap;
descriptor_heap = new_descriptor_heap;
// Ensure we can create a new descriptor in the render target heap.
if (!EnsureRTVHeapAvailable(key.is_depth)) {
return nullptr;
}
// Create the memory heap if it doesn't exist yet.
ID3D12Heap* heap = heaps_[heap_page_first / kHeap4MBPages];
if (heap == nullptr) {
D3D12_HEAP_DESC heap_desc = {};
heap_desc.SizeInBytes = kHeap4MBPages << 22;
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
// TODO(Triang3l): If real MSAA is added, alignment must be 4 MB.
heap_desc.Alignment = 0;
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heap)))) {
XELOGE("Failed to create a %u MB heap for render targets",
kHeap4MBPages * 4);
return nullptr;
}
heaps_[heap_page_first / kHeap4MBPages] = heap;
uint32_t heap_index = heap_page_first / kHeap4MBPages;
if (!MakeHeapResident(heap_index)) {
return nullptr;
}
// The first action likely to be done is EDRAM buffer load.
D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST;
ID3D12Resource* resource;
if (FAILED(device->CreatePlacedResource(
heap, (heap_page_first % kHeap4MBPages) << 22, &resource_desc, state,
nullptr, IID_PPV_ARGS(&resource)))) {
heaps_[heap_index], (heap_page_first % kHeap4MBPages) << 22,
&resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) {
XELOGE(
"Failed to create a placed resource for %ux%u %s render target with "
"format %u at heap 4 MB pages %u:%u",
@ -1226,27 +1757,28 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
// Create the descriptor for the render target.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_handle;
if (key.is_depth) {
descriptor_handle.ptr =
descriptor_heap->start_handle.ptr +
descriptor_heap->descriptors_used * provider->GetDescriptorSizeDSV();
descriptor_handle.ptr = descriptor_heaps_depth_->start_handle.ptr +
descriptor_heaps_depth_->descriptors_used *
provider->GetDescriptorSizeDSV();
D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc;
dsv_desc.Format = resource_desc.Format;
dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
dsv_desc.Flags = D3D12_DSV_FLAG_NONE;
dsv_desc.Texture2D.MipSlice = 0;
device->CreateDepthStencilView(resource, &dsv_desc, descriptor_handle);
++descriptor_heaps_depth_->descriptors_used;
} else {
descriptor_handle.ptr =
descriptor_heap->start_handle.ptr +
descriptor_heap->descriptors_used * provider->GetDescriptorSizeRTV();
descriptor_handle.ptr = descriptor_heaps_color_->start_handle.ptr +
descriptor_heaps_color_->descriptors_used *
provider->GetDescriptorSizeRTV();
D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
rtv_desc.Format = resource_desc.Format;
rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
rtv_desc.Texture2D.MipSlice = 0;
rtv_desc.Texture2D.PlaneSlice = 0;
device->CreateRenderTargetView(resource, &rtv_desc, descriptor_handle);
++descriptor_heaps_color_->descriptors_used;
}
++descriptor_heap->descriptors_used;
// Get the layout for copying to the EDRAM buffer.
RenderTarget* render_target = new RenderTarget;
@ -1511,7 +2043,8 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EDRAMLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format);
command_processor_->SetPipeline(edram_store_pipelines_[size_t(mode)]);
command_processor_->SetComputePipeline(
edram_store_pipelines_[size_t(mode)]);
command_list->Dispatch(rt_pitch_tiles, binding.edram_dirty_rows, 1);
// Commit the UAV write.
@ -1646,7 +2179,7 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EDRAMLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format);
command_processor_->SetPipeline(edram_load_pipelines_[size_t(mode)]);
command_processor_->SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
command_list->Dispatch(edram_pitch_tiles, edram_rows, 1);
// Commit the UAV write and transition the copy buffer to copy source now.

View File

@ -13,6 +13,7 @@
#include <unordered_map>
#include "xenia/gpu/d3d12/shared_memory.h"
#include "xenia/gpu/d3d12/texture_cache.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h"
#include "xenia/memory.h"
@ -219,7 +220,8 @@ class RenderTargetCache {
// Performs the resolve to a shared memory area according to the current
// register values, and also clears the EDRAM buffer if needed. Must be in a
// frame for calling.
bool Resolve(SharedMemory* shared_memory, Memory* memory);
bool Resolve(SharedMemory* shared_memory, TextureCache* texture_cache,
Memory* memory);
// Flushes the render targets to EDRAM and unbinds them, for instance, when
// the command processor takes over framebuffer bindings to draw something
// special.
@ -320,8 +322,40 @@ class RenderTargetCache {
RenderTarget* render_target;
};
// Converting resolve pipeline: a pipeline state object together with the
// destination DXGI format it is stored for.
// NOTE(review): presumably entries are looked up by dest_format in
// GetResolvePipeline - confirm against its implementation.
struct ResolvePipeline {
// Pipeline state object for this entry.
ID3D12PipelineState* pipeline;
// Destination format associated with the pipeline.
DXGI_FORMAT dest_format;
};
// Description of a resolve target packed into 32 bits (9 + 9 + 14), so that
// `value` can be used directly as the key in resolve_targets_.
union ResolveTargetKey {
struct {
// Judging by the names, width and height in units of 32 - TODO confirm
// the rounding against the code filling the key.
uint32_t width_div_32 : 9;
uint32_t height_div_32 : 9;
// Format of the resolve target resource.
DXGI_FORMAT format : 14;
};
// All of the fields above viewed as a single integer.
uint32_t value;
};
// Target for converting resolves.
struct ResolveTarget {
// Placed resource created in one of the render target heaps.
ID3D12Resource* resource;
// Resource state stored alongside the resource; set to
// D3D12_RESOURCE_STATE_RENDER_TARGET when the target is created.
D3D12_RESOURCE_STATES state;
// CPU handle of the render target view created for the resource.
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
// Key the target was created with (also its key in resolve_targets_).
ResolveTargetKey key;
// First 4 MB heap page occupied by the resource, counted across all heaps
// (heap index is heap_page_first / kHeap4MBPages).
uint32_t heap_page_first;
};
void ClearBindings();
// Checks if the heap for the render target exists and tries to create it if
// it's not.
bool MakeHeapResident(uint32_t heap_index);
// Creates a new RTV/DSV descriptor heap if needed to be able to allocate one
// descriptor in it.
bool EnsureRTVHeapAvailable(bool is_depth);
// Returns true if a render target with such key can be created.
static bool GetResourceDesc(RenderTargetKey key, D3D12_RESOURCE_DESC& desc);
@ -357,11 +391,19 @@ class RenderTargetCache {
const uint32_t* edram_bases);
// Performs the copying part of a resolve.
bool ResolveCopy(SharedMemory* shared_memory, uint32_t edram_base,
uint32_t surface_pitch, MsaaSamples msaa_samples,
bool is_depth, uint32_t src_format,
bool ResolveCopy(SharedMemory* shared_memory, TextureCache* texture_cache,
uint32_t edram_base, uint32_t surface_pitch,
MsaaSamples msaa_samples, bool is_depth, uint32_t src_format,
const D3D12_RECT& src_rect);
ID3D12PipelineState* GetResolvePipeline(DXGI_FORMAT dest_format);
// Returns any available resolve target placed at least at
// min_heap_first_page, or tries to place it at the specified position (if not
// possible, will place it in the next heap).
ResolveTarget* FindOrCreateResolveTarget(uint32_t width, uint32_t height,
DXGI_FORMAT format,
uint32_t min_heap_first_page);
D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
@ -382,8 +424,8 @@ class RenderTargetCache {
};
struct {
// 16 bits for X, 16 bits for Y.
uint32_t tile_sample_rect_tl;
uint32_t tile_sample_rect_br;
uint32_t tile_sample_rect_lt;
uint32_t tile_sample_rect_rb;
uint32_t tile_sample_dest_base;
// 0:13 - destination pitch.
// 14 - log2(vertical sample count), 0 for 1x AA, 1 for 2x/4x AA.
@ -439,6 +481,30 @@ class RenderTargetCache {
RenderTargetBinding current_bindings_[5] = {};
PipelineRenderTarget current_pipeline_render_targets_[5];
ID3D12RootSignature* resolve_root_signature_ = nullptr;
// Constants for the resolve pixel shader. The field order and packing match
// XeResolveCbuffer in the resolve pixel shader (rect_samples_lw and
// rect_samples_th correspond to the two components of
// xe_resolve_rect_samples), so the two must be changed together.
struct ResolveRootConstants {
// In samples.
// Left and top in the lower 16 bits, width and height in the upper.
uint32_t rect_samples_lw;
uint32_t rect_samples_th;
// In samples. Width in the lower 16 bits, height in the upper.
uint32_t source_size;
// 0 - log2(vertical sample count), 0 for 1x AA, 1 for 2x/4x AA.
// 1 - log2(horizontal sample count), 0 for 1x/2x AA, 1 for 4x AA.
// 2:3 - vertical sample position:
// 0 for the upper samples with 2x/4x AA.
// 1 for 1x AA or to mix samples with 2x/4x AA.
// 2 for the lower samples with 2x/4x AA.
// 4:5 - horizontal sample position:
// 0 for the left samples with 4x AA.
// 1 for 1x/2x AA or to mix samples with 4x AA.
// 2 for the right samples with 4x AA.
// 6:11 - exponent bias (the shader sign-extends these 6 bits).
uint32_t resolve_info;
};
std::vector<ResolvePipeline> resolve_pipelines_;
std::unordered_multimap<uint32_t, ResolveTarget*> resolve_targets_;
};
} // namespace d3d12

View File

@ -0,0 +1,56 @@
// Parameters of the resolve draw, bound at b0. The pixel shader below reads
// all of them; the layout is four 32-bit values.
cbuffer XeResolveCbuffer : register(b0) {
// In samples.
// Left and top in the lower 16 bits, width and height in the upper.
// .x holds the horizontal pair (left, width), .y the vertical pair
// (top, height).
uint2 xe_resolve_rect_samples;
// In samples. Width in the lower 16 bits, height in the upper.
uint xe_resolve_source_size;
// 0 - log2(vertical sample count), 0 for 1x AA, 1 for 2x/4x AA.
// 1 - log2(horizontal sample count), 0 for 1x/2x AA, 1 for 4x AA.
// 2:3 - vertical sample position:
// 0 for the upper samples with 2x/4x AA.
// 1 for 1x AA or to mix samples with 2x/4x AA.
// 2 for the lower samples with 2x/4x AA.
// 4:5 - horizontal sample position:
// 0 for the left samples with 4x AA.
// 1 for 1x/2x AA or to mix samples with 4x AA.
// 2 for the right samples with 4x AA.
// 6:11 - exponent bias (treated as a signed 6-bit value by the shader).
uint xe_resolve_info;
};
// Texture being resolved from and the sampler used to read it.
// NOTE(review): the comment in main relies on the sampler doing bilinear
// filtering - the sampler state itself is configured outside this file.
Texture2D<float4> xe_resolve_source : register(t0);
SamplerState xe_resolve_sampler : register(s0);
// Resolve pixel shader: for each destination pixel, samples the needed
// region of the source texture (picking or mixing AA samples as requested in
// xe_resolve_info) and applies the exponent bias.
float4 main(float4 xe_position : SV_Position) : SV_Target {
// The source texture dimensions are snapped to 80x16 samples for ease of
// EDRAM loading, but resolving may be done from different regions within it.
// The viewport and the quad have the size of the needed region, but the
// viewport starts at (0,0), so texture sample positions need to be offset.
// Also because there may be excess texels in the source, clamping needs to be
// done manually so those pixels won't affect bilinear filtering. Resolving is
// done without stretching, but the resolve region on the source texture is 2
// or 4 times bigger than the destination - bilinear filtering is used to mix
// the samples (if exact samples are needed, the source texture is sampled at
// texel centers, if AA is resolved, it's sampled between texels).
// Go to sample coordinates and select the needed samples.
// Scale: bit 1 (horizontal) and bit 0 (vertical) of xe_resolve_info select a
// factor of 1 or 2 per axis. Offset: the 2-bit sample position fields (bits
// 4:5 for X, 2:3 for Y) with values 0/1/2 become -0.5/0/+0.5 samples.
float2 resolve_position = xe_position.xy *
float2(((xe_resolve_info.xx >> uint2(1u, 0u)) & 1u) + 1u) +
(float2((xe_resolve_info.xx >> uint2(4u, 2u)) & 3u) * 0.5 - 0.5);
// Clamp, offset and normalize the position.
// Clamping is to texel centers inside the rectangle (size in the upper 16
// bits), then the rectangle origin (lower 16 bits) is added.
resolve_position = clamp(resolve_position, (0.5).xx,
float2(xe_resolve_rect_samples >> 16u) - 0.5) +
float2(xe_resolve_rect_samples & 0xFFFFu);
// Divide by the source size (width in the lower 16 bits, height in the upper)
// to get normalized texture coordinates.
resolve_position /=
float2((xe_resolve_source_size >> uint2(0u, 16u)) & 0xFFFFu);
// Resolve the samples.
float4 pixel = xe_resolve_source.SampleLevel(xe_resolve_sampler,
resolve_position, 0.0);
// Bias the exponent.
// Shifting left by 20 moves bit 11 to the sign bit, and the signed shift
// right by 26 leaves bits 6:11 as a sign-extended 6-bit integer.
pixel *= exp2(float(int(xe_resolve_info << (32u - 12u)) >> 26));
return pixel;
}

View File

@ -0,0 +1,5 @@
// A triangle covering the whole viewport.
// The position is derived from SV_VertexID alone: vertex IDs 0, 1 and 2 give
// clip-space XY of (-1, 1), (-1, -3) and (3, 1), which is a single triangle
// containing the entire [-1, 1] x [-1, 1] square. Z is 0 and W is 1.
float4 main(uint xe_vertex_id : SV_VertexID) : SV_Position {
// Bit 1 of the ID selects X (0 or 2), bit 0 (shifted into bit 1) selects Y.
return float4(float2(uint2(xe_vertex_id, xe_vertex_id << 1u) & 2u) *
float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
}

View File

@ -450,6 +450,17 @@ void TextureCache::WriteSampler(uint32_t fetch_constant,
device->CreateSampler(&desc, handle);
}
// Returns the DXGI format to use as the destination of a converting resolve
// to a guest texture format, or DXGI_FORMAT_UNKNOWN if resolving to that
// format is not supported. Only k_8_8_8_8 is handled at the moment.
DXGI_FORMAT TextureCache::GetResolveDXGIFormat(TextureFormat format) {
  // TODO(Triang3l): Change this to a check whether there is a tiling pipeline.
  if (format == TextureFormat::k_8_8_8_8) {
    return host_formats_[uint32_t(format)].dxgi_format;
  }
  return DXGI_FORMAT_UNKNOWN;
}
bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
auto group = reinterpret_cast<const xenos::xe_gpu_fetch_group_t*>(
&register_file_->values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]);
@ -467,6 +478,7 @@ bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
command_processor_->PushTransitionBarrier(
texture->resource, texture->state,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
texture->state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
srv_desc.Format = host_formats_[uint32_t(key.format)].dxgi_format;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
@ -826,7 +838,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
descriptor_cpu_start.ptr + provider->GetDescriptorSizeView();
device->CreateUnorderedAccessView(copy_buffer, nullptr, &uav_desc,
descriptor_cpu_uav);
command_processor_->SetPipeline(pipeline);
command_processor_->SetComputePipeline(pipeline);
command_list->SetComputeRootSignature(copy_root_signature_);
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);

View File

@ -78,6 +78,8 @@ class TextureCache {
void WriteSampler(uint32_t fetch_constant,
D3D12_CPU_DESCRIPTOR_HANDLE handle);
static DXGI_FORMAT GetResolveDXGIFormat(TextureFormat format);
bool RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle);
private: