[D3D12] Render target binding

This commit is contained in:
Triang3l 2018-08-10 18:06:21 +03:00
parent c0c0ca263d
commit 952bb91c3f
8 changed files with 551 additions and 162 deletions

View File

@ -489,10 +489,6 @@ bool D3D12CommandProcessor::SetupContext() {
render_target_cache_ = render_target_cache_ =
std::make_unique<RenderTargetCache>(this, register_file_); std::make_unique<RenderTargetCache>(this, register_file_);
if (!render_target_cache_->Initialize()) {
XELOGE("Failed to initialize the render target cache");
return false;
}
return true; return true;
} }
@ -652,9 +648,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
} }
if (reset_index != reset_index_expected) { if (reset_index != reset_index_expected) {
// Only 0xFFFF and 0xFFFFFFFF primitive restart indices are supported by // Only 0xFFFF and 0xFFFFFFFF primitive restart indices are supported by
// Direct3D 12 (endianness doesn't matter for them). However, Direct3D 9 // Direct3D 12 (endianness doesn't matter for them). With shared memory,
// uses 0xFFFF as the reset index. With shared memory, it's impossible to // it's impossible to replace the cut index in the buffer without
// replace the cut index in the buffer without affecting the game memory. // affecting the game memory.
XELOGE( XELOGE(
"The game uses the primitive restart index 0x%X that isn't 0xFFFF or " "The game uses the primitive restart index 0x%X that isn't 0xFFFF or "
"0xFFFFFFFF. Report the game to Xenia developers so geometry shaders " "0xFFFFFFFF. Report the game to Xenia developers so geometry shaders "
@ -678,14 +674,19 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
pixel_shader = nullptr; pixel_shader = nullptr;
} else if (!pixel_shader) { } else if (!pixel_shader) {
// Need a pixel shader in normal color mode. // Need a pixel shader in normal color mode.
return true; return false;
} }
bool new_frame = BeginFrame(); bool new_frame = BeginFrame();
auto command_list = GetCurrentCommandList(); auto command_list = GetCurrentCommandList();
// Set up the render targets - this may bind pipelines. // Set up the render targets - this may bind pipelines.
render_target_cache_->UpdateRenderTargets(); if (!render_target_cache_->UpdateRenderTargets()) {
// Doesn't actually draw.
return true;
}
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
render_target_cache_->GetCurrentPipelineRenderTargets();
// Set the primitive topology. // Set the primitive topology.
D3D_PRIMITIVE_TOPOLOGY primitive_topology; D3D_PRIMITIVE_TOPOLOGY primitive_topology;
@ -715,8 +716,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
ID3D12RootSignature* root_signature; ID3D12RootSignature* root_signature;
auto pipeline_status = pipeline_cache_->ConfigurePipeline( auto pipeline_status = pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type, vertex_shader, pixel_shader, primitive_type,
indexed ? index_buffer_info->format : IndexFormat::kInt16, &pipeline, indexed ? index_buffer_info->format : IndexFormat::kInt16,
&root_signature); pipeline_render_targets, &pipeline, &root_signature);
if (pipeline_status == PipelineCache::UpdateStatus::kError) { if (pipeline_status == PipelineCache::UpdateStatus::kError) {
return false; return false;
} }
@ -733,8 +734,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
SetPipeline(pipeline); SetPipeline(pipeline);
// Update system constants before uploading them. // Update system constants before uploading them.
UpdateSystemConstantValues(indexed ? index_buffer_info->endianness UpdateSystemConstantValues(
: Endian::kUnspecified); indexed ? index_buffer_info->endianness : Endian::kUnspecified,
pipeline_render_targets);
// Update constant buffers, descriptors and root parameters. // Update constant buffers, descriptors and root parameters.
if (!UpdateBindings(command_list, vertex_shader, pixel_shader, if (!UpdateBindings(command_list, vertex_shader, pixel_shader,
@ -1022,7 +1024,9 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
} }
} }
void D3D12CommandProcessor::UpdateSystemConstantValues(Endian index_endian) { void D3D12CommandProcessor::UpdateSystemConstantValues(
Endian index_endian,
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
auto& regs = *register_file_; auto& regs = *register_file_;
uint32_t vgt_indx_offset = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32; uint32_t vgt_indx_offset = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32;
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
@ -1067,7 +1071,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(Endian index_endian) {
// viewport that is used to emulate unnormalized coordinates. // viewport that is used to emulate unnormalized coordinates.
// Z scale/offset is to convert from OpenGL NDC to Direct3D NDC if needed. // Z scale/offset is to convert from OpenGL NDC to Direct3D NDC if needed.
// Also apply half-pixel offset to reproduce Direct3D 9 rasterization rules. // Also apply half-pixel offset to reproduce Direct3D 9 rasterization rules.
// TODO(Triang3l): Check if pixel coordinates need to offset depending on a // TODO(Triang3l): Check if pixel coordinates need to be offset depending on a
// different register (and if there's such register at all). // different register (and if there's such register at all).
bool gl_clip_space_def = bool gl_clip_space_def =
!(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); !(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4));
@ -1127,6 +1131,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(Endian index_endian) {
system_constants_.ssaa_inv_scale[0] = ssaa_inv_scale_x; system_constants_.ssaa_inv_scale[0] = ssaa_inv_scale_x;
system_constants_.ssaa_inv_scale[1] = ssaa_inv_scale_y; system_constants_.ssaa_inv_scale[1] = ssaa_inv_scale_y;
// Color output index mapping.
for (uint32_t i = 0; i < 4; ++i) {
dirty |= system_constants_.color_output_map[i] !=
render_targets[i].guest_render_target;
system_constants_.color_output_map[i] =
render_targets[i].guest_render_target;
}
cbuffer_bindings_system_.up_to_date &= dirty; cbuffer_bindings_system_.up_to_date &= dirty;
} }

View File

@ -148,7 +148,9 @@ class D3D12CommandProcessor : public CommandProcessor {
bool EndFrame(); bool EndFrame();
void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list); void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list);
void UpdateSystemConstantValues(Endian index_endian); void UpdateSystemConstantValues(
Endian index_endian,
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
bool UpdateBindings(ID3D12GraphicsCommandList* command_list, bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
const D3D12Shader* vertex_shader, const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader, const D3D12Shader* pixel_shader,

View File

@ -9,6 +9,7 @@
#include "xenia/gpu/d3d12/pipeline_cache.h" #include "xenia/gpu/d3d12/pipeline_cache.h"
#include <algorithm>
#include <cinttypes> #include <cinttypes>
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
@ -17,6 +18,7 @@
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/d3d12/render_target_cache.h"
#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/hlsl_shader_translator.h" #include "xenia/gpu/hlsl_shader_translator.h"
@ -67,6 +69,7 @@ D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type,
PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline( PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
PrimitiveType primitive_type, IndexFormat index_format, PrimitiveType primitive_type, IndexFormat index_format,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
ID3D12PipelineState** pipeline_out, ID3D12PipelineState** pipeline_out,
ID3D12RootSignature** root_signature_out) { ID3D12RootSignature** root_signature_out) {
#if FINE_GRAINED_DRAW_SCOPES #if FINE_GRAINED_DRAW_SCOPES
@ -77,8 +80,8 @@ PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
assert_not_null(root_signature_out); assert_not_null(root_signature_out);
Pipeline* pipeline = nullptr; Pipeline* pipeline = nullptr;
auto update_status = auto update_status = UpdateState(vertex_shader, pixel_shader, primitive_type,
UpdateState(vertex_shader, pixel_shader, primitive_type, index_format); index_format, render_targets);
switch (update_status) { switch (update_status) {
case UpdateStatus::kCompatible: case UpdateStatus::kCompatible:
// Requested pipeline is compatible with our previous one, so use that. // Requested pipeline is compatible with our previous one, so use that.
@ -190,7 +193,8 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader,
PipelineCache::UpdateStatus PipelineCache::UpdateState( PipelineCache::UpdateStatus PipelineCache::UpdateState(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
PrimitiveType primitive_type, IndexFormat index_format) { PrimitiveType primitive_type, IndexFormat index_format,
const RenderTargetCache::PipelineRenderTarget render_targets[5]) {
bool mismatch = false; bool mismatch = false;
// Reset hash so we can build it up. // Reset hash so we can build it up.
@ -208,18 +212,15 @@ PipelineCache::UpdateStatus PipelineCache::UpdateState(
UpdateStatus status; UpdateStatus status;
status = UpdateShaderStages(vertex_shader, pixel_shader, primitive_type); status = UpdateShaderStages(vertex_shader, pixel_shader, primitive_type);
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages"); CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages");
status = UpdateBlendState(pixel_shader); status = UpdateBlendStateAndRenderTargets(pixel_shader, render_targets);
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state"); CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
status = UpdateRasterizerState(primitive_type); status = UpdateRasterizerState(primitive_type);
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state"); CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
status = UpdateDepthStencilState(); status = UpdateDepthStencilState(render_targets[4].format);
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");
status = UpdateIBStripCutValue(index_format); status = UpdateIBStripCutValue(index_format);
CHECK_UPDATE_STATUS(status, mismatch, CHECK_UPDATE_STATUS(status, mismatch,
"Unable to update index buffer strip cut value"); "Unable to update index buffer strip cut value");
status = UpdateRenderTargetFormats();
CHECK_UPDATE_STATUS(status, mismatch,
"Unable to update render target formats");
#undef CHECK_UPDATE_STATUS #undef CHECK_UPDATE_STATUS
return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
@ -303,18 +304,27 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
return UpdateStatus::kMismatch; return UpdateStatus::kMismatch;
} }
PipelineCache::UpdateStatus PipelineCache::UpdateBlendState( PipelineCache::UpdateStatus PipelineCache::UpdateBlendStateAndRenderTargets(
D3D12Shader* pixel_shader) { D3D12Shader* pixel_shader,
auto& regs = update_blend_state_regs_; const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
auto& regs = update_blend_state_and_render_targets_regs_;
bool dirty = current_pipeline_ == nullptr; bool dirty = current_pipeline_ == nullptr;
for (uint32_t i = 0; i < 4; ++i) {
dirty |= regs.render_targets[i].guest_render_target !=
render_targets[i].guest_render_target;
regs.render_targets[i].guest_render_target =
render_targets[i].guest_render_target;
dirty |= regs.render_targets[i].format != render_targets[i].format;
regs.render_targets[i].format = render_targets[i].format;
}
uint32_t color_mask; uint32_t color_mask;
if (pixel_shader != nullptr) { if (pixel_shader != nullptr) {
color_mask = register_file_->values[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF; color_mask = register_file_->values[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
// If the pixel shader doesn't write to a render target, writing to it is
// disabled in the blend state. Otherwise, in Halo 3, one important render
// target is destroyed by a shader not writing to one of the outputs.
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
// If the pixel shader doesn't write to a render target, writing to it is
// disabled in the blend state. Otherwise, in Halo 3, one important render
// target is destroyed by a shader not writing to one of the outputs.
if (!pixel_shader->writes_color_target(i)) { if (!pixel_shader->writes_color_target(i)) {
color_mask &= ~(0xF << (i * 4)); color_mask &= ~(0xF << (i * 4));
} }
@ -372,10 +382,14 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState(
/* 3 */ D3D12_BLEND_OP_MAX, /* 3 */ D3D12_BLEND_OP_MAX,
/* 4 */ D3D12_BLEND_OP_REV_SUBTRACT, /* 4 */ D3D12_BLEND_OP_REV_SUBTRACT,
}; };
update_desc_.NumRenderTargets = 0;
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
auto& blend_desc = update_desc_.BlendState.RenderTarget[i]; auto& blend_desc = update_desc_.BlendState.RenderTarget[i];
if (blend_enable && (color_mask & (0xF << (i * 4)))) { uint32_t guest_render_target = render_targets[i].guest_render_target;
uint32_t blend_control = regs.blendcontrol[i]; DXGI_FORMAT format = render_targets[i].format;
if (blend_enable && format != DXGI_FORMAT_UNKNOWN &&
(color_mask & (0xF << (guest_render_target * 4)))) {
uint32_t blend_control = regs.blendcontrol[guest_render_target];
// A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND
blend_desc.SrcBlend = kBlendFactorMap[(blend_control & 0x0000001F) >> 0]; blend_desc.SrcBlend = kBlendFactorMap[(blend_control & 0x0000001F) >> 0];
// A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND // A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND
@ -399,7 +413,12 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState(
blend_desc.DestBlendAlpha = D3D12_BLEND_ZERO; blend_desc.DestBlendAlpha = D3D12_BLEND_ZERO;
blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD; blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD;
} }
blend_desc.RenderTargetWriteMask = (color_mask >> (i * 4)) & 0xF; blend_desc.RenderTargetWriteMask =
(color_mask >> (guest_render_target * 4)) & 0xF;
update_desc_.RTVFormats[i] = format;
if (format != DXGI_FORMAT_UNKNOWN) {
update_desc_.NumRenderTargets = i + 1;
}
} }
return UpdateStatus::kMismatch; return UpdateStatus::kMismatch;
@ -532,10 +551,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
return UpdateStatus::kMismatch; return UpdateStatus::kMismatch;
} }
PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState(
DXGI_FORMAT format) {
auto& regs = update_depth_stencil_state_regs_; auto& regs = update_depth_stencil_state_regs_;
bool dirty = current_pipeline_ == nullptr; bool dirty = current_pipeline_ == nullptr;
dirty |= regs.format != format;
regs.format = format;
dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
dirty |= dirty |=
SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
@ -544,17 +566,18 @@ PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
return UpdateStatus::kCompatible; return UpdateStatus::kCompatible;
} }
bool dsv_bound = format != DXGI_FORMAT_UNKNOWN;
update_desc_.DepthStencilState.DepthEnable = update_desc_.DepthStencilState.DepthEnable =
(regs.rb_depthcontrol & 0x2) ? TRUE : FALSE; (dsv_bound && (regs.rb_depthcontrol & 0x2)) ? TRUE : FALSE;
update_desc_.DepthStencilState.DepthWriteMask = update_desc_.DepthStencilState.DepthWriteMask =
(regs.rb_depthcontrol & 0x4) ? D3D12_DEPTH_WRITE_MASK_ALL (dsv_bound && (regs.rb_depthcontrol & 0x4)) ? D3D12_DEPTH_WRITE_MASK_ALL
: D3D12_DEPTH_WRITE_MASK_ZERO; : D3D12_DEPTH_WRITE_MASK_ZERO;
// Comparison functions are the same in Direct3D 12 but plus one (minus one, // Comparison functions are the same in Direct3D 12 but plus one (minus one,
// bit 0 for less, bit 1 for equal, bit 2 for greater). // bit 0 for less, bit 1 for equal, bit 2 for greater).
update_desc_.DepthStencilState.DepthFunc = update_desc_.DepthStencilState.DepthFunc =
D3D12_COMPARISON_FUNC(((regs.rb_depthcontrol >> 4) & 0x7) + 1); D3D12_COMPARISON_FUNC(((regs.rb_depthcontrol >> 4) & 0x7) + 1);
update_desc_.DepthStencilState.StencilEnable = update_desc_.DepthStencilState.StencilEnable =
(regs.rb_depthcontrol & 0x1) ? TRUE : FALSE; (dsv_bound && (regs.rb_depthcontrol & 0x1)) ? TRUE : FALSE;
update_desc_.DepthStencilState.StencilReadMask = update_desc_.DepthStencilState.StencilReadMask =
(regs.rb_stencilrefmask >> 8) & 0xFF; (regs.rb_stencilrefmask >> 8) & 0xFF;
update_desc_.DepthStencilState.StencilWriteMask = update_desc_.DepthStencilState.StencilWriteMask =
@ -587,6 +610,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
// test is dynamic - should be enabled anyway if there's no alpha test, // test is dynamic - should be enabled anyway if there's no alpha test,
// discarding and depth output). // discarding and depth output).
update_desc_.DSVFormat = format;
return UpdateStatus::kMismatch; return UpdateStatus::kMismatch;
} }
@ -615,19 +640,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateIBStripCutValue(
return UpdateStatus::kMismatch; return UpdateStatus::kMismatch;
} }
PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargetFormats() {
bool dirty = current_pipeline_ == nullptr;
if (!dirty) {
return UpdateStatus::kCompatible;
}
// TODO(Triang3l): Set the formats when RT cache is added.
update_desc_.NumRenderTargets = 0;
update_desc_.DSVFormat = DXGI_FORMAT_UNKNOWN;
return UpdateStatus::kMismatch;
}
PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) { PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) {
// Lookup the pipeline in the cache. // Lookup the pipeline in the cache.
auto it = pipelines_.find(hash_key); auto it = pipelines_.find(hash_key);

View File

@ -15,6 +15,7 @@
#include "third_party/xxhash/xxhash.h" #include "third_party/xxhash/xxhash.h"
#include "xenia/gpu/d3d12/d3d12_shader.h" #include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/d3d12/render_target_cache.h"
#include "xenia/gpu/hlsl_shader_translator.h" #include "xenia/gpu/hlsl_shader_translator.h"
#include "xenia/gpu/register_file.h" #include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
@ -42,12 +43,12 @@ class PipelineCache {
D3D12Shader* LoadShader(ShaderType shader_type, uint32_t guest_address, D3D12Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
const uint32_t* host_address, uint32_t dword_count); const uint32_t* host_address, uint32_t dword_count);
UpdateStatus ConfigurePipeline(D3D12Shader* vertex_shader, UpdateStatus ConfigurePipeline(
D3D12Shader* pixel_shader, D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
PrimitiveType primitive_type, PrimitiveType primitive_type, IndexFormat index_format,
IndexFormat index_format, const RenderTargetCache::PipelineRenderTarget render_targets[5],
ID3D12PipelineState** pipeline_out, ID3D12PipelineState** pipeline_out,
ID3D12RootSignature** root_signature_out); ID3D12RootSignature** root_signature_out);
void ClearCache(); void ClearCache();
@ -57,25 +58,25 @@ class PipelineCache {
bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl); bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl);
UpdateStatus UpdateState(D3D12Shader* vertex_shader, UpdateStatus UpdateState(
D3D12Shader* pixel_shader, D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
PrimitiveType primitive_type, PrimitiveType primitive_type, IndexFormat index_format,
IndexFormat index_format); const RenderTargetCache::PipelineRenderTarget render_targets[5]);
// pRootSignature, VS, PS, GS, PrimitiveTopologyType. // pRootSignature, VS, PS, GS, PrimitiveTopologyType.
UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader, UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader,
D3D12Shader* pixel_shader, D3D12Shader* pixel_shader,
PrimitiveType primitive_type); PrimitiveType primitive_type);
// BlendState. // BlendState, NumRenderTargets, RTVFormats.
UpdateStatus UpdateBlendState(D3D12Shader* pixel_shader); UpdateStatus UpdateBlendStateAndRenderTargets(
D3D12Shader* pixel_shader,
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
// RasterizerState. // RasterizerState.
UpdateStatus UpdateRasterizerState(PrimitiveType primitive_type); UpdateStatus UpdateRasterizerState(PrimitiveType primitive_type);
// DepthStencilState. // DepthStencilState, DSVFormat.
UpdateStatus UpdateDepthStencilState(); UpdateStatus UpdateDepthStencilState(DXGI_FORMAT format);
// IBStripCutValue. // IBStripCutValue.
UpdateStatus UpdateIBStripCutValue(IndexFormat index_format); UpdateStatus UpdateIBStripCutValue(IndexFormat index_format);
// NumRenderTargets, RTVFormats, DSVFormat.
UpdateStatus UpdateRenderTargetFormats();
D3D12CommandProcessor* command_processor_; D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_; RegisterFile* register_file_;
@ -117,16 +118,17 @@ class PipelineCache {
void Reset() { std::memset(this, 0, sizeof(*this)); } void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_shader_stages_regs_; } update_shader_stages_regs_;
struct UpdateBlendStateRegisters { struct UpdateBlendStateAndRenderTargetsRegisters {
RenderTargetCache::PipelineRenderTarget render_targets[5];
// RB_COLOR_MASK with unused render targets removed. // RB_COLOR_MASK with unused render targets removed.
uint32_t color_mask; uint32_t color_mask;
// Blend control updated only for used render targets. // Blend control updated only for used render targets.
uint32_t blendcontrol[4]; uint32_t blendcontrol[4];
bool colorcontrol_blend_enable; bool colorcontrol_blend_enable;
UpdateBlendStateRegisters() { Reset(); } UpdateBlendStateAndRenderTargetsRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); } void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_blend_state_regs_; } update_blend_state_and_render_targets_regs_;
struct UpdateRasterizerStateRegisters { struct UpdateRasterizerStateRegisters {
// Polygon offset is in Xenos units. // Polygon offset is in Xenos units.
@ -142,6 +144,7 @@ class PipelineCache {
} update_rasterizer_state_regs_; } update_rasterizer_state_regs_;
struct UpdateDepthStencilStateRegisters { struct UpdateDepthStencilStateRegisters {
DXGI_FORMAT format;
uint32_t rb_depthcontrol; uint32_t rb_depthcontrol;
uint32_t rb_stencilrefmask; uint32_t rb_stencilrefmask;

View File

@ -27,8 +27,6 @@ RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor,
RenderTargetCache::~RenderTargetCache() { Shutdown(); } RenderTargetCache::~RenderTargetCache() { Shutdown(); }
bool RenderTargetCache::Initialize() { return true; }
void RenderTargetCache::Shutdown() { ClearCache(); } void RenderTargetCache::Shutdown() { ClearCache(); }
void RenderTargetCache::ClearCache() { void RenderTargetCache::ClearCache() {
@ -41,6 +39,19 @@ void RenderTargetCache::ClearCache() {
} }
render_targets_.clear(); render_targets_.clear();
while (descriptor_heaps_depth_ != nullptr) {
auto heap = descriptor_heaps_depth_;
heap->heap->Release();
descriptor_heaps_depth_ = heap->previous;
delete heap;
}
while (descriptor_heaps_color_ != nullptr) {
auto heap = descriptor_heaps_color_;
heap->heap->Release();
descriptor_heaps_color_ = heap->previous;
delete heap;
}
for (uint32_t i = 0; i < xe::countof(heaps_); ++i) { for (uint32_t i = 0; i < xe::countof(heaps_); ++i) {
if (heaps_[i] != nullptr) { if (heaps_[i] != nullptr) {
heaps_[i]->Release(); heaps_[i]->Release();
@ -51,7 +62,7 @@ void RenderTargetCache::ClearCache() {
void RenderTargetCache::BeginFrame() { ClearBindings(); } void RenderTargetCache::BeginFrame() { ClearBindings(); }
void RenderTargetCache::UpdateRenderTargets() { bool RenderTargetCache::UpdateRenderTargets() {
// There are two kinds of render target binding updates in this implementation // There are two kinds of render target binding updates in this implementation
// in case something has been changed - full and partial. // in case something has been changed - full and partial.
// //
@ -90,16 +101,22 @@ void RenderTargetCache::UpdateRenderTargets() {
// made to the lower part of RT0. So, before draws 2 and 3, full updates must // made to the lower part of RT0. So, before draws 2 and 3, full updates must
// be done. // be done.
// //
// Full updates are better for memory usage than partial updates though, as // Direct3D 12 also requires all render targets to have the same size, so the
// the render targets are re-allocated in the heaps, which means that they can // height is calculated from the EDRAM space available to the last render
// be allocated more tightly, preventing too many 32 MB heaps from being // target available in it. However, to make toggling render targets like in
// created. // the Banjo-Kazooie case possible, the height may be decreased only in full
// updates.
// TODO(Triang3l): Check if it's safe to calculate the smallest EDRAM region
// without aliasing and use it for the height. This won't work if games
// actually alias active render targets for some reason.
// //
// To summarize, a full update happens if: // To summarize, a full update happens if:
// - Starting a new frame. // - Starting a new frame.
// - Drawing after resolving. // - Drawing after resolving.
// - Surface pitch changed. // - Surface pitch changed.
// - Sample count changed. // - Sample count changed.
// - Render target is disabled and another render target got more space than
// is currently available in the textures.
// - EDRAM base of a currently used RT changed. // - EDRAM base of a currently used RT changed.
// - Format of a currently used RT changed. // - Format of a currently used RT changed.
// - Current viewport contains unsaved data from previously used render // - Current viewport contains unsaved data from previously used render
@ -112,18 +129,18 @@ void RenderTargetCache::UpdateRenderTargets() {
// //
// A partial update happens if: // A partial update happens if:
// - New render target is added, but doesn't overlap unsaved data from other // - New render target is added, but doesn't overlap unsaved data from other
// currently or previously used render targets. // currently or previously used render targets, and it doesn't require a
// bigger size.
auto command_list = command_processor_->GetCurrentCommandList(); auto command_list = command_processor_->GetCurrentCommandList();
if (command_list == nullptr) { if (command_list == nullptr) {
return; return false;
} }
auto& regs = *register_file_; auto& regs = *register_file_;
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u); uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u);
if (surface_pitch == 0) { if (surface_pitch == 0) {
assert_always(); return false;
return;
} }
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
uint32_t msaa_samples_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1; uint32_t msaa_samples_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1;
@ -178,10 +195,26 @@ void RenderTargetCache::UpdateRenderTargets() {
// clamp the dirty region heights. // clamp the dirty region heights.
uint32_t edram_row_tiles_32bpp = (surface_pitch * msaa_samples_x + 79) / 80; uint32_t edram_row_tiles_32bpp = (surface_pitch * msaa_samples_x + 79) / 80;
uint32_t edram_row_tiles[5]; uint32_t edram_row_tiles[5];
uint32_t edram_max_rows[5]; uint32_t edram_max_rows = UINT32_MAX;
for (uint32_t i = 0; i < 5; ++i) { for (uint32_t i = 0; i < 5; ++i) {
edram_row_tiles[i] = edram_row_tiles_32bpp * (formats_are_64bpp[i] ? 2 : 1); edram_row_tiles[i] = edram_row_tiles_32bpp * (formats_are_64bpp[i] ? 2 : 1);
edram_max_rows[i] = (2048 - edram_bases[i]) / edram_row_tiles[i]; if (enabled[i]) {
// Direct3D 12 doesn't allow render targets with different sizes, so
// calculate the height from the render target closest to the end of
// EDRAM.
edram_max_rows = std::min(edram_max_rows,
(2048 - edram_bases[i]) / edram_row_tiles[i]);
}
}
if (edram_max_rows == 0 || edram_max_rows == UINT32_MAX) {
// Some render target is totally in the end of EDRAM, or nothing is drawn.
return false;
}
// Check the following full update conditions:
// - Render target is disabled and another render target got more space than
// is currently available in the textures.
if (edram_max_rows > current_edram_max_rows_) {
full_update = true;
} }
// Get EDRAM usage of the current draw so dirty regions can be calculated. // Get EDRAM usage of the current draw so dirty regions can be calculated.
@ -210,7 +243,8 @@ void RenderTargetCache::UpdateRenderTargets() {
} }
uint32_t dirty_bottom = uint32_t dirty_bottom =
std::min(std::min(viewport_bottom, scissor_bottom), 2560u); std::min(std::min(viewport_bottom, scissor_bottom), 2560u);
uint32_t edram_rows = (dirty_bottom * msaa_samples_y + 15) >> 4; uint32_t edram_dirty_rows =
std::min((dirty_bottom * msaa_samples_y + 15) >> 4, edram_max_rows);
// Check the following full update conditions: // Check the following full update conditions:
// - EDRAM base of a currently used RT changed. // - EDRAM base of a currently used RT changed.
@ -257,8 +291,7 @@ void RenderTargetCache::UpdateRenderTargets() {
} }
// Checking if the new render target is overlapping any bound one. // Checking if the new render target is overlapping any bound one.
// binding_1 is the new render target. // binding_1 is the new render target.
edram_length_1 = edram_length_1 = edram_dirty_rows * edram_row_tiles[i];
std::min(edram_rows, edram_max_rows[i]) * edram_row_tiles[i];
} }
for (uint32_t j = 0; j < 5; ++j) { for (uint32_t j = 0; j < 5; ++j) {
const RenderTargetBinding& binding_2 = current_bindings_[j]; const RenderTargetBinding& binding_2 = current_bindings_[j];
@ -272,8 +305,7 @@ void RenderTargetCache::UpdateRenderTargets() {
} }
// Checking if now overlapping a previously used render target. // Checking if now overlapping a previously used render target.
// binding_2 is a currently used render target. // binding_2 is a currently used render target.
edram_length_2 = edram_length_2 = edram_dirty_rows * edram_row_tiles[i];
std::min(edram_rows, edram_max_rows[j]) * edram_row_tiles[i];
} else { } else {
// Checking if the new render target is overlapping any bound one. // Checking if the new render target is overlapping any bound one.
// binding_2 is another bound render target. // binding_2 is another bound render target.
@ -295,82 +327,200 @@ void RenderTargetCache::UpdateRenderTargets() {
} }
} }
// If no need to attach any new render targets, update dirty regions and exit. // Need to change the bindings.
if (!full_update && !render_targets_to_attach) { if (full_update || render_targets_to_attach) {
uint32_t heap_usage[5] = {};
if (full_update) {
// Export the currently bound render targets before we ruin the bindings.
WriteRenderTargetsToEDRAM();
ClearBindings();
current_surface_pitch_ = surface_pitch;
current_msaa_samples_ = msaa_samples;
current_edram_max_rows_ = edram_max_rows;
// If updating fully, need to reattach all the render targets and allocate
// from scratch.
for (uint32_t i = 0; i < 5; ++i) {
if (enabled[i]) {
render_targets_to_attach |= 1 << i;
}
}
} else {
// If updating partially, only need to attach new render targets.
for (uint32_t i = 0; i < 5; ++i) {
const RenderTargetBinding& binding = current_bindings_[i];
if (!binding.is_bound) {
continue;
}
const RenderTarget* render_target = binding.render_target;
if (render_target != nullptr) {
// There are no holes between 4 MB pages in each heap.
heap_usage[render_target->heap_page_first >> 3] +=
render_target->heap_page_count;
continue;
}
}
}
XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u",
full_update ? "Full" : "Partial", surface_pitch, msaa_samples,
render_targets_to_attach);
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_RESOURCE_BARRIER barriers[5];
uint32_t barrier_count = 0;
// Allocate new render targets and add them to the bindings list.
for (uint32_t i = 0; i < 5; ++i) { for (uint32_t i = 0; i < 5; ++i) {
if (!enabled[i] || (i == 4 && depth_readonly)) { if (!(render_targets_to_attach & (1 << i))) {
continue; continue;
} }
RenderTargetBinding& binding = current_bindings_[i]; RenderTargetBinding& binding = current_bindings_[i];
binding.edram_dirty_length = std::max(
binding.edram_dirty_length,
std::min(edram_rows, edram_max_rows[i]) * edram_row_tiles[i]);
}
return;
}
// From this point, the function MUST NOT FAIL, otherwise bindings will be
// left in an incomplete state.
uint32_t heap_usage[5] = {};
if (full_update) {
// Export the currently bound render targets before we ruin the bindings.
WriteRenderTargetsToEDRAM();
ClearBindings();
current_surface_pitch_ = surface_pitch;
current_msaa_samples_ = msaa_samples;
// If updating fully, need to reattach all the render targets and allocate
// from scratch.
for (uint32_t i = 0; i < 5; ++i) {
if (enabled[i]) {
render_targets_to_attach |= 1 << i;
}
}
} else {
// If updating partially, only need to attach new render targets.
for (uint32_t i = 0; i < 5; ++i) {
const RenderTargetBinding& binding = current_bindings_[i];
if (!binding.is_bound) {
continue;
}
const RenderTarget* render_target = binding.render_target;
if (render_target != nullptr) {
// There are no holes between 4 MB pages in each heap.
heap_usage[render_target->heap_page_first >> 3] +=
render_target->heap_page_count;
continue;
}
}
}
XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u",
full_update ? "Full" : "Partial", surface_pitch, msaa_samples,
render_targets_to_attach);
// Allocate the new render targets.
// TODO(Triang3l): Actually allocate them.
// TODO(Triang3l): Load the contents from the EDRAM.
// TODO(Triang3l): Bind the render targets to the command list.
// Write the new bindings and update the dirty regions.
for (uint32_t i = 0; i < 5; ++i) {
if (!enabled[i]) {
continue;
}
RenderTargetBinding& binding = current_bindings_[i];
if (render_targets_to_attach & (1 << i)) {
binding.is_bound = true; binding.is_bound = true;
binding.edram_base = edram_bases[i]; binding.edram_base = edram_bases[i];
binding.edram_dirty_length = 0; binding.edram_dirty_length = 0;
binding.format = formats[i]; binding.format = formats[i];
binding.render_target = nullptr;
RenderTargetKey key;
key.width_ss_div_80 = edram_row_tiles_32bpp;
key.height_ss_div_16 = current_edram_max_rows_;
key.is_depth = i == 4;
key.format = formats[i];
D3D12_RESOURCE_DESC resource_desc;
if (!GetResourceDesc(key, resource_desc)) {
// Invalid format.
continue;
}
// Calculate the number of 4 MB pages of 32 MB heaps this RT will use.
D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
device->GetResourceAllocationInfo(0, 1, &resource_desc);
if (allocation_info.SizeInBytes == 0 ||
allocation_info.SizeInBytes > (32 << 20)) {
assert_always();
continue;
}
uint32_t heap_page_count =
(uint32_t(allocation_info.SizeInBytes) + ((4 << 20) - 1)) >> 22;
// Find the heap page range for this render target.
uint32_t heap_page_first = UINT32_MAX;
for (uint32_t j = 0; j < 5; ++j) {
if (heap_usage[j] + heap_page_count <= 8) {
heap_page_first = j * 8 + heap_usage[j];
break;
}
}
if (heap_page_first == UINT32_MAX) {
assert_always();
continue;
}
// Get the render target.
binding.render_target = FindOrCreateRenderTarget(key, heap_page_first);
if (binding.render_target == nullptr) {
continue;
}
// Inform Direct3D that we're reusing the heap for this render target.
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Aliasing.pResourceBefore = nullptr;
barrier.Aliasing.pResourceAfter = binding.render_target->resource;
} }
if (!(i == 4 && depth_readonly)) {
binding.edram_dirty_length = std::max( if (barrier_count != 0) {
binding.edram_dirty_length, command_list->ResourceBarrier(barrier_count, barriers);
std::min(edram_rows, edram_max_rows[i]) * edram_row_tiles[i]);
} }
barrier_count = 0;
// Load the contents of the new render targets from the EDRAM buffer and
// switch their state to RTV/DSV.
for (uint32_t i = 0; i < 5; ++i) {
if (!(render_targets_to_attach & (1 << i))) {
continue;
}
RenderTarget* render_target = current_bindings_[i].render_target;
if (render_target == nullptr) {
continue;
}
// TODO(Triang3l): Load the contents from the EDRAM buffer.
// After loading from the EDRAM buffer (which may make this render target
// a copy destination), switch it to RTV/DSV if needed.
D3D12_RESOURCE_STATES state = i == 4 ? D3D12_RESOURCE_STATE_DEPTH_WRITE
: D3D12_RESOURCE_STATE_RENDER_TARGET;
if (render_target->state != state) {
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = render_target->resource;
barrier.Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = render_target->state;
barrier.Transition.StateAfter = state;
render_target->state = state;
}
}
if (barrier_count != 0) {
command_list->ResourceBarrier(barrier_count, barriers);
}
// Compress the list of the render target because null RTV descriptors are
// broken in Direct3D 12 and bind the render targets to the command list.
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[4];
uint32_t rtv_count = 0;
for (uint32_t i = 0; i < 4; ++i) {
const RenderTargetBinding& binding = current_bindings_[i];
if (!binding.is_bound || binding.render_target == nullptr) {
continue;
}
rtv_handles[rtv_count] = binding.render_target->handle;
current_pipeline_render_targets_[rtv_count].guest_render_target = i;
current_pipeline_render_targets_[rtv_count].format =
GetColorDXGIFormat(ColorRenderTargetFormat(formats[4]));
++rtv_count;
}
for (uint32_t i = rtv_count; i < 4; ++i) {
current_pipeline_render_targets_[i].guest_render_target = i;
current_pipeline_render_targets_[i].format = DXGI_FORMAT_UNKNOWN;
}
const D3D12_CPU_DESCRIPTOR_HANDLE* dsv_handle;
const RenderTargetBinding& depth_binding = current_bindings_[4];
current_pipeline_render_targets_[4].guest_render_target = 4;
if (depth_binding.is_bound && depth_binding.render_target != nullptr) {
dsv_handle = &depth_binding.render_target->handle;
current_pipeline_render_targets_[4].format =
GetDepthDXGIFormat(DepthRenderTargetFormat(formats[4]));
} else {
dsv_handle = nullptr;
current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN;
}
command_list->OMSetRenderTargets(rtv_count, rtv_handles, FALSE, dsv_handle);
} }
// Update the dirty regions.
for (uint32_t i = 0; i < 5; ++i) {
if (!enabled[i] || (i == 4 && depth_readonly)) {
continue;
}
RenderTargetBinding& binding = current_bindings_[i];
if (binding.render_target == nullptr) {
// Nothing to store to the EDRAM buffer if there was an error.
continue;
}
binding.edram_dirty_length = std::max(
binding.edram_dirty_length, edram_dirty_rows * edram_row_tiles[i]);
}
return true;
} }
void RenderTargetCache::EndFrame() { void RenderTargetCache::EndFrame() {
@ -410,9 +560,155 @@ DXGI_FORMAT RenderTargetCache::GetColorDXGIFormat(
void RenderTargetCache::ClearBindings() { void RenderTargetCache::ClearBindings() {
current_surface_pitch_ = 0; current_surface_pitch_ = 0;
current_msaa_samples_ = MsaaSamples::k1X; current_msaa_samples_ = MsaaSamples::k1X;
current_edram_max_rows_ = 0;
std::memset(current_bindings_, 0, sizeof(current_bindings_)); std::memset(current_bindings_, 0, sizeof(current_bindings_));
} }
// Fills `desc` with the description of the 2D texture that backs the render
// target identified by `key`.
//
// Returns false, leaving `desc` untouched, if either dimension in the key is
// zero or if the guest format in the key has no host DXGI equivalent. Returns
// true after writing every field of `desc` otherwise.
bool RenderTargetCache::GetResourceDesc(RenderTargetKey key,
                                        D3D12_RESOURCE_DESC& desc) {
  // A render target needs a non-zero size along both axes.
  if (!key.width_ss_div_80 || !key.height_ss_div_16) {
    return false;
  }

  // Translate the guest format, interpreting it as depth or color depending
  // on the key.
  DXGI_FORMAT host_format;
  if (key.is_depth) {
    host_format = GetDepthDXGIFormat(DepthRenderTargetFormat(key.format));
  } else {
    host_format = GetColorDXGIFormat(ColorRenderTargetFormat(key.format));
  }
  if (host_format == DXGI_FORMAT_UNKNOWN) {
    return false;
  }

  // Depth targets need depth/stencil usage, color targets need render target
  // usage.
  D3D12_RESOURCE_FLAGS usage_flags;
  if (key.is_depth) {
    usage_flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
  } else {
    usage_flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
  }

  desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  desc.Format = host_format;
  desc.Flags = usage_flags;
  desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  // The key stores the dimensions divided by 80x16.
  desc.Width = key.width_ss_div_80 * 80;
  desc.Height = key.height_ss_div_16 * 16;
  desc.DepthOrArraySize = 1;
  desc.MipLevels = 1;
  // Single-sampled resource.
  desc.SampleDesc.Count = 1;
  desc.SampleDesc.Quality = 0;
  // TODO(Triang3l): If real MSAA is added, alignment must be 4 MB.
  desc.Alignment = 0;
  return true;
}
// Creates a render target for `key` as a placed resource in one of the 32 MB
// render target heaps, together with its RTV (color) or DSV (depth)
// descriptor, and registers it in render_targets_.
//
// heap_page_first is a global 4 MB page index: `heap_page_first >> 3` selects
// the heap in heaps_, and `heap_page_first & 7` is the first page inside that
// heap. The render target must fit entirely within that single heap.
//
// The memory heap and the descriptor heap are created lazily on first use.
//
// Returns the new render target, or nullptr if the page index is out of range,
// the key can't be turned into a resource description, the resource doesn't
// fit in the heap starting at the requested page, or any Direct3D object
// creation fails.
RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
    RenderTargetKey key, uint32_t heap_page_first) {
  // heaps_ has 5 entries of 8 pages each, so the valid page indices are
  // 0...39. Page 40 would index heaps_[5], which is out of bounds. Checked at
  // runtime as well because asserts may be compiled out.
  if (heap_page_first >= 8 * 5) {
    assert_always();
    return nullptr;
  }

  // TODO(Triang3l): Find an existing render target.

  D3D12_RESOURCE_DESC resource_desc;
  if (!GetResourceDesc(key, resource_desc)) {
    return nullptr;
  }

  auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
  auto device = provider->GetDevice();

  // Get the number of heap pages needed for the render target.
  D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
      device->GetResourceAllocationInfo(0, 1, &resource_desc);
  // Validate the 64-bit size before narrowing it to 32 bits, so an error value
  // or an oversized resource can't wrap around into a plausible page count.
  if (allocation_info.SizeInBytes == 0 ||
      allocation_info.SizeInBytes > (32 << 20)) {
    assert_always();
    return nullptr;
  }
  uint32_t heap_page_count =
      (uint32_t(allocation_info.SizeInBytes) + ((4 << 20) - 1)) >> 22;
  // The render target can't cross the boundary of its 8-page heap.
  if ((heap_page_first & 7) + heap_page_count > 8) {
    assert_always();
    return nullptr;
  }

  // Create a new descriptor heap if needed, and get a place for the descriptor.
  auto& descriptor_heap =
      key.is_depth ? descriptor_heaps_depth_ : descriptor_heaps_color_;
  if (descriptor_heap == nullptr ||
      descriptor_heap->descriptors_used >= kRenderTargetDescriptorHeapSize) {
    D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc;
    descriptor_heap_desc.Type = key.is_depth ? D3D12_DESCRIPTOR_HEAP_TYPE_DSV
                                             : D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
    descriptor_heap_desc.NumDescriptors = kRenderTargetDescriptorHeapSize;
    descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
    descriptor_heap_desc.NodeMask = 0;
    ID3D12DescriptorHeap* new_d3d_descriptor_heap;
    if (FAILED(device->CreateDescriptorHeap(
            &descriptor_heap_desc, IID_PPV_ARGS(&new_d3d_descriptor_heap)))) {
      XELOGE("Failed to create a heap for %u %s buffer descriptors",
             kRenderTargetDescriptorHeapSize, key.is_depth ? "depth" : "color");
      return nullptr;
    }
    // Link the new heap in front of the full (or absent) one - descriptor_heap
    // is a reference to the member, so this updates the list head.
    RenderTargetDescriptorHeap* new_descriptor_heap =
        new RenderTargetDescriptorHeap;
    new_descriptor_heap->heap = new_d3d_descriptor_heap;
    new_descriptor_heap->start_handle =
        new_d3d_descriptor_heap->GetCPUDescriptorHandleForHeapStart();
    new_descriptor_heap->descriptors_used = 0;
    new_descriptor_heap->previous = descriptor_heap;
    descriptor_heap = new_descriptor_heap;
  }

  // Create the memory heap if it doesn't exist yet.
  ID3D12Heap* heap = heaps_[heap_page_first >> 3];
  if (heap == nullptr) {
    D3D12_HEAP_DESC heap_desc = {};
    heap_desc.SizeInBytes = 32 << 20;
    heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
    // TODO(Triang3l): If real MSAA is added, alignment must be 4 MB.
    heap_desc.Alignment = 0;
    heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
    if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heap)))) {
      XELOGE("Failed to create a 32 MB heap for render targets");
      return nullptr;
    }
    heaps_[heap_page_first >> 3] = heap;
  }

  // The first action likely to be done is EDRAM buffer load.
  D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST;
  ID3D12Resource* resource;
  // The offset is the page index within the heap, in 4 MB (1 << 22) units.
  if (FAILED(device->CreatePlacedResource(heap, (heap_page_first & 7) << 22,
                                          &resource_desc, state, nullptr,
                                          IID_PPV_ARGS(&resource)))) {
    XELOGE(
        "Failed to create a placed resource for %ux%u %s render target with "
        "format %u at heap 4 MB pages %u:%u",
        uint32_t(resource_desc.Width), resource_desc.Height,
        key.is_depth ? "depth" : "color", key.format, heap_page_first,
        heap_page_first + heap_page_count - 1);
    return nullptr;
  }

  // Create the descriptor for the render target in the next free slot of the
  // current descriptor heap.
  D3D12_CPU_DESCRIPTOR_HANDLE descriptor_handle;
  if (key.is_depth) {
    descriptor_handle.ptr =
        descriptor_heap->start_handle.ptr +
        descriptor_heap->descriptors_used * provider->GetDescriptorSizeDSV();
    D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc;
    dsv_desc.Format = resource_desc.Format;
    dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
    dsv_desc.Flags = D3D12_DSV_FLAG_NONE;
    dsv_desc.Texture2D.MipSlice = 0;
    device->CreateDepthStencilView(resource, &dsv_desc, descriptor_handle);
  } else {
    descriptor_handle.ptr =
        descriptor_heap->start_handle.ptr +
        descriptor_heap->descriptors_used * provider->GetDescriptorSizeRTV();
    D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
    rtv_desc.Format = resource_desc.Format;
    rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
    rtv_desc.Texture2D.MipSlice = 0;
    rtv_desc.Texture2D.PlaneSlice = 0;
    device->CreateRenderTargetView(resource, &rtv_desc, descriptor_handle);
  }
  ++descriptor_heap->descriptors_used;

  // Record the render target and make it discoverable by its key.
  RenderTarget* render_target = new RenderTarget;
  render_target->resource = resource;
  render_target->state = state;
  render_target->handle = descriptor_handle;
  render_target->key = key;
  render_target->heap_page_first = heap_page_first;
  render_target->heap_page_count = heap_page_count;
  render_targets_.insert(std::make_pair(key.value, render_target));
  return render_target;
}
void RenderTargetCache::WriteRenderTargetsToEDRAM() {} void RenderTargetCache::WriteRenderTargetsToEDRAM() {}
} // namespace d3d12 } // namespace d3d12

View File

@ -184,17 +184,36 @@ class D3D12CommandProcessor;
// in the surface info register is single-sampled. // in the surface info register is single-sampled.
class RenderTargetCache { class RenderTargetCache {
public: public:
// One host render target slot as seen by the pipeline state and pixel shaders.
//
// The Direct3D 12 debug layer gives errors that contradict each other when
// null RTV descriptors are used: if a valid format is set in RTVFormats in the
// pipeline state, it says that null descriptors can only be used if the format
// in the pipeline state is DXGI_FORMAT_UNKNOWN; however, if
// DXGI_FORMAT_UNKNOWN is set, it complains that the format in the pipeline
// doesn't match the RTV format. So render target bindings have to be made
// consecutive, and the output indices have to be remapped in pixel shaders.
struct PipelineRenderTarget {
// Index of the guest render target that is bound to this host slot.
uint32_t guest_render_target;
// Host format of the slot, DXGI_FORMAT_UNKNOWN if nothing is bound to it.
DXGI_FORMAT format;
};
RenderTargetCache(D3D12CommandProcessor* command_processor, RenderTargetCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file); RegisterFile* register_file);
~RenderTargetCache(); ~RenderTargetCache();
bool Initialize();
void Shutdown(); void Shutdown();
void ClearCache(); void ClearCache();
void BeginFrame(); void BeginFrame();
// Called in the beginning of a draw call - may bind pipelines. // Called in the beginning of a draw call - may bind pipelines.
void UpdateRenderTargets(); bool UpdateRenderTargets();
// Gives access to the 5-element array describing the currently bound render
// targets, for pipeline creation and for remapping color outputs in shaders.
// Each element holds the guest render target index mapped to that host slot
// and the host format. Color entries are consecutive, and the first one with
// the format DXGI_FORMAT_UNKNOWN terminates the list. The depth format is in
// the 5th element.
const PipelineRenderTarget* GetCurrentPipelineRenderTargets() const {
  return &current_pipeline_render_targets_[0];
}
void EndFrame(); void EndFrame();
static inline bool IsColorFormat64bpp(ColorRenderTargetFormat format) { static inline bool IsColorFormat64bpp(ColorRenderTargetFormat format) {
@ -203,12 +222,22 @@ class RenderTargetCache {
format == ColorRenderTargetFormat::k_32_32_FLOAT; format == ColorRenderTargetFormat::k_32_32_FLOAT;
} }
static DXGI_FORMAT GetColorDXGIFormat(ColorRenderTargetFormat format); static DXGI_FORMAT GetColorDXGIFormat(ColorRenderTargetFormat format);
// Maps a guest depth format to the host depth/stencil format: kD24FS8 becomes
// D32_FLOAT_S8X24_UINT, everything else becomes D24_UNORM_S8_UINT.
// 24-bit depth may be faster on Nvidia and should make no performance
// difference on AMD, but emulating D24S8 as D24_UNORM_S8_UINT should require
// less conversion in the shaders doing EDRAM loads/stores, and it's probably
// more accurate.
static inline DXGI_FORMAT GetDepthDXGIFormat(DepthRenderTargetFormat format) {
  if (format == DepthRenderTargetFormat::kD24FS8) {
    return DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
  }
  return DXGI_FORMAT_D24_UNORM_S8_UINT;
}
private: private:
union RenderTargetKey { union RenderTargetKey {
struct { struct {
// Supersampled dimensions. The limit is 2560x2560 without AA, 2560x5120 // Supersampled (_ss - scaled 2x if needed) dimensions, divided by 80x16.
// with 2x AA, and 5120x5120 with 4x AA. // The limit is 2560x2560 without AA, 2560x5120 with 2x AA, and 5120x5120
// with 4x AA.
uint32_t width_ss_div_80 : 7; // 7 uint32_t width_ss_div_80 : 7; // 7
uint32_t height_ss_div_16 : 9; // 16 uint32_t height_ss_div_16 : 9; // 16
uint32_t is_depth : 1; // 17 uint32_t is_depth : 1; // 17
@ -259,6 +288,12 @@ class RenderTargetCache {
void ClearBindings(); void ClearBindings();
// Returns true if a render target with such key can be created.
static bool GetResourceDesc(RenderTargetKey key, D3D12_RESOURCE_DESC& desc);
RenderTarget* FindOrCreateRenderTarget(RenderTargetKey key,
uint32_t heap_page_first);
// Must be in a frame to call. Writes the dirty areas of the currently bound // Must be in a frame to call. Writes the dirty areas of the currently bound
// render targets and marks them as clean. // render targets and marks them as clean.
void WriteRenderTargetsToEDRAM(); void WriteRenderTargetsToEDRAM();
@ -271,11 +306,27 @@ class RenderTargetCache {
// entire EDRAM - a 32-bit depth/stencil one - at some resolution. // entire EDRAM - a 32-bit depth/stencil one - at some resolution.
ID3D12Heap* heaps_[5] = {}; ID3D12Heap* heaps_[5] = {};
static constexpr uint32_t kRenderTargetDescriptorHeapSize = 2048;
// A descriptor heap for render target views, from which descriptors are
// allocated linearly. Heaps form a singly linked list through `previous`, with
// the most recently created heap at the head.
struct RenderTargetDescriptorHeap {
// The Direct3D 12 descriptor heap object.
ID3D12DescriptorHeap* heap;
// CPU handle of the first descriptor in the heap.
D3D12_CPU_DESCRIPTOR_HANDLE start_handle;
// Number of descriptors already allocated from this heap.
// When descriptors_used is >= kRenderTargetDescriptorHeapSize, a new heap
// must be allocated and linked to the one that became full now.
uint32_t descriptors_used;
// The heap that was current before this one was created, or null if this is
// the first heap of its kind.
RenderTargetDescriptorHeap* previous;
};
RenderTargetDescriptorHeap* descriptor_heaps_color_ = nullptr;
RenderTargetDescriptorHeap* descriptor_heaps_depth_ = nullptr;
std::unordered_multimap<uint32_t, RenderTarget*> render_targets_; std::unordered_multimap<uint32_t, RenderTarget*> render_targets_;
uint32_t current_surface_pitch_ = 0; uint32_t current_surface_pitch_ = 0;
MsaaSamples current_msaa_samples_ = MsaaSamples::k1X; MsaaSamples current_msaa_samples_ = MsaaSamples::k1X;
uint32_t current_edram_max_rows_ = 0;
RenderTargetBinding current_bindings_[5] = {}; RenderTargetBinding current_bindings_[5] = {};
PipelineRenderTarget current_pipeline_render_targets_[5];
}; };
} // namespace d3d12 } // namespace d3d12

View File

@ -176,6 +176,7 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
" float xe_pixel_half_pixel_offset;\n" " float xe_pixel_half_pixel_offset;\n"
" float2 xe_ssaa_inv_scale;\n" " float2 xe_ssaa_inv_scale;\n"
" uint xe_pixel_pos_reg;\n" " uint xe_pixel_pos_reg;\n"
" uint4 xe_color_output_map;\n"
"};\n" "};\n"
"\n" "\n"
"cbuffer xe_loop_bool_constants : register(b1) {\n" "cbuffer xe_loop_bool_constants : register(b1) {\n"
@ -291,10 +292,11 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
"XePixelShaderOutput main(XePixelShaderInput xe_input) {\n" "XePixelShaderOutput main(XePixelShaderInput xe_input) {\n"
" float4 xe_r[%u];\n" " float4 xe_r[%u];\n"
" XePixelShaderOutput xe_output;\n" " XePixelShaderOutput xe_output;\n"
" xe_output.colors[0] = (0.0).xxxx;\n" " float4 xe_color_output[4];\n"
" xe_output.colors[1] = (0.0).xxxx;\n" " xe_color_output[0] = (0.0).xxxx;\n"
" xe_output.colors[2] = (0.0).xxxx;\n" " xe_color_output[1] = (0.0).xxxx;\n"
" xe_output.colors[3] = (0.0).xxxx;\n", " xe_color_output[2] = (0.0).xxxx;\n"
" xe_color_output[3] = (0.0).xxxx;\n",
kMaxInterpolators, writes_depth_ ? " float depth : SV_Depth;\n" : "", kMaxInterpolators, writes_depth_ ? " float depth : SV_Depth;\n" : "",
register_count()); register_count());
// Initialize SV_Depth if using it. // Initialize SV_Depth if using it.
@ -370,6 +372,14 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
" xe_output.position.xyz =\n" " xe_output.position.xyz =\n"
" xe_output.position.xyz * xe_ndc_scale +\n" " xe_output.position.xyz * xe_ndc_scale +\n"
" xe_ndc_offset * xe_output.position.www;\n"); " xe_ndc_offset * xe_output.position.www;\n");
} else if (is_pixel_shader()) {
// Remap guest color outputs to host render targets because null render
// target descriptors are broken.
source.Append(
" xe_output.colors[0] = xe_color_output[xe_color_output_map.r];\n"
" xe_output.colors[1] = xe_color_output[xe_color_output_map.g];\n"
" xe_output.colors[2] = xe_color_output[xe_color_output_map.b];\n"
" xe_output.colors[3] = xe_color_output[xe_color_output_map.a];\n");
} }
// TODO(Triang3l): Window offset, half pixel offset, alpha test, gamma. // TODO(Triang3l): Window offset, half pixel offset, alpha test, gamma.
source.Append( source.Append(
@ -726,7 +736,7 @@ void HlslShaderTranslator::EmitStoreResult(const InstructionResult& result,
EmitSourceDepth("xe_output.point_size"); EmitSourceDepth("xe_output.point_size");
break; break;
case InstructionStorageTarget::kColorTarget: case InstructionStorageTarget::kColorTarget:
EmitSourceDepth("xe_output.colors"); EmitSourceDepth("xe_color_output");
storage_is_array = true; storage_is_array = true;
break; break;
case InstructionStorageTarget::kDepth: case InstructionStorageTarget::kDepth:

View File

@ -37,6 +37,9 @@ class HlslShaderTranslator : public ShaderTranslator {
// vec4 3 // vec4 3
float ssaa_inv_scale[2]; float ssaa_inv_scale[2];
uint32_t pixel_pos_reg; uint32_t pixel_pos_reg;
uint32_t padding_3;
// vec4 4
uint32_t color_output_map[4];
}; };
struct TextureSRV { struct TextureSRV {