[D3D12] Render target binding
This commit is contained in:
parent
c0c0ca263d
commit
952bb91c3f
|
@ -489,10 +489,6 @@ bool D3D12CommandProcessor::SetupContext() {
|
||||||
|
|
||||||
render_target_cache_ =
|
render_target_cache_ =
|
||||||
std::make_unique<RenderTargetCache>(this, register_file_);
|
std::make_unique<RenderTargetCache>(this, register_file_);
|
||||||
if (!render_target_cache_->Initialize()) {
|
|
||||||
XELOGE("Failed to initialize the render target cache");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -652,9 +648,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
}
|
}
|
||||||
if (reset_index != reset_index_expected) {
|
if (reset_index != reset_index_expected) {
|
||||||
// Only 0xFFFF and 0xFFFFFFFF primitive restart indices are supported by
|
// Only 0xFFFF and 0xFFFFFFFF primitive restart indices are supported by
|
||||||
// Direct3D 12 (endianness doesn't matter for them). However, Direct3D 9
|
// Direct3D 12 (endianness doesn't matter for them). With shared memory,
|
||||||
// uses 0xFFFF as the reset index. With shared memory, it's impossible to
|
// it's impossible to replace the cut index in the buffer without
|
||||||
// replace the cut index in the buffer without affecting the game memory.
|
// affecting the game memory.
|
||||||
XELOGE(
|
XELOGE(
|
||||||
"The game uses the primitive restart index 0x%X that isn't 0xFFFF or "
|
"The game uses the primitive restart index 0x%X that isn't 0xFFFF or "
|
||||||
"0xFFFFFFFF. Report the game to Xenia developers so geometry shaders "
|
"0xFFFFFFFF. Report the game to Xenia developers so geometry shaders "
|
||||||
|
@ -678,14 +674,19 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
pixel_shader = nullptr;
|
pixel_shader = nullptr;
|
||||||
} else if (!pixel_shader) {
|
} else if (!pixel_shader) {
|
||||||
// Need a pixel shader in normal color mode.
|
// Need a pixel shader in normal color mode.
|
||||||
return true;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool new_frame = BeginFrame();
|
bool new_frame = BeginFrame();
|
||||||
auto command_list = GetCurrentCommandList();
|
auto command_list = GetCurrentCommandList();
|
||||||
|
|
||||||
// Set up the render targets - this may bind pipelines.
|
// Set up the render targets - this may bind pipelines.
|
||||||
render_target_cache_->UpdateRenderTargets();
|
if (!render_target_cache_->UpdateRenderTargets()) {
|
||||||
|
// Doesn't actually draw.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
|
||||||
|
render_target_cache_->GetCurrentPipelineRenderTargets();
|
||||||
|
|
||||||
// Set the primitive topology.
|
// Set the primitive topology.
|
||||||
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
||||||
|
@ -715,8 +716,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
ID3D12RootSignature* root_signature;
|
ID3D12RootSignature* root_signature;
|
||||||
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
|
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
|
||||||
vertex_shader, pixel_shader, primitive_type,
|
vertex_shader, pixel_shader, primitive_type,
|
||||||
indexed ? index_buffer_info->format : IndexFormat::kInt16, &pipeline,
|
indexed ? index_buffer_info->format : IndexFormat::kInt16,
|
||||||
&root_signature);
|
pipeline_render_targets, &pipeline, &root_signature);
|
||||||
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
|
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -733,8 +734,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
SetPipeline(pipeline);
|
SetPipeline(pipeline);
|
||||||
|
|
||||||
// Update system constants before uploading them.
|
// Update system constants before uploading them.
|
||||||
UpdateSystemConstantValues(indexed ? index_buffer_info->endianness
|
UpdateSystemConstantValues(
|
||||||
: Endian::kUnspecified);
|
indexed ? index_buffer_info->endianness : Endian::kUnspecified,
|
||||||
|
pipeline_render_targets);
|
||||||
|
|
||||||
// Update constant buffers, descriptors and root parameters.
|
// Update constant buffers, descriptors and root parameters.
|
||||||
if (!UpdateBindings(command_list, vertex_shader, pixel_shader,
|
if (!UpdateBindings(command_list, vertex_shader, pixel_shader,
|
||||||
|
@ -1022,7 +1024,9 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void D3D12CommandProcessor::UpdateSystemConstantValues(Endian index_endian) {
|
void D3D12CommandProcessor::UpdateSystemConstantValues(
|
||||||
|
Endian index_endian,
|
||||||
|
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
||||||
auto& regs = *register_file_;
|
auto& regs = *register_file_;
|
||||||
uint32_t vgt_indx_offset = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32;
|
uint32_t vgt_indx_offset = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32;
|
||||||
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
|
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
|
||||||
|
@ -1067,7 +1071,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(Endian index_endian) {
|
||||||
// viewport that is used to emulate unnormalized coordinates.
|
// viewport that is used to emulate unnormalized coordinates.
|
||||||
// Z scale/offset is to convert from OpenGL NDC to Direct3D NDC if needed.
|
// Z scale/offset is to convert from OpenGL NDC to Direct3D NDC if needed.
|
||||||
// Also apply half-pixel offset to reproduce Direct3D 9 rasterization rules.
|
// Also apply half-pixel offset to reproduce Direct3D 9 rasterization rules.
|
||||||
// TODO(Triang3l): Check if pixel coordinates need to offset depending on a
|
// TODO(Triang3l): Check if pixel coordinates need to be offset depending on a
|
||||||
// different register (and if there's such register at all).
|
// different register (and if there's such register at all).
|
||||||
bool gl_clip_space_def =
|
bool gl_clip_space_def =
|
||||||
!(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4));
|
!(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4));
|
||||||
|
@ -1127,6 +1131,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(Endian index_endian) {
|
||||||
system_constants_.ssaa_inv_scale[0] = ssaa_inv_scale_x;
|
system_constants_.ssaa_inv_scale[0] = ssaa_inv_scale_x;
|
||||||
system_constants_.ssaa_inv_scale[1] = ssaa_inv_scale_y;
|
system_constants_.ssaa_inv_scale[1] = ssaa_inv_scale_y;
|
||||||
|
|
||||||
|
// Color output index mapping.
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
dirty |= system_constants_.color_output_map[i] !=
|
||||||
|
render_targets[i].guest_render_target;
|
||||||
|
system_constants_.color_output_map[i] =
|
||||||
|
render_targets[i].guest_render_target;
|
||||||
|
}
|
||||||
|
|
||||||
cbuffer_bindings_system_.up_to_date &= dirty;
|
cbuffer_bindings_system_.up_to_date &= dirty;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -148,7 +148,9 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
bool EndFrame();
|
bool EndFrame();
|
||||||
|
|
||||||
void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list);
|
void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list);
|
||||||
void UpdateSystemConstantValues(Endian index_endian);
|
void UpdateSystemConstantValues(
|
||||||
|
Endian index_endian,
|
||||||
|
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
|
||||||
bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
|
bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
|
||||||
const D3D12Shader* vertex_shader,
|
const D3D12Shader* vertex_shader,
|
||||||
const D3D12Shader* pixel_shader,
|
const D3D12Shader* pixel_shader,
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
@ -17,6 +18,7 @@
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
#include "xenia/base/profiling.h"
|
#include "xenia/base/profiling.h"
|
||||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||||
|
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||||
#include "xenia/gpu/gpu_flags.h"
|
#include "xenia/gpu/gpu_flags.h"
|
||||||
#include "xenia/gpu/hlsl_shader_translator.h"
|
#include "xenia/gpu/hlsl_shader_translator.h"
|
||||||
|
|
||||||
|
@ -67,6 +69,7 @@ D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type,
|
||||||
PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
|
PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
|
||||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||||
PrimitiveType primitive_type, IndexFormat index_format,
|
PrimitiveType primitive_type, IndexFormat index_format,
|
||||||
|
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||||
ID3D12PipelineState** pipeline_out,
|
ID3D12PipelineState** pipeline_out,
|
||||||
ID3D12RootSignature** root_signature_out) {
|
ID3D12RootSignature** root_signature_out) {
|
||||||
#if FINE_GRAINED_DRAW_SCOPES
|
#if FINE_GRAINED_DRAW_SCOPES
|
||||||
|
@ -77,8 +80,8 @@ PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
|
||||||
assert_not_null(root_signature_out);
|
assert_not_null(root_signature_out);
|
||||||
|
|
||||||
Pipeline* pipeline = nullptr;
|
Pipeline* pipeline = nullptr;
|
||||||
auto update_status =
|
auto update_status = UpdateState(vertex_shader, pixel_shader, primitive_type,
|
||||||
UpdateState(vertex_shader, pixel_shader, primitive_type, index_format);
|
index_format, render_targets);
|
||||||
switch (update_status) {
|
switch (update_status) {
|
||||||
case UpdateStatus::kCompatible:
|
case UpdateStatus::kCompatible:
|
||||||
// Requested pipeline is compatible with our previous one, so use that.
|
// Requested pipeline is compatible with our previous one, so use that.
|
||||||
|
@ -190,7 +193,8 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader,
|
||||||
|
|
||||||
PipelineCache::UpdateStatus PipelineCache::UpdateState(
|
PipelineCache::UpdateStatus PipelineCache::UpdateState(
|
||||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||||
PrimitiveType primitive_type, IndexFormat index_format) {
|
PrimitiveType primitive_type, IndexFormat index_format,
|
||||||
|
const RenderTargetCache::PipelineRenderTarget render_targets[5]) {
|
||||||
bool mismatch = false;
|
bool mismatch = false;
|
||||||
|
|
||||||
// Reset hash so we can build it up.
|
// Reset hash so we can build it up.
|
||||||
|
@ -208,18 +212,15 @@ PipelineCache::UpdateStatus PipelineCache::UpdateState(
|
||||||
UpdateStatus status;
|
UpdateStatus status;
|
||||||
status = UpdateShaderStages(vertex_shader, pixel_shader, primitive_type);
|
status = UpdateShaderStages(vertex_shader, pixel_shader, primitive_type);
|
||||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages");
|
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages");
|
||||||
status = UpdateBlendState(pixel_shader);
|
status = UpdateBlendStateAndRenderTargets(pixel_shader, render_targets);
|
||||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
|
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
|
||||||
status = UpdateRasterizerState(primitive_type);
|
status = UpdateRasterizerState(primitive_type);
|
||||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
|
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
|
||||||
status = UpdateDepthStencilState();
|
status = UpdateDepthStencilState(render_targets[4].format);
|
||||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");
|
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");
|
||||||
status = UpdateIBStripCutValue(index_format);
|
status = UpdateIBStripCutValue(index_format);
|
||||||
CHECK_UPDATE_STATUS(status, mismatch,
|
CHECK_UPDATE_STATUS(status, mismatch,
|
||||||
"Unable to update index buffer strip cut value");
|
"Unable to update index buffer strip cut value");
|
||||||
status = UpdateRenderTargetFormats();
|
|
||||||
CHECK_UPDATE_STATUS(status, mismatch,
|
|
||||||
"Unable to update render target formats");
|
|
||||||
#undef CHECK_UPDATE_STATUS
|
#undef CHECK_UPDATE_STATUS
|
||||||
|
|
||||||
return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
|
return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
|
||||||
|
@ -303,18 +304,27 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
||||||
return UpdateStatus::kMismatch;
|
return UpdateStatus::kMismatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
PipelineCache::UpdateStatus PipelineCache::UpdateBlendState(
|
PipelineCache::UpdateStatus PipelineCache::UpdateBlendStateAndRenderTargets(
|
||||||
D3D12Shader* pixel_shader) {
|
D3D12Shader* pixel_shader,
|
||||||
auto& regs = update_blend_state_regs_;
|
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
||||||
|
auto& regs = update_blend_state_and_render_targets_regs_;
|
||||||
|
|
||||||
bool dirty = current_pipeline_ == nullptr;
|
bool dirty = current_pipeline_ == nullptr;
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
dirty |= regs.render_targets[i].guest_render_target !=
|
||||||
|
render_targets[i].guest_render_target;
|
||||||
|
regs.render_targets[i].guest_render_target =
|
||||||
|
render_targets[i].guest_render_target;
|
||||||
|
dirty |= regs.render_targets[i].format != render_targets[i].format;
|
||||||
|
regs.render_targets[i].format = render_targets[i].format;
|
||||||
|
}
|
||||||
uint32_t color_mask;
|
uint32_t color_mask;
|
||||||
if (pixel_shader != nullptr) {
|
if (pixel_shader != nullptr) {
|
||||||
color_mask = register_file_->values[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
|
color_mask = register_file_->values[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
|
||||||
// If the pixel shader doesn't write to a render target, writing to it is
|
|
||||||
// disabled in the blend state. Otherwise, in Halo 3, one important render
|
|
||||||
// target is destroyed by a shader not writing to one of the outputs.
|
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
// If the pixel shader doesn't write to a render target, writing to it is
|
||||||
|
// disabled in the blend state. Otherwise, in Halo 3, one important render
|
||||||
|
// target is destroyed by a shader not writing to one of the outputs.
|
||||||
if (!pixel_shader->writes_color_target(i)) {
|
if (!pixel_shader->writes_color_target(i)) {
|
||||||
color_mask &= ~(0xF << (i * 4));
|
color_mask &= ~(0xF << (i * 4));
|
||||||
}
|
}
|
||||||
|
@ -372,10 +382,14 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState(
|
||||||
/* 3 */ D3D12_BLEND_OP_MAX,
|
/* 3 */ D3D12_BLEND_OP_MAX,
|
||||||
/* 4 */ D3D12_BLEND_OP_REV_SUBTRACT,
|
/* 4 */ D3D12_BLEND_OP_REV_SUBTRACT,
|
||||||
};
|
};
|
||||||
|
update_desc_.NumRenderTargets = 0;
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
auto& blend_desc = update_desc_.BlendState.RenderTarget[i];
|
auto& blend_desc = update_desc_.BlendState.RenderTarget[i];
|
||||||
if (blend_enable && (color_mask & (0xF << (i * 4)))) {
|
uint32_t guest_render_target = render_targets[i].guest_render_target;
|
||||||
uint32_t blend_control = regs.blendcontrol[i];
|
DXGI_FORMAT format = render_targets[i].format;
|
||||||
|
if (blend_enable && format != DXGI_FORMAT_UNKNOWN &&
|
||||||
|
(color_mask & (0xF << (guest_render_target * 4)))) {
|
||||||
|
uint32_t blend_control = regs.blendcontrol[guest_render_target];
|
||||||
// A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND
|
// A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND
|
||||||
blend_desc.SrcBlend = kBlendFactorMap[(blend_control & 0x0000001F) >> 0];
|
blend_desc.SrcBlend = kBlendFactorMap[(blend_control & 0x0000001F) >> 0];
|
||||||
// A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND
|
// A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND
|
||||||
|
@ -399,7 +413,12 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState(
|
||||||
blend_desc.DestBlendAlpha = D3D12_BLEND_ZERO;
|
blend_desc.DestBlendAlpha = D3D12_BLEND_ZERO;
|
||||||
blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD;
|
blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD;
|
||||||
}
|
}
|
||||||
blend_desc.RenderTargetWriteMask = (color_mask >> (i * 4)) & 0xF;
|
blend_desc.RenderTargetWriteMask =
|
||||||
|
(color_mask >> (guest_render_target * 4)) & 0xF;
|
||||||
|
update_desc_.RTVFormats[i] = format;
|
||||||
|
if (format != DXGI_FORMAT_UNKNOWN) {
|
||||||
|
update_desc_.NumRenderTargets = i + 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return UpdateStatus::kMismatch;
|
return UpdateStatus::kMismatch;
|
||||||
|
@ -532,10 +551,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
|
||||||
return UpdateStatus::kMismatch;
|
return UpdateStatus::kMismatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
|
PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState(
|
||||||
|
DXGI_FORMAT format) {
|
||||||
auto& regs = update_depth_stencil_state_regs_;
|
auto& regs = update_depth_stencil_state_regs_;
|
||||||
|
|
||||||
bool dirty = current_pipeline_ == nullptr;
|
bool dirty = current_pipeline_ == nullptr;
|
||||||
|
dirty |= regs.format != format;
|
||||||
|
regs.format = format;
|
||||||
dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
|
dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
|
||||||
dirty |=
|
dirty |=
|
||||||
SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
|
SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
|
||||||
|
@ -544,17 +566,18 @@ PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
|
||||||
return UpdateStatus::kCompatible;
|
return UpdateStatus::kCompatible;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool dsv_bound = format != DXGI_FORMAT_UNKNOWN;
|
||||||
update_desc_.DepthStencilState.DepthEnable =
|
update_desc_.DepthStencilState.DepthEnable =
|
||||||
(regs.rb_depthcontrol & 0x2) ? TRUE : FALSE;
|
(dsv_bound && (regs.rb_depthcontrol & 0x2)) ? TRUE : FALSE;
|
||||||
update_desc_.DepthStencilState.DepthWriteMask =
|
update_desc_.DepthStencilState.DepthWriteMask =
|
||||||
(regs.rb_depthcontrol & 0x4) ? D3D12_DEPTH_WRITE_MASK_ALL
|
(dsv_bound && (regs.rb_depthcontrol & 0x4)) ? D3D12_DEPTH_WRITE_MASK_ALL
|
||||||
: D3D12_DEPTH_WRITE_MASK_ZERO;
|
: D3D12_DEPTH_WRITE_MASK_ZERO;
|
||||||
// Comparison functions are the same in Direct3D 12 but plus one (minus one,
|
// Comparison functions are the same in Direct3D 12 but plus one (minus one,
|
||||||
// bit 0 for less, bit 1 for equal, bit 2 for greater).
|
// bit 0 for less, bit 1 for equal, bit 2 for greater).
|
||||||
update_desc_.DepthStencilState.DepthFunc =
|
update_desc_.DepthStencilState.DepthFunc =
|
||||||
D3D12_COMPARISON_FUNC(((regs.rb_depthcontrol >> 4) & 0x7) + 1);
|
D3D12_COMPARISON_FUNC(((regs.rb_depthcontrol >> 4) & 0x7) + 1);
|
||||||
update_desc_.DepthStencilState.StencilEnable =
|
update_desc_.DepthStencilState.StencilEnable =
|
||||||
(regs.rb_depthcontrol & 0x1) ? TRUE : FALSE;
|
(dsv_bound && (regs.rb_depthcontrol & 0x1)) ? TRUE : FALSE;
|
||||||
update_desc_.DepthStencilState.StencilReadMask =
|
update_desc_.DepthStencilState.StencilReadMask =
|
||||||
(regs.rb_stencilrefmask >> 8) & 0xFF;
|
(regs.rb_stencilrefmask >> 8) & 0xFF;
|
||||||
update_desc_.DepthStencilState.StencilWriteMask =
|
update_desc_.DepthStencilState.StencilWriteMask =
|
||||||
|
@ -587,6 +610,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
|
||||||
// test is dynamic - should be enabled anyway if there's no alpha test,
|
// test is dynamic - should be enabled anyway if there's no alpha test,
|
||||||
// discarding and depth output).
|
// discarding and depth output).
|
||||||
|
|
||||||
|
update_desc_.DSVFormat = format;
|
||||||
|
|
||||||
return UpdateStatus::kMismatch;
|
return UpdateStatus::kMismatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -615,19 +640,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateIBStripCutValue(
|
||||||
return UpdateStatus::kMismatch;
|
return UpdateStatus::kMismatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargetFormats() {
|
|
||||||
bool dirty = current_pipeline_ == nullptr;
|
|
||||||
if (!dirty) {
|
|
||||||
return UpdateStatus::kCompatible;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(Triang3l): Set the formats when RT cache is added.
|
|
||||||
update_desc_.NumRenderTargets = 0;
|
|
||||||
update_desc_.DSVFormat = DXGI_FORMAT_UNKNOWN;
|
|
||||||
|
|
||||||
return UpdateStatus::kMismatch;
|
|
||||||
}
|
|
||||||
|
|
||||||
PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) {
|
PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) {
|
||||||
// Lookup the pipeline in the cache.
|
// Lookup the pipeline in the cache.
|
||||||
auto it = pipelines_.find(hash_key);
|
auto it = pipelines_.find(hash_key);
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#include "third_party/xxhash/xxhash.h"
|
#include "third_party/xxhash/xxhash.h"
|
||||||
|
|
||||||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||||
|
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||||
#include "xenia/gpu/hlsl_shader_translator.h"
|
#include "xenia/gpu/hlsl_shader_translator.h"
|
||||||
#include "xenia/gpu/register_file.h"
|
#include "xenia/gpu/register_file.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
|
@ -42,12 +43,12 @@ class PipelineCache {
|
||||||
D3D12Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
|
D3D12Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
|
||||||
const uint32_t* host_address, uint32_t dword_count);
|
const uint32_t* host_address, uint32_t dword_count);
|
||||||
|
|
||||||
UpdateStatus ConfigurePipeline(D3D12Shader* vertex_shader,
|
UpdateStatus ConfigurePipeline(
|
||||||
D3D12Shader* pixel_shader,
|
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||||
PrimitiveType primitive_type,
|
PrimitiveType primitive_type, IndexFormat index_format,
|
||||||
IndexFormat index_format,
|
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||||
ID3D12PipelineState** pipeline_out,
|
ID3D12PipelineState** pipeline_out,
|
||||||
ID3D12RootSignature** root_signature_out);
|
ID3D12RootSignature** root_signature_out);
|
||||||
|
|
||||||
void ClearCache();
|
void ClearCache();
|
||||||
|
|
||||||
|
@ -57,25 +58,25 @@ class PipelineCache {
|
||||||
|
|
||||||
bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl);
|
bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl);
|
||||||
|
|
||||||
UpdateStatus UpdateState(D3D12Shader* vertex_shader,
|
UpdateStatus UpdateState(
|
||||||
D3D12Shader* pixel_shader,
|
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||||
PrimitiveType primitive_type,
|
PrimitiveType primitive_type, IndexFormat index_format,
|
||||||
IndexFormat index_format);
|
const RenderTargetCache::PipelineRenderTarget render_targets[5]);
|
||||||
|
|
||||||
// pRootSignature, VS, PS, GS, PrimitiveTopologyType.
|
// pRootSignature, VS, PS, GS, PrimitiveTopologyType.
|
||||||
UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader,
|
UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader,
|
||||||
D3D12Shader* pixel_shader,
|
D3D12Shader* pixel_shader,
|
||||||
PrimitiveType primitive_type);
|
PrimitiveType primitive_type);
|
||||||
// BlendState.
|
// BlendState, NumRenderTargets, RTVFormats.
|
||||||
UpdateStatus UpdateBlendState(D3D12Shader* pixel_shader);
|
UpdateStatus UpdateBlendStateAndRenderTargets(
|
||||||
|
D3D12Shader* pixel_shader,
|
||||||
|
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
|
||||||
// RasterizerState.
|
// RasterizerState.
|
||||||
UpdateStatus UpdateRasterizerState(PrimitiveType primitive_type);
|
UpdateStatus UpdateRasterizerState(PrimitiveType primitive_type);
|
||||||
// DepthStencilState.
|
// DepthStencilState, DSVFormat.
|
||||||
UpdateStatus UpdateDepthStencilState();
|
UpdateStatus UpdateDepthStencilState(DXGI_FORMAT format);
|
||||||
// IBStripCutValue.
|
// IBStripCutValue.
|
||||||
UpdateStatus UpdateIBStripCutValue(IndexFormat index_format);
|
UpdateStatus UpdateIBStripCutValue(IndexFormat index_format);
|
||||||
// NumRenderTargets, RTVFormats, DSVFormat.
|
|
||||||
UpdateStatus UpdateRenderTargetFormats();
|
|
||||||
|
|
||||||
D3D12CommandProcessor* command_processor_;
|
D3D12CommandProcessor* command_processor_;
|
||||||
RegisterFile* register_file_;
|
RegisterFile* register_file_;
|
||||||
|
@ -117,16 +118,17 @@ class PipelineCache {
|
||||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
} update_shader_stages_regs_;
|
} update_shader_stages_regs_;
|
||||||
|
|
||||||
struct UpdateBlendStateRegisters {
|
struct UpdateBlendStateAndRenderTargetsRegisters {
|
||||||
|
RenderTargetCache::PipelineRenderTarget render_targets[5];
|
||||||
// RB_COLOR_MASK with unused render targets removed.
|
// RB_COLOR_MASK with unused render targets removed.
|
||||||
uint32_t color_mask;
|
uint32_t color_mask;
|
||||||
// Blend control updated only for used render targets.
|
// Blend control updated only for used render targets.
|
||||||
uint32_t blendcontrol[4];
|
uint32_t blendcontrol[4];
|
||||||
bool colorcontrol_blend_enable;
|
bool colorcontrol_blend_enable;
|
||||||
|
|
||||||
UpdateBlendStateRegisters() { Reset(); }
|
UpdateBlendStateAndRenderTargetsRegisters() { Reset(); }
|
||||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
} update_blend_state_regs_;
|
} update_blend_state_and_render_targets_regs_;
|
||||||
|
|
||||||
struct UpdateRasterizerStateRegisters {
|
struct UpdateRasterizerStateRegisters {
|
||||||
// Polygon offset is in Xenos units.
|
// Polygon offset is in Xenos units.
|
||||||
|
@ -142,6 +144,7 @@ class PipelineCache {
|
||||||
} update_rasterizer_state_regs_;
|
} update_rasterizer_state_regs_;
|
||||||
|
|
||||||
struct UpdateDepthStencilStateRegisters {
|
struct UpdateDepthStencilStateRegisters {
|
||||||
|
DXGI_FORMAT format;
|
||||||
uint32_t rb_depthcontrol;
|
uint32_t rb_depthcontrol;
|
||||||
uint32_t rb_stencilrefmask;
|
uint32_t rb_stencilrefmask;
|
||||||
|
|
||||||
|
|
|
@ -27,8 +27,6 @@ RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor,
|
||||||
|
|
||||||
RenderTargetCache::~RenderTargetCache() { Shutdown(); }
|
RenderTargetCache::~RenderTargetCache() { Shutdown(); }
|
||||||
|
|
||||||
bool RenderTargetCache::Initialize() { return true; }
|
|
||||||
|
|
||||||
void RenderTargetCache::Shutdown() { ClearCache(); }
|
void RenderTargetCache::Shutdown() { ClearCache(); }
|
||||||
|
|
||||||
void RenderTargetCache::ClearCache() {
|
void RenderTargetCache::ClearCache() {
|
||||||
|
@ -41,6 +39,19 @@ void RenderTargetCache::ClearCache() {
|
||||||
}
|
}
|
||||||
render_targets_.clear();
|
render_targets_.clear();
|
||||||
|
|
||||||
|
while (descriptor_heaps_depth_ != nullptr) {
|
||||||
|
auto heap = descriptor_heaps_depth_;
|
||||||
|
heap->heap->Release();
|
||||||
|
descriptor_heaps_depth_ = heap->previous;
|
||||||
|
delete heap;
|
||||||
|
}
|
||||||
|
while (descriptor_heaps_color_ != nullptr) {
|
||||||
|
auto heap = descriptor_heaps_color_;
|
||||||
|
heap->heap->Release();
|
||||||
|
descriptor_heaps_color_ = heap->previous;
|
||||||
|
delete heap;
|
||||||
|
}
|
||||||
|
|
||||||
for (uint32_t i = 0; i < xe::countof(heaps_); ++i) {
|
for (uint32_t i = 0; i < xe::countof(heaps_); ++i) {
|
||||||
if (heaps_[i] != nullptr) {
|
if (heaps_[i] != nullptr) {
|
||||||
heaps_[i]->Release();
|
heaps_[i]->Release();
|
||||||
|
@ -51,7 +62,7 @@ void RenderTargetCache::ClearCache() {
|
||||||
|
|
||||||
void RenderTargetCache::BeginFrame() { ClearBindings(); }
|
void RenderTargetCache::BeginFrame() { ClearBindings(); }
|
||||||
|
|
||||||
void RenderTargetCache::UpdateRenderTargets() {
|
bool RenderTargetCache::UpdateRenderTargets() {
|
||||||
// There are two kinds of render target binding updates in this implementation
|
// There are two kinds of render target binding updates in this implementation
|
||||||
// in case something has been changed - full and partial.
|
// in case something has been changed - full and partial.
|
||||||
//
|
//
|
||||||
|
@ -90,16 +101,22 @@ void RenderTargetCache::UpdateRenderTargets() {
|
||||||
// made to the lower part of RT0. So, before draws 2 and 3, full updates must
|
// made to the lower part of RT0. So, before draws 2 and 3, full updates must
|
||||||
// be done.
|
// be done.
|
||||||
//
|
//
|
||||||
// Full updates are better for memory usage than partial updates though, as
|
// Direct3D 12 also requires all render targets to have the same size, so the
|
||||||
// the render targets are re-allocated in the heaps, which means that they can
|
// height is calculated from the EDRAM space available to the last render
|
||||||
// be allocated more tightly, preventing too many 32 MB heaps from being
|
// target available in it. However, to make toggling render targets like in
|
||||||
// created.
|
// the Banjo-Kazooie case possible, the height may be decreased only in full
|
||||||
|
// updates.
|
||||||
|
// TODO(Triang3l): Check if it's safe to calculate the smallest EDRAM region
|
||||||
|
// without aliasing and use it for the height. This won't work if games
|
||||||
|
// actually alias active render targets for some reason.
|
||||||
//
|
//
|
||||||
// To summarize, a full update happens if:
|
// To summarize, a full update happens if:
|
||||||
// - Starting a new frame.
|
// - Starting a new frame.
|
||||||
// - Drawing after resolving.
|
// - Drawing after resolving.
|
||||||
// - Surface pitch changed.
|
// - Surface pitch changed.
|
||||||
// - Sample count changed.
|
// - Sample count changed.
|
||||||
|
// - Render target is disabled and another render target got more space than
|
||||||
|
// is currently available in the textures.
|
||||||
// - EDRAM base of a currently used RT changed.
|
// - EDRAM base of a currently used RT changed.
|
||||||
// - Format of a currently used RT changed.
|
// - Format of a currently used RT changed.
|
||||||
// - Current viewport contains unsaved data from previously used render
|
// - Current viewport contains unsaved data from previously used render
|
||||||
|
@ -112,18 +129,18 @@ void RenderTargetCache::UpdateRenderTargets() {
|
||||||
//
|
//
|
||||||
// A partial update happens if:
|
// A partial update happens if:
|
||||||
// - New render target is added, but doesn't overlap unsaved data from other
|
// - New render target is added, but doesn't overlap unsaved data from other
|
||||||
// currently or previously used render targets.
|
// currently or previously used render targets, and it doesn't require a
|
||||||
|
// bigger size.
|
||||||
auto command_list = command_processor_->GetCurrentCommandList();
|
auto command_list = command_processor_->GetCurrentCommandList();
|
||||||
if (command_list == nullptr) {
|
if (command_list == nullptr) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& regs = *register_file_;
|
auto& regs = *register_file_;
|
||||||
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||||
uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u);
|
uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u);
|
||||||
if (surface_pitch == 0) {
|
if (surface_pitch == 0) {
|
||||||
assert_always();
|
return false;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
|
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
|
||||||
uint32_t msaa_samples_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1;
|
uint32_t msaa_samples_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1;
|
||||||
|
@ -178,10 +195,26 @@ void RenderTargetCache::UpdateRenderTargets() {
|
||||||
// clamp the dirty region heights.
|
// clamp the dirty region heights.
|
||||||
uint32_t edram_row_tiles_32bpp = (surface_pitch * msaa_samples_x + 79) / 80;
|
uint32_t edram_row_tiles_32bpp = (surface_pitch * msaa_samples_x + 79) / 80;
|
||||||
uint32_t edram_row_tiles[5];
|
uint32_t edram_row_tiles[5];
|
||||||
uint32_t edram_max_rows[5];
|
uint32_t edram_max_rows = UINT32_MAX;
|
||||||
for (uint32_t i = 0; i < 5; ++i) {
|
for (uint32_t i = 0; i < 5; ++i) {
|
||||||
edram_row_tiles[i] = edram_row_tiles_32bpp * (formats_are_64bpp[i] ? 2 : 1);
|
edram_row_tiles[i] = edram_row_tiles_32bpp * (formats_are_64bpp[i] ? 2 : 1);
|
||||||
edram_max_rows[i] = (2048 - edram_bases[i]) / edram_row_tiles[i];
|
if (enabled[i]) {
|
||||||
|
// Direct3D 12 doesn't allow render targets with different sizes, so
|
||||||
|
// calculate the height from the render target closest to the end of
|
||||||
|
// EDRAM.
|
||||||
|
edram_max_rows = std::min(edram_max_rows,
|
||||||
|
(2048 - edram_bases[i]) / edram_row_tiles[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (edram_max_rows == 0 || edram_max_rows == UINT32_MAX) {
|
||||||
|
// Some render target is totally in the end of EDRAM, or nothing is drawn.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Check the following full update conditions:
|
||||||
|
// - Render target is disabled and another render target got more space than
|
||||||
|
// is currently available in the textures.
|
||||||
|
if (edram_max_rows > current_edram_max_rows_) {
|
||||||
|
full_update = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get EDRAM usage of the current draw so dirty regions can be calculated.
|
// Get EDRAM usage of the current draw so dirty regions can be calculated.
|
||||||
|
@ -210,7 +243,8 @@ void RenderTargetCache::UpdateRenderTargets() {
|
||||||
}
|
}
|
||||||
uint32_t dirty_bottom =
|
uint32_t dirty_bottom =
|
||||||
std::min(std::min(viewport_bottom, scissor_bottom), 2560u);
|
std::min(std::min(viewport_bottom, scissor_bottom), 2560u);
|
||||||
uint32_t edram_rows = (dirty_bottom * msaa_samples_y + 15) >> 4;
|
uint32_t edram_dirty_rows =
|
||||||
|
std::min((dirty_bottom * msaa_samples_y + 15) >> 4, edram_max_rows);
|
||||||
|
|
||||||
// Check the following full update conditions:
|
// Check the following full update conditions:
|
||||||
// - EDRAM base of a currently used RT changed.
|
// - EDRAM base of a currently used RT changed.
|
||||||
|
@ -257,8 +291,7 @@ void RenderTargetCache::UpdateRenderTargets() {
|
||||||
}
|
}
|
||||||
// Checking if the new render target is overlapping any bound one.
|
// Checking if the new render target is overlapping any bound one.
|
||||||
// binding_1 is the new render target.
|
// binding_1 is the new render target.
|
||||||
edram_length_1 =
|
edram_length_1 = edram_dirty_rows * edram_row_tiles[i];
|
||||||
std::min(edram_rows, edram_max_rows[i]) * edram_row_tiles[i];
|
|
||||||
}
|
}
|
||||||
for (uint32_t j = 0; j < 5; ++j) {
|
for (uint32_t j = 0; j < 5; ++j) {
|
||||||
const RenderTargetBinding& binding_2 = current_bindings_[j];
|
const RenderTargetBinding& binding_2 = current_bindings_[j];
|
||||||
|
@ -272,8 +305,7 @@ void RenderTargetCache::UpdateRenderTargets() {
|
||||||
}
|
}
|
||||||
// Checking if now overlapping a previously used render target.
|
// Checking if now overlapping a previously used render target.
|
||||||
// binding_2 is a currently used render target.
|
// binding_2 is a currently used render target.
|
||||||
edram_length_2 =
|
edram_length_2 = edram_dirty_rows * edram_row_tiles[i];
|
||||||
std::min(edram_rows, edram_max_rows[j]) * edram_row_tiles[i];
|
|
||||||
} else {
|
} else {
|
||||||
// Checking if the new render target is overlapping any bound one.
|
// Checking if the new render target is overlapping any bound one.
|
||||||
// binding_2 is another bound render target.
|
// binding_2 is another bound render target.
|
||||||
|
@ -295,82 +327,200 @@ void RenderTargetCache::UpdateRenderTargets() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no need to attach any new render targets, update dirty regions and exit.
|
// Need to change the bindings.
|
||||||
if (!full_update && !render_targets_to_attach) {
|
if (full_update || render_targets_to_attach) {
|
||||||
|
uint32_t heap_usage[5] = {};
|
||||||
|
if (full_update) {
|
||||||
|
// Export the currently bound render targets before we ruin the bindings.
|
||||||
|
WriteRenderTargetsToEDRAM();
|
||||||
|
|
||||||
|
ClearBindings();
|
||||||
|
current_surface_pitch_ = surface_pitch;
|
||||||
|
current_msaa_samples_ = msaa_samples;
|
||||||
|
current_edram_max_rows_ = edram_max_rows;
|
||||||
|
|
||||||
|
// If updating fully, need to reattach all the render targets and allocate
|
||||||
|
// from scratch.
|
||||||
|
for (uint32_t i = 0; i < 5; ++i) {
|
||||||
|
if (enabled[i]) {
|
||||||
|
render_targets_to_attach |= 1 << i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// If updating partially, only need to attach new render targets.
|
||||||
|
for (uint32_t i = 0; i < 5; ++i) {
|
||||||
|
const RenderTargetBinding& binding = current_bindings_[i];
|
||||||
|
if (!binding.is_bound) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const RenderTarget* render_target = binding.render_target;
|
||||||
|
if (render_target != nullptr) {
|
||||||
|
// There are no holes between 4 MB pages in each heap.
|
||||||
|
heap_usage[render_target->heap_page_first >> 3] +=
|
||||||
|
render_target->heap_page_count;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u",
|
||||||
|
full_update ? "Full" : "Partial", surface_pitch, msaa_samples,
|
||||||
|
render_targets_to_attach);
|
||||||
|
|
||||||
|
auto device =
|
||||||
|
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||||
|
|
||||||
|
D3D12_RESOURCE_BARRIER barriers[5];
|
||||||
|
uint32_t barrier_count = 0;
|
||||||
|
|
||||||
|
// Allocate new render targets and add them to the bindings list.
|
||||||
for (uint32_t i = 0; i < 5; ++i) {
|
for (uint32_t i = 0; i < 5; ++i) {
|
||||||
if (!enabled[i] || (i == 4 && depth_readonly)) {
|
if (!(render_targets_to_attach & (1 << i))) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
RenderTargetBinding& binding = current_bindings_[i];
|
RenderTargetBinding& binding = current_bindings_[i];
|
||||||
binding.edram_dirty_length = std::max(
|
|
||||||
binding.edram_dirty_length,
|
|
||||||
std::min(edram_rows, edram_max_rows[i]) * edram_row_tiles[i]);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// From this point, the function MUST NOT FAIL, otherwise bindings will be
|
|
||||||
// left in an incomplete state.
|
|
||||||
|
|
||||||
uint32_t heap_usage[5] = {};
|
|
||||||
if (full_update) {
|
|
||||||
// Export the currently bound render targets before we ruin the bindings.
|
|
||||||
WriteRenderTargetsToEDRAM();
|
|
||||||
|
|
||||||
ClearBindings();
|
|
||||||
current_surface_pitch_ = surface_pitch;
|
|
||||||
current_msaa_samples_ = msaa_samples;
|
|
||||||
|
|
||||||
// If updating fully, need to reattach all the render targets and allocate
|
|
||||||
// from scratch.
|
|
||||||
for (uint32_t i = 0; i < 5; ++i) {
|
|
||||||
if (enabled[i]) {
|
|
||||||
render_targets_to_attach |= 1 << i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// If updating partially, only need to attach new render targets.
|
|
||||||
for (uint32_t i = 0; i < 5; ++i) {
|
|
||||||
const RenderTargetBinding& binding = current_bindings_[i];
|
|
||||||
if (!binding.is_bound) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const RenderTarget* render_target = binding.render_target;
|
|
||||||
if (render_target != nullptr) {
|
|
||||||
// There are no holes between 4 MB pages in each heap.
|
|
||||||
heap_usage[render_target->heap_page_first >> 3] +=
|
|
||||||
render_target->heap_page_count;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u",
|
|
||||||
full_update ? "Full" : "Partial", surface_pitch, msaa_samples,
|
|
||||||
render_targets_to_attach);
|
|
||||||
|
|
||||||
// Allocate the new render targets.
|
|
||||||
// TODO(Triang3l): Actually allocate them.
|
|
||||||
// TODO(Triang3l): Load the contents from the EDRAM.
|
|
||||||
// TODO(Triang3l): Bind the render targets to the command list.
|
|
||||||
|
|
||||||
// Write the new bindings and update the dirty regions.
|
|
||||||
for (uint32_t i = 0; i < 5; ++i) {
|
|
||||||
if (!enabled[i]) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
RenderTargetBinding& binding = current_bindings_[i];
|
|
||||||
if (render_targets_to_attach & (1 << i)) {
|
|
||||||
binding.is_bound = true;
|
binding.is_bound = true;
|
||||||
binding.edram_base = edram_bases[i];
|
binding.edram_base = edram_bases[i];
|
||||||
binding.edram_dirty_length = 0;
|
binding.edram_dirty_length = 0;
|
||||||
binding.format = formats[i];
|
binding.format = formats[i];
|
||||||
|
binding.render_target = nullptr;
|
||||||
|
|
||||||
|
RenderTargetKey key;
|
||||||
|
key.width_ss_div_80 = edram_row_tiles_32bpp;
|
||||||
|
key.height_ss_div_16 = current_edram_max_rows_;
|
||||||
|
key.is_depth = i == 4;
|
||||||
|
key.format = formats[i];
|
||||||
|
D3D12_RESOURCE_DESC resource_desc;
|
||||||
|
if (!GetResourceDesc(key, resource_desc)) {
|
||||||
|
// Invalid format.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the number of 4 MB pages of 32 MB heaps this RT will use.
|
||||||
|
D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
|
||||||
|
device->GetResourceAllocationInfo(0, 1, &resource_desc);
|
||||||
|
if (allocation_info.SizeInBytes == 0 ||
|
||||||
|
allocation_info.SizeInBytes > (32 << 20)) {
|
||||||
|
assert_always();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
uint32_t heap_page_count =
|
||||||
|
(uint32_t(allocation_info.SizeInBytes) + ((4 << 20) - 1)) >> 22;
|
||||||
|
|
||||||
|
// Find the heap page range for this render target.
|
||||||
|
uint32_t heap_page_first = UINT32_MAX;
|
||||||
|
for (uint32_t j = 0; j < 5; ++j) {
|
||||||
|
if (heap_usage[j] + heap_page_count <= 8) {
|
||||||
|
heap_page_first = j * 8 + heap_usage[j];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (heap_page_first == UINT32_MAX) {
|
||||||
|
assert_always();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the render target.
|
||||||
|
binding.render_target = FindOrCreateRenderTarget(key, heap_page_first);
|
||||||
|
if (binding.render_target == nullptr) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inform Direct3D that we're reusing the heap for this render target.
|
||||||
|
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
|
||||||
|
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
|
||||||
|
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
||||||
|
barrier.Aliasing.pResourceBefore = nullptr;
|
||||||
|
barrier.Aliasing.pResourceAfter = binding.render_target->resource;
|
||||||
}
|
}
|
||||||
if (!(i == 4 && depth_readonly)) {
|
|
||||||
binding.edram_dirty_length = std::max(
|
if (barrier_count != 0) {
|
||||||
binding.edram_dirty_length,
|
command_list->ResourceBarrier(barrier_count, barriers);
|
||||||
std::min(edram_rows, edram_max_rows[i]) * edram_row_tiles[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
barrier_count = 0;
|
||||||
|
|
||||||
|
// Load the contents of the new render targets from the EDRAM buffer and
|
||||||
|
// switch their state to RTV/DSV.
|
||||||
|
for (uint32_t i = 0; i < 5; ++i) {
|
||||||
|
if (!(render_targets_to_attach & (1 << i))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
RenderTarget* render_target = current_bindings_[i].render_target;
|
||||||
|
if (render_target == nullptr) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(Triang3l): Load the contents from the EDRAM buffer.
|
||||||
|
|
||||||
|
// After loading from the EDRAM buffer (which may make this render target
|
||||||
|
// a copy destination), switch it to RTV/DSV if needed.
|
||||||
|
D3D12_RESOURCE_STATES state = i == 4 ? D3D12_RESOURCE_STATE_DEPTH_WRITE
|
||||||
|
: D3D12_RESOURCE_STATE_RENDER_TARGET;
|
||||||
|
if (render_target->state != state) {
|
||||||
|
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
|
||||||
|
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
||||||
|
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
||||||
|
barrier.Transition.pResource = render_target->resource;
|
||||||
|
barrier.Transition.Subresource =
|
||||||
|
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
||||||
|
barrier.Transition.StateBefore = render_target->state;
|
||||||
|
barrier.Transition.StateAfter = state;
|
||||||
|
render_target->state = state;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (barrier_count != 0) {
|
||||||
|
command_list->ResourceBarrier(barrier_count, barriers);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compress the list of the render target because null RTV descriptors are
|
||||||
|
// broken in Direct3D 12 and bind the render targets to the command list.
|
||||||
|
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[4];
|
||||||
|
uint32_t rtv_count = 0;
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
const RenderTargetBinding& binding = current_bindings_[i];
|
||||||
|
if (!binding.is_bound || binding.render_target == nullptr) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
rtv_handles[rtv_count] = binding.render_target->handle;
|
||||||
|
current_pipeline_render_targets_[rtv_count].guest_render_target = i;
|
||||||
|
current_pipeline_render_targets_[rtv_count].format =
|
||||||
|
GetColorDXGIFormat(ColorRenderTargetFormat(formats[4]));
|
||||||
|
++rtv_count;
|
||||||
|
}
|
||||||
|
for (uint32_t i = rtv_count; i < 4; ++i) {
|
||||||
|
current_pipeline_render_targets_[i].guest_render_target = i;
|
||||||
|
current_pipeline_render_targets_[i].format = DXGI_FORMAT_UNKNOWN;
|
||||||
|
}
|
||||||
|
const D3D12_CPU_DESCRIPTOR_HANDLE* dsv_handle;
|
||||||
|
const RenderTargetBinding& depth_binding = current_bindings_[4];
|
||||||
|
current_pipeline_render_targets_[4].guest_render_target = 4;
|
||||||
|
if (depth_binding.is_bound && depth_binding.render_target != nullptr) {
|
||||||
|
dsv_handle = &depth_binding.render_target->handle;
|
||||||
|
current_pipeline_render_targets_[4].format =
|
||||||
|
GetDepthDXGIFormat(DepthRenderTargetFormat(formats[4]));
|
||||||
|
} else {
|
||||||
|
dsv_handle = nullptr;
|
||||||
|
current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN;
|
||||||
|
}
|
||||||
|
command_list->OMSetRenderTargets(rtv_count, rtv_handles, FALSE, dsv_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update the dirty regions.
|
||||||
|
for (uint32_t i = 0; i < 5; ++i) {
|
||||||
|
if (!enabled[i] || (i == 4 && depth_readonly)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
RenderTargetBinding& binding = current_bindings_[i];
|
||||||
|
if (binding.render_target == nullptr) {
|
||||||
|
// Nothing to store to the EDRAM buffer if there was an error.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
binding.edram_dirty_length = std::max(
|
||||||
|
binding.edram_dirty_length, edram_dirty_rows * edram_row_tiles[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderTargetCache::EndFrame() {
|
void RenderTargetCache::EndFrame() {
|
||||||
|
@ -410,9 +560,155 @@ DXGI_FORMAT RenderTargetCache::GetColorDXGIFormat(
|
||||||
void RenderTargetCache::ClearBindings() {
|
void RenderTargetCache::ClearBindings() {
|
||||||
current_surface_pitch_ = 0;
|
current_surface_pitch_ = 0;
|
||||||
current_msaa_samples_ = MsaaSamples::k1X;
|
current_msaa_samples_ = MsaaSamples::k1X;
|
||||||
|
current_edram_max_rows_ = 0;
|
||||||
std::memset(current_bindings_, 0, sizeof(current_bindings_));
|
std::memset(current_bindings_, 0, sizeof(current_bindings_));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fills `desc` with the description of a single-mip, single-sample 2D texture
// for a render target identified by `key`.
//
// Returns false, leaving `desc` untouched, if the key has a zero width or
// height or if its format has no DXGI equivalent (DXGI_FORMAT_UNKNOWN).
// Returns true after every field of `desc` has been written.
bool RenderTargetCache::GetResourceDesc(RenderTargetKey key,
                                        D3D12_RESOURCE_DESC& desc) {
  const bool has_area = key.width_ss_div_80 != 0 && key.height_ss_div_16 != 0;
  if (!has_area) {
    return false;
  }

  // Depth and color formats share the key's format field, so is_depth selects
  // which enum the value is interpreted as.
  DXGI_FORMAT host_format;
  if (key.is_depth) {
    host_format = GetDepthDXGIFormat(DepthRenderTargetFormat(key.format));
  } else {
    host_format = GetColorDXGIFormat(ColorRenderTargetFormat(key.format));
  }
  if (host_format == DXGI_FORMAT_UNKNOWN) {
    return false;
  }

  desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  // TODO(Triang3l): If real MSAA is added, alignment must be 4 MB.
  desc.Alignment = 0;
  // The key stores the dimensions in units of 80x16 pixels.
  desc.Width = key.width_ss_div_80 * 80;
  desc.Height = key.height_ss_div_16 * 16;
  desc.DepthOrArraySize = 1;
  desc.MipLevels = 1;
  desc.Format = host_format;
  desc.SampleDesc.Count = 1;
  desc.SampleDesc.Quality = 0;
  desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  if (key.is_depth) {
    desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
  } else {
    desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
  }
  return true;
}
|
||||||
|
|
||||||
|
// Creates a render target for `key` as a placed resource starting at the 4 MB
// page `heap_page_first`. Pages are numbered across the five 32 MB heaps in
// heaps_, eight pages per heap, so the heap index is `heap_page_first >> 3`
// and the page within the heap is `heap_page_first & 7`.
//
// The needed memory heap and an RTV/DSV descriptor heap are created on demand.
// The new render target starts in the COPY_DEST state and is registered in
// render_targets_.
//
// Returns nullptr if the page index is out of range, the key can't be turned
// into a resource description, the resource doesn't fit in the remaining pages
// of its heap, or any Direct3D object creation fails.
RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
    RenderTargetKey key, uint32_t heap_page_first) {
  // Valid pages are 0...39 (5 heaps of 8 pages). Page 40 would index heaps_[5],
  // which is out of bounds, so reject it at runtime and not only in debug
  // builds.
  if (heap_page_first >= 8 * 5) {
    assert_always();
    return nullptr;
  }

  // TODO(Triang3l): Find an existing render target.

  D3D12_RESOURCE_DESC resource_desc;
  if (!GetResourceDesc(key, resource_desc)) {
    return nullptr;
  }

  auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
  auto device = provider->GetDevice();

  // Get the number of heap pages needed for the render target.
  D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
      device->GetResourceAllocationInfo(0, 1, &resource_desc);
  // SizeInBytes is 64-bit: validate it before narrowing so that an oversized
  // value can't wrap around to a small page count.
  if (allocation_info.SizeInBytes == 0 ||
      allocation_info.SizeInBytes > (32 << 20)) {
    assert_always();
    return nullptr;
  }
  uint32_t heap_page_count =
      (uint32_t(allocation_info.SizeInBytes) + ((4 << 20) - 1)) >> 22;
  // The resource must not cross the end of its 32 MB heap.
  if ((heap_page_first & 7) + heap_page_count > 8) {
    assert_always();
    return nullptr;
  }

  // Create a new descriptor heap if needed, and get a place for the descriptor.
  auto& descriptor_heap =
      key.is_depth ? descriptor_heaps_depth_ : descriptor_heaps_color_;
  if (descriptor_heap == nullptr ||
      descriptor_heap->descriptors_used >= kRenderTargetDescriptorHeapSize) {
    D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc;
    descriptor_heap_desc.Type = key.is_depth ? D3D12_DESCRIPTOR_HEAP_TYPE_DSV
                                             : D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
    descriptor_heap_desc.NumDescriptors = kRenderTargetDescriptorHeapSize;
    descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
    descriptor_heap_desc.NodeMask = 0;
    ID3D12DescriptorHeap* new_d3d_descriptor_heap;
    if (FAILED(device->CreateDescriptorHeap(
            &descriptor_heap_desc, IID_PPV_ARGS(&new_d3d_descriptor_heap)))) {
      XELOGE("Failed to create a heap for %u %s buffer descriptors",
             kRenderTargetDescriptorHeapSize, key.is_depth ? "depth" : "color");
      return nullptr;
    }
    // The full (or absent) heap stays reachable through `previous`; the new
    // one becomes the head of the list because descriptor_heap is a reference
    // to the member pointer.
    RenderTargetDescriptorHeap* new_descriptor_heap =
        new RenderTargetDescriptorHeap;
    new_descriptor_heap->heap = new_d3d_descriptor_heap;
    new_descriptor_heap->start_handle =
        new_d3d_descriptor_heap->GetCPUDescriptorHandleForHeapStart();
    new_descriptor_heap->descriptors_used = 0;
    new_descriptor_heap->previous = descriptor_heap;
    descriptor_heap = new_descriptor_heap;
  }

  // Create the memory heap if it doesn't exist yet.
  ID3D12Heap* heap = heaps_[heap_page_first >> 3];
  if (heap == nullptr) {
    D3D12_HEAP_DESC heap_desc = {};
    heap_desc.SizeInBytes = 32 << 20;
    heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
    // TODO(Triang3l): If real MSAA is added, alignment must be 4 MB.
    heap_desc.Alignment = 0;
    heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
    if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heap)))) {
      XELOGE("Failed to create a 32 MB heap for render targets");
      return nullptr;
    }
    heaps_[heap_page_first >> 3] = heap;
  }

  // The first action likely to be done is EDRAM buffer load.
  D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST;
  ID3D12Resource* resource;
  // The offset within the heap is the page index inside it times 4 MB.
  if (FAILED(device->CreatePlacedResource(heap, (heap_page_first & 7) << 22,
                                          &resource_desc, state, nullptr,
                                          IID_PPV_ARGS(&resource)))) {
    XELOGE(
        "Failed to create a placed resource for %ux%u %s render target with "
        "format %u at heap 4 MB pages %u:%u",
        uint32_t(resource_desc.Width), resource_desc.Height,
        key.is_depth ? "depth" : "color", key.format, heap_page_first,
        heap_page_first + heap_page_count - 1);
    return nullptr;
  }

  // Create the descriptor for the render target in the next free slot of the
  // current descriptor heap.
  D3D12_CPU_DESCRIPTOR_HANDLE descriptor_handle;
  if (key.is_depth) {
    descriptor_handle.ptr =
        descriptor_heap->start_handle.ptr +
        descriptor_heap->descriptors_used * provider->GetDescriptorSizeDSV();
    D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc;
    dsv_desc.Format = resource_desc.Format;
    dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
    dsv_desc.Flags = D3D12_DSV_FLAG_NONE;
    dsv_desc.Texture2D.MipSlice = 0;
    device->CreateDepthStencilView(resource, &dsv_desc, descriptor_handle);
  } else {
    descriptor_handle.ptr =
        descriptor_heap->start_handle.ptr +
        descriptor_heap->descriptors_used * provider->GetDescriptorSizeRTV();
    D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
    rtv_desc.Format = resource_desc.Format;
    rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
    rtv_desc.Texture2D.MipSlice = 0;
    rtv_desc.Texture2D.PlaneSlice = 0;
    device->CreateRenderTargetView(resource, &rtv_desc, descriptor_handle);
  }
  ++descriptor_heap->descriptors_used;

  RenderTarget* render_target = new RenderTarget;
  render_target->resource = resource;
  render_target->state = state;
  render_target->handle = descriptor_handle;
  render_target->key = key;
  render_target->heap_page_first = heap_page_first;
  render_target->heap_page_count = heap_page_count;
  render_targets_.insert(std::make_pair(key.value, render_target));
  return render_target;
}
|
||||||
|
|
||||||
// Stub: the body is empty, so calling this currently has no effect.
void RenderTargetCache::WriteRenderTargetsToEDRAM() {
}
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -184,17 +184,36 @@ class D3D12CommandProcessor;
|
||||||
// in the surface info register is single-sampled.
|
// in the surface info register is single-sampled.
|
||||||
class RenderTargetCache {
|
class RenderTargetCache {
|
||||||
public:
|
public:
|
||||||
|
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors
|
||||||
|
// that contradict each other when you use null RTV descriptors - if you set
|
||||||
|
// a valid format in RTVFormats in the pipeline state, it says that null
|
||||||
|
// descriptors can only be used if the format in the pipeline state is
|
||||||
|
// DXGI_FORMAT_UNKNOWN, however, if DXGI_FORMAT_UNKNOWN is set, it complains
|
||||||
|
// that the format in the pipeline doesn't match the RTV format. So we have to
|
||||||
|
// make render target bindings consecutive and remap the output indices in
|
||||||
|
// pixel shaders.
|
||||||
|
struct PipelineRenderTarget {
|
||||||
|
uint32_t guest_render_target;
|
||||||
|
DXGI_FORMAT format;
|
||||||
|
};
|
||||||
|
|
||||||
RenderTargetCache(D3D12CommandProcessor* command_processor,
|
RenderTargetCache(D3D12CommandProcessor* command_processor,
|
||||||
RegisterFile* register_file);
|
RegisterFile* register_file);
|
||||||
~RenderTargetCache();
|
~RenderTargetCache();
|
||||||
|
|
||||||
bool Initialize();
|
|
||||||
void Shutdown();
|
void Shutdown();
|
||||||
void ClearCache();
|
void ClearCache();
|
||||||
|
|
||||||
void BeginFrame();
|
void BeginFrame();
|
||||||
// Called in the beginning of a draw call - may bind pipelines.
|
// Called in the beginning of a draw call - may bind pipelines.
|
||||||
void UpdateRenderTargets();
|
bool UpdateRenderTargets();
|
||||||
|
// Returns the host-to-guest mappings and host formats of currently bound
|
||||||
|
// render targets for pipeline creation and remapping in shaders. They are
|
||||||
|
// consecutive, and format DXGI_FORMAT_UNKNOWN terminates the list. Depth
|
||||||
|
// format is in the 5th render target.
|
||||||
|
const PipelineRenderTarget* GetCurrentPipelineRenderTargets() const {
|
||||||
|
return current_pipeline_render_targets_;
|
||||||
|
}
|
||||||
void EndFrame();
|
void EndFrame();
|
||||||
|
|
||||||
static inline bool IsColorFormat64bpp(ColorRenderTargetFormat format) {
|
static inline bool IsColorFormat64bpp(ColorRenderTargetFormat format) {
|
||||||
|
@ -203,12 +222,22 @@ class RenderTargetCache {
|
||||||
format == ColorRenderTargetFormat::k_32_32_FLOAT;
|
format == ColorRenderTargetFormat::k_32_32_FLOAT;
|
||||||
}
|
}
|
||||||
static DXGI_FORMAT GetColorDXGIFormat(ColorRenderTargetFormat format);
|
static DXGI_FORMAT GetColorDXGIFormat(ColorRenderTargetFormat format);
|
||||||
|
// Nvidia may have higher performance with 24-bit depth, AMD should have no
|
||||||
|
// performance difference, but with EDRAM loads/stores less conversion should
|
||||||
|
// be performed by the shaders if D24S8 is emulated as D24_UNORM_S8_UINT, and
|
||||||
|
// it's probably more accurate.
|
||||||
|
static inline DXGI_FORMAT GetDepthDXGIFormat(DepthRenderTargetFormat format) {
|
||||||
|
return format == DepthRenderTargetFormat::kD24FS8
|
||||||
|
? DXGI_FORMAT_D32_FLOAT_S8X24_UINT
|
||||||
|
: DXGI_FORMAT_D24_UNORM_S8_UINT;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
union RenderTargetKey {
|
union RenderTargetKey {
|
||||||
struct {
|
struct {
|
||||||
// Supersampled dimensions. The limit is 2560x2560 without AA, 2560x5120
|
// Supersampled (_ss - scaled 2x if needed) dimensions, divided by 80x16.
|
||||||
// with 2x AA, and 5120x5120 with 4x AA.
|
// The limit is 2560x2560 without AA, 2560x5120 with 2x AA, and 5120x5120
|
||||||
|
// with 4x AA.
|
||||||
uint32_t width_ss_div_80 : 7; // 7
|
uint32_t width_ss_div_80 : 7; // 7
|
||||||
uint32_t height_ss_div_16 : 9; // 16
|
uint32_t height_ss_div_16 : 9; // 16
|
||||||
uint32_t is_depth : 1; // 17
|
uint32_t is_depth : 1; // 17
|
||||||
|
@ -259,6 +288,12 @@ class RenderTargetCache {
|
||||||
|
|
||||||
void ClearBindings();
|
void ClearBindings();
|
||||||
|
|
||||||
|
// Returns true if a render target with such key can be created.
|
||||||
|
static bool GetResourceDesc(RenderTargetKey key, D3D12_RESOURCE_DESC& desc);
|
||||||
|
|
||||||
|
RenderTarget* FindOrCreateRenderTarget(RenderTargetKey key,
|
||||||
|
uint32_t heap_page_first);
|
||||||
|
|
||||||
// Must be in a frame to call. Writes the dirty areas of the currently bound
|
// Must be in a frame to call. Writes the dirty areas of the currently bound
|
||||||
// render targets and marks them as clean.
|
// render targets and marks them as clean.
|
||||||
void WriteRenderTargetsToEDRAM();
|
void WriteRenderTargetsToEDRAM();
|
||||||
|
@ -271,11 +306,27 @@ class RenderTargetCache {
|
||||||
// entire EDRAM - a 32-bit depth/stencil one - at some resolution.
|
// entire EDRAM - a 32-bit depth/stencil one - at some resolution.
|
||||||
ID3D12Heap* heaps_[5] = {};
|
ID3D12Heap* heaps_[5] = {};
|
||||||
|
|
||||||
|
static constexpr uint32_t kRenderTargetDescriptorHeapSize = 2048;
|
||||||
|
// Descriptor heap, for linear allocation of heaps and descriptors.
|
||||||
|
struct RenderTargetDescriptorHeap {
|
||||||
|
ID3D12DescriptorHeap* heap;
|
||||||
|
D3D12_CPU_DESCRIPTOR_HANDLE start_handle;
|
||||||
|
// When descriptors_used is >= kRenderTargetDescriptorHeapSize, a new heap
|
||||||
|
// must be allocated and linked to the one that became full now.
|
||||||
|
uint32_t descriptors_used;
|
||||||
|
RenderTargetDescriptorHeap* previous;
|
||||||
|
};
|
||||||
|
RenderTargetDescriptorHeap* descriptor_heaps_color_ = nullptr;
|
||||||
|
RenderTargetDescriptorHeap* descriptor_heaps_depth_ = nullptr;
|
||||||
|
|
||||||
std::unordered_multimap<uint32_t, RenderTarget*> render_targets_;
|
std::unordered_multimap<uint32_t, RenderTarget*> render_targets_;
|
||||||
|
|
||||||
uint32_t current_surface_pitch_ = 0;
|
uint32_t current_surface_pitch_ = 0;
|
||||||
MsaaSamples current_msaa_samples_ = MsaaSamples::k1X;
|
MsaaSamples current_msaa_samples_ = MsaaSamples::k1X;
|
||||||
|
uint32_t current_edram_max_rows_ = 0;
|
||||||
RenderTargetBinding current_bindings_[5] = {};
|
RenderTargetBinding current_bindings_[5] = {};
|
||||||
|
|
||||||
|
PipelineRenderTarget current_pipeline_render_targets_[5];
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -176,6 +176,7 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
" float xe_pixel_half_pixel_offset;\n"
|
" float xe_pixel_half_pixel_offset;\n"
|
||||||
" float2 xe_ssaa_inv_scale;\n"
|
" float2 xe_ssaa_inv_scale;\n"
|
||||||
" uint xe_pixel_pos_reg;\n"
|
" uint xe_pixel_pos_reg;\n"
|
||||||
|
" uint4 xe_color_output_map;\n"
|
||||||
"};\n"
|
"};\n"
|
||||||
"\n"
|
"\n"
|
||||||
"cbuffer xe_loop_bool_constants : register(b1) {\n"
|
"cbuffer xe_loop_bool_constants : register(b1) {\n"
|
||||||
|
@ -291,10 +292,11 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
"XePixelShaderOutput main(XePixelShaderInput xe_input) {\n"
|
"XePixelShaderOutput main(XePixelShaderInput xe_input) {\n"
|
||||||
" float4 xe_r[%u];\n"
|
" float4 xe_r[%u];\n"
|
||||||
" XePixelShaderOutput xe_output;\n"
|
" XePixelShaderOutput xe_output;\n"
|
||||||
" xe_output.colors[0] = (0.0).xxxx;\n"
|
" float4 xe_color_output[4];\n"
|
||||||
" xe_output.colors[1] = (0.0).xxxx;\n"
|
" xe_color_output[0] = (0.0).xxxx;\n"
|
||||||
" xe_output.colors[2] = (0.0).xxxx;\n"
|
" xe_color_output[1] = (0.0).xxxx;\n"
|
||||||
" xe_output.colors[3] = (0.0).xxxx;\n",
|
" xe_color_output[2] = (0.0).xxxx;\n"
|
||||||
|
" xe_color_output[3] = (0.0).xxxx;\n",
|
||||||
kMaxInterpolators, writes_depth_ ? " float depth : SV_Depth;\n" : "",
|
kMaxInterpolators, writes_depth_ ? " float depth : SV_Depth;\n" : "",
|
||||||
register_count());
|
register_count());
|
||||||
// Initialize SV_Depth if using it.
|
// Initialize SV_Depth if using it.
|
||||||
|
@ -370,6 +372,14 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
" xe_output.position.xyz =\n"
|
" xe_output.position.xyz =\n"
|
||||||
" xe_output.position.xyz * xe_ndc_scale +\n"
|
" xe_output.position.xyz * xe_ndc_scale +\n"
|
||||||
" xe_ndc_offset * xe_output.position.www;\n");
|
" xe_ndc_offset * xe_output.position.www;\n");
|
||||||
|
} else if (is_pixel_shader()) {
|
||||||
|
// Remap guest color outputs to host render targets because null render
|
||||||
|
// target descriptors are broken.
|
||||||
|
source.Append(
|
||||||
|
" xe_output.colors[0] = xe_color_output[xe_color_output_map.r];\n"
|
||||||
|
" xe_output.colors[1] = xe_color_output[xe_color_output_map.g];\n"
|
||||||
|
" xe_output.colors[2] = xe_color_output[xe_color_output_map.b];\n"
|
||||||
|
" xe_output.colors[3] = xe_color_output[xe_color_output_map.a];\n");
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Window offset, half pixel offset, alpha test, gamma.
|
// TODO(Triang3l): Window offset, half pixel offset, alpha test, gamma.
|
||||||
source.Append(
|
source.Append(
|
||||||
|
@ -726,7 +736,7 @@ void HlslShaderTranslator::EmitStoreResult(const InstructionResult& result,
|
||||||
EmitSourceDepth("xe_output.point_size");
|
EmitSourceDepth("xe_output.point_size");
|
||||||
break;
|
break;
|
||||||
case InstructionStorageTarget::kColorTarget:
|
case InstructionStorageTarget::kColorTarget:
|
||||||
EmitSourceDepth("xe_output.colors");
|
EmitSourceDepth("xe_color_output");
|
||||||
storage_is_array = true;
|
storage_is_array = true;
|
||||||
break;
|
break;
|
||||||
case InstructionStorageTarget::kDepth:
|
case InstructionStorageTarget::kDepth:
|
||||||
|
|
|
@ -37,6 +37,9 @@ class HlslShaderTranslator : public ShaderTranslator {
|
||||||
// vec4 3
|
// vec4 3
|
||||||
float ssaa_inv_scale[2];
|
float ssaa_inv_scale[2];
|
||||||
uint32_t pixel_pos_reg;
|
uint32_t pixel_pos_reg;
|
||||||
|
uint32_t padding_3;
|
||||||
|
// vec4 4
|
||||||
|
uint32_t color_output_map[4];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TextureSRV {
|
struct TextureSRV {
|
||||||
|
|
Loading…
Reference in New Issue