[D3D12] Overlap detection when adding RTs
This commit is contained in:
parent
46dc640209
commit
c0c0ca263d
|
@ -9,8 +9,10 @@
|
|||
|
||||
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||
|
@ -49,8 +51,7 @@ void RenderTargetCache::ClearCache() {
|
|||
|
||||
void RenderTargetCache::BeginFrame() { ClearBindings(); }
|
||||
|
||||
void RenderTargetCache::UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
|
||||
const D3D12_RECT& scissor */) {
|
||||
void RenderTargetCache::UpdateRenderTargets() {
|
||||
// There are two kinds of render target binding updates in this implementation
|
||||
// in case something has been changed - full and partial.
|
||||
//
|
||||
|
@ -104,6 +105,11 @@ void RenderTargetCache::UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
|
|||
// - Current viewport contains unsaved data from previously used render
|
||||
// targets.
|
||||
// - New render target overlaps unsaved data from other bound render targets.
|
||||
//
|
||||
// "Previously used" and "new" in the last 2 conditions is important so if the
|
||||
// game has render targets aliased in the same draw call, there won't be a
|
||||
// full update every draw.
|
||||
//
|
||||
// A partial update happens if:
|
||||
// - New render target is added, but doesn't overlap unsaved data from other
|
||||
// currently or previously used render targets.
|
||||
|
@ -114,23 +120,44 @@ void RenderTargetCache::UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
|
|||
|
||||
auto& regs = *register_file_;
|
||||
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||
uint32_t surface_pitch = rb_surface_info & 0x3FFF;
|
||||
uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u);
|
||||
if (surface_pitch == 0) {
|
||||
assert_always();
|
||||
return;
|
||||
}
|
||||
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
|
||||
uint32_t msaa_samples_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1;
|
||||
uint32_t msaa_samples_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1;
|
||||
|
||||
// Extract color/depth info in an unified way.
|
||||
bool enabled[5];
|
||||
uint32_t edram_bases[5];
|
||||
uint32_t formats[5];
|
||||
bool formats_are_64bpp[5];
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
|
||||
if (xenos::ModeControl(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7) !=
|
||||
xenos::ModeControl::kColorDepth) {
|
||||
rb_color_mask = 0;
|
||||
}
|
||||
bool color_enabled[4] = {
|
||||
(rb_color_mask & 0xF) != 0, (rb_color_mask & 0xF0) != 0,
|
||||
(rb_color_mask & 0xF00) != 0, (rb_color_mask & 0xF000) != 0};
|
||||
uint32_t rb_color_info[4] = {
|
||||
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
|
||||
regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32};
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
enabled[i] = (rb_color_mask & (0xF << (i * 4))) != 0;
|
||||
edram_bases[i] = std::min(rb_color_info[i] & 0xFFF, 2048u);
|
||||
formats[i] = (rb_color_info[i] >> 12) & 0xF;
|
||||
formats_are_64bpp[i] =
|
||||
IsColorFormat64bpp(ColorRenderTargetFormat(formats[i]));
|
||||
}
|
||||
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
|
||||
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
|
||||
// 0x1 = stencil test enabled, 0x2 = depth test enabled, 0x4 = depth write enabled.
|
||||
bool depth_enabled = (rb_depthcontrol & (0x1 | 0x2 | 0x4)) != 0;
|
||||
// 0x1 = stencil test, 0x2 = depth test, 0x4 = depth write.
|
||||
enabled[4] = (rb_depthcontrol & (0x1 | 0x2 | 0x4)) != 0;
|
||||
edram_bases[4] = std::min(rb_depth_info & 0xFFF, 2048u);
|
||||
formats[4] = (rb_depth_info >> 12) & 0x1;
|
||||
formats_are_64bpp[4] = false;
|
||||
// Don't mark depth regions as dirty if not writing the depth.
|
||||
bool depth_readonly = (rb_depthcontrol & (0x1 | 0x4)) == 0;
|
||||
|
||||
bool full_update = false;
|
||||
|
||||
|
@ -147,54 +174,138 @@ void RenderTargetCache::UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
|
|||
full_update = true;
|
||||
}
|
||||
|
||||
// Get the maximum height of each render target in EDRAM rows to help
|
||||
// clamp the dirty region heights.
|
||||
uint32_t edram_row_tiles_32bpp = (surface_pitch * msaa_samples_x + 79) / 80;
|
||||
uint32_t edram_row_tiles[5];
|
||||
uint32_t edram_max_rows[5];
|
||||
for (uint32_t i = 0; i < 5; ++i) {
|
||||
edram_row_tiles[i] = edram_row_tiles_32bpp * (formats_are_64bpp[i] ? 2 : 1);
|
||||
edram_max_rows[i] = (2048 - edram_bases[i]) / edram_row_tiles[i];
|
||||
}
|
||||
|
||||
// Get EDRAM usage of the current draw so dirty regions can be calculated.
|
||||
// See D3D12CommandProcessor::UpdateFixedFunctionState for more info.
|
||||
int16_t window_offset_y =
|
||||
(regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32 >> 16) & 0x7FFF;
|
||||
if (window_offset_y & 0x4000) {
|
||||
window_offset_y |= 0x8000;
|
||||
}
|
||||
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
|
||||
float viewport_scale_y = (pa_cl_vte_cntl & (1 << 2))
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
|
||||
: 1280.0f;
|
||||
float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3))
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
|
||||
: viewport_scale_y;
|
||||
if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) {
|
||||
viewport_offset_y += float(window_offset_y);
|
||||
}
|
||||
uint32_t viewport_bottom = uint32_t(std::max(
|
||||
0.0f, std::ceil(viewport_offset_y + std::abs(viewport_scale_y))));
|
||||
uint32_t scissor_bottom =
|
||||
(regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32 >> 16) & 0x7FFF;
|
||||
if (!(regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32 & (1u << 31))) {
|
||||
scissor_bottom = std::max(int32_t(scissor_bottom) + window_offset_y, 0);
|
||||
}
|
||||
uint32_t dirty_bottom =
|
||||
std::min(std::min(viewport_bottom, scissor_bottom), 2560u);
|
||||
uint32_t edram_rows = (dirty_bottom * msaa_samples_y + 15) >> 4;
|
||||
|
||||
// Check the following full update conditions:
|
||||
// - EDRAM base of a currently used RT changed.
|
||||
// - Format of a currently used RT changed.
|
||||
// TODO(Triang3l): Check the following full update conditions here:
|
||||
// - Current viewport contains unsaved data from previously used render
|
||||
// targets.
|
||||
// Also build a list of render targets to attach in a partial update.
|
||||
uint32_t render_targets_to_attach = 0;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (!color_enabled[i]) {
|
||||
if (!full_update) {
|
||||
for (uint32_t i = 0; i < 5; ++i) {
|
||||
if (!enabled[i]) {
|
||||
continue;
|
||||
}
|
||||
RenderTargetBinding& binding = current_bindings_[i];
|
||||
const RenderTargetBinding& binding = current_bindings_[i];
|
||||
if (binding.is_bound) {
|
||||
// TODO(Triang3l): If was inactive, check if overlapping unsaved data now.
|
||||
if ((rb_color_info[i] & 0xFFF) != binding.edram_base ||
|
||||
ColorRenderTargetFormat((rb_color_info[i] >> 12) & 0xF) !=
|
||||
binding.color_format) {
|
||||
if (binding.edram_base != edram_bases[i] ||
|
||||
binding.format != formats[i]) {
|
||||
full_update = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
render_targets_to_attach |= 1 << i;
|
||||
}
|
||||
}
|
||||
if (depth_enabled) {
|
||||
RenderTargetBinding& binding = current_bindings_[4];
|
||||
if (binding.is_bound) {
|
||||
// TODO(Triang3l): If was inactive, check if overlapping unsaved data now.
|
||||
if ((rb_depth_info & 0xFFF) != binding.edram_base ||
|
||||
DepthRenderTargetFormat((rb_depth_info >> 12) & 0x1) !=
|
||||
binding.depth_format) {
|
||||
full_update = true;
|
||||
}
|
||||
|
||||
// Check the following full update conditions here:
|
||||
// - Current viewport contains unsaved data from previously used render
|
||||
// targets.
|
||||
// - New render target overlaps unsaved data from other bound render
|
||||
// targets.
|
||||
if (!full_update) {
|
||||
for (uint32_t i = 0; i < 5; ++i) {
|
||||
const RenderTargetBinding& binding_1 = current_bindings_[i];
|
||||
uint32_t edram_length_1;
|
||||
if (binding_1.is_bound) {
|
||||
if (enabled[i]) {
|
||||
continue;
|
||||
}
|
||||
// Checking if now overlapping a previously used render target.
|
||||
// binding_1 is the previously used render target.
|
||||
edram_length_1 = binding_1.edram_dirty_length;
|
||||
} else {
|
||||
render_targets_to_attach |= 1 << 4;
|
||||
if (!(render_targets_to_attach & (1 << i))) {
|
||||
continue;
|
||||
}
|
||||
// Checking if the new render target is overlapping any bound one.
|
||||
// binding_1 is the new render target.
|
||||
edram_length_1 =
|
||||
std::min(edram_rows, edram_max_rows[i]) * edram_row_tiles[i];
|
||||
}
|
||||
for (uint32_t j = 0; j < 5; ++j) {
|
||||
const RenderTargetBinding& binding_2 = current_bindings_[j];
|
||||
if (!binding_2.is_bound) {
|
||||
continue;
|
||||
}
|
||||
uint32_t edram_length_2;
|
||||
if (binding_1.is_bound) {
|
||||
if (!enabled[j]) {
|
||||
continue;
|
||||
}
|
||||
// Checking if now overlapping a previously used render target.
|
||||
// binding_2 is a currently used render target.
|
||||
edram_length_2 =
|
||||
std::min(edram_rows, edram_max_rows[j]) * edram_row_tiles[i];
|
||||
} else {
|
||||
// Checking if the new render target is overlapping any bound one.
|
||||
// binding_2 is another bound render target.
|
||||
edram_length_2 = binding_2.edram_dirty_length;
|
||||
}
|
||||
// Do a full update if there is overlap.
|
||||
if (edram_bases[i] < edram_bases[j] + edram_length_2 &&
|
||||
edram_bases[i] + edram_length_1 > edram_bases[j]) {
|
||||
XELOGGPU("RT Cache: Overlap between %u (%u:%u) and %u (%u:%u)", i,
|
||||
edram_bases[i], edram_bases[i] + edram_length_1 - 1, j,
|
||||
edram_bases[j], edram_bases[j] + edram_length_2 - 1);
|
||||
full_update = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (full_update) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(Triang3l): Check the following full update condition here:
|
||||
// - New render target overlaps unsaved data from other bound render targets.
|
||||
|
||||
// If no need to attach any new render targets, update activation state, dirty
|
||||
// regions and exit early.
|
||||
// If no need to attach any new render targets, update dirty regions and exit.
|
||||
if (!full_update && !render_targets_to_attach) {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
current_bindings_[i].is_active = color_enabled[i];
|
||||
for (uint32_t i = 0; i < 5; ++i) {
|
||||
if (!enabled[i] || (i == 4 && depth_readonly)) {
|
||||
continue;
|
||||
}
|
||||
RenderTargetBinding& binding = current_bindings_[i];
|
||||
binding.edram_dirty_length = std::max(
|
||||
binding.edram_dirty_length,
|
||||
std::min(edram_rows, edram_max_rows[i]) * edram_row_tiles[i]);
|
||||
}
|
||||
current_bindings_[4].is_active = depth_enabled;
|
||||
// TODO(Triang3l): Update dirty regions.
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -212,14 +323,11 @@ void RenderTargetCache::UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
|
|||
|
||||
// If updating fully, need to reattach all the render targets and allocate
|
||||
// from scratch.
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (color_enabled[i]) {
|
||||
for (uint32_t i = 0; i < 5; ++i) {
|
||||
if (enabled[i]) {
|
||||
render_targets_to_attach |= 1 << i;
|
||||
}
|
||||
}
|
||||
if (depth_enabled) {
|
||||
render_targets_to_attach |= 1 << 4;
|
||||
}
|
||||
} else {
|
||||
// If updating partially, only need to attach new render targets.
|
||||
for (uint32_t i = 0; i < 5; ++i) {
|
||||
|
@ -236,7 +344,7 @@ void RenderTargetCache::UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
|
|||
}
|
||||
}
|
||||
}
|
||||
XELOGGPU("RT Cache: %s update! Pitch %u, samples %u, RTs to attach %u.",
|
||||
XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u",
|
||||
full_update ? "Full" : "Partial", surface_pitch, msaa_samples,
|
||||
render_targets_to_attach);
|
||||
|
||||
|
@ -245,24 +353,23 @@ void RenderTargetCache::UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
|
|||
// TODO(Triang3l): Load the contents from the EDRAM.
|
||||
// TODO(Triang3l): Bind the render targets to the command list.
|
||||
|
||||
// Write the new bindings.
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (!(render_targets_to_attach & (1 << i))) {
|
||||
// Write the new bindings and update the dirty regions.
|
||||
for (uint32_t i = 0; i < 5; ++i) {
|
||||
if (!enabled[i]) {
|
||||
continue;
|
||||
}
|
||||
RenderTargetBinding& binding = current_bindings_[i];
|
||||
if (render_targets_to_attach & (1 << i)) {
|
||||
binding.is_bound = true;
|
||||
binding.is_active = true;
|
||||
binding.edram_base = rb_color_info[i] & 0xFFF;
|
||||
binding.color_format =
|
||||
ColorRenderTargetFormat((rb_color_info[i] >> 12) & 0xF);
|
||||
binding.edram_base = edram_bases[i];
|
||||
binding.edram_dirty_length = 0;
|
||||
binding.format = formats[i];
|
||||
}
|
||||
if (!(i == 4 && depth_readonly)) {
|
||||
binding.edram_dirty_length = std::max(
|
||||
binding.edram_dirty_length,
|
||||
std::min(edram_rows, edram_max_rows[i]) * edram_row_tiles[i]);
|
||||
}
|
||||
if (render_targets_to_attach & (1 << 4)) {
|
||||
RenderTargetBinding& binding = current_bindings_[4];
|
||||
binding.is_bound = true;
|
||||
binding.is_active = true;
|
||||
binding.edram_base = rb_depth_info & 0xFFF;
|
||||
binding.depth_format = DepthRenderTargetFormat((rb_depth_info >> 12) & 0x1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -271,6 +378,35 @@ void RenderTargetCache::EndFrame() {
|
|||
ClearBindings();
|
||||
}
|
||||
|
||||
DXGI_FORMAT RenderTargetCache::GetColorDXGIFormat(
|
||||
ColorRenderTargetFormat format) {
|
||||
switch (format) {
|
||||
case ColorRenderTargetFormat::k_8_8_8_8:
|
||||
case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
|
||||
return DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
case ColorRenderTargetFormat::k_2_10_10_10:
|
||||
case ColorRenderTargetFormat::k_2_10_10_10_AS_16_16_16_16:
|
||||
return DXGI_FORMAT_R10G10B10A2_UNORM;
|
||||
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
|
||||
case ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
|
||||
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
|
||||
return DXGI_FORMAT_R16G16B16A16_FLOAT;
|
||||
case ColorRenderTargetFormat::k_16_16:
|
||||
return DXGI_FORMAT_R16G16_UNORM;
|
||||
case ColorRenderTargetFormat::k_16_16_16_16:
|
||||
return DXGI_FORMAT_R16G16B16A16_UNORM;
|
||||
case ColorRenderTargetFormat::k_16_16_FLOAT:
|
||||
return DXGI_FORMAT_R16G16_FLOAT;
|
||||
case ColorRenderTargetFormat::k_32_FLOAT:
|
||||
return DXGI_FORMAT_R32_FLOAT;
|
||||
case ColorRenderTargetFormat::k_32_32_FLOAT:
|
||||
return DXGI_FORMAT_R32G32_FLOAT;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return DXGI_FORMAT_UNKNOWN;
|
||||
}
|
||||
|
||||
void RenderTargetCache::ClearBindings() {
|
||||
current_surface_pitch_ = 0;
|
||||
current_msaa_samples_ = MsaaSamples::k1X;
|
||||
|
|
|
@ -194,10 +194,16 @@ class RenderTargetCache {
|
|||
|
||||
void BeginFrame();
|
||||
// Called in the beginning of a draw call - may bind pipelines.
|
||||
void UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
|
||||
const D3D12_RECT& scissor */);
|
||||
void UpdateRenderTargets();
|
||||
void EndFrame();
|
||||
|
||||
static inline bool IsColorFormat64bpp(ColorRenderTargetFormat format) {
|
||||
return format == ColorRenderTargetFormat::k_16_16_16_16 ||
|
||||
format == ColorRenderTargetFormat::k_16_16_16_16_FLOAT ||
|
||||
format == ColorRenderTargetFormat::k_32_32_FLOAT;
|
||||
}
|
||||
static DXGI_FORMAT GetColorDXGIFormat(ColorRenderTargetFormat format);
|
||||
|
||||
private:
|
||||
union RenderTargetKey {
|
||||
struct {
|
||||
|
@ -239,10 +245,12 @@ class RenderTargetCache {
|
|||
struct RenderTargetBinding {
|
||||
// Whether this render target has been used since the last full update.
|
||||
bool is_bound;
|
||||
// Whether the render target was actually used in the last draw.
|
||||
bool is_active;
|
||||
uint32_t edram_base;
|
||||
// How many tiles has already been drawn to the render target since the last
|
||||
// full update.
|
||||
uint32_t edram_dirty_length;
|
||||
union {
|
||||
uint32_t format;
|
||||
ColorRenderTargetFormat color_format;
|
||||
DepthRenderTargetFormat depth_format;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue