[D3D12] Rough outline of render target architecture
This commit is contained in:
parent
83cf482a50
commit
52a1a80200
|
@ -483,7 +483,14 @@ bool D3D12CommandProcessor::SetupContext() {
|
||||||
texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
|
texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
|
||||||
shared_memory_.get());
|
shared_memory_.get());
|
||||||
if (!texture_cache_->Initialize()) {
|
if (!texture_cache_->Initialize()) {
|
||||||
XELOGE("Failed to initialize texture cache");
|
XELOGE("Failed to initialize the texture cache");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
render_target_cache_ =
|
||||||
|
std::make_unique<RenderTargetCache>(this, register_file_);
|
||||||
|
if (!render_target_cache_->Initialize()) {
|
||||||
|
XELOGE("Failed to initialize the render target cache");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -509,6 +516,8 @@ void D3D12CommandProcessor::ShutdownContext() {
|
||||||
view_heap_pool_.reset();
|
view_heap_pool_.reset();
|
||||||
constant_buffer_pool_.reset();
|
constant_buffer_pool_.reset();
|
||||||
|
|
||||||
|
render_target_cache_.reset();
|
||||||
|
|
||||||
texture_cache_.reset();
|
texture_cache_.reset();
|
||||||
|
|
||||||
pipeline_cache_.reset();
|
pipeline_cache_.reset();
|
||||||
|
@ -570,10 +579,12 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
|
||||||
view_heap_pool_->ClearCache();
|
view_heap_pool_->ClearCache();
|
||||||
constant_buffer_pool_->ClearCache();
|
constant_buffer_pool_->ClearCache();
|
||||||
|
|
||||||
pipeline_cache_->ClearCache();
|
render_target_cache_->ClearCache();
|
||||||
|
|
||||||
texture_cache_->ClearCache();
|
texture_cache_->ClearCache();
|
||||||
|
|
||||||
|
pipeline_cache_->ClearCache();
|
||||||
|
|
||||||
for (auto it : root_signatures_) {
|
for (auto it : root_signatures_) {
|
||||||
it.second->Release();
|
it.second->Release();
|
||||||
}
|
}
|
||||||
|
@ -607,7 +618,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
if (enable_mode == xenos::ModeControl::kIgnore) {
|
if (enable_mode == xenos::ModeControl::kIgnore) {
|
||||||
// Ignored.
|
// Ignored.
|
||||||
return true;
|
return true;
|
||||||
} else if (enable_mode == xenos::ModeControl::kCopy) {
|
}
|
||||||
|
if (enable_mode == xenos::ModeControl::kCopy) {
|
||||||
// Special copy handling.
|
// Special copy handling.
|
||||||
return IssueCopy();
|
return IssueCopy();
|
||||||
}
|
}
|
||||||
|
@ -666,6 +678,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
bool new_frame = BeginFrame();
|
bool new_frame = BeginFrame();
|
||||||
auto command_list = GetCurrentCommandList();
|
auto command_list = GetCurrentCommandList();
|
||||||
|
|
||||||
|
// Set up the render targets - this may bind pipelines.
|
||||||
|
render_target_cache_->UpdateRenderTargets();
|
||||||
|
|
||||||
// Set the primitive topology.
|
// Set the primitive topology.
|
||||||
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
||||||
switch (primitive_type) {
|
switch (primitive_type) {
|
||||||
|
@ -824,6 +839,8 @@ bool D3D12CommandProcessor::BeginFrame() {
|
||||||
|
|
||||||
texture_cache_->BeginFrame();
|
texture_cache_->BeginFrame();
|
||||||
|
|
||||||
|
render_target_cache_->BeginFrame();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -837,6 +854,8 @@ bool D3D12CommandProcessor::EndFrame() {
|
||||||
auto command_list_setup = command_lists_setup_[current_queue_frame_].get();
|
auto command_list_setup = command_lists_setup_[current_queue_frame_].get();
|
||||||
auto command_list = command_lists_[current_queue_frame_].get();
|
auto command_list = command_lists_[current_queue_frame_].get();
|
||||||
|
|
||||||
|
render_target_cache_->EndFrame();
|
||||||
|
|
||||||
bool setup_written = shared_memory_->EndFrame(
|
bool setup_written = shared_memory_->EndFrame(
|
||||||
command_list_setup->GetCommandList(), command_list->GetCommandList());
|
command_list_setup->GetCommandList(), command_list->GetCommandList());
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#include "xenia/gpu/command_processor.h"
|
#include "xenia/gpu/command_processor.h"
|
||||||
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
|
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
|
||||||
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
||||||
|
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||||
#include "xenia/gpu/d3d12/shared_memory.h"
|
#include "xenia/gpu/d3d12/shared_memory.h"
|
||||||
#include "xenia/gpu/d3d12/texture_cache.h"
|
#include "xenia/gpu/d3d12/texture_cache.h"
|
||||||
#include "xenia/gpu/hlsl_shader_translator.h"
|
#include "xenia/gpu/hlsl_shader_translator.h"
|
||||||
|
@ -169,6 +170,8 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
|
|
||||||
std::unique_ptr<TextureCache> texture_cache_ = nullptr;
|
std::unique_ptr<TextureCache> texture_cache_ = nullptr;
|
||||||
|
|
||||||
|
std::unique_ptr<RenderTargetCache> render_target_cache_ = nullptr;
|
||||||
|
|
||||||
std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr;
|
std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr;
|
||||||
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
|
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
|
||||||
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;
|
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;
|
||||||
|
|
|
@ -0,0 +1,283 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2018 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/base/math.h"
|
||||||
|
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace d3d12 {
|
||||||
|
|
||||||
|
// Stores the (non-owning) command processor and register file pointers. No
// Direct3D 12 objects are created here.
RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor,
                                     RegisterFile* register_file)
    : command_processor_(command_processor),
      register_file_(register_file) {
}
|
||||||
|
|
||||||
|
// Tears the cache down through Shutdown when the object is destroyed.
RenderTargetCache::~RenderTargetCache() {
  Shutdown();
}
|
||||||
|
|
||||||
|
// Currently there is nothing to set up, so initialization always succeeds.
bool RenderTargetCache::Initialize() {
  return true;
}
|
||||||
|
|
||||||
|
// Shutting down only needs to drop everything the cache holds.
void RenderTargetCache::Shutdown() {
  ClearCache();
}
|
||||||
|
|
||||||
|
void RenderTargetCache::ClearCache() {
|
||||||
|
for (auto render_target_pair : render_targets_) {
|
||||||
|
RenderTarget* render_target = render_target_pair.second;
|
||||||
|
if (render_target->resource != nullptr) {
|
||||||
|
render_target->resource->Release();
|
||||||
|
}
|
||||||
|
delete render_target;
|
||||||
|
}
|
||||||
|
render_targets_.clear();
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < xe::countof(heaps_); ++i) {
|
||||||
|
if (heaps_[i] != nullptr) {
|
||||||
|
heaps_[i]->Release();
|
||||||
|
heaps_[i] = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Starts the frame with no bindings; ClearBindings also zeroes the stored
// surface pitch.
void RenderTargetCache::BeginFrame() {
  ClearBindings();
}
|
||||||
|
|
||||||
|
void RenderTargetCache::UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
|
||||||
|
const D3D12_RECT& scissor */) {
|
||||||
|
// There are two kinds of render target binding updates in this implementation
|
||||||
|
// in case something has been changed - full and partial.
|
||||||
|
//
|
||||||
|
// A full update involves flushing all the currently bound render targets that
|
||||||
|
// have been modified to the EDRAM buffer, allocating all the newly bound
|
||||||
|
// render targets in the heaps, loading them from the EDRAM buffer and binding
|
||||||
|
// them.
|
||||||
|
//
|
||||||
|
// ("Bound" here means ever used since the last full update - and in this case
|
||||||
|
// it's bound to the Direct3D 12 command list.)
|
||||||
|
//
|
||||||
|
// However, Banjo-Kazooie interleaves color/depth and depth-only writes every
|
||||||
|
// draw call, and doing a full update whenever the color mask is changed is
|
||||||
|
// too expensive. So, we shouldn't do a full update if the game only toggles
|
||||||
|
// color writes and depth testing. Instead, we're only adding or re-enabling
|
||||||
|
// render targets if color writes are being enabled (adding involves loading
|
||||||
|
// the contents from the EDRAM, while re-enabling does nothing on the D3D
|
||||||
|
// side).
|
||||||
|
//
|
||||||
|
// There are cases when simply toggling render targets may still require EDRAM
|
||||||
|
// stores and thus a full update. Here's an example situation:
|
||||||
|
// Draw 1:
|
||||||
|
// - 32bpp RT0 0-10 MB
|
||||||
|
// - 32bpp RT1 3-10 MB
|
||||||
|
// - 1280x720 viewport
|
||||||
|
// Draw 2:
|
||||||
|
// - 32bpp RT0 0-10 MB
|
||||||
|
// - Inactive RT1
|
||||||
|
// - 1280x1440 viewport
|
||||||
|
// Draw 3:
|
||||||
|
// - 32bpp RT0 0-10 MB
|
||||||
|
// - 32bpp RT1 3-10 MB
|
||||||
|
// - 1280x720 viewport
|
||||||
|
// In this case, before draw 2, RT1 must be written to the EDRAM buffer, and
|
||||||
|
// RT0 must be loaded, and also before draw 3 RT1 must receive the changes
|
||||||
|
// made to the lower part of RT0. So, before draws 2 and 3, full updates must
|
||||||
|
// be done.
|
||||||
|
//
|
||||||
|
// Full updates are better for memory usage than partial updates though, as
|
||||||
|
// the render targets are re-allocated in the heaps, which means that they can
|
||||||
|
// be allocated more tightly, preventing too many 32 MB heaps from being
|
||||||
|
// created.
|
||||||
|
//
|
||||||
|
// To summarize, a full update happens if:
|
||||||
|
// - Starting a new frame.
|
||||||
|
// - Drawing after resolving.
|
||||||
|
// - Surface pitch changed.
|
||||||
|
// - Sample count changed.
|
||||||
|
// - EDRAM base of a currently used RT changed.
|
||||||
|
// - Format of a currently used RT changed.
|
||||||
|
// - Current viewport contains unsaved data from previously used render
|
||||||
|
// targets.
|
||||||
|
// - New render target overlaps unsaved data from other bound render targets.
|
||||||
|
// A partial update happens if:
|
||||||
|
// - New render target is added, but doesn't overlap unsaved data from other
|
||||||
|
// currently or previously used render targets.
|
||||||
|
auto command_list = command_processor_->GetCurrentCommandList();
|
||||||
|
if (command_list == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto& regs = *register_file_;
|
||||||
|
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||||
|
uint32_t surface_pitch = rb_surface_info & 0x3FFF;
|
||||||
|
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
|
||||||
|
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
|
||||||
|
if (xenos::ModeControl(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7) ==
|
||||||
|
xenos::ModeControl::kDepth) {
|
||||||
|
rb_color_mask = 0;
|
||||||
|
}
|
||||||
|
bool color_enabled[4] = {
|
||||||
|
(rb_color_mask & 0xF) != 0, (rb_color_mask & 0xF0) != 0,
|
||||||
|
(rb_color_mask & 0xF00) != 0, (rb_color_mask & 0xF000) != 0};
|
||||||
|
uint32_t rb_color_info[4] = {
|
||||||
|
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
|
||||||
|
regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32};
|
||||||
|
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
|
||||||
|
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
|
||||||
|
bool depth_enabled = (rb_depthcontrol & (0x2 | 0x4)) != 0;
|
||||||
|
|
||||||
|
bool full_update = false;
|
||||||
|
|
||||||
|
// Check the following full update conditions:
|
||||||
|
// - Starting a new frame.
|
||||||
|
// - Drawing after resolving.
|
||||||
|
// - Surface pitch changed.
|
||||||
|
// - Sample count changed.
|
||||||
|
// Draws are skipped if the surface pitch is 0, so a full update can be forced
|
||||||
|
// in the beginning of the frame or after resolves by setting the current
|
||||||
|
// pitch to 0.
|
||||||
|
if (current_surface_pitch_ != surface_pitch ||
|
||||||
|
current_msaa_samples_ != msaa_samples) {
|
||||||
|
full_update = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the following full update conditions:
|
||||||
|
// - EDRAM base of a currently used RT changed.
|
||||||
|
// - Format of a currently used RT changed.
|
||||||
|
// TODO(Triang3l): Check the following full update conditions here:
|
||||||
|
// - Current viewport contains unsaved data from previously used render
|
||||||
|
// targets.
|
||||||
|
uint32_t render_targets_to_attach = 0;
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
if (!color_enabled[i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
RenderTargetBinding& binding = current_bindings_[i];
|
||||||
|
if (binding.is_bound) {
|
||||||
|
// TODO(Triang3l): If was inactive, check if overlapping unsaved data now.
|
||||||
|
if ((rb_color_info[i] & 0xFFF) != binding.edram_base ||
|
||||||
|
ColorRenderTargetFormat((rb_color_info[i] >> 12) & 0xF) !=
|
||||||
|
binding.color_format) {
|
||||||
|
full_update = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
render_targets_to_attach |= 1 << i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (depth_enabled) {
|
||||||
|
RenderTargetBinding& binding = current_bindings_[4];
|
||||||
|
if (binding.is_bound) {
|
||||||
|
// TODO(Triang3l): If was inactive, check if overlapping unsaved data now.
|
||||||
|
if ((rb_depth_info & 0xFFF) != binding.edram_base ||
|
||||||
|
DepthRenderTargetFormat((rb_depth_info >> 12) & 0x1) !=
|
||||||
|
binding.depth_format) {
|
||||||
|
full_update = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
render_targets_to_attach |= 1 << 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(Triang3l): Check the following full update condition here:
|
||||||
|
// - New render target overlaps unsaved data from other bound render targets.
|
||||||
|
|
||||||
|
// If no need to attach any new render targets, update activation state, dirty
|
||||||
|
// regions and exit early.
|
||||||
|
if (!full_update && !render_targets_to_attach) {
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
current_bindings_[i].is_active = color_enabled[i];
|
||||||
|
}
|
||||||
|
current_bindings_[4].is_active = depth_enabled;
|
||||||
|
// TODO(Triang3l): Update dirty regions.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// From this point, the function MUST NOT FAIL, otherwise bindings will be
|
||||||
|
// left in an incomplete state.
|
||||||
|
|
||||||
|
uint32_t heap_usage[5] = {};
|
||||||
|
if (full_update) {
|
||||||
|
// Export the currently bound render targets before we ruin the bindings.
|
||||||
|
WriteRenderTargetsToEDRAM();
|
||||||
|
|
||||||
|
ClearBindings();
|
||||||
|
current_surface_pitch_ = surface_pitch;
|
||||||
|
current_msaa_samples_ = msaa_samples;
|
||||||
|
|
||||||
|
// If updating fully, need to reattach all the render targets and allocate
|
||||||
|
// from scratch.
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
if (color_enabled[i]) {
|
||||||
|
render_targets_to_attach |= 1 << i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (depth_enabled) {
|
||||||
|
render_targets_to_attach |= 1 << 4;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// If updating partially, only need to attach new render targets.
|
||||||
|
for (uint32_t i = 0; i < 5; ++i) {
|
||||||
|
const RenderTargetBinding& binding = current_bindings_[i];
|
||||||
|
if (!binding.is_bound) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const RenderTarget* render_target = binding.render_target;
|
||||||
|
if (render_target != nullptr) {
|
||||||
|
// There are no holes between 4 MB pages in each heap.
|
||||||
|
heap_usage[render_target->heap_page_first >> 3] +=
|
||||||
|
render_target->heap_page_count;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
XELOGGPU("RT Cache: %s update! Pitch %u, samples %u, RTs to attach %u.",
|
||||||
|
full_update ? "Full" : "Partial", surface_pitch, msaa_samples,
|
||||||
|
render_targets_to_attach);
|
||||||
|
|
||||||
|
// Allocate the new render targets.
|
||||||
|
// TODO(Triang3l): Actually allocate them.
|
||||||
|
// TODO(Triang3l): Load the contents from the EDRAM.
|
||||||
|
// TODO(Triang3l): Bind the render targets to the command list.
|
||||||
|
|
||||||
|
// Write the new bindings.
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
if (!(render_targets_to_attach & (1 << i))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
RenderTargetBinding& binding = current_bindings_[i];
|
||||||
|
binding.is_bound = true;
|
||||||
|
binding.is_active = true;
|
||||||
|
binding.edram_base = rb_color_info[i] & 0xFFF;
|
||||||
|
binding.color_format =
|
||||||
|
ColorRenderTargetFormat((rb_color_info[i] >> 12) & 0xF);
|
||||||
|
}
|
||||||
|
if (render_targets_to_attach & (1 << 4)) {
|
||||||
|
RenderTargetBinding& binding = current_bindings_[4];
|
||||||
|
binding.is_bound = true;
|
||||||
|
binding.is_active = true;
|
||||||
|
binding.edram_base = rb_depth_info & 0xFFF;
|
||||||
|
binding.depth_format = DepthRenderTargetFormat((rb_depth_info >> 12) & 0x1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finishes the frame: the bound render targets are written back first, and
// only then are the bindings dropped.
void RenderTargetCache::EndFrame() {
  // Must run while the bindings still describe what was rendered to.
  WriteRenderTargetsToEDRAM();

  ClearBindings();
}
|
||||||
|
|
||||||
|
// Resets all five bindings to the unbound state and restores the tracked
// surface parameters to their defaults.
void RenderTargetCache::ClearBindings() {
  // Zero-fill marks every binding as not bound and not active.
  std::memset(current_bindings_, 0, sizeof(current_bindings_));
  current_msaa_samples_ = MsaaSamples::k1X;
  // A zero pitch is what UpdateRenderTargets compares against to detect that a
  // full update is required.
  current_surface_pitch_ = 0;
}
|
||||||
|
|
||||||
|
// Placeholder: intentionally empty for now.
void RenderTargetCache::WriteRenderTargetsToEDRAM() {
}
|
||||||
|
|
||||||
|
} // namespace d3d12
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
|
@ -0,0 +1,277 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2018 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
|
||||||
|
#define XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "xenia/gpu/d3d12/shared_memory.h"
|
||||||
|
#include "xenia/gpu/register_file.h"
|
||||||
|
#include "xenia/gpu/xenos.h"
|
||||||
|
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace d3d12 {
|
||||||
|
|
||||||
|
class D3D12CommandProcessor;
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// How EDRAM is used by Xenos:
|
||||||
|
// (Copied from the old version of the render target cache, so implementation
|
||||||
|
// info may differ from the way EDRAM is emulated now.)
|
||||||
|
// =============================================================================
|
||||||
|
//
|
||||||
|
// On the 360 the render target is an opaque block of memory in EDRAM that's
|
||||||
|
// only accessible via resolves. We use this to our advantage to simulate
|
||||||
|
// something like it as best we can by having a shared backing memory with
|
||||||
|
// a multitude of views for each tile location in EDRAM.
|
||||||
|
//
|
||||||
|
// This allows us to have the same base address write to the same memory
|
||||||
|
// regardless of framebuffer format. Resolving then uses whatever format the
|
||||||
|
// resolve requests straight from the backing memory.
|
||||||
|
//
|
||||||
|
// EDRAM is a beast and we only approximate it as best we can. Basically,
|
||||||
|
// the 10MiB of EDRAM is composed of 2048 5120b tiles. Each tile is 80x16px.
|
||||||
|
// +-----+-----+-----+---
|
||||||
|
// |tile0|tile1|tile2|... 2048 times
|
||||||
|
// +-----+-----+-----+---
|
||||||
|
// Operations dealing with EDRAM deal in tile offsets, so base 0x100 is tile
|
||||||
|
// offset 256, 256*5120=1310720b into the buffer. All rendering operations are
|
||||||
|
// aligned to tiles so trying to draw at 256px wide will have a real width of
|
||||||
|
// 320px by rounding up to the next tile.
|
||||||
|
//
|
||||||
|
// MSAA and other settings will modify the exact pixel sizes, like 4X makes
|
||||||
|
// each tile effectively 40x8px / 2X makes each tile 80x8px, but they are still
|
||||||
|
// all 5120b. As we try to emulate this we adjust our viewport when rendering to
|
||||||
|
// stretch pixels as needed.
|
||||||
|
//
|
||||||
|
// It appears that games also take advantage of MSAA stretching tiles when doing
|
||||||
|
// clears. Games will clear a view with 1/2X pitch/height and 4X MSAA and then
|
||||||
|
// later draw to that view with 1X pitch/height and 1X MSAA.
|
||||||
|
//
|
||||||
|
// The good news is that games cannot read EDRAM directly but must use a copy
|
||||||
|
// operation to get the data out. That gives us a chance to do whatever we
|
||||||
|
// need to (re-tile, etc) only when requested.
|
||||||
|
//
|
||||||
|
// To approximate the tiled EDRAM layout we use a single large chunk of memory.
|
||||||
|
// From this memory we create many VkImages (and VkImageViews) of various
|
||||||
|
// formats and dimensions as requested by the game. These are used as
|
||||||
|
// attachments during rendering and as sources during copies. They are also
|
||||||
|
// heavily aliased - lots of images will reference the same locations in the
|
||||||
|
// underlying EDRAM buffer. The only requirement is that there are no hazards
|
||||||
|
// with specific tiles (reading/writing the same tile through different images)
|
||||||
|
// and otherwise it should be ok *fingers crossed*.
|
||||||
|
//
|
||||||
|
// One complication is the copy/resolve process itself: we need to give back
|
||||||
|
// the data asked for in the format desired and where it goes is arbitrary
|
||||||
|
// (any address in physical memory). If the game is good we get resolves of
|
||||||
|
// EDRAM into fixed base addresses with scissored regions. If the game is bad
|
||||||
|
// we are broken.
|
||||||
|
//
|
||||||
|
// Resolves from EDRAM result in tiled textures - that's texture tiles, not
|
||||||
|
// EDRAM tiles. If we wanted to ensure byte-for-byte correctness we'd need to
|
||||||
|
// then tile the images as we wrote them out. For now, we just attempt to
|
||||||
|
// get the (X, Y) in linear space and do that. This really comes into play
|
||||||
|
// when multiple resolves write to the same texture or memory aliased by
|
||||||
|
// multiple textures - which is common due to predicated tiling. The examples
|
||||||
|
// below demonstrate what this looks like, but the important thing is that
|
||||||
|
// we are aware of partial textures and overlapping regions.
|
||||||
|
//
|
||||||
|
// Example with multiple render targets:
|
||||||
|
// Two color targets of 256x256px tightly packed in EDRAM:
|
||||||
|
// color target 0: base 0x0, pitch 320, scissor 0,0, 256x256
|
||||||
|
// starts at tile 0, buffer offset 0
|
||||||
|
// contains 64 tiles (320/80)*(256/16)
|
||||||
|
// color target 1: base 0x40, pitch 320, scissor 256,0, 256x256
|
||||||
|
// starts at tile 64 (after color target 0), buffer offset 327680b
|
||||||
|
// contains 64 tiles
|
||||||
|
// In EDRAM each set of 64 tiles is contiguous:
|
||||||
|
// +------+------+ +------+------+------+
|
||||||
|
// |ct0.0 |ct0.1 |...|ct0.63|ct1.0 |ct1.1 |...
|
||||||
|
// +------+------+ +------+------+------+
|
||||||
|
// To render into these, we setup two VkImages:
|
||||||
|
// image 0: bound to buffer offset 0, 320x256x4=327680b
|
||||||
|
// image 1: bound to buffer offset 327680b, 320x256x4=327680b
|
||||||
|
// So when we render to them:
|
||||||
|
// +------+-+ scissored to 256x256, actually 320x256
|
||||||
|
// | . | | <- . appears at some untiled offset in the buffer, but
|
||||||
|
// | | | consistent if aliased with the same format
|
||||||
|
// +------+-+
|
||||||
|
// In theory, this gives us proper aliasing in most cases.
|
||||||
|
//
|
||||||
|
// Example with horizontal predicated tiling:
|
||||||
|
// Trying to render 1024x576 @4X MSAA, splitting into two regions
|
||||||
|
// horizontally:
|
||||||
|
// +----------+
|
||||||
|
// | 1024x288 |
|
||||||
|
// +----------+
|
||||||
|
// | 1024x288 |
|
||||||
|
// +----------+
|
||||||
|
// EDRAM configured for 1056x288px with tile size 2112x576px (4X MSAA):
|
||||||
|
// color target 0: base 0x0, pitch 1080, 26x36 tiles
|
||||||
|
// First render (top):
|
||||||
|
// window offset 0,0
|
||||||
|
// scissor 0,0, 1024x288
|
||||||
|
// First resolve (top):
|
||||||
|
// RB_COPY_DEST_BASE 0x1F45D000
|
||||||
|
// RB_COPY_DEST_PITCH pitch=1024, height=576
|
||||||
|
// vertices: 0,0, 1024,0, 1024,288
|
||||||
|
// Second render (bottom):
|
||||||
|
// window offset 0,-288
|
||||||
|
// scissor 0,288, 1024x288
|
||||||
|
// Second resolve (bottom):
|
||||||
|
// RB_COPY_DEST_BASE 0x1F57D000 (+1179648b)
|
||||||
|
// RB_COPY_DEST_PITCH pitch=1024, height=576
|
||||||
|
// (exactly 1024x288*4b after first resolve)
|
||||||
|
// vertices: 0,288, 1024,288, 1024,576
|
||||||
|
// Resolving here is easy as the textures are contiguous in memory. We can
|
||||||
|
// snoop in the first resolve with the dest height to know the total size,
|
||||||
|
// and in the second resolve see that it overlaps and place it in the
|
||||||
|
// existing target.
|
||||||
|
//
|
||||||
|
// Example with vertical predicated tiling:
|
||||||
|
// Trying to render 1280x720 @2X MSAA, splitting into two regions
|
||||||
|
// vertically:
|
||||||
|
// +-----+-----+
|
||||||
|
// | 640 | 640 |
|
||||||
|
// | x | x |
|
||||||
|
// | 720 | 720 |
|
||||||
|
// +-----+-----+
|
||||||
|
// EDRAM configured for 640x736px with tile size 640x1472px (2X MSAA):
|
||||||
|
// color target 0: base 0x0, pitch 640, 8x92 tiles
|
||||||
|
// First render (left):
|
||||||
|
// window offset 0,0
|
||||||
|
// scissor 0,0, 640x720
|
||||||
|
// First resolve (left):
|
||||||
|
// RB_COPY_DEST_BASE 0x1BC6D000
|
||||||
|
// RB_COPY_DEST_PITCH pitch=1280, height=720
|
||||||
|
// vertices: 0,0, 640,0, 640,720
|
||||||
|
// Second render (right):
|
||||||
|
// window offset -640,0
|
||||||
|
// scissor 640,0, 640x720
|
||||||
|
// Second resolve (right):
|
||||||
|
// RB_COPY_DEST_BASE 0x1BC81000 (+81920b)
|
||||||
|
// RB_COPY_DEST_PITCH pitch=1280, height=720
|
||||||
|
// vertices: 640,0, 1280,0, 1280,720
|
||||||
|
// Resolving here is much more difficult as resolves are tiled and the right
|
||||||
|
// half of the texture is 81920b away:
|
||||||
|
// 81920/4bpp=20480px, /32 (texture tile size)=640px
|
||||||
|
// We know the texture size with the first resolve and with the second we
|
||||||
|
// must check for overlap then compute the offset (in both X and Y).
|
||||||
|
//
|
||||||
|
// =============================================================================
|
||||||
|
// Surface size:
|
||||||
|
// =============================================================================
|
||||||
|
//
|
||||||
|
// XGSurfaceSize code in game executables calculates the size in tiles in the
|
||||||
|
// following order:
|
||||||
|
// 1) If MSAA is >=2x, multiply the height by 2.
|
||||||
|
// 2) If MSAA is 4x, multiply the width by 2.
|
||||||
|
// 3) 80x16-align multisampled width and height.
|
||||||
|
// 4) Multiply width*height by 4 or 8 depending on the pixel format.
|
||||||
|
// 5) Divide the byte size by 5120.
|
||||||
|
// This means that when working with EDRAM surface sizes we should assume that a
|
||||||
|
// multisampled surface is the same as a single-sampled surface with 2x height
|
||||||
|
// and width - however, format size doesn't affect the dimensions. Surface pitch
|
||||||
|
// in the surface info register is single-sampled.
|
||||||
|
class RenderTargetCache {
|
||||||
|
public:
|
||||||
|
RenderTargetCache(D3D12CommandProcessor* command_processor,
|
||||||
|
RegisterFile* register_file);
|
||||||
|
~RenderTargetCache();
|
||||||
|
|
||||||
|
bool Initialize();
|
||||||
|
void Shutdown();
|
||||||
|
void ClearCache();
|
||||||
|
|
||||||
|
void BeginFrame();
|
||||||
|
// Called in the beginning of a draw call - may bind pipelines.
|
||||||
|
void UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
|
||||||
|
const D3D12_RECT& scissor */);
|
||||||
|
void EndFrame();
|
||||||
|
|
||||||
|
private:
|
||||||
|
union RenderTargetKey {
|
||||||
|
struct {
|
||||||
|
// Supersampled dimensions. The limit is 2560x2560 without AA, 2560x5120
|
||||||
|
// with 2x AA, and 5120x5120 with 4x AA.
|
||||||
|
uint32_t width_ss_div_80 : 7; // 7
|
||||||
|
uint32_t height_ss_div_16 : 9; // 16
|
||||||
|
uint32_t is_depth : 1; // 17
|
||||||
|
uint32_t format : 4; // 21
|
||||||
|
};
|
||||||
|
uint32_t value;
|
||||||
|
|
||||||
|
// Clearing the unused bits.
|
||||||
|
RenderTargetKey() : value(0) {}
|
||||||
|
RenderTargetKey(const RenderTargetKey& key) : value(key.value) {}
|
||||||
|
RenderTargetKey& operator=(const RenderTargetKey& key) {
|
||||||
|
value = key.value;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
bool operator==(const RenderTargetKey& key) const {
|
||||||
|
return value == key.value;
|
||||||
|
}
|
||||||
|
bool operator!=(const RenderTargetKey& key) const {
|
||||||
|
return value != key.value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct RenderTarget {
|
||||||
|
ID3D12Resource* resource;
|
||||||
|
D3D12_RESOURCE_STATES state;
|
||||||
|
D3D12_CPU_DESCRIPTOR_HANDLE handle;
|
||||||
|
RenderTargetKey key;
|
||||||
|
// The first 4 MB page in the heaps.
|
||||||
|
uint32_t heap_page_first;
|
||||||
|
// Number of 4 MB pages this render target uses.
|
||||||
|
uint32_t heap_page_count;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct RenderTargetBinding {
|
||||||
|
// Whether this render target has been used since the last full update.
|
||||||
|
bool is_bound;
|
||||||
|
// Whether the render target was actually used in the last draw.
|
||||||
|
bool is_active;
|
||||||
|
uint32_t edram_base;
|
||||||
|
union {
|
||||||
|
ColorRenderTargetFormat color_format;
|
||||||
|
DepthRenderTargetFormat depth_format;
|
||||||
|
};
|
||||||
|
RenderTarget* render_target;
|
||||||
|
};
|
||||||
|
|
||||||
|
void ClearBindings();
|
||||||
|
|
||||||
|
// Must be in a frame to call. Writes the dirty areas of the currently bound
|
||||||
|
// render targets and marks them as clean.
|
||||||
|
void WriteRenderTargetsToEDRAM();
|
||||||
|
|
||||||
|
D3D12CommandProcessor* command_processor_;
|
||||||
|
RegisterFile* register_file_;
|
||||||
|
|
||||||
|
// 32 MB heaps backing used render targets resources, created when needed.
|
||||||
|
// 24 MB proved to be not enough to store a single render target occupying the
|
||||||
|
// entire EDRAM - a 32-bit depth/stencil one - at some resolution.
|
||||||
|
ID3D12Heap* heaps_[5] = {};
|
||||||
|
|
||||||
|
std::unordered_multimap<uint32_t, RenderTarget*> render_targets_;
|
||||||
|
|
||||||
|
uint32_t current_surface_pitch_ = 0;
|
||||||
|
MsaaSamples current_msaa_samples_ = MsaaSamples::k1X;
|
||||||
|
RenderTargetBinding current_bindings_[5] = {};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace d3d12
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
|
|
@ -196,7 +196,6 @@ bool TextureCache::Initialize() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ClearBindings();
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -215,6 +214,18 @@ void TextureCache::Shutdown() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TextureCache::ClearCache() {
|
||||||
|
// Destroy all the textures.
|
||||||
|
for (auto texture_pair : textures_) {
|
||||||
|
Texture* texture = texture_pair.second;
|
||||||
|
if (texture->resource != nullptr) {
|
||||||
|
texture->resource->Release();
|
||||||
|
}
|
||||||
|
delete texture;
|
||||||
|
}
|
||||||
|
textures_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
void TextureCache::TextureFetchConstantWritten(uint32_t index) {
|
void TextureCache::TextureFetchConstantWritten(uint32_t index) {
|
||||||
texture_keys_in_sync_ &= ~(1u << index);
|
texture_keys_in_sync_ &= ~(1u << index);
|
||||||
}
|
}
|
||||||
|
@ -300,20 +311,6 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::ClearCache() {
|
|
||||||
ClearBindings();
|
|
||||||
|
|
||||||
// Destroy all the textures.
|
|
||||||
for (auto texture_pair : textures_) {
|
|
||||||
Texture* texture = texture_pair.second;
|
|
||||||
if (texture->resource != nullptr) {
|
|
||||||
texture->resource->Release();
|
|
||||||
}
|
|
||||||
delete texture;
|
|
||||||
}
|
|
||||||
textures_.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
void TextureCache::WriteTextureSRV(uint32_t fetch_constant,
|
void TextureCache::WriteTextureSRV(uint32_t fetch_constant,
|
||||||
TextureDimension shader_dimension,
|
TextureDimension shader_dimension,
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||||
|
|
|
@ -59,6 +59,7 @@ class TextureCache {
|
||||||
|
|
||||||
bool Initialize();
|
bool Initialize();
|
||||||
void Shutdown();
|
void Shutdown();
|
||||||
|
void ClearCache();
|
||||||
|
|
||||||
void TextureFetchConstantWritten(uint32_t index);
|
void TextureFetchConstantWritten(uint32_t index);
|
||||||
|
|
||||||
|
@ -71,8 +72,6 @@ class TextureCache {
|
||||||
void RequestTextures(uint32_t used_vertex_texture_mask,
|
void RequestTextures(uint32_t used_vertex_texture_mask,
|
||||||
uint32_t used_pixel_texture_mask);
|
uint32_t used_pixel_texture_mask);
|
||||||
|
|
||||||
void ClearCache();
|
|
||||||
|
|
||||||
void WriteTextureSRV(uint32_t fetch_constant,
|
void WriteTextureSRV(uint32_t fetch_constant,
|
||||||
TextureDimension shader_dimension,
|
TextureDimension shader_dimension,
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||||
|
|
Loading…
Reference in New Issue