[D3D12] Rough outline of render target architecture

This commit is contained in:
Triang3l 2018-08-07 22:40:24 +03:00
parent 83cf482a50
commit 52a1a80200
6 changed files with 598 additions and 20 deletions

View File

@ -483,7 +483,14 @@ bool D3D12CommandProcessor::SetupContext() {
texture_cache_ = std::make_unique<TextureCache>(this, register_file_, texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
shared_memory_.get()); shared_memory_.get());
if (!texture_cache_->Initialize()) { if (!texture_cache_->Initialize()) {
XELOGE("Failed to initialize texture cache"); XELOGE("Failed to initialize the texture cache");
return false;
}
render_target_cache_ =
std::make_unique<RenderTargetCache>(this, register_file_);
if (!render_target_cache_->Initialize()) {
XELOGE("Failed to initialize the render target cache");
return false; return false;
} }
@ -509,6 +516,8 @@ void D3D12CommandProcessor::ShutdownContext() {
view_heap_pool_.reset(); view_heap_pool_.reset();
constant_buffer_pool_.reset(); constant_buffer_pool_.reset();
render_target_cache_.reset();
texture_cache_.reset(); texture_cache_.reset();
pipeline_cache_.reset(); pipeline_cache_.reset();
@ -570,10 +579,12 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
view_heap_pool_->ClearCache(); view_heap_pool_->ClearCache();
constant_buffer_pool_->ClearCache(); constant_buffer_pool_->ClearCache();
pipeline_cache_->ClearCache(); render_target_cache_->ClearCache();
texture_cache_->ClearCache(); texture_cache_->ClearCache();
pipeline_cache_->ClearCache();
for (auto it : root_signatures_) { for (auto it : root_signatures_) {
it.second->Release(); it.second->Release();
} }
@ -607,7 +618,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
if (enable_mode == xenos::ModeControl::kIgnore) { if (enable_mode == xenos::ModeControl::kIgnore) {
// Ignored. // Ignored.
return true; return true;
} else if (enable_mode == xenos::ModeControl::kCopy) { }
if (enable_mode == xenos::ModeControl::kCopy) {
// Special copy handling. // Special copy handling.
return IssueCopy(); return IssueCopy();
} }
@ -666,6 +678,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
bool new_frame = BeginFrame(); bool new_frame = BeginFrame();
auto command_list = GetCurrentCommandList(); auto command_list = GetCurrentCommandList();
// Set up the render targets - this may bind pipelines.
render_target_cache_->UpdateRenderTargets();
// Set the primitive topology. // Set the primitive topology.
D3D_PRIMITIVE_TOPOLOGY primitive_topology; D3D_PRIMITIVE_TOPOLOGY primitive_topology;
switch (primitive_type) { switch (primitive_type) {
@ -824,6 +839,8 @@ bool D3D12CommandProcessor::BeginFrame() {
texture_cache_->BeginFrame(); texture_cache_->BeginFrame();
render_target_cache_->BeginFrame();
return true; return true;
} }
@ -837,6 +854,8 @@ bool D3D12CommandProcessor::EndFrame() {
auto command_list_setup = command_lists_setup_[current_queue_frame_].get(); auto command_list_setup = command_lists_setup_[current_queue_frame_].get();
auto command_list = command_lists_[current_queue_frame_].get(); auto command_list = command_lists_[current_queue_frame_].get();
render_target_cache_->EndFrame();
bool setup_written = shared_memory_->EndFrame( bool setup_written = shared_memory_->EndFrame(
command_list_setup->GetCommandList(), command_list->GetCommandList()); command_list_setup->GetCommandList(), command_list->GetCommandList());

View File

@ -17,6 +17,7 @@
#include "xenia/gpu/command_processor.h" #include "xenia/gpu/command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h" #include "xenia/gpu/d3d12/d3d12_graphics_system.h"
#include "xenia/gpu/d3d12/pipeline_cache.h" #include "xenia/gpu/d3d12/pipeline_cache.h"
#include "xenia/gpu/d3d12/render_target_cache.h"
#include "xenia/gpu/d3d12/shared_memory.h" #include "xenia/gpu/d3d12/shared_memory.h"
#include "xenia/gpu/d3d12/texture_cache.h" #include "xenia/gpu/d3d12/texture_cache.h"
#include "xenia/gpu/hlsl_shader_translator.h" #include "xenia/gpu/hlsl_shader_translator.h"
@ -169,6 +170,8 @@ class D3D12CommandProcessor : public CommandProcessor {
std::unique_ptr<TextureCache> texture_cache_ = nullptr; std::unique_ptr<TextureCache> texture_cache_ = nullptr;
std::unique_ptr<RenderTargetCache> render_target_cache_ = nullptr;
std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr; std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr; std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr; std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;

View File

@ -0,0 +1,283 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/d3d12/render_target_cache.h"
#include <cstring>
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
namespace xe {
namespace gpu {
namespace d3d12 {
// Stores the owning command processor and the register file the cache reads
// its render target state from. No GPU resources are created here.
RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor,
                                     RegisterFile* register_file)
    : command_processor_(command_processor),
      register_file_(register_file) {
}
// Releases everything the cache still holds by running the regular shutdown
// path.
RenderTargetCache::~RenderTargetCache() {
  Shutdown();
}
// Nothing needs to be set up yet, so initialization always succeeds.
bool RenderTargetCache::Initialize() {
  return true;
}
// Shutting down is currently the same as dropping every cached resource.
void RenderTargetCache::Shutdown() {
  ClearCache();
}
void RenderTargetCache::ClearCache() {
for (auto render_target_pair : render_targets_) {
RenderTarget* render_target = render_target_pair.second;
if (render_target->resource != nullptr) {
render_target->resource->Release();
}
delete render_target;
}
render_targets_.clear();
for (uint32_t i = 0; i < xe::countof(heaps_); ++i) {
if (heaps_[i] != nullptr) {
heaps_[i]->Release();
heaps_[i] = nullptr;
}
}
}
// Starts a frame with no bindings. ClearBindings zeroes the current pitch,
// which makes the first UpdateRenderTargets call with a non-zero surface pitch
// do a full update.
void RenderTargetCache::BeginFrame() {
  ClearBindings();
}
// Reconciles current_bindings_ with the render target state in the register
// file (surface info, color mask, mode control, color/depth info and depth
// control). Decides between doing nothing, a partial update (attaching only
// newly used render targets) and a full update (storing the bound render
// targets, clearing the bindings and attaching everything that is enabled).
// Does nothing if there is no current command list.
void RenderTargetCache::UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
                                               const D3D12_RECT& scissor */) {
  // There are two kinds of render target binding updates in this implementation
  // in case something has been changed - full and partial.
  //
  // A full update involves flushing all the currently bound render targets that
  // have been modified to the EDRAM buffer, allocating all the newly bound
  // render targets in the heaps, loading them from the EDRAM buffer and binding
  // them.
  //
  // ("Bound" here means ever used since the last full update - and in this case
  // it's bound to the Direct3D 12 command list.)
  //
  // However, Banjo-Kazooie interleaves color/depth and depth-only writes every
  // draw call, and doing a full update whenever the color mask is changed is
  // too expensive. So, we shouldn't do a full update if the game only toggles
  // color writes and depth testing. Instead, we're only adding or re-enabling
  // render targets if color writes are being enabled (adding involves loading
  // the contents from the EDRAM, while re-enabling does nothing on the D3D
  // side).
  //
  // There are cases when simply toggling render targets may still require EDRAM
  // stores and thus a full update. Here's an example situation:
  // Draw 1:
  // - 32bpp RT0 0-10 MB
  // - 32bpp RT1 3-10 MB
  // - 1280x720 viewport
  // Draw 2:
  // - 32bpp RT0 0-10 MB
  // - Inactive RT1
  // - 1280x1440 viewport
  // Draw 3:
  // - 32bpp RT0 0-10 MB
  // - 32bpp RT1 3-10 MB
  // - 1280x720 viewport
  // In this case, before draw 2, RT1 must be written to the EDRAM buffer, and
  // RT0 must be loaded, and also before draw 3 RT1 must receive the changes
  // made to the lower part of RT0. So, before draws 2 and 3, full updates must
  // be done.
  //
  // Full updates are better for memory usage than partial updates though, as
  // the render targets are re-allocated in the heaps, which means that they can
  // be allocated more tightly, preventing too many 32 MB heaps from being
  // created.
  //
  // To summarize, a full update happens if:
  // - Starting a new frame.
  // - Drawing after resolving.
  // - Surface pitch changed.
  // - Sample count changed.
  // - EDRAM base of a currently used RT changed.
  // - Format of a currently used RT changed.
  // - Current viewport contains unsaved data from previously used render
  //   targets.
  // - New render target overlaps unsaved data from other bound render targets.
  // A partial update happens if:
  // - New render target is added, but doesn't overlap unsaved data from other
  //   currently or previously used render targets.

  auto command_list = command_processor_->GetCurrentCommandList();
  if (command_list == nullptr) {
    return;
  }

  auto& regs = *register_file_;
  uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
  uint32_t surface_pitch = rb_surface_info & 0x3FFF;
  MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
  uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
  if (xenos::ModeControl(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7) ==
      xenos::ModeControl::kDepth) {
    // Depth-only mode - treat all the color render targets as disabled.
    rb_color_mask = 0;
  }
  // One 4-bit write mask per color render target.
  bool color_enabled[4] = {
      (rb_color_mask & 0xF) != 0, (rb_color_mask & 0xF0) != 0,
      (rb_color_mask & 0xF00) != 0, (rb_color_mask & 0xF000) != 0};
  uint32_t rb_color_info[4] = {
      regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
      regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32};
  uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
  uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
  // NOTE(review): only bits 0x2 and 0x4 are checked here - confirm whether
  // stencil-only draws (presumably bit 0x1) also need the depth binding.
  bool depth_enabled = (rb_depthcontrol & (0x2 | 0x4)) != 0;

  // Makes is_active of all five bindings (4 color + depth) reflect what the
  // current draw has enabled.
  auto update_activation = [&]() {
    for (uint32_t i = 0; i < 4; ++i) {
      current_bindings_[i].is_active = color_enabled[i];
    }
    current_bindings_[4].is_active = depth_enabled;
  };

  bool full_update = false;

  // Check the following full update conditions:
  // - Starting a new frame.
  // - Drawing after resolving.
  // - Surface pitch changed.
  // - Sample count changed.
  // Draws are skipped if the surface pitch is 0, so a full update can be forced
  // in the beginning of the frame or after resolves by setting the current
  // pitch to 0.
  if (current_surface_pitch_ != surface_pitch ||
      current_msaa_samples_ != msaa_samples) {
    full_update = true;
  }

  // Check the following full update conditions:
  // - EDRAM base of a currently used RT changed.
  // - Format of a currently used RT changed.
  // TODO(Triang3l): Check the following full update conditions here:
  // - Current viewport contains unsaved data from previously used render
  //   targets.
  // Bit i of this mask is set if binding i needs to be attached.
  uint32_t render_targets_to_attach = 0;
  for (uint32_t i = 0; i < 4; ++i) {
    if (!color_enabled[i]) {
      continue;
    }
    RenderTargetBinding& binding = current_bindings_[i];
    if (binding.is_bound) {
      // TODO(Triang3l): If was inactive, check if overlapping unsaved data now.
      if ((rb_color_info[i] & 0xFFF) != binding.edram_base ||
          ColorRenderTargetFormat((rb_color_info[i] >> 12) & 0xF) !=
              binding.color_format) {
        full_update = true;
      }
    } else {
      render_targets_to_attach |= 1u << i;
    }
  }
  if (depth_enabled) {
    RenderTargetBinding& binding = current_bindings_[4];
    if (binding.is_bound) {
      // TODO(Triang3l): If was inactive, check if overlapping unsaved data now.
      if ((rb_depth_info & 0xFFF) != binding.edram_base ||
          DepthRenderTargetFormat((rb_depth_info >> 12) & 0x1) !=
              binding.depth_format) {
        full_update = true;
      }
    } else {
      render_targets_to_attach |= 1u << 4;
    }
  }

  // TODO(Triang3l): Check the following full update condition here:
  // - New render target overlaps unsaved data from other bound render targets.

  // If no need to attach any new render targets, update activation state, dirty
  // regions and exit early.
  if (!full_update && !render_targets_to_attach) {
    update_activation();
    // TODO(Triang3l): Update dirty regions.
    return;
  }

  // From this point, the function MUST NOT FAIL, otherwise bindings will be
  // left in an incomplete state.

  // Number of 4 MB pages already taken in each of the 32 MB heaps.
  uint32_t heap_usage[5] = {};
  if (full_update) {
    // Export the currently bound render targets before we ruin the bindings.
    WriteRenderTargetsToEDRAM();

    ClearBindings();
    current_surface_pitch_ = surface_pitch;
    current_msaa_samples_ = msaa_samples;

    // If updating fully, need to reattach all the render targets and allocate
    // from scratch.
    for (uint32_t i = 0; i < 4; ++i) {
      if (color_enabled[i]) {
        render_targets_to_attach |= 1u << i;
      }
    }
    if (depth_enabled) {
      render_targets_to_attach |= 1u << 4;
    }
  } else {
    // If updating partially, only need to attach new render targets.
    for (uint32_t i = 0; i < 5; ++i) {
      const RenderTargetBinding& binding = current_bindings_[i];
      if (!binding.is_bound) {
        continue;
      }
      const RenderTarget* render_target = binding.render_target;
      if (render_target != nullptr) {
        // There are no holes between 4 MB pages in each heap.
        // 8 pages per heap, so the heap index is the first page divided by 8.
        heap_usage[render_target->heap_page_first >> 3] +=
            render_target->heap_page_count;
      }
    }
  }
  // The sample count is cast explicitly so that an integer, not an enum value,
  // is what gets passed for %u.
  XELOGGPU("RT Cache: %s update! Pitch %u, samples %u, RTs to attach %u.",
           full_update ? "Full" : "Partial", surface_pitch,
           uint32_t(msaa_samples), render_targets_to_attach);

  // Allocate the new render targets.
  // TODO(Triang3l): Actually allocate them.
  // TODO(Triang3l): Load the contents from the EDRAM.
  // TODO(Triang3l): Bind the render targets to the command list.

  // Write the new bindings.
  for (uint32_t i = 0; i < 4; ++i) {
    if (!(render_targets_to_attach & (1u << i))) {
      continue;
    }
    RenderTargetBinding& binding = current_bindings_[i];
    binding.is_bound = true;
    binding.edram_base = rb_color_info[i] & 0xFFF;
    binding.color_format =
        ColorRenderTargetFormat((rb_color_info[i] >> 12) & 0xF);
  }
  if (render_targets_to_attach & (1u << 4)) {
    RenderTargetBinding& binding = current_bindings_[4];
    binding.is_bound = true;
    binding.edram_base = rb_depth_info & 0xFFF;
    binding.depth_format = DepthRenderTargetFormat((rb_depth_info >> 12) & 0x1);
  }

  // Newly attached render targets are enabled by definition, so they become
  // active. Render targets that were already bound must be refreshed as well:
  // on a partial update one of them may have been disabled for this draw, and
  // it must not keep a stale active state.
  update_activation();
}
// Finishes the frame: the bound render targets are stored first, and only then
// are the bindings dropped.
void RenderTargetCache::EndFrame() {
  // Must happen while the bindings still describe what was rendered to.
  WriteRenderTargetsToEDRAM();

  ClearBindings();
}
// Forgets all the current bindings and resets the tracked surface parameters
// to their initial values (pitch 0, 1X samples).
void RenderTargetCache::ClearBindings() {
  // Zero every binding: not bound, not active, no render target attached.
  std::memset(current_bindings_, 0, sizeof(current_bindings_));

  current_msaa_samples_ = MsaaSamples::k1X;
  current_surface_pitch_ = 0;
}
// Placeholder: the body is empty for now, so nothing is written.
void RenderTargetCache::WriteRenderTargetsToEDRAM() {
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,277 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
#define XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
#include <unordered_map>
#include "xenia/gpu/d3d12/shared_memory.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_api.h"
namespace xe {
namespace gpu {
namespace d3d12 {
class D3D12CommandProcessor;
// =============================================================================
// How EDRAM is used by Xenos:
// (Copied from the old version of the render target cache, so implementation
// info may differ from the way EDRAM is emulated now.)
// =============================================================================
//
// On the 360 the render target is an opaque block of memory in EDRAM that's
// only accessible via resolves. We use this to our advantage to simulate
// something like it as best we can by having a shared backing memory with
// a multitude of views for each tile location in EDRAM.
//
// This allows us to have the same base address write to the same memory
// regardless of framebuffer format. Resolving then uses whatever format the
// resolve requests straight from the backing memory.
//
// EDRAM is a beast and we only approximate it as best we can. Basically,
// the 10MiB of EDRAM is composed of 2048 5120b tiles. Each tile is 80x16px.
// +-----+-----+-----+---
// |tile0|tile1|tile2|... 2048 times
// +-----+-----+-----+---
// Operations dealing with EDRAM deal in tile offsets, so base 0x100 is tile
// offset 256, 256*5120=1310720b into the buffer. All rendering operations are
// aligned to tiles so trying to draw at 256px wide will have a real width of
// 320px by rounding up to the next tile.
//
// MSAA and other settings will modify the exact pixel sizes, like 4X makes
// each tile effectively 40x8px / 2X makes each tile 80x8px, but they are still
// all 5120b. As we try to emulate this we adjust our viewport when rendering to
// stretch pixels as needed.
//
// It appears that games also take advantage of MSAA stretching tiles when doing
// clears. Games will clear a view with 1/2X pitch/height and 4X MSAA and then
// later draw to that view with 1X pitch/height and 1X MSAA.
//
// The good news is that games cannot read EDRAM directly but must use a copy
// operation to get the data out. That gives us a chance to do whatever we
// need to (re-tile, etc) only when requested.
//
// To approximate the tiled EDRAM layout we use a single large chunk of memory.
// From this memory we create many VkImages (and VkImageViews) of various
// formats and dimensions as requested by the game. These are used as
// attachments during rendering and as sources during copies. They are also
// heavily aliased - lots of images will reference the same locations in the
// underlying EDRAM buffer. The only requirement is that there are no hazards
// with specific tiles (reading/writing the same tile through different images)
// and otherwise it should be ok *fingers crossed*.
//
// One complication is the copy/resolve process itself: we need to give back
// the data asked for in the format desired and where it goes is arbitrary
// (any address in physical memory). If the game is good we get resolves of
// EDRAM into fixed base addresses with scissored regions. If the game is bad
// we are broken.
//
// Resolves from EDRAM result in tiled textures - that's texture tiles, not
// EDRAM tiles. If we wanted to ensure byte-for-byte correctness we'd need to
// then tile the images as we wrote them out. For now, we just attempt to
// get the (X, Y) in linear space and do that. This really comes into play
// when multiple resolves write to the same texture or memory aliased by
// multiple textures - which is common due to predicated tiling. The examples
// below demonstrate what this looks like, but the important thing is that
// we are aware of partial textures and overlapping regions.
//
// Example with multiple render targets:
// Two color targets of 256x256px tightly packed in EDRAM:
// color target 0: base 0x0, pitch 320, scissor 0,0, 256x256
// starts at tile 0, buffer offset 0
// contains 64 tiles (320/80)*(256/16)
// color target 1: base 0x40, pitch 320, scissor 256,0, 256x256
// starts at tile 64 (after color target 0), buffer offset 327680b
// contains 64 tiles
// In EDRAM each set of 64 tiles is contiguous:
// +------+------+ +------+------+------+
// |ct0.0 |ct0.1 |...|ct0.63|ct1.0 |ct1.1 |...
// +------+------+ +------+------+------+
// To render into these, we setup two VkImages:
// image 0: bound to buffer offset 0, 320x256x4=327680b
// image 1: bound to buffer offset 327680b, 320x256x4=327680b
// So when we render to them:
// +------+-+ scissored to 256x256, actually 320x256
// | . | | <- . appears at some untiled offset in the buffer, but
// | | | consistent if aliased with the same format
// +------+-+
// In theory, this gives us proper aliasing in most cases.
//
// Example with horizontal predicated tiling:
// Trying to render 1024x576 @4X MSAA, splitting into two regions
// horizontally:
// +----------+
// | 1024x288 |
// +----------+
// | 1024x288 |
// +----------+
// EDRAM configured for 1056x288px with tile size 2112x576px (4X MSAA):
// color target 0: base 0x0, pitch 1080, 26x36 tiles
// First render (top):
// window offset 0,0
// scissor 0,0, 1024x288
// First resolve (top):
// RB_COPY_DEST_BASE 0x1F45D000
// RB_COPY_DEST_PITCH pitch=1024, height=576
// vertices: 0,0, 1024,0, 1024,288
// Second render (bottom):
// window offset 0,-288
// scissor 0,288, 1024x288
// Second resolve (bottom):
// RB_COPY_DEST_BASE 0x1F57D000 (+1179648b)
// RB_COPY_DEST_PITCH pitch=1024, height=576
// (exactly 1024x288*4b after first resolve)
// vertices: 0,288, 1024,288, 1024,576
// Resolving here is easy as the textures are contiguous in memory. We can
// snoop in the first resolve with the dest height to know the total size,
// and in the second resolve see that it overlaps and place it in the
// existing target.
//
// Example with vertical predicated tiling:
// Trying to render 1280x720 @2X MSAA, splitting into two regions
// vertically:
// +-----+-----+
// | 640 | 640 |
// | x | x |
// | 720 | 720 |
// +-----+-----+
// EDRAM configured for 640x736px with tile size 640x1472px (2X MSAA):
// color target 0: base 0x0, pitch 640, 8x92 tiles
// First render (left):
// window offset 0,0
// scissor 0,0, 640x720
// First resolve (left):
// RB_COPY_DEST_BASE 0x1BC6D000
// RB_COPY_DEST_PITCH pitch=1280, height=720
// vertices: 0,0, 640,0, 640,720
// Second render (right):
// window offset -640,0
// scissor 640,0, 640x720
// Second resolve (right):
// RB_COPY_DEST_BASE 0x1BC81000 (+81920b)
// RB_COPY_DEST_PITCH pitch=1280, height=720
// vertices: 640,0, 1280,0, 1280,720
// Resolving here is much more difficult as resolves are tiled and the right
// half of the texture is 81920b away:
// 81920/4bpp=20480px, /32 (texture tile size)=640px
// We know the texture size with the first resolve and with the second we
// must check for overlap then compute the offset (in both X and Y).
//
// =============================================================================
// Surface size:
// =============================================================================
//
// XGSurfaceSize code in game executables calculates the size in tiles in the
// following order:
// 1) If MSAA is >=2x, multiply the height by 2.
// 2) If MSAA is 4x, multiply the width by 2.
// 3) 80x16-align multisampled width and height.
// 4) Multiply width*height by 4 or 8 depending on the pixel format.
// 5) Divide the byte size by 5120.
// This means that when working with EDRAM surface sizes we should assume that a
// multisampled surface is the same as a single-sampled surface with 2x height
// and width - however, format size doesn't affect the dimensions. Surface pitch
// in the surface info register is single-sampled.
// Tracks which render targets are bound for the current draw and owns the
// Direct3D 12 heaps and resources backing them.
class RenderTargetCache {
 public:
  RenderTargetCache(D3D12CommandProcessor* command_processor,
                    RegisterFile* register_file);
  ~RenderTargetCache();

  // The cache owns raw heap and render target pointers that are released in
  // the destructor, so copying it would release them twice.
  RenderTargetCache(const RenderTargetCache&) = delete;
  RenderTargetCache& operator=(const RenderTargetCache&) = delete;

  bool Initialize();
  void Shutdown();
  // Destroys all the cached render targets and releases the heaps.
  void ClearCache();

  void BeginFrame();
  // Called in the beginning of a draw call - may bind pipelines.
  void UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport,
                              const D3D12_RECT& scissor */);
  void EndFrame();

 private:
  // Packed description of a render target. The trailing comment on each field
  // is the running total of bits used.
  union RenderTargetKey {
    struct {
      // Supersampled dimensions. The limit is 2560x2560 without AA, 2560x5120
      // with 2x AA, and 5120x5120 with 4x AA.
      uint32_t width_ss_div_80 : 7;   // 7
      uint32_t height_ss_div_16 : 9;  // 16
      uint32_t is_depth : 1;          // 17
      uint32_t format : 4;            // 21
    };
    uint32_t value;

    // Clearing the unused bits.
    RenderTargetKey() : value(0) {}
    RenderTargetKey(const RenderTargetKey& key) : value(key.value) {}
    RenderTargetKey& operator=(const RenderTargetKey& key) {
      value = key.value;
      return *this;
    }
    bool operator==(const RenderTargetKey& key) const {
      return value == key.value;
    }
    bool operator!=(const RenderTargetKey& key) const {
      return value != key.value;
    }
  };

  struct RenderTarget {
    ID3D12Resource* resource;
    D3D12_RESOURCE_STATES state;
    D3D12_CPU_DESCRIPTOR_HANDLE handle;
    RenderTargetKey key;
    // The first 4 MB page in the heaps.
    uint32_t heap_page_first;
    // Number of 4 MB pages this render target uses.
    uint32_t heap_page_count;
  };

  struct RenderTargetBinding {
    // Whether this render target has been used since the last full update.
    bool is_bound;
    // Whether the render target was actually used in the last draw.
    bool is_active;
    uint32_t edram_base;
    // Which member is meaningful depends on the slot: color for bindings 0-3,
    // depth for binding 4.
    union {
      ColorRenderTargetFormat color_format;
      DepthRenderTargetFormat depth_format;
    };
    RenderTarget* render_target;
  };

  // Resets the tracked pitch and sample count and zeroes all the bindings.
  void ClearBindings();

  // Must be in a frame to call. Writes the dirty areas of the currently bound
  // render targets and marks them as clean.
  void WriteRenderTargetsToEDRAM();

  D3D12CommandProcessor* command_processor_;
  RegisterFile* register_file_;

  // 32 MB heaps backing used render targets resources, created when needed.
  // 24 MB proved to be not enough to store a single render target occupying the
  // entire EDRAM - a 32-bit depth/stencil one - at some resolution.
  ID3D12Heap* heaps_[5] = {};

  // Owned render targets; each is deleted in ClearCache.
  std::unordered_multimap<uint32_t, RenderTarget*> render_targets_;

  // Surface parameters the current bindings were set up for. A pitch of 0
  // means there are no valid bindings.
  uint32_t current_surface_pitch_ = 0;
  MsaaSamples current_msaa_samples_ = MsaaSamples::k1X;
  // Indices 0-3 are the color render targets, index 4 is depth.
  RenderTargetBinding current_bindings_[5] = {};
};
} // namespace d3d12
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_

View File

@ -196,7 +196,6 @@ bool TextureCache::Initialize() {
} }
} }
ClearBindings();
return true; return true;
} }
@ -215,6 +214,18 @@ void TextureCache::Shutdown() {
} }
} }
void TextureCache::ClearCache() {
// Destroy all the textures.
for (auto texture_pair : textures_) {
Texture* texture = texture_pair.second;
if (texture->resource != nullptr) {
texture->resource->Release();
}
delete texture;
}
textures_.clear();
}
void TextureCache::TextureFetchConstantWritten(uint32_t index) { void TextureCache::TextureFetchConstantWritten(uint32_t index) {
texture_keys_in_sync_ &= ~(1u << index); texture_keys_in_sync_ &= ~(1u << index);
} }
@ -300,20 +311,6 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
} }
} }
void TextureCache::ClearCache() {
ClearBindings();
// Destroy all the textures.
for (auto texture_pair : textures_) {
Texture* texture = texture_pair.second;
if (texture->resource != nullptr) {
texture->resource->Release();
}
delete texture;
}
textures_.clear();
}
void TextureCache::WriteTextureSRV(uint32_t fetch_constant, void TextureCache::WriteTextureSRV(uint32_t fetch_constant,
TextureDimension shader_dimension, TextureDimension shader_dimension,
D3D12_CPU_DESCRIPTOR_HANDLE handle) { D3D12_CPU_DESCRIPTOR_HANDLE handle) {

View File

@ -59,6 +59,7 @@ class TextureCache {
bool Initialize(); bool Initialize();
void Shutdown(); void Shutdown();
void ClearCache();
void TextureFetchConstantWritten(uint32_t index); void TextureFetchConstantWritten(uint32_t index);
@ -71,8 +72,6 @@ class TextureCache {
void RequestTextures(uint32_t used_vertex_texture_mask, void RequestTextures(uint32_t used_vertex_texture_mask,
uint32_t used_pixel_texture_mask); uint32_t used_pixel_texture_mask);
void ClearCache();
void WriteTextureSRV(uint32_t fetch_constant, void WriteTextureSRV(uint32_t fetch_constant,
TextureDimension shader_dimension, TextureDimension shader_dimension,
D3D12_CPU_DESCRIPTOR_HANDLE handle); D3D12_CPU_DESCRIPTOR_HANDLE handle);