From 52a1a80200e78eb6db37c1db9c7c07e865a619e0 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 7 Aug 2018 22:40:24 +0300 Subject: [PATCH] [D3D12] Rough outline of render target architecture --- .../gpu/d3d12/d3d12_command_processor.cc | 25 +- src/xenia/gpu/d3d12/d3d12_command_processor.h | 3 + src/xenia/gpu/d3d12/render_target_cache.cc | 283 ++++++++++++++++++ src/xenia/gpu/d3d12/render_target_cache.h | 277 +++++++++++++++++ src/xenia/gpu/d3d12/texture_cache.cc | 27 +- src/xenia/gpu/d3d12/texture_cache.h | 3 +- 6 files changed, 598 insertions(+), 20 deletions(-) create mode 100644 src/xenia/gpu/d3d12/render_target_cache.cc create mode 100644 src/xenia/gpu/d3d12/render_target_cache.h diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 1cffd2a27..477ec9731 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -483,7 +483,14 @@ bool D3D12CommandProcessor::SetupContext() { texture_cache_ = std::make_unique(this, register_file_, shared_memory_.get()); if (!texture_cache_->Initialize()) { - XELOGE("Failed to initialize texture cache"); + XELOGE("Failed to initialize the texture cache"); + return false; + } + + render_target_cache_ = + std::make_unique(this, register_file_); + if (!render_target_cache_->Initialize()) { + XELOGE("Failed to initialize the render target cache"); return false; } @@ -509,6 +516,8 @@ void D3D12CommandProcessor::ShutdownContext() { view_heap_pool_.reset(); constant_buffer_pool_.reset(); + render_target_cache_.reset(); + texture_cache_.reset(); pipeline_cache_.reset(); @@ -570,10 +579,12 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, view_heap_pool_->ClearCache(); constant_buffer_pool_->ClearCache(); - pipeline_cache_->ClearCache(); + render_target_cache_->ClearCache(); texture_cache_->ClearCache(); + pipeline_cache_->ClearCache(); + for (auto it : root_signatures_) { it.second->Release(); } @@ 
-607,7 +618,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, if (enable_mode == xenos::ModeControl::kIgnore) { // Ignored. return true; - } else if (enable_mode == xenos::ModeControl::kCopy) { + } + if (enable_mode == xenos::ModeControl::kCopy) { // Special copy handling. return IssueCopy(); } @@ -666,6 +678,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, bool new_frame = BeginFrame(); auto command_list = GetCurrentCommandList(); + // Set up the render targets - this may bind pipelines. + render_target_cache_->UpdateRenderTargets(); + // Set the primitive topology. D3D_PRIMITIVE_TOPOLOGY primitive_topology; switch (primitive_type) { @@ -824,6 +839,8 @@ bool D3D12CommandProcessor::BeginFrame() { texture_cache_->BeginFrame(); + render_target_cache_->BeginFrame(); + return true; } @@ -837,6 +854,8 @@ bool D3D12CommandProcessor::EndFrame() { auto command_list_setup = command_lists_setup_[current_queue_frame_].get(); auto command_list = command_lists_[current_queue_frame_].get(); + render_target_cache_->EndFrame(); + bool setup_written = shared_memory_->EndFrame( command_list_setup->GetCommandList(), command_list->GetCommandList()); diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index ae62911d3..085ab2a5a 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -17,6 +17,7 @@ #include "xenia/gpu/command_processor.h" #include "xenia/gpu/d3d12/d3d12_graphics_system.h" #include "xenia/gpu/d3d12/pipeline_cache.h" +#include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/d3d12/shared_memory.h" #include "xenia/gpu/d3d12/texture_cache.h" #include "xenia/gpu/hlsl_shader_translator.h" @@ -169,6 +170,8 @@ class D3D12CommandProcessor : public CommandProcessor { std::unique_ptr texture_cache_ = nullptr; + std::unique_ptr render_target_cache_ = nullptr; + std::unique_ptr constant_buffer_pool_ = nullptr; 
std::unique_ptr view_heap_pool_ = nullptr; std::unique_ptr sampler_heap_pool_ = nullptr; diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc new file mode 100644 index 000000000..2c9b6433d --- /dev/null +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -0,0 +1,283 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2018 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/d3d12/render_target_cache.h" + +#include + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/gpu/d3d12/d3d12_command_processor.h" + +namespace xe { +namespace gpu { +namespace d3d12 { + +RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor, + RegisterFile* register_file) + : command_processor_(command_processor), register_file_(register_file) {} + +RenderTargetCache::~RenderTargetCache() { Shutdown(); } + +bool RenderTargetCache::Initialize() { return true; } + +void RenderTargetCache::Shutdown() { ClearCache(); } + +void RenderTargetCache::ClearCache() { + for (auto render_target_pair : render_targets_) { + RenderTarget* render_target = render_target_pair.second; + if (render_target->resource != nullptr) { + render_target->resource->Release(); + } + delete render_target; + } + render_targets_.clear(); + + for (uint32_t i = 0; i < xe::countof(heaps_); ++i) { + if (heaps_[i] != nullptr) { + heaps_[i]->Release(); + heaps_[i] = nullptr; + } + } +} + +void RenderTargetCache::BeginFrame() { ClearBindings(); } + +void RenderTargetCache::UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport, + const D3D12_RECT& scissor */) { + // There are two kinds of 
render target binding updates in this implementation + // in case something has been changed - full and partial. + // + // A full update involves flushing all the currently bound render targets that + // have been modified to the EDRAM buffer, allocating all the newly bound + // render targets in the heaps, loading them from the EDRAM buffer and binding + // them. + // + // ("Bound" here means ever used since the last full update - and in this case + // it's bound to the Direct3D 12 command list.) + // + // However, Banjo-Kazooie interleaves color/depth and depth-only writes every + // draw call, and doing a full update whenever the color mask is changed is + // too expensive. So, we shouldn't do a full update if the game only toggles + // color writes and depth testing. Instead, we're only adding or re-enabling + // render targets if color writes are being enabled (adding involves loading + // the contents from the EDRAM, while re-enabling does nothing on the D3D + // side). + // + // There are cases when simply toggling render targets may still require EDRAM + // stores and thus a full update. Here's an example situation: + // Draw 1: + // - 32bpp RT0 0-10 MB + // - 32bpp RT1 3-10 MB + // - 1280x720 viewport + // Draw 2: + // - 32bpp RT0 0-10 MB + // - Inactive RT1 + // - 1280x1440 viewport + // Draw 3: + // - 32bpp RT0 0-10 MB + // - 32bpp RT1 3-10 MB + // - 1280x720 viewport + // In this case, before draw 2, RT1 must be written to the EDRAM buffer, and + // RT0 must be loaded, and also before draw 3 RT1 must receive the changes + // made to the lower part of RT0. So, before draws 2 and 3, full updates must + // be done. + // + // Full updates are better for memory usage than partial updates though, as + // the render targets are re-allocated in the heaps, which means that they can + // be allocated more tightly, preventing too many 32 MB heaps from being + // created. + // + // To summarize, a full update happens if: + // - Starting a new frame. 
+ // - Drawing after resolving. + // - Surface pitch changed. + // - Sample count changed. + // - EDRAM base of a currently used RT changed. + // - Format of a currently used RT changed. + // - Current viewport contains unsaved data from previously used render + // targets. + // - New render target overlaps unsaved data from other bound render targets. + // A partial update happens if: + // - New render target is added, but doesn't overlap unsaved data from other + // currently or previously used render targets. + auto command_list = command_processor_->GetCurrentCommandList(); + if (command_list == nullptr) { + return; + } + + auto& regs = *register_file_; + uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + uint32_t surface_pitch = rb_surface_info & 0x3FFF; + MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); + uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; + if (xenos::ModeControl(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7) == + xenos::ModeControl::kDepth) { + rb_color_mask = 0; + } + bool color_enabled[4] = { + (rb_color_mask & 0xF) != 0, (rb_color_mask & 0xF0) != 0, + (rb_color_mask & 0xF00) != 0, (rb_color_mask & 0xF000) != 0}; + uint32_t rb_color_info[4] = { + regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, + regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32}; + uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; + uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + bool depth_enabled = (rb_depthcontrol & (0x2 | 0x4)) != 0; + + bool full_update = false; + + // Check the following full update conditions: + // - Starting a new frame. + // - Drawing after resolving. + // - Surface pitch changed. + // - Sample count changed. + // Draws are skipped if the surface pitch is 0, so a full update can be forced + // in the beginning of the frame or after resolves by setting the current + // pitch to 0. 
+ if (current_surface_pitch_ != surface_pitch || + current_msaa_samples_ != msaa_samples) { + full_update = true; + } + + // Check the following full update conditions: + // - EDRAM base of a currently used RT changed. + // - Format of a currently used RT changed. + // TODO(Triang3l): Check the following full update conditions here: + // - Current viewport contains unsaved data from previously used render + // targets. + uint32_t render_targets_to_attach = 0; + for (uint32_t i = 0; i < 4; ++i) { + if (!color_enabled[i]) { + continue; + } + RenderTargetBinding& binding = current_bindings_[i]; + if (binding.is_bound) { + // TODO(Triang3l): If was inactive, check if overlapping unsaved data now. + if ((rb_color_info[i] & 0xFFF) != binding.edram_base || + ColorRenderTargetFormat((rb_color_info[i] >> 12) & 0xF) != + binding.color_format) { + full_update = true; + } + } else { + render_targets_to_attach |= 1 << i; + } + } + if (depth_enabled) { + RenderTargetBinding& binding = current_bindings_[4]; + if (binding.is_bound) { + // TODO(Triang3l): If was inactive, check if overlapping unsaved data now. + if ((rb_depth_info & 0xFFF) != binding.edram_base || + DepthRenderTargetFormat((rb_depth_info >> 12) & 0x1) != + binding.depth_format) { + full_update = true; + } + } else { + render_targets_to_attach |= 1 << 4; + } + } + + // TODO(Triang3l): Check the following full update condition here: + // - New render target overlaps unsaved data from other bound render targets. + + // If no need to attach any new render targets, update activation state, dirty + // regions and exit early. + if (!full_update && !render_targets_to_attach) { + for (uint32_t i = 0; i < 4; ++i) { + current_bindings_[i].is_active = color_enabled[i]; + } + current_bindings_[4].is_active = depth_enabled; + // TODO(Triang3l): Update dirty regions. + return; + } + + // From this point, the function MUST NOT FAIL, otherwise bindings will be + // left in an incomplete state. 
+ + uint32_t heap_usage[5] = {}; + if (full_update) { + // Export the currently bound render targets before we ruin the bindings. + WriteRenderTargetsToEDRAM(); + + ClearBindings(); + current_surface_pitch_ = surface_pitch; + current_msaa_samples_ = msaa_samples; + + // If updating fully, need to reattach all the render targets and allocate + // from scratch. + for (uint32_t i = 0; i < 4; ++i) { + if (color_enabled[i]) { + render_targets_to_attach |= 1 << i; + } + } + if (depth_enabled) { + render_targets_to_attach |= 1 << 4; + } + } else { + // If updating partially, only need to attach new render targets. + for (uint32_t i = 0; i < 5; ++i) { + const RenderTargetBinding& binding = current_bindings_[i]; + if (!binding.is_bound) { + continue; + } + const RenderTarget* render_target = binding.render_target; + if (render_target != nullptr) { + // There are no holes between 4 MB pages in each heap. + heap_usage[render_target->heap_page_first >> 3] += + render_target->heap_page_count; + continue; + } + } + } + XELOGGPU("RT Cache: %s update! Pitch %u, samples %u, RTs to attach %u.", + full_update ? "Full" : "Partial", surface_pitch, msaa_samples, + render_targets_to_attach); + + // Allocate the new render targets. + // TODO(Triang3l): Actually allocate them. + // TODO(Triang3l): Load the contents from the EDRAM. + // TODO(Triang3l): Bind the render targets to the command list. + + // Write the new bindings. 
+ for (uint32_t i = 0; i < 4; ++i) { + if (!(render_targets_to_attach & (1 << i))) { + continue; + } + RenderTargetBinding& binding = current_bindings_[i]; + binding.is_bound = true; + binding.is_active = true; + binding.edram_base = rb_color_info[i] & 0xFFF; + binding.color_format = + ColorRenderTargetFormat((rb_color_info[i] >> 12) & 0xF); + } + if (render_targets_to_attach & (1 << 4)) { + RenderTargetBinding& binding = current_bindings_[4]; + binding.is_bound = true; + binding.is_active = true; + binding.edram_base = rb_depth_info & 0xFFF; + binding.depth_format = DepthRenderTargetFormat((rb_depth_info >> 12) & 0x1); + } +} + +void RenderTargetCache::EndFrame() { + WriteRenderTargetsToEDRAM(); + ClearBindings(); +} + +void RenderTargetCache::ClearBindings() { + current_surface_pitch_ = 0; + current_msaa_samples_ = MsaaSamples::k1X; + std::memset(current_bindings_, 0, sizeof(current_bindings_)); +} + +void RenderTargetCache::WriteRenderTargetsToEDRAM() {} + +} // namespace d3d12 +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h new file mode 100644 index 000000000..dfb3bd112 --- /dev/null +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -0,0 +1,277 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2018 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_ +#define XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_ + +#include + +#include "xenia/gpu/d3d12/shared_memory.h" +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/d3d12/d3d12_api.h" + +namespace xe { +namespace gpu { +namespace d3d12 { + +class D3D12CommandProcessor; + +// ============================================================================= +// How EDRAM is used by Xenos: +// (Copied from the old version of the render target cache, so implementation +// info may differ from the way EDRAM is emulated now.) +// ============================================================================= +// +// On the 360 the render target is an opaque block of memory in EDRAM that's +// only accessible via resolves. We use this to our advantage to simulate +// something like it as best we can by having a shared backing memory with +// a multitude of views for each tile location in EDRAM. +// +// This allows us to have the same base address write to the same memory +// regardless of framebuffer format. Resolving then uses whatever format the +// resolve requests straight from the backing memory. +// +// EDRAM is a beast and we only approximate it as best we can. Basically, +// the 10MiB of EDRAM is composed of 2048 5120b tiles. Each tile is 80x16px. +// +-----+-----+-----+--- +// |tile0|tile1|tile2|... 2048 times +// +-----+-----+-----+--- +// Operations dealing with EDRAM deal in tile offsets, so base 0x100 is tile +// offset 256, 256*5120=1310720b into the buffer. All rendering operations are +// aligned to tiles so trying to draw at 256px wide will have a real width of +// 320px by rounding up to the next tile. +// +// MSAA and other settings will modify the exact pixel sizes, like 4X makes +// each tile effectively 40x8px / 2X makes each tile 80x8px, but they are still +// all 5120b. 
As we try to emulate this we adjust our viewport when rendering to +// stretch pixels as needed. +// +// It appears that games also take advantage of MSAA stretching tiles when doing +// clears. Games will clear a view with 1/2X pitch/height and 4X MSAA and then +// later draw to that view with 1X pitch/height and 1X MSAA. +// +// The good news is that games cannot read EDRAM directly but must use a copy +// operation to get the data out. That gives us a chance to do whatever we +// need to (re-tile, etc) only when requested. +// +// To approximate the tiled EDRAM layout we use a single large chunk of memory. +// From this memory we create many VkImages (and VkImageViews) of various +// formats and dimensions as requested by the game. These are used as +// attachments during rendering and as sources during copies. They are also +// heavily aliased - lots of images will reference the same locations in the +// underlying EDRAM buffer. The only requirement is that there are no hazards +// with specific tiles (reading/writing the same tile through different images) +// and otherwise it should be ok *fingers crossed*. +// +// One complication is the copy/resolve process itself: we need to give back +// the data asked for in the format desired and where it goes is arbitrary +// (any address in physical memory). If the game is good we get resolves of +// EDRAM into fixed base addresses with scissored regions. If the game is bad +// we are broken. +// +// Resolves from EDRAM result in tiled textures - that's texture tiles, not +// EDRAM tiles. If we wanted to ensure byte-for-byte correctness we'd need to +// then tile the images as we wrote them out. For now, we just attempt to +// get the (X, Y) in linear space and do that. This really comes into play +// when multiple resolves write to the same texture or memory aliased by +// multiple textures - which is common due to predicated tiling. 
The examples +// below demonstrate what this looks like, but the important thing is that +// we are aware of partial textures and overlapping regions. +// +// Example with multiple render targets: +// Two color targets of 256x256px tightly packed in EDRAM: +// color target 0: base 0x0, pitch 320, scissor 0,0, 256x256 +// starts at tile 0, buffer offset 0 +// contains 64 tiles (320/80)*(256/16) +// color target 1: base 0x40, pitch 320, scissor 256,0, 256x256 +// starts at tile 64 (after color target 0), buffer offset 327680b +// contains 64 tiles +// In EDRAM each set of 64 tiles is contiguous: +// +------+------+ +------+------+------+ +// |ct0.0 |ct0.1 |...|ct0.63|ct1.0 |ct1.1 |... +// +------+------+ +------+------+------+ +// To render into these, we set up two VkImages: +// image 0: bound to buffer offset 0, 320x256x4=327680b +// image 1: bound to buffer offset 327680b, 320x256x4=327680b +// So when we render to them: +// +------+-+ scissored to 256x256, actually 320x256 +// | . | | <- . appears at some untiled offset in the buffer, but +// | | | consistent if aliased with the same format +// +------+-+ +// In theory, this gives us proper aliasing in most cases.
+// +// Example with horizontal predicated tiling: +// Trying to render 1024x576 @4X MSAA, splitting into two regions +// horizontally: +// +----------+ +// | 1024x288 | +// +----------+ +// | 1024x288 | +// +----------+ +// EDRAM configured for 1056x288px with tile size 2112x576px (4X MSAA): +// color target 0: base 0x0, pitch 1080, 26x36 tiles +// First render (top): +// window offset 0,0 +// scissor 0,0, 1024x288 +// First resolve (top): +// RB_COPY_DEST_BASE 0x1F45D000 +// RB_COPY_DEST_PITCH pitch=1024, height=576 +// vertices: 0,0, 1024,0, 1024,288 +// Second render (bottom): +// window offset 0,-288 +// scissor 0,288, 1024x288 +// Second resolve (bottom): +// RB_COPY_DEST_BASE 0x1F57D000 (+1179648b) +// RB_COPY_DEST_PITCH pitch=1024, height=576 +// (exactly 1024x288*4b after first resolve) +// vertices: 0,288, 1024,288, 1024,576 +// Resolving here is easy as the textures are contiguous in memory. We can +// snoop in the first resolve with the dest height to know the total size, +// and in the second resolve see that it overlaps and place it in the +// existing target.
+// +// Example with vertical predicated tiling: +// Trying to render 1280x720 @2X MSAA, splitting into two regions +// vertically: +// +-----+-----+ +// | 640 | 640 | +// | x | x | +// | 720 | 720 | +// +-----+-----+ +// EDRAM configured for 640x736px with tile size 640x1472px (2X MSAA): +// color target 0: base 0x0, pitch 640, 8x92 tiles +// First render (left): +// window offset 0,0 +// scissor 0,0, 640x720 +// First resolve (left): +// RB_COPY_DEST_BASE 0x1BC6D000 +// RB_COPY_DEST_PITCH pitch=1280, height=720 +// vertices: 0,0, 640,0, 640,720 +// Second render (right): +// window offset -640,0 +// scissor 640,0, 640x720 +// Second resolve (right): +// RB_COPY_DEST_BASE 0x1BC81000 (+81920b) +// RB_COPY_DEST_PITCH pitch=1280, height=720 +// vertices: 640,0, 1280,0, 1280,720 +// Resolving here is much more difficult as resolves are tiled and the right +// half of the texture is 81920b away: +// 81920/4bpp=20480px, /32 (texture tile size)=640px +// We know the texture size with the first resolve and with the second we +// must check for overlap then compute the offset (in both X and Y). +// +// ============================================================================= +// Surface size: +// ============================================================================= +// +// XGSurfaceSize code in game executables calculates the size in tiles in the +// following order: +// 1) If MSAA is >=2x, multiply the height by 2. +// 2) If MSAA is 4x, multiply the width by 2. +// 3) 80x16-align multisampled width and height. +// 4) Multiply width*height by 4 or 8 depending on the pixel format. +// 5) Divide the byte size by 5120. +// This means that when working with EDRAM surface sizes we should assume that a +// multisampled surface is the same as a single-sampled surface with 2x height +// and width - however, format size doesn't affect the dimensions. Surface pitch +// in the surface info register is single-sampled.
+class RenderTargetCache { + public: + RenderTargetCache(D3D12CommandProcessor* command_processor, + RegisterFile* register_file); + ~RenderTargetCache(); + + bool Initialize(); + void Shutdown(); + void ClearCache(); + + void BeginFrame(); + // Called in the beginning of a draw call - may bind pipelines. + void UpdateRenderTargets(/* const D3D12_VIEWPORT& viewport, + const D3D12_RECT& scissor */); + void EndFrame(); + + private: + union RenderTargetKey { + struct { + // Supersampled dimensions. The limit is 2560x2560 without AA, 2560x5120 + // with 2x AA, and 5120x5120 with 4x AA. + uint32_t width_ss_div_80 : 7; // 7 + uint32_t height_ss_div_16 : 9; // 16 + uint32_t is_depth : 1; // 17 + uint32_t format : 4; // 21 + }; + uint32_t value; + + // Clearing the unused bits. + RenderTargetKey() : value(0) {} + RenderTargetKey(const RenderTargetKey& key) : value(key.value) {} + RenderTargetKey& operator=(const RenderTargetKey& key) { + value = key.value; + return *this; + } + bool operator==(const RenderTargetKey& key) const { + return value == key.value; + } + bool operator!=(const RenderTargetKey& key) const { + return value != key.value; + } + }; + + struct RenderTarget { + ID3D12Resource* resource; + D3D12_RESOURCE_STATES state; + D3D12_CPU_DESCRIPTOR_HANDLE handle; + RenderTargetKey key; + // The first 4 MB page in the heaps. + uint32_t heap_page_first; + // Number of 4 MB pages this render target uses. + uint32_t heap_page_count; + }; + + struct RenderTargetBinding { + // Whether this render target has been used since the last full update. + bool is_bound; + // Whether the render target was actually used in the last draw. + bool is_active; + uint32_t edram_base; + union { + ColorRenderTargetFormat color_format; + DepthRenderTargetFormat depth_format; + }; + RenderTarget* render_target; + }; + + void ClearBindings(); + + // Must be in a frame to call. Writes the dirty areas of the currently bound + // render targets and marks them as clean. 
+ void WriteRenderTargetsToEDRAM(); + + D3D12CommandProcessor* command_processor_; + RegisterFile* register_file_; + + // 32 MB heaps backing used render targets resources, created when needed. + // 24 MB proved to be not enough to store a single render target occupying the + // entire EDRAM - a 32-bit depth/stencil one - at some resolution. + ID3D12Heap* heaps_[5] = {}; + + std::unordered_multimap render_targets_; + + uint32_t current_surface_pitch_ = 0; + MsaaSamples current_msaa_samples_ = MsaaSamples::k1X; + RenderTargetBinding current_bindings_[5] = {}; +}; + +} // namespace d3d12 +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_ diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index bf9227abb..8cb1e4b14 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -196,7 +196,6 @@ bool TextureCache::Initialize() { } } - ClearBindings(); return true; } @@ -215,6 +214,18 @@ void TextureCache::Shutdown() { } } +void TextureCache::ClearCache() { + // Destroy all the textures. + for (auto texture_pair : textures_) { + Texture* texture = texture_pair.second; + if (texture->resource != nullptr) { + texture->resource->Release(); + } + delete texture; + } + textures_.clear(); +} + void TextureCache::TextureFetchConstantWritten(uint32_t index) { texture_keys_in_sync_ &= ~(1u << index); } @@ -300,20 +311,6 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask, } } -void TextureCache::ClearCache() { - ClearBindings(); - - // Destroy all the textures. 
- for (auto texture_pair : textures_) { - Texture* texture = texture_pair.second; - if (texture->resource != nullptr) { - texture->resource->Release(); - } - delete texture; - } - textures_.clear(); -} - void TextureCache::WriteTextureSRV(uint32_t fetch_constant, TextureDimension shader_dimension, D3D12_CPU_DESCRIPTOR_HANDLE handle) { diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index e67086bb3..123b2d028 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -59,6 +59,7 @@ class TextureCache { bool Initialize(); void Shutdown(); + void ClearCache(); void TextureFetchConstantWritten(uint32_t index); @@ -71,8 +72,6 @@ class TextureCache { void RequestTextures(uint32_t used_vertex_texture_mask, uint32_t used_pixel_texture_mask); - void ClearCache(); - void WriteTextureSRV(uint32_t fetch_constant, TextureDimension shader_dimension, D3D12_CPU_DESCRIPTOR_HANDLE handle);