diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 35417ec45..299e04c20 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -924,6 +924,7 @@ PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) { pipeline->state = state; pipeline->root_signature = update_desc_.pRootSignature; pipelines_.insert({hash_key, pipeline}); + COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size()); return pipeline; } diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index dbca6d7ba..c8c7bc686 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -353,6 +353,7 @@ void RenderTargetCache::ClearCache() { delete resolve_target; } resolve_targets_.clear(); + COUNT_profile_set("gpu/render_target_cache/resolve_targets", 0); for (auto render_target_pair : render_targets_) { RenderTarget* render_target = render_target_pair.second; @@ -360,6 +361,7 @@ void RenderTargetCache::ClearCache() { delete render_target; } render_targets_.clear(); + COUNT_profile_set("gpu/render_target_cache/render_targets", 0); while (descriptor_heaps_depth_ != nullptr) { auto heap = descriptor_heaps_depth_; @@ -1938,6 +1940,8 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget( xe::align(copy_buffer_size, UINT64(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); resolve_target->copy_buffer_size = uint32_t(copy_buffer_size); resolve_targets_.insert(std::make_pair(key.value, resolve_target)); + COUNT_profile_set("gpu/render_target_cache/resolve_targets", + resolve_targets_.size()); return resolve_target; } @@ -2254,6 +2258,8 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( &copy_buffer_size); render_target->copy_buffer_size = uint32_t(copy_buffer_size); render_targets_.insert(std::make_pair(key.value, render_target)); + 
COUNT_profile_set("gpu/render_target_cache/render_targets", + render_targets_.size()); #if 0 XELOGGPU( "Created %ux%u %s render target with format %u at heap 4 MB pages %u:%u", diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index c4b6a147d..e7a23c5ca 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -94,6 +94,7 @@ bool SharedMemory::Initialize() { buffer_gpu_address_ = buffer_->GetGPUVirtualAddress(); std::memset(heaps_, 0, sizeof(heaps_)); + heap_count_ = 0; heap_creation_failed_ = false; std::memset(valid_pages_.data(), 0, valid_pages_.size() * sizeof(uint64_t)); @@ -124,6 +125,8 @@ void SharedMemory::Shutdown() { for (uint32_t i = 0; i < xe::countof(heaps_); ++i) { ui::d3d12::util::ReleaseAndNull(heaps_[i]); } + heap_count_ = 0; + COUNT_profile_set("gpu/shared_memory/mb_used", 0); } } @@ -277,6 +280,9 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) { heap_creation_failed_ = true; return false; } + ++heap_count_; + COUNT_profile_set("gpu/shared_memory/mb_used", + heap_count_ << kHeapSizeLog2 >> 20); D3D12_TILED_RESOURCE_COORDINATE region_start_coordinates; region_start_coordinates.X = (i << kHeapSizeLog2) / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index f6922e0fb..fa43a6734 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -130,13 +130,16 @@ class SharedMemory { D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0; D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST; - // Heaps are 16 MB, so not too many of them are allocated. - static constexpr uint32_t kHeapSizeLog2 = 24; + // Heaps are 4 MB, so not too many of them are allocated, but also not to + // waste too much memory for padding (with 16 MB there's too much). 
+ static constexpr uint32_t kHeapSizeLog2 = 22; static constexpr uint32_t kHeapSize = 1 << kHeapSizeLog2; static_assert((kHeapSize % D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) == 0, "Heap size must be a multiple of Direct3D tile size"); // Resident portions of the tiled buffer. ID3D12Heap* heaps_[kBufferSize >> kHeapSizeLog2] = {}; + // Number of the heaps currently resident, for profiling. + uint32_t heap_count_ = 0; // Whether creation of a heap has failed in the current frame. bool heap_creation_failed_ = false; diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 7c8dc7087..6d26c68a7 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -467,6 +467,7 @@ bool TextureCache::Initialize() { std::memset(scaled_resolve_pages_l2_, 0, sizeof(scaled_resolve_pages_l2_)); } std::memset(scaled_resolve_heaps_, 0, sizeof(scaled_resolve_heaps_)); + scaled_resolve_heap_count_ = 0; // Create the loading root signature. 
D3D12_ROOT_PARAMETER root_parameters[2]; @@ -590,6 +591,8 @@ void TextureCache::Shutdown() { for (uint32_t i = 0; i < xe::countof(scaled_resolve_heaps_); ++i) { ui::d3d12::util::ReleaseAndNull(scaled_resolve_heaps_[i]); } + scaled_resolve_heap_count_ = 0; + COUNT_profile_set("gpu/texture_cache/scaled_resolve_buffer_mb_used", 0); } void TextureCache::ClearCache() { @@ -602,6 +605,7 @@ void TextureCache::ClearCache() { delete texture; } textures_.clear(); + COUNT_profile_set("gpu/texture_cache/textures", 0); } void TextureCache::TextureFetchConstantWritten(uint32_t index) { @@ -1221,6 +1225,10 @@ bool TextureCache::EnsureScaledResolveBufferResident(uint32_t start_unscaled, XELOGE("Texture cache: Failed to create a scaled resolve tile heap"); return false; } + ++scaled_resolve_heap_count_; + COUNT_profile_set( + "gpu/texture_cache/scaled_resolve_buffer_mb_used", + scaled_resolve_heap_count_ << (kScaledResolveHeapSizeLog2 - 20)); D3D12_TILED_RESOURCE_COORDINATE region_start_coordinates; region_start_coordinates.X = (i << kScaledResolveHeapSizeLog2) / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; @@ -1663,6 +1671,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { texture->base_watch_handle = nullptr; texture->mip_watch_handle = nullptr; textures_.insert(std::make_pair(map_key, texture)); + COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); LogTextureAction(texture, "Created"); return texture; diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index ee8aebcee..e3b6ccf50 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -549,9 +549,9 @@ class TextureCache { ID3D12Resource* scaled_resolve_buffer_ = nullptr; D3D12_RESOURCE_STATES scaled_resolve_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - // Not very big heaps (32 MB) because they are needed pretty sparsely. One + // Not very big heaps (16 MB) because they are needed pretty sparsely. 
One // scaled 1280x720x32bpp texture is slighly bigger than 14 MB. - static constexpr uint32_t kScaledResolveHeapSizeLog2 = 25; + static constexpr uint32_t kScaledResolveHeapSizeLog2 = 24; static constexpr uint32_t kScaledResolveHeapSize = 1 << kScaledResolveHeapSizeLog2; static_assert( @@ -560,6 +560,8 @@ class TextureCache { // Resident portions of the tiled buffer. ID3D12Heap* scaled_resolve_heaps_[kScaledResolveBufferSize >> kScaledResolveHeapSizeLog2] = {}; + // Number of currently resident portions of the tiled buffer, for profiling. + uint32_t scaled_resolve_heap_count_ = 0; // Bit vector storing whether each 4 KB physical memory page contains scaled // resolve data. uint32_t rather than uint64_t because parts of it are sent to // shaders.