From c49922945589c48ecfe8d9d1b6010be34a1f4186 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 4 Nov 2019 17:30:20 +0300 Subject: [PATCH] [GPU] Store an EDRAM snapshot in traces --- src/xenia/gpu/command_processor.h | 2 + .../gpu/d3d12/d3d12_command_processor.cc | 26 +++- src/xenia/gpu/d3d12/d3d12_command_processor.h | 2 + src/xenia/gpu/d3d12/deferred_command_list.cc | 9 ++ src/xenia/gpu/d3d12/deferred_command_list.h | 30 +++++ src/xenia/gpu/d3d12/render_target_cache.cc | 122 +++++++++++++++++- src/xenia/gpu/d3d12/render_target_cache.h | 12 ++ src/xenia/gpu/null/null_command_processor.cc | 2 + src/xenia/gpu/null/null_command_processor.h | 2 + src/xenia/gpu/trace_player.cc | 15 ++- src/xenia/gpu/trace_player.h | 1 + src/xenia/gpu/trace_protocol.h | 13 ++ src/xenia/gpu/trace_reader.cc | 5 + src/xenia/gpu/trace_viewer.cc | 6 + src/xenia/gpu/trace_writer.cc | 31 +++++ src/xenia/gpu/trace_writer.h | 1 + src/xenia/gpu/vk/vulkan_command_processor.cc | 2 + src/xenia/gpu/vk/vulkan_command_processor.h | 2 + .../gpu/vulkan/vulkan_command_processor.cc | 2 + .../gpu/vulkan/vulkan_command_processor.h | 1 + 20 files changed, 278 insertions(+), 8 deletions(-) diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index 2b1899bd2..7f5cc5755 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -136,6 +136,8 @@ class CommandProcessor { virtual void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) = 0; + virtual void RestoreEDRAMSnapshot(const void* snapshot) = 0; + void InitializeRingBuffer(uint32_t ptr, uint32_t page_count); void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index a3a7d7bd1..ea1141342 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -87,6 +87,12 @@ void D3D12CommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, primitive_converter_->MemoryWriteCallback(base_ptr, length, true); } +void D3D12CommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) { + // Starting a new frame because descriptors may be needed. + BeginSubmission(true); + render_target_cache_->RestoreEDRAMSnapshot(snapshot); +} + bool D3D12CommandProcessor::IsROVUsedForEDRAM() const { if (!cvars::d3d12_edram_rov) { return false; @@ -699,8 +705,6 @@ std::unique_ptr D3D12CommandProcessor::Capture() { i * swap_texture_copy_footprint_.Footprint.RowPitch, raw_image->stride); } - D3D12_RANGE readback_written_range = {}; - gamma_ramp_upload_->Unmap(0, &readback_written_range); return raw_image; } @@ -1720,13 +1724,23 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, void D3D12CommandProcessor::InitializeTrace() { BeginSubmission(false); - bool any_downloads_submitted = false; - any_downloads_submitted |= shared_memory_->InitializeTraceSubmitDownloads(); - if (!any_downloads_submitted || !EndSubmission(false)) { + bool render_target_cache_submitted = + render_target_cache_->InitializeTraceSubmitDownloads(); + bool shared_memory_submitted = + shared_memory_->InitializeTraceSubmitDownloads(); + if (!render_target_cache_submitted && !shared_memory_submitted) { + return; + } + if (!EndSubmission(false)) { return; } AwaitAllSubmissionsCompletion(); - shared_memory_->InitializeTraceCompleteDownloads(); + if (render_target_cache_submitted) { + render_target_cache_->InitializeTraceCompleteDownloads(); + } + if (shared_memory_submitted) { + shared_memory_->InitializeTraceCompleteDownloads(); + } } void D3D12CommandProcessor::FinalizeTrace() {} diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 8558f6f26..18d191b28 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -48,6 +48,8 @@ class D3D12CommandProcessor : public CommandProcessor { void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override; + void RestoreEDRAMSnapshot(const void* snapshot) override; + // Needed by everything that owns transient objects. xe::ui::d3d12::D3D12Context* GetD3D12Context() const { return static_cast(context_.get()); diff --git a/src/xenia/gpu/d3d12/deferred_command_list.cc b/src/xenia/gpu/d3d12/deferred_command_list.cc index a1a66d8c5..bdebaf3c9 100644 --- a/src/xenia/gpu/d3d12/deferred_command_list.cc +++ b/src/xenia/gpu/d3d12/deferred_command_list.cc @@ -38,6 +38,15 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list, stream += header_size; stream_remaining -= header_size; switch (Command(header[0])) { + case Command::kD3DClearUnorderedAccessViewUint: { + auto& args = + *reinterpret_cast(stream); + command_list->ClearUnorderedAccessViewUint( + args.view_gpu_handle_in_current_heap, args.view_cpu_handle, + args.resource, args.values_uint, args.num_rects, + args.num_rects ? reinterpret_cast(&args + 1) + : nullptr); + } break; case Command::kD3DCopyBufferRegion: { auto& args = *reinterpret_cast(stream); diff --git a/src/xenia/gpu/d3d12/deferred_command_list.h b/src/xenia/gpu/d3d12/deferred_command_list.h index a7670eefa..ccd6edc18 100644 --- a/src/xenia/gpu/d3d12/deferred_command_list.h +++ b/src/xenia/gpu/d3d12/deferred_command_list.h @@ -32,6 +32,24 @@ class DeferredCommandList { void Execute(ID3D12GraphicsCommandList* command_list, ID3D12GraphicsCommandList1* command_list_1); + inline void D3DClearUnorderedAccessViewUint( + D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap, + D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle, ID3D12Resource* resource, + const UINT values[4], UINT num_rects, const D3D12_RECT* rects) { + auto args = reinterpret_cast( + WriteCommand(Command::kD3DClearUnorderedAccessViewUint, + sizeof(ClearUnorderedAccessViewHeader) + + num_rects * sizeof(D3D12_RECT))); + args->view_gpu_handle_in_current_heap = view_gpu_handle_in_current_heap; + args->view_cpu_handle = view_cpu_handle; + args->resource = resource; + std::memcpy(args->values_uint, values, 4 * sizeof(UINT)); + args->num_rects = num_rects; + if (num_rects != 0) { + std::memcpy(args + 1, rects, num_rects * sizeof(D3D12_RECT)); + } + } + inline void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset, ID3D12Resource* src_buffer, UINT64 src_offset, UINT64 num_bytes) { @@ -303,6 +321,7 @@ class DeferredCommandList { static constexpr size_t kAlignment = std::max(sizeof(void*), sizeof(UINT64)); enum class Command : uint32_t { + kD3DClearUnorderedAccessViewUint, kD3DCopyBufferRegion, kD3DCopyResource, kCopyTexture, @@ -331,6 +350,17 @@ class DeferredCommandList { kD3DSetSamplePositions, }; + struct ClearUnorderedAccessViewHeader { + D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap; + D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; + ID3D12Resource* resource; + union { + float values_float[4]; + UINT values_uint[4]; + }; + UINT num_rects; + }; + struct D3DCopyBufferRegionArguments { ID3D12Resource* dst_buffer; UINT64 dst_offset; diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index 494cdba54..df83d5df9 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -391,6 +391,8 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) { void RenderTargetCache::Shutdown() { ClearCache(); + edram_snapshot_restore_pool_.reset(); + ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_); for (auto& resolve_pipeline : resolve_pipelines_) { resolve_pipeline.pipeline->Release(); } @@ -449,9 +451,16 @@ void RenderTargetCache::ClearCache() { } } #endif + + edram_snapshot_restore_pool_.reset(); } void RenderTargetCache::BeginSubmission() { + if (edram_snapshot_restore_pool_) { + edram_snapshot_restore_pool_->Reclaim( + command_processor_->GetCompletedSubmission()); + } + // With the ROV, a submission does not always end in a resolve (for example, // when memexport readback happens) or something else that would surely submit // the UAV barrier, so we need to preserve the `current_` variables. @@ -2197,6 +2206,113 @@ DXGI_FORMAT RenderTargetCache::GetColorDXGIFormat( return DXGI_FORMAT_UNKNOWN; } +bool RenderTargetCache::InitializeTraceSubmitDownloads() { + if (resolution_scale_2x_) { + // No 1:1 mapping. + return false; + } + const uint32_t kEDRAMSize = 2048 * 5120; + if (!edram_snapshot_download_buffer_) { + D3D12_RESOURCE_DESC edram_snapshot_download_buffer_desc; + ui::d3d12::util::FillBufferResourceDesc(edram_snapshot_download_buffer_desc, + kEDRAMSize, + D3D12_RESOURCE_FLAG_NONE); + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); + if (FAILED(device->CreateCommittedResource( + &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, + &edram_snapshot_download_buffer_desc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(&edram_snapshot_download_buffer_)))) { + XELOGE("Failed to create a EDRAM snapshot download buffer"); + return false; + } + } + auto command_list = command_processor_->GetDeferredCommandList(); + TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE); + command_processor_->SubmitBarriers(); + command_list->D3DCopyBufferRegion(edram_snapshot_download_buffer_, 0, + edram_buffer_, 0, kEDRAMSize); + return true; +} + +void RenderTargetCache::InitializeTraceCompleteDownloads() { + if (!edram_snapshot_download_buffer_) { + return; + } + void* download_mapping; + if (SUCCEEDED(edram_snapshot_download_buffer_->Map(0, nullptr, + &download_mapping))) { + trace_writer_->WriteEDRAMSnapshot(download_mapping); + D3D12_RANGE download_write_range = {}; + edram_snapshot_download_buffer_->Unmap(0, &download_write_range); + } else { + XELOGE("Failed to map the EDRAM snapshot download buffer"); + } + edram_snapshot_download_buffer_->Release(); + edram_snapshot_download_buffer_ = nullptr; +} + +void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) { + if (resolution_scale_2x_) { + // No 1:1 mapping. + return; + } + auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); + auto device = provider->GetDevice(); + const uint32_t kEDRAMSize = 2048 * 5120; + if (!edram_snapshot_restore_pool_) { + edram_snapshot_restore_pool_ = + std::make_unique(device, kEDRAMSize); + } + ID3D12Resource* upload_buffer; + uint32_t upload_buffer_offset; + void* upload_buffer_mapping = edram_snapshot_restore_pool_->Request( + command_processor_->GetCurrentSubmission(), kEDRAMSize, &upload_buffer, + &upload_buffer_offset, nullptr); + if (!upload_buffer_mapping) { + XELOGE("Failed to get a buffer for restoring a EDRAM snapshot"); + return; + } + std::memcpy(upload_buffer_mapping, snapshot, kEDRAMSize); + auto command_list = command_processor_->GetDeferredCommandList(); + TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_COPY_DEST); + command_processor_->SubmitBarriers(); + command_list->D3DCopyBufferRegion(edram_buffer_, 0, upload_buffer, + upload_buffer_offset, kEDRAMSize); + if (!command_processor_->IsROVUsedForEDRAM()) { + // Clear and ignore the old 32-bit float depth - the non-ROV path is + // inaccurate anyway, and this is backend-specific, not a part of a guest + // trace. + D3D12_CPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_cpu; + D3D12_GPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_gpu; + if (command_processor_->RequestViewDescriptors( + ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 1, 1, + shader_visbile_descriptor_cpu, shader_visbile_descriptor_gpu) != + ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + WriteEDRAMUint32UAVDescriptor(shader_visbile_descriptor_cpu); + UINT clear_value[4] = {0, 0, 0, 0}; + D3D12_RECT clear_rect; + clear_rect.left = kEDRAMSize >> 2; + clear_rect.top = 0; + clear_rect.right = (kEDRAMSize >> 2) << 1; + clear_rect.bottom = 1; + TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + command_processor_->SubmitBarriers(); + // ClearUnorderedAccessView takes a shader-visible GPU descriptor and a + // non-shader-visible CPU descriptor. + command_list->D3DClearUnorderedAccessViewUint( + shader_visbile_descriptor_gpu, + provider->OffsetViewDescriptor( + edram_buffer_descriptor_heap_start_, + uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)), + edram_buffer_, clear_value, 1, &clear_rect); + } else { + XELOGE("Failed to get a UAV descriptor for invalidating 32-bit depth"); + } + } +} + uint32_t RenderTargetCache::GetEDRAMBufferSize() const { uint32_t size = 2048 * 5120; if (!command_processor_->IsROVUsedForEDRAM()) { @@ -2215,10 +2331,14 @@ void RenderTargetCache::TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state) { command_processor_->PushTransitionBarrier(edram_buffer_, edram_buffer_state_, new_state); edram_buffer_state_ = new_state; + if (new_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { + edram_buffer_modified_ = false; + } } void RenderTargetCache::CommitEDRAMBufferUAVWrites(bool force) { - if (edram_buffer_modified_ || force) { + if ((edram_buffer_modified_ || force) && + edram_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { command_processor_->PushUAVBarrier(edram_buffer_); } edram_buffer_modified_ = false; diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index f7580d30e..e227a7b60 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -10,6 +10,7 @@ #ifndef XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_ #define XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_ +#include #include #include "xenia/base/cvar.h" @@ -21,6 +22,7 @@ #include "xenia/gpu/xenos.h" #include "xenia/memory.h" #include "xenia/ui/d3d12/d3d12_api.h" +#include "xenia/ui/d3d12/pools.h" DECLARE_bool(d3d12_16bit_rtv_full_range); @@ -301,6 +303,11 @@ class RenderTargetCache { : DXGI_FORMAT_D24_UNORM_S8_UINT; } + // Returns true if any downloads were submitted to the command processor. + bool InitializeTraceSubmitDownloads(); + void InitializeTraceCompleteDownloads(); + void RestoreEDRAMSnapshot(const void* snapshot); + private: enum class EDRAMLoadStoreMode { kColor32bpp, @@ -673,6 +680,11 @@ class RenderTargetCache { #else std::unordered_map resolve_targets_; #endif + + // For traces only. + ID3D12Resource* edram_snapshot_download_buffer_ = nullptr; + std::unique_ptr edram_snapshot_restore_pool_ = + nullptr; }; } // namespace d3d12 diff --git a/src/xenia/gpu/null/null_command_processor.cc b/src/xenia/gpu/null/null_command_processor.cc index 07b970bdf..ba5500acd 100644 --- a/src/xenia/gpu/null/null_command_processor.cc +++ b/src/xenia/gpu/null/null_command_processor.cc @@ -21,6 +21,8 @@ NullCommandProcessor::~NullCommandProcessor() = default; void NullCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) {} +void NullCommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {} + bool NullCommandProcessor::SetupContext() { return CommandProcessor::SetupContext(); } diff --git a/src/xenia/gpu/null/null_command_processor.h b/src/xenia/gpu/null/null_command_processor.h index 3e04332f9..916668269 100644 --- a/src/xenia/gpu/null/null_command_processor.h +++ b/src/xenia/gpu/null/null_command_processor.h @@ -27,6 +27,8 @@ class NullCommandProcessor : public CommandProcessor { void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override; + void RestoreEDRAMSnapshot(const void* snapshot) override; + private: bool SetupContext() override; void ShutdownContext() override; diff --git a/src/xenia/gpu/trace_player.cc b/src/xenia/gpu/trace_player.cc index 1e86e8258..f2579c72d 100644 --- a/src/xenia/gpu/trace_player.cc +++ b/src/xenia/gpu/trace_player.cc @@ -32,7 +32,7 @@ TracePlayer::TracePlayer(xe::ui::Loop* loop, GraphicsSystem* graphics_system) playback_event_ = xe::threading::Event::CreateAutoResetEvent(false); } -TracePlayer::~TracePlayer() = default; +TracePlayer::~TracePlayer() { delete[] edram_snapshot_; } const TraceReader::Frame* TracePlayer::current_frame() const { if (current_frame_index_ >= frame_count()) { @@ -186,6 +186,19 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data, trace_ptr += cmd->encoded_length; break; } + case TraceCommandType::kEDRAMSnapshot: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + const size_t kEDRAMSize = 10 * 1024 * 1024; + if (!edram_snapshot_) { + edram_snapshot_ = new uint8_t[kEDRAMSize]; + } + DecompressMemory(cmd->encoding_format, trace_ptr, cmd->encoded_length, + edram_snapshot_, kEDRAMSize); + trace_ptr += cmd->encoded_length; + command_processor->RestoreEDRAMSnapshot(edram_snapshot_); + break; + } case TraceCommandType::kEvent: { auto cmd = reinterpret_cast(trace_ptr); trace_ptr += sizeof(*cmd); diff --git a/src/xenia/gpu/trace_player.h b/src/xenia/gpu/trace_player.h index 0c3c6571a..897faaff7 100644 --- a/src/xenia/gpu/trace_player.h +++ b/src/xenia/gpu/trace_player.h @@ -61,6 +61,7 @@ class TracePlayer : public TraceReader { bool playing_trace_ = false; std::atomic playback_percent_ = {0}; std::unique_ptr playback_event_; + uint8_t* edram_snapshot_ = nullptr; }; } // namespace gpu diff --git a/src/xenia/gpu/trace_protocol.h b/src/xenia/gpu/trace_protocol.h index afc6b7e36..147895cc7 100644 --- a/src/xenia/gpu/trace_protocol.h +++ b/src/xenia/gpu/trace_protocol.h @@ -51,6 +51,7 @@ enum class TraceCommandType : uint32_t { kPacketEnd, kMemoryRead, kMemoryWrite, + kEDRAMSnapshot, kEvent, }; @@ -110,6 +111,18 @@ struct MemoryCommand { uint32_t decoded_length; }; +// Represents a full 10 MB snapshot of EDRAM contents, for trace initialization +// (since replaying the trace will reconstruct its state at any point later) as +// a sequence of tiles with row-major samples (2x multisampling as 1x2 samples, +// 4x as 2x2 samples). +struct EDRAMSnapshotCommand { + TraceCommandType type; + // Encoding format of the data in the trace file. + MemoryEncodingFormat encoding_format; + // Number of bytes the data occupies in the trace file in its encoded form. + uint32_t encoded_length; +}; + // Represents a GPU event of EventCommand::Type. struct EventCommand { TraceCommandType type; diff --git a/src/xenia/gpu/trace_reader.cc b/src/xenia/gpu/trace_reader.cc index caa4d1e26..1555ac152 100644 --- a/src/xenia/gpu/trace_reader.cc +++ b/src/xenia/gpu/trace_reader.cc @@ -190,6 +190,11 @@ void TraceReader::ParseTrace() { trace_ptr += sizeof(*cmd) + cmd->encoded_length; break; } + case TraceCommandType::kEDRAMSnapshot: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd) + cmd->encoded_length; + break; + } case TraceCommandType::kEvent: { auto cmd = reinterpret_cast(trace_ptr); trace_ptr += sizeof(*cmd); diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index d1fed5b72..3c75df03d 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -372,6 +372,12 @@ void TraceViewer::DrawPacketDisassemblerUI() { // ImGui::BulletText("MemoryWrite"); break; } + case TraceCommandType::kEDRAMSnapshot: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd) + cmd->encoded_length; + // ImGui::BulletText("EDRAMSnapshot"); + break; + } case TraceCommandType::kEvent: { auto cmd = reinterpret_cast(trace_ptr); trace_ptr += sizeof(*cmd); diff --git a/src/xenia/gpu/trace_writer.cc b/src/xenia/gpu/trace_writer.cc index 2c2a1a28f..a4318965d 100644 --- a/src/xenia/gpu/trace_writer.cc +++ b/src/xenia/gpu/trace_writer.cc @@ -229,6 +229,37 @@ void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr, } } +void TraceWriter::WriteEDRAMSnapshot(const void* snapshot) { + const uint32_t kEDRAMSize = 10 * 1024 * 1024; + EDRAMSnapshotCommand cmd; + cmd.type = TraceCommandType::kEDRAMSnapshot; + if (compress_output_) { + // Write the header now so we reserve space in the buffer. + long header_position = std::ftell(file_); + cmd.encoding_format = MemoryEncodingFormat::kSnappy; + fwrite(&cmd, 1, sizeof(cmd), file_); + + // Stream the content right to the buffer. + snappy::ByteArraySource snappy_source( + reinterpret_cast(snapshot), kEDRAMSize); + SnappySink snappy_sink(file_); + cmd.encoded_length = + static_cast(snappy::Compress(&snappy_source, &snappy_sink)); + + // Seek back and overwrite the header with our final size. + std::fseek(file_, header_position, SEEK_SET); + fwrite(&cmd, 1, sizeof(cmd), file_); + std::fseek(file_, header_position + sizeof(cmd) + cmd.encoded_length, + SEEK_SET); + } else { + // Uncompressed - write buffer directly to the file. + cmd.encoding_format = MemoryEncodingFormat::kNone; + cmd.encoded_length = kEDRAMSize; + fwrite(&cmd, 1, sizeof(cmd), file_); + fwrite(snapshot, 1, kEDRAMSize, file_); + } +} + void TraceWriter::WriteEvent(EventCommand::Type event_type) { if (!file_) { return; diff --git a/src/xenia/gpu/trace_writer.h b/src/xenia/gpu/trace_writer.h index 206f69a2b..792fd9554 100644 --- a/src/xenia/gpu/trace_writer.h +++ b/src/xenia/gpu/trace_writer.h @@ -42,6 +42,7 @@ class TraceWriter { void WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length); void WriteMemoryWrite(uint32_t base_ptr, size_t length, const void* host_ptr = nullptr); + void WriteEDRAMSnapshot(const void* snapshot); void WriteEvent(EventCommand::Type event_type); private: diff --git a/src/xenia/gpu/vk/vulkan_command_processor.cc b/src/xenia/gpu/vk/vulkan_command_processor.cc index 57f45cbf6..62bdb5677 100644 --- a/src/xenia/gpu/vk/vulkan_command_processor.cc +++ b/src/xenia/gpu/vk/vulkan_command_processor.cc @@ -21,6 +21,8 @@ VulkanCommandProcessor::~VulkanCommandProcessor() = default; void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) {} +void VulkanCommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {} + bool VulkanCommandProcessor::SetupContext() { return true; } void VulkanCommandProcessor::ShutdownContext() {} diff --git a/src/xenia/gpu/vk/vulkan_command_processor.h b/src/xenia/gpu/vk/vulkan_command_processor.h index 1203bd539..8157c3590 100644 --- a/src/xenia/gpu/vk/vulkan_command_processor.h +++ b/src/xenia/gpu/vk/vulkan_command_processor.h @@ -26,6 +26,8 @@ class VulkanCommandProcessor : public CommandProcessor { void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override; + void RestoreEDRAMSnapshot(const void* snapshot) override; + protected: bool SetupContext() override; void ShutdownContext() override; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index c26b592df..336e08a11 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -51,6 +51,8 @@ void VulkanCommandProcessor::RequestFrameTrace(const std::wstring& root_path) { void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) {} +void VulkanCommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {} + void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); cache_clear_requested_ = true; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 56bc1db8e..94f7ae401 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -52,6 +52,7 @@ class VulkanCommandProcessor : public CommandProcessor { void RequestFrameTrace(const std::wstring& root_path) override; void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override; + void RestoreEDRAMSnapshot(const void* snapshot) override; void ClearCaches() override; RenderCache* render_cache() { return render_cache_.get(); }