From 4623b41023bad9df63aaf0bc2809a90e46898b1c Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 23 Oct 2019 23:33:50 +0300 Subject: [PATCH] [D3D12] Trace guest memory operations --- src/xenia/gpu/command_processor.cc | 5 + src/xenia/gpu/command_processor.h | 3 + .../gpu/d3d12/d3d12_command_processor.cc | 20 ++- src/xenia/gpu/d3d12/d3d12_command_processor.h | 3 + src/xenia/gpu/d3d12/primitive_converter.cc | 17 +- src/xenia/gpu/d3d12/primitive_converter.h | 7 +- src/xenia/gpu/d3d12/shared_memory.cc | 166 +++++++++++++++++- src/xenia/gpu/d3d12/shared_memory.h | 18 +- src/xenia/gpu/null/null_command_processor.cc | 4 + src/xenia/gpu/null/null_command_processor.h | 3 + src/xenia/gpu/trace_player.cc | 20 +-- src/xenia/gpu/trace_writer.cc | 17 +- src/xenia/gpu/trace_writer.h | 5 +- src/xenia/gpu/vk/vulkan_command_processor.cc | 4 + src/xenia/gpu/vk/vulkan_command_processor.h | 3 + .../gpu/vulkan/vulkan_command_processor.cc | 4 + .../gpu/vulkan/vulkan_command_processor.h | 3 + 17 files changed, 269 insertions(+), 33 deletions(-) diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index acb991fb5..51dcc9ac0 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -119,6 +119,8 @@ void CommandProcessor::EndTracing() { return; } assert_true(trace_state_ == TraceState::kStreaming); + FinalizeTrace(); + trace_state_ = TraceState::kDisabled; trace_writer_.Close(); } @@ -437,6 +439,7 @@ uint32_t CommandProcessor::ExecutePrimaryBuffer(uint32_t read_index, auto file_name = xe::format_string(L"%8X_stream.xtr", title_id); auto path = trace_stream_path_ + file_name; trace_writer_.Open(path, title_id); + InitializeTrace(); } // Adjust pointer base. 
@@ -738,6 +741,7 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) { trace_writer_.WriteEvent(EventCommand::Type::kSwap); trace_writer_.Flush(); if (trace_state_ == TraceState::kSingleFrame) { + FinalizeTrace(); trace_state_ = TraceState::kDisabled; trace_writer_.Close(); } @@ -747,6 +751,7 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) { auto file_name = xe::format_string(L"%8X_%u.xtr", title_id, counter_ - 1); auto path = trace_frame_path_ + file_name; trace_writer_.Open(path, title_id); + InitializeTrace(); } } diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index dad797b05..c5d32ec48 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -237,6 +237,9 @@ class CommandProcessor { IndexBufferInfo* index_buffer_info) = 0; virtual bool IssueCopy() = 0; + virtual void InitializeTrace() = 0; + virtual void FinalizeTrace() = 0; + Memory* memory_ = nullptr; kernel::KernelState* kernel_state_ = nullptr; GraphicsSystem* graphics_system_ = nullptr; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index ce0a136c8..4ff7bfba7 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -672,7 +672,8 @@ bool D3D12CommandProcessor::SetupContext() { sampler_heap_pool_ = std::make_unique( context, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2048); - shared_memory_ = std::make_unique(this, memory_); + shared_memory_ = + std::make_unique(this, memory_, &trace_writer_); if (!shared_memory_->Initialize()) { XELOGE("Failed to initialize shared memory"); return false; @@ -700,8 +701,8 @@ bool D3D12CommandProcessor::SetupContext() { return false; } - primitive_converter_ = - std::make_unique(this, register_file_, memory_); + primitive_converter_ = std::make_unique( + this, register_file_, memory_, &trace_writer_); if 
(!primitive_converter_->Initialize()) { XELOGE("Failed to initialize the geometric primitive converter"); return false; @@ -1655,6 +1656,19 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, return true; } +void D3D12CommandProcessor::InitializeTrace() { + BeginFrame(); + bool anySubmitted = false; + anySubmitted |= shared_memory_->InitializeTraceSubmitDownloads(); + if (anySubmitted) { + EndFrame(); + GetD3D12Context()->AwaitAllFramesCompletion(); + shared_memory_->InitializeTraceCompleteDownloads(); + } +} + +void D3D12CommandProcessor::FinalizeTrace() {} + bool D3D12CommandProcessor::IssueCopy() { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index c5833c7bc..57133ca7d 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -158,6 +158,9 @@ class D3D12CommandProcessor : public CommandProcessor { IndexBufferInfo* index_buffer_info) override; bool IssueCopy() override; + void InitializeTrace() override; + void FinalizeTrace() override; + private: enum RootParameter : UINT { // These are always present. 
diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index 9ddeca74f..80db854de 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -41,10 +41,12 @@ constexpr uint32_t PrimitiveConverter::kStaticIBTotalCount; PrimitiveConverter::PrimitiveConverter(D3D12CommandProcessor* command_processor, RegisterFile* register_file, - Memory* memory) + Memory* memory, + TraceWriter* trace_writer) : command_processor_(command_processor), register_file_(register_file), - memory_(memory) { + memory_(memory), + trace_writer_(trace_writer) { system_page_size_ = uint32_t(memory::page_size()); } @@ -248,6 +250,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( address &= index_32bit ? 0x1FFFFFFC : 0x1FFFFFFE; uint32_t index_size = index_32bit ? sizeof(uint32_t) : sizeof(uint16_t); + uint32_t index_buffer_size = index_size * index_count; uint32_t address_last = address + index_size * (index_count - 1); // Create the cache entry, currently only for the key. @@ -305,6 +308,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( if (source_type == PrimitiveType::kTriangleFan) { // Triangle fans are not supported by Direct3D 12 at all. conversion_needed = true; + trace_writer_->WriteMemoryRead(address, index_buffer_size); if (reset) { uint32_t current_fan_index_count = 0; for (uint32_t i = 0; i < index_count; ++i) { @@ -327,6 +331,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( // Check if the restart index is used at all in this buffer because reading // vertices from a default heap is faster than from an upload heap. conversion_needed = false; + trace_writer_->WriteMemoryRead(address, index_buffer_size); #if XE_ARCH_AMD64 // Will use SIMD to copy 16-byte blocks using _mm_or_si128. 
simd = true; @@ -412,6 +417,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( #endif // XE_ARCH_AMD64 } else if (source_type == PrimitiveType::kLineLoop) { conversion_needed = true; + trace_writer_->WriteMemoryRead(address, index_buffer_size); if (reset) { reset_actually_used = false; uint32_t current_strip_index_count = 0; @@ -437,6 +443,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( } } else if (source_type == PrimitiveType::kQuadList) { conversion_needed = true; + trace_writer_->WriteMemoryRead(address, index_buffer_size); converted_index_count = (index_count >> 2) * 6; } converted_indices.converted_index_count = converted_index_count; @@ -739,6 +746,12 @@ D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer( return D3D12_GPU_VIRTUAL_ADDRESS(0); } +void PrimitiveConverter::InitializeTrace() { + // WriteMemoryRead must not be skipped. + converted_indices_cache_.clear(); + memory_regions_used_ = 0; +} + } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/primitive_converter.h b/src/xenia/gpu/d3d12/primitive_converter.h index 12812d3e1..32cc0f8de 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.h +++ b/src/xenia/gpu/d3d12/primitive_converter.h @@ -15,6 +15,7 @@ #include #include "xenia/gpu/register_file.h" +#include "xenia/gpu/trace_writer.h" #include "xenia/gpu/xenos.h" #include "xenia/memory.h" #include "xenia/ui/d3d12/d3d12_context.h" @@ -37,7 +38,8 @@ class D3D12CommandProcessor; class PrimitiveConverter { public: PrimitiveConverter(D3D12CommandProcessor* command_processor, - RegisterFile* register_file, Memory* memory); + RegisterFile* register_file, Memory* memory, + TraceWriter* trace_writer); ~PrimitiveConverter(); bool Initialize(); @@ -80,6 +82,8 @@ class PrimitiveConverter { PrimitiveType source_type, uint32_t index_count, uint32_t& index_count_out) const; + void InitializeTrace(); + private: // simd_offset is source address & 15 - if 
SIMD is used, the source and the // target must have the same alignment within one register. 0 is optimal when @@ -98,6 +102,7 @@ class PrimitiveConverter { D3D12CommandProcessor* command_processor_; RegisterFile* register_file_; Memory* memory_; + TraceWriter* trace_writer_; std::unique_ptr buffer_pool_ = nullptr; diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index 11d2c6e69..08caf89fd 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -11,6 +11,7 @@ #include #include +#include #include "xenia/base/assert.h" #include "xenia/base/cvar.h" @@ -42,8 +43,10 @@ constexpr uint32_t SharedMemory::kWatchRangePoolSize; constexpr uint32_t SharedMemory::kWatchNodePoolSize; SharedMemory::SharedMemory(D3D12CommandProcessor* command_processor, - Memory* memory) - : command_processor_(command_processor), memory_(memory) { + Memory* memory, TraceWriter* trace_writer) + : command_processor_(command_processor), + memory_(memory), + trace_writer_(trace_writer) { page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size())); page_count_ = kBufferSize >> page_size_log2_; uint32_t page_bitmap_length = page_count_ >> 6; @@ -133,10 +136,14 @@ bool SharedMemory::Initialize() { physical_write_watch_handle_ = memory_->RegisterPhysicalWriteWatch(MemoryWriteCallbackThunk, this); + ResetTraceGPUWrittenBuffer(); + return true; } void SharedMemory::Shutdown() { + ResetTraceGPUWrittenBuffer(); + // TODO(Triang3l): Do something in case any watches are still registered. 
if (physical_write_watch_handle_ != nullptr) { @@ -365,6 +372,8 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { for (auto upload_range : upload_ranges_) { uint32_t upload_range_start = upload_range.first; uint32_t upload_range_length = upload_range.second; + trace_writer_->WriteMemoryRead(upload_range_start << page_size_log2_, + upload_range_length << page_size_log2_); while (upload_range_length != 0) { ID3D12Resource* upload_buffer; uint32_t upload_buffer_offset, upload_buffer_size; @@ -376,7 +385,6 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { return false; } uint32_t upload_buffer_pages = upload_buffer_size >> page_size_log2_; - // No mutex holding here! MakeRangeValid(upload_range_start, upload_buffer_pages, false); std::memcpy( upload_buffer_mapping, @@ -441,7 +449,6 @@ void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) { // Mark the range as valid (so pages are not reuploaded until modified by the // CPU) and watch it so the CPU can reuse it and this will be caught. - // No mutex holding here! MakeRangeValid(page_first, page_last - page_first + 1, true); } @@ -654,6 +661,157 @@ void SharedMemory::WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle) { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } +bool SharedMemory::InitializeTraceSubmitDownloads() { + // Invalidate the entire memory CPU->GPU memory copy so all the history + // doesn't have to be written into every frame trace, and collect the list of + // ranges with data modified on the GPU. 
+ ResetTraceGPUWrittenBuffer(); + uint32_t gpu_written_page_count = 0; + + { + auto global_lock = global_critical_region_.Acquire(); + uint32_t fire_watches_range_start = UINT32_MAX; + uint32_t gpu_written_range_start = UINT32_MAX; + for (uint32_t i = 0; i * 2 < valid_and_gpu_written_pages_.size(); ++i) { + uint64_t previously_valid_block = valid_and_gpu_written_pages_[i * 2]; + uint64_t gpu_written_block = valid_and_gpu_written_pages_[i * 2 + 1]; + valid_and_gpu_written_pages_[i * 2] = gpu_written_block; + + // Fire watches on the invalidated pages. + uint64_t fire_watches_block = previously_valid_block & ~gpu_written_block; + uint64_t fire_watches_break_block = ~fire_watches_block; + while (true) { + uint32_t fire_watches_block_page; + if (!xe::bit_scan_forward(fire_watches_range_start == UINT32_MAX + ? fire_watches_block + : fire_watches_break_block, + &fire_watches_block_page)) { + break; + } + uint32_t fire_watches_page = (i << 6) + fire_watches_block_page; + if (fire_watches_range_start == UINT32_MAX) { + fire_watches_range_start = fire_watches_page; + } else { + FireWatches(fire_watches_range_start, fire_watches_page - 1, false); + fire_watches_range_start = UINT32_MAX; + } + uint64_t fire_watches_block_mask = + ~((1ull << fire_watches_block_page) - 1); + fire_watches_block &= fire_watches_block_mask; + fire_watches_break_block &= fire_watches_block_mask; + } + + // Add to the GPU-written ranges. + uint64_t gpu_written_break_block = ~gpu_written_block; + while (true) { + uint32_t gpu_written_block_page; + if (!xe::bit_scan_forward(gpu_written_range_start == UINT32_MAX + ? 
gpu_written_block + : gpu_written_break_block, + &gpu_written_block_page)) { + break; + } + uint32_t gpu_written_page = (i << 6) + gpu_written_block_page; + if (gpu_written_range_start == UINT32_MAX) { + gpu_written_range_start = gpu_written_page; + } else { + uint32_t gpu_written_range_length = + gpu_written_page - gpu_written_range_start; + trace_gpu_written_ranges_.push_back( + std::make_pair(gpu_written_range_start << page_size_log2_, + gpu_written_range_length << page_size_log2_)); + gpu_written_page_count += gpu_written_range_length; + gpu_written_range_start = UINT32_MAX; + } + uint64_t gpu_written_block_mask = + ~((1ull << gpu_written_block_page) - 1); + gpu_written_block &= gpu_written_block_mask; + gpu_written_break_block &= gpu_written_block_mask; + } + } + if (fire_watches_range_start != UINT32_MAX) { + FireWatches(fire_watches_range_start, page_count_ - 1, false); + } + if (gpu_written_range_start != UINT32_MAX) { + uint32_t gpu_written_range_length = page_count_ - gpu_written_range_start; + trace_gpu_written_ranges_.push_back( + std::make_pair(gpu_written_range_start << page_size_log2_, + gpu_written_range_length << page_size_log2_)); + gpu_written_page_count += gpu_written_range_length; + } + } + + // Request downloading of GPU-written memory. 
+ if (!gpu_written_page_count) { + return false; + } + D3D12_RESOURCE_DESC gpu_written_buffer_desc; + ui::d3d12::util::FillBufferResourceDesc( + gpu_written_buffer_desc, gpu_written_page_count << page_size_log2_, + D3D12_RESOURCE_FLAG_NONE); + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); + if (FAILED(device->CreateCommittedResource( + &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, + &gpu_written_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(&trace_gpu_written_buffer_)))) { + XELOGE( + "Failed to create a %u KB GPU-written memory download buffer for frame " + "tracing", + gpu_written_page_count << page_size_log2_ >> 10); + ResetTraceGPUWrittenBuffer(); + return false; + } + auto command_list = command_processor_->GetDeferredCommandList(); + UseAsCopySource(); + command_processor_->SubmitBarriers(); + uint32_t gpu_written_buffer_offset = 0; + for (auto& gpu_written_submit_range : trace_gpu_written_ranges_) { + // For cases like resolution scale, when the data may not be actually + // written, just marked as valid. 
+ if (!MakeTilesResident(gpu_written_submit_range.first, + gpu_written_submit_range.second)) { + gpu_written_submit_range.second = 0; + continue; + } + command_list->D3DCopyBufferRegion( + trace_gpu_written_buffer_, gpu_written_buffer_offset, buffer_, + gpu_written_submit_range.first, gpu_written_submit_range.second); + gpu_written_buffer_offset += gpu_written_submit_range.second; + } + return true; +} + +void SharedMemory::InitializeTraceCompleteDownloads() { + if (!trace_gpu_written_buffer_) { + return; + } + void* download_mapping; + if (SUCCEEDED( + trace_gpu_written_buffer_->Map(0, nullptr, &download_mapping))) { + uint32_t gpu_written_buffer_offset = 0; + for (auto gpu_written_submit_range : trace_gpu_written_ranges_) { + trace_writer_->WriteMemoryWrite( + gpu_written_submit_range.first, gpu_written_submit_range.second, + reinterpret_cast(download_mapping) + + gpu_written_buffer_offset); + // The ranges are packed back to back in the readback buffer, so step + // past this range's bytes before writing the next one. + gpu_written_buffer_offset += gpu_written_submit_range.second; + } + D3D12_RANGE download_write_range = {}; + trace_gpu_written_buffer_->Unmap(0, &download_write_range); + } else { + XELOGE( + "Failed to map the GPU-written memory download buffer for frame " + "tracing"); + } + ResetTraceGPUWrittenBuffer(); +} + +void SharedMemory::ResetTraceGPUWrittenBuffer() { + trace_gpu_written_ranges_.clear(); + trace_gpu_written_ranges_.shrink_to_fit(); + ui::d3d12::util::ReleaseAndNull(trace_gpu_written_buffer_); +} + } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index 1b2d03834..60f689816 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -12,9 +12,11 @@ #include #include +#include #include #include "xenia/base/mutex.h" +#include "xenia/gpu/trace_writer.h" #include "xenia/memory.h" #include "xenia/ui/d3d12/d3d12_api.h" #include "xenia/ui/d3d12/pools.h" @@ -30,7 +32,8 @@ class D3D12CommandProcessor; // system page size granularity. 
class SharedMemory { public: - SharedMemory(D3D12CommandProcessor* command_processor, Memory* memory); + SharedMemory(D3D12CommandProcessor* command_processor, Memory* memory, + TraceWriter* trace_writer); ~SharedMemory(); bool Initialize(); @@ -124,6 +127,10 @@ class SharedMemory { void WriteRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); void WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); + // Returns true if any downloads were submitted to the command processor. + bool InitializeTraceSubmitDownloads(); + void InitializeTraceCompleteDownloads(); + private: bool AreTiledResourcesUsed() const; @@ -132,8 +139,8 @@ class SharedMemory { bool written_by_gpu); D3D12CommandProcessor* command_processor_; - Memory* memory_; + TraceWriter* trace_writer_; // The 512 MB tiled buffer. static constexpr uint32_t kBufferSizeLog2 = 29; @@ -268,6 +275,13 @@ class SharedMemory { std::unique_ptr upload_buffer_pool_ = nullptr; void TransitionBuffer(D3D12_RESOURCE_STATES new_state); + + // GPU-written memory downloading for traces. + // Start address, length - both in bytes (page indices shifted left by + // page_size_log2_), in the order the ranges are packed in the buffer. + std::vector> trace_gpu_written_ranges_; + // Created temporarily, only for downloading. 
+ ID3D12Resource* trace_gpu_written_buffer_ = nullptr; + void ResetTraceGPUWrittenBuffer(); }; } // namespace d3d12 diff --git a/src/xenia/gpu/null/null_command_processor.cc b/src/xenia/gpu/null/null_command_processor.cc index 86efc07c8..f90486217 100644 --- a/src/xenia/gpu/null/null_command_processor.cc +++ b/src/xenia/gpu/null/null_command_processor.cc @@ -45,6 +45,10 @@ bool NullCommandProcessor::IssueDraw(PrimitiveType prim_type, bool NullCommandProcessor::IssueCopy() { return true; } +void NullCommandProcessor::InitializeTrace() {} + +void NullCommandProcessor::FinalizeTrace() {} + } // namespace null } // namespace gpu } // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/null/null_command_processor.h b/src/xenia/gpu/null/null_command_processor.h index 3eef6108d..02a956fcf 100644 --- a/src/xenia/gpu/null/null_command_processor.h +++ b/src/xenia/gpu/null/null_command_processor.h @@ -39,6 +39,9 @@ class NullCommandProcessor : public CommandProcessor { bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) override; bool IssueCopy() override; + + void InitializeTrace() override; + void FinalizeTrace() override; }; } // namespace null diff --git a/src/xenia/gpu/trace_player.cc b/src/xenia/gpu/trace_player.cc index b79b49df2..dfc918c6b 100644 --- a/src/xenia/gpu/trace_player.cc +++ b/src/xenia/gpu/trace_player.cc @@ -21,11 +21,11 @@ TracePlayer::TracePlayer(xe::ui::Loop* loop, GraphicsSystem* graphics_system) graphics_system_(graphics_system), current_frame_index_(0), current_command_index_(-1) { - // Need to allocate all of physical memory so that we can write to it - // during playback. - graphics_system_->memory() - ->LookupHeapByType(true, 4096) - ->AllocFixed(0, 0x1FFFFFFF, 4096, + // Need to allocate all of physical memory so that we can write to it during + // playback. The 64 KB page heap is larger, covers the entire physical memory, + // so it is used instead of the 4 KB page one. 
+ auto heap = graphics_system_->memory()->LookupHeapByType(true, 64 * 1024); + heap->AllocFixed(heap->heap_base(), heap->heap_size(), heap->page_size(), kMemoryAllocationReserve | kMemoryAllocationCommit, kMemoryProtectRead | kMemoryProtectWrite); @@ -167,7 +167,8 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data, } break; } - case TraceCommandType::kMemoryRead: { + case TraceCommandType::kMemoryRead: + case TraceCommandType::kMemoryWrite: { auto cmd = reinterpret_cast(trace_ptr); trace_ptr += sizeof(*cmd); DecompressMemory(cmd->encoding_format, trace_ptr, cmd->encoded_length, @@ -176,13 +177,6 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data, trace_ptr += cmd->encoded_length; break; } - case TraceCommandType::kMemoryWrite: { - auto cmd = reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd); - // ? - trace_ptr += cmd->encoded_length; - break; - } case TraceCommandType::kEvent: { auto cmd = reinterpret_cast(trace_ptr); trace_ptr += sizeof(*cmd); diff --git a/src/xenia/gpu/trace_writer.cc b/src/xenia/gpu/trace_writer.cc index e41f82643..a2d61c239 100644 --- a/src/xenia/gpu/trace_writer.cc +++ b/src/xenia/gpu/trace_writer.cc @@ -168,11 +168,13 @@ void TraceWriter::WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length) { } } -void TraceWriter::WriteMemoryWrite(uint32_t base_ptr, size_t length) { +void TraceWriter::WriteMemoryWrite(uint32_t base_ptr, size_t length, + const void* host_ptr) { if (!file_) { return; } - WriteMemoryCommand(TraceCommandType::kMemoryWrite, base_ptr, length); + WriteMemoryCommand(TraceCommandType::kMemoryWrite, base_ptr, length, + host_ptr); } class SnappySink : public snappy::Sink { @@ -188,13 +190,17 @@ class SnappySink : public snappy::Sink { }; void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr, - size_t length) { + size_t length, const void* host_ptr) { MemoryCommand cmd; cmd.type = type; cmd.base_ptr = base_ptr; cmd.encoding_format = MemoryEncodingFormat::kNone; 
cmd.encoded_length = cmd.decoded_length = static_cast(length); + if (!host_ptr) { + host_ptr = membase_ + cmd.base_ptr; + } + bool compress = compress_output_ && length > compression_threshold_; if (compress) { // Write the header now so we reserve space in the buffer. @@ -204,8 +210,7 @@ void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr, // Stream the content right to the buffer. snappy::ByteArraySource snappy_source( - reinterpret_cast(membase_ + cmd.base_ptr), - cmd.decoded_length); + reinterpret_cast(host_ptr), cmd.decoded_length); SnappySink snappy_sink(file_); cmd.encoded_length = static_cast(snappy::Compress(&snappy_source, &snappy_sink)); @@ -219,7 +224,7 @@ void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr, // Uncompressed - write buffer directly to the file. cmd.encoding_format = MemoryEncodingFormat::kNone; fwrite(&cmd, 1, sizeof(cmd), file_); - fwrite(membase_ + cmd.base_ptr, 1, cmd.decoded_length, file_); + fwrite(host_ptr, 1, cmd.decoded_length, file_); } } diff --git a/src/xenia/gpu/trace_writer.h b/src/xenia/gpu/trace_writer.h index 474d846f5..d7379df0f 100644 --- a/src/xenia/gpu/trace_writer.h +++ b/src/xenia/gpu/trace_writer.h @@ -39,12 +39,13 @@ class TraceWriter { void WriteMemoryRead(uint32_t base_ptr, size_t length); void WriteMemoryReadCached(uint32_t base_ptr, size_t length); void WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length); - void WriteMemoryWrite(uint32_t base_ptr, size_t length); + void WriteMemoryWrite(uint32_t base_ptr, size_t length, + const void* host_ptr = nullptr); void WriteEvent(EventCommand::Type event_type); private: void WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr, - size_t length); + size_t length, const void* host_ptr = nullptr); std::set cached_memory_reads_; uint8_t* membase_; diff --git a/src/xenia/gpu/vk/vulkan_command_processor.cc b/src/xenia/gpu/vk/vulkan_command_processor.cc index c2817c139..d86199508 100644 --- 
a/src/xenia/gpu/vk/vulkan_command_processor.cc +++ b/src/xenia/gpu/vk/vulkan_command_processor.cc @@ -41,6 +41,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, bool VulkanCommandProcessor::IssueCopy() { return true; } +void VulkanCommandProcessor::InitializeTrace() {} + +void VulkanCommandProcessor::FinalizeTrace() {} + } // namespace vk } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vk/vulkan_command_processor.h b/src/xenia/gpu/vk/vulkan_command_processor.h index e2f4d3b8d..fc80bea9c 100644 --- a/src/xenia/gpu/vk/vulkan_command_processor.h +++ b/src/xenia/gpu/vk/vulkan_command_processor.h @@ -38,6 +38,9 @@ class VulkanCommandProcessor : public CommandProcessor { bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) override; bool IssueCopy() override; + + void InitializeTrace() override; + void FinalizeTrace() override; }; } // namespace vk diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 50a3094a4..821ed5083 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -1322,6 +1322,10 @@ bool VulkanCommandProcessor::IssueCopy() { return true; } +void VulkanCommandProcessor::InitializeTrace() {} + +void VulkanCommandProcessor::FinalizeTrace() {} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 8cd1710b6..d7f936e4e 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -94,6 +94,9 @@ class VulkanCommandProcessor : public CommandProcessor { VulkanShader* pixel_shader); bool IssueCopy() override; + void InitializeTrace() override; + void FinalizeTrace() override; + xe::ui::vulkan::VulkanDevice* device_ = nullptr; // front buffer / back buffer memory