diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index acb991fb5..51dcc9ac0 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -119,6 +119,8 @@ void CommandProcessor::EndTracing() { return; } assert_true(trace_state_ == TraceState::kStreaming); + FinalizeTrace(); + trace_state_ = TraceState::kDisabled; trace_writer_.Close(); } @@ -437,6 +439,7 @@ uint32_t CommandProcessor::ExecutePrimaryBuffer(uint32_t read_index, auto file_name = xe::format_string(L"%8X_stream.xtr", title_id); auto path = trace_stream_path_ + file_name; trace_writer_.Open(path, title_id); + InitializeTrace(); } // Adjust pointer base. @@ -738,6 +741,7 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) { trace_writer_.WriteEvent(EventCommand::Type::kSwap); trace_writer_.Flush(); if (trace_state_ == TraceState::kSingleFrame) { + FinalizeTrace(); trace_state_ = TraceState::kDisabled; trace_writer_.Close(); } @@ -747,6 +751,7 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) { auto file_name = xe::format_string(L"%8X_%u.xtr", title_id, counter_ - 1); auto path = trace_frame_path_ + file_name; trace_writer_.Open(path, title_id); + InitializeTrace(); } } diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index dad797b05..2b1899bd2 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -134,6 +134,8 @@ class CommandProcessor { virtual void BeginTracing(const std::wstring& root_path); virtual void EndTracing(); + virtual void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) = 0; + void InitializeRingBuffer(uint32_t ptr, uint32_t page_count); void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); @@ -237,6 +239,9 @@ class CommandProcessor { IndexBufferInfo* index_buffer_info) = 0; virtual bool IssueCopy() = 0; + virtual void InitializeTrace() = 0; + virtual void FinalizeTrace() 
= 0; + Memory* memory_ = nullptr; kernel::KernelState* kernel_state_ = nullptr; GraphicsSystem* graphics_system_ = nullptr; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index ce0a136c8..330bfe020 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -80,6 +80,12 @@ void D3D12CommandProcessor::RequestFrameTrace(const std::wstring& root_path) { CommandProcessor::RequestFrameTrace(root_path); } +void D3D12CommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, + uint32_t length) { + shared_memory_->MemoryWriteCallback(base_ptr, length, true); + primitive_converter_->MemoryWriteCallback(base_ptr, length, true); +} + bool D3D12CommandProcessor::IsROVUsedForEDRAM() const { if (!cvars::d3d12_edram_rov) { return false; @@ -643,6 +649,56 @@ std::wstring D3D12CommandProcessor::GetWindowTitleText() const { } } +std::unique_ptr D3D12CommandProcessor::Capture() { + ID3D12Resource* readback_buffer = + RequestReadbackBuffer(uint32_t(swap_texture_copy_size_)); + if (!readback_buffer) { + return nullptr; + } + BeginFrame(); + PushTransitionBarrier(swap_texture_, + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + D3D12_RESOURCE_STATE_COPY_SOURCE); + SubmitBarriers(); + D3D12_TEXTURE_COPY_LOCATION location_source, location_dest; + location_source.pResource = swap_texture_; + location_source.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + location_source.SubresourceIndex = 0; + location_dest.pResource = readback_buffer; + location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + location_dest.PlacedFootprint = swap_texture_copy_footprint_; + deferred_command_list_->CopyTexture(location_dest, location_source); + PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_COPY_SOURCE, + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + EndFrame(); + GetD3D12Context()->AwaitAllFramesCompletion(); + D3D12_RANGE readback_range; + readback_range.Begin = 
swap_texture_copy_footprint_.Offset; + readback_range.End = swap_texture_copy_size_; + void* readback_mapping; + if (FAILED(readback_buffer->Map(0, &readback_range, &readback_mapping))) { + return nullptr; + } + std::unique_ptr raw_image(new xe::ui::RawImage()); + auto swap_texture_size = GetSwapTextureSize(); + raw_image->width = swap_texture_size.first; + raw_image->height = swap_texture_size.second; + raw_image->stride = swap_texture_size.first * 4; + raw_image->data.resize(raw_image->stride * swap_texture_size.second); + const uint8_t* readback_source_data = + reinterpret_cast(readback_mapping) + + swap_texture_copy_footprint_.Offset; + for (uint32_t i = 0; i < swap_texture_size.second; ++i) { + std::memcpy(raw_image->data.data() + i * raw_image->stride, + readback_source_data + + i * swap_texture_copy_footprint_.Footprint.RowPitch, + raw_image->stride); + } + // Unmap the readback buffer that was mapped above (not the gamma ramp upload + // buffer); the CPU wrote nothing to it, hence the empty written range. + D3D12_RANGE readback_written_range = {}; + readback_buffer->Unmap(0, &readback_written_range); + return raw_image; +} + bool D3D12CommandProcessor::SetupContext() { if (!CommandProcessor::SetupContext()) { XELOGE("Failed to initialize base command processor context"); @@ -672,7 +728,8 @@ bool D3D12CommandProcessor::SetupContext() { sampler_heap_pool_ = std::make_unique( context, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2048); - shared_memory_ = std::make_unique(this, memory_); + shared_memory_ = + std::make_unique(this, memory_, &trace_writer_); if (!shared_memory_->Initialize()) { XELOGE("Failed to initialize shared memory"); return false; @@ -686,7 +743,7 @@ bool D3D12CommandProcessor::SetupContext() { } render_target_cache_ = - std::make_unique(this, register_file_); + std::make_unique(this, register_file_, &trace_writer_); if (!render_target_cache_->Initialize(texture_cache_.get())) { XELOGE("Failed to initialize the render target cache"); return false; @@ -700,8 
+757,8 @@ bool D3D12CommandProcessor::SetupContext() { return false; } - primitive_converter_ = - std::make_unique(this, register_file_, 
memory_); + primitive_converter_ = std::make_unique( + this, register_file_, memory_, &trace_writer_); if (!primitive_converter_->Initialize()) { XELOGE("Failed to initialize the geometric primitive converter"); return false; @@ -759,12 +816,9 @@ bool D3D12CommandProcessor::SetupContext() { D3D12_RESOURCE_DESC swap_texture_desc; swap_texture_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; swap_texture_desc.Alignment = 0; - swap_texture_desc.Width = kSwapTextureWidth; - swap_texture_desc.Height = kSwapTextureHeight; - if (texture_cache_->IsResolutionScale2X()) { - swap_texture_desc.Width *= 2; - swap_texture_desc.Height *= 2; - } + auto swap_texture_size = GetSwapTextureSize(); + swap_texture_desc.Width = swap_texture_size.first; + swap_texture_desc.Height = swap_texture_size.second; swap_texture_desc.DepthOrArraySize = 1; swap_texture_desc.MipLevels = 1; swap_texture_desc.Format = ui::d3d12::D3D12Context::kSwapChainFormat; @@ -780,6 +834,9 @@ bool D3D12CommandProcessor::SetupContext() { XELOGE("Failed to create the command processor front buffer"); return false; } + device->GetCopyableFootprints(&swap_texture_desc, 0, 1, 0, + &swap_texture_copy_footprint_, nullptr, nullptr, + &swap_texture_copy_size_); D3D12_DESCRIPTOR_HEAP_DESC swap_descriptor_heap_desc; swap_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; swap_descriptor_heap_desc.NumDescriptors = 1; @@ -1044,12 +1101,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; SubmitBarriers(); - uint32_t swap_texture_width = kSwapTextureWidth; - uint32_t swap_texture_height = kSwapTextureHeight; - if (texture_cache_->IsResolutionScale2X()) { - swap_texture_width *= 2; - swap_texture_height *= 2; - } + auto swap_texture_size = GetSwapTextureSize(); // Draw the stretching rectangle. 
deferred_command_list_->D3DOMSetRenderTargets(1, &swap_texture_rtv_, TRUE, @@ -1057,16 +1109,16 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, D3D12_VIEWPORT viewport; viewport.TopLeftX = 0.0f; viewport.TopLeftY = 0.0f; - viewport.Width = float(swap_texture_width); - viewport.Height = float(swap_texture_height); + viewport.Width = float(swap_texture_size.first); + viewport.Height = float(swap_texture_size.second); viewport.MinDepth = 0.0f; viewport.MaxDepth = 0.0f; deferred_command_list_->RSSetViewport(viewport); D3D12_RECT scissor; scissor.left = 0; scissor.top = 0; - scissor.right = swap_texture_width; - scissor.bottom = swap_texture_height; + scissor.right = swap_texture_size.first; + scissor.bottom = swap_texture_size.second; deferred_command_list_->RSSetScissorRect(scissor); D3D12GraphicsSystem* graphics_system = static_cast(graphics_system_); @@ -1084,8 +1136,8 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, // Don't care about graphics state because the frame is ending anyway. 
{ std::lock_guard lock(swap_state_.mutex); - swap_state_.width = swap_texture_width; - swap_state_.height = swap_texture_height; + swap_state_.width = swap_texture_size.first; + swap_state_.height = swap_texture_size.second; swap_state_.front_buffer_texture = reinterpret_cast(swap_texture_srv_descriptor_heap_); } @@ -1655,6 +1707,19 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, return true; } +void D3D12CommandProcessor::InitializeTrace() { + BeginFrame(); + bool anySubmitted = false; + anySubmitted |= shared_memory_->InitializeTraceSubmitDownloads(); + if (anySubmitted) { + EndFrame(); + GetD3D12Context()->AwaitAllFramesCompletion(); + shared_memory_->InitializeTraceCompleteDownloads(); + } +} + +void D3D12CommandProcessor::FinalizeTrace() {} + bool D3D12CommandProcessor::IssueCopy() { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index c5833c7bc..006eb1062 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -15,6 +15,7 @@ #include #include #include +#include #include "xenia/gpu/command_processor.h" #include "xenia/gpu/d3d12/d3d12_graphics_system.h" @@ -45,6 +46,8 @@ class D3D12CommandProcessor : public CommandProcessor { void RequestFrameTrace(const std::wstring& root_path) override; + void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override; + // Needed by everything that owns transient objects. xe::ui::d3d12::D3D12Context* GetD3D12Context() const { return static_cast(context_.get()); @@ -141,6 +144,8 @@ class D3D12CommandProcessor : public CommandProcessor { // Returns the text to display in the GPU backend name in the window title. 
std::wstring GetWindowTitleText() const; + std::unique_ptr Capture(); + protected: bool SetupContext() override; void ShutdownContext() override; @@ -158,6 +163,9 @@ class D3D12CommandProcessor : public CommandProcessor { IndexBufferInfo* index_buffer_info) override; bool IssueCopy() override; + void InitializeTrace() override; + void FinalizeTrace() override; + private: enum RootParameter : UINT { // These are always present. @@ -265,7 +273,15 @@ class D3D12CommandProcessor : public CommandProcessor { static constexpr uint32_t kSwapTextureWidth = 1280; static constexpr uint32_t kSwapTextureHeight = 720; + inline std::pair GetSwapTextureSize() const { + if (texture_cache_->IsResolutionScale2X()) { + return std::make_pair(kSwapTextureWidth * 2, kSwapTextureHeight * 2); + } + return std::make_pair(kSwapTextureWidth, kSwapTextureHeight); + } ID3D12Resource* swap_texture_ = nullptr; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT swap_texture_copy_footprint_; + UINT64 swap_texture_copy_size_; ID3D12DescriptorHeap* swap_texture_rtv_descriptor_heap_ = nullptr; D3D12_CPU_DESCRIPTOR_HANDLE swap_texture_rtv_; ID3D12DescriptorHeap* swap_texture_srv_descriptor_heap_ = nullptr; diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index 8e86d6490..30afeac08 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -190,6 +190,15 @@ void D3D12GraphicsSystem::Shutdown() { GraphicsSystem::Shutdown(); } +std::unique_ptr D3D12GraphicsSystem::Capture() { + auto d3d12_command_processor = + static_cast(command_processor()); + if (!d3d12_command_processor) { + return nullptr; + } + return d3d12_command_processor->Capture(); +} + void D3D12GraphicsSystem::AwaitFrontBufferUnused() { if (display_context_ != nullptr) { display_context_->AwaitAllFramesCompletion(); diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.h b/src/xenia/gpu/d3d12/d3d12_graphics_system.h index eb8be31d4..dbd6efa03 
100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.h +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.h @@ -34,6 +34,8 @@ class D3D12GraphicsSystem : public GraphicsSystem { ui::Window* target_window) override; void Shutdown() override; + std::unique_ptr Capture() override; + void AwaitFrontBufferUnused(); // Draws a texture covering the entire viewport to the render target currently diff --git a/src/xenia/gpu/d3d12/d3d12_trace_dump_main.cc b/src/xenia/gpu/d3d12/d3d12_trace_dump_main.cc new file mode 100644 index 000000000..b10a9c1e5 --- /dev/null +++ b/src/xenia/gpu/d3d12/d3d12_trace_dump_main.cc @@ -0,0 +1,40 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2019 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/base/logging.h" +#include "xenia/base/main.h" +#include "xenia/gpu/d3d12/d3d12_command_processor.h" +#include "xenia/gpu/d3d12/d3d12_graphics_system.h" +#include "xenia/gpu/trace_dump.h" + +namespace xe { +namespace gpu { +namespace d3d12 { + +using namespace xe::gpu::xenos; + +class D3D12TraceDump : public TraceDump { + public: + std::unique_ptr CreateGraphicsSystem() override { + return std::unique_ptr(new D3D12GraphicsSystem()); + } +}; + +int trace_dump_main(const std::vector& args) { + D3D12TraceDump trace_dump; + return trace_dump.Main(args); +} + +} // namespace d3d12 +} // namespace gpu +} // namespace xe + +DEFINE_ENTRY_POINT(L"xenia-gpu-d3d12-trace-dump", + xe::gpu::d3d12::trace_dump_main, "some.trace", + "target_trace_file"); diff --git a/src/xenia/gpu/d3d12/premake5.lua b/src/xenia/gpu/d3d12/premake5.lua index a3f28e792..001be5b88 100644 --- a/src/xenia/gpu/d3d12/premake5.lua +++ 
b/src/xenia/gpu/d3d12/premake5.lua @@ -17,3 +17,47 @@ project("xenia-gpu-d3d12") files({ "shaders/bin/*.h", }) + +group("src") +project("xenia-gpu-d3d12-trace-dump") + uuid("686b859c-0046-44c4-a02c-41fc3fb75698") + kind("ConsoleApp") + language("C++") + links({ + "aes_128", + "capstone", + "dxbc", + "imgui", + "libavcodec", + "libavutil", + "mspack", + "snappy", + "xenia-apu", + "xenia-apu-nop", + "xenia-base", + "xenia-core", + "xenia-cpu", + "xenia-cpu-backend-x64", + "xenia-gpu", + "xenia-gpu-d3d12", + "xenia-hid", + "xenia-hid-nop", + "xenia-kernel", + "xenia-ui", + "xenia-ui-d3d12", + "xenia-vfs", + "xxhash", + }) + files({ + "d3d12_trace_dump_main.cc", + "../../base/main_"..platform_suffix..".cc", + }) + -- Only create the .user file if it doesn't already exist. + local user_file = project_root.."/build/xenia-gpu-d3d12-trace-dump.vcxproj.user" + if not os.isfile(user_file) then + debugdir(project_root) + debugargs({ + "2>&1", + "1>scratch/stdout-trace-dump.txt", + }) + end \ No newline at end of file diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index 9ddeca74f..80db854de 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -41,10 +41,12 @@ constexpr uint32_t PrimitiveConverter::kStaticIBTotalCount; PrimitiveConverter::PrimitiveConverter(D3D12CommandProcessor* command_processor, RegisterFile* register_file, - Memory* memory) + Memory* memory, + TraceWriter* trace_writer) : command_processor_(command_processor), register_file_(register_file), - memory_(memory) { + memory_(memory), + trace_writer_(trace_writer) { system_page_size_ = uint32_t(memory::page_size()); } @@ -248,6 +250,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( address &= index_32bit ? 0x1FFFFFFC : 0x1FFFFFFE; uint32_t index_size = index_32bit ? 
sizeof(uint32_t) : sizeof(uint16_t); + uint32_t index_buffer_size = index_size * index_count; uint32_t address_last = address + index_size * (index_count - 1); // Create the cache entry, currently only for the key. @@ -305,6 +308,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( if (source_type == PrimitiveType::kTriangleFan) { // Triangle fans are not supported by Direct3D 12 at all. conversion_needed = true; + trace_writer_->WriteMemoryRead(address, index_buffer_size); if (reset) { uint32_t current_fan_index_count = 0; for (uint32_t i = 0; i < index_count; ++i) { @@ -327,6 +331,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( // Check if the restart index is used at all in this buffer because reading // vertices from a default heap is faster than from an upload heap. conversion_needed = false; + trace_writer_->WriteMemoryRead(address, index_buffer_size); #if XE_ARCH_AMD64 // Will use SIMD to copy 16-byte blocks using _mm_or_si128. simd = true; @@ -412,6 +417,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( #endif // XE_ARCH_AMD64 } else if (source_type == PrimitiveType::kLineLoop) { conversion_needed = true; + trace_writer_->WriteMemoryRead(address, index_buffer_size); if (reset) { reset_actually_used = false; uint32_t current_strip_index_count = 0; @@ -437,6 +443,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( } } else if (source_type == PrimitiveType::kQuadList) { conversion_needed = true; + trace_writer_->WriteMemoryRead(address, index_buffer_size); converted_index_count = (index_count >> 2) * 6; } converted_indices.converted_index_count = converted_index_count; @@ -739,6 +746,12 @@ D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer( return D3D12_GPU_VIRTUAL_ADDRESS(0); } +void PrimitiveConverter::InitializeTrace() { + // WriteMemoryRead must not be skipped. 
+ converted_indices_cache_.clear(); + memory_regions_used_ = 0; +} + } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/primitive_converter.h b/src/xenia/gpu/d3d12/primitive_converter.h index 12812d3e1..ea06a1b32 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.h +++ b/src/xenia/gpu/d3d12/primitive_converter.h @@ -15,6 +15,7 @@ #include #include "xenia/gpu/register_file.h" +#include "xenia/gpu/trace_writer.h" #include "xenia/gpu/xenos.h" #include "xenia/memory.h" #include "xenia/ui/d3d12/d3d12_context.h" @@ -37,7 +38,8 @@ class D3D12CommandProcessor; class PrimitiveConverter { public: PrimitiveConverter(D3D12CommandProcessor* command_processor, - RegisterFile* register_file, Memory* memory); + RegisterFile* register_file, Memory* memory, + TraceWriter* trace_writer); ~PrimitiveConverter(); bool Initialize(); @@ -80,6 +82,12 @@ class PrimitiveConverter { PrimitiveType source_type, uint32_t index_count, uint32_t& index_count_out) const; + // Callback for invalidating buffers mid-frame. + std::pair MemoryWriteCallback( + uint32_t physical_address_start, uint32_t length, bool exact_range); + + void InitializeTrace(); + private: // simd_offset is source address & 15 - if SIMD is used, the source and the // target must have the same alignment within one register. 0 is optimal when @@ -88,9 +96,6 @@ class PrimitiveConverter { uint32_t simd_offset, D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out); - // Callback for invalidating buffers mid-frame. 
- std::pair MemoryWriteCallback( - uint32_t physical_address_start, uint32_t length, bool exact_range); static std::pair MemoryWriteCallbackThunk( void* context_ptr, uint32_t physical_address_start, uint32_t length, bool exact_range); @@ -98,6 +103,7 @@ class PrimitiveConverter { D3D12CommandProcessor* command_processor_; RegisterFile* register_file_; Memory* memory_; + TraceWriter* trace_writer_; std::unique_ptr buffer_pool_ = nullptr; diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index 8d32c5be7..7e429fa50 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -99,8 +99,11 @@ const RenderTargetCache::EDRAMLoadStoreModeInfo }; RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor, - RegisterFile* register_file) - : command_processor_(command_processor), register_file_(register_file) {} + RegisterFile* register_file, + TraceWriter* trace_writer) + : command_processor_(command_processor), + register_file_(register_file), + trace_writer_(trace_writer) {} RenderTargetCache::~RenderTargetCache() { Shutdown(); } @@ -1037,6 +1040,7 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, assert_true(fetch.type == 3); assert_true(fetch.endian == Endian::k8in32); assert_true(fetch.size == 6); + trace_writer_->WriteMemoryRead(fetch.address << 2, fetch.size << 2); const uint8_t* src_vertex_address = memory->TranslatePhysical(fetch.address << 2); float vertices[6]; diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index 0ccf8be0b..582299845 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -17,6 +17,7 @@ #include "xenia/gpu/d3d12/shared_memory.h" #include "xenia/gpu/d3d12/texture_cache.h" #include "xenia/gpu/register_file.h" +#include "xenia/gpu/trace_writer.h" #include "xenia/gpu/xenos.h" #include "xenia/memory.h" #include 
"xenia/ui/d3d12/d3d12_api.h" @@ -247,7 +248,7 @@ class RenderTargetCache { }; RenderTargetCache(D3D12CommandProcessor* command_processor, - RegisterFile* register_file); + RegisterFile* register_file, TraceWriter* trace_writer); ~RenderTargetCache(); bool Initialize(const TextureCache* texture_cache); @@ -503,6 +504,7 @@ class RenderTargetCache { D3D12CommandProcessor* command_processor_; RegisterFile* register_file_; + TraceWriter* trace_writer_; // Whether 1 guest pixel is rendered as 2x2 host pixels (currently only // supported with ROV). diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index 11d2c6e69..39dcb679a 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -11,6 +11,7 @@ #include #include +#include #include "xenia/base/assert.h" #include "xenia/base/cvar.h" @@ -42,8 +43,10 @@ constexpr uint32_t SharedMemory::kWatchRangePoolSize; constexpr uint32_t SharedMemory::kWatchNodePoolSize; SharedMemory::SharedMemory(D3D12CommandProcessor* command_processor, - Memory* memory) - : command_processor_(command_processor), memory_(memory) { + Memory* memory, TraceWriter* trace_writer) + : command_processor_(command_processor), + memory_(memory), + trace_writer_(trace_writer) { page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size())); page_count_ = kBufferSize >> page_size_log2_; uint32_t page_bitmap_length = page_count_ >> 6; @@ -133,10 +136,14 @@ bool SharedMemory::Initialize() { physical_write_watch_handle_ = memory_->RegisterPhysicalWriteWatch(MemoryWriteCallbackThunk, this); + ResetTraceGPUWrittenBuffer(); + return true; } void SharedMemory::Shutdown() { + ResetTraceGPUWrittenBuffer(); + // TODO(Triang3l): Do something in case any watches are still registered. 
if (physical_write_watch_handle_ != nullptr) { @@ -365,6 +372,8 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { for (auto upload_range : upload_ranges_) { uint32_t upload_range_start = upload_range.first; uint32_t upload_range_length = upload_range.second; + trace_writer_->WriteMemoryRead(upload_range_start << page_size_log2_, + upload_range_length << page_size_log2_); while (upload_range_length != 0) { ID3D12Resource* upload_buffer; uint32_t upload_buffer_offset, upload_buffer_size; @@ -376,7 +385,6 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { return false; } uint32_t upload_buffer_pages = upload_buffer_size >> page_size_log2_; - // No mutex holding here! MakeRangeValid(upload_range_start, upload_buffer_pages, false); std::memcpy( upload_buffer_mapping, @@ -441,7 +449,6 @@ void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) { // Mark the range as valid (so pages are not reuploaded until modified by the // CPU) and watch it so the CPU can reuse it and this will be caught. - // No mutex holding here! MakeRangeValid(page_first, page_last - page_first + 1, true); } @@ -487,8 +494,10 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first, } } - memory_->WatchPhysicalMemoryWrite(valid_page_first << page_size_log2_, - valid_page_count << page_size_log2_); + if (physical_write_watch_handle_) { + memory_->WatchPhysicalMemoryWrite(valid_page_first << page_size_log2_, + valid_page_count << page_size_log2_); + } } void SharedMemory::UnlinkWatchRange(WatchRange* range) { @@ -654,6 +663,157 @@ void SharedMemory::WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle) { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } +bool SharedMemory::InitializeTraceSubmitDownloads() { + // Invalidate the entire memory CPU->GPU memory copy so all the history + // doesn't have to be written into every frame trace, and collect the list of + // ranges with data modified on the GPU. 
+ ResetTraceGPUWrittenBuffer(); + uint32_t gpu_written_page_count = 0; + + { + auto global_lock = global_critical_region_.Acquire(); + uint32_t fire_watches_range_start = UINT32_MAX; + uint32_t gpu_written_range_start = UINT32_MAX; + for (uint32_t i = 0; i * 2 < valid_and_gpu_written_pages_.size(); ++i) { + uint64_t previously_valid_block = valid_and_gpu_written_pages_[i * 2]; + uint64_t gpu_written_block = valid_and_gpu_written_pages_[i * 2 + 1]; + valid_and_gpu_written_pages_[i * 2] = gpu_written_block; + + // Fire watches on the invalidated pages. + uint64_t fire_watches_block = previously_valid_block & ~gpu_written_block; + uint64_t fire_watches_break_block = ~fire_watches_block; + while (true) { + uint32_t fire_watches_block_page; + if (!xe::bit_scan_forward(fire_watches_range_start == UINT32_MAX + ? fire_watches_block + : fire_watches_break_block, + &fire_watches_block_page)) { + break; + } + uint32_t fire_watches_page = (i << 6) + fire_watches_block_page; + if (fire_watches_range_start == UINT32_MAX) { + fire_watches_range_start = fire_watches_page; + } else { + FireWatches(fire_watches_range_start, fire_watches_page - 1, false); + fire_watches_range_start = UINT32_MAX; + } + uint64_t fire_watches_block_mask = + ~((1ull << fire_watches_block_page) - 1); + fire_watches_block &= fire_watches_block_mask; + fire_watches_break_block &= fire_watches_block_mask; + } + + // Add to the GPU-written ranges. + uint64_t gpu_written_break_block = ~gpu_written_block; + while (true) { + uint32_t gpu_written_block_page; + if (!xe::bit_scan_forward(gpu_written_range_start == UINT32_MAX + ? 
gpu_written_block + : gpu_written_break_block, + &gpu_written_block_page)) { + break; + } + uint32_t gpu_written_page = (i << 6) + gpu_written_block_page; + if (gpu_written_range_start == UINT32_MAX) { + gpu_written_range_start = gpu_written_page; + } else { + uint32_t gpu_written_range_length = + gpu_written_page - gpu_written_range_start; + trace_gpu_written_ranges_.push_back( + std::make_pair(gpu_written_range_start << page_size_log2_, + gpu_written_range_length << page_size_log2_)); + gpu_written_page_count += gpu_written_range_length; + gpu_written_range_start = UINT32_MAX; + } + uint64_t gpu_written_block_mask = + ~((1ull << gpu_written_block_page) - 1); + gpu_written_block &= gpu_written_block_mask; + gpu_written_break_block &= gpu_written_block_mask; + } + } + if (fire_watches_range_start != UINT32_MAX) { + FireWatches(fire_watches_range_start, page_count_ - 1, false); + } + if (gpu_written_range_start != UINT32_MAX) { + uint32_t gpu_written_range_length = page_count_ - gpu_written_range_start; + trace_gpu_written_ranges_.push_back( + std::make_pair(gpu_written_range_start << page_size_log2_, + gpu_written_range_length << page_size_log2_)); + gpu_written_page_count += gpu_written_range_length; + } + } + + // Request downloading of GPU-written memory. 
+ if (!gpu_written_page_count) { + return false; + } + D3D12_RESOURCE_DESC gpu_written_buffer_desc; + ui::d3d12::util::FillBufferResourceDesc( + gpu_written_buffer_desc, gpu_written_page_count << page_size_log2_, + D3D12_RESOURCE_FLAG_NONE); + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); + if (FAILED(device->CreateCommittedResource( + &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, + &gpu_written_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(&trace_gpu_written_buffer_)))) { + XELOGE( + "Failed to create a %u KB GPU-written memory download buffer for frame " + "tracing", + gpu_written_page_count << page_size_log2_ >> 10); + ResetTraceGPUWrittenBuffer(); + return false; + } + auto command_list = command_processor_->GetDeferredCommandList(); + UseAsCopySource(); + command_processor_->SubmitBarriers(); + uint32_t gpu_written_buffer_offset = 0; + for (auto& gpu_written_submit_range : trace_gpu_written_ranges_) { + // For cases like resolution scale, when the data may not be actually + // written, just marked as valid. 
+ if (!MakeTilesResident(gpu_written_submit_range.first, + gpu_written_submit_range.second)) { + gpu_written_submit_range.second = 0; + continue; + } + command_list->D3DCopyBufferRegion( + trace_gpu_written_buffer_, gpu_written_buffer_offset, buffer_, + gpu_written_submit_range.first, gpu_written_submit_range.second); + gpu_written_buffer_offset += gpu_written_submit_range.second; + } + return true; +} + +void SharedMemory::InitializeTraceCompleteDownloads() { + if (!trace_gpu_written_buffer_) { + return; + } + void* download_mapping; + if (SUCCEEDED( + trace_gpu_written_buffer_->Map(0, nullptr, &download_mapping))) { + uint32_t gpu_written_buffer_offset = 0; + for (auto gpu_written_submit_range : trace_gpu_written_ranges_) { + trace_writer_->WriteMemoryRead( + gpu_written_submit_range.first, gpu_written_submit_range.second, + reinterpret_cast(download_mapping) + + gpu_written_buffer_offset); + // Ranges were packed back to back by InitializeTraceSubmitDownloads, so + // advance by the same length here (0 for ranges that were skipped). + gpu_written_buffer_offset += gpu_written_submit_range.second; + } + D3D12_RANGE download_write_range = {}; + trace_gpu_written_buffer_->Unmap(0, &download_write_range); + } else { + XELOGE( + "Failed to map the GPU-written memory download buffer for frame " + "tracing"); + } + ResetTraceGPUWrittenBuffer(); +} + +void SharedMemory::ResetTraceGPUWrittenBuffer() { + trace_gpu_written_ranges_.clear(); + trace_gpu_written_ranges_.shrink_to_fit(); + ui::d3d12::util::ReleaseAndNull(trace_gpu_written_buffer_); +} + } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index 1b2d03834..dc1119878 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -12,9 +12,11 @@ #include #include +#include #include #include "xenia/base/mutex.h" +#include "xenia/gpu/trace_writer.h" #include "xenia/memory.h" #include 
"xenia/ui/d3d12/d3d12_api.h" #include "xenia/ui/d3d12/pools.h" @@ -30,7 +32,8 @@ class D3D12CommandProcessor; // system page size granularity. 
class SharedMemory { public: - SharedMemory(D3D12CommandProcessor* command_processor, Memory* memory); + SharedMemory(D3D12CommandProcessor* command_processor, Memory* memory, + TraceWriter* trace_writer); ~SharedMemory(); bool Initialize(); @@ -99,6 +102,13 @@ class SharedMemory { // usable. bool RequestRange(uint32_t start, uint32_t length); + // Marks the range and, if not exact_range, potentially its surroundings + // (to up to the first GPU-written page, as an access violation exception + // count optimization) as modified by the CPU, also invalidating GPU-written + // pages directly in the range. + std::pair MemoryWriteCallback( + uint32_t physical_address_start, uint32_t length, bool exact_range); + // Marks the range as containing GPU-generated data (such as resolves), // triggering modification callbacks, making it valid (so pages are not // copied from the main memory until they're modified by the CPU) and @@ -124,6 +134,10 @@ class SharedMemory { void WriteRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); void WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); + // Returns true if any downloads were submitted to the command processor. + bool InitializeTraceSubmitDownloads(); + void InitializeTraceCompleteDownloads(); + private: bool AreTiledResourcesUsed() const; @@ -132,8 +146,8 @@ class SharedMemory { bool written_by_gpu); D3D12CommandProcessor* command_processor_; - Memory* memory_; + TraceWriter* trace_writer_; // The 512 MB tiled buffer. static constexpr uint32_t kBufferSizeLog2 = 29; @@ -188,12 +202,9 @@ class SharedMemory { // written by the GPU not synchronized with the CPU (subset of valid pages). std::vector valid_and_gpu_written_pages_; - // Memory access callback. 
static std::pair MemoryWriteCallbackThunk( void* context_ptr, uint32_t physical_address_start, uint32_t length, bool exact_range); - std::pair MemoryWriteCallback( - uint32_t physical_address_start, uint32_t length, bool exact_range); struct GlobalWatch { GlobalWatchCallback callback; @@ -268,6 +279,13 @@ class SharedMemory { std::unique_ptr upload_buffer_pool_ = nullptr; void TransitionBuffer(D3D12_RESOURCE_STATES new_state); + + // GPU-written memory downloading for traces. + // Start page, length in pages. + std::vector> trace_gpu_written_ranges_; + // Created temporarily, only for downloading. + ID3D12Resource* trace_gpu_written_buffer_ = nullptr; + void ResetTraceGPUWrittenBuffer(); }; } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 99f40e26f..347ebe2d8 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -97,10 +97,14 @@ constexpr uint32_t TextureCache::kScaledResolveBufferSize; constexpr uint32_t TextureCache::kScaledResolveHeapSizeLog2; constexpr uint32_t TextureCache::kScaledResolveHeapSize; -// Assuming all single-component textures have its only component replicated. -// For DXT3A and DXT5A, this is according to: +// For formats with less than 4 components, assuming the last component is +// replicated into the non-existent ones, similar to what is done for unused +// components of operands in shaders. +// For DXT3A and DXT5A, RRRR swizzle is specified in: // http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf // Halo 3 also expects replicated components in k_8 sprites. +// DXN is read as RG in Halo 3, but as RA in Call of Duty. +// TODO(Triang3l): Find out the correct contents of unused texture components. 
const TextureCache::HostFormat TextureCache::host_formats_[64] = { // k_1_REVERSE {DXGI_FORMAT_UNKNOWN, @@ -158,7 +162,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_B5G6R5_UNORM, ResolveTileMode::k16bpp, - {0, 1, 2, 3}}, + {0, 1, 2, 2}}, // k_6_5_5 // On the host, green bits in blue, blue bits in green. {DXGI_FORMAT_B5G6R5_UNORM, @@ -170,7 +174,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_B5G6R5_UNORM, ResolveTileMode::k16bpp, - {0, 2, 1, 3}}, + {0, 2, 1, 1}}, // k_8_8_8_8 {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, @@ -225,7 +229,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_R8G8_UNORM, ResolveTileMode::k16bpp, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_Cr_Y1_Cb_Y0_REP // Red and blue probably must be swapped, similar to k_Y1_Cr_Y0_Cb_REP. {DXGI_FORMAT_G8R8_G8B8_UNORM, @@ -265,7 +269,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_8_8_8_8_A {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, @@ -299,7 +303,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_R16G16B16A16_UNORM, ResolveTileMode::kR11G11B10AsRGBA16, - {0, 1, 2, 3}}, + {0, 1, 2, 2}}, // k_11_11_10 {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, @@ -310,7 +314,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_R16G16B16A16_UNORM, ResolveTileMode::kR10G11B11AsRGBA16, - {0, 1, 2, 3}}, + {0, 1, 2, 2}}, // k_DXT1 {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, @@ -403,7 +407,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_R16G16_UNORM, ResolveTileMode::k32bpp, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_16_16_16_16 // The resolve 
format being unorm is correct (with snorm distortion effects // in Halo 3 cause stretching of one corner of the screen). @@ -438,7 +442,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_R16G16_FLOAT, ResolveTileMode::k32bpp, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_16_16_16_16_EXPAND {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, @@ -471,7 +475,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_R16G16_FLOAT, ResolveTileMode::k32bpp, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_16_16_16_16_FLOAT {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, @@ -504,7 +508,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_32_32_32_32 {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, @@ -537,7 +541,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_R32G32_FLOAT, ResolveTileMode::k64bpp, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_32_32_32_32_FLOAT {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, @@ -570,7 +574,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_16_MPEG {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, @@ -592,7 +596,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_8_INTERLACED {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, @@ -625,7 +629,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_16_INTERLACED {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, @@ -658,10 +662,8 @@ const 
TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_DXN - // Appears to be luminance-alpha, like ATI 3Dc and LATC in OpenGL. Call of - // Duty 4 reads this with XW swizzle in the shader. {DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_UNORM, LoadMode::k128bpb, @@ -671,7 +673,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kDXNToRG8, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - {0, 0, 0, 1}}, + {0, 1, 1, 1}}, // k_8_8_8_8_AS_16_16_16_16 {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, @@ -737,7 +739,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_R16G16B16A16_UNORM, ResolveTileMode::kR11G11B10AsRGBA16, - {0, 1, 2, 3}}, + {0, 1, 2, 2}}, // k_11_11_10_AS_16_16_16_16 {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, @@ -748,7 +750,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_R16G16B16A16_UNORM, ResolveTileMode::kR10G11B11AsRGBA16, - {0, 1, 2, 3}}, + {0, 1, 2, 2}}, // k_32_32_32_FLOAT {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, @@ -759,7 +761,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - {0, 1, 2, 3}}, + {0, 1, 2, 2}}, // k_DXT3A // R8_UNORM has the same size as BC2, but doesn't have the 4x4 size // alignment requirement. 
@@ -794,7 +796,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - {0, 1, 2, 3}}, + {0, 1, 1, 1}}, // k_DXT3A_AS_1_1_1_1 {DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM, diff --git a/src/xenia/gpu/null/null_command_processor.cc b/src/xenia/gpu/null/null_command_processor.cc index 86efc07c8..07b970bdf 100644 --- a/src/xenia/gpu/null/null_command_processor.cc +++ b/src/xenia/gpu/null/null_command_processor.cc @@ -18,6 +18,9 @@ NullCommandProcessor::NullCommandProcessor(NullGraphicsSystem* graphics_system, : CommandProcessor(graphics_system, kernel_state) {} NullCommandProcessor::~NullCommandProcessor() = default; +void NullCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, + uint32_t length) {} + bool NullCommandProcessor::SetupContext() { return CommandProcessor::SetupContext(); } @@ -45,6 +48,10 @@ bool NullCommandProcessor::IssueDraw(PrimitiveType prim_type, bool NullCommandProcessor::IssueCopy() { return true; } +void NullCommandProcessor::InitializeTrace() {} + +void NullCommandProcessor::FinalizeTrace() {} + } // namespace null } // namespace gpu } // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/null/null_command_processor.h b/src/xenia/gpu/null/null_command_processor.h index 3eef6108d..3e04332f9 100644 --- a/src/xenia/gpu/null/null_command_processor.h +++ b/src/xenia/gpu/null/null_command_processor.h @@ -25,6 +25,8 @@ class NullCommandProcessor : public CommandProcessor { kernel::KernelState* kernel_state); ~NullCommandProcessor(); + void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override; + private: bool SetupContext() override; void ShutdownContext() override; @@ -39,6 +41,9 @@ class NullCommandProcessor : public CommandProcessor { bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) override; bool IssueCopy() override; + + void InitializeTrace() override; + void 
FinalizeTrace() override; }; } // namespace null diff --git a/src/xenia/gpu/trace_player.cc b/src/xenia/gpu/trace_player.cc index b79b49df2..1e86e8258 100644 --- a/src/xenia/gpu/trace_player.cc +++ b/src/xenia/gpu/trace_player.cc @@ -21,11 +21,11 @@ TracePlayer::TracePlayer(xe::ui::Loop* loop, GraphicsSystem* graphics_system) graphics_system_(graphics_system), current_frame_index_(0), current_command_index_(-1) { - // Need to allocate all of physical memory so that we can write to it - // during playback. - graphics_system_->memory() - ->LookupHeapByType(true, 4096) - ->AllocFixed(0, 0x1FFFFFFF, 4096, + // Need to allocate all of physical memory so that we can write to it during + // playback. The 64 KB page heap is larger, covers the entire physical memory, + // so it is used instead of the 4 KB page one. + auto heap = graphics_system_->memory()->LookupHeapByType(true, 64 * 1024); + heap->AllocFixed(heap->heap_base(), heap->heap_size(), heap->page_size(), kMemoryAllocationReserve | kMemoryAllocationCommit, kMemoryProtectRead | kMemoryProtectWrite); @@ -174,12 +174,15 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data, memory->TranslatePhysical(cmd->base_ptr), cmd->decoded_length); trace_ptr += cmd->encoded_length; + command_processor->TracePlaybackWroteMemory(cmd->base_ptr, + cmd->decoded_length); break; } case TraceCommandType::kMemoryWrite: { auto cmd = reinterpret_cast(trace_ptr); trace_ptr += sizeof(*cmd); // ? + // Assuming the command processor will do the same write. 
trace_ptr += cmd->encoded_length; break; } diff --git a/src/xenia/gpu/trace_writer.cc b/src/xenia/gpu/trace_writer.cc index e41f82643..2c2a1a28f 100644 --- a/src/xenia/gpu/trace_writer.cc +++ b/src/xenia/gpu/trace_writer.cc @@ -136,11 +136,12 @@ void TraceWriter::WritePacketEnd() { fwrite(&cmd, 1, sizeof(cmd), file_); } -void TraceWriter::WriteMemoryRead(uint32_t base_ptr, size_t length) { +void TraceWriter::WriteMemoryRead(uint32_t base_ptr, size_t length, + const void* host_ptr) { if (!file_) { return; } - WriteMemoryCommand(TraceCommandType::kMemoryRead, base_ptr, length); + WriteMemoryCommand(TraceCommandType::kMemoryRead, base_ptr, length, host_ptr); } void TraceWriter::WriteMemoryReadCached(uint32_t base_ptr, size_t length) { @@ -168,11 +169,13 @@ void TraceWriter::WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length) { } } -void TraceWriter::WriteMemoryWrite(uint32_t base_ptr, size_t length) { +void TraceWriter::WriteMemoryWrite(uint32_t base_ptr, size_t length, + const void* host_ptr) { if (!file_) { return; } - WriteMemoryCommand(TraceCommandType::kMemoryWrite, base_ptr, length); + WriteMemoryCommand(TraceCommandType::kMemoryWrite, base_ptr, length, + host_ptr); } class SnappySink : public snappy::Sink { @@ -188,13 +191,17 @@ class SnappySink : public snappy::Sink { }; void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr, - size_t length) { + size_t length, const void* host_ptr) { MemoryCommand cmd; cmd.type = type; cmd.base_ptr = base_ptr; cmd.encoding_format = MemoryEncodingFormat::kNone; cmd.encoded_length = cmd.decoded_length = static_cast(length); + if (!host_ptr) { + host_ptr = membase_ + cmd.base_ptr; + } + bool compress = compress_output_ && length > compression_threshold_; if (compress) { // Write the header now so we reserve space in the buffer. @@ -204,8 +211,7 @@ void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr, // Stream the content right to the buffer. 
snappy::ByteArraySource snappy_source( - reinterpret_cast(membase_ + cmd.base_ptr), - cmd.decoded_length); + reinterpret_cast(host_ptr), cmd.decoded_length); SnappySink snappy_sink(file_); cmd.encoded_length = static_cast(snappy::Compress(&snappy_source, &snappy_sink)); @@ -219,7 +225,7 @@ void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr, // Uncompressed - write buffer directly to the file. cmd.encoding_format = MemoryEncodingFormat::kNone; fwrite(&cmd, 1, sizeof(cmd), file_); - fwrite(membase_ + cmd.base_ptr, 1, cmd.decoded_length, file_); + fwrite(host_ptr, 1, cmd.decoded_length, file_); } } diff --git a/src/xenia/gpu/trace_writer.h b/src/xenia/gpu/trace_writer.h index 474d846f5..206f69a2b 100644 --- a/src/xenia/gpu/trace_writer.h +++ b/src/xenia/gpu/trace_writer.h @@ -36,15 +36,17 @@ class TraceWriter { void WriteIndirectBufferEnd(); void WritePacketStart(uint32_t base_ptr, uint32_t count); void WritePacketEnd(); - void WriteMemoryRead(uint32_t base_ptr, size_t length); + void WriteMemoryRead(uint32_t base_ptr, size_t length, + const void* host_ptr = nullptr); void WriteMemoryReadCached(uint32_t base_ptr, size_t length); void WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length); - void WriteMemoryWrite(uint32_t base_ptr, size_t length); + void WriteMemoryWrite(uint32_t base_ptr, size_t length, + const void* host_ptr = nullptr); void WriteEvent(EventCommand::Type event_type); private: void WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr, - size_t length); + size_t length, const void* host_ptr = nullptr); std::set cached_memory_reads_; uint8_t* membase_; diff --git a/src/xenia/gpu/vk/vulkan_command_processor.cc b/src/xenia/gpu/vk/vulkan_command_processor.cc index c2817c139..57f45cbf6 100644 --- a/src/xenia/gpu/vk/vulkan_command_processor.cc +++ b/src/xenia/gpu/vk/vulkan_command_processor.cc @@ -18,6 +18,9 @@ VulkanCommandProcessor::VulkanCommandProcessor( : CommandProcessor(graphics_system, kernel_state) {} 
VulkanCommandProcessor::~VulkanCommandProcessor() = default; +void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, + uint32_t length) {} + bool VulkanCommandProcessor::SetupContext() { return true; } void VulkanCommandProcessor::ShutdownContext() {} @@ -41,6 +44,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, bool VulkanCommandProcessor::IssueCopy() { return true; } +void VulkanCommandProcessor::InitializeTrace() {} + +void VulkanCommandProcessor::FinalizeTrace() {} + } // namespace vk } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vk/vulkan_command_processor.h b/src/xenia/gpu/vk/vulkan_command_processor.h index e2f4d3b8d..1203bd539 100644 --- a/src/xenia/gpu/vk/vulkan_command_processor.h +++ b/src/xenia/gpu/vk/vulkan_command_processor.h @@ -24,6 +24,8 @@ class VulkanCommandProcessor : public CommandProcessor { kernel::KernelState* kernel_state); ~VulkanCommandProcessor(); + void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override; + protected: bool SetupContext() override; void ShutdownContext() override; @@ -38,6 +40,9 @@ class VulkanCommandProcessor : public CommandProcessor { bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) override; bool IssueCopy() override; + + void InitializeTrace() override; + void FinalizeTrace() override; }; } // namespace vk diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 50a3094a4..c26b592df 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -48,6 +48,9 @@ void VulkanCommandProcessor::RequestFrameTrace(const std::wstring& root_path) { return CommandProcessor::RequestFrameTrace(root_path); } +void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, + uint32_t length) {} + void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); 
cache_clear_requested_ = true; @@ -1322,6 +1325,10 @@ bool VulkanCommandProcessor::IssueCopy() { return true; } +void VulkanCommandProcessor::InitializeTrace() {} + +void VulkanCommandProcessor::FinalizeTrace() {} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 8cd1710b6..56bc1db8e 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -50,7 +50,8 @@ class VulkanCommandProcessor : public CommandProcessor { kernel::KernelState* kernel_state); ~VulkanCommandProcessor() override; - virtual void RequestFrameTrace(const std::wstring& root_path) override; + void RequestFrameTrace(const std::wstring& root_path) override; + void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override; void ClearCaches() override; RenderCache* render_cache() { return render_cache_.get(); } @@ -94,6 +95,9 @@ class VulkanCommandProcessor : public CommandProcessor { VulkanShader* pixel_shader); bool IssueCopy() override; + void InitializeTrace() override; + void FinalizeTrace() override; + xe::ui::vulkan::VulkanDevice* device_ = nullptr; // front buffer / back buffer memory diff --git a/src/xenia/vfs/devices/stfs_container_device.cc b/src/xenia/vfs/devices/stfs_container_device.cc index 728bba7c5..e4fb3bd94 100644 --- a/src/xenia/vfs/devices/stfs_container_device.cc +++ b/src/xenia/vfs/devices/stfs_container_device.cc @@ -62,14 +62,13 @@ StfsContainerDevice::~StfsContainerDevice() = default; bool StfsContainerDevice::Initialize() { // Resolve a valid STFS file if a directory is given. 
if (filesystem::IsFolder(local_path_) && !ResolveFromFolder(local_path_)) { - XELOGE("Could not resolve an STFS container given path %s", - xe::to_string(local_path_).c_str()); + XELOGE("Could not resolve an STFS container given path %ls", + local_path_.c_str()); return false; } if (!filesystem::PathExists(local_path_)) { - XELOGE("Path to STFS container does not exist: %s", - xe::to_string(local_path_).c_str()); + XELOGE("Path to STFS container does not exist: %ls", local_path_.c_str()); return false; } @@ -94,10 +93,15 @@ bool StfsContainerDevice::Initialize() { StfsContainerDevice::Error StfsContainerDevice::MapFiles() { // Map the file containing the STFS Header and read it. - XELOGI("Mapping STFS Header File: %s", xe::to_string(local_path_).c_str()); + XELOGI("Mapping STFS Header file: %ls", local_path_.c_str()); auto header_map = MappedMemory::Open(local_path_, MappedMemory::Mode::kRead); + if (!header_map) { + XELOGE("Error mapping STFS Header file."); + return Error::kErrorReadError; + } - auto header_result = ReadHeaderAndVerify(header_map->data()); + auto header_result = + ReadHeaderAndVerify(header_map->data(), header_map->size()); if (header_result != Error::kSuccess) { XELOGE("Error reading STFS Header: %d", header_result); return header_result; @@ -116,7 +120,7 @@ StfsContainerDevice::Error StfsContainerDevice::MapFiles() { // the files in the .data folder and can discard the header. 
auto data_fragment_path = local_path_ + L".data"; if (!filesystem::PathExists(data_fragment_path)) { - XELOGE("STFS container is multi-file, but path %s does not exist.", + XELOGE("STFS container is multi-file, but path %ls does not exist.", xe::to_string(data_fragment_path).c_str()); return Error::kErrorFileMismatch; } @@ -138,6 +142,11 @@ StfsContainerDevice::Error StfsContainerDevice::MapFiles() { auto file = fragment_files.at(i); auto path = xe::join_paths(file.path, file.name); auto data = MappedMemory::Open(path, MappedMemory::Mode::kRead); + if (!data) { + XELOGI("Failed to map SVOD file %ls.", path.c_str()); + mmap_.clear(); + return Error::kErrorReadError; + } mmap_.emplace(std::make_pair(i, std::move(data))); } XELOGI("SVOD successfully mapped %d files.", fragment_files.size()); @@ -170,19 +179,41 @@ Entry* StfsContainerDevice::ResolvePath(const std::string& path) { return entry; } -StfsContainerDevice::Error StfsContainerDevice::ReadHeaderAndVerify( - const uint8_t* map_ptr) { - // Check signature. - if (memcmp(map_ptr, "LIVE", 4) == 0) { - package_type_ = StfsPackageType::kLive; - } else if (memcmp(map_ptr, "PIRS", 4) == 0) { - package_type_ = StfsPackageType::kPirs; - } else if (memcmp(map_ptr, "CON ", 4) == 0) { - package_type_ = StfsPackageType::kCon; - } else { - // Unexpected format. +StfsContainerDevice::Error StfsContainerDevice::ReadPackageType( + const uint8_t* map_ptr, size_t map_size, + StfsPackageType* package_type_out) { + if (map_size < 4) { return Error::kErrorFileMismatch; } + if (memcmp(map_ptr, "LIVE", 4) == 0) { + if (package_type_out) { + *package_type_out = StfsPackageType::kLive; + } + return Error::kSuccess; + } + if (memcmp(map_ptr, "PIRS", 4) == 0) { + if (package_type_out) { + *package_type_out = StfsPackageType::kPirs; + } + return Error::kSuccess; + } + if (memcmp(map_ptr, "CON ", 4) == 0) { + if (package_type_out) { + *package_type_out = StfsPackageType::kCon; + } + return Error::kSuccess; + } + // Unexpected format. 
+ return Error::kErrorFileMismatch; +} + +StfsContainerDevice::Error StfsContainerDevice::ReadHeaderAndVerify( + const uint8_t* map_ptr, size_t map_size) { + // Check signature. + auto type_result = ReadPackageType(map_ptr, map_size, &package_type_); + if (type_result != Error::kSuccess) { + return type_result; + } // Read header. if (!header_.Read(map_ptr)) { @@ -708,13 +739,6 @@ bool StfsHeader::Read(const uint8_t* p) { return true; } -const char* StfsContainerDevice::ReadMagic(const std::wstring& path) { - auto map = MappedMemory::Open(path, MappedMemory::Mode::kRead, 0, 4); - auto magic_data = xe::load(map->data()); - auto magic_bytes = static_cast(static_cast(&magic_data)); - return std::move(magic_bytes); -} - bool StfsContainerDevice::ResolveFromFolder(const std::wstring& path) { // Scan through folders until a file with magic is found std::queue queue; @@ -736,12 +760,11 @@ bool StfsContainerDevice::ResolveFromFolder(const std::wstring& path) { } else { // Try to read the file's magic auto path = xe::join_paths(current_file.path, current_file.name); - auto magic = ReadMagic(path); - - if (memcmp(magic, "LIVE", 4) == 0 || memcmp(magic, "PIRS", 4) == 0 || - memcmp(magic, "CON ", 4) == 0) { + auto map = MappedMemory::Open(path, MappedMemory::Mode::kRead, 0, 4); + if (map && ReadPackageType(map->data(), map->size(), nullptr) == + Error::kSuccess) { local_path_ = xe::join_paths(current_file.path, current_file.name); - XELOGI("STFS Package found: %s", xe::to_string(local_path_).c_str()); + XELOGI("STFS Package found: %ls", local_path_.c_str()); return true; } } diff --git a/src/xenia/vfs/devices/stfs_container_device.h b/src/xenia/vfs/devices/stfs_container_device.h index 7413116df..89e76c231 100644 --- a/src/xenia/vfs/devices/stfs_container_device.h +++ b/src/xenia/vfs/devices/stfs_container_device.h @@ -201,11 +201,12 @@ class StfsContainerDevice : public Device { const uint32_t kSTFSHashSpacing = 170; - const char* ReadMagic(const std::wstring& path); bool 
ResolveFromFolder(const std::wstring& path); Error MapFiles(); - Error ReadHeaderAndVerify(const uint8_t* map_ptr); + static Error ReadPackageType(const uint8_t* map_ptr, size_t map_size, + StfsPackageType* package_type_out); + Error ReadHeaderAndVerify(const uint8_t* map_ptr, size_t map_size); Error ReadSVOD(); Error ReadEntrySVOD(uint32_t sector, uint32_t ordinal,