[D3D12] Make trace dump partially work

This commit is contained in:
Triang3l 2019-10-25 08:38:06 +03:00
parent b622e894d6
commit c057b5a032
19 changed files with 226 additions and 33 deletions

View File

@ -134,6 +134,8 @@ class CommandProcessor {
virtual void BeginTracing(const std::wstring& root_path);
virtual void EndTracing();
virtual void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) = 0;
void InitializeRingBuffer(uint32_t ptr, uint32_t page_count);
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);

View File

@ -80,6 +80,12 @@ void D3D12CommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
CommandProcessor::RequestFrameTrace(root_path);
}
// Forwards a memory write performed by trace playback to the shared memory and
// to the primitive converter so both can react to the modified range.
//   base_ptr - start of the written range.
//   length   - size of the written range.
void D3D12CommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
                                                     uint32_t length) {
  // Both callbacks are asked to treat [base_ptr, base_ptr + length) as the
  // exact range. The ranges they return are not used here.
  const bool exact_range = true;
  shared_memory_->MemoryWriteCallback(base_ptr, length, exact_range);
  primitive_converter_->MemoryWriteCallback(base_ptr, length, exact_range);
}
bool D3D12CommandProcessor::IsROVUsedForEDRAM() const {
if (!cvars::d3d12_edram_rov) {
return false;
@ -643,6 +649,56 @@ std::wstring D3D12CommandProcessor::GetWindowTitleText() const {
}
}
// Copies the current contents of the swap texture to the CPU and returns them
// as a RawImage with 4 bytes per pixel and no row padding
// (stride == width * 4).
//
// Returns nullptr if a readback buffer can't be obtained or mapped.
std::unique_ptr<xe::ui::RawImage> D3D12CommandProcessor::Capture() {
  ID3D12Resource* readback_buffer =
      RequestReadbackBuffer(uint32_t(swap_texture_copy_size_));
  if (!readback_buffer) {
    return nullptr;
  }

  // Record a copy of the swap texture into the readback buffer in its own
  // frame, switching the texture to the copy source state for the duration of
  // the copy.
  BeginFrame();
  PushTransitionBarrier(swap_texture_,
                        D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
                        D3D12_RESOURCE_STATE_COPY_SOURCE);
  SubmitBarriers();
  D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
  location_source.pResource = swap_texture_;
  location_source.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
  location_source.SubresourceIndex = 0;
  location_dest.pResource = readback_buffer;
  location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
  location_dest.PlacedFootprint = swap_texture_copy_footprint_;
  deferred_command_list_->CopyTexture(location_dest, location_source);
  // NOTE(review): this barrier is pushed without an explicit SubmitBarriers();
  // presumably EndFrame() flushes pending barriers - confirm.
  PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_COPY_SOURCE,
                        D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  EndFrame();
  // Wait for the submitted work so the copy has finished before the buffer is
  // read on the CPU.
  GetD3D12Context()->AwaitAllFramesCompletion();

  // Map only the part of the buffer the copy has written to.
  D3D12_RANGE readback_range;
  readback_range.Begin = swap_texture_copy_footprint_.Offset;
  readback_range.End = swap_texture_copy_size_;
  void* readback_mapping;
  if (FAILED(readback_buffer->Map(0, &readback_range, &readback_mapping))) {
    return nullptr;
  }

  std::unique_ptr<xe::ui::RawImage> raw_image(new xe::ui::RawImage());
  auto swap_texture_size = GetSwapTextureSize();
  raw_image->width = swap_texture_size.first;
  raw_image->height = swap_texture_size.second;
  raw_image->stride = swap_texture_size.first * 4;
  raw_image->data.resize(raw_image->stride * swap_texture_size.second);

  // The mapped pointer refers to the start of the resource (it is not offset
  // by the read range), so the footprint offset is applied manually. Rows in
  // the buffer are RowPitch bytes apart, which may be more than the packed
  // stride of the image, so they are copied one at a time.
  const uint8_t* readback_source_data =
      reinterpret_cast<const uint8_t*>(readback_mapping) +
      swap_texture_copy_footprint_.Offset;
  for (uint32_t i = 0; i < swap_texture_size.second; ++i) {
    std::memcpy(raw_image->data.data() + i * raw_image->stride,
                readback_source_data +
                    i * swap_texture_copy_footprint_.Footprint.RowPitch,
                raw_image->stride);
  }

  // Unmap the resource that was mapped above. The original code unmapped
  // gamma_ramp_upload_ here instead, which left the readback buffer mapped and
  // issued an unbalanced Unmap on an unrelated resource. The empty written
  // range indicates that the CPU has not written anything.
  D3D12_RANGE readback_written_range = {};
  readback_buffer->Unmap(0, &readback_written_range);
  return raw_image;
}
bool D3D12CommandProcessor::SetupContext() {
if (!CommandProcessor::SetupContext()) {
XELOGE("Failed to initialize base command processor context");
@ -760,12 +816,9 @@ bool D3D12CommandProcessor::SetupContext() {
D3D12_RESOURCE_DESC swap_texture_desc;
swap_texture_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
swap_texture_desc.Alignment = 0;
swap_texture_desc.Width = kSwapTextureWidth;
swap_texture_desc.Height = kSwapTextureHeight;
if (texture_cache_->IsResolutionScale2X()) {
swap_texture_desc.Width *= 2;
swap_texture_desc.Height *= 2;
}
auto swap_texture_size = GetSwapTextureSize();
swap_texture_desc.Width = swap_texture_size.first;
swap_texture_desc.Height = swap_texture_size.second;
swap_texture_desc.DepthOrArraySize = 1;
swap_texture_desc.MipLevels = 1;
swap_texture_desc.Format = ui::d3d12::D3D12Context::kSwapChainFormat;
@ -781,6 +834,9 @@ bool D3D12CommandProcessor::SetupContext() {
XELOGE("Failed to create the command processor front buffer");
return false;
}
device->GetCopyableFootprints(&swap_texture_desc, 0, 1, 0,
&swap_texture_copy_footprint_, nullptr, nullptr,
&swap_texture_copy_size_);
D3D12_DESCRIPTOR_HEAP_DESC swap_descriptor_heap_desc;
swap_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
swap_descriptor_heap_desc.NumDescriptors = 1;
@ -1045,12 +1101,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
SubmitBarriers();
uint32_t swap_texture_width = kSwapTextureWidth;
uint32_t swap_texture_height = kSwapTextureHeight;
if (texture_cache_->IsResolutionScale2X()) {
swap_texture_width *= 2;
swap_texture_height *= 2;
}
auto swap_texture_size = GetSwapTextureSize();
// Draw the stretching rectangle.
deferred_command_list_->D3DOMSetRenderTargets(1, &swap_texture_rtv_, TRUE,
@ -1058,16 +1109,16 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
D3D12_VIEWPORT viewport;
viewport.TopLeftX = 0.0f;
viewport.TopLeftY = 0.0f;
viewport.Width = float(swap_texture_width);
viewport.Height = float(swap_texture_height);
viewport.Width = float(swap_texture_size.first);
viewport.Height = float(swap_texture_size.second);
viewport.MinDepth = 0.0f;
viewport.MaxDepth = 0.0f;
deferred_command_list_->RSSetViewport(viewport);
D3D12_RECT scissor;
scissor.left = 0;
scissor.top = 0;
scissor.right = swap_texture_width;
scissor.bottom = swap_texture_height;
scissor.right = swap_texture_size.first;
scissor.bottom = swap_texture_size.second;
deferred_command_list_->RSSetScissorRect(scissor);
D3D12GraphicsSystem* graphics_system =
static_cast<D3D12GraphicsSystem*>(graphics_system_);
@ -1085,8 +1136,8 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
// Don't care about graphics state because the frame is ending anyway.
{
std::lock_guard<std::mutex> lock(swap_state_.mutex);
swap_state_.width = swap_texture_width;
swap_state_.height = swap_texture_height;
swap_state_.width = swap_texture_size.first;
swap_state_.height = swap_texture_size.second;
swap_state_.front_buffer_texture =
reinterpret_cast<uintptr_t>(swap_texture_srv_descriptor_heap_);
}

View File

@ -15,6 +15,7 @@
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
@ -45,6 +46,8 @@ class D3D12CommandProcessor : public CommandProcessor {
void RequestFrameTrace(const std::wstring& root_path) override;
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
// Needed by everything that owns transient objects.
xe::ui::d3d12::D3D12Context* GetD3D12Context() const {
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
@ -141,6 +144,8 @@ class D3D12CommandProcessor : public CommandProcessor {
// Returns the text to display in the GPU backend name in the window title.
std::wstring GetWindowTitleText() const;
std::unique_ptr<xe::ui::RawImage> Capture();
protected:
bool SetupContext() override;
void ShutdownContext() override;
@ -268,7 +273,15 @@ class D3D12CommandProcessor : public CommandProcessor {
static constexpr uint32_t kSwapTextureWidth = 1280;
static constexpr uint32_t kSwapTextureHeight = 720;
// Returns the swap texture size as {width, height}: the base
// kSwapTextureWidth x kSwapTextureHeight, doubled along both axes when the
// texture cache reports 2x resolution scaling.
inline std::pair<uint32_t, uint32_t> GetSwapTextureSize() const {
  const uint32_t scale = texture_cache_->IsResolutionScale2X() ? 2 : 1;
  return std::make_pair(kSwapTextureWidth * scale,
                        kSwapTextureHeight * scale);
}
ID3D12Resource* swap_texture_ = nullptr;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT swap_texture_copy_footprint_;
UINT64 swap_texture_copy_size_;
ID3D12DescriptorHeap* swap_texture_rtv_descriptor_heap_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE swap_texture_rtv_;
ID3D12DescriptorHeap* swap_texture_srv_descriptor_heap_ = nullptr;

View File

@ -190,6 +190,15 @@ void D3D12GraphicsSystem::Shutdown() {
GraphicsSystem::Shutdown();
}
std::unique_ptr<xe::ui::RawImage> D3D12GraphicsSystem::Capture() {
auto d3d12_command_processor =
static_cast<D3D12CommandProcessor*>(command_processor());
if (!d3d12_command_processor) {
return nullptr;
}
return d3d12_command_processor->Capture();
}
void D3D12GraphicsSystem::AwaitFrontBufferUnused() {
if (display_context_ != nullptr) {
display_context_->AwaitAllFramesCompletion();

View File

@ -34,6 +34,8 @@ class D3D12GraphicsSystem : public GraphicsSystem {
ui::Window* target_window) override;
void Shutdown() override;
std::unique_ptr<xe::ui::RawImage> Capture() override;
void AwaitFrontBufferUnused();
// Draws a texture covering the entire viewport to the render target currently

View File

@ -0,0 +1,40 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2019 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/base/logging.h"
#include "xenia/base/main.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
#include "xenia/gpu/trace_dump.h"
namespace xe {
namespace gpu {
namespace d3d12 {
using namespace xe::gpu::xenos;
class D3D12TraceDump : public TraceDump {
public:
std::unique_ptr<gpu::GraphicsSystem> CreateGraphicsSystem() override {
return std::unique_ptr<gpu::GraphicsSystem>(new D3D12GraphicsSystem());
}
};
int trace_dump_main(const std::vector<std::wstring>& args) {
D3D12TraceDump trace_dump;
return trace_dump.Main(args);
}
} // namespace d3d12
} // namespace gpu
} // namespace xe
// Registers xe::gpu::d3d12::trace_dump_main as the entry point under the name
// "xenia-gpu-d3d12-trace-dump".
// NOTE(review): the last two arguments look like the usage text and the name
// of the positional option - confirm against DEFINE_ENTRY_POINT in
// xenia/base/main.h.
DEFINE_ENTRY_POINT(L"xenia-gpu-d3d12-trace-dump",
xe::gpu::d3d12::trace_dump_main, "some.trace",
"target_trace_file");

View File

@ -17,3 +17,47 @@ project("xenia-gpu-d3d12")
files({
"shaders/bin/*.h",
})
-- Console application project for the D3D12 trace dump tool, built from
-- d3d12_trace_dump_main.cc.
group("src")
project("xenia-gpu-d3d12-trace-dump")
uuid("686b859c-0046-44c4-a02c-41fc3fb75698")
kind("ConsoleApp")
language("C++")
-- Libraries the executable is linked against.
links({
"aes_128",
"capstone",
"dxbc",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
"xenia-core",
"xenia-cpu",
"xenia-cpu-backend-x64",
"xenia-gpu",
"xenia-gpu-d3d12",
"xenia-hid",
"xenia-hid-nop",
"xenia-kernel",
"xenia-ui",
"xenia-ui-d3d12",
"xenia-vfs",
"xxhash",
})
-- The tool's own source plus the main_<platform_suffix>.cc file from
-- xenia/base. NOTE(review): platform_suffix is not defined in this hunk;
-- presumably it is set by the including build script - confirm.
files({
"d3d12_trace_dump_main.cc",
"../../base/main_"..platform_suffix..".cc",
})
-- Only create the .user file if it doesn't already exist.
local user_file = project_root.."/build/xenia-gpu-d3d12-trace-dump.vcxproj.user"
if not os.isfile(user_file) then
-- Debugger defaults: run from the project root; the arguments are shell-style
-- redirections of stderr to stdout and of stdout to
-- scratch/stdout-trace-dump.txt.
debugdir(project_root)
debugargs({
"2>&1",
"1>scratch/stdout-trace-dump.txt",
})
end

View File

@ -82,6 +82,10 @@ class PrimitiveConverter {
PrimitiveType source_type, uint32_t index_count,
uint32_t& index_count_out) const;
// Callback for invalidating buffers mid-frame.
std::pair<uint32_t, uint32_t> MemoryWriteCallback(
uint32_t physical_address_start, uint32_t length, bool exact_range);
void InitializeTrace();
private:
@ -92,9 +96,6 @@ class PrimitiveConverter {
uint32_t simd_offset,
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out);
// Callback for invalidating buffers mid-frame.
std::pair<uint32_t, uint32_t> MemoryWriteCallback(
uint32_t physical_address_start, uint32_t length, bool exact_range);
static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk(
void* context_ptr, uint32_t physical_address_start, uint32_t length,
bool exact_range);

View File

@ -494,8 +494,10 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
}
}
memory_->WatchPhysicalMemoryWrite(valid_page_first << page_size_log2_,
valid_page_count << page_size_log2_);
if (physical_write_watch_handle_) {
memory_->WatchPhysicalMemoryWrite(valid_page_first << page_size_log2_,
valid_page_count << page_size_log2_);
}
}
void SharedMemory::UnlinkWatchRange(WatchRange* range) {
@ -791,7 +793,7 @@ void SharedMemory::InitializeTraceCompleteDownloads() {
trace_gpu_written_buffer_->Map(0, nullptr, &download_mapping))) {
uint32_t gpu_written_buffer_offset = 0;
for (auto gpu_written_submit_range : trace_gpu_written_ranges_) {
trace_writer_->WriteMemoryWrite(
trace_writer_->WriteMemoryRead(
gpu_written_submit_range.first, gpu_written_submit_range.second,
reinterpret_cast<const uint8_t*>(download_mapping) +
gpu_written_buffer_offset);

View File

@ -102,6 +102,13 @@ class SharedMemory {
// usable.
bool RequestRange(uint32_t start, uint32_t length);
// Marks the range and, if not exact_range, potentially its surroundings
// (to up to the first GPU-written page, as an access violation exception
// count optimization) as modified by the CPU, also invalidating GPU-written
// pages directly in the range.
std::pair<uint32_t, uint32_t> MemoryWriteCallback(
uint32_t physical_address_start, uint32_t length, bool exact_range);
// Marks the range as containing GPU-generated data (such as resolves),
// triggering modification callbacks, making it valid (so pages are not
// copied from the main memory until they're modified by the CPU) and
@ -195,12 +202,9 @@ class SharedMemory {
// written by the GPU not synchronized with the CPU (subset of valid pages).
std::vector<uint64_t> valid_and_gpu_written_pages_;
// Memory access callback.
static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk(
void* context_ptr, uint32_t physical_address_start, uint32_t length,
bool exact_range);
std::pair<uint32_t, uint32_t> MemoryWriteCallback(
uint32_t physical_address_start, uint32_t length, bool exact_range);
struct GlobalWatch {
GlobalWatchCallback callback;

View File

@ -18,6 +18,9 @@ NullCommandProcessor::NullCommandProcessor(NullGraphicsSystem* graphics_system,
: CommandProcessor(graphics_system, kernel_state) {}
NullCommandProcessor::~NullCommandProcessor() = default;
// No-op: this command processor takes no action when trace playback reports a
// memory write.
void NullCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
uint32_t length) {}
// Performs only the base CommandProcessor context setup and returns its
// result unchanged.
bool NullCommandProcessor::SetupContext() {
  const bool base_context_ready = CommandProcessor::SetupContext();
  return base_context_ready;
}

View File

@ -25,6 +25,8 @@ class NullCommandProcessor : public CommandProcessor {
kernel::KernelState* kernel_state);
~NullCommandProcessor();
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
private:
bool SetupContext() override;
void ShutdownContext() override;

View File

@ -167,14 +167,23 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data,
}
break;
}
case TraceCommandType::kMemoryRead:
case TraceCommandType::kMemoryWrite: {
case TraceCommandType::kMemoryRead: {
auto cmd = reinterpret_cast<const MemoryCommand*>(trace_ptr);
trace_ptr += sizeof(*cmd);
DecompressMemory(cmd->encoding_format, trace_ptr, cmd->encoded_length,
memory->TranslatePhysical(cmd->base_ptr),
cmd->decoded_length);
trace_ptr += cmd->encoded_length;
command_processor->TracePlaybackWroteMemory(cmd->base_ptr,
cmd->decoded_length);
break;
}
case TraceCommandType::kMemoryWrite: {
auto cmd = reinterpret_cast<const MemoryCommand*>(trace_ptr);
trace_ptr += sizeof(*cmd);
// ?
// Assuming the command processor will do the same write.
trace_ptr += cmd->encoded_length;
break;
}
case TraceCommandType::kEvent: {

View File

@ -136,11 +136,12 @@ void TraceWriter::WritePacketEnd() {
fwrite(&cmd, 1, sizeof(cmd), file_);
}
void TraceWriter::WriteMemoryRead(uint32_t base_ptr, size_t length) {
void TraceWriter::WriteMemoryRead(uint32_t base_ptr, size_t length,
const void* host_ptr) {
if (!file_) {
return;
}
WriteMemoryCommand(TraceCommandType::kMemoryRead, base_ptr, length);
WriteMemoryCommand(TraceCommandType::kMemoryRead, base_ptr, length, host_ptr);
}
void TraceWriter::WriteMemoryReadCached(uint32_t base_ptr, size_t length) {

View File

@ -36,7 +36,8 @@ class TraceWriter {
void WriteIndirectBufferEnd();
void WritePacketStart(uint32_t base_ptr, uint32_t count);
void WritePacketEnd();
void WriteMemoryRead(uint32_t base_ptr, size_t length);
void WriteMemoryRead(uint32_t base_ptr, size_t length,
const void* host_ptr = nullptr);
void WriteMemoryReadCached(uint32_t base_ptr, size_t length);
void WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length);
void WriteMemoryWrite(uint32_t base_ptr, size_t length,

View File

@ -18,6 +18,9 @@ VulkanCommandProcessor::VulkanCommandProcessor(
: CommandProcessor(graphics_system, kernel_state) {}
VulkanCommandProcessor::~VulkanCommandProcessor() = default;
// No-op: this command processor takes no action when trace playback reports a
// memory write.
void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
uint32_t length) {}
// Performs no setup and always reports success.
bool VulkanCommandProcessor::SetupContext() { return true; }
// No-op: there is nothing to release here.
void VulkanCommandProcessor::ShutdownContext() {}

View File

@ -24,6 +24,8 @@ class VulkanCommandProcessor : public CommandProcessor {
kernel::KernelState* kernel_state);
~VulkanCommandProcessor();
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
protected:
bool SetupContext() override;
void ShutdownContext() override;

View File

@ -48,6 +48,9 @@ void VulkanCommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
return CommandProcessor::RequestFrameTrace(root_path);
}
// No-op: this command processor takes no action when trace playback reports a
// memory write.
void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
uint32_t length) {}
void VulkanCommandProcessor::ClearCaches() {
CommandProcessor::ClearCaches();
cache_clear_requested_ = true;

View File

@ -50,7 +50,8 @@ class VulkanCommandProcessor : public CommandProcessor {
kernel::KernelState* kernel_state);
~VulkanCommandProcessor() override;
virtual void RequestFrameTrace(const std::wstring& root_path) override;
void RequestFrameTrace(const std::wstring& root_path) override;
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
void ClearCaches() override;
RenderCache* render_cache() { return render_cache_.get(); }