[GPU] Store an EDRAM snapshot in traces

This commit is contained in:
Triang3l 2019-11-04 17:30:20 +03:00
parent 1bb3cd45ca
commit c499229455
20 changed files with 278 additions and 8 deletions

View File

@ -136,6 +136,8 @@ class CommandProcessor {
virtual void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) = 0;
virtual void RestoreEDRAMSnapshot(const void* snapshot) = 0;
void InitializeRingBuffer(uint32_t ptr, uint32_t page_count);
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);

View File

@ -87,6 +87,12 @@ void D3D12CommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
primitive_converter_->MemoryWriteCallback(base_ptr, length, true);
}
// Trace playback hook: forwards a previously captured EDRAM snapshot to the
// render target cache so it can be copied back into the EDRAM buffer.
// `snapshot` is passed through unchanged to
// RenderTargetCache::RestoreEDRAMSnapshot.
void D3D12CommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {
// Starting a new frame because descriptors may be needed.
BeginSubmission(true);
render_target_cache_->RestoreEDRAMSnapshot(snapshot);
}
bool D3D12CommandProcessor::IsROVUsedForEDRAM() const {
if (!cvars::d3d12_edram_rov) {
return false;
@ -699,8 +705,6 @@ std::unique_ptr<xe::ui::RawImage> D3D12CommandProcessor::Capture() {
i * swap_texture_copy_footprint_.Footprint.RowPitch,
raw_image->stride);
}
D3D12_RANGE readback_written_range = {};
gamma_ramp_upload_->Unmap(0, &readback_written_range);
return raw_image;
}
@ -1720,13 +1724,23 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
void D3D12CommandProcessor::InitializeTrace() {
BeginSubmission(false);
bool any_downloads_submitted = false;
any_downloads_submitted |= shared_memory_->InitializeTraceSubmitDownloads();
if (!any_downloads_submitted || !EndSubmission(false)) {
bool render_target_cache_submitted =
render_target_cache_->InitializeTraceSubmitDownloads();
bool shared_memory_submitted =
shared_memory_->InitializeTraceSubmitDownloads();
if (!render_target_cache_submitted && !shared_memory_submitted) {
return;
}
if (!EndSubmission(false)) {
return;
}
AwaitAllSubmissionsCompletion();
shared_memory_->InitializeTraceCompleteDownloads();
if (render_target_cache_submitted) {
render_target_cache_->InitializeTraceCompleteDownloads();
}
if (shared_memory_submitted) {
shared_memory_->InitializeTraceCompleteDownloads();
}
}
void D3D12CommandProcessor::FinalizeTrace() {}

View File

@ -48,6 +48,8 @@ class D3D12CommandProcessor : public CommandProcessor {
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
void RestoreEDRAMSnapshot(const void* snapshot) override;
// Needed by everything that owns transient objects.
xe::ui::d3d12::D3D12Context* GetD3D12Context() const {
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());

View File

@ -38,6 +38,15 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
stream += header_size;
stream_remaining -= header_size;
switch (Command(header[0])) {
case Command::kD3DClearUnorderedAccessViewUint: {
auto& args =
*reinterpret_cast<const ClearUnorderedAccessViewHeader*>(stream);
command_list->ClearUnorderedAccessViewUint(
args.view_gpu_handle_in_current_heap, args.view_cpu_handle,
args.resource, args.values_uint, args.num_rects,
args.num_rects ? reinterpret_cast<const D3D12_RECT*>(&args + 1)
: nullptr);
} break;
case Command::kD3DCopyBufferRegion: {
auto& args =
*reinterpret_cast<const D3DCopyBufferRegionArguments*>(stream);

View File

@ -32,6 +32,24 @@ class DeferredCommandList {
void Execute(ID3D12GraphicsCommandList* command_list,
ID3D12GraphicsCommandList1* command_list_1);
// Records a ClearUnorderedAccessViewUint call into the deferred command
// stream. The recorded command is a ClearUnorderedAccessViewHeader followed
// directly by `num_rects` D3D12_RECT entries.
// `values` must point to four UINT clear values; `rects` is only read when
// `num_rects` is non-zero.
inline void D3DClearUnorderedAccessViewUint(
    D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap,
    D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle, ID3D12Resource* resource,
    const UINT values[4], UINT num_rects, const D3D12_RECT* rects) {
  const size_t rects_size = num_rects * sizeof(D3D12_RECT);
  auto* header = reinterpret_cast<ClearUnorderedAccessViewHeader*>(
      WriteCommand(Command::kD3DClearUnorderedAccessViewUint,
                   sizeof(ClearUnorderedAccessViewHeader) + rects_size));

  // Fixed-size part of the command.
  header->view_gpu_handle_in_current_heap = view_gpu_handle_in_current_heap;
  header->view_cpu_handle = view_cpu_handle;
  header->resource = resource;
  std::memcpy(header->values_uint, values, sizeof(header->values_uint));
  header->num_rects = num_rects;

  // Variable-size tail: the rectangles are stored right after the header.
  if (num_rects) {
    std::memcpy(header + 1, rects, rects_size);
  }
}
inline void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset,
ID3D12Resource* src_buffer, UINT64 src_offset,
UINT64 num_bytes) {
@ -303,6 +321,7 @@ class DeferredCommandList {
static constexpr size_t kAlignment = std::max(sizeof(void*), sizeof(UINT64));
enum class Command : uint32_t {
kD3DClearUnorderedAccessViewUint,
kD3DCopyBufferRegion,
kD3DCopyResource,
kCopyTexture,
@ -331,6 +350,17 @@ class DeferredCommandList {
kD3DSetSamplePositions,
};
// Fixed-size header of a recorded ClearUnorderedAccessView command.
// D3DClearUnorderedAccessViewUint copies `num_rects` D3D12_RECT entries
// directly after this header in the command stream, and Execute reads them
// back from `&args + 1`, so the struct's size and layout determine where the
// rectangles are located.
struct ClearUnorderedAccessViewHeader {
// GPU descriptor handle of the view; passed as the first argument of
// ClearUnorderedAccessViewUint on execution.
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap;
// CPU descriptor handle of the same view; passed as the second argument.
D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle;
// Resource being cleared.
ID3D12Resource* resource;
// Clear values. Only values_uint is written and read by the code visible
// here; values_float shares the same storage.
union {
float values_float[4];
UINT values_uint[4];
};
// Number of D3D12_RECT entries following the header (0 means no rectangles
// are stored and nullptr is passed to the clear call).
UINT num_rects;
};
struct D3DCopyBufferRegionArguments {
ID3D12Resource* dst_buffer;
UINT64 dst_offset;

View File

@ -391,6 +391,8 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
void RenderTargetCache::Shutdown() {
ClearCache();
edram_snapshot_restore_pool_.reset();
ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_);
for (auto& resolve_pipeline : resolve_pipelines_) {
resolve_pipeline.pipeline->Release();
}
@ -449,9 +451,16 @@ void RenderTargetCache::ClearCache() {
}
}
#endif
edram_snapshot_restore_pool_.reset();
}
void RenderTargetCache::BeginSubmission() {
if (edram_snapshot_restore_pool_) {
edram_snapshot_restore_pool_->Reclaim(
command_processor_->GetCompletedSubmission());
}
// With the ROV, a submission does not always end in a resolve (for example,
// when memexport readback happens) or something else that would surely submit
// the UAV barrier, so we need to preserve the `current_` variables.
@ -2197,6 +2206,113 @@ DXGI_FORMAT RenderTargetCache::GetColorDXGIFormat(
return DXGI_FORMAT_UNKNOWN;
}
bool RenderTargetCache::InitializeTraceSubmitDownloads() {
if (resolution_scale_2x_) {
// No 1:1 mapping.
return false;
}
const uint32_t kEDRAMSize = 2048 * 5120;
if (!edram_snapshot_download_buffer_) {
D3D12_RESOURCE_DESC edram_snapshot_download_buffer_desc;
ui::d3d12::util::FillBufferResourceDesc(edram_snapshot_download_buffer_desc,
kEDRAMSize,
D3D12_RESOURCE_FLAG_NONE);
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
if (FAILED(device->CreateCommittedResource(
&ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE,
&edram_snapshot_download_buffer_desc,
D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
IID_PPV_ARGS(&edram_snapshot_download_buffer_)))) {
XELOGE("Failed to create a EDRAM snapshot download buffer");
return false;
}
}
auto command_list = command_processor_->GetDeferredCommandList();
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE);
command_processor_->SubmitBarriers();
command_list->D3DCopyBufferRegion(edram_snapshot_download_buffer_, 0,
edram_buffer_, 0, kEDRAMSize);
return true;
}
void RenderTargetCache::InitializeTraceCompleteDownloads() {
if (!edram_snapshot_download_buffer_) {
return;
}
void* download_mapping;
if (SUCCEEDED(edram_snapshot_download_buffer_->Map(0, nullptr,
&download_mapping))) {
trace_writer_->WriteEDRAMSnapshot(download_mapping);
D3D12_RANGE download_write_range = {};
edram_snapshot_download_buffer_->Unmap(0, &download_write_range);
} else {
XELOGE("Failed to map the EDRAM snapshot download buffer");
}
edram_snapshot_download_buffer_->Release();
edram_snapshot_download_buffer_ = nullptr;
}
void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) {
if (resolution_scale_2x_) {
// No 1:1 mapping.
return;
}
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
const uint32_t kEDRAMSize = 2048 * 5120;
if (!edram_snapshot_restore_pool_) {
edram_snapshot_restore_pool_ =
std::make_unique<ui::d3d12::UploadBufferPool>(device, kEDRAMSize);
}
ID3D12Resource* upload_buffer;
uint32_t upload_buffer_offset;
void* upload_buffer_mapping = edram_snapshot_restore_pool_->Request(
command_processor_->GetCurrentSubmission(), kEDRAMSize, &upload_buffer,
&upload_buffer_offset, nullptr);
if (!upload_buffer_mapping) {
XELOGE("Failed to get a buffer for restoring a EDRAM snapshot");
return;
}
std::memcpy(upload_buffer_mapping, snapshot, kEDRAMSize);
auto command_list = command_processor_->GetDeferredCommandList();
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_COPY_DEST);
command_processor_->SubmitBarriers();
command_list->D3DCopyBufferRegion(edram_buffer_, 0, upload_buffer,
upload_buffer_offset, kEDRAMSize);
if (!command_processor_->IsROVUsedForEDRAM()) {
// Clear and ignore the old 32-bit float depth - the non-ROV path is
// inaccurate anyway, and this is backend-specific, not a part of a guest
// trace.
D3D12_CPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_cpu;
D3D12_GPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_gpu;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 1, 1,
shader_visbile_descriptor_cpu, shader_visbile_descriptor_gpu) !=
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
WriteEDRAMUint32UAVDescriptor(shader_visbile_descriptor_cpu);
UINT clear_value[4] = {0, 0, 0, 0};
D3D12_RECT clear_rect;
clear_rect.left = kEDRAMSize >> 2;
clear_rect.top = 0;
clear_rect.right = (kEDRAMSize >> 2) << 1;
clear_rect.bottom = 1;
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
command_processor_->SubmitBarriers();
// ClearUnorderedAccessView takes a shader-visible GPU descriptor and a
// non-shader-visible CPU descriptor.
command_list->D3DClearUnorderedAccessViewUint(
shader_visbile_descriptor_gpu,
provider->OffsetViewDescriptor(
edram_buffer_descriptor_heap_start_,
uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)),
edram_buffer_, clear_value, 1, &clear_rect);
} else {
XELOGE("Failed to get a UAV descriptor for invalidating 32-bit depth");
}
}
}
uint32_t RenderTargetCache::GetEDRAMBufferSize() const {
uint32_t size = 2048 * 5120;
if (!command_processor_->IsROVUsedForEDRAM()) {
@ -2215,10 +2331,14 @@ void RenderTargetCache::TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state) {
command_processor_->PushTransitionBarrier(edram_buffer_, edram_buffer_state_,
new_state);
edram_buffer_state_ = new_state;
if (new_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
edram_buffer_modified_ = false;
}
}
void RenderTargetCache::CommitEDRAMBufferUAVWrites(bool force) {
if (edram_buffer_modified_ || force) {
if ((edram_buffer_modified_ || force) &&
edram_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
command_processor_->PushUAVBarrier(edram_buffer_);
}
edram_buffer_modified_ = false;

View File

@ -10,6 +10,7 @@
#ifndef XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
#define XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
#include <memory>
#include <unordered_map>
#include "xenia/base/cvar.h"
@ -21,6 +22,7 @@
#include "xenia/gpu/xenos.h"
#include "xenia/memory.h"
#include "xenia/ui/d3d12/d3d12_api.h"
#include "xenia/ui/d3d12/pools.h"
DECLARE_bool(d3d12_16bit_rtv_full_range);
@ -301,6 +303,11 @@ class RenderTargetCache {
: DXGI_FORMAT_D24_UNORM_S8_UINT;
}
// Returns true if any downloads were submitted to the command processor.
bool InitializeTraceSubmitDownloads();
void InitializeTraceCompleteDownloads();
void RestoreEDRAMSnapshot(const void* snapshot);
private:
enum class EDRAMLoadStoreMode {
kColor32bpp,
@ -673,6 +680,11 @@ class RenderTargetCache {
#else
std::unordered_map<uint32_t, ResolveTarget*> resolve_targets_;
#endif
// For traces only.
ID3D12Resource* edram_snapshot_download_buffer_ = nullptr;
std::unique_ptr<ui::d3d12::UploadBufferPool> edram_snapshot_restore_pool_ =
nullptr;
};
} // namespace d3d12

View File

@ -21,6 +21,8 @@ NullCommandProcessor::~NullCommandProcessor() = default;
void NullCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
uint32_t length) {}
// No-op: the snapshot is ignored by this backend.
void NullCommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {}
bool NullCommandProcessor::SetupContext() {
return CommandProcessor::SetupContext();
}

View File

@ -27,6 +27,8 @@ class NullCommandProcessor : public CommandProcessor {
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
void RestoreEDRAMSnapshot(const void* snapshot) override;
private:
bool SetupContext() override;
void ShutdownContext() override;

View File

@ -32,7 +32,7 @@ TracePlayer::TracePlayer(xe::ui::Loop* loop, GraphicsSystem* graphics_system)
playback_event_ = xe::threading::Event::CreateAutoResetEvent(false);
}
TracePlayer::~TracePlayer() = default;
// Frees the EDRAM snapshot decode buffer, which is allocated with new[] the
// first time a kEDRAMSnapshot command is played back and is nullptr otherwise
// (delete[] on nullptr is a no-op).
TracePlayer::~TracePlayer() { delete[] edram_snapshot_; }
const TraceReader::Frame* TracePlayer::current_frame() const {
if (current_frame_index_ >= frame_count()) {
@ -186,6 +186,19 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data,
trace_ptr += cmd->encoded_length;
break;
}
case TraceCommandType::kEDRAMSnapshot: {
auto cmd = reinterpret_cast<const EDRAMSnapshotCommand*>(trace_ptr);
trace_ptr += sizeof(*cmd);
const size_t kEDRAMSize = 10 * 1024 * 1024;
if (!edram_snapshot_) {
edram_snapshot_ = new uint8_t[kEDRAMSize];
}
DecompressMemory(cmd->encoding_format, trace_ptr, cmd->encoded_length,
edram_snapshot_, kEDRAMSize);
trace_ptr += cmd->encoded_length;
command_processor->RestoreEDRAMSnapshot(edram_snapshot_);
break;
}
case TraceCommandType::kEvent: {
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
trace_ptr += sizeof(*cmd);

View File

@ -61,6 +61,7 @@ class TracePlayer : public TraceReader {
bool playing_trace_ = false;
std::atomic<uint32_t> playback_percent_ = {0};
std::unique_ptr<xe::threading::Event> playback_event_;
uint8_t* edram_snapshot_ = nullptr;
};
} // namespace gpu

View File

@ -51,6 +51,7 @@ enum class TraceCommandType : uint32_t {
kPacketEnd,
kMemoryRead,
kMemoryWrite,
kEDRAMSnapshot,
kEvent,
};
@ -110,6 +111,18 @@ struct MemoryCommand {
uint32_t decoded_length;
};
// Represents a full 10 MB snapshot of EDRAM contents, for trace initialization
// (since replaying the trace will reconstruct its state at any point later) as
// a sequence of tiles with row-major samples (2x multisampling as 1x2 samples,
// 4x as 2x2 samples).
// In the trace file, this header is immediately followed by `encoded_length`
// bytes of payload. The decoded size is not stored: the writer and the player
// both use a fixed 10 MB.
struct EDRAMSnapshotCommand {
// Set to TraceCommandType::kEDRAMSnapshot by the writer.
TraceCommandType type;
// Encoding format of the data in the trace file.
MemoryEncodingFormat encoding_format;
// Number of bytes the data occupies in the trace file in its encoded form.
uint32_t encoded_length;
};
// Represents a GPU event of EventCommand::Type.
struct EventCommand {
TraceCommandType type;

View File

@ -190,6 +190,11 @@ void TraceReader::ParseTrace() {
trace_ptr += sizeof(*cmd) + cmd->encoded_length;
break;
}
case TraceCommandType::kEDRAMSnapshot: {
auto cmd = reinterpret_cast<const EDRAMSnapshotCommand*>(trace_ptr);
trace_ptr += sizeof(*cmd) + cmd->encoded_length;
break;
}
case TraceCommandType::kEvent: {
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
trace_ptr += sizeof(*cmd);

View File

@ -372,6 +372,12 @@ void TraceViewer::DrawPacketDisassemblerUI() {
// ImGui::BulletText("MemoryWrite");
break;
}
case TraceCommandType::kEDRAMSnapshot: {
auto cmd = reinterpret_cast<const EDRAMSnapshotCommand*>(trace_ptr);
trace_ptr += sizeof(*cmd) + cmd->encoded_length;
// ImGui::BulletText("EDRAMSnapshot");
break;
}
case TraceCommandType::kEvent: {
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
trace_ptr += sizeof(*cmd);

View File

@ -229,6 +229,37 @@ void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr,
}
}
// Writes an EDRAM snapshot command to the trace: an EDRAMSnapshotCommand
// header followed by the 10 MB pointed to by `snapshot`, Snappy-compressed
// when compress_output_ is set and raw otherwise.
// Does nothing when no trace file is open, consistent with WriteEvent.
void TraceWriter::WriteEDRAMSnapshot(const void* snapshot) {
  if (!file_) {
    return;
  }

  const uint32_t kEDRAMSize = 10 * 1024 * 1024;
  EDRAMSnapshotCommand cmd;
  cmd.type = TraceCommandType::kEDRAMSnapshot;
  if (compress_output_) {
    // Write the header now so we reserve space in the buffer. The encoded
    // length is not known yet, so a zero placeholder is written instead of an
    // indeterminate value.
    long header_position = std::ftell(file_);
    cmd.encoding_format = MemoryEncodingFormat::kSnappy;
    cmd.encoded_length = 0;
    fwrite(&cmd, 1, sizeof(cmd), file_);

    // Stream the content right to the buffer.
    snappy::ByteArraySource snappy_source(
        reinterpret_cast<const char*>(snapshot), kEDRAMSize);
    SnappySink snappy_sink(file_);
    cmd.encoded_length =
        static_cast<uint32_t>(snappy::Compress(&snappy_source, &snappy_sink));

    // Seek back and overwrite the header with our final size, then return to
    // the end of the compressed payload so later commands are appended.
    std::fseek(file_, header_position, SEEK_SET);
    fwrite(&cmd, 1, sizeof(cmd), file_);
    std::fseek(file_,
               header_position +
                   static_cast<long>(sizeof(cmd) + cmd.encoded_length),
               SEEK_SET);
  } else {
    // Uncompressed - write buffer directly to the file.
    cmd.encoding_format = MemoryEncodingFormat::kNone;
    cmd.encoded_length = kEDRAMSize;
    fwrite(&cmd, 1, sizeof(cmd), file_);
    fwrite(snapshot, 1, kEDRAMSize, file_);
  }
}
void TraceWriter::WriteEvent(EventCommand::Type event_type) {
if (!file_) {
return;

View File

@ -42,6 +42,7 @@ class TraceWriter {
void WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length);
void WriteMemoryWrite(uint32_t base_ptr, size_t length,
const void* host_ptr = nullptr);
void WriteEDRAMSnapshot(const void* snapshot);
void WriteEvent(EventCommand::Type event_type);
private:

View File

@ -21,6 +21,8 @@ VulkanCommandProcessor::~VulkanCommandProcessor() = default;
void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
uint32_t length) {}
// No-op: the snapshot is ignored by this backend.
void VulkanCommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {}
bool VulkanCommandProcessor::SetupContext() { return true; }
void VulkanCommandProcessor::ShutdownContext() {}

View File

@ -26,6 +26,8 @@ class VulkanCommandProcessor : public CommandProcessor {
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
void RestoreEDRAMSnapshot(const void* snapshot) override;
protected:
bool SetupContext() override;
void ShutdownContext() override;

View File

@ -51,6 +51,8 @@ void VulkanCommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
uint32_t length) {}
// No-op: the snapshot is ignored by this backend.
void VulkanCommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {}
void VulkanCommandProcessor::ClearCaches() {
CommandProcessor::ClearCaches();
cache_clear_requested_ = true;

View File

@ -52,6 +52,7 @@ class VulkanCommandProcessor : public CommandProcessor {
void RequestFrameTrace(const std::wstring& root_path) override;
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
void RestoreEDRAMSnapshot(const void* snapshot) override;
void ClearCaches() override;
RenderCache* render_cache() { return render_cache_.get(); }