[GPU] Store an EDRAM snapshot in traces
This commit is contained in:
parent
1bb3cd45ca
commit
c499229455
|
@ -136,6 +136,8 @@ class CommandProcessor {
|
||||||
|
|
||||||
virtual void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) = 0;
|
virtual void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) = 0;
|
||||||
|
|
||||||
|
virtual void RestoreEDRAMSnapshot(const void* snapshot) = 0;
|
||||||
|
|
||||||
void InitializeRingBuffer(uint32_t ptr, uint32_t page_count);
|
void InitializeRingBuffer(uint32_t ptr, uint32_t page_count);
|
||||||
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
|
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
|
||||||
|
|
||||||
|
|
|
@ -87,6 +87,12 @@ void D3D12CommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
|
||||||
primitive_converter_->MemoryWriteCallback(base_ptr, length, true);
|
primitive_converter_->MemoryWriteCallback(base_ptr, length, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Trace playback hook: forwards an EDRAM snapshot to the render target cache
// so it can be restored.
void D3D12CommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {
  // A new frame is started first because the restore may need descriptors.
  BeginSubmission(true);
  render_target_cache_->RestoreEDRAMSnapshot(snapshot);
}
|
||||||
|
|
||||||
bool D3D12CommandProcessor::IsROVUsedForEDRAM() const {
|
bool D3D12CommandProcessor::IsROVUsedForEDRAM() const {
|
||||||
if (!cvars::d3d12_edram_rov) {
|
if (!cvars::d3d12_edram_rov) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -699,8 +705,6 @@ std::unique_ptr<xe::ui::RawImage> D3D12CommandProcessor::Capture() {
|
||||||
i * swap_texture_copy_footprint_.Footprint.RowPitch,
|
i * swap_texture_copy_footprint_.Footprint.RowPitch,
|
||||||
raw_image->stride);
|
raw_image->stride);
|
||||||
}
|
}
|
||||||
D3D12_RANGE readback_written_range = {};
|
|
||||||
gamma_ramp_upload_->Unmap(0, &readback_written_range);
|
|
||||||
return raw_image;
|
return raw_image;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1720,13 +1724,23 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
|
|
||||||
void D3D12CommandProcessor::InitializeTrace() {
|
void D3D12CommandProcessor::InitializeTrace() {
|
||||||
BeginSubmission(false);
|
BeginSubmission(false);
|
||||||
bool any_downloads_submitted = false;
|
bool render_target_cache_submitted =
|
||||||
any_downloads_submitted |= shared_memory_->InitializeTraceSubmitDownloads();
|
render_target_cache_->InitializeTraceSubmitDownloads();
|
||||||
if (!any_downloads_submitted || !EndSubmission(false)) {
|
bool shared_memory_submitted =
|
||||||
|
shared_memory_->InitializeTraceSubmitDownloads();
|
||||||
|
if (!render_target_cache_submitted && !shared_memory_submitted) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!EndSubmission(false)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
AwaitAllSubmissionsCompletion();
|
AwaitAllSubmissionsCompletion();
|
||||||
shared_memory_->InitializeTraceCompleteDownloads();
|
if (render_target_cache_submitted) {
|
||||||
|
render_target_cache_->InitializeTraceCompleteDownloads();
|
||||||
|
}
|
||||||
|
if (shared_memory_submitted) {
|
||||||
|
shared_memory_->InitializeTraceCompleteDownloads();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void D3D12CommandProcessor::FinalizeTrace() {}
|
void D3D12CommandProcessor::FinalizeTrace() {}
|
||||||
|
|
|
@ -48,6 +48,8 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
|
|
||||||
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
||||||
|
|
||||||
|
void RestoreEDRAMSnapshot(const void* snapshot) override;
|
||||||
|
|
||||||
// Needed by everything that owns transient objects.
|
// Needed by everything that owns transient objects.
|
||||||
xe::ui::d3d12::D3D12Context* GetD3D12Context() const {
|
xe::ui::d3d12::D3D12Context* GetD3D12Context() const {
|
||||||
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
|
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
|
||||||
|
|
|
@ -38,6 +38,15 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
|
||||||
stream += header_size;
|
stream += header_size;
|
||||||
stream_remaining -= header_size;
|
stream_remaining -= header_size;
|
||||||
switch (Command(header[0])) {
|
switch (Command(header[0])) {
|
||||||
|
case Command::kD3DClearUnorderedAccessViewUint: {
|
||||||
|
auto& args =
|
||||||
|
*reinterpret_cast<const ClearUnorderedAccessViewHeader*>(stream);
|
||||||
|
command_list->ClearUnorderedAccessViewUint(
|
||||||
|
args.view_gpu_handle_in_current_heap, args.view_cpu_handle,
|
||||||
|
args.resource, args.values_uint, args.num_rects,
|
||||||
|
args.num_rects ? reinterpret_cast<const D3D12_RECT*>(&args + 1)
|
||||||
|
: nullptr);
|
||||||
|
} break;
|
||||||
case Command::kD3DCopyBufferRegion: {
|
case Command::kD3DCopyBufferRegion: {
|
||||||
auto& args =
|
auto& args =
|
||||||
*reinterpret_cast<const D3DCopyBufferRegionArguments*>(stream);
|
*reinterpret_cast<const D3DCopyBufferRegionArguments*>(stream);
|
||||||
|
|
|
@ -32,6 +32,24 @@ class DeferredCommandList {
|
||||||
void Execute(ID3D12GraphicsCommandList* command_list,
|
void Execute(ID3D12GraphicsCommandList* command_list,
|
||||||
ID3D12GraphicsCommandList1* command_list_1);
|
ID3D12GraphicsCommandList1* command_list_1);
|
||||||
|
|
||||||
|
// Records a ClearUnorderedAccessViewUint call into the deferred command
// stream. A ClearUnorderedAccessViewHeader is written first and is followed
// directly by `num_rects` D3D12_RECT entries copied from `rects`; nothing is
// appended (and `rects` is not read) when `num_rects` is zero. `values` must
// point to four UINTs.
inline void D3DClearUnorderedAccessViewUint(
    D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap,
    D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle, ID3D12Resource* resource,
    const UINT values[4], UINT num_rects, const D3D12_RECT* rects) {
  auto* header = reinterpret_cast<ClearUnorderedAccessViewHeader*>(
      WriteCommand(Command::kD3DClearUnorderedAccessViewUint,
                   sizeof(ClearUnorderedAccessViewHeader) +
                       num_rects * sizeof(D3D12_RECT)));
  header->view_gpu_handle_in_current_heap = view_gpu_handle_in_current_heap;
  header->view_cpu_handle = view_cpu_handle;
  header->resource = resource;
  std::memcpy(header->values_uint, values, 4 * sizeof(UINT));
  header->num_rects = num_rects;
  if (num_rects == 0) {
    return;
  }
  // The rectangles live in the stream right after the fixed-size header.
  D3D12_RECT* stored_rects = reinterpret_cast<D3D12_RECT*>(header + 1);
  std::memcpy(stored_rects, rects, num_rects * sizeof(D3D12_RECT));
}
|
||||||
|
|
||||||
inline void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset,
|
inline void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset,
|
||||||
ID3D12Resource* src_buffer, UINT64 src_offset,
|
ID3D12Resource* src_buffer, UINT64 src_offset,
|
||||||
UINT64 num_bytes) {
|
UINT64 num_bytes) {
|
||||||
|
@ -303,6 +321,7 @@ class DeferredCommandList {
|
||||||
static constexpr size_t kAlignment = std::max(sizeof(void*), sizeof(UINT64));
|
static constexpr size_t kAlignment = std::max(sizeof(void*), sizeof(UINT64));
|
||||||
|
|
||||||
enum class Command : uint32_t {
|
enum class Command : uint32_t {
|
||||||
|
kD3DClearUnorderedAccessViewUint,
|
||||||
kD3DCopyBufferRegion,
|
kD3DCopyBufferRegion,
|
||||||
kD3DCopyResource,
|
kD3DCopyResource,
|
||||||
kCopyTexture,
|
kCopyTexture,
|
||||||
|
@ -331,6 +350,17 @@ class DeferredCommandList {
|
||||||
kD3DSetSamplePositions,
|
kD3DSetSamplePositions,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Fixed-size part of a recorded UAV clear command. In the command stream it is
// followed directly by `num_rects` D3D12_RECT entries (see the writer, which
// copies the rects to `args + 1`, and Execute, which reads them from
// `&args + 1`).
struct ClearUnorderedAccessViewHeader {
|
||||||
|
// GPU handle of the view's descriptor in the currently bound heap.
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap;
|
||||||
|
// CPU handle of the view's descriptor.
D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle;
|
||||||
|
// Resource being cleared.
ID3D12Resource* resource;
|
||||||
|
// Clear values. Only values_uint is used by the code visible here;
// values_float is presumably for a float clear variant - TODO confirm.
union {
|
||||||
|
float values_float[4];
|
||||||
|
UINT values_uint[4];
|
||||||
|
};
|
||||||
|
// Number of D3D12_RECT entries stored after this header; with 0, Execute
// passes a null rect pointer to the clear call.
UINT num_rects;
|
||||||
|
};
|
||||||
|
|
||||||
struct D3DCopyBufferRegionArguments {
|
struct D3DCopyBufferRegionArguments {
|
||||||
ID3D12Resource* dst_buffer;
|
ID3D12Resource* dst_buffer;
|
||||||
UINT64 dst_offset;
|
UINT64 dst_offset;
|
||||||
|
|
|
@ -391,6 +391,8 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
|
||||||
void RenderTargetCache::Shutdown() {
|
void RenderTargetCache::Shutdown() {
|
||||||
ClearCache();
|
ClearCache();
|
||||||
|
|
||||||
|
edram_snapshot_restore_pool_.reset();
|
||||||
|
ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_);
|
||||||
for (auto& resolve_pipeline : resolve_pipelines_) {
|
for (auto& resolve_pipeline : resolve_pipelines_) {
|
||||||
resolve_pipeline.pipeline->Release();
|
resolve_pipeline.pipeline->Release();
|
||||||
}
|
}
|
||||||
|
@ -449,9 +451,16 @@ void RenderTargetCache::ClearCache() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
edram_snapshot_restore_pool_.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderTargetCache::BeginSubmission() {
|
void RenderTargetCache::BeginSubmission() {
|
||||||
|
if (edram_snapshot_restore_pool_) {
|
||||||
|
edram_snapshot_restore_pool_->Reclaim(
|
||||||
|
command_processor_->GetCompletedSubmission());
|
||||||
|
}
|
||||||
|
|
||||||
// With the ROV, a submission does not always end in a resolve (for example,
|
// With the ROV, a submission does not always end in a resolve (for example,
|
||||||
// when memexport readback happens) or something else that would surely submit
|
// when memexport readback happens) or something else that would surely submit
|
||||||
// the UAV barrier, so we need to preserve the `current_` variables.
|
// the UAV barrier, so we need to preserve the `current_` variables.
|
||||||
|
@ -2197,6 +2206,113 @@ DXGI_FORMAT RenderTargetCache::GetColorDXGIFormat(
|
||||||
return DXGI_FORMAT_UNKNOWN;
|
return DXGI_FORMAT_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool RenderTargetCache::InitializeTraceSubmitDownloads() {
|
||||||
|
if (resolution_scale_2x_) {
|
||||||
|
// No 1:1 mapping.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const uint32_t kEDRAMSize = 2048 * 5120;
|
||||||
|
if (!edram_snapshot_download_buffer_) {
|
||||||
|
D3D12_RESOURCE_DESC edram_snapshot_download_buffer_desc;
|
||||||
|
ui::d3d12::util::FillBufferResourceDesc(edram_snapshot_download_buffer_desc,
|
||||||
|
kEDRAMSize,
|
||||||
|
D3D12_RESOURCE_FLAG_NONE);
|
||||||
|
auto device =
|
||||||
|
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||||
|
if (FAILED(device->CreateCommittedResource(
|
||||||
|
&ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE,
|
||||||
|
&edram_snapshot_download_buffer_desc,
|
||||||
|
D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
|
||||||
|
IID_PPV_ARGS(&edram_snapshot_download_buffer_)))) {
|
||||||
|
XELOGE("Failed to create a EDRAM snapshot download buffer");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto command_list = command_processor_->GetDeferredCommandList();
|
||||||
|
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
|
command_list->D3DCopyBufferRegion(edram_snapshot_download_buffer_, 0,
|
||||||
|
edram_buffer_, 0, kEDRAMSize);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void RenderTargetCache::InitializeTraceCompleteDownloads() {
|
||||||
|
if (!edram_snapshot_download_buffer_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
void* download_mapping;
|
||||||
|
if (SUCCEEDED(edram_snapshot_download_buffer_->Map(0, nullptr,
|
||||||
|
&download_mapping))) {
|
||||||
|
trace_writer_->WriteEDRAMSnapshot(download_mapping);
|
||||||
|
D3D12_RANGE download_write_range = {};
|
||||||
|
edram_snapshot_download_buffer_->Unmap(0, &download_write_range);
|
||||||
|
} else {
|
||||||
|
XELOGE("Failed to map the EDRAM snapshot download buffer");
|
||||||
|
}
|
||||||
|
edram_snapshot_download_buffer_->Release();
|
||||||
|
edram_snapshot_download_buffer_ = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) {
|
||||||
|
if (resolution_scale_2x_) {
|
||||||
|
// No 1:1 mapping.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||||
|
auto device = provider->GetDevice();
|
||||||
|
const uint32_t kEDRAMSize = 2048 * 5120;
|
||||||
|
if (!edram_snapshot_restore_pool_) {
|
||||||
|
edram_snapshot_restore_pool_ =
|
||||||
|
std::make_unique<ui::d3d12::UploadBufferPool>(device, kEDRAMSize);
|
||||||
|
}
|
||||||
|
ID3D12Resource* upload_buffer;
|
||||||
|
uint32_t upload_buffer_offset;
|
||||||
|
void* upload_buffer_mapping = edram_snapshot_restore_pool_->Request(
|
||||||
|
command_processor_->GetCurrentSubmission(), kEDRAMSize, &upload_buffer,
|
||||||
|
&upload_buffer_offset, nullptr);
|
||||||
|
if (!upload_buffer_mapping) {
|
||||||
|
XELOGE("Failed to get a buffer for restoring a EDRAM snapshot");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::memcpy(upload_buffer_mapping, snapshot, kEDRAMSize);
|
||||||
|
auto command_list = command_processor_->GetDeferredCommandList();
|
||||||
|
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_COPY_DEST);
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
|
command_list->D3DCopyBufferRegion(edram_buffer_, 0, upload_buffer,
|
||||||
|
upload_buffer_offset, kEDRAMSize);
|
||||||
|
if (!command_processor_->IsROVUsedForEDRAM()) {
|
||||||
|
// Clear and ignore the old 32-bit float depth - the non-ROV path is
|
||||||
|
// inaccurate anyway, and this is backend-specific, not a part of a guest
|
||||||
|
// trace.
|
||||||
|
D3D12_CPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_cpu;
|
||||||
|
D3D12_GPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_gpu;
|
||||||
|
if (command_processor_->RequestViewDescriptors(
|
||||||
|
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 1, 1,
|
||||||
|
shader_visbile_descriptor_cpu, shader_visbile_descriptor_gpu) !=
|
||||||
|
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
|
||||||
|
WriteEDRAMUint32UAVDescriptor(shader_visbile_descriptor_cpu);
|
||||||
|
UINT clear_value[4] = {0, 0, 0, 0};
|
||||||
|
D3D12_RECT clear_rect;
|
||||||
|
clear_rect.left = kEDRAMSize >> 2;
|
||||||
|
clear_rect.top = 0;
|
||||||
|
clear_rect.right = (kEDRAMSize >> 2) << 1;
|
||||||
|
clear_rect.bottom = 1;
|
||||||
|
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
|
// ClearUnorderedAccessView takes a shader-visible GPU descriptor and a
|
||||||
|
// non-shader-visible CPU descriptor.
|
||||||
|
command_list->D3DClearUnorderedAccessViewUint(
|
||||||
|
shader_visbile_descriptor_gpu,
|
||||||
|
provider->OffsetViewDescriptor(
|
||||||
|
edram_buffer_descriptor_heap_start_,
|
||||||
|
uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)),
|
||||||
|
edram_buffer_, clear_value, 1, &clear_rect);
|
||||||
|
} else {
|
||||||
|
XELOGE("Failed to get a UAV descriptor for invalidating 32-bit depth");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t RenderTargetCache::GetEDRAMBufferSize() const {
|
uint32_t RenderTargetCache::GetEDRAMBufferSize() const {
|
||||||
uint32_t size = 2048 * 5120;
|
uint32_t size = 2048 * 5120;
|
||||||
if (!command_processor_->IsROVUsedForEDRAM()) {
|
if (!command_processor_->IsROVUsedForEDRAM()) {
|
||||||
|
@ -2215,10 +2331,14 @@ void RenderTargetCache::TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state) {
|
||||||
command_processor_->PushTransitionBarrier(edram_buffer_, edram_buffer_state_,
|
command_processor_->PushTransitionBarrier(edram_buffer_, edram_buffer_state_,
|
||||||
new_state);
|
new_state);
|
||||||
edram_buffer_state_ = new_state;
|
edram_buffer_state_ = new_state;
|
||||||
|
if (new_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
||||||
|
edram_buffer_modified_ = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderTargetCache::CommitEDRAMBufferUAVWrites(bool force) {
|
void RenderTargetCache::CommitEDRAMBufferUAVWrites(bool force) {
|
||||||
if (edram_buffer_modified_ || force) {
|
if ((edram_buffer_modified_ || force) &&
|
||||||
|
edram_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
||||||
command_processor_->PushUAVBarrier(edram_buffer_);
|
command_processor_->PushUAVBarrier(edram_buffer_);
|
||||||
}
|
}
|
||||||
edram_buffer_modified_ = false;
|
edram_buffer_modified_ = false;
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#ifndef XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
|
#ifndef XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
|
||||||
#define XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
|
#define XENIA_GPU_D3D12_RENDER_TARGET_CACHE_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "xenia/base/cvar.h"
|
#include "xenia/base/cvar.h"
|
||||||
|
@ -21,6 +22,7 @@
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
#include "xenia/memory.h"
|
#include "xenia/memory.h"
|
||||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||||
|
#include "xenia/ui/d3d12/pools.h"
|
||||||
|
|
||||||
DECLARE_bool(d3d12_16bit_rtv_full_range);
|
DECLARE_bool(d3d12_16bit_rtv_full_range);
|
||||||
|
|
||||||
|
@ -301,6 +303,11 @@ class RenderTargetCache {
|
||||||
: DXGI_FORMAT_D24_UNORM_S8_UINT;
|
: DXGI_FORMAT_D24_UNORM_S8_UINT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true if any downloads were submitted to the command processor.
|
||||||
|
bool InitializeTraceSubmitDownloads();
|
||||||
|
void InitializeTraceCompleteDownloads();
|
||||||
|
void RestoreEDRAMSnapshot(const void* snapshot);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum class EDRAMLoadStoreMode {
|
enum class EDRAMLoadStoreMode {
|
||||||
kColor32bpp,
|
kColor32bpp,
|
||||||
|
@ -673,6 +680,11 @@ class RenderTargetCache {
|
||||||
#else
|
#else
|
||||||
std::unordered_map<uint32_t, ResolveTarget*> resolve_targets_;
|
std::unordered_map<uint32_t, ResolveTarget*> resolve_targets_;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// For traces only.
|
||||||
|
ID3D12Resource* edram_snapshot_download_buffer_ = nullptr;
|
||||||
|
std::unique_ptr<ui::d3d12::UploadBufferPool> edram_snapshot_restore_pool_ =
|
||||||
|
nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -21,6 +21,8 @@ NullCommandProcessor::~NullCommandProcessor() = default;
|
||||||
void NullCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
|
void NullCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
|
||||||
uint32_t length) {}
|
uint32_t length) {}
|
||||||
|
|
||||||
|
// Intentionally empty: this backend ignores EDRAM snapshots.
void NullCommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {}
|
||||||
|
|
||||||
bool NullCommandProcessor::SetupContext() {
|
bool NullCommandProcessor::SetupContext() {
|
||||||
return CommandProcessor::SetupContext();
|
return CommandProcessor::SetupContext();
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,8 @@ class NullCommandProcessor : public CommandProcessor {
|
||||||
|
|
||||||
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
||||||
|
|
||||||
|
void RestoreEDRAMSnapshot(const void* snapshot) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool SetupContext() override;
|
bool SetupContext() override;
|
||||||
void ShutdownContext() override;
|
void ShutdownContext() override;
|
||||||
|
|
|
@ -32,7 +32,7 @@ TracePlayer::TracePlayer(xe::ui::Loop* loop, GraphicsSystem* graphics_system)
|
||||||
playback_event_ = xe::threading::Event::CreateAutoResetEvent(false);
|
playback_event_ = xe::threading::Event::CreateAutoResetEvent(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
TracePlayer::~TracePlayer() = default;
|
// Frees the EDRAM snapshot buffer that playback allocates with new[] when it
// encounters a snapshot command; the pointer stays nullptr otherwise, and
// delete[] on nullptr is a no-op.
TracePlayer::~TracePlayer() { delete[] edram_snapshot_; }
|
||||||
|
|
||||||
const TraceReader::Frame* TracePlayer::current_frame() const {
|
const TraceReader::Frame* TracePlayer::current_frame() const {
|
||||||
if (current_frame_index_ >= frame_count()) {
|
if (current_frame_index_ >= frame_count()) {
|
||||||
|
@ -186,6 +186,19 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data,
|
||||||
trace_ptr += cmd->encoded_length;
|
trace_ptr += cmd->encoded_length;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case TraceCommandType::kEDRAMSnapshot: {
|
||||||
|
auto cmd = reinterpret_cast<const EDRAMSnapshotCommand*>(trace_ptr);
|
||||||
|
trace_ptr += sizeof(*cmd);
|
||||||
|
const size_t kEDRAMSize = 10 * 1024 * 1024;
|
||||||
|
if (!edram_snapshot_) {
|
||||||
|
edram_snapshot_ = new uint8_t[kEDRAMSize];
|
||||||
|
}
|
||||||
|
DecompressMemory(cmd->encoding_format, trace_ptr, cmd->encoded_length,
|
||||||
|
edram_snapshot_, kEDRAMSize);
|
||||||
|
trace_ptr += cmd->encoded_length;
|
||||||
|
command_processor->RestoreEDRAMSnapshot(edram_snapshot_);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case TraceCommandType::kEvent: {
|
case TraceCommandType::kEvent: {
|
||||||
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
|
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
|
||||||
trace_ptr += sizeof(*cmd);
|
trace_ptr += sizeof(*cmd);
|
||||||
|
|
|
@ -61,6 +61,7 @@ class TracePlayer : public TraceReader {
|
||||||
bool playing_trace_ = false;
|
bool playing_trace_ = false;
|
||||||
std::atomic<uint32_t> playback_percent_ = {0};
|
std::atomic<uint32_t> playback_percent_ = {0};
|
||||||
std::unique_ptr<xe::threading::Event> playback_event_;
|
std::unique_ptr<xe::threading::Event> playback_event_;
|
||||||
|
uint8_t* edram_snapshot_ = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
|
|
@ -51,6 +51,7 @@ enum class TraceCommandType : uint32_t {
|
||||||
kPacketEnd,
|
kPacketEnd,
|
||||||
kMemoryRead,
|
kMemoryRead,
|
||||||
kMemoryWrite,
|
kMemoryWrite,
|
||||||
|
kEDRAMSnapshot,
|
||||||
kEvent,
|
kEvent,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -110,6 +111,18 @@ struct MemoryCommand {
|
||||||
uint32_t decoded_length;
|
uint32_t decoded_length;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Represents a full 10 MB snapshot of EDRAM contents, for trace initialization
|
||||||
|
// (since replaying the trace will reconstruct its state at any point later) as
|
||||||
|
// a sequence of tiles with row-major samples (2x multisampling as 1x2 samples,
|
||||||
|
// 4x as 2x2 samples).
|
||||||
|
// Header of an EDRAM snapshot record; the encoded snapshot bytes follow it
// directly in the trace (readers advance by sizeof(header) + encoded_length).
struct EDRAMSnapshotCommand {
|
||||||
|
// Command tag; the writer sets it to TraceCommandType::kEDRAMSnapshot.
TraceCommandType type;
|
||||||
|
// Encoding format of the data in the trace file.
|
||||||
|
MemoryEncodingFormat encoding_format;
|
||||||
|
// Number of bytes the data occupies in the trace file in its encoded form.
|
||||||
|
uint32_t encoded_length;
|
||||||
|
};
|
||||||
|
|
||||||
// Represents a GPU event of EventCommand::Type.
|
// Represents a GPU event of EventCommand::Type.
|
||||||
struct EventCommand {
|
struct EventCommand {
|
||||||
TraceCommandType type;
|
TraceCommandType type;
|
||||||
|
|
|
@ -190,6 +190,11 @@ void TraceReader::ParseTrace() {
|
||||||
trace_ptr += sizeof(*cmd) + cmd->encoded_length;
|
trace_ptr += sizeof(*cmd) + cmd->encoded_length;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case TraceCommandType::kEDRAMSnapshot: {
|
||||||
|
auto cmd = reinterpret_cast<const EDRAMSnapshotCommand*>(trace_ptr);
|
||||||
|
trace_ptr += sizeof(*cmd) + cmd->encoded_length;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case TraceCommandType::kEvent: {
|
case TraceCommandType::kEvent: {
|
||||||
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
|
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
|
||||||
trace_ptr += sizeof(*cmd);
|
trace_ptr += sizeof(*cmd);
|
||||||
|
|
|
@ -372,6 +372,12 @@ void TraceViewer::DrawPacketDisassemblerUI() {
|
||||||
// ImGui::BulletText("MemoryWrite");
|
// ImGui::BulletText("MemoryWrite");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case TraceCommandType::kEDRAMSnapshot: {
|
||||||
|
auto cmd = reinterpret_cast<const EDRAMSnapshotCommand*>(trace_ptr);
|
||||||
|
trace_ptr += sizeof(*cmd) + cmd->encoded_length;
|
||||||
|
// ImGui::BulletText("EDRAMSnapshot");
|
||||||
|
break;
|
||||||
|
}
|
||||||
case TraceCommandType::kEvent: {
|
case TraceCommandType::kEvent: {
|
||||||
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
|
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
|
||||||
trace_ptr += sizeof(*cmd);
|
trace_ptr += sizeof(*cmd);
|
||||||
|
|
|
@ -229,6 +229,37 @@ void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TraceWriter::WriteEDRAMSnapshot(const void* snapshot) {
|
||||||
|
const uint32_t kEDRAMSize = 10 * 1024 * 1024;
|
||||||
|
EDRAMSnapshotCommand cmd;
|
||||||
|
cmd.type = TraceCommandType::kEDRAMSnapshot;
|
||||||
|
if (compress_output_) {
|
||||||
|
// Write the header now so we reserve space in the buffer.
|
||||||
|
long header_position = std::ftell(file_);
|
||||||
|
cmd.encoding_format = MemoryEncodingFormat::kSnappy;
|
||||||
|
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||||
|
|
||||||
|
// Stream the content right to the buffer.
|
||||||
|
snappy::ByteArraySource snappy_source(
|
||||||
|
reinterpret_cast<const char*>(snapshot), kEDRAMSize);
|
||||||
|
SnappySink snappy_sink(file_);
|
||||||
|
cmd.encoded_length =
|
||||||
|
static_cast<uint32_t>(snappy::Compress(&snappy_source, &snappy_sink));
|
||||||
|
|
||||||
|
// Seek back and overwrite the header with our final size.
|
||||||
|
std::fseek(file_, header_position, SEEK_SET);
|
||||||
|
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||||
|
std::fseek(file_, header_position + sizeof(cmd) + cmd.encoded_length,
|
||||||
|
SEEK_SET);
|
||||||
|
} else {
|
||||||
|
// Uncompressed - write buffer directly to the file.
|
||||||
|
cmd.encoding_format = MemoryEncodingFormat::kNone;
|
||||||
|
cmd.encoded_length = kEDRAMSize;
|
||||||
|
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||||
|
fwrite(snapshot, 1, kEDRAMSize, file_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void TraceWriter::WriteEvent(EventCommand::Type event_type) {
|
void TraceWriter::WriteEvent(EventCommand::Type event_type) {
|
||||||
if (!file_) {
|
if (!file_) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -42,6 +42,7 @@ class TraceWriter {
|
||||||
void WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length);
|
void WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length);
|
||||||
void WriteMemoryWrite(uint32_t base_ptr, size_t length,
|
void WriteMemoryWrite(uint32_t base_ptr, size_t length,
|
||||||
const void* host_ptr = nullptr);
|
const void* host_ptr = nullptr);
|
||||||
|
void WriteEDRAMSnapshot(const void* snapshot);
|
||||||
void WriteEvent(EventCommand::Type event_type);
|
void WriteEvent(EventCommand::Type event_type);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -21,6 +21,8 @@ VulkanCommandProcessor::~VulkanCommandProcessor() = default;
|
||||||
void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
|
void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
|
||||||
uint32_t length) {}
|
uint32_t length) {}
|
||||||
|
|
||||||
|
// Intentionally empty: this backend ignores EDRAM snapshots.
void VulkanCommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {}
|
||||||
|
|
||||||
bool VulkanCommandProcessor::SetupContext() { return true; }
|
bool VulkanCommandProcessor::SetupContext() { return true; }
|
||||||
|
|
||||||
void VulkanCommandProcessor::ShutdownContext() {}
|
void VulkanCommandProcessor::ShutdownContext() {}
|
||||||
|
|
|
@ -26,6 +26,8 @@ class VulkanCommandProcessor : public CommandProcessor {
|
||||||
|
|
||||||
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
||||||
|
|
||||||
|
void RestoreEDRAMSnapshot(const void* snapshot) override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool SetupContext() override;
|
bool SetupContext() override;
|
||||||
void ShutdownContext() override;
|
void ShutdownContext() override;
|
||||||
|
|
|
@ -51,6 +51,8 @@ void VulkanCommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
|
||||||
void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
|
void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
|
||||||
uint32_t length) {}
|
uint32_t length) {}
|
||||||
|
|
||||||
|
// Intentionally empty: this backend ignores EDRAM snapshots.
void VulkanCommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {}
|
||||||
|
|
||||||
void VulkanCommandProcessor::ClearCaches() {
|
void VulkanCommandProcessor::ClearCaches() {
|
||||||
CommandProcessor::ClearCaches();
|
CommandProcessor::ClearCaches();
|
||||||
cache_clear_requested_ = true;
|
cache_clear_requested_ = true;
|
||||||
|
|
|
@ -52,6 +52,7 @@ class VulkanCommandProcessor : public CommandProcessor {
|
||||||
|
|
||||||
void RequestFrameTrace(const std::wstring& root_path) override;
|
void RequestFrameTrace(const std::wstring& root_path) override;
|
||||||
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
||||||
|
void RestoreEDRAMSnapshot(const void* snapshot) override;
|
||||||
void ClearCaches() override;
|
void ClearCaches() override;
|
||||||
|
|
||||||
RenderCache* render_cache() { return render_cache_.get(); }
|
RenderCache* render_cache() { return render_cache_.get(); }
|
||||||
|
|
Loading…
Reference in New Issue