[D3D12] Trace guest memory operations
This commit is contained in:
parent
e07b0ed2ad
commit
4623b41023
|
@ -119,6 +119,8 @@ void CommandProcessor::EndTracing() {
|
|||
return;
|
||||
}
|
||||
assert_true(trace_state_ == TraceState::kStreaming);
|
||||
FinalizeTrace();
|
||||
trace_state_ = TraceState::kDisabled;
|
||||
trace_writer_.Close();
|
||||
}
|
||||
|
||||
|
@ -437,6 +439,7 @@ uint32_t CommandProcessor::ExecutePrimaryBuffer(uint32_t read_index,
|
|||
auto file_name = xe::format_string(L"%8X_stream.xtr", title_id);
|
||||
auto path = trace_stream_path_ + file_name;
|
||||
trace_writer_.Open(path, title_id);
|
||||
InitializeTrace();
|
||||
}
|
||||
|
||||
// Adjust pointer base.
|
||||
|
@ -738,6 +741,7 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) {
|
|||
trace_writer_.WriteEvent(EventCommand::Type::kSwap);
|
||||
trace_writer_.Flush();
|
||||
if (trace_state_ == TraceState::kSingleFrame) {
|
||||
FinalizeTrace();
|
||||
trace_state_ = TraceState::kDisabled;
|
||||
trace_writer_.Close();
|
||||
}
|
||||
|
@ -747,6 +751,7 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) {
|
|||
auto file_name = xe::format_string(L"%8X_%u.xtr", title_id, counter_ - 1);
|
||||
auto path = trace_frame_path_ + file_name;
|
||||
trace_writer_.Open(path, title_id);
|
||||
InitializeTrace();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -237,6 +237,9 @@ class CommandProcessor {
|
|||
IndexBufferInfo* index_buffer_info) = 0;
|
||||
virtual bool IssueCopy() = 0;
|
||||
|
||||
virtual void InitializeTrace() = 0;
|
||||
virtual void FinalizeTrace() = 0;
|
||||
|
||||
Memory* memory_ = nullptr;
|
||||
kernel::KernelState* kernel_state_ = nullptr;
|
||||
GraphicsSystem* graphics_system_ = nullptr;
|
||||
|
|
|
@ -672,7 +672,8 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
sampler_heap_pool_ = std::make_unique<ui::d3d12::DescriptorHeapPool>(
|
||||
context, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2048);
|
||||
|
||||
shared_memory_ = std::make_unique<SharedMemory>(this, memory_);
|
||||
shared_memory_ =
|
||||
std::make_unique<SharedMemory>(this, memory_, &trace_writer_);
|
||||
if (!shared_memory_->Initialize()) {
|
||||
XELOGE("Failed to initialize shared memory");
|
||||
return false;
|
||||
|
@ -700,8 +701,8 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
return false;
|
||||
}
|
||||
|
||||
primitive_converter_ =
|
||||
std::make_unique<PrimitiveConverter>(this, register_file_, memory_);
|
||||
primitive_converter_ = std::make_unique<PrimitiveConverter>(
|
||||
this, register_file_, memory_, &trace_writer_);
|
||||
if (!primitive_converter_->Initialize()) {
|
||||
XELOGE("Failed to initialize the geometric primitive converter");
|
||||
return false;
|
||||
|
@ -1655,6 +1656,19 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
return true;
|
||||
}
|
||||
|
||||
void D3D12CommandProcessor::InitializeTrace() {
|
||||
BeginFrame();
|
||||
bool anySubmitted = false;
|
||||
anySubmitted |= shared_memory_->InitializeTraceSubmitDownloads();
|
||||
if (anySubmitted) {
|
||||
EndFrame();
|
||||
GetD3D12Context()->AwaitAllFramesCompletion();
|
||||
shared_memory_->InitializeTraceCompleteDownloads();
|
||||
}
|
||||
}
|
||||
|
||||
// No-op: the D3D12 backend performs no work when a trace is being closed.
void D3D12CommandProcessor::FinalizeTrace() {}
|
||||
|
||||
bool D3D12CommandProcessor::IssueCopy() {
|
||||
#if FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
|
|
@ -158,6 +158,9 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
IndexBufferInfo* index_buffer_info) override;
|
||||
bool IssueCopy() override;
|
||||
|
||||
void InitializeTrace() override;
|
||||
void FinalizeTrace() override;
|
||||
|
||||
private:
|
||||
enum RootParameter : UINT {
|
||||
// These are always present.
|
||||
|
|
|
@ -41,10 +41,12 @@ constexpr uint32_t PrimitiveConverter::kStaticIBTotalCount;
|
|||
|
||||
PrimitiveConverter::PrimitiveConverter(D3D12CommandProcessor* command_processor,
|
||||
RegisterFile* register_file,
|
||||
Memory* memory)
|
||||
Memory* memory,
|
||||
TraceWriter* trace_writer)
|
||||
: command_processor_(command_processor),
|
||||
register_file_(register_file),
|
||||
memory_(memory) {
|
||||
memory_(memory),
|
||||
trace_writer_(trace_writer) {
|
||||
system_page_size_ = uint32_t(memory::page_size());
|
||||
}
|
||||
|
||||
|
@ -248,6 +250,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
|||
|
||||
address &= index_32bit ? 0x1FFFFFFC : 0x1FFFFFFE;
|
||||
uint32_t index_size = index_32bit ? sizeof(uint32_t) : sizeof(uint16_t);
|
||||
uint32_t index_buffer_size = index_size * index_count;
|
||||
uint32_t address_last = address + index_size * (index_count - 1);
|
||||
|
||||
// Create the cache entry, currently only for the key.
|
||||
|
@ -305,6 +308,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
|||
if (source_type == PrimitiveType::kTriangleFan) {
|
||||
// Triangle fans are not supported by Direct3D 12 at all.
|
||||
conversion_needed = true;
|
||||
trace_writer_->WriteMemoryRead(address, index_buffer_size);
|
||||
if (reset) {
|
||||
uint32_t current_fan_index_count = 0;
|
||||
for (uint32_t i = 0; i < index_count; ++i) {
|
||||
|
@ -327,6 +331,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
|||
// Check if the restart index is used at all in this buffer because reading
|
||||
// vertices from a default heap is faster than from an upload heap.
|
||||
conversion_needed = false;
|
||||
trace_writer_->WriteMemoryRead(address, index_buffer_size);
|
||||
#if XE_ARCH_AMD64
|
||||
// Will use SIMD to copy 16-byte blocks using _mm_or_si128.
|
||||
simd = true;
|
||||
|
@ -412,6 +417,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
|||
#endif // XE_ARCH_AMD64
|
||||
} else if (source_type == PrimitiveType::kLineLoop) {
|
||||
conversion_needed = true;
|
||||
trace_writer_->WriteMemoryRead(address, index_buffer_size);
|
||||
if (reset) {
|
||||
reset_actually_used = false;
|
||||
uint32_t current_strip_index_count = 0;
|
||||
|
@ -437,6 +443,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
|||
}
|
||||
} else if (source_type == PrimitiveType::kQuadList) {
|
||||
conversion_needed = true;
|
||||
trace_writer_->WriteMemoryRead(address, index_buffer_size);
|
||||
converted_index_count = (index_count >> 2) * 6;
|
||||
}
|
||||
converted_indices.converted_index_count = converted_index_count;
|
||||
|
@ -739,6 +746,12 @@ D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer(
|
|||
return D3D12_GPU_VIRTUAL_ADDRESS(0);
|
||||
}
|
||||
|
||||
// Resets the conversion bookkeeping when a trace starts: zeroes the used
// memory region counter and empties the converted index cache.
void PrimitiveConverter::InitializeTrace() {
  memory_regions_used_ = 0;
  // The cache is dropped so that WriteMemoryRead is not skipped for index
  // buffers converted before the trace began.
  converted_indices_cache_.clear();
}
|
||||
|
||||
} // namespace d3d12
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <unordered_map>
|
||||
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/trace_writer.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/memory.h"
|
||||
#include "xenia/ui/d3d12/d3d12_context.h"
|
||||
|
@ -37,7 +38,8 @@ class D3D12CommandProcessor;
|
|||
class PrimitiveConverter {
|
||||
public:
|
||||
PrimitiveConverter(D3D12CommandProcessor* command_processor,
|
||||
RegisterFile* register_file, Memory* memory);
|
||||
RegisterFile* register_file, Memory* memory,
|
||||
TraceWriter* trace_writer);
|
||||
~PrimitiveConverter();
|
||||
|
||||
bool Initialize();
|
||||
|
@ -80,6 +82,8 @@ class PrimitiveConverter {
|
|||
PrimitiveType source_type, uint32_t index_count,
|
||||
uint32_t& index_count_out) const;
|
||||
|
||||
void InitializeTrace();
|
||||
|
||||
private:
|
||||
// simd_offset is source address & 15 - if SIMD is used, the source and the
|
||||
// target must have the same alignment within one register. 0 is optimal when
|
||||
|
@ -98,6 +102,7 @@ class PrimitiveConverter {
|
|||
D3D12CommandProcessor* command_processor_;
|
||||
RegisterFile* register_file_;
|
||||
Memory* memory_;
|
||||
TraceWriter* trace_writer_;
|
||||
|
||||
std::unique_ptr<ui::d3d12::UploadBufferPool> buffer_pool_ = nullptr;
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
|
@ -42,8 +43,10 @@ constexpr uint32_t SharedMemory::kWatchRangePoolSize;
|
|||
constexpr uint32_t SharedMemory::kWatchNodePoolSize;
|
||||
|
||||
SharedMemory::SharedMemory(D3D12CommandProcessor* command_processor,
|
||||
Memory* memory)
|
||||
: command_processor_(command_processor), memory_(memory) {
|
||||
Memory* memory, TraceWriter* trace_writer)
|
||||
: command_processor_(command_processor),
|
||||
memory_(memory),
|
||||
trace_writer_(trace_writer) {
|
||||
page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size()));
|
||||
page_count_ = kBufferSize >> page_size_log2_;
|
||||
uint32_t page_bitmap_length = page_count_ >> 6;
|
||||
|
@ -133,10 +136,14 @@ bool SharedMemory::Initialize() {
|
|||
physical_write_watch_handle_ =
|
||||
memory_->RegisterPhysicalWriteWatch(MemoryWriteCallbackThunk, this);
|
||||
|
||||
ResetTraceGPUWrittenBuffer();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void SharedMemory::Shutdown() {
|
||||
ResetTraceGPUWrittenBuffer();
|
||||
|
||||
// TODO(Triang3l): Do something in case any watches are still registered.
|
||||
|
||||
if (physical_write_watch_handle_ != nullptr) {
|
||||
|
@ -365,6 +372,8 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
|
|||
for (auto upload_range : upload_ranges_) {
|
||||
uint32_t upload_range_start = upload_range.first;
|
||||
uint32_t upload_range_length = upload_range.second;
|
||||
trace_writer_->WriteMemoryRead(upload_range_start << page_size_log2_,
|
||||
upload_range_length << page_size_log2_);
|
||||
while (upload_range_length != 0) {
|
||||
ID3D12Resource* upload_buffer;
|
||||
uint32_t upload_buffer_offset, upload_buffer_size;
|
||||
|
@ -376,7 +385,6 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
|
|||
return false;
|
||||
}
|
||||
uint32_t upload_buffer_pages = upload_buffer_size >> page_size_log2_;
|
||||
// No mutex holding here!
|
||||
MakeRangeValid(upload_range_start, upload_buffer_pages, false);
|
||||
std::memcpy(
|
||||
upload_buffer_mapping,
|
||||
|
@ -441,7 +449,6 @@ void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) {
|
|||
|
||||
// Mark the range as valid (so pages are not reuploaded until modified by the
|
||||
// CPU) and watch it so the CPU can reuse it and this will be caught.
|
||||
// No mutex holding here!
|
||||
MakeRangeValid(page_first, page_last - page_first + 1, true);
|
||||
}
|
||||
|
||||
|
@ -654,6 +661,157 @@ void SharedMemory::WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
|||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
}
|
||||
|
||||
bool SharedMemory::InitializeTraceSubmitDownloads() {
|
||||
// Invalidate the entire memory CPU->GPU memory copy so all the history
|
||||
// doesn't have to be written into every frame trace, and collect the list of
|
||||
// ranges with data modified on the GPU.
|
||||
ResetTraceGPUWrittenBuffer();
|
||||
uint32_t gpu_written_page_count = 0;
|
||||
|
||||
{
|
||||
auto global_lock = global_critical_region_.Acquire();
|
||||
uint32_t fire_watches_range_start = UINT32_MAX;
|
||||
uint32_t gpu_written_range_start = UINT32_MAX;
|
||||
for (uint32_t i = 0; i * 2 < valid_and_gpu_written_pages_.size(); ++i) {
|
||||
uint64_t previously_valid_block = valid_and_gpu_written_pages_[i * 2];
|
||||
uint64_t gpu_written_block = valid_and_gpu_written_pages_[i * 2 + 1];
|
||||
valid_and_gpu_written_pages_[i * 2] = gpu_written_block;
|
||||
|
||||
// Fire watches on the invalidated pages.
|
||||
uint64_t fire_watches_block = previously_valid_block & ~gpu_written_block;
|
||||
uint64_t fire_watches_break_block = ~fire_watches_block;
|
||||
while (true) {
|
||||
uint32_t fire_watches_block_page;
|
||||
if (!xe::bit_scan_forward(fire_watches_range_start == UINT32_MAX
|
||||
? fire_watches_block
|
||||
: fire_watches_break_block,
|
||||
&fire_watches_block_page)) {
|
||||
break;
|
||||
}
|
||||
uint32_t fire_watches_page = (i << 6) + fire_watches_block_page;
|
||||
if (fire_watches_range_start == UINT32_MAX) {
|
||||
fire_watches_range_start = fire_watches_page;
|
||||
} else {
|
||||
FireWatches(fire_watches_range_start, fire_watches_page - 1, false);
|
||||
fire_watches_range_start = UINT32_MAX;
|
||||
}
|
||||
uint64_t fire_watches_block_mask =
|
||||
~((1ull << fire_watches_block_page) - 1);
|
||||
fire_watches_block &= fire_watches_block_mask;
|
||||
fire_watches_break_block &= fire_watches_block_mask;
|
||||
}
|
||||
|
||||
// Add to the GPU-written ranges.
|
||||
uint64_t gpu_written_break_block = ~gpu_written_block;
|
||||
while (true) {
|
||||
uint32_t gpu_written_block_page;
|
||||
if (!xe::bit_scan_forward(gpu_written_range_start == UINT32_MAX
|
||||
? gpu_written_block
|
||||
: gpu_written_break_block,
|
||||
&gpu_written_block_page)) {
|
||||
break;
|
||||
}
|
||||
uint32_t gpu_written_page = (i << 6) + gpu_written_block_page;
|
||||
if (gpu_written_range_start == UINT32_MAX) {
|
||||
gpu_written_range_start = gpu_written_page;
|
||||
} else {
|
||||
uint32_t gpu_written_range_length =
|
||||
gpu_written_page - gpu_written_range_start;
|
||||
trace_gpu_written_ranges_.push_back(
|
||||
std::make_pair(gpu_written_range_start << page_size_log2_,
|
||||
gpu_written_range_length << page_size_log2_));
|
||||
gpu_written_page_count += gpu_written_range_length;
|
||||
gpu_written_range_start = UINT32_MAX;
|
||||
}
|
||||
uint64_t gpu_written_block_mask =
|
||||
~((1ull << gpu_written_block_page) - 1);
|
||||
gpu_written_block &= gpu_written_block_mask;
|
||||
gpu_written_break_block &= gpu_written_block_mask;
|
||||
}
|
||||
}
|
||||
if (fire_watches_range_start != UINT32_MAX) {
|
||||
FireWatches(fire_watches_range_start, page_count_ - 1, false);
|
||||
}
|
||||
if (gpu_written_range_start != UINT32_MAX) {
|
||||
uint32_t gpu_written_range_length = page_count_ - gpu_written_range_start;
|
||||
trace_gpu_written_ranges_.push_back(
|
||||
std::make_pair(gpu_written_range_start << page_size_log2_,
|
||||
gpu_written_range_length << page_size_log2_));
|
||||
gpu_written_page_count += gpu_written_range_length;
|
||||
}
|
||||
}
|
||||
|
||||
// Request downloading of GPU-written memory.
|
||||
if (!gpu_written_page_count) {
|
||||
return false;
|
||||
}
|
||||
D3D12_RESOURCE_DESC gpu_written_buffer_desc;
|
||||
ui::d3d12::util::FillBufferResourceDesc(
|
||||
gpu_written_buffer_desc, gpu_written_page_count << page_size_log2_,
|
||||
D3D12_RESOURCE_FLAG_NONE);
|
||||
auto device =
|
||||
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||
if (FAILED(device->CreateCommittedResource(
|
||||
&ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE,
|
||||
&gpu_written_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
|
||||
IID_PPV_ARGS(&trace_gpu_written_buffer_)))) {
|
||||
XELOGE(
|
||||
"Failed to create a %u KB GPU-written memory download buffer for frame "
|
||||
"tracing",
|
||||
gpu_written_page_count << page_size_log2_ >> 10);
|
||||
ResetTraceGPUWrittenBuffer();
|
||||
return false;
|
||||
}
|
||||
auto command_list = command_processor_->GetDeferredCommandList();
|
||||
UseAsCopySource();
|
||||
command_processor_->SubmitBarriers();
|
||||
uint32_t gpu_written_buffer_offset = 0;
|
||||
for (auto& gpu_written_submit_range : trace_gpu_written_ranges_) {
|
||||
// For cases like resolution scale, when the data may not be actually
|
||||
// written, just marked as valid.
|
||||
if (!MakeTilesResident(gpu_written_submit_range.first,
|
||||
gpu_written_submit_range.second)) {
|
||||
gpu_written_submit_range.second = 0;
|
||||
continue;
|
||||
}
|
||||
command_list->D3DCopyBufferRegion(
|
||||
trace_gpu_written_buffer_, gpu_written_buffer_offset, buffer_,
|
||||
gpu_written_submit_range.first, gpu_written_submit_range.second);
|
||||
gpu_written_buffer_offset += gpu_written_submit_range.second;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void SharedMemory::InitializeTraceCompleteDownloads() {
|
||||
if (!trace_gpu_written_buffer_) {
|
||||
return;
|
||||
}
|
||||
void* download_mapping;
|
||||
if (SUCCEEDED(
|
||||
trace_gpu_written_buffer_->Map(0, nullptr, &download_mapping))) {
|
||||
uint32_t gpu_written_buffer_offset = 0;
|
||||
for (auto gpu_written_submit_range : trace_gpu_written_ranges_) {
|
||||
trace_writer_->WriteMemoryWrite(
|
||||
gpu_written_submit_range.first, gpu_written_submit_range.second,
|
||||
reinterpret_cast<const uint8_t*>(download_mapping) +
|
||||
gpu_written_buffer_offset);
|
||||
}
|
||||
D3D12_RANGE download_write_range = {};
|
||||
trace_gpu_written_buffer_->Unmap(0, &download_write_range);
|
||||
} else {
|
||||
XELOGE(
|
||||
"Failed to map the GPU-written memory download buffer for frame "
|
||||
"tracing");
|
||||
}
|
||||
ResetTraceGPUWrittenBuffer();
|
||||
}
|
||||
|
||||
// Discards all state of the trace-start GPU-written memory download: passes
// the temporary readback buffer to ReleaseAndNull and empties the list of
// recorded ranges, asking the vector to give its storage back as well.
void SharedMemory::ResetTraceGPUWrittenBuffer() {
  ui::d3d12::util::ReleaseAndNull(trace_gpu_written_buffer_);
  trace_gpu_written_ranges_.clear();
  trace_gpu_written_ranges_.shrink_to_fit();
}
|
||||
|
||||
} // namespace d3d12
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -12,9 +12,11 @@
|
|||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/mutex.h"
|
||||
#include "xenia/gpu/trace_writer.h"
|
||||
#include "xenia/memory.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
#include "xenia/ui/d3d12/pools.h"
|
||||
|
@ -30,7 +32,8 @@ class D3D12CommandProcessor;
|
|||
// system page size granularity.
|
||||
class SharedMemory {
|
||||
public:
|
||||
SharedMemory(D3D12CommandProcessor* command_processor, Memory* memory);
|
||||
SharedMemory(D3D12CommandProcessor* command_processor, Memory* memory,
|
||||
TraceWriter* trace_writer);
|
||||
~SharedMemory();
|
||||
|
||||
bool Initialize();
|
||||
|
@ -124,6 +127,10 @@ class SharedMemory {
|
|||
void WriteRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||
void WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||
|
||||
// Returns true if any downloads were submitted to the command processor.
|
||||
bool InitializeTraceSubmitDownloads();
|
||||
void InitializeTraceCompleteDownloads();
|
||||
|
||||
private:
|
||||
bool AreTiledResourcesUsed() const;
|
||||
|
||||
|
@ -132,8 +139,8 @@ class SharedMemory {
|
|||
bool written_by_gpu);
|
||||
|
||||
D3D12CommandProcessor* command_processor_;
|
||||
|
||||
Memory* memory_;
|
||||
TraceWriter* trace_writer_;
|
||||
|
||||
// The 512 MB tiled buffer.
|
||||
static constexpr uint32_t kBufferSizeLog2 = 29;
|
||||
|
@ -268,6 +275,13 @@ class SharedMemory {
|
|||
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
|
||||
|
||||
void TransitionBuffer(D3D12_RESOURCE_STATES new_state);
|
||||
|
||||
// GPU-written memory downloading for traces.
|
||||
// Start address and length of each range, both in bytes (page-aligned).
|
||||
std::vector<std::pair<uint32_t, uint32_t>> trace_gpu_written_ranges_;
|
||||
// Created temporarily, only for downloading.
|
||||
ID3D12Resource* trace_gpu_written_buffer_ = nullptr;
|
||||
void ResetTraceGPUWrittenBuffer();
|
||||
};
|
||||
|
||||
} // namespace d3d12
|
||||
|
|
|
@ -45,6 +45,10 @@ bool NullCommandProcessor::IssueDraw(PrimitiveType prim_type,
|
|||
|
||||
bool NullCommandProcessor::IssueCopy() { return true; }
|
||||
|
||||
// No-op: the null backend performs no work when a trace is opened.
void NullCommandProcessor::InitializeTrace() {}
|
||||
|
||||
// No-op: the null backend performs no work when a trace is being closed.
void NullCommandProcessor::FinalizeTrace() {}
|
||||
|
||||
} // namespace null
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -39,6 +39,9 @@ class NullCommandProcessor : public CommandProcessor {
|
|||
bool IssueDraw(PrimitiveType prim_type, uint32_t index_count,
|
||||
IndexBufferInfo* index_buffer_info) override;
|
||||
bool IssueCopy() override;
|
||||
|
||||
void InitializeTrace() override;
|
||||
void FinalizeTrace() override;
|
||||
};
|
||||
|
||||
} // namespace null
|
||||
|
|
|
@ -21,11 +21,11 @@ TracePlayer::TracePlayer(xe::ui::Loop* loop, GraphicsSystem* graphics_system)
|
|||
graphics_system_(graphics_system),
|
||||
current_frame_index_(0),
|
||||
current_command_index_(-1) {
|
||||
// Need to allocate all of physical memory so that we can write to it
|
||||
// during playback.
|
||||
graphics_system_->memory()
|
||||
->LookupHeapByType(true, 4096)
|
||||
->AllocFixed(0, 0x1FFFFFFF, 4096,
|
||||
// Need to allocate all of physical memory so that we can write to it during
|
||||
// playback. The 64 KB page heap is larger, covers the entire physical memory,
|
||||
// so it is used instead of the 4 KB page one.
|
||||
auto heap = graphics_system_->memory()->LookupHeapByType(true, 64 * 1024);
|
||||
heap->AllocFixed(heap->heap_base(), heap->heap_size(), heap->page_size(),
|
||||
kMemoryAllocationReserve | kMemoryAllocationCommit,
|
||||
kMemoryProtectRead | kMemoryProtectWrite);
|
||||
|
||||
|
@ -167,7 +167,8 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data,
|
|||
}
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kMemoryRead: {
|
||||
case TraceCommandType::kMemoryRead:
|
||||
case TraceCommandType::kMemoryWrite: {
|
||||
auto cmd = reinterpret_cast<const MemoryCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
DecompressMemory(cmd->encoding_format, trace_ptr, cmd->encoded_length,
|
||||
|
@ -176,13 +177,6 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data,
|
|||
trace_ptr += cmd->encoded_length;
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kMemoryWrite: {
|
||||
auto cmd = reinterpret_cast<const MemoryCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
// ?
|
||||
trace_ptr += cmd->encoded_length;
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kEvent: {
|
||||
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
|
|
|
@ -168,11 +168,13 @@ void TraceWriter::WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length) {
|
|||
}
|
||||
}
|
||||
|
||||
void TraceWriter::WriteMemoryWrite(uint32_t base_ptr, size_t length) {
|
||||
void TraceWriter::WriteMemoryWrite(uint32_t base_ptr, size_t length,
|
||||
const void* host_ptr) {
|
||||
if (!file_) {
|
||||
return;
|
||||
}
|
||||
WriteMemoryCommand(TraceCommandType::kMemoryWrite, base_ptr, length);
|
||||
WriteMemoryCommand(TraceCommandType::kMemoryWrite, base_ptr, length,
|
||||
host_ptr);
|
||||
}
|
||||
|
||||
class SnappySink : public snappy::Sink {
|
||||
|
@ -188,13 +190,17 @@ class SnappySink : public snappy::Sink {
|
|||
};
|
||||
|
||||
void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr,
|
||||
size_t length) {
|
||||
size_t length, const void* host_ptr) {
|
||||
MemoryCommand cmd;
|
||||
cmd.type = type;
|
||||
cmd.base_ptr = base_ptr;
|
||||
cmd.encoding_format = MemoryEncodingFormat::kNone;
|
||||
cmd.encoded_length = cmd.decoded_length = static_cast<uint32_t>(length);
|
||||
|
||||
if (!host_ptr) {
|
||||
host_ptr = membase_ + cmd.base_ptr;
|
||||
}
|
||||
|
||||
bool compress = compress_output_ && length > compression_threshold_;
|
||||
if (compress) {
|
||||
// Write the header now so we reserve space in the buffer.
|
||||
|
@ -204,8 +210,7 @@ void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr,
|
|||
|
||||
// Stream the content right to the buffer.
|
||||
snappy::ByteArraySource snappy_source(
|
||||
reinterpret_cast<const char*>(membase_ + cmd.base_ptr),
|
||||
cmd.decoded_length);
|
||||
reinterpret_cast<const char*>(host_ptr), cmd.decoded_length);
|
||||
SnappySink snappy_sink(file_);
|
||||
cmd.encoded_length =
|
||||
static_cast<uint32_t>(snappy::Compress(&snappy_source, &snappy_sink));
|
||||
|
@ -219,7 +224,7 @@ void TraceWriter::WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr,
|
|||
// Uncompressed - write buffer directly to the file.
|
||||
cmd.encoding_format = MemoryEncodingFormat::kNone;
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
fwrite(membase_ + cmd.base_ptr, 1, cmd.decoded_length, file_);
|
||||
fwrite(host_ptr, 1, cmd.decoded_length, file_);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -39,12 +39,13 @@ class TraceWriter {
|
|||
void WriteMemoryRead(uint32_t base_ptr, size_t length);
|
||||
void WriteMemoryReadCached(uint32_t base_ptr, size_t length);
|
||||
void WriteMemoryReadCachedNop(uint32_t base_ptr, size_t length);
|
||||
void WriteMemoryWrite(uint32_t base_ptr, size_t length);
|
||||
void WriteMemoryWrite(uint32_t base_ptr, size_t length,
|
||||
const void* host_ptr = nullptr);
|
||||
void WriteEvent(EventCommand::Type event_type);
|
||||
|
||||
private:
|
||||
void WriteMemoryCommand(TraceCommandType type, uint32_t base_ptr,
|
||||
size_t length);
|
||||
size_t length, const void* host_ptr = nullptr);
|
||||
|
||||
std::set<uint64_t> cached_memory_reads_;
|
||||
uint8_t* membase_;
|
||||
|
|
|
@ -41,6 +41,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
|
||||
bool VulkanCommandProcessor::IssueCopy() { return true; }
|
||||
|
||||
// No-op: this command processor performs no work when a trace is opened.
void VulkanCommandProcessor::InitializeTrace() {}
|
||||
|
||||
// No-op: this command processor performs no work when a trace is being closed.
void VulkanCommandProcessor::FinalizeTrace() {}
|
||||
|
||||
} // namespace vk
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -38,6 +38,9 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count,
|
||||
IndexBufferInfo* index_buffer_info) override;
|
||||
bool IssueCopy() override;
|
||||
|
||||
void InitializeTrace() override;
|
||||
void FinalizeTrace() override;
|
||||
};
|
||||
|
||||
} // namespace vk
|
||||
|
|
|
@ -1322,6 +1322,10 @@ bool VulkanCommandProcessor::IssueCopy() {
|
|||
return true;
|
||||
}
|
||||
|
||||
// No-op: the Vulkan backend performs no work when a trace is opened.
void VulkanCommandProcessor::InitializeTrace() {}
|
||||
|
||||
// No-op: the Vulkan backend performs no work when a trace is being closed.
void VulkanCommandProcessor::FinalizeTrace() {}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -94,6 +94,9 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
VulkanShader* pixel_shader);
|
||||
bool IssueCopy() override;
|
||||
|
||||
void InitializeTrace() override;
|
||||
void FinalizeTrace() override;
|
||||
|
||||
xe::ui::vulkan::VulkanDevice* device_ = nullptr;
|
||||
|
||||
// front buffer / back buffer memory
|
||||
|
|
Loading…
Reference in New Issue