[D3D12] Memexport CPU readback, disabled by default

This commit is contained in:
Triang3l 2019-01-25 16:33:25 +03:00
parent 4a3245650f
commit ba7b6d6081
4 changed files with 94 additions and 3 deletions

View File

@ -32,6 +32,12 @@ DEFINE_bool(d3d12_edram_rov, true,
// disable half-pixel offset by setting this to false.
DEFINE_bool(d3d12_half_pixel_offset, true,
"Enable half-pixel vertex and VPOS offset.");
DEFINE_bool(d3d12_memexport_readback, false,
"Read data written by memory export in shaders on the CPU. This "
"may be needed in some games (but many only access exported data "
"on the GPU, and this flag isn't needed to handle such behavior), "
"but causes mid-frame synchronization, so it has a huge "
"performance impact.");
DEFINE_bool(d3d12_ssaa_custom_sample_positions, false,
"Enable custom SSAA sample positions for the RTV/DSV rendering "
"path where available instead of centers (experimental, not very "
@ -827,6 +833,9 @@ void D3D12CommandProcessor::ShutdownContext() {
auto context = GetD3D12Context();
context->AwaitAllFramesCompletion();
ui::d3d12::util::ReleaseAndNull(readback_buffer_);
readback_buffer_size_ = 0;
ui::d3d12::util::ReleaseAndNull(scratch_buffer_);
scratch_buffer_size_ = 0;
@ -1121,7 +1130,8 @@ Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type,
bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
uint32_t index_count,
IndexBufferInfo* index_buffer_info) {
auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
auto context = GetD3D12Context();
auto device = context->GetD3D12Provider()->GetDevice();
auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES
@ -1578,10 +1588,53 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
memexport_range.base_address_dwords << 2,
memexport_range.size_dwords << 2);
}
// Optional CPU readback of memexported data (gated by the
// d3d12_memexport_readback flag): copies every exported range from the shared
// memory buffer into a readback buffer, waits for the GPU, then copies the
// results into the memory returned by memory_->TranslatePhysical.
if (FLAGS_d3d12_memexport_readback) {
// Read the exported data on the CPU.
// Total size in bytes of all exported ranges (range sizes are stored in
// dwords, hence the << 2).
uint32_t memexport_total_size = 0;
for (uint32_t i = 0; i < memexport_range_count; ++i) {
memexport_total_size += memexport_ranges[i].size_dwords << 2;
}
if (memexport_total_size != 0) {
ID3D12Resource* readback_buffer =
RequestReadbackBuffer(memexport_total_size);
// If no readback buffer could be obtained, the readback is silently skipped.
if (readback_buffer != nullptr) {
// NOTE(review): presumably transitions the shared memory buffer to the copy
// source state and flushes the pending barriers - confirm against
// SharedMemory::UseAsCopySource and SubmitBarriers.
shared_memory_->UseAsCopySource();
SubmitBarriers();
ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer();
// Pack the ranges tightly, one after another, in the readback buffer.
uint32_t readback_buffer_offset = 0;
for (uint32_t i = 0; i < memexport_range_count; ++i) {
const MemExportRange& memexport_range = memexport_ranges[i];
uint32_t memexport_range_size = memexport_range.size_dwords << 2;
deferred_command_list_->D3DCopyBufferRegion(
readback_buffer, readback_buffer_offset, shared_memory_buffer,
memexport_range.base_address_dwords << 2, memexport_range_size);
readback_buffer_offset += memexport_range_size;
}
// Mid-frame synchronization: the frame is ended and all frames are awaited
// before the buffer is mapped. NOTE(review): presumably EndFrame submits the
// recorded copies - confirm.
EndFrame();
context->AwaitAllFramesCompletion();
// The CPU will read the bytes [0, memexport_total_size) of the buffer.
D3D12_RANGE readback_range;
readback_range.Begin = 0;
readback_range.End = memexport_total_size;
void* readback_mapping;
if (SUCCEEDED(readback_buffer->Map(0, &readback_range,
&readback_mapping))) {
const uint32_t* readback_dwords =
reinterpret_cast<const uint32_t*>(readback_mapping);
// Walk the ranges in the same order as they were packed above, copying each
// one to the address its data was exported to.
for (uint32_t i = 0; i < memexport_range_count; ++i) {
const MemExportRange& memexport_range = memexport_ranges[i];
std::memcpy(memory_->TranslatePhysical(
memexport_range.base_address_dwords << 2),
readback_dwords, memexport_range.size_dwords << 2);
readback_dwords += memexport_range.size_dwords;
}
// Empty written range (Begin == End == 0): the CPU wrote nothing to the
// buffer.
D3D12_RANGE readback_write_range = {};
readback_buffer->Unmap(0, &readback_write_range);
}
}
}
}
}
// TODO(Triang3l): Read back memexported data if the respective gflag is set.
return true;
}
@ -3057,6 +3110,33 @@ uint32_t D3D12CommandProcessor::GetSupportedMemExportFormatSize(
return 0;
}
// Returns a buffer, created in the readback heap in the COPY_DEST state, that
// is at least `size` bytes large, for copying GPU data back to the CPU.
//
// The buffer is cached in readback_buffer_ and reused across calls; it is only
// recreated (releasing the previous one) when the requested size, rounded up
// to kReadbackBufferSizeIncrement, exceeds the current capacity stored in
// readback_buffer_size_.
//
// Returns nullptr if `size` is 0 or if the buffer could not be created (in
// which case the previously cached buffer, if any, is kept).
ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) {
  if (size == 0) {
    return nullptr;
  }
  // Grow in coarse steps so slightly larger requests don't cause a
  // reallocation every time.
  size = xe::align(size, kReadbackBufferSizeIncrement);
  if (size > readback_buffer_size_) {
    auto context = GetD3D12Context();
    auto device = context->GetD3D12Provider()->GetDevice();
    D3D12_RESOURCE_DESC buffer_desc;
    ui::d3d12::util::FillBufferResourceDesc(buffer_desc, size,
                                            D3D12_RESOURCE_FLAG_NONE);
    ID3D12Resource* buffer;
    if (FAILED(device->CreateCommittedResource(
            &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE,
            &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
            IID_PPV_ARGS(&buffer)))) {
      XELOGE("Failed to create a %u MB readback buffer", size >> 20);
      return nullptr;
    }
    // NOTE(review): the old buffer is released immediately - this relies on
    // callers synchronizing with the GPU right after each use; confirm.
    if (readback_buffer_ != nullptr) {
      readback_buffer_->Release();
    }
    readback_buffer_ = buffer;
    // Remember the new capacity; without this the size check above would
    // always pass and the buffer would be recreated on every request.
    readback_buffer_size_ = size;
  }
  return readback_buffer_;
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -225,6 +225,10 @@ class D3D12CommandProcessor : public CommandProcessor {
// 32 bits per element.
static uint32_t GetSupportedMemExportFormatSize(ColorFormat format);
// Returns a buffer for reading GPU data back to the CPU. Assumes the caller
// synchronizes with the GPU immediately after use. Always in the COPY_DEST
// state.
ID3D12Resource* RequestReadbackBuffer(uint32_t size);
bool cache_clear_requested_ = false;
std::unique_ptr<ui::d3d12::CommandList>
@ -281,6 +285,10 @@ class D3D12CommandProcessor : public CommandProcessor {
D3D12_RESOURCE_STATES scratch_buffer_state_;
bool scratch_buffer_used_ = false;
static constexpr uint32_t kReadbackBufferSizeIncrement = 16 * 1024 * 1024;
ID3D12Resource* readback_buffer_ = nullptr;
uint32_t readback_buffer_size_ = 0;
uint32_t current_queue_frame_ = UINT32_MAX;
std::atomic<bool> pix_capture_requested_ = false;

View File

@ -19,6 +19,8 @@ namespace util {
const D3D12_HEAP_PROPERTIES kHeapPropertiesDefault = {D3D12_HEAP_TYPE_DEFAULT};
const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload = {D3D12_HEAP_TYPE_UPLOAD};
const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback = {
D3D12_HEAP_TYPE_READBACK};
ID3D12RootSignature* CreateRootSignature(
D3D12Provider* provider, const D3D12_ROOT_SIGNATURE_DESC& desc) {

View File

@ -19,6 +19,7 @@ namespace util {
extern const D3D12_HEAP_PROPERTIES kHeapPropertiesDefault;
extern const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload;
extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback;
template <typename T>
inline bool ReleaseAndNull(T& object) {