[D3D12] Add d3d12_readback_resolve gflag

This commit is contained in:
Triang3l 2019-08-01 08:55:47 +03:00
parent 68b2ddee86
commit 90c9c24ca5
5 changed files with 79 additions and 15 deletions

View File

@ -32,12 +32,17 @@ DEFINE_bool(d3d12_edram_rov, true,
// disable half-pixel offset by setting this to false. // disable half-pixel offset by setting this to false.
DEFINE_bool(d3d12_half_pixel_offset, true, DEFINE_bool(d3d12_half_pixel_offset, true,
"Enable half-pixel vertex and VPOS offset."); "Enable half-pixel vertex and VPOS offset.");
DEFINE_bool(d3d12_memexport_readback, false, DEFINE_bool(d3d12_readback_memexport, false,
"Read data written by memory export in shaders on the CPU. This " "Read data written by memory export in shaders on the CPU. This "
"may be needed in some games (but many only access exported data " "may be needed in some games (but many only access exported data "
"on the GPU, and this flag isn't needed to handle such behavior), " "on the GPU, and this flag isn't needed to handle such behavior), "
"but causes mid-frame synchronization, so it has a huge " "but causes mid-frame synchronization, so it has a huge "
"performance impact."); "performance impact.");
// Opt-in CPU readback of resolve (render-to-texture) output. The check in
// D3D12CommandProcessor::IssueCopy only performs the readback when this flag
// is set, texture_cache_->IsResolutionScale2X() is false, and the resolve
// reported a non-zero written length; otherwise the flag has no effect there.
DEFINE_bool(d3d12_readback_resolve, false,
"Read render-to-texture results on the CPU. This may be needed in "
"some games, for instance, for screenshots in saved games, but "
"causes mid-frame synchronization, so it has a huge performance "
"impact.");
DEFINE_bool(d3d12_ssaa_custom_sample_positions, false, DEFINE_bool(d3d12_ssaa_custom_sample_positions, false,
"Enable custom SSAA sample positions for the RTV/DSV rendering " "Enable custom SSAA sample positions for the RTV/DSV rendering "
"path where available instead of centers (experimental, not very " "path where available instead of centers (experimental, not very "
@ -1606,7 +1611,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
memexport_range.base_address_dwords << 2, memexport_range.base_address_dwords << 2,
memexport_range.size_dwords << 2); memexport_range.size_dwords << 2);
} }
if (FLAGS_d3d12_memexport_readback) { if (FLAGS_d3d12_readback_memexport) {
// Read the exported data on the CPU. // Read the exported data on the CPU.
uint32_t memexport_total_size = 0; uint32_t memexport_total_size = 0;
for (uint32_t i = 0; i < memexport_range_count; ++i) { for (uint32_t i = 0; i < memexport_range_count; ++i) {
@ -1661,8 +1666,39 @@ bool D3D12CommandProcessor::IssueCopy() {
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES #endif // FINE_GRAINED_DRAW_SCOPES
BeginFrame(); BeginFrame();
return render_target_cache_->Resolve(shared_memory_.get(), uint32_t written_address, written_length;
texture_cache_.get(), memory_); if (!render_target_cache_->Resolve(shared_memory_.get(), texture_cache_.get(),
memory_, written_address,
written_length)) {
return false;
}
if (FLAGS_d3d12_readback_resolve && !texture_cache_->IsResolutionScale2X() &&
written_length) {
// Read the resolved data on the CPU.
ID3D12Resource* readback_buffer = RequestReadbackBuffer(written_length);
if (readback_buffer != nullptr) {
shared_memory_->UseAsCopySource();
SubmitBarriers();
ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer();
deferred_command_list_->D3DCopyBufferRegion(
readback_buffer, 0, shared_memory_buffer, written_address,
written_length);
EndFrame();
GetD3D12Context()->AwaitAllFramesCompletion();
D3D12_RANGE readback_range;
readback_range.Begin = 0;
readback_range.End = written_length;
void* readback_mapping;
if (SUCCEEDED(
readback_buffer->Map(0, &readback_range, &readback_mapping))) {
std::memcpy(memory_->TranslatePhysical(written_address),
readback_mapping, written_length);
D3D12_RANGE readback_write_range = {};
readback_buffer->Unmap(0, &readback_write_range);
}
}
}
return true;
} }
bool D3D12CommandProcessor::BeginFrame() { bool D3D12CommandProcessor::BeginFrame() {

View File

@ -988,7 +988,11 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
} }
bool RenderTargetCache::Resolve(SharedMemory* shared_memory, bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
TextureCache* texture_cache, Memory* memory) { TextureCache* texture_cache, Memory* memory,
uint32_t& written_address_out,
uint32_t& written_length_out) {
written_address_out = written_length_out = 0;
if (!command_processor_->IsROVUsedForEDRAM()) { if (!command_processor_->IsROVUsedForEDRAM()) {
// Save the currently bound render targets to the EDRAM buffer that will be // Save the currently bound render targets to the EDRAM buffer that will be
// used as the resolve source and clear bindings to allow render target // used as the resolve source and clear bindings to allow render target
@ -1152,9 +1156,10 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
// GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed // GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed
// clamping to the source render target size. // clamping to the source render target size.
bool result = ResolveCopy(shared_memory, texture_cache, surface_edram_base, bool result =
surface_pitch, msaa_samples, surface_is_depth, ResolveCopy(shared_memory, texture_cache, surface_edram_base,
surface_format, rect); surface_pitch, msaa_samples, surface_is_depth, surface_format,
rect, written_address_out, written_length_out);
// Clear the color RT if needed. // Clear the color RT if needed.
if (!surface_is_depth) { if (!surface_is_depth) {
result &= ResolveClear(surface_edram_base, surface_pitch, msaa_samples, result &= ResolveClear(surface_edram_base, surface_pitch, msaa_samples,
@ -1170,8 +1175,11 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
TextureCache* texture_cache, TextureCache* texture_cache,
uint32_t edram_base, uint32_t surface_pitch, uint32_t edram_base, uint32_t surface_pitch,
MsaaSamples msaa_samples, bool is_depth, MsaaSamples msaa_samples, bool is_depth,
uint32_t src_format, uint32_t src_format, const D3D12_RECT& rect,
const D3D12_RECT& rect) { uint32_t& written_address_out,
uint32_t& written_length_out) {
written_address_out = written_length_out = 0;
auto& regs = *register_file_; auto& regs = *register_file_;
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32; uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
@ -1475,6 +1483,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// Invalidate textures and mark the range as scaled if needed. // Invalidate textures and mark the range as scaled if needed.
texture_cache->MarkRangeAsResolved(dest_modified_start, texture_cache->MarkRangeAsResolved(dest_modified_start,
dest_modified_length); dest_modified_length);
written_address_out = dest_modified_start;
written_length_out = dest_modified_length;
} else { } else {
// ************************************************************************* // *************************************************************************
// Conversion and AA resolving // Conversion and AA resolving
@ -1788,7 +1798,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
dest_format, dest_address, dest_pitch, dest_height, dest_3d, dest_format, dest_address, dest_pitch, dest_height, dest_3d,
uint32_t(rect.left) & 31, uint32_t(rect.top) & 31, dest_z, copy_width, uint32_t(rect.left) & 31, uint32_t(rect.top) & 31, dest_z, copy_width,
copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size, copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size,
resolve_target->footprint); resolve_target->footprint, &written_address_out, &written_length_out);
// Done with the copy buffer. // Done with the copy buffer.

View File

@ -269,7 +269,8 @@ class RenderTargetCache {
// register values, and also clears the EDRAM buffer if needed. Must be in a // register values, and also clears the EDRAM buffer if needed. Must be in a
// frame for calling. // frame for calling.
bool Resolve(SharedMemory* shared_memory, TextureCache* texture_cache, bool Resolve(SharedMemory* shared_memory, TextureCache* texture_cache,
Memory* memory); Memory* memory, uint32_t& written_address_out,
uint32_t& written_length_out);
// Flushes the render targets to EDRAM and unbinds them, for instance, when // Flushes the render targets to EDRAM and unbinds them, for instance, when
// the command processor takes over framebuffer bindings to draw something // the command processor takes over framebuffer bindings to draw something
// special. // special.
@ -481,7 +482,8 @@ class RenderTargetCache {
bool ResolveCopy(SharedMemory* shared_memory, TextureCache* texture_cache, bool ResolveCopy(SharedMemory* shared_memory, TextureCache* texture_cache,
uint32_t edram_base, uint32_t surface_pitch, uint32_t edram_base, uint32_t surface_pitch,
MsaaSamples msaa_samples, bool is_depth, uint32_t src_format, MsaaSamples msaa_samples, bool is_depth, uint32_t src_format,
const D3D12_RECT& rect); const D3D12_RECT& rect, uint32_t& written_address_out,
uint32_t& written_length_out);
// Performs the clearing part of a resolve. // Performs the clearing part of a resolve.
bool ResolveClear(uint32_t edram_base, uint32_t surface_pitch, bool ResolveClear(uint32_t edram_base, uint32_t surface_pitch,
MsaaSamples msaa_samples, bool is_depth, uint32_t format, MsaaSamples msaa_samples, bool is_depth, uint32_t format,

View File

@ -1291,7 +1291,15 @@ bool TextureCache::TileResolvedTexture(
uint32_t texture_height, bool is_3d, uint32_t offset_x, uint32_t offset_y, uint32_t texture_height, bool is_3d, uint32_t offset_x, uint32_t offset_y,
uint32_t offset_z, uint32_t resolve_width, uint32_t resolve_height, uint32_t offset_z, uint32_t resolve_width, uint32_t resolve_height,
Endian128 endian, ID3D12Resource* buffer, uint32_t buffer_size, Endian128 endian, ID3D12Resource* buffer, uint32_t buffer_size,
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint) { const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint,
uint32_t* written_address_out, uint32_t* written_length_out) {
if (written_address_out) {
*written_address_out = 0;
}
if (written_length_out) {
*written_length_out = 0;
}
ResolveTileMode resolve_tile_mode = ResolveTileMode resolve_tile_mode =
host_formats_[uint32_t(format)].resolve_tile_mode; host_formats_[uint32_t(format)].resolve_tile_mode;
if (resolve_tile_mode == ResolveTileMode::kUnknown) { if (resolve_tile_mode == ResolveTileMode::kUnknown) {
@ -1456,6 +1464,12 @@ bool TextureCache::TileResolvedTexture(
// Invalidate textures and mark the range as scaled if needed. // Invalidate textures and mark the range as scaled if needed.
MarkRangeAsResolved(texture_modified_start, texture_modified_length); MarkRangeAsResolved(texture_modified_start, texture_modified_length);
if (written_address_out) {
*written_address_out = texture_modified_start;
}
if (written_length_out) {
*written_length_out = texture_modified_length;
}
return true; return true;
} }

View File

@ -137,7 +137,9 @@ class TextureCache {
uint32_t offset_z, uint32_t resolve_width, uint32_t offset_z, uint32_t resolve_width,
uint32_t resolve_height, Endian128 endian, uint32_t resolve_height, Endian128 endian,
ID3D12Resource* buffer, uint32_t buffer_size, ID3D12Resource* buffer, uint32_t buffer_size,
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint); const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint,
uint32_t* written_address_out,
uint32_t* written_length_out);
inline bool IsResolutionScale2X() const { inline bool IsResolutionScale2X() const {
return scaled_resolve_buffer_ != nullptr; return scaled_resolve_buffer_ != nullptr;