[D3D12] Add d3d12_readback_resolve gflag
This commit is contained in:
parent
68b2ddee86
commit
90c9c24ca5
|
@ -32,12 +32,17 @@ DEFINE_bool(d3d12_edram_rov, true,
|
||||||
// disable half-pixel offset by setting this to false.
|
// disable half-pixel offset by setting this to false.
|
||||||
DEFINE_bool(d3d12_half_pixel_offset, true,
|
DEFINE_bool(d3d12_half_pixel_offset, true,
|
||||||
"Enable half-pixel vertex and VPOS offset.");
|
"Enable half-pixel vertex and VPOS offset.");
|
||||||
DEFINE_bool(d3d12_memexport_readback, false,
|
DEFINE_bool(d3d12_readback_memexport, false,
|
||||||
"Read data written by memory export in shaders on the CPU. This "
|
"Read data written by memory export in shaders on the CPU. This "
|
||||||
"may be needed in some games (but many only access exported data "
|
"may be needed in some games (but many only access exported data "
|
||||||
"on the GPU, and this flag isn't needed to handle such behavior), "
|
"on the GPU, and this flag isn't needed to handle such behavior), "
|
||||||
"but causes mid-frame synchronization, so it has a huge "
|
"but causes mid-frame synchronization, so it has a huge "
|
||||||
"performance impact.");
|
"performance impact.");
|
||||||
|
DEFINE_bool(d3d12_readback_resolve, false,
|
||||||
|
"Read render-to-texture results on the CPU. This may be needed in "
|
||||||
|
"some games, for instance, for screenshots in saved games, but "
|
||||||
|
"causes mid-frame synchronization, so it has a huge performance "
|
||||||
|
"impact.");
|
||||||
DEFINE_bool(d3d12_ssaa_custom_sample_positions, false,
|
DEFINE_bool(d3d12_ssaa_custom_sample_positions, false,
|
||||||
"Enable custom SSAA sample positions for the RTV/DSV rendering "
|
"Enable custom SSAA sample positions for the RTV/DSV rendering "
|
||||||
"path where available instead of centers (experimental, not very "
|
"path where available instead of centers (experimental, not very "
|
||||||
|
@ -1606,7 +1611,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
memexport_range.base_address_dwords << 2,
|
memexport_range.base_address_dwords << 2,
|
||||||
memexport_range.size_dwords << 2);
|
memexport_range.size_dwords << 2);
|
||||||
}
|
}
|
||||||
if (FLAGS_d3d12_memexport_readback) {
|
if (FLAGS_d3d12_readback_memexport) {
|
||||||
// Read the exported data on the CPU.
|
// Read the exported data on the CPU.
|
||||||
uint32_t memexport_total_size = 0;
|
uint32_t memexport_total_size = 0;
|
||||||
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||||
|
@ -1661,8 +1666,39 @@ bool D3D12CommandProcessor::IssueCopy() {
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||||
BeginFrame();
|
BeginFrame();
|
||||||
return render_target_cache_->Resolve(shared_memory_.get(),
|
uint32_t written_address, written_length;
|
||||||
texture_cache_.get(), memory_);
|
if (!render_target_cache_->Resolve(shared_memory_.get(), texture_cache_.get(),
|
||||||
|
memory_, written_address,
|
||||||
|
written_length)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (FLAGS_d3d12_readback_resolve && !texture_cache_->IsResolutionScale2X() &&
|
||||||
|
written_length) {
|
||||||
|
// Read the resolved data on the CPU.
|
||||||
|
ID3D12Resource* readback_buffer = RequestReadbackBuffer(written_length);
|
||||||
|
if (readback_buffer != nullptr) {
|
||||||
|
shared_memory_->UseAsCopySource();
|
||||||
|
SubmitBarriers();
|
||||||
|
ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer();
|
||||||
|
deferred_command_list_->D3DCopyBufferRegion(
|
||||||
|
readback_buffer, 0, shared_memory_buffer, written_address,
|
||||||
|
written_length);
|
||||||
|
EndFrame();
|
||||||
|
GetD3D12Context()->AwaitAllFramesCompletion();
|
||||||
|
D3D12_RANGE readback_range;
|
||||||
|
readback_range.Begin = 0;
|
||||||
|
readback_range.End = written_length;
|
||||||
|
void* readback_mapping;
|
||||||
|
if (SUCCEEDED(
|
||||||
|
readback_buffer->Map(0, &readback_range, &readback_mapping))) {
|
||||||
|
std::memcpy(memory_->TranslatePhysical(written_address),
|
||||||
|
readback_mapping, written_length);
|
||||||
|
D3D12_RANGE readback_write_range = {};
|
||||||
|
readback_buffer->Unmap(0, &readback_write_range);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool D3D12CommandProcessor::BeginFrame() {
|
bool D3D12CommandProcessor::BeginFrame() {
|
||||||
|
|
|
@ -988,7 +988,11 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
||||||
TextureCache* texture_cache, Memory* memory) {
|
TextureCache* texture_cache, Memory* memory,
|
||||||
|
uint32_t& written_address_out,
|
||||||
|
uint32_t& written_length_out) {
|
||||||
|
written_address_out = written_length_out = 0;
|
||||||
|
|
||||||
if (!command_processor_->IsROVUsedForEDRAM()) {
|
if (!command_processor_->IsROVUsedForEDRAM()) {
|
||||||
// Save the currently bound render targets to the EDRAM buffer that will be
|
// Save the currently bound render targets to the EDRAM buffer that will be
|
||||||
// used as the resolve source and clear bindings to allow render target
|
// used as the resolve source and clear bindings to allow render target
|
||||||
|
@ -1152,9 +1156,10 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
||||||
// GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed
|
// GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed
|
||||||
// clamping to the source render target size.
|
// clamping to the source render target size.
|
||||||
|
|
||||||
bool result = ResolveCopy(shared_memory, texture_cache, surface_edram_base,
|
bool result =
|
||||||
surface_pitch, msaa_samples, surface_is_depth,
|
ResolveCopy(shared_memory, texture_cache, surface_edram_base,
|
||||||
surface_format, rect);
|
surface_pitch, msaa_samples, surface_is_depth, surface_format,
|
||||||
|
rect, written_address_out, written_length_out);
|
||||||
// Clear the color RT if needed.
|
// Clear the color RT if needed.
|
||||||
if (!surface_is_depth) {
|
if (!surface_is_depth) {
|
||||||
result &= ResolveClear(surface_edram_base, surface_pitch, msaa_samples,
|
result &= ResolveClear(surface_edram_base, surface_pitch, msaa_samples,
|
||||||
|
@ -1170,8 +1175,11 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
TextureCache* texture_cache,
|
TextureCache* texture_cache,
|
||||||
uint32_t edram_base, uint32_t surface_pitch,
|
uint32_t edram_base, uint32_t surface_pitch,
|
||||||
MsaaSamples msaa_samples, bool is_depth,
|
MsaaSamples msaa_samples, bool is_depth,
|
||||||
uint32_t src_format,
|
uint32_t src_format, const D3D12_RECT& rect,
|
||||||
const D3D12_RECT& rect) {
|
uint32_t& written_address_out,
|
||||||
|
uint32_t& written_length_out) {
|
||||||
|
written_address_out = written_length_out = 0;
|
||||||
|
|
||||||
auto& regs = *register_file_;
|
auto& regs = *register_file_;
|
||||||
|
|
||||||
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
|
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
|
||||||
|
@ -1475,6 +1483,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
// Invalidate textures and mark the range as scaled if needed.
|
// Invalidate textures and mark the range as scaled if needed.
|
||||||
texture_cache->MarkRangeAsResolved(dest_modified_start,
|
texture_cache->MarkRangeAsResolved(dest_modified_start,
|
||||||
dest_modified_length);
|
dest_modified_length);
|
||||||
|
written_address_out = dest_modified_start;
|
||||||
|
written_length_out = dest_modified_length;
|
||||||
} else {
|
} else {
|
||||||
// *************************************************************************
|
// *************************************************************************
|
||||||
// Conversion and AA resolving
|
// Conversion and AA resolving
|
||||||
|
@ -1788,7 +1798,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
dest_format, dest_address, dest_pitch, dest_height, dest_3d,
|
dest_format, dest_address, dest_pitch, dest_height, dest_3d,
|
||||||
uint32_t(rect.left) & 31, uint32_t(rect.top) & 31, dest_z, copy_width,
|
uint32_t(rect.left) & 31, uint32_t(rect.top) & 31, dest_z, copy_width,
|
||||||
copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size,
|
copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size,
|
||||||
resolve_target->footprint);
|
resolve_target->footprint, &written_address_out, &written_length_out);
|
||||||
|
|
||||||
// Done with the copy buffer.
|
// Done with the copy buffer.
|
||||||
|
|
||||||
|
|
|
@ -269,7 +269,8 @@ class RenderTargetCache {
|
||||||
// register values, and also clears the EDRAM buffer if needed. Must be in a
|
// register values, and also clears the EDRAM buffer if needed. Must be in a
|
||||||
// frame for calling.
|
// frame for calling.
|
||||||
bool Resolve(SharedMemory* shared_memory, TextureCache* texture_cache,
|
bool Resolve(SharedMemory* shared_memory, TextureCache* texture_cache,
|
||||||
Memory* memory);
|
Memory* memory, uint32_t& written_address_out,
|
||||||
|
uint32_t& written_length_out);
|
||||||
// Flushes the render targets to EDRAM and unbinds them, for instance, when
|
// Flushes the render targets to EDRAM and unbinds them, for instance, when
|
||||||
// the command processor takes over framebuffer bindings to draw something
|
// the command processor takes over framebuffer bindings to draw something
|
||||||
// special.
|
// special.
|
||||||
|
@ -481,7 +482,8 @@ class RenderTargetCache {
|
||||||
bool ResolveCopy(SharedMemory* shared_memory, TextureCache* texture_cache,
|
bool ResolveCopy(SharedMemory* shared_memory, TextureCache* texture_cache,
|
||||||
uint32_t edram_base, uint32_t surface_pitch,
|
uint32_t edram_base, uint32_t surface_pitch,
|
||||||
MsaaSamples msaa_samples, bool is_depth, uint32_t src_format,
|
MsaaSamples msaa_samples, bool is_depth, uint32_t src_format,
|
||||||
const D3D12_RECT& rect);
|
const D3D12_RECT& rect, uint32_t& written_address_out,
|
||||||
|
uint32_t& written_length_out);
|
||||||
// Performs the clearing part of a resolve.
|
// Performs the clearing part of a resolve.
|
||||||
bool ResolveClear(uint32_t edram_base, uint32_t surface_pitch,
|
bool ResolveClear(uint32_t edram_base, uint32_t surface_pitch,
|
||||||
MsaaSamples msaa_samples, bool is_depth, uint32_t format,
|
MsaaSamples msaa_samples, bool is_depth, uint32_t format,
|
||||||
|
|
|
@ -1291,7 +1291,15 @@ bool TextureCache::TileResolvedTexture(
|
||||||
uint32_t texture_height, bool is_3d, uint32_t offset_x, uint32_t offset_y,
|
uint32_t texture_height, bool is_3d, uint32_t offset_x, uint32_t offset_y,
|
||||||
uint32_t offset_z, uint32_t resolve_width, uint32_t resolve_height,
|
uint32_t offset_z, uint32_t resolve_width, uint32_t resolve_height,
|
||||||
Endian128 endian, ID3D12Resource* buffer, uint32_t buffer_size,
|
Endian128 endian, ID3D12Resource* buffer, uint32_t buffer_size,
|
||||||
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint) {
|
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint,
|
||||||
|
uint32_t* written_address_out, uint32_t* written_length_out) {
|
||||||
|
if (written_address_out) {
|
||||||
|
*written_address_out = 0;
|
||||||
|
}
|
||||||
|
if (written_length_out) {
|
||||||
|
*written_length_out = 0;
|
||||||
|
}
|
||||||
|
|
||||||
ResolveTileMode resolve_tile_mode =
|
ResolveTileMode resolve_tile_mode =
|
||||||
host_formats_[uint32_t(format)].resolve_tile_mode;
|
host_formats_[uint32_t(format)].resolve_tile_mode;
|
||||||
if (resolve_tile_mode == ResolveTileMode::kUnknown) {
|
if (resolve_tile_mode == ResolveTileMode::kUnknown) {
|
||||||
|
@ -1456,6 +1464,12 @@ bool TextureCache::TileResolvedTexture(
|
||||||
|
|
||||||
// Invalidate textures and mark the range as scaled if needed.
|
// Invalidate textures and mark the range as scaled if needed.
|
||||||
MarkRangeAsResolved(texture_modified_start, texture_modified_length);
|
MarkRangeAsResolved(texture_modified_start, texture_modified_length);
|
||||||
|
if (written_address_out) {
|
||||||
|
*written_address_out = texture_modified_start;
|
||||||
|
}
|
||||||
|
if (written_length_out) {
|
||||||
|
*written_length_out = texture_modified_length;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -137,7 +137,9 @@ class TextureCache {
|
||||||
uint32_t offset_z, uint32_t resolve_width,
|
uint32_t offset_z, uint32_t resolve_width,
|
||||||
uint32_t resolve_height, Endian128 endian,
|
uint32_t resolve_height, Endian128 endian,
|
||||||
ID3D12Resource* buffer, uint32_t buffer_size,
|
ID3D12Resource* buffer, uint32_t buffer_size,
|
||||||
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint);
|
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint,
|
||||||
|
uint32_t* written_address_out,
|
||||||
|
uint32_t* written_length_out);
|
||||||
|
|
||||||
inline bool IsResolutionScale2X() const {
|
inline bool IsResolutionScale2X() const {
|
||||||
return scaled_resolve_buffer_ != nullptr;
|
return scaled_resolve_buffer_ != nullptr;
|
||||||
|
|
Loading…
Reference in New Issue