[D3D12] Add d3d12_readback_resolve gflag
This commit is contained in:
parent
68b2ddee86
commit
90c9c24ca5
|
@ -32,12 +32,17 @@ DEFINE_bool(d3d12_edram_rov, true,
|
|||
// disable half-pixel offset by setting this to false.
|
||||
DEFINE_bool(d3d12_half_pixel_offset, true,
|
||||
"Enable half-pixel vertex and VPOS offset.");
|
||||
DEFINE_bool(d3d12_memexport_readback, false,
|
||||
DEFINE_bool(d3d12_readback_memexport, false,
|
||||
"Read data written by memory export in shaders on the CPU. This "
|
||||
"may be needed in some games (but many only access exported data "
|
||||
"on the GPU, and this flag isn't needed to handle such behavior), "
|
||||
"but causes mid-frame synchronization, so it has a huge "
|
||||
"performance impact.");
|
||||
DEFINE_bool(d3d12_readback_resolve, false,
|
||||
"Read render-to-texture results on the CPU. This may be needed in "
|
||||
"some games, for instance, for screenshots in saved games, but "
|
||||
"causes mid-frame synchronization, so it has a huge performance "
|
||||
"impact.");
|
||||
DEFINE_bool(d3d12_ssaa_custom_sample_positions, false,
|
||||
"Enable custom SSAA sample positions for the RTV/DSV rendering "
|
||||
"path where available instead of centers (experimental, not very "
|
||||
|
@ -1606,7 +1611,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
memexport_range.base_address_dwords << 2,
|
||||
memexport_range.size_dwords << 2);
|
||||
}
|
||||
if (FLAGS_d3d12_memexport_readback) {
|
||||
if (FLAGS_d3d12_readback_memexport) {
|
||||
// Read the exported data on the CPU.
|
||||
uint32_t memexport_total_size = 0;
|
||||
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||
|
@ -1661,8 +1666,39 @@ bool D3D12CommandProcessor::IssueCopy() {
|
|||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||
BeginFrame();
|
||||
return render_target_cache_->Resolve(shared_memory_.get(),
|
||||
texture_cache_.get(), memory_);
|
||||
uint32_t written_address, written_length;
|
||||
if (!render_target_cache_->Resolve(shared_memory_.get(), texture_cache_.get(),
|
||||
memory_, written_address,
|
||||
written_length)) {
|
||||
return false;
|
||||
}
|
||||
if (FLAGS_d3d12_readback_resolve && !texture_cache_->IsResolutionScale2X() &&
|
||||
written_length) {
|
||||
// Read the resolved data on the CPU.
|
||||
ID3D12Resource* readback_buffer = RequestReadbackBuffer(written_length);
|
||||
if (readback_buffer != nullptr) {
|
||||
shared_memory_->UseAsCopySource();
|
||||
SubmitBarriers();
|
||||
ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer();
|
||||
deferred_command_list_->D3DCopyBufferRegion(
|
||||
readback_buffer, 0, shared_memory_buffer, written_address,
|
||||
written_length);
|
||||
EndFrame();
|
||||
GetD3D12Context()->AwaitAllFramesCompletion();
|
||||
D3D12_RANGE readback_range;
|
||||
readback_range.Begin = 0;
|
||||
readback_range.End = written_length;
|
||||
void* readback_mapping;
|
||||
if (SUCCEEDED(
|
||||
readback_buffer->Map(0, &readback_range, &readback_mapping))) {
|
||||
std::memcpy(memory_->TranslatePhysical(written_address),
|
||||
readback_mapping, written_length);
|
||||
D3D12_RANGE readback_write_range = {};
|
||||
readback_buffer->Unmap(0, &readback_write_range);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool D3D12CommandProcessor::BeginFrame() {
|
||||
|
|
|
@ -988,7 +988,11 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
}
|
||||
|
||||
bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
||||
TextureCache* texture_cache, Memory* memory) {
|
||||
TextureCache* texture_cache, Memory* memory,
|
||||
uint32_t& written_address_out,
|
||||
uint32_t& written_length_out) {
|
||||
written_address_out = written_length_out = 0;
|
||||
|
||||
if (!command_processor_->IsROVUsedForEDRAM()) {
|
||||
// Save the currently bound render targets to the EDRAM buffer that will be
|
||||
// used as the resolve source and clear bindings to allow render target
|
||||
|
@ -1152,9 +1156,10 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
|||
// GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed
|
||||
// clamping to the source render target size.
|
||||
|
||||
bool result = ResolveCopy(shared_memory, texture_cache, surface_edram_base,
|
||||
surface_pitch, msaa_samples, surface_is_depth,
|
||||
surface_format, rect);
|
||||
bool result =
|
||||
ResolveCopy(shared_memory, texture_cache, surface_edram_base,
|
||||
surface_pitch, msaa_samples, surface_is_depth, surface_format,
|
||||
rect, written_address_out, written_length_out);
|
||||
// Clear the color RT if needed.
|
||||
if (!surface_is_depth) {
|
||||
result &= ResolveClear(surface_edram_base, surface_pitch, msaa_samples,
|
||||
|
@ -1170,8 +1175,11 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
TextureCache* texture_cache,
|
||||
uint32_t edram_base, uint32_t surface_pitch,
|
||||
MsaaSamples msaa_samples, bool is_depth,
|
||||
uint32_t src_format,
|
||||
const D3D12_RECT& rect) {
|
||||
uint32_t src_format, const D3D12_RECT& rect,
|
||||
uint32_t& written_address_out,
|
||||
uint32_t& written_length_out) {
|
||||
written_address_out = written_length_out = 0;
|
||||
|
||||
auto& regs = *register_file_;
|
||||
|
||||
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
|
||||
|
@ -1475,6 +1483,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
// Invalidate textures and mark the range as scaled if needed.
|
||||
texture_cache->MarkRangeAsResolved(dest_modified_start,
|
||||
dest_modified_length);
|
||||
written_address_out = dest_modified_start;
|
||||
written_length_out = dest_modified_length;
|
||||
} else {
|
||||
// *************************************************************************
|
||||
// Conversion and AA resolving
|
||||
|
@ -1788,7 +1798,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
dest_format, dest_address, dest_pitch, dest_height, dest_3d,
|
||||
uint32_t(rect.left) & 31, uint32_t(rect.top) & 31, dest_z, copy_width,
|
||||
copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size,
|
||||
resolve_target->footprint);
|
||||
resolve_target->footprint, &written_address_out, &written_length_out);
|
||||
|
||||
// Done with the copy buffer.
|
||||
|
||||
|
|
|
@ -269,7 +269,8 @@ class RenderTargetCache {
|
|||
// register values, and also clears the EDRAM buffer if needed. Must be in a
|
||||
// frame for calling.
|
||||
bool Resolve(SharedMemory* shared_memory, TextureCache* texture_cache,
|
||||
Memory* memory);
|
||||
Memory* memory, uint32_t& written_address_out,
|
||||
uint32_t& written_length_out);
|
||||
// Flushes the render targets to EDRAM and unbinds them, for instance, when
|
||||
// the command processor takes over framebuffer bindings to draw something
|
||||
// special.
|
||||
|
@ -481,7 +482,8 @@ class RenderTargetCache {
|
|||
bool ResolveCopy(SharedMemory* shared_memory, TextureCache* texture_cache,
|
||||
uint32_t edram_base, uint32_t surface_pitch,
|
||||
MsaaSamples msaa_samples, bool is_depth, uint32_t src_format,
|
||||
const D3D12_RECT& rect);
|
||||
const D3D12_RECT& rect, uint32_t& written_address_out,
|
||||
uint32_t& written_length_out);
|
||||
// Performs the clearing part of a resolve.
|
||||
bool ResolveClear(uint32_t edram_base, uint32_t surface_pitch,
|
||||
MsaaSamples msaa_samples, bool is_depth, uint32_t format,
|
||||
|
|
|
@ -1291,7 +1291,15 @@ bool TextureCache::TileResolvedTexture(
|
|||
uint32_t texture_height, bool is_3d, uint32_t offset_x, uint32_t offset_y,
|
||||
uint32_t offset_z, uint32_t resolve_width, uint32_t resolve_height,
|
||||
Endian128 endian, ID3D12Resource* buffer, uint32_t buffer_size,
|
||||
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint) {
|
||||
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint,
|
||||
uint32_t* written_address_out, uint32_t* written_length_out) {
|
||||
if (written_address_out) {
|
||||
*written_address_out = 0;
|
||||
}
|
||||
if (written_length_out) {
|
||||
*written_length_out = 0;
|
||||
}
|
||||
|
||||
ResolveTileMode resolve_tile_mode =
|
||||
host_formats_[uint32_t(format)].resolve_tile_mode;
|
||||
if (resolve_tile_mode == ResolveTileMode::kUnknown) {
|
||||
|
@ -1456,6 +1464,12 @@ bool TextureCache::TileResolvedTexture(
|
|||
|
||||
// Invalidate textures and mark the range as scaled if needed.
|
||||
MarkRangeAsResolved(texture_modified_start, texture_modified_length);
|
||||
if (written_address_out) {
|
||||
*written_address_out = texture_modified_start;
|
||||
}
|
||||
if (written_length_out) {
|
||||
*written_length_out = texture_modified_length;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -137,7 +137,9 @@ class TextureCache {
|
|||
uint32_t offset_z, uint32_t resolve_width,
|
||||
uint32_t resolve_height, Endian128 endian,
|
||||
ID3D12Resource* buffer, uint32_t buffer_size,
|
||||
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint);
|
||||
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint,
|
||||
uint32_t* written_address_out,
|
||||
uint32_t* written_length_out);
|
||||
|
||||
inline bool IsResolutionScale2X() const {
|
||||
return scaled_resolve_buffer_ != nullptr;
|
||||
|
|
Loading…
Reference in New Issue