rsx/vk: Optimize framebuffer lifetime management

- Significant gains due to avoiding aggressive create-delete cycles every frame
This commit is contained in:
kd-11 2017-07-10 23:45:42 +03:00
parent 0136215ef1
commit f69121116a
7 changed files with 143 additions and 24 deletions

View File

@ -93,7 +93,12 @@ namespace rsx
auto It = m_render_targets_storage.find(address);
// TODO: Fix corner cases
// This doesn't take overlapping surface(s) into account.
surface_storage_type old_surface_storage;
surface_storage_type new_surface_storage;
surface_type old_surface = nullptr;
surface_type new_surface = nullptr;
if (It != m_render_targets_storage.end())
{
surface_storage_type &rtt = It->second;
@ -104,10 +109,43 @@ namespace rsx
}
old_surface = Traits::get(rtt);
invalidated_resources.push_back(std::move(rtt));
old_surface_storage = std::move(rtt);
m_render_targets_storage.erase(address);
}
//Search invalidated resources for a suitable surface
for (auto &It = invalidated_resources.begin(); It != invalidated_resources.end(); It++)
{
auto &rtt = *It;
if (Traits::rtt_has_format_width_height(rtt, color_format, width, height))
{
new_surface_storage = std::move(rtt);
if (old_surface)
//Exchange this surface with the invalidated one
rtt = std::move(old_surface_storage);
else
//rtt is now empty - erase it
invalidated_resources.erase(It);
new_surface = Traits::get(new_surface_storage);
Traits::invalidate_rtt_surface_contents(command_list, new_surface, true);
Traits::prepare_rtt_for_drawing(command_list, new_surface);
break;
}
}
if (old_surface != nullptr && new_surface == nullptr)
//This was already determined to be invalid and is excluded from testing above
invalidated_resources.push_back(std::move(old_surface_storage));
if (new_surface != nullptr)
{
//New surface was found among existing surfaces
m_render_targets_storage[address] = std::move(new_surface_storage);
return new_surface;
}
m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, old_surface, std::forward<Args>(extra_params)...);
return Traits::get(m_render_targets_storage[address]);
}
@ -119,7 +157,11 @@ namespace rsx
surface_depth_format depth_format, size_t width, size_t height,
Args&&... extra_params)
{
surface_storage_type old_surface_storage;
surface_storage_type new_surface_storage;
surface_type old_surface = nullptr;
surface_type new_surface = nullptr;
auto It = m_depth_stencil_storage.find(address);
if (It != m_depth_stencil_storage.end())
{
@ -131,10 +173,42 @@ namespace rsx
}
old_surface = Traits::get(ds);
invalidated_resources.push_back(std::move(ds));
old_surface_storage = std::move(ds);
m_depth_stencil_storage.erase(address);
}
//Search invalidated resources for a suitable surface
for (auto &It = invalidated_resources.begin(); It != invalidated_resources.end(); It++)
{
auto &ds = *It;
if (Traits::ds_has_format_width_height(ds, depth_format, width, height))
{
new_surface_storage = std::move(ds);
if (old_surface)
//Exchange this surface with the invalidated one
ds = std::move(old_surface_storage);
else
invalidated_resources.erase(It);
new_surface = Traits::get(new_surface_storage);
Traits::prepare_ds_for_drawing(command_list, new_surface);
Traits::invalidate_depth_surface_contents(command_list, new_surface, true);
break;
}
}
if (old_surface != nullptr && new_surface == nullptr)
//This was already determined to be invalid and is excluded from testing above
invalidated_resources.push_back(std::move(old_surface_storage));
if (new_surface != nullptr)
{
//New surface was found among existing surfaces
m_depth_stencil_storage[address] = std::move(new_surface_storage);
return new_surface;
}
m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, old_surface, std::forward<Args>(extra_params)...);
return Traits::get(m_depth_stencil_storage[address]);
}
@ -358,10 +432,10 @@ namespace rsx
void invalidate_surface_cache_data(command_list_type command_list)
{
for (auto &rtt : m_render_targets_storage)
Traits::invalidate_rtt_surface_contents(command_list, Traits::get(std::get<1>(rtt)));
Traits::invalidate_rtt_surface_contents(command_list, Traits::get(std::get<1>(rtt)), false);
for (auto &ds : m_depth_stencil_storage)
Traits::invalidate_depth_surface_contents(command_list, Traits::get(std::get<1>(ds)));
Traits::invalidate_depth_surface_contents(command_list, Traits::get(std::get<1>(ds)), true);
}
};
}

View File

@ -116,14 +116,16 @@ struct render_target_traits
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ));
}
static
void invalidate_rtt_surface_contents(
gsl::not_null<ID3D12GraphicsCommandList*>,
ID3D12Resource*)
ID3D12Resource*, bool)
{}
static
void invalidate_depth_surface_contents(
gsl::not_null<ID3D12GraphicsCommandList*>,
ID3D12Resource*)
ID3D12Resource*, bool)
{
//TODO
}

View File

@ -240,8 +240,8 @@ struct gl_render_target_traits
static void prepare_ds_for_drawing(void *, gl::render_target*) {}
static void prepare_ds_for_sampling(void *, gl::render_target*) {}
static void invalidate_rtt_surface_contents(void *, gl::render_target*) {}
static void invalidate_depth_surface_contents(void *, gl::render_target *ds) { ds->set_cleared(false); }
static void invalidate_rtt_surface_contents(void *, gl::render_target*, bool) {}
static void invalidate_depth_surface_contents(void *, gl::render_target *ds, bool) { ds->set_cleared(false); }
static
bool rtt_has_format_width_height(const std::unique_ptr<gl::render_target> &rtt, rsx::surface_color_format format, size_t width, size_t height)

View File

@ -944,7 +944,7 @@ void VKGSRender::end()
load_program(is_instanced);
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_setup_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
if (is_instanced)
{
@ -1472,7 +1472,7 @@ void VKGSRender::process_swap_request()
//Feed back damaged resources to the main texture cache for management...
//m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources);
m_rtts.invalidated_resources.clear();
m_rtts.free_invalidated();
m_texture_cache.flush();
if (g_cfg.video.invalidate_surface_cache_every_frame)
@ -1853,6 +1853,7 @@ void VKGSRender::prepare_rtts()
{
LOG_ERROR(RSX, "Invalid framebuffer setup, w=%d, h=%d", clip_width, clip_height);
framebuffer_status_valid = false;
return;
}
framebuffer_status_valid = true;

View File

@ -11,6 +11,8 @@ namespace vk
{
struct render_target : public image
{
u8 deref_count = 0;
bool dirty = false;
u16 native_pitch = 0;
VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
@ -147,6 +149,9 @@ namespace rsx
{
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag);
change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range);
//Reset deref count
surface->deref_count = 0;
}
static void prepare_rtt_for_sampling(vk::command_buffer* pcmd, vk::render_target *surface)
@ -159,6 +164,9 @@ namespace rsx
{
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag);
change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range);
//Reset deref count
surface->deref_count = 0;
}
static void prepare_ds_for_sampling(vk::command_buffer* pcmd, vk::render_target *surface)
@ -167,9 +175,25 @@ namespace rsx
change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range);
}
static void invalidate_rtt_surface_contents(vk::command_buffer*, vk::render_target*) {}
//Invalidates the contents of a color render target.
//NOTE: the body is guarded by a constant 'if (0)', so this function currently does nothing;
//the third (bool) argument is unnamed and ignored. The guarded code is kept as a toggle.
static void invalidate_rtt_surface_contents(vk::command_buffer* pcmd, vk::render_target *rtt, bool /*forced*/)
{
//Disabled: the commented-out condition shows the intended trigger was the 'forced' flag
if (0)//forced)
{
VkClearColorValue clear_color;
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT);
//Clear value is all zeros in every channel
clear_color.float32[0] = 0.f;
clear_color.float32[1] = 0.f;
clear_color.float32[2] = 0.f;
clear_color.float32[3] = 0.f;
//Move the image to GENERAL for the clear, then back to the color attachment layout
change_image_layout(*pcmd, rtt, VK_IMAGE_LAYOUT_GENERAL, range);
vkCmdClearColorImage(*pcmd, rtt->value, VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range);
change_image_layout(*pcmd, rtt, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range);
}
}
static void invalidate_depth_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *ds)
//Invalidates the contents of a depth surface by flagging it as dirty.
//No commands are recorded here: the command buffer and the bool argument are both unnamed and unused.
static void invalidate_depth_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *ds, bool /*forced*/)
{
//Only sets the flag; how 'dirty' is consumed is not visible in this hunk
ds->dirty = true;
}
@ -186,15 +210,21 @@ namespace rsx
return false;
}
static bool ds_has_format_width_height(const std::unique_ptr<vk::render_target> &ds, surface_depth_format, size_t width, size_t height)
static bool ds_has_format_width_height(const std::unique_ptr<vk::render_target> &ds, surface_depth_format format, size_t width, size_t height)
{
// TODO: check format
//VkFormat fmt = vk::get_compatible_depth_surface_format(format);
if (//tex.get_format() == fmt &&
ds->info.extent.width == width &&
if (ds->info.extent.width == width &&
ds->info.extent.height == height)
return true;
{
//Check format
switch (ds->info.format)
{
case VK_FORMAT_D16_UNORM:
return format == surface_depth_format::z16;
case VK_FORMAT_D24_UNORM_S8_UINT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
return format == surface_depth_format::z24s8;
}
}
return false;
}
@ -237,5 +267,16 @@ namespace rsx
m_depth_stencil_storage.clear();
invalidated_resources.clear();
}
//Ages the entries of invalidated_resources by one step and drops the ones that have aged out.
//An entry whose deref_count is already greater than 1 is removed from the list (which destroys
//its owning unique_ptr); every other entry has its deref_count incremented and is kept.
void free_invalidated()
{
	const auto age_and_test = [](std::unique_ptr<vk::render_target>& surface) -> bool
	{
		const bool expired = surface->deref_count > 1;
		if (!expired)
		{
			//Still within its grace period - count this pass and keep it
			++surface->deref_count;
		}

		return expired;
	};

	invalidated_resources.remove_if(age_and_test);
}
};
}

View File

@ -477,7 +477,7 @@ namespace
{
const auto &vbo = vertex_buffers[i];
if (vbo.which() == 0 && vertex_count > 128 && vertex_buffers.size() > 2 && rsxthr->vertex_upload_task_ready())
if (vbo.which() == 0 && vertex_count >= g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready())
{
//vertex array buffer. We can thread this thing heavily
const auto& v = vbo.get<rsx::vertex_array_buffer>();

View File

@ -320,12 +320,13 @@ struct cfg_root : cfg::node
cfg::_bool invalidate_surface_cache_every_frame{this, "Invalidate Cache Every Frame", true};
cfg::_bool strict_rendering_mode{this, "Strict Rendering Mode"};
cfg::_bool batch_instanced_geometry{this, "Batch Instanced Geometry", false};
cfg::_int<1, 16> vertex_upload_threads{ this, "Vertex Upload Threads", 1 };
cfg::_bool batch_instanced_geometry{this, "Batch Instanced Geometry", false}; //Avoid re-uploading geometry if the same draw command is repeated
cfg::_int<1, 16> vertex_upload_threads{ this, "Vertex Upload Threads", 1 }; //Max number of threads to use for parallel vertex processing
cfg::_int<32, 65536> mt_vertex_upload_threshold{ this, "Multithreaded Vertex Upload Threshold", 4096}; //Minimum vertex count to parallelize
cfg::_bool frame_skip_enabled{this, "Enable Frame Skip"};
cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consequtive Frames Drawn", 1};
cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consequtive Frames Skept", 1};
cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consecutive Frames Drawn", 1};
cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consecutive Frames Skept", 1};
struct node_d3d12 : cfg::node
{