diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 1ff4703a01..65598343fa 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -93,7 +93,12 @@ namespace rsx auto It = m_render_targets_storage.find(address); // TODO: Fix corner cases // This doesn't take overlapping surface(s) into account. + + surface_storage_type old_surface_storage; + surface_storage_type new_surface_storage; surface_type old_surface = nullptr; + surface_type new_surface = nullptr; + if (It != m_render_targets_storage.end()) { surface_storage_type &rtt = It->second; @@ -104,10 +109,43 @@ namespace rsx } old_surface = Traits::get(rtt); - invalidated_resources.push_back(std::move(rtt)); + old_surface_storage = std::move(rtt); m_render_targets_storage.erase(address); } + //Search invalidated resources for a suitable surface + for (auto &It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) + { + auto &rtt = *It; + if (Traits::rtt_has_format_width_height(rtt, color_format, width, height)) + { + new_surface_storage = std::move(rtt); + + if (old_surface) + //Exchange this surface with the invalidated one + rtt = std::move(old_surface_storage); + else + //rtt is now empty - erase it + invalidated_resources.erase(It); + + new_surface = Traits::get(new_surface_storage); + Traits::invalidate_rtt_surface_contents(command_list, new_surface, true); + Traits::prepare_rtt_for_drawing(command_list, new_surface); + break; + } + } + + if (old_surface != nullptr && new_surface == nullptr) + //This was already determined to be invalid and is excluded from testing above + invalidated_resources.push_back(std::move(old_surface_storage)); + + if (new_surface != nullptr) + { + //New surface was found among existing surfaces + m_render_targets_storage[address] = std::move(new_surface_storage); + return new_surface; + } + m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, old_surface, std::forward(extra_params)...); return Traits::get(m_render_targets_storage[address]); } @@ -119,7 +157,11 @@ namespace rsx surface_depth_format depth_format, size_t width, size_t height, Args&&... extra_params) { + surface_storage_type old_surface_storage; + surface_storage_type new_surface_storage; surface_type old_surface = nullptr; + surface_type new_surface = nullptr; + auto It = m_depth_stencil_storage.find(address); if (It != m_depth_stencil_storage.end()) { @@ -131,10 +173,42 @@ namespace rsx } old_surface = Traits::get(ds); - invalidated_resources.push_back(std::move(ds)); + old_surface_storage = std::move(ds); m_depth_stencil_storage.erase(address); } + //Search invalidated resources for a suitable surface + for (auto &It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) + { + auto &ds = *It; + if (Traits::ds_has_format_width_height(ds, depth_format, width, height)) + { + new_surface_storage = std::move(ds); + + if (old_surface) + //Exchange this surface with the invalidated one + ds = std::move(old_surface_storage); + else + invalidated_resources.erase(It); + + new_surface = Traits::get(new_surface_storage); + Traits::prepare_ds_for_drawing(command_list, new_surface); + Traits::invalidate_depth_surface_contents(command_list, new_surface, true); + break; + } + } + + if (old_surface != nullptr && new_surface == nullptr) + //This was already determined to be invalid and is excluded from testing above + invalidated_resources.push_back(std::move(old_surface_storage)); + + if (new_surface != nullptr) + { + //New surface was found among existing surfaces + m_depth_stencil_storage[address] = std::move(new_surface_storage); + return new_surface; + } + m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, old_surface, std::forward(extra_params)...); return Traits::get(m_depth_stencil_storage[address]); } @@ -358,10 +432,10 @@ namespace rsx void invalidate_surface_cache_data(command_list_type command_list) { for (auto &rtt : m_render_targets_storage) - Traits::invalidate_rtt_surface_contents(command_list, Traits::get(std::get<1>(rtt))); + Traits::invalidate_rtt_surface_contents(command_list, Traits::get(std::get<1>(rtt)), false); for (auto &ds : m_depth_stencil_storage) - Traits::invalidate_depth_surface_contents(command_list, Traits::get(std::get<1>(ds))); + Traits::invalidate_depth_surface_contents(command_list, Traits::get(std::get<1>(ds)), true); } }; } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 309b588eaa..2c92edf87c 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -116,14 +116,16 @@ struct render_target_traits command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ)); } + static void invalidate_rtt_surface_contents( gsl::not_null, - ID3D12Resource*) + ID3D12Resource*, bool) {} + static void invalidate_depth_surface_contents( gsl::not_null, - ID3D12Resource*) + ID3D12Resource*, bool) { //TODO } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 123943bfc0..bf2010e9dd 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -240,8 +240,8 @@ struct gl_render_target_traits static void prepare_ds_for_drawing(void *, gl::render_target*) {} static void prepare_ds_for_sampling(void *, gl::render_target*) {} - static void invalidate_rtt_surface_contents(void *, gl::render_target*) {} - static void invalidate_depth_surface_contents(void *, gl::render_target *ds) { ds->set_cleared(false); } + static void invalidate_rtt_surface_contents(void *, gl::render_target*, bool) {} + static void invalidate_depth_surface_contents(void *, gl::render_target *ds, bool) { ds->set_cleared(false); } static bool rtt_has_format_width_height(const std::unique_ptr &rtt, rsx::surface_color_format format, size_t width, size_t height) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 0a6b893014..98606a3761 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -944,7 +944,7 @@ void VKGSRender::end() load_program(is_instanced); std::chrono::time_point program_stop = steady_clock::now(); - m_setup_time += (u32)std::chrono::duration_cast(program_stop - program_start).count(); + m_setup_time += std::chrono::duration_cast(program_stop - program_start).count(); if (is_instanced) { @@ -1472,7 +1472,7 @@ void VKGSRender::process_swap_request() //Feed back damaged resources to the main texture cache for management... //m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources); - m_rtts.invalidated_resources.clear(); + m_rtts.free_invalidated(); m_texture_cache.flush(); if (g_cfg.video.invalidate_surface_cache_every_frame) @@ -1853,6 +1853,7 @@ void VKGSRender::prepare_rtts() { LOG_ERROR(RSX, "Invalid framebuffer setup, w=%d, h=%d", clip_width, clip_height); framebuffer_status_valid = false; + return; } framebuffer_status_valid = true; diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 1532636130..9ccd88540d 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -11,6 +11,8 @@ namespace vk { struct render_target : public image { + u8 deref_count = 0; + bool dirty = false; u16 native_pitch = 0; VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT; @@ -147,6 +149,9 @@ namespace rsx { VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag); change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range); + + //Reset deref count + surface->deref_count = 0; } static void prepare_rtt_for_sampling(vk::command_buffer* pcmd, vk::render_target *surface) @@ -159,6 +164,9 @@ namespace rsx { VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag); change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); + + //Reset deref count + surface->deref_count = 0; } static void prepare_ds_for_sampling(vk::command_buffer* pcmd, vk::render_target *surface) @@ -167,9 +175,25 @@ namespace rsx change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range); } - static void invalidate_rtt_surface_contents(vk::command_buffer*, vk::render_target*) {} + static void invalidate_rtt_surface_contents(vk::command_buffer* pcmd, vk::render_target *rtt, bool /*forced*/) + { + if (0)//forced) + { + VkClearColorValue clear_color; + VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); + + clear_color.float32[0] = 0.f; + clear_color.float32[1] = 0.f; + clear_color.float32[2] = 0.f; + clear_color.float32[3] = 0.f; + + change_image_layout(*pcmd, rtt, VK_IMAGE_LAYOUT_GENERAL, range); + vkCmdClearColorImage(*pcmd, rtt->value, VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range); + change_image_layout(*pcmd, rtt, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range); + } + } - static void invalidate_depth_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *ds) + static void invalidate_depth_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *ds, bool /*forced*/) { ds->dirty = true; } @@ -186,15 +210,21 @@ namespace rsx return false; } - static bool ds_has_format_width_height(const std::unique_ptr &ds, surface_depth_format, size_t width, size_t height) + static bool ds_has_format_width_height(const std::unique_ptr &ds, surface_depth_format format, size_t width, size_t height) { - // TODO: check format - //VkFormat fmt = vk::get_compatible_depth_surface_format(format); - - if (//tex.get_format() == fmt && - ds->info.extent.width == width && + if (ds->info.extent.width == width && ds->info.extent.height == height) - return true; + { + //Check format + switch (ds->info.format) + { + case VK_FORMAT_D16_UNORM: + return format == surface_depth_format::z16; + case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + return format == surface_depth_format::z24s8; + } + } return false; } @@ -237,5 +267,16 @@ namespace rsx m_depth_stencil_storage.clear(); invalidated_resources.clear(); } + + void free_invalidated() + { + invalidated_resources.remove_if([](std::unique_ptr& rtt) + { + if (rtt->deref_count > 1) return true; + + rtt->deref_count++; + return false; + }); + } }; } diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 38993b170f..0912cf7e14 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -477,7 +477,7 @@ namespace { const auto &vbo = vertex_buffers[i]; - if (vbo.which() == 0 && vertex_count > 128 && vertex_buffers.size() > 2 && rsxthr->vertex_upload_task_ready()) + if (vbo.which() == 0 && vertex_count >= g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready()) { //vertex array buffer. We can thread this thing heavily const auto& v = vbo.get(); diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 33ae8f6057..f8a01640c3 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -320,12 +320,13 @@ struct cfg_root : cfg::node cfg::_bool invalidate_surface_cache_every_frame{this, "Invalidate Cache Every Frame", true}; cfg::_bool strict_rendering_mode{this, "Strict Rendering Mode"}; - cfg::_bool batch_instanced_geometry{this, "Batch Instanced Geometry", false}; - cfg::_int<1, 16> vertex_upload_threads{ this, "Vertex Upload Threads", 1 }; + cfg::_bool batch_instanced_geometry{this, "Batch Instanced Geometry", false}; //Avoid re-uploading geometry if the same draw command is repeated + cfg::_int<1, 16> vertex_upload_threads{ this, "Vertex Upload Threads", 1 }; //Max number of threads to use for parallel vertex processing + cfg::_int<32, 65536> mt_vertex_upload_threshold{ this, "Multithreaded Vertex Upload Threshold", 4096}; //Minimum vertex count to parallelize cfg::_bool frame_skip_enabled{this, "Enable Frame Skip"}; - cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consequtive Frames Drawn", 1}; - cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consequtive Frames Skept", 1}; + cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consecutive Frames Drawn", 1}; + cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consecutive Frames Skept", 1}; struct node_d3d12 : cfg::node {