From e4e86455f2119f6a7d0fcb3a9f066d0d03d688e1 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 1 Apr 2019 18:45:19 +0300 Subject: [PATCH] rsx: Fix temporary subresource caching behaviour - Do not cache if a gathered subresource contains a bound RTT - Change op to dynamic copy if parent is still bound --- rpcs3/Emu/RSX/Common/texture_cache.h | 89 +++++++++++++++++++--------- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 28 ++++----- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 10 ++-- rpcs3/Emu/RSX/rsx_utils.h | 1 + 4 files changed, 82 insertions(+), 46 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index d145e892de..db459379d9 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -178,6 +178,7 @@ namespace rsx u16 width = 0; u16 height = 0; u16 depth = 1; + bool do_not_cache = false; deferred_subresource() {} @@ -1506,20 +1507,23 @@ namespace rsx image_view_type create_temporary_subresource(commandbuffer_type &cmd, deferred_subresource& desc) { - const auto found = m_temporary_subresource_cache.equal_range(desc.base_address); - for (auto It = found.first; It != found.second; ++It) + if (!desc.do_not_cache) { - const auto& found_desc = It->second.first; - if (found_desc.external_handle != desc.external_handle || - found_desc.op != desc.op || - found_desc.x != desc.x || found_desc.y != desc.y || - found_desc.width != desc.width || found_desc.height != desc.height) - continue; + const auto found = m_temporary_subresource_cache.equal_range(desc.base_address); + for (auto It = found.first; It != found.second; ++It) + { + const auto& found_desc = It->second.first; + if (found_desc.external_handle != desc.external_handle || + found_desc.op != desc.op || + found_desc.x != desc.x || found_desc.y != desc.y || + found_desc.width != desc.width || found_desc.height != desc.height) + continue; - if (desc.op == deferred_request_command::copy_image_dynamic) - update_image_contents(cmd, It->second.second, 
desc.external_handle, desc.width, desc.height); + if (desc.op == deferred_request_command::copy_image_dynamic) + update_image_contents(cmd, It->second.second, desc.external_handle, desc.width, desc.height); - return It->second.second; + return It->second.second; + } } image_view_type result = 0; @@ -1600,9 +1604,19 @@ namespace rsx return result; } - void notify_surface_changed(u32 base_address) + void notify_surface_changed(const utils::address_range& range) { - m_temporary_subresource_cache.erase(base_address); + for (auto It = m_temporary_subresource_cache.begin(); It != m_temporary_subresource_cache.end();) + { + if (range.overlaps(It->first)) + { + It = m_temporary_subresource_cache.erase(It); + } + else + { + ++It; + } + } } template @@ -2271,6 +2285,25 @@ namespace rsx // TODO: Overlapped section persistance is required for framebuffer resources to work with this! // Yellow filter in SCV is because of a 384x384 surface being reused as 160x90 (and likely not getting written to) // Its then sampled again here as 384x384 and this does not work! 
(obviously) + + // Optionally disallow caching if resource is being written to as it is being read from + for (const auto &section : overlapping_fbos) + { + if (m_rtts.address_is_bound(section.base_address)) + { + if (result.external_subresource_desc.op == deferred_request_command::copy_image_static) + { + result.external_subresource_desc.op = deferred_request_command::copy_image_dynamic; + } + else + { + result.external_subresource_desc.do_not_cache = true; + } + + break; + } + } + return result; } else @@ -2798,23 +2831,26 @@ namespace rsx typeless_info.dst_context = texture_upload_context::blit_engine_dst; } + // Calculate number of bytes actually modified + u32 mem_length; + const u32 mem_base = dst_address - dst.rsx_address; + if (dst.clip_height == 1) + { + mem_length = dst.clip_width * dst_bpp; + } + else + { + const u32 mem_excess = mem_base % dst.pitch; + mem_length = (dst.pitch * dst.clip_height) - mem_excess; + } + + // Invalidate any cached subresources in modified range + notify_surface_changed(utils::address_range::start_length(dst_address, mem_length)); + if (cached_dest) { lock.upgrade(); - u32 mem_length; - const u32 mem_base = dst_address - cached_dest->get_section_base(); - - if (dst.clip_height == 1) - { - mem_length = dst.clip_width * dst_bpp; - } - else - { - const u32 mem_excess = mem_base % dst.pitch; - mem_length = (dst.pitch * dst.clip_height) - mem_excess; - } - verify(HERE), (mem_base + mem_length) <= cached_dest->get_section_size(); cached_dest->reprotect(utils::protection::no, { mem_base, mem_length }); @@ -2863,7 +2899,6 @@ namespace rsx typeless_info.analyse(); blitter.scale_image(cmd, vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit, typeless_info); - notify_surface_changed(dst.rsx_address); blit_op_result result = true; result.is_depth = is_depth_blit; diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 136a4ba09d..6be52c13e2 100644 --- 
a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -261,7 +261,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk rtt->tile = find_tile(color_offsets[i], color_locations[i]); rtt->write_aa_mode = layout.aa_mode; - m_gl_texture_cache.notify_surface_changed(m_surface_info[i].address); + m_gl_texture_cache.notify_surface_changed(m_surface_info[i].get_memory_range(layout.aa_factors)); } else { @@ -270,18 +270,18 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk } } + if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer) + { + auto bpp = m_depth_surface_info.pitch / m_depth_surface_info.width; + auto old_format = (bpp == 2) ? gl::texture::format::depth : gl::texture::format::depth_stencil; + + const utils::address_range surface_range = m_depth_surface_info.get_memory_range(); + m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); + m_gl_texture_cache.flush_if_cache_miss_likely(cmd, surface_range); + } + if (std::get<0>(m_rtts.m_bound_depth_stencil)) { - if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer) - { - auto bpp = m_depth_surface_info.pitch / m_depth_surface_info.width; - auto old_format = (bpp == 2) ? 
gl::texture::format::depth : gl::texture::format::depth_stencil; - - const utils::address_range surface_range = m_depth_surface_info.get_memory_range(); - m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); - m_gl_texture_cache.flush_if_cache_miss_likely(cmd, surface_range); - } - auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); depth_stencil_target = ds->id(); @@ -289,7 +289,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height, depth_bpp }; ds->write_aa_mode = layout.aa_mode; - m_gl_texture_cache.notify_surface_changed(layout.zeta_address); + m_gl_texture_cache.notify_surface_changed(m_depth_surface_info.get_memory_range(layout.aa_factors)); } else { @@ -387,7 +387,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk { if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; - const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors); + const auto surface_range = m_surface_info[i].get_memory_range(); if (g_cfg.video.write_color_buffers) { // Mark buffer regions as NO_ACCESS on Cell-visible side @@ -402,7 +402,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk if (m_depth_surface_info.address && m_depth_surface_info.pitch) { - const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors); + const auto surface_range = m_depth_surface_info.get_memory_range(); if (g_cfg.video.write_depth_buffer) { const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 09fc2d8dd8..381dfedcd2 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2872,7 +2872,7 @@ void 
VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) { - //Flush old address if we keep missing it + // Flush old address if we keep missing it if (m_surface_info[i].pitch && g_cfg.video.write_color_buffers) { if (old_format == VK_FORMAT_UNDEFINED) @@ -2925,7 +2925,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) verify("Pitch mismatch!" HERE), surface->rsx_pitch == layout.actual_color_pitch[index]; surface->write_aa_mode = layout.aa_mode; - m_texture_cache.notify_surface_changed(layout.color_addresses[index]); + m_texture_cache.notify_surface_changed(m_surface_info[index].get_memory_range(layout.aa_factors)); m_draw_buffers.push_back(index); } } @@ -2940,7 +2940,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) verify("Pitch mismatch!" HERE), ds->rsx_pitch == layout.actual_zeta_pitch; ds->write_aa_mode = layout.aa_mode; - m_texture_cache.notify_surface_changed(layout.zeta_address); + m_texture_cache.notify_surface_changed(m_depth_surface_info.get_memory_range(layout.aa_factors)); } // Before messing with memory properties, flush command queue if there are dma transfers queued up @@ -2954,7 +2954,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; - const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors); + const utils::address_range surface_range = m_surface_info[index].get_memory_range(); if (g_cfg.video.write_color_buffers) { m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range, @@ -2968,7 +2968,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) if (m_depth_surface_info.address && m_depth_surface_info.pitch) { - const utils::address_range surface_range = 
m_depth_surface_info.get_memory_range(layout.aa_factors); + const utils::address_range surface_range = m_depth_surface_info.get_memory_range(); if (g_cfg.video.write_depth_buffer) { const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8; diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index e7a833802f..e7f774948e 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -81,6 +81,7 @@ namespace rsx address_range get_memory_range() const { + verify(HERE), range.start == address; return range; } };