rsx: Fix temporary subresource caching behaviour

- Do not cache if a gathered subresource contains a bound RTT
- Change op to dynamic copy if parent is still bound
This commit is contained in:
kd-11 2019-04-01 18:45:19 +03:00 committed by kd-11
parent 3249000511
commit e4e86455f2
4 changed files with 82 additions and 46 deletions

View File

@ -178,6 +178,7 @@ namespace rsx
u16 width = 0;
u16 height = 0;
u16 depth = 1;
bool do_not_cache = false;
deferred_subresource()
{}
@ -1506,20 +1507,23 @@ namespace rsx
image_view_type create_temporary_subresource(commandbuffer_type &cmd, deferred_subresource& desc)
{
const auto found = m_temporary_subresource_cache.equal_range(desc.base_address);
for (auto It = found.first; It != found.second; ++It)
if (!desc.do_not_cache)
{
const auto& found_desc = It->second.first;
if (found_desc.external_handle != desc.external_handle ||
found_desc.op != desc.op ||
found_desc.x != desc.x || found_desc.y != desc.y ||
found_desc.width != desc.width || found_desc.height != desc.height)
continue;
const auto found = m_temporary_subresource_cache.equal_range(desc.base_address);
for (auto It = found.first; It != found.second; ++It)
{
const auto& found_desc = It->second.first;
if (found_desc.external_handle != desc.external_handle ||
found_desc.op != desc.op ||
found_desc.x != desc.x || found_desc.y != desc.y ||
found_desc.width != desc.width || found_desc.height != desc.height)
continue;
if (desc.op == deferred_request_command::copy_image_dynamic)
update_image_contents(cmd, It->second.second, desc.external_handle, desc.width, desc.height);
if (desc.op == deferred_request_command::copy_image_dynamic)
update_image_contents(cmd, It->second.second, desc.external_handle, desc.width, desc.height);
return It->second.second;
return It->second.second;
}
}
image_view_type result = 0;
@ -1600,9 +1604,19 @@ namespace rsx
return result;
}
// NOTE(review): this diff view shows two forms of the function interleaved. The u32 signature
// directly below and the erase(base_address) call inside the body appear to be the
// pre-change version; the address_range signature and the loop are the range-based form.
void notify_surface_changed(u32 base_address)
// Drops cached temporary subresources affected by a surface change: every cache entry
// whose key (the base address it was stored under) overlaps `range` is erased.
void notify_surface_changed(const utils::address_range& range)
{
// Exact-key erase keyed on a single base address (belongs to the u32 form above).
m_temporary_subresource_cache.erase(base_address);
// Walk the whole cache. The iterator is only advanced manually when nothing was erased;
// otherwise the loop continues from the iterator returned by erase().
for (auto It = m_temporary_subresource_cache.begin(); It != m_temporary_subresource_cache.end();)
{
// It->first is the address key of the cached entry.
if (range.overlaps(It->first))
{
It = m_temporary_subresource_cache.erase(It);
}
else
{
++It;
}
}
}
template<typename surface_store_list_type>
@ -2271,6 +2285,25 @@ namespace rsx
// TODO: Overlapped section persistence is required for framebuffer resources to work with this!
// Yellow filter in SCV is because of a 384x384 surface being reused as 160x90 (and likely not getting written to)
// It's then sampled again here as 384x384 and this does not work! (obviously)
// Optionally disallow caching if resource is being written to as it is being read from
for (const auto &section : overlapping_fbos)
{
if (m_rtts.address_is_bound(section.base_address))
{
if (result.external_subresource_desc.op == deferred_request_command::copy_image_static)
{
result.external_subresource_desc.op = deferred_request_command::copy_image_dynamic;
}
else
{
result.external_subresource_desc.do_not_cache = true;
}
break;
}
}
return result;
}
else
@ -2798,23 +2831,26 @@ namespace rsx
typeless_info.dst_context = texture_upload_context::blit_engine_dst;
}
// Calculate number of bytes actually modified
u32 mem_length;
const u32 mem_base = dst_address - dst.rsx_address;
if (dst.clip_height == 1)
{
mem_length = dst.clip_width * dst_bpp;
}
else
{
const u32 mem_excess = mem_base % dst.pitch;
mem_length = (dst.pitch * dst.clip_height) - mem_excess;
}
// Invalidate any cached subresources in modified range
notify_surface_changed(utils::address_range::start_length(dst_address, mem_length));
if (cached_dest)
{
lock.upgrade();
u32 mem_length;
const u32 mem_base = dst_address - cached_dest->get_section_base();
if (dst.clip_height == 1)
{
mem_length = dst.clip_width * dst_bpp;
}
else
{
const u32 mem_excess = mem_base % dst.pitch;
mem_length = (dst.pitch * dst.clip_height) - mem_excess;
}
verify(HERE), (mem_base + mem_length) <= cached_dest->get_section_size();
cached_dest->reprotect(utils::protection::no, { mem_base, mem_length });
@ -2863,7 +2899,6 @@ namespace rsx
typeless_info.analyse();
blitter.scale_image(cmd, vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit, typeless_info);
notify_surface_changed(dst.rsx_address);
blit_op_result result = true;
result.is_depth = is_depth_blit;

View File

@ -261,7 +261,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
rtt->tile = find_tile(color_offsets[i], color_locations[i]);
rtt->write_aa_mode = layout.aa_mode;
m_gl_texture_cache.notify_surface_changed(m_surface_info[i].address);
m_gl_texture_cache.notify_surface_changed(m_surface_info[i].get_memory_range(layout.aa_factors));
}
else
{
@ -270,18 +270,18 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
}
}
if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer)
{
auto bpp = m_depth_surface_info.pitch / m_depth_surface_info.width;
auto old_format = (bpp == 2) ? gl::texture::format::depth : gl::texture::format::depth_stencil;
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(cmd, surface_range);
}
if (std::get<0>(m_rtts.m_bound_depth_stencil))
{
if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer)
{
auto bpp = m_depth_surface_info.pitch / m_depth_surface_info.width;
auto old_format = (bpp == 2) ? gl::texture::format::depth : gl::texture::format::depth_stencil;
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(cmd, surface_range);
}
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
depth_stencil_target = ds->id();
@ -289,7 +289,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
m_depth_surface_info = { layout.zeta_address, layout.actual_zeta_pitch, true, layout.color_format, layout.depth_format, layout.width, layout.height, depth_bpp };
ds->write_aa_mode = layout.aa_mode;
m_gl_texture_cache.notify_surface_changed(layout.zeta_address);
m_gl_texture_cache.notify_surface_changed(m_depth_surface_info.get_memory_range(layout.aa_factors));
}
else
{
@ -387,7 +387,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors);
const auto surface_range = m_surface_info[i].get_memory_range();
if (g_cfg.video.write_color_buffers)
{
// Mark buffer regions as NO_ACCESS on Cell-visible side
@ -402,7 +402,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors);
const auto surface_range = m_depth_surface_info.get_memory_range();
if (g_cfg.video.write_depth_buffer)
{
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);

View File

@ -2872,7 +2872,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
{
//Flush old address if we keep missing it
// Flush old address if we keep missing it
if (m_surface_info[i].pitch && g_cfg.video.write_color_buffers)
{
if (old_format == VK_FORMAT_UNDEFINED)
@ -2925,7 +2925,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
verify("Pitch mismatch!" HERE), surface->rsx_pitch == layout.actual_color_pitch[index];
surface->write_aa_mode = layout.aa_mode;
m_texture_cache.notify_surface_changed(layout.color_addresses[index]);
m_texture_cache.notify_surface_changed(m_surface_info[index].get_memory_range(layout.aa_factors));
m_draw_buffers.push_back(index);
}
}
@ -2940,7 +2940,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
verify("Pitch mismatch!" HERE), ds->rsx_pitch == layout.actual_zeta_pitch;
ds->write_aa_mode = layout.aa_mode;
m_texture_cache.notify_surface_changed(layout.zeta_address);
m_texture_cache.notify_surface_changed(m_depth_surface_info.get_memory_range(layout.aa_factors));
}
// Before messing with memory properties, flush command queue if there are dma transfers queued up
@ -2954,7 +2954,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors);
const utils::address_range surface_range = m_surface_info[index].get_memory_range();
if (g_cfg.video.write_color_buffers)
{
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
@ -2968,7 +2968,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
if (g_cfg.video.write_depth_buffer)
{
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;

View File

@ -81,6 +81,7 @@ namespace rsx
// Returns the stored `range` member unchanged (no scaling argument is applied in this overload).
address_range get_memory_range() const
{
// Consistency check: the stored range is expected to start exactly at `address`.
// NOTE(review): failure behaviour is whatever the project's verify() macro does — not visible here.
verify(HERE), range.start == address;
return range;
}
};