rsx: Add some texture search options for the cache

- Potentially optimizes texture cache searching using explicit options
This commit is contained in:
kd-11 2019-10-10 18:32:54 +03:00 committed by kd-11
parent 840b52fe80
commit 1ac976771c
2 changed files with 107 additions and 60 deletions

View File

@ -1445,13 +1445,12 @@ namespace rsx
const size2f& scale, const size2f& scale,
u32 encoded_remap, u32 encoded_remap,
const texture_channel_remap_t& remap, const texture_channel_remap_t& remap,
bool is_compressed_format, const texture_cache_search_options& options,
bool skip_texture_barriers,
const utils::address_range& memory_range, const utils::address_range& memory_range,
rsx::texture_dimension_extended extended_dimension, rsx::texture_dimension_extended extended_dimension,
surface_store_type& m_rtts, Args&& ... extras) surface_store_type& m_rtts, Args&& ... extras)
{ {
if (LIKELY(is_compressed_format)) if (LIKELY(options.is_compressed_format))
{ {
// Most mesh textures are stored as compressed to make the most of the limited memory // Most mesh textures are stored as compressed to make the most of the limited memory
if (auto cached_texture = find_texture_from_dimensions(attr.address, attr.gcm_format, attr.width, attr.height, attr.depth)) if (auto cached_texture = find_texture_from_dimensions(attr.address, attr.gcm_format, attr.width, attr.height, attr.depth))
@ -1472,7 +1471,7 @@ namespace rsx
auto result = texture_cache_helpers::process_framebuffer_resource_fast<sampled_image_descriptor>( auto result = texture_cache_helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
cmd, texptr, attr, scale, extended_dimension, encoded_remap, remap, true, force_convert); cmd, texptr, attr, scale, extended_dimension, encoded_remap, remap, true, force_convert);
if (!skip_texture_barriers) if (!options.skip_texture_barriers)
{ {
insert_texture_barrier(cmd, texptr); insert_texture_barrier(cmd, texptr);
} }
@ -1480,9 +1479,46 @@ namespace rsx
} }
} }
std::vector<typename surface_store_type::surface_overlap_info> overlapping_fbos;
std::vector<section_storage_type*> overlapping_locals;
auto fast_fbo_check = [&]() -> sampled_image_descriptor
{
const auto& last = overlapping_fbos.back();
if (last.src_area.x == 0 && last.src_area.y == 0 && !last.is_clipped)
{
const bool force_convert = !render_target_format_is_compatible(last.surface, attr.gcm_format);
return texture_cache_helpers::process_framebuffer_resource_fast<sampled_image_descriptor>(
cmd, last.surface, attr, scale, extended_dimension, encoded_remap, remap, false, force_convert);
}
return {};
};
// Check surface cache early if the option is enabled
if (options.prefer_surface_cache)
{
// TODO: Should scan for block_height aka required_surface_height not simply attr.height
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, attr.height, attr.pitch, attr.bpp, rsx::surface_access::read);
if (!overlapping_fbos.empty())
{
if (auto result = fast_fbo_check(); result.validate())
{
return result;
}
if (options.skip_texture_merge)
{
overlapping_fbos.clear();
}
}
}
// Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache // Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache
const u32 lookup_mask = rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src; const u32 lookup_mask = rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src;
auto overlapping_locals = find_texture_from_range<true>(memory_range, attr.height > 1 ? attr.pitch : 0, lookup_mask); overlapping_locals = find_texture_from_range<true>(memory_range, attr.height > 1 ? attr.pitch : 0, lookup_mask & options.lookup_mask);
// Search for exact match if possible // Search for exact match if possible
for (auto& cached_texture : overlapping_locals) for (auto& cached_texture : overlapping_locals)
@ -1506,8 +1542,12 @@ namespace rsx
); );
} }
if (!options.prefer_surface_cache)
{
// Now check for surface cache hits
// TODO: Should scan for block_height aka required_surface_height not simply attr.height // TODO: Should scan for block_height aka required_surface_height not simply attr.height
const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, attr.height, attr.pitch, attr.bpp, rsx::surface_access::read); overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, attr.height, attr.pitch, attr.bpp, rsx::surface_access::read);
}
if (!overlapping_fbos.empty() || !overlapping_locals.empty()) if (!overlapping_fbos.empty() || !overlapping_locals.empty())
{ {
@ -1528,13 +1568,12 @@ namespace rsx
if (_pool == 0) if (_pool == 0)
{ {
// Surface cache data is newer, check if this thing fits our search parameters // Surface cache data is newer, check if this thing fits our search parameters
const auto& last = overlapping_fbos.back(); if (!options.prefer_surface_cache)
if (last.src_area.x == 0 && last.src_area.y == 0 && !last.is_clipped)
{ {
const bool force_convert = !render_target_format_is_compatible(last.surface, attr.gcm_format); if (auto result = fast_fbo_check(); result.validate())
{
return texture_cache_helpers::process_framebuffer_resource_fast<sampled_image_descriptor>( return result;
cmd, last.surface, attr, scale, extended_dimension, encoded_remap, remap, false, force_convert); }
} }
} }
else if (extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d) else if (extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d)
@ -1545,19 +1584,12 @@ namespace rsx
if (last->get_section_base() == attr.address && if (last->get_section_base() == attr.address &&
normalized_width >= attr.width && last->get_height() >= attr.height) normalized_width >= attr.width && last->get_height() >= attr.height)
{ {
bool is_depth = last->is_depth_texture();
u32 gcm_format = attr.gcm_format; u32 gcm_format = attr.gcm_format;
const bool gcm_format_is_depth = texture_cache_helpers::is_gcm_depth_format(attr.gcm_format);
if (const auto gcm_format_is_depth = texture_cache_helpers::is_gcm_depth_format(attr.gcm_format); if (gcm_format_is_depth && !last->is_depth_texture())
is_depth != gcm_format_is_depth)
{
// Conflict, resolve
if (gcm_format_is_depth)
{
is_depth = true;
}
else
{ {
// Request for a depth format, but only a color format exists
const auto actual_format = last->get_gcm_format(); const auto actual_format = last->get_gcm_format();
bool resolved = false; bool resolved = false;
@ -1571,7 +1603,6 @@ namespace rsx
{ {
gcm_format = CELL_GCM_TEXTURE_DEPTH24_D8; gcm_format = CELL_GCM_TEXTURE_DEPTH24_D8;
resolved = true; resolved = true;
is_depth = true;
} }
break; break;
} }
@ -1582,7 +1613,6 @@ namespace rsx
{ {
gcm_format = CELL_GCM_TEXTURE_DEPTH16; gcm_format = CELL_GCM_TEXTURE_DEPTH16;
resolved = true; resolved = true;
is_depth = true;
} }
break; break;
} }
@ -1592,9 +1622,6 @@ namespace rsx
{ {
LOG_ERROR(RSX, "Reading texture with gcm format 0x%x as unexpected cast with format 0x%x", LOG_ERROR(RSX, "Reading texture with gcm format 0x%x as unexpected cast with format 0x%x",
actual_format, attr.gcm_format); actual_format, attr.gcm_format);
is_depth = gcm_format_is_depth;
}
} }
} }
@ -1606,6 +1633,11 @@ namespace rsx
} }
} }
if (options.skip_texture_merge)
{
return {};
}
auto result = texture_cache_helpers::merge_cache_resources<sampled_image_descriptor>( auto result = texture_cache_helpers::merge_cache_resources<sampled_image_descriptor>(
overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, encoded_remap, remap, _pool); overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, encoded_remap, remap, _pool);
@ -1646,17 +1678,19 @@ namespace rsx
sampled_image_descriptor upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts, Args&&... extras) sampled_image_descriptor upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts, Args&&... extras)
{ {
image_section_attributes_t attributes{}; image_section_attributes_t attributes{};
texture_cache_search_options options{};
attributes.address = rsx::get_address(tex.offset(), tex.location()); attributes.address = rsx::get_address(tex.offset(), tex.location());
attributes.gcm_format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); attributes.gcm_format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
attributes.bpp = get_format_block_size_in_bytes(attributes.gcm_format); attributes.bpp = get_format_block_size_in_bytes(attributes.gcm_format);
attributes.width = tex.width(); attributes.width = tex.width();
attributes.height = tex.height(); attributes.height = tex.height();
const bool is_compressed_format = texture_cache_helpers::is_compressed_gcm_format(attributes.gcm_format);
const bool is_unnormalized = !!(tex.format() & CELL_GCM_TEXTURE_UN); const bool is_unnormalized = !!(tex.format() & CELL_GCM_TEXTURE_UN);
const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN); const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN);
const auto extended_dimension = tex.get_extended_texture_dimension(); const auto extended_dimension = tex.get_extended_texture_dimension();
options.is_compressed_format = texture_cache_helpers::is_compressed_gcm_format(attributes.gcm_format);
u32 tex_size = 0, required_surface_height; u32 tex_size = 0, required_surface_height;
u8 subsurface_count; u8 subsurface_count;
size2f scale{ 1.f, 1.f }; size2f scale{ 1.f, 1.f };
@ -1685,7 +1719,7 @@ namespace rsx
break; break;
case rsx::texture_dimension_extended::texture_dimension_2d: case rsx::texture_dimension_extended::texture_dimension_2d:
attributes.depth = 1; attributes.depth = 1;
subsurface_count = is_compressed_format? 1 : tex.get_exact_mipmap_count(); subsurface_count = options.is_compressed_format? 1 : tex.get_exact_mipmap_count();
attributes.slice_h = required_surface_height = attributes.height; attributes.slice_h = required_surface_height = attributes.height;
break; break;
case rsx::texture_dimension_extended::texture_dimension_cubemap: case rsx::texture_dimension_extended::texture_dimension_cubemap:
@ -1721,7 +1755,7 @@ namespace rsx
reader_lock lock(m_cache_mutex); reader_lock lock(m_cache_mutex);
auto result = fast_texture_search(cmd, attributes, scale, tex.remap(), tex.decoded_remap(), auto result = fast_texture_search(cmd, attributes, scale, tex.remap(), tex.decoded_remap(),
is_compressed_format, false, lookup_range, extended_dimension, m_rtts, options, lookup_range, extended_dimension, m_rtts,
std::forward<Args>(extras)...); std::forward<Args>(extras)...);
if (result.validate()) if (result.validate())
@ -1761,6 +1795,10 @@ namespace rsx
auto attr2 = attributes; auto attr2 = attributes;
sections.reserve(subsurface_count); sections.reserve(subsurface_count);
options.skip_texture_merge = true;
options.skip_texture_barriers = true;
options.prefer_surface_cache = (result.upload_context == rsx::texture_upload_context::framebuffer_storage);
for (u8 subsurface = 1; subsurface < subsurface_count; ++subsurface) for (u8 subsurface = 1; subsurface < subsurface_count; ++subsurface)
{ {
attr2.address += (attr2.pitch * attr2.height); attr2.address += (attr2.pitch * attr2.height);
@ -1774,7 +1812,7 @@ namespace rsx
const auto range = utils::address_range::start_length(attr2.address, attr2.pitch * attr2.height); const auto range = utils::address_range::start_length(attr2.address, attr2.pitch * attr2.height);
auto ret = fast_texture_search(cmd, attr2, scale, tex.remap(), tex.decoded_remap(), auto ret = fast_texture_search(cmd, attr2, scale, tex.remap(), tex.decoded_remap(),
false, true, range, extended_dimension, m_rtts, std::forward<Args>(extras)...); options, range, extended_dimension, m_rtts, std::forward<Args>(extras)...);
if (!ret.validate() || if (!ret.validate() ||
!texture_cache_helpers::append_mipmap_level(sections, ret, attr2, subsurface, use_upscaling, attributes)) !texture_cache_helpers::append_mipmap_level(sections, ret, attr2, subsurface, use_upscaling, attributes))

View File

@ -74,6 +74,15 @@ namespace rsx
} }
}; };
struct texture_cache_search_options
{
u8 lookup_mask = 0xff;
bool is_compressed_format = false;
bool skip_texture_barriers = false;
bool skip_texture_merge = false;
bool prefer_surface_cache = false;
};
namespace texture_cache_helpers namespace texture_cache_helpers
{ {
static inline bool is_gcm_depth_format(u32 format) static inline bool is_gcm_depth_format(u32 format)