rsx: Minor texture/surface scanning optimization

- Also re-enable compiler optimizations in the blit engine that were accidentally left disabled during debugging
This commit is contained in:
kd-11 2019-05-11 18:31:49 +03:00 committed by kd-11
parent 9f0090772a
commit 3c7d8a1099
3 changed files with 66 additions and 57 deletions

View File

@ -434,6 +434,7 @@ namespace rsx
rsx::address_range m_depth_stencil_memory_range;
public:
std::pair<u8, u8> m_bound_render_targets_config = {};
std::array<std::pair<u32, surface_type>, 4> m_bound_render_targets = {};
std::pair<u32, surface_type> m_bound_depth_stencil = {};
@ -1000,15 +1001,22 @@ namespace rsx
cache_tag = rsx::get_shared_tag();
// Make previous RTTs sampleable
for (auto &rtt : m_bound_render_targets)
for (int i = m_bound_render_targets_config.first, count = 0;
count < m_bound_render_targets_config.second;
++i, ++count)
{
if (std::get<1>(rtt) != nullptr)
auto &rtt = m_bound_render_targets[i];
Traits::prepare_rtt_for_sampling(command_list, std::get<1>(rtt));
rtt = std::make_pair(0, nullptr);
}
const auto rtt_indices = utility::get_rtt_indexes(set_surface_target);
if (LIKELY(!rtt_indices.empty()))
{
m_bound_render_targets_config = { rtt_indices.front(), 0 };
// Create/Reuse requested rtts
for (u8 surface_index : utility::get_rtt_indexes(set_surface_target))
for (u8 surface_index : rtt_indices)
{
if (surface_addresses[surface_index] == 0)
continue;
@ -1016,6 +1024,13 @@ namespace rsx
m_bound_render_targets[surface_index] = std::make_pair(surface_addresses[surface_index],
bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias,
clip_width, clip_height, surface_pitch[surface_index], std::forward<Args>(extra_params)...));
m_bound_render_targets_config.second++;
}
}
else
{
m_bound_render_targets_config = { 0, 0 };
}
// Same for depth buffer
@ -1288,17 +1303,15 @@ namespace rsx
bool address_is_bound(u32 address) const
{
for (auto &surface : m_bound_render_targets)
for (int i = m_bound_render_targets_config.first, count = 0;
count < m_bound_render_targets_config.second;
++i, ++count)
{
const u32 bound_address = std::get<0>(surface);
if (bound_address == address)
if (m_bound_render_targets[i].first == address)
return true;
}
if (std::get<0>(m_bound_depth_stencil) == address)
return true;
return false;
return (m_bound_depth_stencil.first == address);
}
template <typename commandbuffer_type>
@ -1460,17 +1473,10 @@ namespace rsx
}
// Tag all available surfaces
for (int i = 0; i < m_bound_render_targets.size(); ++i)
for (int i = m_bound_render_targets_config.first, count = 0;
count < m_bound_render_targets_config.second;
++i, ++count)
{
// Usually only 1 or 2 buffers are bound anyway
if (LIKELY(!m_bound_render_targets[i].first))
{
if (i) break;
// B-surface binding
continue;
}
m_bound_render_targets[i].second->on_write(write_tag);
}
@ -1481,14 +1487,10 @@ namespace rsx
}
else
{
for (int i = 0; i < m_bound_render_targets.size(); ++i)
for (int i = m_bound_render_targets_config.first, count = 0;
count < m_bound_render_targets_config.second;
++i, ++count)
{
if (LIKELY(!m_bound_render_targets[i].first))
{
if (i) break;
continue;
}
if (m_bound_render_targets[i].first != address)
{
continue;
@ -1527,6 +1529,7 @@ namespace rsx
free_resource_list(m_depth_stencil_storage);
m_bound_depth_stencil = std::make_pair(0, nullptr);
m_bound_render_targets_config = { 0, 0 };
for (auto &rtt : m_bound_render_targets)
{
rtt = std::make_pair(0, nullptr);

View File

@ -2135,6 +2135,18 @@ namespace rsx
}
}
reader_lock lock(m_cache_mutex);
if (LIKELY(is_compressed_format))
{
// Most mesh textures are stored as compressed to make the most of the limited memory
if (auto cached_texture = find_texture_from_dimensions(texaddr, format, tex_width, tex_height, depth))
{
return{ cached_texture->get_view(tex.remap(), tex.decoded_remap()), cached_texture->get_context(), cached_texture->is_depth_texture(), scale_x, scale_y, cached_texture->get_image_type() };
}
}
else
{
// Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache
const u32 lookup_mask = (is_compressed_format) ? rsx::texture_upload_context::shader_read :
rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src;
@ -2151,8 +2163,6 @@ namespace rsx
}
}
reader_lock lock(m_cache_mutex);
const auto overlapping_locals = find_texture_from_range<true>(lookup_range, tex_height > 1? tex_pitch : 0, lookup_mask);
for (auto& cached_texture : overlapping_locals)
{
@ -2162,8 +2172,6 @@ namespace rsx
}
}
if (!is_compressed_format)
{
// Next, attempt to merge blit engine and surface store
// Blit sources contain info from any shader-read stuff in range
// NOTE: Compressed formats require a reupload, facilitated by blit synchronization and/or WCB and are not handled here

View File

@ -810,7 +810,6 @@ namespace rsx
namespace nv3089
{
#pragma optimize("", off)
void image_in(thread *rsx, u32 _reg, u32 arg)
{
const rsx::blit_engine::transfer_operation operation = method_registers.blit_engine_operation();
@ -1191,7 +1190,6 @@ namespace rsx
std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
}
}
#pragma optimize("", on)
}
namespace nv0039