mirror of https://github.com/RPCS3/rpcs3.git
rsx: Minor texture/surface scanning optimization
- Also re-enable compiler optimization in the blit engine, which was accidentally disabled during debugging
This commit is contained in:
parent
9f0090772a
commit
3c7d8a1099
|
@ -434,6 +434,7 @@ namespace rsx
|
|||
rsx::address_range m_depth_stencil_memory_range;
|
||||
|
||||
public:
|
||||
std::pair<u8, u8> m_bound_render_targets_config = {};
|
||||
std::array<std::pair<u32, surface_type>, 4> m_bound_render_targets = {};
|
||||
std::pair<u32, surface_type> m_bound_depth_stencil = {};
|
||||
|
||||
|
@ -1000,15 +1001,22 @@ namespace rsx
|
|||
cache_tag = rsx::get_shared_tag();
|
||||
|
||||
// Make previous RTTs sampleable
|
||||
for (auto &rtt : m_bound_render_targets)
|
||||
for (int i = m_bound_render_targets_config.first, count = 0;
|
||||
count < m_bound_render_targets_config.second;
|
||||
++i, ++count)
|
||||
{
|
||||
if (std::get<1>(rtt) != nullptr)
|
||||
auto &rtt = m_bound_render_targets[i];
|
||||
Traits::prepare_rtt_for_sampling(command_list, std::get<1>(rtt));
|
||||
rtt = std::make_pair(0, nullptr);
|
||||
}
|
||||
|
||||
const auto rtt_indices = utility::get_rtt_indexes(set_surface_target);
|
||||
if (LIKELY(!rtt_indices.empty()))
|
||||
{
|
||||
m_bound_render_targets_config = { rtt_indices.front(), 0 };
|
||||
|
||||
// Create/Reuse requested rtts
|
||||
for (u8 surface_index : utility::get_rtt_indexes(set_surface_target))
|
||||
for (u8 surface_index : rtt_indices)
|
||||
{
|
||||
if (surface_addresses[surface_index] == 0)
|
||||
continue;
|
||||
|
@ -1016,6 +1024,13 @@ namespace rsx
|
|||
m_bound_render_targets[surface_index] = std::make_pair(surface_addresses[surface_index],
|
||||
bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias,
|
||||
clip_width, clip_height, surface_pitch[surface_index], std::forward<Args>(extra_params)...));
|
||||
|
||||
m_bound_render_targets_config.second++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_bound_render_targets_config = { 0, 0 };
|
||||
}
|
||||
|
||||
// Same for depth buffer
|
||||
|
@ -1288,17 +1303,15 @@ namespace rsx
|
|||
|
||||
bool address_is_bound(u32 address) const
|
||||
{
|
||||
for (auto &surface : m_bound_render_targets)
|
||||
for (int i = m_bound_render_targets_config.first, count = 0;
|
||||
count < m_bound_render_targets_config.second;
|
||||
++i, ++count)
|
||||
{
|
||||
const u32 bound_address = std::get<0>(surface);
|
||||
if (bound_address == address)
|
||||
if (m_bound_render_targets[i].first == address)
|
||||
return true;
|
||||
}
|
||||
|
||||
if (std::get<0>(m_bound_depth_stencil) == address)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
return (m_bound_depth_stencil.first == address);
|
||||
}
|
||||
|
||||
template <typename commandbuffer_type>
|
||||
|
@ -1460,17 +1473,10 @@ namespace rsx
|
|||
}
|
||||
|
||||
// Tag all available surfaces
|
||||
for (int i = 0; i < m_bound_render_targets.size(); ++i)
|
||||
for (int i = m_bound_render_targets_config.first, count = 0;
|
||||
count < m_bound_render_targets_config.second;
|
||||
++i, ++count)
|
||||
{
|
||||
// Usually only 1 or 2 buffers are bound anyway
|
||||
if (LIKELY(!m_bound_render_targets[i].first))
|
||||
{
|
||||
if (i) break;
|
||||
|
||||
// B-surface binding
|
||||
continue;
|
||||
}
|
||||
|
||||
m_bound_render_targets[i].second->on_write(write_tag);
|
||||
}
|
||||
|
||||
|
@ -1481,14 +1487,10 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < m_bound_render_targets.size(); ++i)
|
||||
for (int i = m_bound_render_targets_config.first, count = 0;
|
||||
count < m_bound_render_targets_config.second;
|
||||
++i, ++count)
|
||||
{
|
||||
if (LIKELY(!m_bound_render_targets[i].first))
|
||||
{
|
||||
if (i) break;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (m_bound_render_targets[i].first != address)
|
||||
{
|
||||
continue;
|
||||
|
@ -1527,6 +1529,7 @@ namespace rsx
|
|||
free_resource_list(m_depth_stencil_storage);
|
||||
|
||||
m_bound_depth_stencil = std::make_pair(0, nullptr);
|
||||
m_bound_render_targets_config = { 0, 0 };
|
||||
for (auto &rtt : m_bound_render_targets)
|
||||
{
|
||||
rtt = std::make_pair(0, nullptr);
|
||||
|
|
|
@ -2135,6 +2135,18 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
|
||||
reader_lock lock(m_cache_mutex);
|
||||
|
||||
if (LIKELY(is_compressed_format))
|
||||
{
|
||||
// Most mesh textures are stored as compressed to make the most of the limited memory
|
||||
if (auto cached_texture = find_texture_from_dimensions(texaddr, format, tex_width, tex_height, depth))
|
||||
{
|
||||
return{ cached_texture->get_view(tex.remap(), tex.decoded_remap()), cached_texture->get_context(), cached_texture->is_depth_texture(), scale_x, scale_y, cached_texture->get_image_type() };
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache
|
||||
const u32 lookup_mask = (is_compressed_format) ? rsx::texture_upload_context::shader_read :
|
||||
rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src;
|
||||
|
@ -2151,8 +2163,6 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
|
||||
reader_lock lock(m_cache_mutex);
|
||||
|
||||
const auto overlapping_locals = find_texture_from_range<true>(lookup_range, tex_height > 1? tex_pitch : 0, lookup_mask);
|
||||
for (auto& cached_texture : overlapping_locals)
|
||||
{
|
||||
|
@ -2162,8 +2172,6 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
|
||||
if (!is_compressed_format)
|
||||
{
|
||||
// Next, attempt to merge blit engine and surface store
|
||||
// Blit sources contain info from any shader-read stuff in range
|
||||
// NOTE: Compressed formats require a reupload, facilitated by blit synchronization and/or WCB and are not handled here
|
||||
|
|
|
@ -810,7 +810,6 @@ namespace rsx
|
|||
|
||||
namespace nv3089
|
||||
{
|
||||
#pragma optimize("", off)
|
||||
void image_in(thread *rsx, u32 _reg, u32 arg)
|
||||
{
|
||||
const rsx::blit_engine::transfer_operation operation = method_registers.blit_engine_operation();
|
||||
|
@ -1191,7 +1190,6 @@ namespace rsx
|
|||
std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
|
||||
}
|
||||
}
|
||||
#pragma optimize("", on)
|
||||
}
|
||||
|
||||
namespace nv0039
|
||||
|
|
Loading…
Reference in New Issue