rsx: Add cache pattern checking to blit engine resources

- The feature was implemented long ago but was never functional due to bugs
This commit is contained in:
kd-11 2018-02-10 19:21:16 +03:00
parent c191a98ec3
commit 1bd77c2f51
4 changed files with 203 additions and 59 deletions

View File

@ -40,6 +40,12 @@ namespace rsx
u16 rsx_pitch; u16 rsx_pitch;
u32 gcm_format = 0; u32 gcm_format = 0;
bool pack_unpack_swap_bytes = false;
bool synchronized = false;
bool flushed = false;
u32 num_writes = 0;
u32 required_writes = 1;
u64 cache_tag = 0; u64 cache_tag = 0;
@ -79,6 +85,17 @@ namespace rsx
return false; return false;
} }
void touch()
{
num_writes++;
}
void reset_write_statistics()
{
required_writes = num_writes;
num_writes = 0;
}
void set_view_flags(rsx::texture_create_flags flags) void set_view_flags(rsx::texture_create_flags flags)
{ {
view_flags = flags; view_flags = flags;
@ -148,6 +165,14 @@ namespace rsx
{ {
return sampler_status; return sampler_status;
} }
bool writes_likely_completed() const
{
if (context == rsx::texture_upload_context::blit_engine_dst)
return num_writes >= required_writes;
return true;
}
}; };
template <typename commandbuffer_type, typename section_storage_type, typename image_resource_type, typename image_view_type, typename image_storage_type, typename texture_format> template <typename commandbuffer_type, typename section_storage_type, typename image_resource_type, typename image_view_type, typename image_storage_type, typename texture_format>
@ -246,6 +271,17 @@ namespace rsx
{} {}
}; };
struct blit_op_result
{
bool succeeded = false;
bool is_depth = false;
u32 real_dst_address = 0;
u32 real_dst_size = 0;
blit_op_result(bool success) : succeeded(success)
{}
};
struct sampled_image_descriptor : public sampled_image_descriptor_base struct sampled_image_descriptor : public sampled_image_descriptor_base
{ {
image_view_type image_handle = 0; image_view_type image_handle = 0;
@ -425,7 +461,7 @@ namespace rsx
if (tex.cache_tag == cache_tag) continue; //already processed if (tex.cache_tag == cache_tag) continue; //already processed
if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
auto overlapped = tex.overlaps_page(trampled_range, address, strict_range_check); auto overlapped = tex.overlaps_page(trampled_range, address, strict_range_check || tex.get_context() == rsx::texture_upload_context::blit_engine_dst);
if (std::get<0>(overlapped)) if (std::get<0>(overlapped))
{ {
auto &new_range = std::get<1>(overlapped); auto &new_range = std::get<1>(overlapped);
@ -883,6 +919,9 @@ namespace rsx
if (skip_synchronized && region->is_synchronized()) if (skip_synchronized && region->is_synchronized())
return false; return false;
if (!region->writes_likely_completed())
return true;
region->copy_texture(false, std::forward<Args>(extra)...); region->copy_texture(false, std::forward<Args>(extra)...);
return true; return true;
} }
@ -1013,7 +1052,10 @@ namespace rsx
if (tex->is_locked()) if (tex->is_locked())
{ {
if (!tex->is_synchronized()) if (!tex->is_synchronized())
{
record_cache_miss(*tex);
tex->copy_texture(true, std::forward<Args>(extras)...); tex->copy_texture(true, std::forward<Args>(extras)...);
}
m_cache[get_block_address(tex->get_section_base())].remove_one(); m_cache[get_block_address(tex->get_section_base())].remove_one();
} }
@ -1074,31 +1116,31 @@ namespace rsx
return; return;
} }
value.misses++; value.misses += 2;
} }
template <typename ...Args> template <typename ...Args>
void flush_if_cache_miss_likely(texture_format fmt, u32 memory_address, u32 memory_size, Args&&... extras) bool flush_if_cache_miss_likely(texture_format fmt, u32 memory_address, u32 memory_size, Args&&... extras)
{ {
auto It = m_cache_miss_statistics_table.find(memory_address); auto It = m_cache_miss_statistics_table.find(memory_address);
if (It == m_cache_miss_statistics_table.end()) if (It == m_cache_miss_statistics_table.end())
{ {
m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt }; m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt };
return; return false;
} }
auto &value = It->second; auto &value = It->second;
if (value.format != fmt || value.block_size != memory_size) if (value.format != fmt || value.block_size < memory_size)
{ {
//Reset since the data has changed //Reset since the data has changed
//TODO: Keep track of all this information together //TODO: Keep track of all this information together
m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt }; m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt };
return; return false;
} }
//Properly synchronized - no miss //Properly synchronized - no miss
if (!value.misses) return; if (!value.misses) return false;
//Auto flush if this address keeps missing (not properly synchronized) //Auto flush if this address keeps missing (not properly synchronized)
if (value.misses > 16) if (value.misses > 16)
@ -1106,7 +1148,11 @@ namespace rsx
//TODO: Determine better way of setting threshold //TODO: Determine better way of setting threshold
if (!flush_memory_to_cache(memory_address, memory_size, true, std::forward<Args>(extras)...)) if (!flush_memory_to_cache(memory_address, memory_size, true, std::forward<Args>(extras)...))
value.misses--; value.misses--;
return true;
} }
return false;
} }
void purge_dirty() void purge_dirty()
@ -1575,7 +1621,7 @@ namespace rsx
} }
template <typename surface_store_type, typename blitter_type, typename ...Args> template <typename surface_store_type, typename blitter_type, typename ...Args>
bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, commandbuffer_type& cmd, surface_store_type& m_rtts, blitter_type& blitter, Args&&... extras) blit_op_result upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, commandbuffer_type& cmd, surface_store_type& m_rtts, blitter_type& blitter, Args&&... extras)
{ {
//Since we will have dst in vram, we can 'safely' ignore the swizzle flag //Since we will have dst in vram, we can 'safely' ignore the swizzle flag
//TODO: Verify correct behavior //TODO: Verify correct behavior
@ -1878,12 +1924,6 @@ namespace rsx
if (cached_dest) if (cached_dest)
{ {
if (!cached_dest->is_locked())
{
cached_dest->reprotect(utils::protection::no);
m_cache[get_block_address(cached_dest->get_section_base())].notify();
}
//Prep surface //Prep surface
auto channel_order = src_is_render_target ? rsx::texture_create_flags::native_component_order : auto channel_order = src_is_render_target ? rsx::texture_create_flags::native_component_order :
dst_is_argb8 ? rsx::texture_create_flags::default_component_order : dst_is_argb8 ? rsx::texture_create_flags::default_component_order :
@ -1924,6 +1964,18 @@ namespace rsx
m_texture_memory_in_use += dst.pitch * dst_dimensions.height; m_texture_memory_in_use += dst.pitch * dst_dimensions.height;
} }
else if (cached_dest)
{
if (!cached_dest->is_locked())
{
lock.upgrade();
cached_dest->reprotect(utils::protection::no);
m_cache[get_block_address(cached_dest->get_section_base())].notify();
}
cached_dest->touch();
}
const f32 scale = rsx::get_resolution_scale(); const f32 scale = rsx::get_resolution_scale();
if (src_is_render_target) if (src_is_render_target)
@ -1934,7 +1986,22 @@ namespace rsx
blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit); blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit);
notify_surface_changed(dst.rsx_address); notify_surface_changed(dst.rsx_address);
return true;
blit_op_result result = true;
result.is_depth = is_depth_blit;
if (cached_dest)
{
result.real_dst_address = cached_dest->get_section_base();
result.real_dst_size = cached_dest->get_section_size();
}
else
{
result.real_dst_address = dst.rsx_address;
result.real_dst_size = dst.pitch * dst_dimensions.height;
}
return result;
} }
void do_update() void do_update()

View File

@ -96,13 +96,10 @@ namespace gl
u32 vram_texture = 0; u32 vram_texture = 0;
u32 scaled_texture = 0; u32 scaled_texture = 0;
bool copied = false;
bool flushed = false;
bool is_depth = false; bool is_depth = false;
texture::format format = texture::format::rgba; texture::format format = texture::format::rgba;
texture::type type = texture::type::ubyte; texture::type type = texture::type::ubyte;
bool pack_unpack_swap_bytes = false;
rsx::surface_antialiasing aa_mode = rsx::surface_antialiasing::center_1_sample; rsx::surface_antialiasing aa_mode = rsx::surface_antialiasing::center_1_sample;
u8 get_pixel_size(texture::format fmt_, texture::type type_) u8 get_pixel_size(texture::format fmt_, texture::type type_)
@ -203,7 +200,7 @@ namespace gl
rsx::buffered_section::reset(base, size, policy); rsx::buffered_section::reset(base, size, policy);
flushed = false; flushed = false;
copied = false; synchronized = false;
is_depth = false; is_depth = false;
vram_texture = 0; vram_texture = 0;
@ -226,7 +223,7 @@ namespace gl
} }
flushed = false; flushed = false;
copied = false; synchronized = false;
is_depth = false; is_depth = false;
this->width = w; this->width = w;
@ -413,12 +410,12 @@ namespace gl
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
m_fence.reset(); m_fence.reset();
copied = true; synchronized = true;
} }
void fill_texture(gl::texture* tex) void fill_texture(gl::texture* tex)
{ {
if (!copied) if (!synchronized)
{ {
//LOG_WARNING(RSX, "Request to fill texture rejected because contents were not read"); //LOG_WARNING(RSX, "Request to fill texture rejected because contents were not read");
return; return;
@ -438,17 +435,20 @@ namespace gl
{ {
if (flushed) return true; //Already written, ignore if (flushed) return true; //Already written, ignore
if (!copied) bool result = true;
if (!synchronized)
{ {
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base); LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
copy_texture(); copy_texture();
if (!copied) if (!synchronized)
{ {
LOG_WARNING(RSX, "Nothing to copy; Setting section to readable and moving on..."); LOG_WARNING(RSX, "Nothing to copy; Setting section to readable and moving on...");
protect(utils::protection::ro); protect(utils::protection::ro);
return false; return false;
} }
result = false;
} }
m_fence.wait_for_signal(); m_fence.wait_for_signal();
@ -489,13 +489,15 @@ namespace gl
glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
return true; reset_write_statistics();
return result;
} }
void reprotect(utils::protection prot) void reprotect(utils::protection prot)
{ {
flushed = false; flushed = false;
copied = false; synchronized = false;
protect(prot); protect(prot);
} }
@ -553,7 +555,7 @@ namespace gl
bool is_synchronized() const bool is_synchronized() const
{ {
return copied; return synchronized;
} }
void set_flushed(bool state) void set_flushed(bool state)
@ -835,14 +837,31 @@ namespace gl
{ {
//TODO: More tests on byte order //TODO: More tests on byte order
//ARGB8+native+unswizzled is confirmed with Dark Souls II character preview //ARGB8+native+unswizzled is confirmed with Dark Souls II character preview
if (gcm_format == CELL_GCM_TEXTURE_A8R8G8B8) switch (gcm_format)
{
case CELL_GCM_TEXTURE_A8R8G8B8:
{ {
bool bgra = (flags == rsx::texture_create_flags::native_component_order); bool bgra = (flags == rsx::texture_create_flags::native_component_order);
cached.set_format(bgra? gl::texture::format::bgra : gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, false); cached.set_format(bgra ? gl::texture::format::bgra : gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, false);
break;
} }
else case CELL_GCM_TEXTURE_R5G6B5:
{ {
cached.set_format(gl::texture::format::rgb, gl::texture::type::ushort_5_6_5, true); cached.set_format(gl::texture::format::rgb, gl::texture::type::ushort_5_6_5, true);
break;
}
case CELL_GCM_TEXTURE_DEPTH24_D8:
{
cached.set_format(gl::texture::format::depth_stencil, gl::texture::type::uint_24_8, true);
break;
}
case CELL_GCM_TEXTURE_DEPTH16:
{
cached.set_format(gl::texture::format::depth, gl::texture::type::ushort, true);
break;
}
default:
fmt::throw_exception("Unexpected gcm format 0x%X" HERE, gcm_format);
} }
cached.make_flushable(); cached.make_flushable();
@ -974,7 +993,27 @@ namespace gl
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts) bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts)
{ {
void* unused = nullptr; void* unused = nullptr;
return upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter); auto result = upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter);
if (result.succeeded)
{
gl::texture::format fmt;
if (!result.is_depth)
{
fmt = dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8 ?
gl::texture::format::bgra : gl::texture::format::rgba;
}
else
{
fmt = dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8 ?
gl::texture::format::depth_stencil : gl::texture::format::depth;
}
flush_if_cache_miss_likely(fmt, result.real_dst_address, result.real_dst_size);
return true;
}
return false;
} }
}; };
} }

View File

@ -3187,30 +3187,42 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer); auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer);
m_current_command_buffer->begin(); m_current_command_buffer->begin();
if (auto deferred_op_dst = std::get<1>(result)) if (result.succeeded)
{
bool require_flush = false;
if (result.deferred)
{ {
//Requires manual scaling; depth/stencil surface //Requires manual scaling; depth/stencil surface
auto deferred_op_src = std::get<2>(result); auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, result.dst_image->info.format, 0);
auto src_view = std::get<3>(result);
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, deferred_op_dst->info.format, 0);
auto render_pass = m_render_passes[rp]; auto render_pass = m_render_passes[rp];
auto old_src_layout = deferred_op_src->current_layout; auto old_src_layout = result.src_image->current_layout;
auto old_dst_layout = deferred_op_dst->current_layout; auto old_dst_layout = result.dst_image->current_layout;
vk::change_image_layout(*m_current_command_buffer, deferred_op_src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); vk::change_image_layout(*m_current_command_buffer, result.src_image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
vk::change_image_layout(*m_current_command_buffer, deferred_op_dst, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); vk::change_image_layout(*m_current_command_buffer, result.dst_image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
m_depth_scaler->run(*m_current_command_buffer, deferred_op_dst->width(), deferred_op_dst->height(), deferred_op_dst, m_depth_scaler->run(*m_current_command_buffer, result.dst_image->width(), result.dst_image->height(), result.dst_image,
src_view, render_pass, m_framebuffers_to_clean); result.src_view, render_pass, m_framebuffers_to_clean);
vk::change_image_layout(*m_current_command_buffer, deferred_op_src, old_src_layout); vk::change_image_layout(*m_current_command_buffer, result.src_image, old_src_layout);
vk::change_image_layout(*m_current_command_buffer, deferred_op_dst, old_dst_layout); vk::change_image_layout(*m_current_command_buffer, result.dst_image, old_dst_layout);
require_flush = true;
} }
if (m_texture_cache.flush_if_cache_miss_likely(result.dst_image->info.format, result.real_dst_address, result.real_dst_size,
*m_current_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()))
require_flush = true;
if (require_flush)
flush_command_queue();
m_samplers_dirty.store(true); m_samplers_dirty.store(true);
return std::get<0>(result); return true;
}
return false;
} }
void VKGSRender::clear_zcull_stats(u32 type) void VKGSRender::clear_zcull_stats(u32 type)

View File

@ -19,9 +19,6 @@ namespace vk
//DMA relevant data //DMA relevant data
VkFence dma_fence = VK_NULL_HANDLE; VkFence dma_fence = VK_NULL_HANDLE;
bool synchronized = false;
bool flushed = false;
bool pack_unpack_swap_bytes = false;
u64 sync_timestamp = 0; u64 sync_timestamp = 0;
u64 last_use_timestamp = 0; u64 last_use_timestamp = 0;
vk::render_device* m_device = nullptr; vk::render_device* m_device = nullptr;
@ -351,6 +348,7 @@ namespace vk
} }
dma_buffer->unmap(); dma_buffer->unmap();
reset_write_statistics();
//Its highly likely that this surface will be reused, so we just leave resources in place //Its highly likely that this surface will be reused, so we just leave resources in place
return result; return result;
@ -371,6 +369,11 @@ namespace vk
protect(prot); protect(prot);
} }
void invalidate_cached()
{
synchronized = false;
}
bool is_synchronized() const bool is_synchronized() const
{ {
return synchronized; return synchronized;
@ -898,6 +901,18 @@ namespace vk
vk::insert_texture_barrier(cmd, tex); vk::insert_texture_barrier(cmd, tex);
} }
public:
struct vk_blit_op_result : public blit_op_result
{
bool deferred = false;
vk::image *src_image = nullptr;
vk::image *dst_image = nullptr;
vk::image_view *src_view = nullptr;
using blit_op_result::blit_op_result;
};
public: public:
void initialize(vk::render_device& device, vk::memory_type_mapping& memory_types, vk::gpu_formats_support& formats_support, void initialize(vk::render_device& device, vk::memory_type_mapping& memory_types, vk::gpu_formats_support& formats_support,
@ -980,13 +995,14 @@ namespace vk
return upload_texture(cmd, tex, m_rtts, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue)); return upload_texture(cmd, tex, m_rtts, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue));
} }
std::tuple<bool, vk::image*, vk::image*, vk::image_view*> blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd) vk_blit_op_result blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
{ {
struct blit_helper struct blit_helper
{ {
vk::command_buffer* commands; vk::command_buffer* commands;
blit_helper(vk::command_buffer *c) : commands(c) {} blit_helper(vk::command_buffer *c) : commands(c) {}
bool deferred = false;
vk::image* deferred_op_src = nullptr; vk::image* deferred_op_src = nullptr;
vk::image* deferred_op_dst = nullptr; vk::image* deferred_op_dst = nullptr;
@ -1019,17 +1035,19 @@ namespace vk
const auto dst_width = dst_area.x2 - dst_area.x1; const auto dst_width = dst_area.x2 - dst_area.x1;
const auto dst_height = dst_area.y2 - dst_area.y1; const auto dst_height = dst_area.y2 - dst_area.y1;
deferred_op_src = src;
deferred_op_dst = dst;
if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))
{ {
if (src_width != dst_width || src_height != dst_height || src->info.format != dst->info.format) if (src_width != dst_width || src_height != dst_height || src->info.format != dst->info.format)
{ {
//Scaled depth scaling //Scaled depth scaling
deferred_op_src = src; deferred = true;
deferred_op_dst = dst;
} }
} }
if (!deferred_op_src) if (!deferred)
{ {
copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height, copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format); dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format);
@ -1040,18 +1058,26 @@ namespace vk
} }
helper(&cmd); helper(&cmd);
bool reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue)); auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue));
if (helper.deferred_op_src == nullptr) vk_blit_op_result result = reply.succeeded;
return std::make_tuple(reply, nullptr, nullptr, nullptr); result.real_dst_address = reply.real_dst_address;
result.real_dst_size = reply.real_dst_size;
result.is_depth = reply.is_depth;
result.deferred = helper.deferred;
result.dst_image = helper.deferred_op_dst;
result.src_image = helper.deferred_op_src;
if (!helper.deferred)
return result;
VkImageSubresourceRange view_range = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1 }; VkImageSubresourceRange view_range = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1 };
auto tmp_view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), helper.deferred_op_src->value, VK_IMAGE_VIEW_TYPE_2D, auto tmp_view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), helper.deferred_op_src->value, VK_IMAGE_VIEW_TYPE_2D,
helper.deferred_op_src->info.format, helper.deferred_op_src->native_component_map, view_range); helper.deferred_op_src->info.format, helper.deferred_op_src->native_component_map, view_range);
auto src_view = tmp_view.get(); result.src_view = tmp_view.get();
m_discardable_storage.push_back(tmp_view); m_discardable_storage.push_back(tmp_view);
return std::make_tuple(reply, helper.deferred_op_dst, helper.deferred_op_src, src_view); return result;
} }
const u32 get_unreleased_textures_count() const override const u32 get_unreleased_textures_count() const override