rsx: Add cache pattern checking to blit engine resources

- The feature was implemented long ago but was never functional due to bugs
This commit is contained in:
kd-11 2018-02-10 19:21:16 +03:00
parent c191a98ec3
commit 1bd77c2f51
4 changed files with 203 additions and 59 deletions

View File

@ -40,6 +40,12 @@ namespace rsx
u16 rsx_pitch;
u32 gcm_format = 0;
bool pack_unpack_swap_bytes = false;
bool synchronized = false;
bool flushed = false;
u32 num_writes = 0;
u32 required_writes = 1;
u64 cache_tag = 0;
@ -79,6 +85,17 @@ namespace rsx
return false;
}
// Record one write event against this section; used to learn how many
// blit-engine writes typically land before the section is read back.
void touch()
{
    ++num_writes;
}
// Promote the write count accumulated since the last reset to the new
// expected write count, then clear the counter.
// NOTE(review): presumably called after a flush so the next round of writes
// can be compared against the previous cycle's pattern — confirm with caller.
void reset_write_statistics()
{
required_writes = num_writes;
num_writes = 0;
}
void set_view_flags(rsx::texture_create_flags flags)
{
view_flags = flags;
@ -148,6 +165,14 @@ namespace rsx
{
return sampler_status;
}
// Heuristic completeness check: a blit-engine destination is considered
// fully written once the observed write count reaches the count recorded
// by the last reset_write_statistics(); sections from any other upload
// context are always reported as complete.
bool writes_likely_completed() const
{
    const bool is_blit_target = (context == rsx::texture_upload_context::blit_engine_dst);
    return !is_blit_target || (num_writes >= required_writes);
}
};
template <typename commandbuffer_type, typename section_storage_type, typename image_resource_type, typename image_view_type, typename image_storage_type, typename texture_format>
@ -246,6 +271,17 @@ namespace rsx
{}
};
// Result of an upload_scaled_image (blit engine) operation, describing the
// destination range that was actually written so the caller can drive the
// cache-miss / flush heuristics.
struct blit_op_result
{
bool succeeded = false;
// True when the destination is a depth/stencil surface.
bool is_depth = false;
// Real (possibly section-aligned) destination address and size in guest memory.
u32 real_dst_address = 0;
u32 real_dst_size = 0;
// Intentionally implicit: lets helpers write 'return true/false;'.
blit_op_result(bool success) : succeeded(success)
{}
};
struct sampled_image_descriptor : public sampled_image_descriptor_base
{
image_view_type image_handle = 0;
@ -425,7 +461,7 @@ namespace rsx
if (tex.cache_tag == cache_tag) continue; //already processed
if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
auto overlapped = tex.overlaps_page(trampled_range, address, strict_range_check);
auto overlapped = tex.overlaps_page(trampled_range, address, strict_range_check || tex.get_context() == rsx::texture_upload_context::blit_engine_dst);
if (std::get<0>(overlapped))
{
auto &new_range = std::get<1>(overlapped);
@ -883,6 +919,9 @@ namespace rsx
if (skip_synchronized && region->is_synchronized())
return false;
if (!region->writes_likely_completed())
return true;
region->copy_texture(false, std::forward<Args>(extra)...);
return true;
}
@ -1013,7 +1052,10 @@ namespace rsx
if (tex->is_locked())
{
if (!tex->is_synchronized())
{
record_cache_miss(*tex);
tex->copy_texture(true, std::forward<Args>(extras)...);
}
m_cache[get_block_address(tex->get_section_base())].remove_one();
}
@ -1074,31 +1116,31 @@ namespace rsx
return;
}
value.misses++;
value.misses += 2;
}
template <typename ...Args>
void flush_if_cache_miss_likely(texture_format fmt, u32 memory_address, u32 memory_size, Args&&... extras)
bool flush_if_cache_miss_likely(texture_format fmt, u32 memory_address, u32 memory_size, Args&&... extras)
{
auto It = m_cache_miss_statistics_table.find(memory_address);
if (It == m_cache_miss_statistics_table.end())
{
m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt };
return;
return false;
}
auto &value = It->second;
if (value.format != fmt || value.block_size != memory_size)
if (value.format != fmt || value.block_size < memory_size)
{
//Reset since the data has changed
//TODO: Keep track of all this information together
m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt };
return;
return false;
}
//Properly synchronized - no miss
if (!value.misses) return;
if (!value.misses) return false;
//Auto flush if this address keeps missing (not properly synchronized)
if (value.misses > 16)
@ -1106,7 +1148,11 @@ namespace rsx
//TODO: Determine better way of setting threshold
if (!flush_memory_to_cache(memory_address, memory_size, true, std::forward<Args>(extras)...))
value.misses--;
return true;
}
return false;
}
void purge_dirty()
@ -1575,7 +1621,7 @@ namespace rsx
}
template <typename surface_store_type, typename blitter_type, typename ...Args>
bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, commandbuffer_type& cmd, surface_store_type& m_rtts, blitter_type& blitter, Args&&... extras)
blit_op_result upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, commandbuffer_type& cmd, surface_store_type& m_rtts, blitter_type& blitter, Args&&... extras)
{
//Since we will have dst in vram, we can 'safely' ignore the swizzle flag
//TODO: Verify correct behavior
@ -1878,12 +1924,6 @@ namespace rsx
if (cached_dest)
{
if (!cached_dest->is_locked())
{
cached_dest->reprotect(utils::protection::no);
m_cache[get_block_address(cached_dest->get_section_base())].notify();
}
//Prep surface
auto channel_order = src_is_render_target ? rsx::texture_create_flags::native_component_order :
dst_is_argb8 ? rsx::texture_create_flags::default_component_order :
@ -1924,6 +1964,18 @@ namespace rsx
m_texture_memory_in_use += dst.pitch * dst_dimensions.height;
}
else if (cached_dest)
{
if (!cached_dest->is_locked())
{
lock.upgrade();
cached_dest->reprotect(utils::protection::no);
m_cache[get_block_address(cached_dest->get_section_base())].notify();
}
cached_dest->touch();
}
const f32 scale = rsx::get_resolution_scale();
if (src_is_render_target)
@ -1934,7 +1986,22 @@ namespace rsx
blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit);
notify_surface_changed(dst.rsx_address);
return true;
blit_op_result result = true;
result.is_depth = is_depth_blit;
if (cached_dest)
{
result.real_dst_address = cached_dest->get_section_base();
result.real_dst_size = cached_dest->get_section_size();
}
else
{
result.real_dst_address = dst.rsx_address;
result.real_dst_size = dst.pitch * dst_dimensions.height;
}
return result;
}
void do_update()

View File

@ -96,13 +96,10 @@ namespace gl
u32 vram_texture = 0;
u32 scaled_texture = 0;
bool copied = false;
bool flushed = false;
bool is_depth = false;
texture::format format = texture::format::rgba;
texture::type type = texture::type::ubyte;
bool pack_unpack_swap_bytes = false;
rsx::surface_antialiasing aa_mode = rsx::surface_antialiasing::center_1_sample;
u8 get_pixel_size(texture::format fmt_, texture::type type_)
@ -203,7 +200,7 @@ namespace gl
rsx::buffered_section::reset(base, size, policy);
flushed = false;
copied = false;
synchronized = false;
is_depth = false;
vram_texture = 0;
@ -226,7 +223,7 @@ namespace gl
}
flushed = false;
copied = false;
synchronized = false;
is_depth = false;
this->width = w;
@ -413,12 +410,12 @@ namespace gl
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
m_fence.reset();
copied = true;
synchronized = true;
}
void fill_texture(gl::texture* tex)
{
if (!copied)
if (!synchronized)
{
//LOG_WARNING(RSX, "Request to fill texture rejected because contents were not read");
return;
@ -438,17 +435,20 @@ namespace gl
{
if (flushed) return true; //Already written, ignore
if (!copied)
bool result = true;
if (!synchronized)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
copy_texture();
if (!copied)
if (!synchronized)
{
LOG_WARNING(RSX, "Nothing to copy; Setting section to readable and moving on...");
protect(utils::protection::ro);
return false;
}
result = false;
}
m_fence.wait_for_signal();
@ -489,13 +489,15 @@ namespace gl
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
return true;
reset_write_statistics();
return result;
}
void reprotect(utils::protection prot)
{
flushed = false;
copied = false;
synchronized = false;
protect(prot);
}
@ -553,7 +555,7 @@ namespace gl
bool is_synchronized() const
{
return copied;
return synchronized;
}
void set_flushed(bool state)
@ -835,14 +837,31 @@ namespace gl
{
//TODO: More tests on byte order
//ARGB8+native+unswizzled is confirmed with Dark Souls II character preview
if (gcm_format == CELL_GCM_TEXTURE_A8R8G8B8)
switch (gcm_format)
{
case CELL_GCM_TEXTURE_A8R8G8B8:
{
bool bgra = (flags == rsx::texture_create_flags::native_component_order);
cached.set_format(bgra? gl::texture::format::bgra : gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, false);
cached.set_format(bgra ? gl::texture::format::bgra : gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, false);
break;
}
else
case CELL_GCM_TEXTURE_R5G6B5:
{
cached.set_format(gl::texture::format::rgb, gl::texture::type::ushort_5_6_5, true);
break;
}
case CELL_GCM_TEXTURE_DEPTH24_D8:
{
cached.set_format(gl::texture::format::depth_stencil, gl::texture::type::uint_24_8, true);
break;
}
case CELL_GCM_TEXTURE_DEPTH16:
{
cached.set_format(gl::texture::format::depth, gl::texture::type::ushort, true);
break;
}
default:
fmt::throw_exception("Unexpected gcm format 0x%X" HERE, gcm_format);
}
cached.make_flushable();
@ -974,7 +993,27 @@ namespace gl
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts)
{
void* unused = nullptr;
return upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter);
auto result = upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter);
if (result.succeeded)
{
gl::texture::format fmt;
if (!result.is_depth)
{
fmt = dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8 ?
gl::texture::format::bgra : gl::texture::format::rgba;
}
else
{
fmt = dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8 ?
gl::texture::format::depth_stencil : gl::texture::format::depth;
}
flush_if_cache_miss_likely(fmt, result.real_dst_address, result.real_dst_size);
return true;
}
return false;
}
};
}

View File

@ -3187,30 +3187,42 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer);
m_current_command_buffer->begin();
if (auto deferred_op_dst = std::get<1>(result))
if (result.succeeded)
{
//Requires manual scaling; depth/stencil surface
auto deferred_op_src = std::get<2>(result);
auto src_view = std::get<3>(result);
bool require_flush = false;
if (result.deferred)
{
//Requires manual scaling; depth/stencil surface
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, result.dst_image->info.format, 0);
auto render_pass = m_render_passes[rp];
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, deferred_op_dst->info.format, 0);
auto render_pass = m_render_passes[rp];
auto old_src_layout = result.src_image->current_layout;
auto old_dst_layout = result.dst_image->current_layout;
auto old_src_layout = deferred_op_src->current_layout;
auto old_dst_layout = deferred_op_dst->current_layout;
vk::change_image_layout(*m_current_command_buffer, result.src_image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
vk::change_image_layout(*m_current_command_buffer, result.dst_image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
vk::change_image_layout(*m_current_command_buffer, deferred_op_src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
vk::change_image_layout(*m_current_command_buffer, deferred_op_dst, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
m_depth_scaler->run(*m_current_command_buffer, result.dst_image->width(), result.dst_image->height(), result.dst_image,
result.src_view, render_pass, m_framebuffers_to_clean);
m_depth_scaler->run(*m_current_command_buffer, deferred_op_dst->width(), deferred_op_dst->height(), deferred_op_dst,
src_view, render_pass, m_framebuffers_to_clean);
vk::change_image_layout(*m_current_command_buffer, result.src_image, old_src_layout);
vk::change_image_layout(*m_current_command_buffer, result.dst_image, old_dst_layout);
vk::change_image_layout(*m_current_command_buffer, deferred_op_src, old_src_layout);
vk::change_image_layout(*m_current_command_buffer, deferred_op_dst, old_dst_layout);
require_flush = true;
}
if (m_texture_cache.flush_if_cache_miss_likely(result.dst_image->info.format, result.real_dst_address, result.real_dst_size,
*m_current_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()))
require_flush = true;
if (require_flush)
flush_command_queue();
m_samplers_dirty.store(true);
return true;
}
m_samplers_dirty.store(true);
return std::get<0>(result);
return false;
}
void VKGSRender::clear_zcull_stats(u32 type)

View File

@ -19,9 +19,6 @@ namespace vk
//DMA relevant data
VkFence dma_fence = VK_NULL_HANDLE;
bool synchronized = false;
bool flushed = false;
bool pack_unpack_swap_bytes = false;
u64 sync_timestamp = 0;
u64 last_use_timestamp = 0;
vk::render_device* m_device = nullptr;
@ -351,6 +348,7 @@ namespace vk
}
dma_buffer->unmap();
reset_write_statistics();
//Its highly likely that this surface will be reused, so we just leave resources in place
return result;
@ -371,6 +369,11 @@ namespace vk
protect(prot);
}
// Mark the section's downloaded copy as stale so the next flush re-reads
// the data instead of reusing previously synchronized contents.
void invalidate_cached()
{
synchronized = false;
}
bool is_synchronized() const
{
return synchronized;
@ -898,6 +901,18 @@ namespace vk
vk::insert_texture_barrier(cmd, tex);
}
public:
// Vulkan-specific blit result: extends the generic blit_op_result with the
// images involved so that deferred (manually scaled depth/stencil) blits
// can be completed by the renderer afterwards.
struct vk_blit_op_result : public blit_op_result
{
// True when scaling could not be done with a plain copy and must be
// performed manually by the caller (depth/stencil path).
bool deferred = false;
vk::image *src_image = nullptr;
vk::image *dst_image = nullptr;
vk::image_view *src_view = nullptr;
using blit_op_result::blit_op_result;
};
public:
void initialize(vk::render_device& device, vk::memory_type_mapping& memory_types, vk::gpu_formats_support& formats_support,
@ -980,13 +995,14 @@ namespace vk
return upload_texture(cmd, tex, m_rtts, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue));
}
std::tuple<bool, vk::image*, vk::image*, vk::image_view*> blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
vk_blit_op_result blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
{
struct blit_helper
{
vk::command_buffer* commands;
blit_helper(vk::command_buffer *c) : commands(c) {}
bool deferred = false;
vk::image* deferred_op_src = nullptr;
vk::image* deferred_op_dst = nullptr;
@ -1019,17 +1035,19 @@ namespace vk
const auto dst_width = dst_area.x2 - dst_area.x1;
const auto dst_height = dst_area.y2 - dst_area.y1;
deferred_op_src = src;
deferred_op_dst = dst;
if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))
{
if (src_width != dst_width || src_height != dst_height || src->info.format != dst->info.format)
{
//Scaled depth scaling
deferred_op_src = src;
deferred_op_dst = dst;
deferred = true;
}
}
if (!deferred_op_src)
if (!deferred)
{
copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format);
@ -1040,18 +1058,26 @@ namespace vk
}
helper(&cmd);
bool reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue));
auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, m_memory_types, const_cast<const VkQueue>(m_submit_queue));
if (helper.deferred_op_src == nullptr)
return std::make_tuple(reply, nullptr, nullptr, nullptr);
vk_blit_op_result result = reply.succeeded;
result.real_dst_address = reply.real_dst_address;
result.real_dst_size = reply.real_dst_size;
result.is_depth = reply.is_depth;
result.deferred = helper.deferred;
result.dst_image = helper.deferred_op_dst;
result.src_image = helper.deferred_op_src;
if (!helper.deferred)
return result;
VkImageSubresourceRange view_range = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1 };
auto tmp_view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), helper.deferred_op_src->value, VK_IMAGE_VIEW_TYPE_2D,
helper.deferred_op_src->info.format, helper.deferred_op_src->native_component_map, view_range);
auto src_view = tmp_view.get();
result.src_view = tmp_view.get();
m_discardable_storage.push_back(tmp_view);
return std::make_tuple(reply, helper.deferred_op_dst, helper.deferred_op_src, src_view);
return result;
}
const u32 get_unreleased_textures_count() const override