diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index f2935bbab7..0b30149354 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -40,6 +40,12 @@ namespace rsx u16 rsx_pitch; u32 gcm_format = 0; + bool pack_unpack_swap_bytes = false; + + bool synchronized = false; + bool flushed = false; + u32 num_writes = 0; + u32 required_writes = 1; u64 cache_tag = 0; @@ -79,6 +85,17 @@ namespace rsx return false; } + void touch() + { + num_writes++; + } + + void reset_write_statistics() + { + required_writes = num_writes; + num_writes = 0; + } + void set_view_flags(rsx::texture_create_flags flags) { view_flags = flags; @@ -148,6 +165,14 @@ namespace rsx { return sampler_status; } + + bool writes_likely_completed() const + { + if (context == rsx::texture_upload_context::blit_engine_dst) + return num_writes >= required_writes; + + return true; + } }; template @@ -246,6 +271,17 @@ namespace rsx {} }; + struct blit_op_result + { + bool succeeded = false; + bool is_depth = false; + u32 real_dst_address = 0; + u32 real_dst_size = 0; + + blit_op_result(bool success) : succeeded(success) + {} + }; + struct sampled_image_descriptor : public sampled_image_descriptor_base { image_view_type image_handle = 0; @@ -425,7 +461,7 @@ namespace rsx if (tex.cache_tag == cache_tag) continue; //already processed if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better - auto overlapped = tex.overlaps_page(trampled_range, address, strict_range_check); + auto overlapped = tex.overlaps_page(trampled_range, address, strict_range_check || tex.get_context() == rsx::texture_upload_context::blit_engine_dst); if (std::get<0>(overlapped)) { auto &new_range = std::get<1>(overlapped); @@ -883,6 +919,9 @@ namespace rsx if (skip_synchronized && region->is_synchronized()) return false; + if (!region->writes_likely_completed()) + return true; + region->copy_texture(false, std::forward(extra)...); return true; } @@ -1013,7 +1052,10 @@ namespace rsx if (tex->is_locked()) { if (!tex->is_synchronized()) + { + record_cache_miss(*tex); tex->copy_texture(true, std::forward(extras)...); + } m_cache[get_block_address(tex->get_section_base())].remove_one(); } @@ -1074,31 +1116,31 @@ namespace rsx return; } - value.misses++; + value.misses += 2; } template - void flush_if_cache_miss_likely(texture_format fmt, u32 memory_address, u32 memory_size, Args&&... extras) + bool flush_if_cache_miss_likely(texture_format fmt, u32 memory_address, u32 memory_size, Args&&... extras) { auto It = m_cache_miss_statistics_table.find(memory_address); if (It == m_cache_miss_statistics_table.end()) { m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt }; - return; + return false; } auto &value = It->second; - if (value.format != fmt || value.block_size != memory_size) + if (value.format != fmt || value.block_size < memory_size) { //Reset since the data has changed //TODO: Keep track of all this information together m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt }; - return; + return false; } //Properly synchronized - no miss - if (!value.misses) return; + if (!value.misses) return false; //Auto flush if this address keeps missing (not properly synchronized) if (value.misses > 16) @@ -1106,7 +1148,11 @@ namespace rsx //TODO: Determine better way of setting threshold if (!flush_memory_to_cache(memory_address, memory_size, true, std::forward(extras)...)) value.misses--; + + return true; } + + return false; } void purge_dirty() @@ -1575,7 +1621,7 @@ namespace rsx } template - bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, commandbuffer_type& cmd, surface_store_type& m_rtts, blitter_type& blitter, Args&&... extras) + blit_op_result upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, commandbuffer_type& cmd, surface_store_type& m_rtts, blitter_type& blitter, Args&&... extras) { //Since we will have dst in vram, we can 'safely' ignore the swizzle flag //TODO: Verify correct behavior @@ -1878,12 +1924,6 @@ namespace rsx if (cached_dest) { - if (!cached_dest->is_locked()) - { - cached_dest->reprotect(utils::protection::no); - m_cache[get_block_address(cached_dest->get_section_base())].notify(); - } - //Prep surface auto channel_order = src_is_render_target ? rsx::texture_create_flags::native_component_order : dst_is_argb8 ? rsx::texture_create_flags::default_component_order : @@ -1924,6 +1964,18 @@ namespace rsx m_texture_memory_in_use += dst.pitch * dst_dimensions.height; } + else if (cached_dest) + { + if (!cached_dest->is_locked()) + { + lock.upgrade(); + + cached_dest->reprotect(utils::protection::no); + m_cache[get_block_address(cached_dest->get_section_base())].notify(); + } + + cached_dest->touch(); + } const f32 scale = rsx::get_resolution_scale(); if (src_is_render_target) @@ -1934,7 +1986,22 @@ namespace rsx blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit); notify_surface_changed(dst.rsx_address); - return true; + + blit_op_result result = true; + result.is_depth = is_depth_blit; + + if (cached_dest) + { + result.real_dst_address = cached_dest->get_section_base(); + result.real_dst_size = cached_dest->get_section_size(); + } + else + { + result.real_dst_address = dst.rsx_address; + result.real_dst_size = dst.pitch * dst_dimensions.height; + } + + return result; } void do_update() diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 5ad24aaba5..083324a6d0 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -96,13 +96,10 @@ namespace gl u32 vram_texture = 0; u32 scaled_texture = 0; - bool copied = false; - bool flushed = false; bool is_depth = false; texture::format format = texture::format::rgba; texture::type type = texture::type::ubyte; - bool pack_unpack_swap_bytes = false; rsx::surface_antialiasing aa_mode = rsx::surface_antialiasing::center_1_sample; u8 get_pixel_size(texture::format fmt_, texture::type type_) @@ -203,7 +200,7 @@ namespace gl rsx::buffered_section::reset(base, size, policy); flushed = false; - copied = false; + synchronized = false; is_depth = false; vram_texture = 0; @@ -226,7 +223,7 @@ namespace gl } flushed = false; - copied = false; + synchronized = false; is_depth = false; this->width = w; @@ -413,12 +410,12 @@ namespace gl glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); m_fence.reset(); - copied = true; + synchronized = true; } void fill_texture(gl::texture* tex) { - if (!copied) + if (!synchronized) { //LOG_WARNING(RSX, "Request to fill texture rejected because contents were not read"); return; @@ -438,17 +435,20 @@ namespace gl { if (flushed) return true; //Already written, ignore - if (!copied) + bool result = true; + if (!synchronized) { LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base); copy_texture(); - if (!copied) + if (!synchronized) { LOG_WARNING(RSX, "Nothing to copy; Setting section to readable and moving on..."); protect(utils::protection::ro); return false; } + + result = false; } m_fence.wait_for_signal(); @@ -489,13 +489,15 @@ namespace gl glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - return true; + reset_write_statistics(); + + return result; } void reprotect(utils::protection prot) { flushed = false; - copied = false; + synchronized = false; protect(prot); } @@ -553,7 +555,7 @@ namespace gl bool is_synchronized() const { - return copied; + return synchronized; } void set_flushed(bool state) @@ -835,14 +837,31 @@ namespace gl { //TODO: More tests on byte order //ARGB8+native+unswizzled is confirmed with Dark Souls II character preview - if (gcm_format == CELL_GCM_TEXTURE_A8R8G8B8) + switch (gcm_format) + { + case CELL_GCM_TEXTURE_A8R8G8B8: { bool bgra = (flags == rsx::texture_create_flags::native_component_order); - cached.set_format(bgra? gl::texture::format::bgra : gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, false); + cached.set_format(bgra ? gl::texture::format::bgra : gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, false); + break; } - else + case CELL_GCM_TEXTURE_R5G6B5: { cached.set_format(gl::texture::format::rgb, gl::texture::type::ushort_5_6_5, true); + break; + } + case CELL_GCM_TEXTURE_DEPTH24_D8: + { + cached.set_format(gl::texture::format::depth_stencil, gl::texture::type::uint_24_8, true); + break; + } + case CELL_GCM_TEXTURE_DEPTH16: + { + cached.set_format(gl::texture::format::depth, gl::texture::type::ushort, true); + break; + } + default: + fmt::throw_exception("Unexpected gcm format 0x%X" HERE, gcm_format); } cached.make_flushable(); @@ -974,7 +993,27 @@ namespace gl bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts) { void* unused = nullptr; - return upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter); + auto result = upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter); + + if (result.succeeded) + { + gl::texture::format fmt; + if (!result.is_depth) + { + fmt = dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8 ? + gl::texture::format::bgra : gl::texture::format::rgba; + } + else + { + fmt = dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8 ? + gl::texture::format::depth_stencil : gl::texture::format::depth; + } + + flush_if_cache_miss_likely(fmt, result.real_dst_address, result.real_dst_size); + return true; + } + + return false; } }; } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 08e8ec8745..56de9572fc 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -3187,30 +3187,42 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer); m_current_command_buffer->begin(); - if (auto deferred_op_dst = std::get<1>(result)) + if (result.succeeded) { - //Requires manual scaling; depth/stencil surface - auto deferred_op_src = std::get<2>(result); - auto src_view = std::get<3>(result); + bool require_flush = false; + if (result.deferred) + { + //Requires manual scaling; depth/stencil surface + auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, result.dst_image->info.format, 0); + auto render_pass = m_render_passes[rp]; - auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, deferred_op_dst->info.format, 0); - auto render_pass = m_render_passes[rp]; + auto old_src_layout = result.src_image->current_layout; + auto old_dst_layout = result.dst_image->current_layout; - auto old_src_layout = deferred_op_src->current_layout; - auto old_dst_layout = deferred_op_dst->current_layout; + vk::change_image_layout(*m_current_command_buffer, result.src_image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + vk::change_image_layout(*m_current_command_buffer, result.dst_image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - vk::change_image_layout(*m_current_command_buffer, deferred_op_src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - vk::change_image_layout(*m_current_command_buffer, deferred_op_dst, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + m_depth_scaler->run(*m_current_command_buffer, result.dst_image->width(), result.dst_image->height(), result.dst_image, + result.src_view, render_pass, m_framebuffers_to_clean); - m_depth_scaler->run(*m_current_command_buffer, deferred_op_dst->width(), deferred_op_dst->height(), deferred_op_dst, - src_view, render_pass, m_framebuffers_to_clean); + vk::change_image_layout(*m_current_command_buffer, result.src_image, old_src_layout); + vk::change_image_layout(*m_current_command_buffer, result.dst_image, old_dst_layout); - vk::change_image_layout(*m_current_command_buffer, deferred_op_src, old_src_layout); - vk::change_image_layout(*m_current_command_buffer, deferred_op_dst, old_dst_layout); + require_flush = true; + } + + if (m_texture_cache.flush_if_cache_miss_likely(result.dst_image->info.format, result.real_dst_address, result.real_dst_size, + *m_current_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue())) + require_flush = true; + + if (require_flush) + flush_command_queue(); + + m_samplers_dirty.store(true); + return true; } - m_samplers_dirty.store(true); - return std::get<0>(result); + return false; } void VKGSRender::clear_zcull_stats(u32 type) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 6a58038d72..e784ea1289 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -19,9 +19,6 @@ namespace vk //DMA relevant data VkFence dma_fence = VK_NULL_HANDLE; - bool synchronized = false; - bool flushed = false; - bool pack_unpack_swap_bytes = false; u64 sync_timestamp = 0; u64 last_use_timestamp = 0; vk::render_device* m_device = nullptr; @@ -351,6 +348,7 @@ namespace vk } dma_buffer->unmap(); + reset_write_statistics(); //Its highly likely that this surface will be reused, so we just leave resources in place return result; @@ -371,6 +369,11 @@ namespace vk protect(prot); } + void invalidate_cached() + { + synchronized = false; + } + bool is_synchronized() const { return synchronized; @@ -898,6 +901,18 @@ namespace vk vk::insert_texture_barrier(cmd, tex); } + public: + + struct vk_blit_op_result : public blit_op_result + { + bool deferred = false; + vk::image *src_image = nullptr; + vk::image *dst_image = nullptr; + vk::image_view *src_view = nullptr; + + using blit_op_result::blit_op_result; + }; + public: void initialize(vk::render_device& device, vk::memory_type_mapping& memory_types, vk::gpu_formats_support& formats_support, @@ -980,13 +995,14 @@ namespace vk return upload_texture(cmd, tex, m_rtts, cmd, m_memory_types, const_cast(m_submit_queue)); } - std::tuple blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd) + vk_blit_op_result blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd) { struct blit_helper { vk::command_buffer* commands; blit_helper(vk::command_buffer *c) : commands(c) {} + bool deferred = false; vk::image* deferred_op_src = nullptr; vk::image* deferred_op_dst = nullptr; @@ -1019,17 +1035,19 @@ namespace vk const auto dst_width = dst_area.x2 - dst_area.x1; const auto dst_height = dst_area.y2 - dst_area.y1; + deferred_op_src = src; + deferred_op_dst = dst; + if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (src_width != dst_width || src_height != dst_height || src->info.format != dst->info.format) { //Scaled depth scaling - deferred_op_src = src; - deferred_op_dst = dst; + deferred = true; } } - if (!deferred_op_src) + if (!deferred) { copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height, dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format); @@ -1040,18 +1058,26 @@ namespace vk } helper(&cmd); - bool reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, m_memory_types, const_cast(m_submit_queue)); + auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, m_memory_types, const_cast(m_submit_queue)); - if (helper.deferred_op_src == nullptr) - return std::make_tuple(reply, nullptr, nullptr, nullptr); + vk_blit_op_result result = reply.succeeded; + result.real_dst_address = reply.real_dst_address; + result.real_dst_size = reply.real_dst_size; + result.is_depth = reply.is_depth; + result.deferred = helper.deferred; + result.dst_image = helper.deferred_op_dst; + result.src_image = helper.deferred_op_src; + + if (!helper.deferred) + return result; VkImageSubresourceRange view_range = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1 }; auto tmp_view = std::make_unique(*vk::get_current_renderer(), helper.deferred_op_src->value, VK_IMAGE_VIEW_TYPE_2D, helper.deferred_op_src->info.format, helper.deferred_op_src->native_component_map, view_range); - auto src_view = tmp_view.get(); + result.src_view = tmp_view.get(); m_discardable_storage.push_back(tmp_view); - return std::make_tuple(reply, helper.deferred_op_dst, helper.deferred_op_src, src_view); + return result; } const u32 get_unreleased_textures_count() const override