diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 1a67af21b5..1288a36d27 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2,6 +2,7 @@ #include "../rsx_cache.h" #include "../rsx_utils.h" +#include "texture_cache_predictor.h" #include "texture_cache_utils.h" #include "TextureUtils.h" @@ -11,17 +12,26 @@ extern u64 get_system_time(); namespace rsx { - template - class texture_cache : public rsx::texture_cache_base + template + class texture_cache { - static_assert(std::is_base_of, section_storage_type>::value, "section_storage_type must derive from rsx::cached_texture_section"); - public: - using baseclass = typename rsx::texture_cache_base; - using ranged_storage = typename rsx::ranged_storage; + using traits = _traits; + + using commandbuffer_type = typename traits::commandbuffer_type; + using section_storage_type = typename traits::section_storage_type; + using image_resource_type = typename traits::image_resource_type; + using image_view_type = typename traits::image_view_type; + using image_storage_type = typename traits::image_storage_type; + using texture_format = typename traits::texture_format; + + using predictor_type = texture_cache_predictor; + using ranged_storage = rsx::ranged_storage; using ranged_storage_block = typename ranged_storage::block_type; private: + static_assert(std::is_base_of, section_storage_type>::value, "section_storage_type must derive from rsx::cached_texture_section"); + /** * Helper structs/enums */ @@ -236,14 +246,13 @@ namespace rsx shared_mutex m_cache_mutex; ranged_storage m_storage; std::unordered_multimap> m_temporary_subresource_cache; + predictor_type m_predictor; std::atomic m_cache_update_tag = {0}; address_range read_only_range; address_range no_access_range; - std::unordered_map m_cache_miss_statistics_table; - //Map of messages to only emit once std::unordered_set m_once_only_messages_set; @@ -258,26 +267,28 @@ namespace rsx const u32 m_max_zombie_objects = 64; //Limit on how many texture objects to keep around for reuse after they are invalidated //Other statistics - const u32 m_cache_miss_threshold = 8; // How many times an address can miss speculative writing before it is considered high priority - std::atomic m_num_flush_requests = { 0 }; - std::atomic m_num_cache_misses = { 0 }; - std::atomic m_num_cache_speculative_writes = { 0 }; - std::atomic m_num_cache_mispredictions = { 0 }; + std::atomic m_flushes_this_frame = { 0 }; + std::atomic m_misses_this_frame = { 0 }; + std::atomic m_speculations_this_frame = { 0 }; + std::atomic m_unavoidable_hard_faults_this_frame = { 0 }; + static const u32 m_predict_max_flushes_per_frame = 100; // Above this number the predictions are disabled // Invalidation static const bool invalidation_ignore_unsynchronized = true; // If true, unsynchronized sections don't get forcefully flushed unless they overlap the fault range static const bool invalidation_keep_ro_during_read = true; // If true, RO sections are not invalidated during read faults + + /** * Virtual Methods */ virtual image_view_type create_temporary_subresource_view(commandbuffer_type&, image_resource_type* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector) = 0; virtual image_view_type create_temporary_subresource_view(commandbuffer_type&, image_storage_type* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector) = 0; virtual section_storage_type* create_new_texture(commandbuffer_type&, const address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u32 gcm_format, - rsx::texture_upload_context context, rsx::texture_dimension_extended type, texture_create_flags flags) = 0; + rsx::texture_upload_context context, rsx::texture_dimension_extended type, texture_create_flags flags) = 0; virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, texture_upload_context context, - const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) = 0; + const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) = 0; virtual void enforce_surface_creation_type(section_storage_type& section, u32 gcm_format, texture_create_flags expected) = 0; virtual void insert_texture_barrier(commandbuffer_type&, image_storage_type* tex) = 0; virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::vector& sources, const texture_channel_remap_t& remap_vector) = 0; @@ -286,7 +297,14 @@ namespace rsx virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0; virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0; + public: + virtual void destroy() = 0; + virtual bool is_depth_texture(u32, u32) = 0; + virtual void on_section_destroyed(section_storage_type& section) + {} + + protected: /** * Helpers */ @@ -382,20 +400,11 @@ namespace rsx const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp; if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp()) { - m_num_cache_mispredictions++; - m_num_cache_misses++; surface->copy_texture(true, std::forward(extras)...); } } - if (!surface->flush(std::forward(extras)...)) - { - // Missed address, note this - // TODO: Lower severity when successful to keep the cache from overworking - record_cache_miss(*surface); - } - - m_num_flush_requests++; + surface->flush(std::forward(extras)...); } data.flushed = true; @@ -916,16 +925,20 @@ namespace rsx public: - texture_cache() : m_storage(this) {} + texture_cache() : m_storage(this), m_predictor(this) {} ~texture_cache() {} - virtual void destroy() = 0; - virtual bool is_depth_texture(u32, u32) = 0; - virtual void on_frame_end() = 0; - void clear() { m_storage.clear(); + m_predictor.clear(); + } + + virtual void on_frame_end() + { + m_temporary_subresource_cache.clear(); + m_predictor.on_frame_end(); + reset_frame_statistics(); } @@ -966,6 +979,7 @@ namespace rsx { auto &block = m_storage.block_for(range); + section_storage_type *dimensions_mismatch = nullptr; section_storage_type *best_fit = nullptr; section_storage_type *reuse = nullptr; #ifdef TEXTURE_CACHE_DEBUG @@ -988,6 +1002,10 @@ namespace rsx res = &tex; #endif } + else if (dimensions_mismatch == nullptr) + { + dimensions_mismatch = &tex; + } } else if (best_fit == nullptr && tex.can_be_reused()) { @@ -1006,9 +1024,9 @@ namespace rsx return res; #endif - if (best_fit != nullptr) + if (dimensions_mismatch != nullptr) { - auto &tex = *best_fit; + auto &tex = *dimensions_mismatch; LOG_WARNING(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters (width=%d vs %d; height=%d vs %d; depth=%d vs %d; mipmaps=%d vs %d)", range.start, width, tex.get_width(), height, tex.get_height(), depth, tex.get_depth(), mipmaps, tex.get_mipmaps()); } @@ -1198,38 +1216,6 @@ namespace rsx } public: - template - bool flush_memory_to_cache(const address_range &memory_range, bool skip_synchronized, u32 allowed_types_mask, Args&&... extra) - { - // Temporarily disable prediction if more than 50% of predictions are wrong. Also lower prediction pressure - if (m_num_cache_mispredictions > (m_num_cache_speculative_writes / 2) && - m_num_cache_mispredictions > 8) - return false; - - std::lock_guard lock(m_cache_mutex); - section_storage_type* region = find_flushable_section(memory_range); - - // Check if section was released, usually if cell overwrites a currently bound render target - if (region == nullptr) - return true; - - // Skip if already synchronized - if (skip_synchronized && region->is_synchronized()) - return false; - - // Skip if type is not allowed - if ((allowed_types_mask & region->get_context()) == 0) - return true; - - // Skip if more writes to the same target are likely - if (!region->writes_likely_completed()) - return true; - - region->copy_texture(false, std::forward(extra)...); - m_num_cache_speculative_writes++; - return true; - } - template bool load_memory_from_cache(const address_range &memory_range, Args&&... extras) { @@ -1294,71 +1280,47 @@ namespace rsx return true; } - void record_cache_miss(section_storage_type &tex) - { - m_num_cache_misses++; - - const auto& memory_range = tex.get_section_range(); - const auto fmt = tex.get_format(); - - auto It = m_cache_miss_statistics_table.find(memory_range); - if (It == m_cache_miss_statistics_table.end()) - { - m_cache_miss_statistics_table[memory_range] = { 1, fmt }; - return; - } - - auto &value = It->second; - if (value.format != fmt) - { - value = { 1, fmt }; - return; - } - - value.misses += 2; - } - template - bool flush_if_cache_miss_likely(texture_format fmt, const address_range &memory_range, Args&&... extras) + bool flush_if_cache_miss_likely(const address_range &range, Args&&... extras) { - auto It = m_cache_miss_statistics_table.find(memory_range); - if (It == m_cache_miss_statistics_table.end()) - { - m_cache_miss_statistics_table[memory_range] = { 0, fmt }; + u32 cur_flushes_this_frame = (m_flushes_this_frame + m_speculations_this_frame); + + if (cur_flushes_this_frame > m_predict_max_flushes_per_frame) return false; - } - auto &value = It->second; + auto& block = m_storage.block_for(range); + if (block.empty()) + return false; - if (value.format != fmt) + reader_lock lock(m_cache_mutex); + + // Try to find matching regions + bool result = false; + for (auto ®ion : block) { - //Reset since the data has changed - //TODO: Keep track of all this information together - value = { 0, fmt }; + if (region.is_dirty() || region.is_synchronized() || !region.is_flushable()) + continue; + + if (!region.matches(range)) + continue; + + if (!region.tracked_by_predictor()) + continue; + + if (!m_predictor.predict(region)) + continue; + + lock.upgrade(); + + region.copy_texture(false, std::forward(extras)...); + result = true; + + cur_flushes_this_frame++; + if (cur_flushes_this_frame > m_predict_max_flushes_per_frame) + return result; } - // By default, blit targets are always to be tested for readback - u32 flush_mask = rsx::texture_upload_context::blit_engine_dst; - - // Auto flush if this address keeps missing (not properly synchronized) - if (value.misses >= m_cache_miss_threshold) - { - // Disable prediction if memory is flagged as flush_always - if (m_flush_always_cache.find(memory_range) == m_flush_always_cache.end()) - { - // TODO: Determine better way of setting threshold - // Allow all types - flush_mask = 0xFF; - } - } - - if (!flush_memory_to_cache(memory_range, true, flush_mask, std::forward(extras)...) && - value.misses > 0) - { - value.misses--; - } - - return true; + return result; } void purge_unreleased_sections() @@ -2337,12 +2299,6 @@ namespace rsx { lock.upgrade(); - if (cached_dest->is_locked() && cached_dest->is_synchronized()) - { - // Premature readback - m_num_cache_mispredictions++; - } - u32 mem_length; const u32 mem_base = dst_address - cached_dest->get_section_base(); @@ -2457,55 +2413,22 @@ namespace rsx } } - void reset_frame_statistics() + predictor_type& get_predictor() { - m_num_flush_requests.store(0u); - m_num_cache_misses.store(0u); - m_num_cache_mispredictions.store(0u); - m_num_cache_speculative_writes.store(0u); + return m_predictor; } - virtual const u32 get_unreleased_textures_count() const - { - return m_storage.m_unreleased_texture_objects; - } - - virtual const u64 get_texture_memory_in_use() const - { - return m_storage.m_texture_memory_in_use; - } - - virtual u32 get_num_flush_requests() const - { - return m_num_flush_requests; - } - - virtual u32 get_num_cache_mispredictions() const - { - return m_num_cache_mispredictions; - } - - virtual u32 get_num_cache_speculative_writes() const - { - return m_num_cache_speculative_writes; - } - - virtual f32 get_cache_miss_ratio() const - { - const auto num_flushes = m_num_flush_requests.load(); - return (num_flushes == 0u) ? 0.f : (f32)m_num_cache_misses.load() / num_flushes; - } /** * The read only texture invalidate flag is set if a read only texture is trampled by framebuffer memory * If set, all cached read only textures are considered invalid and should be re-fetched from the texture cache */ - virtual void clear_ro_tex_invalidate_intr() + void clear_ro_tex_invalidate_intr() { read_only_tex_invalidate = false; } - virtual bool get_ro_tex_invalidate_intr() const + bool get_ro_tex_invalidate_intr() const { return read_only_tex_invalidate; } @@ -2521,5 +2444,83 @@ namespace rsx auto ptr = vm::get_super_ptr>(texaddr); return *ptr == texaddr; } + + + /** + * Per-frame statistics + */ + void reset_frame_statistics() + { + m_flushes_this_frame.store(0u); + m_misses_this_frame.store(0u); + m_speculations_this_frame.store(0u); + m_unavoidable_hard_faults_this_frame.store(0u); + } + + void on_flush() + { + m_flushes_this_frame++; + } + + void on_speculative_flush() + { + m_speculations_this_frame++; + } + + void on_misprediction() + { + m_predictor.on_misprediction(); + } + + void on_miss(const section_storage_type& section) + { + m_misses_this_frame++; + + if (section.get_memory_read_flags() == memory_read_flags::flush_always) + { + m_unavoidable_hard_faults_this_frame++; + } + } + + virtual const u32 get_unreleased_textures_count() const + { + return m_storage.m_unreleased_texture_objects; + } + + const u64 get_texture_memory_in_use() const + { + return m_storage.m_texture_memory_in_use; + } + + u32 get_num_flush_requests() const + { + return m_flushes_this_frame; + } + + u32 get_num_cache_mispredictions() const + { + return m_predictor.m_mispredictions_this_frame; + } + + u32 get_num_cache_speculative_writes() const + { + return m_speculations_this_frame; + } + + u32 get_num_cache_misses() const + { + return m_misses_this_frame; + } + + u32 get_num_unavoidable_hard_faults() const + { + return m_unavoidable_hard_faults_this_frame; + } + + f32 get_cache_miss_ratio() const + { + const auto num_flushes = m_flushes_this_frame.load(); + return (num_flushes == 0u) ? 0.f : (f32)m_misses_this_frame.load() / num_flushes; + } }; } diff --git a/rpcs3/Emu/RSX/Common/texture_cache_predictor.h b/rpcs3/Emu/RSX/Common/texture_cache_predictor.h new file mode 100644 index 0000000000..a55f757cda --- /dev/null +++ b/rpcs3/Emu/RSX/Common/texture_cache_predictor.h @@ -0,0 +1,416 @@ +#pragma once + +#include "../rsx_cache.h" +#include "../rsx_utils.h" +#include "TextureUtils.h" + + +namespace rsx +{ + /** + * Predictor Entry History Queue + */ + template + class texture_cache_predictor_entry_history_queue + { + std::array buffer; + u32 m_front; + u32 m_size; + + public: + texture_cache_predictor_entry_history_queue() + { + clear(); + } + + void clear() + { + m_front = buffer_size; + m_size = 0; + } + + size_t size() const + { + return m_size; + } + + bool empty() const + { + return m_size == 0; + } + + void push(u32 val) + { + if (m_size < buffer_size) + { + m_size++; + } + + if (m_front == 0) + { + m_front = buffer_size - 1; + } + else + { + m_front--; + } + + AUDIT(m_front < buffer_size); + buffer[m_front] = val; + } + + u32 operator[](u32 pos) const + { + AUDIT(pos < m_size); + AUDIT(m_front < buffer_size); + return buffer[(m_front + pos) % buffer_size]; + } + }; + + /** + * Predictor key + */ + template + struct texture_cache_predictor_key + { + using texture_format = typename traits::texture_format; + using section_storage_type = typename traits::section_storage_type; + + address_range cpu_range; + texture_format format; + texture_upload_context context; + + // Constructors + texture_cache_predictor_key() = default; + + texture_cache_predictor_key(const address_range& _cpu_range, texture_format _format, texture_upload_context _context) + : cpu_range(_cpu_range) + , format(_format) + , context(_context) + {} + + texture_cache_predictor_key(const section_storage_type& section) + : cpu_range(section.get_section_range()) + , format(section.get_format()) + , context(section.get_context()) + {} + + // Methods + bool operator==(const texture_cache_predictor_key& other) const + { + return cpu_range == other.cpu_range && format == other.format && context == other.context; + } + + bool operator!=(const texture_cache_predictor_key& other) const + { + return !operator==(other); + } + }; + + /** + * Predictor entry + */ + template + class texture_cache_predictor_entry + { + public: + using key_type = texture_cache_predictor_key; + using section_storage_type = typename traits::section_storage_type; + + const key_type key; + + private: + u32 m_guessed_writes; + u32 m_writes_since_last_flush; + + static const u32 max_write_history_size = 16; + texture_cache_predictor_entry_history_queue write_history; + + static const u32 max_confidence = 8; // Cannot be more "confident" than this value + static const u32 confident_threshold = 6; // We are confident if confidence >= confidence_threshold + static const u32 starting_confidence = 3; + + static const u32 confidence_guessed_flush = 2; // Confidence granted when we correctly guess there will be a flush + static const u32 confidence_guessed_no_flush = 1; // Confidence granted when we correctly guess there won't be a flush + static const u32 confidence_incorrect_guess = -2; // Confidence granted when our guess is incorrect + static const u32 confidence_mispredict = -4; // Confidence granted when a speculative flush is incorrect + + u32 confidence; + + public: + texture_cache_predictor_entry(key_type _key) + : key(_key) + { + reset(); + } + ~texture_cache_predictor_entry() = default; + + u32 get_confidence() const + { + return confidence; + } + + bool is_confident() const + { + return confidence >= confident_threshold; + } + + bool key_matches(const key_type& other_key) const + { + return key == other_key; + } + + bool key_matches(const section_storage_type& section) const + { + return key_matches(key_type(section)); + } + + void update_confidence(s32 delta) + { + if (delta > 0) + { + confidence += delta; + + if (confidence > max_confidence) + { + confidence = max_confidence; + } + } + else if (delta < 0) + { + u32 neg_delta = static_cast(-delta); + if (confidence > neg_delta) + { + confidence -= neg_delta; + } + else + { + confidence = 0; + } + } + } + + private: + // Returns how many writes we think there will be this time (i.e. between the last flush and the next flush) + // Returning UINT32_MAX means no guess is possible + u32 guess_number_of_writes() const + { + const auto history_size = write_history.size(); + + if (history_size == 0) + { + // We need some history to be able to take a guess + return UINT32_MAX; + } + else if (history_size == 1) + { + // If we have one history entry, we assume it will repeat + return write_history[0]; + } + else + { + // For more than one entry, we try and find a pattern, and assume it holds + + const u32 stop_when_found_matches = 4; + u32 matches_found = 0; + u32 guess = UINT32_MAX; + + for (u32 i = 0; i < history_size; i++) + { + // If we are past the number of writes, it's not the same as this time + if (write_history[i] < m_writes_since_last_flush) + continue; + + u32 cur_matches_found = 1; + + // Try to find more matches + for (u32 j = 0; i + j + 1 < history_size; j++) + { + if (write_history[i + j + 1] != write_history[j]) + break; + + // We found another matching value + cur_matches_found++; + + if (cur_matches_found >= stop_when_found_matches) + break; + } + + // If we found more matches than all other comparisons, we take a guess + if (cur_matches_found > matches_found) + { + guess = write_history[i]; + matches_found = cur_matches_found; + } + + if (matches_found >= stop_when_found_matches) + break; + } + + return guess; + } + } + + void calculate_next_guess(bool reset) + { + if (reset || m_guessed_writes == UINT32_MAX || m_writes_since_last_flush > m_guessed_writes) + { + m_guessed_writes = guess_number_of_writes(); + } + } + + public: + void reset() + { + confidence = starting_confidence; + m_writes_since_last_flush = 0; + m_guessed_writes = UINT32_MAX; + write_history.clear(); + } + + void on_flush() + { + update_confidence(is_flush_likely() ? confidence_guessed_flush : confidence_incorrect_guess); + + // Update history + write_history.push(m_writes_since_last_flush); + m_writes_since_last_flush = 0; + + calculate_next_guess(true); + } + + void on_write(bool mispredict) + { + if (mispredict || is_flush_likely()) + { + update_confidence(mispredict ? confidence_mispredict : confidence_incorrect_guess); + } + else + { + update_confidence(confidence_guessed_no_flush); + } + + m_writes_since_last_flush++; + + calculate_next_guess(false); + } + + bool is_flush_likely() const + { + return m_writes_since_last_flush >= m_guessed_writes; + } + + // Returns true if we believe that the next operation on this memory range will be a flush + bool predict() const + { + // Disable prediction if we have a low confidence in our predictions + if (!is_confident()) + return false; + + return is_flush_likely(); + } + }; + + /** + * Predictor + */ + template + class texture_cache_predictor + { + public: + // Traits + using section_storage_type = typename traits::section_storage_type; + using texture_cache_type = typename traits::texture_cache_base_type; + + using key_type = texture_cache_predictor_key; + using mapped_type = texture_cache_predictor_entry; + using map_type = std::unordered_map; + + using value_type = typename map_type::value_type; + using size_type = typename map_type::size_type; + using iterator = typename map_type::iterator; + using const_iterator = typename map_type::const_iterator; + + private: + // Member variables + map_type m_entries; + texture_cache_type* m_tex_cache; + + public: + // Per-frame statistics + std::atomic m_mispredictions_this_frame = {0}; + + // Constructors + texture_cache_predictor(texture_cache_type* tex_cache) + : m_tex_cache(tex_cache){}; + ~texture_cache_predictor() = default; + + // Trait wrappers + constexpr iterator begin() noexcept { return m_entries.begin(); } + constexpr const_iterator begin() const noexcept { return m_entries.begin(); } + inline iterator end() noexcept { return m_entries.end(); } + inline const_iterator end() const noexcept { return m_entries.end(); } + inline iterator at(size_type pos) { return m_entries.at(pos); } + inline const_iterator at(size_type pos) const { return m_entries.at(pos); } + bool empty() const noexcept { return m_entries.empty(); } + size_type size() const noexcept { return m_entries.size(); } + void clear() { m_entries.clear(); } + + mapped_type& operator[](const key_type& key) + { + auto ret = m_entries.try_emplace(key, key); + AUDIT(ret.first != m_entries.end()); + return ret.first->second; + } + mapped_type& operator[](const section_storage_type& section) + { + return (*this)[key_type(section)]; + } + + // Callbacks + void on_frame_end() + { + m_mispredictions_this_frame = 0; + } + + void on_misprediction() + { + m_mispredictions_this_frame++; + } + + // Returns true if the next operation is likely to be a read + bool predict(const key_type& key) const + { + // Use "find" to avoid allocating entries if they do not exist + const_iterator entry_it = m_entries.find(key); + if (entry_it == m_entries.end()) + { + return false; + } + else + { + return entry_it->second.predict(); + } + } + + bool predict(const section_storage_type& section) const + { + return predict(key_type(section)); + } + }; +} // namespace rsx + +namespace std +{ + template + struct hash> + { + std::size_t operator()(const rsx::texture_cache_predictor_key& k) const + { + size_t result = std::hash{}(k.cpu_range); + result ^= static_cast(k.format); + result ^= (static_cast(k.context) << 16); + return result; + } + }; +}; // namespace std \ No newline at end of file diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h index 7c22adc1a7..21a5a0d247 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h @@ -2,6 +2,7 @@ #include "../rsx_cache.h" #include "../rsx_utils.h" +#include "texture_cache_predictor.h" #include "TextureUtils.h" #include @@ -303,19 +304,9 @@ namespace rsx }; - /** * Ranged storage */ - template - class texture_cache_base - { - public: - virtual void on_memory_read_flags_changed(section_storage_type §ion, rsx::memory_read_flags flags) = 0; - virtual void on_section_destroyed(section_storage_type & /*section*/) {}; - }; - - template class ranged_storage_block { @@ -566,7 +557,7 @@ namespace rsx }; - template + template class ranged_storage { public: @@ -575,9 +566,9 @@ namespace rsx static constexpr u32 num_blocks = u32{0x1'0000'0000ull / block_size}; static_assert((num_blocks > 0) && (u64{num_blocks} *block_size == 0x1'0000'0000ull), "Invalid block_size/num_blocks"); - using section_storage_type = _section_storage_type; - using texture_cache_type = texture_cache_base; - using block_type = ranged_storage_block; + using section_storage_type = typename traits::section_storage_type; + using texture_cache_type = typename traits::texture_cache_base_type; + using block_type = ranged_storage_block; private: block_type blocks[num_blocks]; @@ -956,13 +947,16 @@ namespace rsx /** * Cached Texture Section */ - template + template class cached_texture_section : public rsx::buffered_section { public: - using ranged_storage_type = ranged_storage; + using ranged_storage_type = ranged_storage; using ranged_storage_block_type = ranged_storage_block; - using texture_cache_type = typename ranged_storage_type::texture_cache_type; + using texture_cache_type = typename traits::texture_cache_base_type; + using predictor_type = texture_cache_predictor; + using predictor_key_type = typename predictor_type::key_type; + using predictor_entry_type = typename predictor_type::mapped_type; protected: ranged_storage_type *m_storage = nullptr; @@ -1000,15 +994,17 @@ namespace rsx u64 sync_timestamp = 0; bool synchronized = false; bool flushed = false; - - u32 num_writes = 0; - std::deque read_history; + bool speculatively_flushed = false; rsx::memory_read_flags readback_behaviour = rsx::memory_read_flags::flush_once; rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order; rsx::texture_upload_context context = rsx::texture_upload_context::shader_read; rsx::texture_dimension_extended image_type = rsx::texture_dimension_extended::texture_dimension_2d; + predictor_type *m_predictor = nullptr; + size_t m_predictor_key_hash = 0; + predictor_entry_type *m_predictor_entry = nullptr; + public: u64 cache_tag = 0; u64 last_write_tag = 0; @@ -1019,7 +1015,7 @@ namespace rsx } cached_texture_section() = default; - cached_texture_section(ranged_storage_block_type *block) : m_block(block), m_storage(&block->get_storage()), m_tex_cache(&block->get_texture_cache()) + cached_texture_section(ranged_storage_block_type *block) : m_block(block), m_storage(&block->get_storage()), m_tex_cache(&block->get_texture_cache()), m_predictor(&m_tex_cache->get_predictor()) { update_unreleased(); } @@ -1030,6 +1026,7 @@ namespace rsx m_block = block; m_storage = &block->get_storage(); m_tex_cache = &block->get_texture_cache(); + m_predictor = &m_tex_cache->get_predictor(); update_unreleased(); } @@ -1064,12 +1061,12 @@ namespace rsx sync_timestamp = 0ull; synchronized = false; flushed = false; + speculatively_flushed = false; cache_tag = 0ull; last_write_tag = 0ull; - num_writes = 0; - read_history.clear(); + m_predictor_entry = nullptr; readback_behaviour = rsx::memory_read_flags::flush_once; view_flags = rsx::texture_create_flags::default_component_order; @@ -1196,6 +1193,8 @@ namespace rsx m_block->on_section_range_invalid(*derived()); //m_storage->on_section_range_invalid(*derived()); + m_predictor_entry = nullptr; + speculatively_flushed = false; buffered_section::invalidate_range(); } @@ -1302,25 +1301,65 @@ namespace rsx protect(prot, range); } + /** + * Prediction + */ + bool tracked_by_predictor() const + { + // We do not update the predictor statistics for flush_always sections + return get_context() != texture_upload_context::shader_read && get_memory_read_flags() != memory_read_flags::flush_always; + } + + void on_flush(bool miss) + { + speculatively_flushed = false; + + if (miss) + { + m_tex_cache->on_miss(*derived()); + } + m_tex_cache->on_flush(); + + if (tracked_by_predictor()) + { + get_predictor_entry().on_flush(); + } + } + + void on_speculative_flush() + { + speculatively_flushed = true; + + m_tex_cache->on_speculative_flush(); + } + + void touch(u64 tag) + { + last_write_tag = tag; + + if (tracked_by_predictor()) + { + get_predictor_entry().on_write(speculatively_flushed); + } + + if (speculatively_flushed) + { + m_tex_cache->on_misprediction(); + } + } + /** * Misc */ - void touch(u64 tag) + predictor_entry_type& get_predictor_entry() { - num_writes++; - last_write_tag = tag; - } - - void reset_write_statistics() - { - if (read_history.size() == 16) + // If we don't have a predictor entry, or the key has changed + if (m_predictor_entry == nullptr || !m_predictor_entry->key_matches(*derived())) { - read_history.pop_back(); + m_predictor_entry = &((*m_predictor)[*derived()]); } - - read_history.push_front(num_writes); - num_writes = 0; + return *m_predictor_entry; } void set_view_flags(rsx::texture_create_flags flags) @@ -1416,52 +1455,6 @@ namespace rsx return readback_behaviour; } - bool writes_likely_completed() const - { - // TODO: Move this to the miss statistics block - const auto num_records = read_history.size(); - - if (num_records == 0) - { - return false; - } - else if (num_records == 1) - { - return num_writes >= read_history.front(); - } - else - { - const u32 last = read_history.front(); - const u32 prev_last = read_history[1]; - - if (last == prev_last && num_records <= 3) - { - return num_writes >= last; - } - - u32 compare = UINT32_MAX; - for (u32 n = 1; n < num_records; n++) - { - if (read_history[n] == last) - { - // Uncertain, but possible - compare = read_history[n - 1]; - - if (num_records > (n + 1)) - { - if (read_history[n + 1] == prev_last) - { - // Confirmed with 2 values - break; - } - } - } - } - - return num_writes >= compare; - } - } - u64 get_sync_timestamp() const { return sync_timestamp; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index f144426b66..5762d4d60b 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1739,10 +1739,12 @@ void GLGSRender::flip(int buffer) const auto num_flushes = m_gl_texture_cache.get_num_flush_requests(); const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions(); const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes(); + const auto num_misses = m_gl_texture_cache.get_num_cache_misses(); + const auto num_unavoidable = m_gl_texture_cache.get_num_unavoidable_hard_faults(); const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100); m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures)); m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size)); - m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate)); + m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate)); } m_frame->flip(m_context); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index a4e8a13376..671fff80cb 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -243,7 +243,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk const utils::address_range surface_range = m_surface_info[i].get_memory_range(); m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); - m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range); + m_gl_texture_cache.flush_if_cache_miss_likely(surface_range); } if (std::get<0>(m_rtts.m_bound_render_targets[i])) @@ -275,7 +275,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk const utils::address_range surface_range = m_depth_surface_info.get_memory_range(); m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); - m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range); + m_gl_texture_cache.flush_if_cache_miss_likely(surface_range); } auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index e29f713e1d..d4e67e7202 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -141,11 +141,27 @@ namespace gl } }; - class cached_texture_section : public rsx::cached_texture_section - { - using baseclass = rsx::cached_texture_section; - private: + class cached_texture_section; + class texture_cache; + + struct texture_cache_traits + { + using commandbuffer_type = void*; + using section_storage_type = gl::cached_texture_section; + using texture_cache_type = gl::texture_cache; + using texture_cache_base_type = rsx::texture_cache; + using image_resource_type = gl::texture*; + using image_view_type = gl::texture_view*; + using image_storage_type = gl::texture; + using texture_format = gl::texture::format; + }; + + class cached_texture_section : public rsx::cached_texture_section + { + using baseclass = rsx::cached_texture_section; + friend baseclass; + fence m_fence; u32 pbo_id = 0; u32 pbo_size = 0; @@ -348,10 +364,15 @@ namespace gl } } - void copy_texture(bool=false) + void copy_texture(bool manage_lifetime) { ASSERT(exists()); + if (!manage_lifetime) + { + baseclass::on_speculative_flush(); + } + if (!pbo_id) { init_buffer(); @@ -474,7 +495,7 @@ namespace gl if (!synchronized) { LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base()); - copy_texture(); + copy_texture(true); if (!synchronized) { @@ -592,35 +613,33 @@ namespace gl glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); - reset_write_statistics(); + baseclass::on_flush(!result); return result; } void destroy() { - if (!is_locked() && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty()) + if (!is_locked() && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty() && managed_texture.get() == nullptr) //Already destroyed return; - if (pbo_id == 0) - { - //Read-only texture, destroy texture memory - managed_texture.reset(); - } - else + if (pbo_id != 0) { //Destroy pbo cache since vram texture is managed elsewhere glDeleteBuffers(1, &pbo_id); scaled_texture.reset(); } + managed_texture.reset(); vram_texture = nullptr; pbo_id = 0; pbo_size = 0; if (!m_fence.is_empty()) + { m_fence.destroy(); + } baseclass::on_section_resources_destroyed(); } @@ -695,11 +714,13 @@ namespace gl } }; - class texture_cache : public rsx::texture_cache + class texture_cache : public rsx::texture_cache { private: - using baseclass = rsx::texture_cache; + using baseclass = rsx::texture_cache; + friend baseclass; + private: struct discardable_storage { std::unique_ptr image; @@ -942,7 +963,7 @@ namespace gl const auto swizzle = get_component_mapping(gcm_format, flags); image->set_native_component_layout(swizzle); - auto& cached = *find_cached_texture(rsx_range, true, true, width, width, depth, mipmaps); + auto& cached = *find_cached_texture(rsx_range, true, true, width, height, depth, mipmaps); ASSERT(!cached.is_locked()); // Prepare section @@ -1126,8 +1147,8 @@ namespace gl } clear_temporary_subresources(); - m_temporary_subresource_cache.clear(); - reset_frame_statistics(); + + baseclass::on_frame_end(); } bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts) @@ -1151,7 +1172,7 @@ namespace gl gl::texture::format::depth_stencil : gl::texture::format::depth; } - flush_if_cache_miss_likely(fmt, result.to_address_range()); + flush_if_cache_miss_likely(result.to_address_range()); } return true; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 5778e1cac0..f408549e48 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2849,7 +2849,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) const utils::address_range rsx_range = m_surface_info[i].get_memory_range(); m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once); - m_texture_cache.flush_if_cache_miss_likely(old_format, rsx_range, *m_current_command_buffer, m_swapchain->get_graphics_queue()); + m_texture_cache.flush_if_cache_miss_likely(rsx_range, *m_current_command_buffer, m_swapchain->get_graphics_queue()); } m_surface_info[i].address = m_surface_info[i].pitch = 0; @@ -2865,7 +2865,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format); const utils::address_range surface_range = m_depth_surface_info.get_memory_range(); m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); - m_texture_cache.flush_if_cache_miss_likely(old_format, surface_range, *m_current_command_buffer, m_swapchain->get_graphics_queue()); + m_texture_cache.flush_if_cache_miss_likely(surface_range, *m_current_command_buffer, m_swapchain->get_graphics_queue()); } m_depth_surface_info.address = m_depth_surface_info.pitch = 0; @@ -3381,17 +3381,19 @@ void VKGSRender::flip(int buffer) m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", m_draw_time)); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", m_flip_time)); - const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count(); + const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count(); const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024); const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024); const auto num_flushes = m_texture_cache.get_num_flush_requests(); const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions(); const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes(); + const auto num_misses = m_texture_cache.get_num_cache_misses(); + const auto num_unavoidable = m_texture_cache.get_num_unavoidable_hard_faults(); const auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures)); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size)); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate)); } vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, present_layout, subres); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 76383c2431..a596971376 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -13,9 +13,25 @@ extern u64 get_system_time(); namespace vk { - class cached_texture_section : public rsx::cached_texture_section + class cached_texture_section; + class texture_cache; + + struct texture_cache_traits { - using baseclass = typename rsx::cached_texture_section; + using commandbuffer_type = vk::command_buffer; + using section_storage_type = vk::cached_texture_section; + using texture_cache_type = vk::texture_cache; + using texture_cache_base_type = rsx::texture_cache; + using image_resource_type = vk::image*; + using image_view_type = vk::image_view*; + using image_storage_type = vk::image; + using texture_format = VkFormat; + }; + + class cached_texture_section : public rsx::cached_texture_section + { + using baseclass = typename rsx::cached_texture_section; + friend baseclass; std::unique_ptr managed_texture = nullptr; @@ -83,6 +99,7 @@ namespace vk return; m_tex_cache->on_section_destroyed(*this); + vram_texture = nullptr; ASSERT(managed_texture.get() == nullptr); release_dma_resources(); @@ -102,11 +119,13 @@ namespace vk vk::image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap) { + ASSERT(vram_texture != nullptr); return vram_texture->get_view(remap_encoding, remap); } vk::image_view* get_raw_view() { + ASSERT(vram_texture != nullptr); return vram_texture->get_view(0xAAE4, rsx::default_remap_vector); } @@ -115,13 +134,14 @@ namespace vk return managed_texture.get(); } - std::unique_ptr& get_texture() + std::unique_ptr& get_texture() { return managed_texture; } - VkFormat get_format() + VkFormat get_format() const { + ASSERT(vram_texture != nullptr); return vram_texture->info.format; } @@ -135,6 +155,11 @@ namespace vk { ASSERT(exists()); + if (!manage_cb_lifetime) + { + baseclass::on_speculative_flush(); + } + if (m_device == nullptr) { m_device = &cmd.get_command_pool().get_owner(); @@ -342,7 +367,7 @@ namespace vk } dma_buffer->unmap(); - reset_write_statistics(); + baseclass::on_flush(!result); //Its highly likely that this surface will be reused, so we just leave resources in place return result; @@ -417,10 +442,14 @@ namespace vk } }; - class texture_cache : public rsx::texture_cache + class texture_cache : public rsx::texture_cache { + private: + using baseclass = rsx::texture_cache; + friend baseclass; + public: - virtual void on_section_destroyed(cached_texture_section& tex) + void on_section_destroyed(cached_texture_section& tex) override { if (tex.is_managed()) { @@ -430,7 +459,6 @@ namespace vk } private: - using baseclass = rsx::texture_cache; //Vulkan internals vk::render_device* m_device; @@ -1036,6 +1064,8 @@ namespace vk m_temporary_subresource_cache.clear(); reset_frame_statistics(); + + baseclass::on_frame_end(); } template @@ -1229,7 +1259,7 @@ namespace vk { if (reply.real_dst_size) { - flush_if_cache_miss_likely(helper.format, reply.to_address_range(), cmd, m_submit_queue); + flush_if_cache_miss_likely(reply.to_address_range(), cmd, m_submit_queue); } return true; @@ -1240,12 +1270,7 @@ namespace vk const u32 get_unreleased_textures_count() const override { - return m_storage.m_unreleased_texture_objects + (u32)m_discardable_storage.size(); - } - - const u64 get_texture_memory_in_use() const override - { - return m_storage.m_texture_memory_in_use; + return baseclass::get_unreleased_textures_count() + (u32)m_discardable_storage.size(); } const u32 get_temporary_memory_in_use() diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index b1e8a8bd03..dd25ea47d1 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -530,6 +530,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 5d0bd65ffd..918cd635c6 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -1456,5 +1456,8 @@ Emu\GPU\RSX + + Emu\GPU\RSX\Common + \ No newline at end of file