Implement dedicated texture cache predictor

Rui Pinheiro 2018-10-28 13:59:39 +00:00 committed by kd-11
parent af360b78f2
commit 9d1cdccb1a
10 changed files with 751 additions and 287 deletions

View File

@ -2,6 +2,7 @@
#include "../rsx_cache.h"
#include "../rsx_utils.h"
#include "texture_cache_predictor.h"
#include "texture_cache_utils.h"
#include "TextureUtils.h"
@ -11,17 +12,26 @@ extern u64 get_system_time();
namespace rsx
{
template <typename commandbuffer_type, typename section_storage_type, typename image_resource_type, typename image_view_type, typename image_storage_type, typename texture_format>
class texture_cache : public rsx::texture_cache_base<section_storage_type>
template <typename derived_type, typename _traits>
class texture_cache
{
static_assert(std::is_base_of<rsx::cached_texture_section<section_storage_type>, section_storage_type>::value, "section_storage_type must derive from rsx::cached_texture_section");
public:
using baseclass = typename rsx::texture_cache_base<section_storage_type>;
using ranged_storage = typename rsx::ranged_storage<section_storage_type>;
using traits = _traits;
using commandbuffer_type = typename traits::commandbuffer_type;
using section_storage_type = typename traits::section_storage_type;
using image_resource_type = typename traits::image_resource_type;
using image_view_type = typename traits::image_view_type;
using image_storage_type = typename traits::image_storage_type;
using texture_format = typename traits::texture_format;
using predictor_type = texture_cache_predictor<traits>;
using ranged_storage = rsx::ranged_storage<traits>;
using ranged_storage_block = typename ranged_storage::block_type;
private:
static_assert(std::is_base_of<rsx::cached_texture_section<section_storage_type, traits>, section_storage_type>::value, "section_storage_type must derive from rsx::cached_texture_section");
/**
* Helper structs/enums
*/
@ -236,14 +246,13 @@ namespace rsx
shared_mutex m_cache_mutex;
ranged_storage m_storage;
std::unordered_multimap<u32, std::pair<deferred_subresource, image_view_type>> m_temporary_subresource_cache;
predictor_type m_predictor;
std::atomic<u64> m_cache_update_tag = {0};
address_range read_only_range;
address_range no_access_range;
std::unordered_map<address_range, framebuffer_memory_characteristics> m_cache_miss_statistics_table;
//Map of messages to only emit once
std::unordered_set<std::string> m_once_only_messages_set;
@ -258,17 +267,19 @@ namespace rsx
const u32 m_max_zombie_objects = 64; //Limit on how many texture objects to keep around for reuse after they are invalidated
//Other statistics
const u32 m_cache_miss_threshold = 8; // How many times an address can miss speculative writing before it is considered high priority
std::atomic<u32> m_num_flush_requests = { 0 };
std::atomic<u32> m_num_cache_misses = { 0 };
std::atomic<u32> m_num_cache_speculative_writes = { 0 };
std::atomic<u32> m_num_cache_mispredictions = { 0 };
std::atomic<u32> m_flushes_this_frame = { 0 };
std::atomic<u32> m_misses_this_frame = { 0 };
std::atomic<u32> m_speculations_this_frame = { 0 };
std::atomic<u32> m_unavoidable_hard_faults_this_frame = { 0 };
static const u32 m_predict_max_flushes_per_frame = 100; // Above this number of flushes per frame, prediction is disabled
// Invalidation
static const bool invalidation_ignore_unsynchronized = true; // If true, unsynchronized sections don't get forcefully flushed unless they overlap the fault range
static const bool invalidation_keep_ro_during_read = true; // If true, RO sections are not invalidated during read faults
/**
* Virtual Methods
*/
@ -286,7 +297,14 @@ namespace rsx
virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0;
virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0;
public:
virtual void destroy() = 0;
virtual bool is_depth_texture(u32, u32) = 0;
virtual void on_section_destroyed(section_storage_type& section)
{}
protected:
/**
* Helpers
*/
@ -382,20 +400,11 @@ namespace rsx
const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp())
{
m_num_cache_mispredictions++;
m_num_cache_misses++;
surface->copy_texture(true, std::forward<Args>(extras)...);
}
}
if (!surface->flush(std::forward<Args>(extras)...))
{
// Missed address, note this
// TODO: Lower severity when successful to keep the cache from overworking
record_cache_miss(*surface);
}
m_num_flush_requests++;
surface->flush(std::forward<Args>(extras)...);
}
data.flushed = true;
@ -916,16 +925,20 @@ namespace rsx
public:
texture_cache() : m_storage(this) {}
texture_cache() : m_storage(this), m_predictor(this) {}
~texture_cache() {}
virtual void destroy() = 0;
virtual bool is_depth_texture(u32, u32) = 0;
virtual void on_frame_end() = 0;
void clear()
{
m_storage.clear();
m_predictor.clear();
}
virtual void on_frame_end()
{
m_temporary_subresource_cache.clear();
m_predictor.on_frame_end();
reset_frame_statistics();
}
@ -966,6 +979,7 @@ namespace rsx
{
auto &block = m_storage.block_for(range);
section_storage_type *dimensions_mismatch = nullptr;
section_storage_type *best_fit = nullptr;
section_storage_type *reuse = nullptr;
#ifdef TEXTURE_CACHE_DEBUG
@ -988,6 +1002,10 @@ namespace rsx
res = &tex;
#endif
}
else if (dimensions_mismatch == nullptr)
{
dimensions_mismatch = &tex;
}
}
else if (best_fit == nullptr && tex.can_be_reused())
{
@ -1006,9 +1024,9 @@ namespace rsx
return res;
#endif
if (best_fit != nullptr)
if (dimensions_mismatch != nullptr)
{
auto &tex = *best_fit;
auto &tex = *dimensions_mismatch;
LOG_WARNING(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters (width=%d vs %d; height=%d vs %d; depth=%d vs %d; mipmaps=%d vs %d)",
range.start, width, tex.get_width(), height, tex.get_height(), depth, tex.get_depth(), mipmaps, tex.get_mipmaps());
}
@ -1198,38 +1216,6 @@ namespace rsx
}
public:
template <typename ...Args>
bool flush_memory_to_cache(const address_range &memory_range, bool skip_synchronized, u32 allowed_types_mask, Args&&... extra)
{
// Temporarily disable prediction if more than 50% of predictions are wrong. Also lower prediction pressure
if (m_num_cache_mispredictions > (m_num_cache_speculative_writes / 2) &&
m_num_cache_mispredictions > 8)
return false;
std::lock_guard lock(m_cache_mutex);
section_storage_type* region = find_flushable_section(memory_range);
// Check if section was released, usually if cell overwrites a currently bound render target
if (region == nullptr)
return true;
// Skip if already synchronized
if (skip_synchronized && region->is_synchronized())
return false;
// Skip if type is not allowed
if ((allowed_types_mask & region->get_context()) == 0)
return true;
// Skip if more writes to the same target are likely
if (!region->writes_likely_completed())
return true;
region->copy_texture(false, std::forward<Args>(extra)...);
m_num_cache_speculative_writes++;
return true;
}
template <typename ...Args>
bool load_memory_from_cache(const address_range &memory_range, Args&&... extras)
{
@ -1294,71 +1280,47 @@ namespace rsx
return true;
}
void record_cache_miss(section_storage_type &tex)
{
m_num_cache_misses++;
const auto& memory_range = tex.get_section_range();
const auto fmt = tex.get_format();
auto It = m_cache_miss_statistics_table.find(memory_range);
if (It == m_cache_miss_statistics_table.end())
{
m_cache_miss_statistics_table[memory_range] = { 1, fmt };
return;
}
auto &value = It->second;
if (value.format != fmt)
{
value = { 1, fmt };
return;
}
value.misses += 2;
}
template <typename ...Args>
bool flush_if_cache_miss_likely(texture_format fmt, const address_range &memory_range, Args&&... extras)
bool flush_if_cache_miss_likely(const address_range &range, Args&&... extras)
{
auto It = m_cache_miss_statistics_table.find(memory_range);
if (It == m_cache_miss_statistics_table.end())
{
m_cache_miss_statistics_table[memory_range] = { 0, fmt };
u32 cur_flushes_this_frame = (m_flushes_this_frame + m_speculations_this_frame);
if (cur_flushes_this_frame > m_predict_max_flushes_per_frame)
return false;
}
auto &value = It->second;
auto& block = m_storage.block_for(range);
if (block.empty())
return false;
if (value.format != fmt)
reader_lock lock(m_cache_mutex);
// Try to find matching regions
bool result = false;
for (auto &region : block)
{
//Reset since the data has changed
//TODO: Keep track of all this information together
value = { 0, fmt };
if (region.is_dirty() || region.is_synchronized() || !region.is_flushable())
continue;
if (!region.matches(range))
continue;
if (!region.tracked_by_predictor())
continue;
if (!m_predictor.predict(region))
continue;
lock.upgrade();
region.copy_texture(false, std::forward<Args>(extras)...);
result = true;
cur_flushes_this_frame++;
if (cur_flushes_this_frame > m_predict_max_flushes_per_frame)
return result;
}
// By default, blit targets are always to be tested for readback
u32 flush_mask = rsx::texture_upload_context::blit_engine_dst;
// Auto flush if this address keeps missing (not properly synchronized)
if (value.misses >= m_cache_miss_threshold)
{
// Disable prediction if memory is flagged as flush_always
if (m_flush_always_cache.find(memory_range) == m_flush_always_cache.end())
{
// TODO: Determine better way of setting threshold
// Allow all types
flush_mask = 0xFF;
}
}
if (!flush_memory_to_cache(memory_range, true, flush_mask, std::forward<Args>(extras)...) &&
value.misses > 0)
{
value.misses--;
}
return true;
return result;
}
void purge_unreleased_sections()
@ -2337,12 +2299,6 @@ namespace rsx
{
lock.upgrade();
if (cached_dest->is_locked() && cached_dest->is_synchronized())
{
// Premature readback
m_num_cache_mispredictions++;
}
u32 mem_length;
const u32 mem_base = dst_address - cached_dest->get_section_base();
@ -2457,55 +2413,22 @@ namespace rsx
}
}
void reset_frame_statistics()
predictor_type& get_predictor()
{
m_num_flush_requests.store(0u);
m_num_cache_misses.store(0u);
m_num_cache_mispredictions.store(0u);
m_num_cache_speculative_writes.store(0u);
return m_predictor;
}
virtual const u32 get_unreleased_textures_count() const
{
return m_storage.m_unreleased_texture_objects;
}
virtual const u64 get_texture_memory_in_use() const
{
return m_storage.m_texture_memory_in_use;
}
virtual u32 get_num_flush_requests() const
{
return m_num_flush_requests;
}
virtual u32 get_num_cache_mispredictions() const
{
return m_num_cache_mispredictions;
}
virtual u32 get_num_cache_speculative_writes() const
{
return m_num_cache_speculative_writes;
}
virtual f32 get_cache_miss_ratio() const
{
const auto num_flushes = m_num_flush_requests.load();
return (num_flushes == 0u) ? 0.f : (f32)m_num_cache_misses.load() / num_flushes;
}
/**
* The read only texture invalidate flag is set if a read only texture is trampled by framebuffer memory
* If set, all cached read only textures are considered invalid and should be re-fetched from the texture cache
*/
virtual void clear_ro_tex_invalidate_intr()
void clear_ro_tex_invalidate_intr()
{
read_only_tex_invalidate = false;
}
virtual bool get_ro_tex_invalidate_intr() const
bool get_ro_tex_invalidate_intr() const
{
return read_only_tex_invalidate;
}
@ -2521,5 +2444,83 @@ namespace rsx
auto ptr = vm::get_super_ptr<atomic_t<u32>>(texaddr);
return *ptr == texaddr;
}
/**
* Per-frame statistics
*/
void reset_frame_statistics()
{
m_flushes_this_frame.store(0u);
m_misses_this_frame.store(0u);
m_speculations_this_frame.store(0u);
m_unavoidable_hard_faults_this_frame.store(0u);
}
void on_flush()
{
m_flushes_this_frame++;
}
void on_speculative_flush()
{
m_speculations_this_frame++;
}
void on_misprediction()
{
m_predictor.on_misprediction();
}
void on_miss(const section_storage_type& section)
{
m_misses_this_frame++;
if (section.get_memory_read_flags() == memory_read_flags::flush_always)
{
m_unavoidable_hard_faults_this_frame++;
}
}
virtual const u32 get_unreleased_textures_count() const
{
return m_storage.m_unreleased_texture_objects;
}
const u64 get_texture_memory_in_use() const
{
return m_storage.m_texture_memory_in_use;
}
u32 get_num_flush_requests() const
{
return m_flushes_this_frame;
}
u32 get_num_cache_mispredictions() const
{
return m_predictor.m_mispredictions_this_frame;
}
u32 get_num_cache_speculative_writes() const
{
return m_speculations_this_frame;
}
u32 get_num_cache_misses() const
{
return m_misses_this_frame;
}
u32 get_num_unavoidable_hard_faults() const
{
return m_unavoidable_hard_faults_this_frame;
}
f32 get_cache_miss_ratio() const
{
const auto num_flushes = m_flushes_this_frame.load();
return (num_flushes == 0u) ? 0.f : (f32)m_misses_this_frame.load() / num_flushes;
}
};
}

View File

@ -0,0 +1,416 @@
#pragma once
#include "../rsx_cache.h"
#include "../rsx_utils.h"
#include "TextureUtils.h"
namespace rsx
{
/**
* Predictor Entry History Queue
*/
template <u32 buffer_size>
class texture_cache_predictor_entry_history_queue
{
std::array<u32, buffer_size> buffer;
u32 m_front;
u32 m_size;
public:
texture_cache_predictor_entry_history_queue()
{
clear();
}
void clear()
{
m_front = buffer_size;
m_size = 0;
}
size_t size() const
{
return m_size;
}
bool empty() const
{
return m_size == 0;
}
void push(u32 val)
{
if (m_size < buffer_size)
{
m_size++;
}
if (m_front == 0)
{
m_front = buffer_size - 1;
}
else
{
m_front--;
}
AUDIT(m_front < buffer_size);
buffer[m_front] = val;
}
u32 operator[](u32 pos) const
{
AUDIT(pos < m_size);
AUDIT(m_front < buffer_size);
return buffer[(m_front + pos) % buffer_size];
}
};
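A note on the queue semantics: each push moves the front backwards through a fixed ring, so operator[](0) is always the most recent value and the oldest value is silently overwritten once the ring is full. A minimal usage sketch, assuming the surrounding rpcs3 headers (the pushed values are illustrative only):
texture_cache_predictor_entry_history_queue<4> q;
q.push(10); // history, newest first: [10]
q.push(20); // [20, 10]
q.push(30); // [30, 20, 10]
// q[0] == 30, q[1] == 20, q[2] == 10, q.size() == 3
q.push(40); // [40, 30, 20, 10] -- ring is now full
q.push(50); // [50, 40, 30, 20] -- oldest value (10) was overwritten
// q[0] == 50, q[3] == 20, q.size() stays at 4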
/**
* Predictor key
*/
template <typename traits>
struct texture_cache_predictor_key
{
using texture_format = typename traits::texture_format;
using section_storage_type = typename traits::section_storage_type;
address_range cpu_range;
texture_format format;
texture_upload_context context;
// Constructors
texture_cache_predictor_key() = default;
texture_cache_predictor_key(const address_range& _cpu_range, texture_format _format, texture_upload_context _context)
: cpu_range(_cpu_range)
, format(_format)
, context(_context)
{}
texture_cache_predictor_key(const section_storage_type& section)
: cpu_range(section.get_section_range())
, format(section.get_format())
, context(section.get_context())
{}
// Methods
bool operator==(const texture_cache_predictor_key& other) const
{
return cpu_range == other.cpu_range && format == other.format && context == other.context;
}
bool operator!=(const texture_cache_predictor_key& other) const
{
return !operator==(other);
}
};
/**
* Predictor entry
*/
template<typename traits>
class texture_cache_predictor_entry
{
public:
using key_type = texture_cache_predictor_key<traits>;
using section_storage_type = typename traits::section_storage_type;
const key_type key;
private:
u32 m_guessed_writes;
u32 m_writes_since_last_flush;
static const u32 max_write_history_size = 16;
texture_cache_predictor_entry_history_queue<max_write_history_size> write_history;
static const u32 max_confidence = 8; // Cannot be more "confident" than this value
static const u32 confident_threshold = 6; // We are confident if confidence >= confident_threshold
static const u32 starting_confidence = 3;
static const s32 confidence_guessed_flush = 2; // Confidence granted when we correctly guess there will be a flush
static const s32 confidence_guessed_no_flush = 1; // Confidence granted when we correctly guess there won't be a flush
static const s32 confidence_incorrect_guess = -2; // Confidence penalty when our guess is incorrect
static const s32 confidence_mispredict = -4; // Confidence penalty when a speculative flush is incorrect
u32 confidence;
public:
texture_cache_predictor_entry(key_type _key)
: key(_key)
{
reset();
}
~texture_cache_predictor_entry() = default;
u32 get_confidence() const
{
return confidence;
}
bool is_confident() const
{
return confidence >= confident_threshold;
}
bool key_matches(const key_type& other_key) const
{
return key == other_key;
}
bool key_matches(const section_storage_type& section) const
{
return key_matches(key_type(section));
}
void update_confidence(s32 delta)
{
if (delta > 0)
{
confidence += delta;
if (confidence > max_confidence)
{
confidence = max_confidence;
}
}
else if (delta < 0)
{
u32 neg_delta = static_cast<u32>(-delta);
if (confidence > neg_delta)
{
confidence -= neg_delta;
}
else
{
confidence = 0;
}
}
}
private:
// Returns how many writes we think there will be this time (i.e. between the last flush and the next flush)
// Returning UINT32_MAX means no guess is possible
u32 guess_number_of_writes() const
{
const auto history_size = write_history.size();
if (history_size == 0)
{
// We need some history to be able to take a guess
return UINT32_MAX;
}
else if (history_size == 1)
{
// If we have one history entry, we assume it will repeat
return write_history[0];
}
else
{
// For more than one entry, we try and find a pattern, and assume it holds
const u32 stop_when_found_matches = 4;
u32 matches_found = 0;
u32 guess = UINT32_MAX;
for (u32 i = 0; i < history_size; i++)
{
// A past interval with fewer writes than we have already seen this time cannot be the answer
if (write_history[i] < m_writes_since_last_flush)
continue;
u32 cur_matches_found = 1;
// Try to find more matches
for (u32 j = 0; i + j + 1 < history_size; j++)
{
if (write_history[i + j + 1] != write_history[j])
break;
// We found another matching value
cur_matches_found++;
if (cur_matches_found >= stop_when_found_matches)
break;
}
// If this candidate matched more history entries than any previous one, adopt its guess
if (cur_matches_found > matches_found)
{
guess = write_history[i];
matches_found = cur_matches_found;
}
if (matches_found >= stop_when_found_matches)
break;
}
return guess;
}
}
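To make the pattern search concrete, here is a hypothetical trace (the history values are made up, not taken from this commit):
// Hypothetical trace, newest entry first:
//   write_history = [4, 7, 4, 7, 4], m_writes_since_last_flush = 2
//   i = 0: history[0] = 4 >= 2, but history[1] = 7 != history[0] = 4 -> only 1 match
//   i = 1: history[2] == history[0], history[3] == history[1], history[4] == history[2]
//          -> 4 matches, stop_when_found_matches reached -> guess = history[1] = 7
// Read forward in time the sequence is ..., 4, 7, 4, 7, 4, so the period-2
// pattern predicts 7 writes before the next flush.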
void calculate_next_guess(bool reset)
{
if (reset || m_guessed_writes == UINT32_MAX || m_writes_since_last_flush > m_guessed_writes)
{
m_guessed_writes = guess_number_of_writes();
}
}
public:
void reset()
{
confidence = starting_confidence;
m_writes_since_last_flush = 0;
m_guessed_writes = UINT32_MAX;
write_history.clear();
}
void on_flush()
{
update_confidence(is_flush_likely() ? confidence_guessed_flush : confidence_incorrect_guess);
// Update history
write_history.push(m_writes_since_last_flush);
m_writes_since_last_flush = 0;
calculate_next_guess(true);
}
void on_write(bool mispredict)
{
if (mispredict || is_flush_likely())
{
update_confidence(mispredict ? confidence_mispredict : confidence_incorrect_guess);
}
else
{
update_confidence(confidence_guessed_no_flush);
}
m_writes_since_last_flush++;
calculate_next_guess(false);
}
bool is_flush_likely() const
{
return m_writes_since_last_flush >= m_guessed_writes;
}
// Returns true if we believe that the next operation on this memory range will be a flush
bool predict() const
{
// Disable prediction if we have a low confidence in our predictions
if (!is_confident())
return false;
return is_flush_likely();
}
};
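As a hedged illustration of the confidence bookkeeping above (the event sequence is hypothetical):
// Hypothetical event sequence for one entry, starting from reset():
//   confidence = 3                            (starting_confidence; threshold is 6)
//   on_flush() while is_flush_likely()        -> +2 (confidence_guessed_flush)    -> 5
//   on_flush() while is_flush_likely()        -> +2                               -> 7
//   predict() can now return true while is_flush_likely() holds
//   on_write(true) after a speculative flush  -> -4 (confidence_mispredict)       -> 3
//   on_flush() without is_flush_likely()      -> -2 (confidence_incorrect_guess)  -> 1
// update_confidence() clamps the running value to [0, max_confidence].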
/**
* Predictor
*/
template <typename traits>
class texture_cache_predictor
{
public:
// Traits
using section_storage_type = typename traits::section_storage_type;
using texture_cache_type = typename traits::texture_cache_base_type;
using key_type = texture_cache_predictor_key<traits>;
using mapped_type = texture_cache_predictor_entry<traits>;
using map_type = std::unordered_map<key_type, mapped_type>;
using value_type = typename map_type::value_type;
using size_type = typename map_type::size_type;
using iterator = typename map_type::iterator;
using const_iterator = typename map_type::const_iterator;
private:
// Member variables
map_type m_entries;
texture_cache_type* m_tex_cache;
public:
// Per-frame statistics
std::atomic<u32> m_mispredictions_this_frame = {0};
// Constructors
texture_cache_predictor(texture_cache_type* tex_cache)
: m_tex_cache(tex_cache) {}
~texture_cache_predictor() = default;
// Trait wrappers
inline iterator begin() noexcept { return m_entries.begin(); }
inline const_iterator begin() const noexcept { return m_entries.begin(); }
inline iterator end() noexcept { return m_entries.end(); }
inline const_iterator end() const noexcept { return m_entries.end(); }
inline mapped_type& at(const key_type& key) { return m_entries.at(key); }
inline const mapped_type& at(const key_type& key) const { return m_entries.at(key); }
bool empty() const noexcept { return m_entries.empty(); }
size_type size() const noexcept { return m_entries.size(); }
void clear() { m_entries.clear(); }
mapped_type& operator[](const key_type& key)
{
auto ret = m_entries.try_emplace(key, key);
AUDIT(ret.first != m_entries.end());
return ret.first->second;
}
mapped_type& operator[](const section_storage_type& section)
{
return (*this)[key_type(section)];
}
// Callbacks
void on_frame_end()
{
m_mispredictions_this_frame = 0;
}
void on_misprediction()
{
m_mispredictions_this_frame++;
}
// Returns true if the next operation on this key's range is likely to be a flush (readback)
bool predict(const key_type& key) const
{
// Use "find" to avoid allocating entries if they do not exist
const_iterator entry_it = m_entries.find(key);
if (entry_it == m_entries.end())
{
return false;
}
else
{
return entry_it->second.predict();
}
}
bool predict(const section_storage_type& section) const
{
return predict(key_type(section));
}
};
} // namespace rsx
namespace std
{
template <typename traits>
struct hash<rsx::texture_cache_predictor_key<traits>>
{
std::size_t operator()(const rsx::texture_cache_predictor_key<traits>& k) const
{
size_t result = std::hash<utils::address_range>{}(k.cpu_range);
result ^= static_cast<size_t>(k.format);
result ^= (static_cast<size_t>(k.context) << 16);
return result;
}
};
} // namespace std
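Taken together, a backend drives the predictor roughly as follows. This is a minimal sketch, not code from this commit: my_traits, tex_cache, section, section_was_speculatively_flushed and extras are all placeholders for whatever the backend provides.
rsx::texture_cache_predictor<my_traits> predictor(&tex_cache);

// Guest CPU wrote a tracked section; feed the write into its entry.
predictor[section].on_write(section_was_speculatively_flushed);

// The section was actually flushed (read back); record the interval length.
predictor[section].on_flush();

// Before a speculative readback, ask whether a flush is expected next.
if (predictor.predict(section))
    section.copy_texture(false, extras...); // false => speculative flush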

View File

@ -2,6 +2,7 @@
#include "../rsx_cache.h"
#include "../rsx_utils.h"
#include "texture_cache_predictor.h"
#include "TextureUtils.h"
#include <list>
@ -303,19 +304,9 @@ namespace rsx
};
/**
* Ranged storage
*/
template <typename section_storage_type>
class texture_cache_base
{
public:
virtual void on_memory_read_flags_changed(section_storage_type &section, rsx::memory_read_flags flags) = 0;
virtual void on_section_destroyed(section_storage_type & /*section*/) {};
};
template <typename _ranged_storage_type>
class ranged_storage_block
{
@ -566,7 +557,7 @@ namespace rsx
};
template <typename _section_storage_type>
template <typename traits>
class ranged_storage
{
public:
@ -575,8 +566,8 @@ namespace rsx
static constexpr u32 num_blocks = u32{0x1'0000'0000ull / block_size};
static_assert((num_blocks > 0) && (u64{num_blocks} *block_size == 0x1'0000'0000ull), "Invalid block_size/num_blocks");
using section_storage_type = _section_storage_type;
using texture_cache_type = texture_cache_base<section_storage_type>;
using section_storage_type = typename traits::section_storage_type;
using texture_cache_type = typename traits::texture_cache_base_type;
using block_type = ranged_storage_block<ranged_storage>;
private:
@ -956,13 +947,16 @@ namespace rsx
/**
* Cached Texture Section
*/
template <typename derived_type>
template <typename derived_type, typename traits>
class cached_texture_section : public rsx::buffered_section
{
public:
using ranged_storage_type = ranged_storage<derived_type>;
using ranged_storage_type = ranged_storage<traits>;
using ranged_storage_block_type = ranged_storage_block<ranged_storage_type>;
using texture_cache_type = typename ranged_storage_type::texture_cache_type;
using texture_cache_type = typename traits::texture_cache_base_type;
using predictor_type = texture_cache_predictor<traits>;
using predictor_key_type = typename predictor_type::key_type;
using predictor_entry_type = typename predictor_type::mapped_type;
protected:
ranged_storage_type *m_storage = nullptr;
@ -1000,15 +994,17 @@ namespace rsx
u64 sync_timestamp = 0;
bool synchronized = false;
bool flushed = false;
u32 num_writes = 0;
std::deque<u32> read_history;
bool speculatively_flushed = false;
rsx::memory_read_flags readback_behaviour = rsx::memory_read_flags::flush_once;
rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order;
rsx::texture_upload_context context = rsx::texture_upload_context::shader_read;
rsx::texture_dimension_extended image_type = rsx::texture_dimension_extended::texture_dimension_2d;
predictor_type *m_predictor = nullptr;
size_t m_predictor_key_hash = 0;
predictor_entry_type *m_predictor_entry = nullptr;
public:
u64 cache_tag = 0;
u64 last_write_tag = 0;
@ -1019,7 +1015,7 @@ namespace rsx
}
cached_texture_section() = default;
cached_texture_section(ranged_storage_block_type *block) : m_block(block), m_storage(&block->get_storage()), m_tex_cache(&block->get_texture_cache())
cached_texture_section(ranged_storage_block_type *block) : m_block(block), m_storage(&block->get_storage()), m_tex_cache(&block->get_texture_cache()), m_predictor(&m_tex_cache->get_predictor())
{
update_unreleased();
}
@ -1030,6 +1026,7 @@ namespace rsx
m_block = block;
m_storage = &block->get_storage();
m_tex_cache = &block->get_texture_cache();
m_predictor = &m_tex_cache->get_predictor();
update_unreleased();
}
@ -1064,12 +1061,12 @@ namespace rsx
sync_timestamp = 0ull;
synchronized = false;
flushed = false;
speculatively_flushed = false;
cache_tag = 0ull;
last_write_tag = 0ull;
num_writes = 0;
read_history.clear();
m_predictor_entry = nullptr;
readback_behaviour = rsx::memory_read_flags::flush_once;
view_flags = rsx::texture_create_flags::default_component_order;
@ -1196,6 +1193,8 @@ namespace rsx
m_block->on_section_range_invalid(*derived());
//m_storage->on_section_range_invalid(*derived());
m_predictor_entry = nullptr;
speculatively_flushed = false;
buffered_section::invalidate_range();
}
@ -1302,25 +1301,65 @@ namespace rsx
protect(prot, range);
}
/**
* Prediction
*/
bool tracked_by_predictor() const
{
// We do not track shader_read sections (they are never flushed) or flush_always sections (they are always flushed)
return get_context() != texture_upload_context::shader_read && get_memory_read_flags() != memory_read_flags::flush_always;
}
void on_flush(bool miss)
{
speculatively_flushed = false;
if (miss)
{
m_tex_cache->on_miss(*derived());
}
m_tex_cache->on_flush();
if (tracked_by_predictor())
{
get_predictor_entry().on_flush();
}
}
void on_speculative_flush()
{
speculatively_flushed = true;
m_tex_cache->on_speculative_flush();
}
void touch(u64 tag)
{
last_write_tag = tag;
if (tracked_by_predictor())
{
get_predictor_entry().on_write(speculatively_flushed);
}
if (speculatively_flushed)
{
m_tex_cache->on_misprediction();
}
}
/**
* Misc
*/
void touch(u64 tag)
predictor_entry_type& get_predictor_entry()
{
num_writes++;
last_write_tag = tag;
// If we don't have a predictor entry, or the key has changed
if (m_predictor_entry == nullptr || !m_predictor_entry->key_matches(*derived()))
{
m_predictor_entry = &((*m_predictor)[*derived()]);
}
void reset_write_statistics()
{
if (read_history.size() == 16)
{
read_history.pop_back();
}
read_history.push_front(num_writes);
num_writes = 0;
return *m_predictor_entry;
}
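The hooks above tie a section's lifetime events to its predictor entry; a hypothetical per-frame event order for one flushable, non-flush_always section would look like this:
// touch(tag)            -> get_predictor_entry().on_write(false)
// touch(tag)            -> on_write(false) again; the write count grows
// copy_texture(false)   -> on_speculative_flush(); speculatively_flushed = true
// touch(tag)            -> on_write(true) and m_tex_cache->on_misprediction()
// flush(...)            -> backend calls on_flush(miss), which notifies the
//                          cache and lets the entry record the write interval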
void set_view_flags(rsx::texture_create_flags flags)
@ -1416,52 +1455,6 @@ namespace rsx
return readback_behaviour;
}
bool writes_likely_completed() const
{
// TODO: Move this to the miss statistics block
const auto num_records = read_history.size();
if (num_records == 0)
{
return false;
}
else if (num_records == 1)
{
return num_writes >= read_history.front();
}
else
{
const u32 last = read_history.front();
const u32 prev_last = read_history[1];
if (last == prev_last && num_records <= 3)
{
return num_writes >= last;
}
u32 compare = UINT32_MAX;
for (u32 n = 1; n < num_records; n++)
{
if (read_history[n] == last)
{
// Uncertain, but possible
compare = read_history[n - 1];
if (num_records > (n + 1))
{
if (read_history[n + 1] == prev_last)
{
// Confirmed with 2 values
break;
}
}
}
}
return num_writes >= compare;
}
}
u64 get_sync_timestamp() const
{
return sync_timestamp;

View File

@ -1739,10 +1739,12 @@ void GLGSRender::flip(int buffer)
const auto num_flushes = m_gl_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes();
const auto num_misses = m_gl_texture_cache.get_num_cache_misses();
const auto num_unavoidable = m_gl_texture_cache.get_num_unavoidable_hard_faults();
const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100);
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures));
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size));
m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
}
m_frame->flip(m_context);

View File

@ -243,7 +243,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const utils::address_range surface_range = m_surface_info[i].get_memory_range();
m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range);
m_gl_texture_cache.flush_if_cache_miss_likely(surface_range);
}
if (std::get<0>(m_rtts.m_bound_render_targets[i]))
@ -275,7 +275,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, surface_range);
m_gl_texture_cache.flush_if_cache_miss_likely(surface_range);
}
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);

View File

@ -141,11 +141,27 @@ namespace gl
}
};
class cached_texture_section : public rsx::cached_texture_section<gl::cached_texture_section>
{
using baseclass = rsx::cached_texture_section<gl::cached_texture_section>;
private:
class cached_texture_section;
class texture_cache;
struct texture_cache_traits
{
using commandbuffer_type = void*;
using section_storage_type = gl::cached_texture_section;
using texture_cache_type = gl::texture_cache;
using texture_cache_base_type = rsx::texture_cache<texture_cache_type, texture_cache_traits>;
using image_resource_type = gl::texture*;
using image_view_type = gl::texture_view*;
using image_storage_type = gl::texture;
using texture_format = gl::texture::format;
};
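The traits struct plus the CRTP parameter is all a backend has to supply to the refactored cache; a skeleton for some hypothetical new backend would mirror this shape (every name below is a placeholder, not part of this commit):
namespace newbackend
{
	class cached_texture_section;
	class texture_cache;

	struct texture_cache_traits
	{
		using commandbuffer_type      = command_list*;          // placeholder
		using section_storage_type    = newbackend::cached_texture_section;
		using texture_cache_type      = newbackend::texture_cache;
		using texture_cache_base_type = rsx::texture_cache<texture_cache_type, texture_cache_traits>;
		using image_resource_type     = image*;                 // placeholder
		using image_view_type         = image_view*;            // placeholder
		using image_storage_type      = image;                  // placeholder
		using texture_format          = u32;                    // placeholder
	};
}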
class cached_texture_section : public rsx::cached_texture_section<gl::cached_texture_section, gl::texture_cache_traits>
{
using baseclass = rsx::cached_texture_section<gl::cached_texture_section, gl::texture_cache_traits>;
friend baseclass;
fence m_fence;
u32 pbo_id = 0;
u32 pbo_size = 0;
@ -348,10 +364,15 @@ namespace gl
}
}
void copy_texture(bool=false)
void copy_texture(bool manage_lifetime)
{
ASSERT(exists());
if (!manage_lifetime)
{
baseclass::on_speculative_flush();
}
if (!pbo_id)
{
init_buffer();
@ -474,7 +495,7 @@ namespace gl
if (!synchronized)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
copy_texture();
copy_texture(true);
if (!synchronized)
{
@ -592,35 +613,33 @@ namespace gl
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
reset_write_statistics();
baseclass::on_flush(!result);
return result;
}
void destroy()
{
if (!is_locked() && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty())
if (!is_locked() && pbo_id == 0 && vram_texture == nullptr && m_fence.is_empty() && managed_texture.get() == nullptr)
//Already destroyed
return;
if (pbo_id == 0)
{
//Read-only texture, destroy texture memory
managed_texture.reset();
}
else
if (pbo_id != 0)
{
//Destroy pbo cache since vram texture is managed elsewhere
glDeleteBuffers(1, &pbo_id);
scaled_texture.reset();
}
managed_texture.reset();
vram_texture = nullptr;
pbo_id = 0;
pbo_size = 0;
if (!m_fence.is_empty())
{
m_fence.destroy();
}
baseclass::on_section_resources_destroyed();
}
@ -695,11 +714,13 @@ namespace gl
}
};
class texture_cache : public rsx::texture_cache<void*, gl::cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>
class texture_cache : public rsx::texture_cache<gl::texture_cache, gl::texture_cache_traits>
{
private:
using baseclass = rsx::texture_cache<void*, gl::cached_texture_section, gl::texture*, gl::texture_view*, gl::texture, gl::texture::format>;
using baseclass = rsx::texture_cache<gl::texture_cache, gl::texture_cache_traits>;
friend baseclass;
private:
struct discardable_storage
{
std::unique_ptr<gl::texture> image;
@ -942,7 +963,7 @@ namespace gl
const auto swizzle = get_component_mapping(gcm_format, flags);
image->set_native_component_layout(swizzle);
auto& cached = *find_cached_texture(rsx_range, true, true, width, width, depth, mipmaps);
auto& cached = *find_cached_texture(rsx_range, true, true, width, height, depth, mipmaps);
ASSERT(!cached.is_locked());
// Prepare section
@ -1126,8 +1147,8 @@ namespace gl
}
clear_temporary_subresources();
m_temporary_subresource_cache.clear();
reset_frame_statistics();
baseclass::on_frame_end();
}
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts)
@ -1151,7 +1172,7 @@ namespace gl
gl::texture::format::depth_stencil : gl::texture::format::depth;
}
flush_if_cache_miss_likely(fmt, result.to_address_range());
flush_if_cache_miss_likely(result.to_address_range());
}
return true;

View File

@ -2849,7 +2849,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
const utils::address_range rsx_range = m_surface_info[i].get_memory_range();
m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, rsx_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
m_texture_cache.flush_if_cache_miss_likely(rsx_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
}
m_surface_info[i].address = m_surface_info[i].pitch = 0;
@ -2865,7 +2865,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, surface_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
m_texture_cache.flush_if_cache_miss_likely(surface_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
}
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
@ -3387,11 +3387,13 @@ void VKGSRender::flip(int buffer)
const auto num_flushes = m_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes();
const auto num_misses = m_texture_cache.get_num_cache_misses();
const auto num_unavoidable = m_texture_cache.get_num_unavoidable_hard_faults();
const auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100);
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %3d%% hard faults, %2d misprediction(s), %2d speculation(s)", num_flushes, cache_miss_ratio, num_mispredict, num_speculate));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
}
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, present_layout, subres);

View File

@ -13,9 +13,25 @@ extern u64 get_system_time();
namespace vk
{
class cached_texture_section : public rsx::cached_texture_section<vk::cached_texture_section>
class cached_texture_section;
class texture_cache;
struct texture_cache_traits
{
using baseclass = typename rsx::cached_texture_section<vk::cached_texture_section>;
using commandbuffer_type = vk::command_buffer;
using section_storage_type = vk::cached_texture_section;
using texture_cache_type = vk::texture_cache;
using texture_cache_base_type = rsx::texture_cache<texture_cache_type, texture_cache_traits>;
using image_resource_type = vk::image*;
using image_view_type = vk::image_view*;
using image_storage_type = vk::image;
using texture_format = VkFormat;
};
class cached_texture_section : public rsx::cached_texture_section<vk::cached_texture_section, vk::texture_cache_traits>
{
using baseclass = typename rsx::cached_texture_section<vk::cached_texture_section, vk::texture_cache_traits>;
friend baseclass;
std::unique_ptr<vk::viewable_image> managed_texture = nullptr;
@ -83,6 +99,7 @@ namespace vk
return;
m_tex_cache->on_section_destroyed(*this);
vram_texture = nullptr;
ASSERT(managed_texture.get() == nullptr);
release_dma_resources();
@ -102,11 +119,13 @@ namespace vk
vk::image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap)
{
ASSERT(vram_texture != nullptr);
return vram_texture->get_view(remap_encoding, remap);
}
vk::image_view* get_raw_view()
{
ASSERT(vram_texture != nullptr);
return vram_texture->get_view(0xAAE4, rsx::default_remap_vector);
}
@ -120,8 +139,9 @@ namespace vk
return managed_texture;
}
VkFormat get_format()
VkFormat get_format() const
{
ASSERT(vram_texture != nullptr);
return vram_texture->info.format;
}
@ -135,6 +155,11 @@ namespace vk
{
ASSERT(exists());
if (!manage_cb_lifetime)
{
baseclass::on_speculative_flush();
}
if (m_device == nullptr)
{
m_device = &cmd.get_command_pool().get_owner();
@ -342,7 +367,7 @@ namespace vk
}
dma_buffer->unmap();
reset_write_statistics();
baseclass::on_flush(!result);
//Its highly likely that this surface will be reused, so we just leave resources in place
return result;
@ -417,10 +442,14 @@ namespace vk
}
};
class texture_cache : public rsx::texture_cache<vk::command_buffer, vk::cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>
class texture_cache : public rsx::texture_cache<vk::texture_cache, vk::texture_cache_traits>
{
private:
using baseclass = rsx::texture_cache<vk::texture_cache, vk::texture_cache_traits>;
friend baseclass;
public:
virtual void on_section_destroyed(cached_texture_section& tex)
void on_section_destroyed(cached_texture_section& tex) override
{
if (tex.is_managed())
{
@ -430,7 +459,6 @@ namespace vk
}
private:
using baseclass = rsx::texture_cache<vk::command_buffer, vk::cached_texture_section, vk::image*, vk::image_view*, vk::image, VkFormat>;
//Vulkan internals
vk::render_device* m_device;
@ -1036,6 +1064,8 @@ namespace vk
m_temporary_subresource_cache.clear();
reset_frame_statistics();
baseclass::on_frame_end();
}
template<typename RsxTextureType>
@ -1229,7 +1259,7 @@ namespace vk
{
if (reply.real_dst_size)
{
flush_if_cache_miss_likely(helper.format, reply.to_address_range(), cmd, m_submit_queue);
flush_if_cache_miss_likely(reply.to_address_range(), cmd, m_submit_queue);
}
return true;
@ -1240,12 +1270,7 @@ namespace vk
const u32 get_unreleased_textures_count() const override
{
return m_storage.m_unreleased_texture_objects + (u32)m_discardable_storage.size();
}
const u64 get_texture_memory_in_use() const override
{
return m_storage.m_texture_memory_in_use;
return baseclass::get_unreleased_textures_count() + (u32)m_discardable_storage.size();
}
const u32 get_temporary_memory_in_use()

View File

@ -530,6 +530,7 @@
<ClInclude Include="Emu\RSX\Common\TextGlyphs.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache_checker.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache_predictor.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache_utils.h" />
<ClInclude Include="Emu\RSX\gcm_enums.h" />
<ClInclude Include="Emu\RSX\gcm_printing.h" />

View File

@ -1456,5 +1456,8 @@
<ClInclude Include="Emu\RSX\RSXFIFO.h">
<Filter>Emu\GPU\RSX</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\texture_cache_predictor.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup>
</Project>