vulkan: Texture cache rewritten - Use a map of vectors instead of a flat array

Author: kd-11
Date:   2017-07-24 20:50:32 +03:00
Parent: 46fa6e47fe
Commit: dd19622823
1 changed file with 261 additions and 102 deletions


@@ -5,6 +5,7 @@
 #include "Emu/System.h"
 #include "../Common/TextureUtils.h"
 #include "../rsx_utils.h"
+#include "Utilities/mutex.h"
 
 namespace vk
 {
@@ -292,9 +293,34 @@ namespace vk
     class texture_cache
     {
+        struct ranged_storage
+        {
+            std::vector<cached_texture_section> data; //Stored data
+            std::atomic_int valid_count = { 0 }; //Number of usable (non-dirty) blocks
+            u32 max_range = 0; //Largest stored block
+
+            void notify(u32 data_size)
+            {
+                max_range = std::max(data_size, max_range);
+                valid_count++;
+            }
+
+            void add(cached_texture_section& section, u32 data_size)
+            {
+                max_range = std::max(data_size, max_range);
+                valid_count++;
+
+                data.push_back(std::move(section));
+            }
+        };
+
     private:
-        std::vector<cached_texture_section> m_cache;
-        std::pair<u32, u32> texture_cache_range = std::make_pair(0xFFFFFFFF, 0);
+        shared_mutex m_cache_mutex;
+        std::unordered_map<u32, ranged_storage> m_cache;
+
+        std::pair<u32, u32> read_only_range = std::make_pair(0xFFFFFFFF, 0);
+        std::pair<u32, u32> no_access_range = std::make_pair(0xFFFFFFFF, 0);
+
         std::vector<std::unique_ptr<vk::image_view> > m_temporary_image_view;
         std::vector<std::unique_ptr<vk::image>> m_dirty_textures;
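
Note: the rewritten cache is keyed by each section's base RSX address, and every bucket owns the sections that start there plus a max_range hint for range tests. A minimal sketch of the lookup this layout enables (simplified, hypothetical types; not the actual cached_texture_section API):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    struct section_sketch { std::uint32_t base = 0, size = 0; };

    struct bucket_sketch
    {
        std::vector<section_sketch> data; // sections starting at one base address
        std::uint32_t max_range = 0;      // largest section stored, for range tests
    };

    // An O(1) bucket lookup replaces the old O(n) scan over a flat vector.
    const section_sketch* find(const std::unordered_map<std::uint32_t, bucket_sketch>& cache,
                               std::uint32_t address, std::uint32_t size)
    {
        const auto found = cache.find(address);
        if (found == cache.end()) return nullptr;

        for (const auto& tex : found->second.data)
            if (tex.base == address && tex.size == size) return &tex;

        return nullptr;
    }
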
@@ -310,51 +336,71 @@ namespace vk
         cached_texture_section& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
         {
-            for (auto &tex : m_cache)
-            {
-                if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty())
-                {
-                    if (!confirm_dimensions) return tex;
-
-                    if (tex.matches(rsx_address, width, height, mipmaps))
-                        return tex;
-                    else
-                    {
-                        LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address);
-                        LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height());
-                    }
-                }
-            }
-
-            for (auto &tex : m_cache)
-            {
-                if (tex.is_dirty())
-                {
-                    if (tex.exists())
-                    {
-                        m_dirty_textures.push_back(std::move(tex.get_texture()));
-                        m_temporary_image_view.push_back(std::move(tex.get_view()));
-                    }
-
-                    tex.release_dma_resources();
-                    return tex;
-                }
-            }
-
-            m_cache.push_back(cached_texture_section());
-            return m_cache[m_cache.size() - 1];
+            {
+                reader_lock lock(m_cache_mutex);
+
+                auto found = m_cache.find(rsx_address);
+                if (found != m_cache.end())
+                {
+                    auto &range_data = found->second;
+
+                    for (auto &tex : range_data.data)
+                    {
+                        if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty())
+                        {
+                            if (!confirm_dimensions) return tex;
+
+                            if (tex.matches(rsx_address, width, height, mipmaps))
+                                return tex;
+                            else
+                            {
+                                LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address);
+                                LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height());
+                            }
+                        }
+                    }
+
+                    for (auto &tex : range_data.data)
+                    {
+                        if (tex.is_dirty())
+                        {
+                            if (tex.exists())
+                            {
+                                m_dirty_textures.push_back(std::move(tex.get_texture()));
+                                m_temporary_image_view.push_back(std::move(tex.get_view()));
+                            }
+
+                            tex.release_dma_resources();
+                            range_data.notify(rsx_size);
+                            return tex;
+                        }
+                    }
+                }
+            }
+
+            writer_lock lock(m_cache_mutex);
+
+            cached_texture_section tmp;
+            m_cache[rsx_address].add(tmp, rsx_size);
+            return m_cache[rsx_address].data.back();
         }
 
         cached_texture_section* find_flushable_section(const u32 address, const u32 range)
         {
-            for (auto &tex : m_cache)
-            {
-                if (tex.is_dirty()) continue;
-                if (!tex.is_flushable() && !tex.is_flushed()) continue;
-
-                if (tex.matches(address, range))
-                    return &tex;
-            }
+            reader_lock lock(m_cache_mutex);
+
+            auto found = m_cache.find(address);
+            if (found != m_cache.end())
+            {
+                auto &range_data = found->second;
+                for (auto &tex : range_data.data)
+                {
+                    if (tex.is_dirty()) continue;
+                    if (!tex.is_flushable() && !tex.is_flushed()) continue;
+
+                    if (tex.matches(address, range))
+                        return &tex;
+                }
+            }
 
             return nullptr;
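
Note: find_cached_texture now runs its lookup under a shared (reader) lock and only takes the exclusive (writer) lock when a brand-new section must be inserted. The shape of the pattern, sketched with std::shared_mutex in place of the emulator's own shared_mutex:

    #include <mutex>
    #include <shared_mutex>
    #include <unordered_map>
    #include <vector>

    std::shared_mutex cache_mutex;
    std::unordered_map<unsigned, std::vector<int>> cache;

    int& find_or_insert(unsigned address)
    {
        {
            std::shared_lock lock(cache_mutex); // concurrent lookups are fine

            auto found = cache.find(address);
            if (found != cache.end() && !found->second.empty())
                return found->second.back();
        }

        std::unique_lock lock(cache_mutex);     // exclusive only for insertion
        return cache[address].emplace_back();
    }

As in the real code, the returned reference outlives the lock, which is only safe because nothing erases cache entries concurrently.
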
@@ -362,24 +408,28 @@ namespace vk
         void purge_cache()
         {
-            for (auto &tex : m_cache)
+            for (auto &address_range : m_cache)
             {
-                if (tex.exists())
+                auto &range_data = address_range.second;
+                for (auto &tex : range_data.data)
                 {
-                    m_dirty_textures.push_back(std::move(tex.get_texture()));
-                    m_temporary_image_view.push_back(std::move(tex.get_view()));
-                }
+                    if (tex.exists())
+                    {
+                        m_dirty_textures.push_back(std::move(tex.get_texture()));
+                        m_temporary_image_view.push_back(std::move(tex.get_view()));
+                    }
 
-                if (tex.is_locked())
-                    tex.unprotect();
+                    if (tex.is_locked())
+                        tex.unprotect();
 
-                tex.release_dma_resources();
+                    tex.release_dma_resources();
+                }
+
+                range_data.data.resize(0);
             }
 
             m_temporary_image_view.clear();
             m_dirty_textures.clear();
-
-            m_cache.resize(0);
         }
 
         //Helpers
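
Note: purge_cache() now empties each bucket with data.resize(0) instead of destroying the map itself, so the buckets (and, in practice, each vector's allocated capacity) survive for the next refill:

    #include <cassert>
    #include <vector>

    int main()
    {
        std::vector<int> v(1024);
        const auto cap = v.capacity();

        v.resize(0);                 // destroys the elements...
        assert(v.capacity() == cap); // ...but keeps the allocation for reuse
    }
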
@@ -611,13 +661,14 @@ namespace vk
             change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
             vk::leave_uninterruptible();
 
+            writer_lock lock(m_cache_mutex);
+
             region.reset(texaddr, range);
             region.create(tex.width(), height, depth, tex.get_exact_mipmap_count(), view, image);
             region.protect(utils::protection::ro);
             region.set_dirty(false);
 
-            texture_cache_range = region.get_min_max(texture_cache_range);
+            read_only_range = region.get_min_max(read_only_range);
 
             return view;
         }
@@ -625,11 +676,13 @@ namespace vk
         {
             cached_texture_section& region = find_cached_texture(memory_address, memory_size, true, width, height, 1);
 
+            writer_lock lock(m_cache_mutex);
+
             if (!region.is_locked())
             {
                 region.reset(memory_address, memory_size);
                 region.set_dirty(false);
-                texture_cache_range = region.get_min_max(texture_cache_range);
+                no_access_range = region.get_min_max(no_access_range);
             }
 
             region.protect(utils::protection::no);
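
Note: the old texture_cache_range envelope is split in two: read_only_range tracks sampled textures (protected read-only) while no_access_range tracks flushable sections such as render targets (fully protected), so a page fault can be triaged against the right envelope cheaply. Assuming region.get_min_max() grows a [min, max] pair to cover the section, the accumulation behaves like this illustrative helper (not the real API):

    #include <algorithm>
    #include <cstdint>
    #include <utility>

    using range_t = std::pair<std::uint32_t, std::uint32_t>;

    // Grow an inclusive [min, max] envelope to also cover [base, base + size).
    range_t accumulate(range_t current, std::uint32_t base, std::uint32_t size)
    {
        return { std::min(current.first, base),
                 std::max(current.second, base + size - 1) };
    }

Seeded with {0xFFFFFFFF, 0} as in the member initializers, the first call collapses the envelope onto that section exactly.
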
@@ -656,17 +709,48 @@ namespace vk
         std::tuple<bool, bool> address_is_flushable(u32 address)
         {
-            if (address < texture_cache_range.first ||
-                address > texture_cache_range.second)
+            if (address < no_access_range.first ||
+                address > no_access_range.second)
                 return std::make_tuple(false, false);
 
-            for (auto &tex : m_cache)
-            {
-                if (tex.is_dirty()) continue;
-                if (!tex.is_flushable()) continue;
-
-                if (tex.overlaps(address))
-                    return std::make_tuple(true, tex.is_synchronized());
+            reader_lock lock(m_cache_mutex);
+
+            auto found = m_cache.find(address);
+            if (found != m_cache.end())
+            {
+                auto &range_data = found->second;
+                for (auto &tex : range_data.data)
+                {
+                    if (tex.is_dirty()) continue;
+                    if (!tex.is_flushable()) continue;
+
+                    if (tex.overlaps(address))
+                        return std::make_tuple(true, tex.is_synchronized());
+                }
+            }
+
+            for (auto &address_range : m_cache)
+            {
+                if (address_range.first == address)
+                    continue;
+
+                auto &range_data = address_range.second;
+
+                //Quickly discard range
+                const u32 lock_base = address_range.first & ~0xfff;
+                const u32 lock_limit = align(range_data.max_range + address_range.first, 4096);
+
+                if (address < lock_base || address >= lock_limit)
+                    continue;
+
+                for (auto &tex : range_data.data)
+                {
+                    if (tex.is_dirty()) continue;
+                    if (!tex.is_flushable()) continue;
+
+                    if (tex.overlaps(address))
+                        return std::make_tuple(true, tex.is_synchronized());
+                }
             }
 
             return std::make_tuple(false, false);
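
Note: the quick-discard test is sound because every section in a bucket starts at the bucket's base address and max_range bounds the longest of them, so [base & ~0xfff, align(base + max_range, 4096)) conservatively covers every 4 KiB page the bucket could have protected. Worked through with made-up numbers:

    #include <cstdint>

    int main()
    {
        const std::uint32_t base = 0xC0135000, max_range = 0x2800;

        const std::uint32_t lock_base  = base & ~0xfffu;                        // 0xC0135000
        const std::uint32_t lock_limit = (base + max_range + 0xfffu) & ~0xfffu; // 0xC0138000

        const std::uint32_t address = 0xC0139000;
        const bool may_hit = address >= lock_base && address < lock_limit;      // false
        return may_hit ? 1 : 0;
    }

An address outside that window cannot belong to the bucket, so its sections are never walked.
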
@@ -674,42 +758,75 @@ namespace vk
         bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
         {
-            if (address < texture_cache_range.first ||
-                address > texture_cache_range.second)
+            if (address < no_access_range.first ||
+                address > no_access_range.second)
                 return false;
 
             bool response = false;
             std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
+            std::unordered_map<u32, bool> processed_ranges;
 
-            for (int i = 0; i < m_cache.size(); ++i)
+            reader_lock lock(m_cache_mutex);
+
+            for (auto It = m_cache.begin(); It != m_cache.end(); It++)
             {
-                auto &tex = m_cache[i];
+                auto &range_data = It->second;
+                const u32 base = It->first;
+                bool range_reset = false;
 
-                if (tex.is_dirty()) continue;
-                if (!tex.is_flushable()) continue;
+                if (processed_ranges[base] || range_data.valid_count == 0)
+                    continue;
 
-                auto overlapped = tex.overlaps_page(trampled_range, address);
-                if (std::get<0>(overlapped))
+                //Quickly discard range
+                const u32 lock_base = base & ~0xfff;
+                const u32 lock_limit = align(range_data.max_range + base, 4096);
+
+                if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
+                    (lock_base > address || lock_limit <= address))
                 {
-                    auto &new_range = std::get<1>(overlapped);
-                    if (new_range.first != trampled_range.first ||
-                        new_range.second != trampled_range.second)
-                    {
-                        trampled_range = new_range;
-                        i = 0;
-                    }
+                    processed_ranges[base] = true;
+                    continue;
+                }
 
-                    //TODO: Map basic host_visible memory without coherent constraint
-                    if (!tex.flush(dev, cmd, memory_types.host_visible_coherent, submit_queue))
-                    {
-                        //Missed address, note this
-                        //TODO: Lower severity when successful to keep the cache from overworking
-                        record_cache_miss(tex);
-                    }
+                for (int i = 0; i < range_data.data.size(); i++)
+                {
+                    auto &tex = range_data.data[i];
 
-                    response = true;
+                    if (tex.is_dirty()) continue;
+                    if (!tex.is_flushable()) continue;
+
+                    auto overlapped = tex.overlaps_page(trampled_range, address);
+                    if (std::get<0>(overlapped))
+                    {
+                        auto &new_range = std::get<1>(overlapped);
+                        if (new_range.first != trampled_range.first ||
+                            new_range.second != trampled_range.second)
+                        {
+                            i = 0;
+                            trampled_range = new_range;
+                            range_reset = true;
+                        }
+
+                        //TODO: Map basic host_visible memory without coherent constraint
+                        if (!tex.flush(dev, cmd, memory_types.host_visible_coherent, submit_queue))
+                        {
+                            //Missed address, note this
+                            //TODO: Lower severity when successful to keep the cache from overworking
+                            record_cache_miss(tex);
+                        }
+
+                        response = true;
+                    }
+                }
+
+                if (range_reset)
+                {
+                    processed_ranges.clear();
+                    It = m_cache.begin();
+                }
+
+                processed_ranges[base] = true;
             }
 
             return response;
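
Note: flushing one section can unprotect pages shared with its neighbours, widening the "trampled" range; whenever overlaps_page() reports a larger range, the inner index and processed_ranges are reset so earlier buckets are reconsidered against the wider range, and the memoization keeps the restart from redoing finished buckets. The underlying fixpoint idea, reduced to plain ranges (illustrative, not the cache's types):

    #include <algorithm>
    #include <utility>
    #include <vector>

    using range_t = std::pair<unsigned, unsigned>; // [start, end)

    // Union of every section transitively connected to the faulting address:
    // restart the scan each time the candidate range grows, until stable.
    range_t trampled_union(const std::vector<range_t>& sections, unsigned address)
    {
        range_t current{ address, address + 1 };

        bool changed = true;
        while (changed)
        {
            changed = false;
            for (const auto& s : sections)
            {
                if (s.first < current.second && current.first < s.second)
                {
                    const range_t widened{ std::min(current.first, s.first),
                                           std::max(current.second, s.second) };
                    if (widened != current) { current = widened; changed = true; }
                }
            }
        }

        return current;
    }
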
@@ -717,37 +834,79 @@ namespace vk
         bool invalidate_address(u32 address)
         {
-            if (address < texture_cache_range.first ||
-                address > texture_cache_range.second)
-                return false;
+            if (address < read_only_range.first ||
+                address > read_only_range.second)
+            {
+                //Doesnt fall in the read_only textures range; check render targets
+                if (address < no_access_range.first ||
+                    address > no_access_range.second)
+                    return false;
+            }
 
             bool response = false;
             std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);
+            std::unordered_map<u32, bool> processed_ranges;
 
-            for (int i = 0; i < m_cache.size(); ++i)
+            reader_lock lock(m_cache_mutex);
+
+            for (auto It = m_cache.begin(); It != m_cache.end(); It++)
             {
-                auto &tex = m_cache[i];
+                auto &range_data = It->second;
+                const u32 base = It->first;
+                bool range_reset = false;
 
-                if (tex.is_dirty()) continue;
-                if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
+                if (processed_ranges[base] || range_data.valid_count == 0)
+                    continue;
 
-                auto overlapped = tex.overlaps_page(trampled_range, address);
-                if (std::get<0>(overlapped))
+                //Quickly discard range
+                const u32 lock_base = base & ~0xfff;
+                const u32 lock_limit = align(range_data.max_range + base, 4096);
+
+                if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) &&
+                    (lock_base > address || lock_limit <= address))
                 {
-                    auto &new_range = std::get<1>(overlapped);
-                    if (new_range.first != trampled_range.first ||
-                        new_range.second != trampled_range.second)
-                    {
-                        trampled_range = new_range;
-                        i = 0;
-                    }
-
-                    tex.set_dirty(true);
-                    tex.unprotect();
-
-                    response = true;
+                    processed_ranges[base] = true;
+                    continue;
                 }
+
+                for (int i = 0; i < range_data.data.size(); i++)
+                {
+                    auto &tex = range_data.data[i];
+
+                    if (tex.is_dirty()) continue;
+                    if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better
+
+                    auto overlapped = tex.overlaps_page(trampled_range, address);
+                    if (std::get<0>(overlapped))
+                    {
+                        auto &new_range = std::get<1>(overlapped);
+                        if (new_range.first != trampled_range.first ||
+                            new_range.second != trampled_range.second)
+                        {
+                            i = 0;
+                            trampled_range = new_range;
+                            range_reset = true;
+                        }
+
+                        // Upgrade to writer lock
+                        lock.upgrade();
+
+                        tex.set_dirty(true);
+                        tex.unprotect();
+
+                        range_data.valid_count--;
+                        response = true;
+                    }
+                }
+
+                if (range_reset)
+                {
+                    processed_ranges.clear();
+                    It = m_cache.begin();
+                }
+
+                processed_ranges[base] = true;
             }
 
             return response;
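
Note: invalidate_address() scans under the reader lock and upgrades to exclusive only once a section actually has to be marked dirty, while valid_count lets buckets whose sections are all dirty be skipped without walking their vectors. std::shared_mutex has no in-place upgrade() like the emulator's shared_mutex, so a standard-library sketch of the pattern must release and re-check:

    #include <mutex>
    #include <shared_mutex>

    std::shared_mutex cache_mutex;

    // scan() is a cheap read-only predicate; mutate() flips the dirty bits.
    template <typename Scan, typename Mutate>
    bool scan_then_mutate(Scan scan, Mutate mutate)
    {
        {
            std::shared_lock lock(cache_mutex);
            if (!scan())      // common case: nothing overlaps, stay shared
                return false;
        }

        std::unique_lock lock(cache_mutex);
        if (!scan())          // re-check: another thread may have acted first
            return false;

        mutate();
        return true;
    }
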