From fa1377a8ee5120522a47666fb4416b45fd85c3dd Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 23 Apr 2016 19:15:33 +0200 Subject: [PATCH] gsdx tc: reduce texture cache overhead Cache page coverage of texture into a hash map Test done on Champions of Norrath (paltex + DisablePartialInvalidation) Profiler: Self of GSTextureCache::SourceMap::Add 5.39% => 0.23% Self of GSTextureCache::LookupSource 15.27% => 10.82% Hard to measure on CoN as it depends on memory transfer. Seems to be 5-10 fps faster. --- plugins/GSdx/GSTextureCache.cpp | 69 +++++++++++++++++++++++++++++++++ plugins/GSdx/GSTextureCache.h | 3 ++ plugins/GSdx/stdafx.h | 18 ++++----- 3 files changed, 81 insertions(+), 9 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 7d25fcebfe..188a05fe33 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -1761,6 +1761,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSO // Remaining code will compute a list of pages that are dirty (in a similar fashion as GSOffset::GetPages) // (Maybe GetPages could be used instead, perf opt?) // The source pointer will be stored/duplicated in all m_map[array of pages] +#if 0 const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; GSVector2i bs = (TEX0.TBP0 & 31) == 0 ?
psm.pgs : psm.bs; @@ -1801,6 +1802,71 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSO } } } +#else + uint32* pages = GetPagesCoverage(TEX0, off); + for(size_t i = 0; i < countof(m_pages); i++) + { + if(uint32 p = pages[i]) + { + list<Source*>* m = &m_map[i << 5]; + + unsigned long j; + + while(_BitScanForward(&j, p)) + { + p ^= 1 << j; + + m[j].push_front(s); + } + } + } +#endif +} + +uint32* GSTextureCache::SourceMap::GetPagesCoverage(const GIFRegTEX0& TEX0, const GSOffset* off) +{ + uint64 hash = TEX0.u64 & 0x3ffffffffull; // TBP0 TBW PSM TW TH + + auto it_pages = m_pages_coverage.find(hash); + + if (it_pages != m_pages_coverage.end()) return it_pages->second; + + // Aligned on 64 bytes to store the full bitmap in a single cache line + uint32* pages = (uint32*)_aligned_malloc(MAX_PAGES/8, 64); + + m_pages_coverage.emplace(hash, pages); + + ((GSVector4i*)pages)[0] = GSVector4i::zero(); + ((GSVector4i*)pages)[1] = GSVector4i::zero(); + ((GSVector4i*)pages)[2] = GSVector4i::zero(); + ((GSVector4i*)pages)[3] = GSVector4i::zero(); + + // Remaining code will compute a list of pages that are dirty (in a similar fashion as GSOffset::GetPages) + // (Maybe GetPages could be used instead, perf opt?) + // The source pointer will be stored/duplicated in all m_map[array of pages] + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; + + GSVector2i bs = (TEX0.TBP0 & 31) == 0 ?
psm.pgs : psm.bs; + + int tw = 1 << TEX0.TW; + int th = 1 << TEX0.TH; + + for(int y = 0; y < th; y += bs.y) + { + uint32 base = off->block.row[y >> 3]; + + for(int x = 0; x < tw; x += bs.x) + { + uint32 page = (base + off->block.col[x >> 3]) >> 5; + + if(page < MAX_PAGES) + { + pages[page >> 5] |= 1 << (page & 31); + } + } + } + + return pages; } void GSTextureCache::SourceMap::RemoveAll() @@ -1813,6 +1879,9 @@ void GSTextureCache::SourceMap::RemoveAll() { m_map[i].clear(); } + + for_each(m_pages_coverage.begin(), m_pages_coverage.end(), aligned_free_second()); + m_pages_coverage.clear(); } void GSTextureCache::SourceMap::RemoveAt(Source* s) diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 0d1438481c..bba4be2ee1 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -98,6 +98,7 @@ public: { public: hash_set<Source*> m_surfaces; + hash_map<uint64, uint32*> m_pages_coverage; list<Source*> m_map[MAX_PAGES]; uint32 m_pages[16]; // bitmap of all pages bool m_used; @@ -108,6 +109,8 @@ public: void RemoveAll(); void RemovePartial(); void RemoveAt(Source* s); + + uint32* GetPagesCoverage(const GIFRegTEX0& TEX0, const GSOffset* off); }; protected: diff --git a/plugins/GSdx/stdafx.h b/plugins/GSdx/stdafx.h index 0c2f968c88..19238bdb5e 100644 --- a/plugins/GSdx/stdafx.h +++ b/plugins/GSdx/stdafx.h @@ -234,15 +234,6 @@ using namespace stdext; #endif -extern string format(const char* fmt, ...); - -struct delete_object {template<class T> void operator()(T& p) {delete p;}}; -struct delete_first {template<class T> void operator()(T& p) {delete p.first;}}; -struct delete_second {template<class T> void operator()(T& p) {delete p.second;}}; -struct aligned_free_object {template<class T> void operator()(T& p) {_aligned_free(p);}}; -struct aligned_free_first {template<class T> void operator()(T& p) {_aligned_free(p.first);}}; -struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_free(p.second);}}; - #define countof(a) (sizeof(a) / sizeof(a[0])) #ifndef RESTRICT @@ -409,6 +400,15 @@ struct
aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr #endif +extern string format(const char* fmt, ...); + +struct delete_object {template<class T> void operator()(T& p) {delete p;}}; +struct delete_first {template<class T> void operator()(T& p) {delete p.first;}}; +struct delete_second {template<class T> void operator()(T& p) {delete p.second;}}; +struct aligned_free_object {template<class T> void operator()(T& p) {_aligned_free(p);}}; +struct aligned_free_first {template<class T> void operator()(T& p) {_aligned_free(p.first);}}; +struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_free(p.second);}}; + extern void* vmalloc(size_t size, bool code); extern void vmfree(void* ptr, size_t size);