mirror of https://github.com/PCSX2/pcsx2.git
gsdx tc: reduce texture cache overhead
Cache the page coverage of each texture in a hash map. Test done on Champions of Norrath (paltex + DisablePartialInvalidation). Profiler: Self of GSTextureCache::SourceMap::Add 5.39% => 0.23%; Self of GSTextureCache::LookupSource 15.27% => 10.82%. Hard to measure on CoN as it depends on memory transfer. Seems to be 5-10 fps faster.
This commit is contained in:
parent
67e955919f
commit
fa1377a8ee
|
@ -1761,6 +1761,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSO
|
|||
// Remaining code will compute a list of pages that are dirty (in a similar fashion as GSOffset::GetPages)
|
||||
// (Maybe GetPages could be used instead, perf opt?)
|
||||
// The source pointer will be stored/duplicated in all m_map[array of pages]
|
||||
#if 0
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
|
||||
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
|
||||
|
@ -1801,6 +1802,71 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSO
|
|||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
uint32* pages = GetPagesCoverage(TEX0, off);
|
||||
for(size_t i = 0; i < countof(m_pages); i++)
|
||||
{
|
||||
if(uint32 p = pages[i])
|
||||
{
|
||||
list<Source*>* m = &m_map[i << 5];
|
||||
|
||||
unsigned long j;
|
||||
|
||||
while(_BitScanForward(&j, p))
|
||||
{
|
||||
p ^= 1 << j;
|
||||
|
||||
m[j].push_front(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns the page-coverage bitmap of the texture described by TEX0: one bit per
// page, MAX_PAGES bits in total, bit (page & 31) of word (page >> 5) set when the
// texture touches that page. The bitmap is computed on the first request and then
// served from m_pages_coverage on later requests with the same key.
//
// TEX0: texture register; only its low 34 bits (TBP0 TBW PSM TW TH) form the cache key.
// off:  offset tables used to turn texel coordinates into block addresses.
//
// The returned buffer stays owned by m_pages_coverage (RemoveAll releases the
// entries with _aligned_free); callers must not free it.
uint32* GSTextureCache::SourceMap::GetPagesCoverage(const GIFRegTEX0& TEX0, const GSOffset* off)
{
	uint64 hash = TEX0.u64 & 0x3ffffffffull; // TBP0 TBW PSM TW TH

	auto it_pages = m_pages_coverage.find(hash);

	if(it_pages != m_pages_coverage.end()) return it_pages->second;

	// Aligned on 64 bytes to store the full bitmap in a single cache line
	// NOTE(review): the allocation result is used without a null check.
	uint32* pages = (uint32*)_aligned_malloc(MAX_PAGES/8, 64);

	m_pages_coverage.emplace(hash, pages);

	// Clear exactly the allocated bitmap. The word count is derived from MAX_PAGES
	// so the clear cannot drift from the allocation size above.
	for(size_t i = 0; i < MAX_PAGES / 8 / sizeof(uint32); i++)
	{
		pages[i] = 0;
	}

	// Walk the texture and flag every page it touches
	// (in a similar fashion as GSOffset::GetPages; maybe GetPages could be used instead, perf opt?)

	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];

	// Step is psm.pgs when the low 5 bits of TBP0 are zero, psm.bs otherwise
	// (presumably page size vs. block size of the format -- confirm against psm_t)
	GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;

	int tw = 1 << TEX0.TW;
	int th = 1 << TEX0.TH;

	for(int y = 0; y < th; y += bs.y)
	{
		uint32 base = off->block.row[y >> 3];

		for(int x = 0; x < tw; x += bs.x)
		{
			// row + column gives an address; >> 5 reduces it to a page index
			uint32 page = (base + off->block.col[x >> 3]) >> 5;

			// Addresses that land past the last page are ignored
			if(page < MAX_PAGES)
			{
				// Unsigned literal: with a signed 1, bit index 31 would shift into the sign bit
				pages[page >> 5] |= 1u << (page & 31);
			}
		}
	}

	return pages;
}
|
||||
|
||||
void GSTextureCache::SourceMap::RemoveAll()
|
||||
|
@ -1813,6 +1879,9 @@ void GSTextureCache::SourceMap::RemoveAll()
|
|||
{
|
||||
m_map[i].clear();
|
||||
}
|
||||
|
||||
for_each(m_pages_coverage.begin(), m_pages_coverage.end(), aligned_free_second());
|
||||
m_pages_coverage.clear();
|
||||
}
|
||||
|
||||
void GSTextureCache::SourceMap::RemoveAt(Source* s)
|
||||
|
|
|
@ -98,6 +98,7 @@ public:
|
|||
{
|
||||
public:
|
||||
hash_set<Source*> m_surfaces;
|
||||
hash_map<uint64, uint32*> m_pages_coverage;
|
||||
list<Source*> m_map[MAX_PAGES];
|
||||
uint32 m_pages[16]; // bitmap of all pages
|
||||
bool m_used;
|
||||
|
@ -108,6 +109,8 @@ public:
|
|||
void RemoveAll();
|
||||
void RemovePartial();
|
||||
void RemoveAt(Source* s);
|
||||
|
||||
uint32* GetPagesCoverage(const GIFRegTEX0& TEX0, const GSOffset* off);
|
||||
};
|
||||
|
||||
protected:
|
||||
|
|
|
@ -234,15 +234,6 @@ using namespace stdext;
|
|||
|
||||
#endif
|
||||
|
||||
extern string format(const char* fmt, ...);
|
||||
|
||||
// Function object: applies delete to the pointer it is called with.
struct delete_object
{
	template<class Ptr>
	void operator()(Ptr& ptr)
	{
		delete ptr;
	}
};
|
||||
// Function object: applies delete to the .first member of the element it is called with.
struct delete_first
{
	template<class Entry>
	void operator()(Entry& entry)
	{
		delete entry.first;
	}
};
|
||||
// Function object: applies delete to the .second member of the element it is called with.
struct delete_second
{
	template<class Entry>
	void operator()(Entry& entry)
	{
		delete entry.second;
	}
};
|
||||
// Function object: passes the pointer it is called with to _aligned_free.
struct aligned_free_object
{
	template<class Ptr>
	void operator()(Ptr& ptr)
	{
		_aligned_free(ptr);
	}
};
|
||||
// Function object: passes the .first member of the element it is called with to _aligned_free.
struct aligned_free_first
{
	template<class Entry>
	void operator()(Entry& entry)
	{
		_aligned_free(entry.first);
	}
};
|
||||
// Function object: passes the .second member of the element it is called with to _aligned_free.
struct aligned_free_second
{
	template<class Entry>
	void operator()(Entry& entry)
	{
		_aligned_free(entry.second);
	}
};
|
||||
|
||||
// Number of elements in the array a: total size divided by the size of one element.
// The argument is parenthesized before indexing so that an expression argument
// keeps its own grouping inside the expansion.
#define countof(a) (sizeof(a) / sizeof((a)[0]))
|
||||
|
||||
#ifndef RESTRICT
|
||||
|
@ -409,6 +400,15 @@ struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr
|
|||
|
||||
#endif
|
||||
|
||||
extern string format(const char* fmt, ...);
|
||||
|
||||
// Function object: applies delete to the pointer it is called with.
struct delete_object
{
	template<class Ptr>
	void operator()(Ptr& ptr)
	{
		delete ptr;
	}
};
|
||||
// Function object: applies delete to the .first member of the element it is called with.
struct delete_first
{
	template<class Entry>
	void operator()(Entry& entry)
	{
		delete entry.first;
	}
};
|
||||
// Function object: applies delete to the .second member of the element it is called with.
struct delete_second
{
	template<class Entry>
	void operator()(Entry& entry)
	{
		delete entry.second;
	}
};
|
||||
// Function object: passes the pointer it is called with to _aligned_free.
struct aligned_free_object
{
	template<class Ptr>
	void operator()(Ptr& ptr)
	{
		_aligned_free(ptr);
	}
};
|
||||
// Function object: passes the .first member of the element it is called with to _aligned_free.
struct aligned_free_first
{
	template<class Entry>
	void operator()(Entry& entry)
	{
		_aligned_free(entry.first);
	}
};
|
||||
// Function object: passes the .second member of the element it is called with to _aligned_free.
struct aligned_free_second
{
	template<class Entry>
	void operator()(Entry& entry)
	{
		_aligned_free(entry.second);
	}
};
|
||||
|
||||
extern void* vmalloc(size_t size, bool code);
|
||||
extern void vmfree(void* ptr, size_t size);
|
||||
|
||||
|
|
Loading…
Reference in New Issue