diff --git a/plugins/GSdx/GSLocalMemory.cpp b/plugins/GSdx/GSLocalMemory.cpp index a4e2789a7a..5b807350d3 100644 --- a/plugins/GSdx/GSLocalMemory.cpp +++ b/plugins/GSdx/GSLocalMemory.cpp @@ -2053,19 +2053,13 @@ GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm) pixel.col[i] = GSLocalMemory::m_psm[_psm].rowOffset[i]; } - for(int i = 0; i < 256; i++) - { - coverages[i] = nullptr; - } + pages_as_bit.fill(nullptr); } GSOffset::~GSOffset() { - for(int i = 0; i < 256; i++) - { - _aligned_free(coverages[i]); - } - + for(auto buffer: pages_as_bit) + _aligned_free(buffer); } uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bbox) @@ -2132,24 +2126,42 @@ uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bb return pages; } -GSVector4i* GSOffset::GetPagesAsBits(const GSVector4i& rect, GSVector4i* pages, GSVector4i* bbox) +uint32* GSOffset::GetPagesAsBits(const GIFRegTEX0& TEX0) { - if(pages == NULL) - { - pages = (GSVector4i*)_aligned_malloc(sizeof(GSVector4i) * 4, 16); - } + // Performance note: + // GSOffset is per bp/bw/psm + // Pages coverage depends also on TW and Th (8bits). Therefore we will save them as a small array. + // It is faster than a hash cache and it reduces the GetPagesAsBits overhead. - pages[0] = GSVector4i::zero(); - pages[1] = GSVector4i::zero(); - pages[2] = GSVector4i::zero(); - pages[3] = GSVector4i::zero(); + int hash_key = (TEX0.u64 >> 26) & 0xFF; + uint32* pages = pages_as_bit[hash_key]; + + if (pages) + return pages; + + // Aligned on 64 bytes to store the full bitmap in a single cache line + pages = (uint32*)_aligned_malloc(MAX_PAGES/8, 64); + pages_as_bit[hash_key] = pages; + + GetPagesAsBits(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH), pages); + + return pages; +} + + +void* GSOffset::GetPagesAsBits(const GSVector4i& rect, void* pages) +{ + ASSERT(pages != nullptr); + + ((GSVector4i*)pages)[0] = GSVector4i::zero(); + ((GSVector4i*)pages)[1] = GSVector4i::zero(); + ((GSVector4i*)pages)[2] = GSVector4i::zero(); + ((GSVector4i*)pages)[3] = GSVector4i::zero(); GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs; GSVector4i r = rect.ralign(bs); - if(bbox != NULL) *bbox = r; - r = r.sra32(3); bs.x >>= 3; @@ -2168,5 +2180,4 @@ GSVector4i* GSOffset::GetPagesAsBits(const GSVector4i& rect, GSVector4i* pages, } return pages; - } diff --git a/plugins/GSdx/GSLocalMemory.h b/plugins/GSdx/GSLocalMemory.h index 7243fa0492..00ed960fd0 100644 --- a/plugins/GSdx/GSLocalMemory.h +++ b/plugins/GSdx/GSLocalMemory.h @@ -47,7 +47,7 @@ public: Block block; Pixel pixel; - uint32* coverages[256]; // texture page coverage based on the texture size. Lazy allocated + std::array pages_as_bit; // texture page coverage based on the texture size. Lazy allocated GSOffset(uint32 bp, uint32 bw, uint32 psm); virtual ~GSOffset(); @@ -55,7 +55,8 @@ public: enum {EOP = 0xffffffff}; uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL); - GSVector4i* GetPagesAsBits(const GSVector4i& rect, GSVector4i* pages = NULL, GSVector4i* bbox = NULL); // free returned value with _aligned_free + void* GetPagesAsBits(const GSVector4i& rect, void* pages); + uint32* GetPagesAsBits(const GIFRegTEX0& TEX0); }; struct GSPixelOffset diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index a4e2b27c56..af94aa0891 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -1607,6 +1607,9 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR { m_p2t = r->m_mem.GetPage2TileMap(m_TEX0); } + + GSOffset* off = m_renderer->m_context->offset.tex; + m_pages_as_bit = off->GetPagesAsBits(m_TEX0); } } @@ -1987,13 +1990,10 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* return; } - // Remaining code will compute a list of pages that are dirty (in a similar fashion as GSOffset::GetPages) - // (Maybe GetPages could be used instead, perf opt?) // The source pointer will be stored/duplicated in all m_map[array of pages] - s->m_pages_ptr = GetPagesCoverage(TEX0, off); for(size_t i = 0; i < countof(m_pages); i++) { - if(uint32 p = s->m_pages_ptr[i]) + if(uint32 p = s->m_pages_as_bit[i]) { list* m = &m_map[i << 5]; auto* e = &s->m_erase_it[i << 5]; @@ -2014,56 +2014,6 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* } } -uint32* GSTextureCache::SourceMap::GetPagesCoverage(const GIFRegTEX0& TEX0, GSOffset* off) -{ - // Performance note: - // GSOffset is a hash lookup of the following parameter TB0, TBW, PSM - // Coverage adds TW and Th (8bits). Therefore GSOffset was extended with a small array. - // Avoid the hash map overhead (memory and lookup) - - int index = (TEX0.u64 >> 26) & 0xFF; - - if (off->coverages[index]) - return off->coverages[index]; - - // Aligned on 64 bytes to store the full bitmap in a single cache line - uint32* pages = (uint32*)_aligned_malloc(MAX_PAGES/8, 64); - - off->coverages[index] = pages; - - ((GSVector4i*)pages)[0] = GSVector4i::zero(); - ((GSVector4i*)pages)[1] = GSVector4i::zero(); - ((GSVector4i*)pages)[2] = GSVector4i::zero(); - ((GSVector4i*)pages)[3] = GSVector4i::zero(); - - // Remaining code will compute a list of pages that are dirty (in a similar fashion as GSOffset::GetPages) - // (Maybe GetPages could be used instead, perf opt?) - // The source pointer will be stored/duplicated in all m_map[array of pages] - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; - - GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs; - - int tw = 1 << TEX0.TW; - int th = 1 << TEX0.TH; - - for(int y = 0; y < th; y += bs.y) - { - uint32 base = off->block.row[y >> 3]; - - for(int x = 0; x < tw; x += bs.x) - { - uint32 page = (base + off->block.col[x >> 3]) >> 5; - - if(page < MAX_PAGES) - { - pages[page >> 5] |= 1 << (page & 31); - } - } - } - - return pages; -} - void GSTextureCache::SourceMap::RemoveAll() { for_each(m_surfaces.begin(), m_surfaces.end(), delete_object()); @@ -2092,10 +2042,9 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s) } else { - // Mirror of GetPagesCoverage for(size_t i = 0; i < countof(m_pages); i++) { - if(uint32 p = s->m_pages_ptr[i]) + if(uint32 p = s->m_pages_as_bit[i]) { list* m = &m_map[i << 5]; auto* e = &s->m_erase_it[i << 5]; diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 1b9e0bb93b..4d87b70bea 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -74,7 +74,7 @@ public: GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value // Keep an GSTextureCache::m_map iterator to allow fast erase std::array::iterator, MAX_PAGES> m_erase_it; - uint32* m_pages_ptr; + uint32* m_pages_as_bit; public: Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false); @@ -108,7 +108,6 @@ public: { public: hash_set m_surfaces; - hash_map m_pages_coverage; std::list m_map[MAX_PAGES]; uint32 m_pages[16]; // bitmap of all pages bool m_used; @@ -119,8 +118,6 @@ public: void RemoveAll(); void RemovePartial(); void RemoveAt(Source* s); - - uint32* GetPagesCoverage(const GIFRegTEX0& TEX0, GSOffset* off); }; protected: diff --git a/plugins/GSdx/GSTextureCacheSW.cpp b/plugins/GSdx/GSTextureCacheSW.cpp index 87d58f5e64..6a9b3de67d 100644 --- a/plugins/GSdx/GSTextureCacheSW.cpp +++ b/plugins/GSdx/GSTextureCacheSW.cpp @@ -179,20 +179,13 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& } memset(m_valid, 0, sizeof(m_valid)); - memset(m_pages.bm, 0, sizeof(m_pages.bm)); m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM); m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH)); - - for(const uint32* p = m_pages.n; *p != GSOffset::EOP; p++) - { - uint32 page = *p; - - m_pages.bm[page >> 5] |= 1 << (page & 31); - } + memcpy(m_pages.bm, m_offset->GetPagesAsBits(TEX0), sizeof(m_pages.bm)); m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower