mirror of https://github.com/PCSX2/pcsx2.git
gsdx tc: merge page coverage code
A function was already implemented for OpenCL. Use the same one for the other renderers.
This commit is contained in:
parent
5c7c9452d6
commit
d67b9cba14
|
@ -2053,19 +2053,13 @@ GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm)
|
||||||
pixel.col[i] = GSLocalMemory::m_psm[_psm].rowOffset[i];
|
pixel.col[i] = GSLocalMemory::m_psm[_psm].rowOffset[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i = 0; i < 256; i++)
|
pages_as_bit.fill(nullptr);
|
||||||
{
|
|
||||||
coverages[i] = nullptr;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GSOffset::~GSOffset()
|
GSOffset::~GSOffset()
|
||||||
{
|
{
|
||||||
for(int i = 0; i < 256; i++)
|
for(auto buffer: pages_as_bit)
|
||||||
{
|
_aligned_free(buffer);
|
||||||
_aligned_free(coverages[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bbox)
|
uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bbox)
|
||||||
|
@ -2132,24 +2126,42 @@ uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bb
|
||||||
return pages;
|
return pages;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSVector4i* GSOffset::GetPagesAsBits(const GSVector4i& rect, GSVector4i* pages, GSVector4i* bbox)
|
uint32* GSOffset::GetPagesAsBits(const GIFRegTEX0& TEX0)
|
||||||
{
|
{
|
||||||
if(pages == NULL)
|
// Performance note:
|
||||||
{
|
// GSOffset is per bp/bw/psm
|
||||||
pages = (GSVector4i*)_aligned_malloc(sizeof(GSVector4i) * 4, 16);
|
// Pages coverage depends also on TW and Th (8bits). Therefore we will save them as a small array.
|
||||||
}
|
// It is faster than a hash cache and it reduces the GetPagesAsBits overhead.
|
||||||
|
|
||||||
pages[0] = GSVector4i::zero();
|
int hash_key = (TEX0.u64 >> 26) & 0xFF;
|
||||||
pages[1] = GSVector4i::zero();
|
uint32* pages = pages_as_bit[hash_key];
|
||||||
pages[2] = GSVector4i::zero();
|
|
||||||
pages[3] = GSVector4i::zero();
|
if (pages)
|
||||||
|
return pages;
|
||||||
|
|
||||||
|
// Aligned on 64 bytes to store the full bitmap in a single cache line
|
||||||
|
pages = (uint32*)_aligned_malloc(MAX_PAGES/8, 64);
|
||||||
|
pages_as_bit[hash_key] = pages;
|
||||||
|
|
||||||
|
GetPagesAsBits(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH), pages);
|
||||||
|
|
||||||
|
return pages;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void* GSOffset::GetPagesAsBits(const GSVector4i& rect, void* pages)
|
||||||
|
{
|
||||||
|
ASSERT(pages != nullptr);
|
||||||
|
|
||||||
|
((GSVector4i*)pages)[0] = GSVector4i::zero();
|
||||||
|
((GSVector4i*)pages)[1] = GSVector4i::zero();
|
||||||
|
((GSVector4i*)pages)[2] = GSVector4i::zero();
|
||||||
|
((GSVector4i*)pages)[3] = GSVector4i::zero();
|
||||||
|
|
||||||
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
|
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
|
||||||
|
|
||||||
GSVector4i r = rect.ralign<Align_Outside>(bs);
|
GSVector4i r = rect.ralign<Align_Outside>(bs);
|
||||||
|
|
||||||
if(bbox != NULL) *bbox = r;
|
|
||||||
|
|
||||||
r = r.sra32(3);
|
r = r.sra32(3);
|
||||||
|
|
||||||
bs.x >>= 3;
|
bs.x >>= 3;
|
||||||
|
@ -2168,5 +2180,4 @@ GSVector4i* GSOffset::GetPagesAsBits(const GSVector4i& rect, GSVector4i* pages,
|
||||||
}
|
}
|
||||||
|
|
||||||
return pages;
|
return pages;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,7 @@ public:
|
||||||
Block block;
|
Block block;
|
||||||
Pixel pixel;
|
Pixel pixel;
|
||||||
|
|
||||||
uint32* coverages[256]; // texture page coverage based on the texture size. Lazy allocated
|
std::array<uint32*,256> pages_as_bit; // texture page coverage based on the texture size. Lazy allocated
|
||||||
|
|
||||||
GSOffset(uint32 bp, uint32 bw, uint32 psm);
|
GSOffset(uint32 bp, uint32 bw, uint32 psm);
|
||||||
virtual ~GSOffset();
|
virtual ~GSOffset();
|
||||||
|
@ -55,7 +55,8 @@ public:
|
||||||
enum {EOP = 0xffffffff};
|
enum {EOP = 0xffffffff};
|
||||||
|
|
||||||
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
|
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
|
||||||
GSVector4i* GetPagesAsBits(const GSVector4i& rect, GSVector4i* pages = NULL, GSVector4i* bbox = NULL); // free returned value with _aligned_free
|
void* GetPagesAsBits(const GSVector4i& rect, void* pages);
|
||||||
|
uint32* GetPagesAsBits(const GIFRegTEX0& TEX0);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct GSPixelOffset
|
struct GSPixelOffset
|
||||||
|
|
|
@ -1607,6 +1607,9 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR
|
||||||
{
|
{
|
||||||
m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
|
m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GSOffset* off = m_renderer->m_context->offset.tex;
|
||||||
|
m_pages_as_bit = off->GetPagesAsBits(m_TEX0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1987,13 +1990,10 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset*
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remaining code will compute a list of pages that are dirty (in a similar fashion as GSOffset::GetPages)
|
|
||||||
// (Maybe GetPages could be used instead, perf opt?)
|
|
||||||
// The source pointer will be stored/duplicated in all m_map[array of pages]
|
// The source pointer will be stored/duplicated in all m_map[array of pages]
|
||||||
s->m_pages_ptr = GetPagesCoverage(TEX0, off);
|
|
||||||
for(size_t i = 0; i < countof(m_pages); i++)
|
for(size_t i = 0; i < countof(m_pages); i++)
|
||||||
{
|
{
|
||||||
if(uint32 p = s->m_pages_ptr[i])
|
if(uint32 p = s->m_pages_as_bit[i])
|
||||||
{
|
{
|
||||||
list<Source*>* m = &m_map[i << 5];
|
list<Source*>* m = &m_map[i << 5];
|
||||||
auto* e = &s->m_erase_it[i << 5];
|
auto* e = &s->m_erase_it[i << 5];
|
||||||
|
@ -2014,56 +2014,6 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset*
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32* GSTextureCache::SourceMap::GetPagesCoverage(const GIFRegTEX0& TEX0, GSOffset* off)
|
|
||||||
{
|
|
||||||
// Performance note:
|
|
||||||
// GSOffset is a hash lookup of the following parameter TB0, TBW, PSM
|
|
||||||
// Coverage adds TW and Th (8bits). Therefore GSOffset was extended with a small array.
|
|
||||||
// Avoid the hash map overhead (memory and lookup)
|
|
||||||
|
|
||||||
int index = (TEX0.u64 >> 26) & 0xFF;
|
|
||||||
|
|
||||||
if (off->coverages[index])
|
|
||||||
return off->coverages[index];
|
|
||||||
|
|
||||||
// Aligned on 64 bytes to store the full bitmap in a single cache line
|
|
||||||
uint32* pages = (uint32*)_aligned_malloc(MAX_PAGES/8, 64);
|
|
||||||
|
|
||||||
off->coverages[index] = pages;
|
|
||||||
|
|
||||||
((GSVector4i*)pages)[0] = GSVector4i::zero();
|
|
||||||
((GSVector4i*)pages)[1] = GSVector4i::zero();
|
|
||||||
((GSVector4i*)pages)[2] = GSVector4i::zero();
|
|
||||||
((GSVector4i*)pages)[3] = GSVector4i::zero();
|
|
||||||
|
|
||||||
// Remaining code will compute a list of pages that are dirty (in a similar fashion as GSOffset::GetPages)
|
|
||||||
// (Maybe GetPages could be used instead, perf opt?)
|
|
||||||
// The source pointer will be stored/duplicated in all m_map[array of pages]
|
|
||||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
|
||||||
|
|
||||||
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
|
|
||||||
|
|
||||||
int tw = 1 << TEX0.TW;
|
|
||||||
int th = 1 << TEX0.TH;
|
|
||||||
|
|
||||||
for(int y = 0; y < th; y += bs.y)
|
|
||||||
{
|
|
||||||
uint32 base = off->block.row[y >> 3];
|
|
||||||
|
|
||||||
for(int x = 0; x < tw; x += bs.x)
|
|
||||||
{
|
|
||||||
uint32 page = (base + off->block.col[x >> 3]) >> 5;
|
|
||||||
|
|
||||||
if(page < MAX_PAGES)
|
|
||||||
{
|
|
||||||
pages[page >> 5] |= 1 << (page & 31);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return pages;
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSTextureCache::SourceMap::RemoveAll()
|
void GSTextureCache::SourceMap::RemoveAll()
|
||||||
{
|
{
|
||||||
for_each(m_surfaces.begin(), m_surfaces.end(), delete_object());
|
for_each(m_surfaces.begin(), m_surfaces.end(), delete_object());
|
||||||
|
@ -2092,10 +2042,9 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Mirror of GetPagesCoverage
|
|
||||||
for(size_t i = 0; i < countof(m_pages); i++)
|
for(size_t i = 0; i < countof(m_pages); i++)
|
||||||
{
|
{
|
||||||
if(uint32 p = s->m_pages_ptr[i])
|
if(uint32 p = s->m_pages_as_bit[i])
|
||||||
{
|
{
|
||||||
list<Source*>* m = &m_map[i << 5];
|
list<Source*>* m = &m_map[i << 5];
|
||||||
auto* e = &s->m_erase_it[i << 5];
|
auto* e = &s->m_erase_it[i << 5];
|
||||||
|
|
|
@ -74,7 +74,7 @@ public:
|
||||||
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
|
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
|
||||||
// Keep an GSTextureCache::m_map iterator to allow fast erase
|
// Keep an GSTextureCache::m_map iterator to allow fast erase
|
||||||
std::array<std::list<Source*>::iterator, MAX_PAGES> m_erase_it;
|
std::array<std::list<Source*>::iterator, MAX_PAGES> m_erase_it;
|
||||||
uint32* m_pages_ptr;
|
uint32* m_pages_as_bit;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false);
|
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false);
|
||||||
|
@ -108,7 +108,6 @@ public:
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
hash_set<Source*> m_surfaces;
|
hash_set<Source*> m_surfaces;
|
||||||
hash_map<uint64, uint32*> m_pages_coverage;
|
|
||||||
std::list<Source*> m_map[MAX_PAGES];
|
std::list<Source*> m_map[MAX_PAGES];
|
||||||
uint32 m_pages[16]; // bitmap of all pages
|
uint32 m_pages[16]; // bitmap of all pages
|
||||||
bool m_used;
|
bool m_used;
|
||||||
|
@ -119,8 +118,6 @@ public:
|
||||||
void RemoveAll();
|
void RemoveAll();
|
||||||
void RemovePartial();
|
void RemovePartial();
|
||||||
void RemoveAt(Source* s);
|
void RemoveAt(Source* s);
|
||||||
|
|
||||||
uint32* GetPagesCoverage(const GIFRegTEX0& TEX0, GSOffset* off);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -179,20 +179,13 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(m_valid, 0, sizeof(m_valid));
|
memset(m_valid, 0, sizeof(m_valid));
|
||||||
memset(m_pages.bm, 0, sizeof(m_pages.bm));
|
|
||||||
|
|
||||||
m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);
|
m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);
|
||||||
|
|
||||||
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||||
|
|
||||||
m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
||||||
|
memcpy(m_pages.bm, m_offset->GetPagesAsBits(TEX0), sizeof(m_pages.bm));
|
||||||
for(const uint32* p = m_pages.n; *p != GSOffset::EOP; p++)
|
|
||||||
{
|
|
||||||
uint32 page = *p;
|
|
||||||
|
|
||||||
m_pages.bm[page >> 5] |= 1 << (page & 31);
|
|
||||||
}
|
|
||||||
|
|
||||||
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
|
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue