GSdx: implemented caching of the new page-to-tile map, so dq8 should be fast again

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4597 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-04-26 00:56:54 +00:00
parent e59670ff97
commit fd0245d365
6 changed files with 42 additions and 26 deletions

View File

@ -448,6 +448,11 @@ GSLocalMemory::~GSLocalMemory()
for_each(m_omap.begin(), m_omap.end(), aligned_free_second());
for_each(m_po4map.begin(), m_po4map.end(), aligned_free_second());
for(hash_map<uint32, list<GSVector2i>*>::iterator i = m_p2tmap.begin(); i != m_p2tmap.end(); i++)
{
delete [] i->second;
}
}
GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
@ -542,20 +547,25 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
return o;
}
void GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0, list<GSVector2i>* page2tile)
list<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
{
// TODO: cache this, hash = hash of o + tw + th (th not even needed, it can be 1024 always)
uint32 hash = TEX0.TBP0 | (TEX0.TBW << 14) | (TEX0.PSM << 20) | (TEX0.TW << 26);
const GSOffset* o = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
hash_map<uint32, list<GSVector2i>*>::iterator i = m_p2tmap.find(hash);
const GSLocalMemory::psm_t& psm = m_psm[TEX0.PSM];
if(i != m_p2tmap.end())
{
return i->second;
}
GSVector2i bs = psm.bs;
GSVector2i bs = m_psm[TEX0.PSM].bs;
int tw = std::max<int>(1 << TEX0.TW, bs.x);
int th = std::max<int>(1 << TEX0.TH, bs.y);
map<uint32, hash_set<uint32> > tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
const GSOffset* o = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
hash_map<uint32, hash_set<uint32> > tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
for(int y = 0; y < th; y += bs.y)
{
@ -574,11 +584,13 @@ void GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0, list<GSVector2i>* pa
// combine the lower 5 bits of the address into a 9:5 pointer:mask form, so the "valid bits" can be tested against a uint32 array
for(map<uint32, hash_set<uint32> >::iterator i = tmp.begin(); i != tmp.end(); i++)
list<GSVector2i>* p2t = new list<GSVector2i>[MAX_PAGES];
for(hash_map<uint32, hash_set<uint32> >::iterator i = tmp.begin(); i != tmp.end(); i++)
{
uint32 page = i->first;
const hash_set<uint32>& tiles = i->second;
hash_set<uint32>& tiles = i->second;
hash_map<uint32, uint32> m;
@ -603,9 +615,13 @@ void GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0, list<GSVector2i>* pa
for(hash_map<uint32, uint32>::iterator j = m.begin(); j != m.end(); j++)
{
page2tile[page].push_back(GSVector2i(j->first, j->second));
p2t[page].push_back(GSVector2i(j->first, j->second));
}
}
m_p2tmap[hash] = p2t;
return p2t;
}
////////////////////

View File

@ -146,6 +146,7 @@ protected:
hash_map<uint32, GSOffset*> m_omap;
hash_map<uint32, GSPixelOffset4*> m_po4map;
hash_map<uint32, list<GSVector2i>*> m_p2tmap;
public:
GSLocalMemory();
@ -153,8 +154,7 @@ public:
GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm);
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
void GetPage2TileMap(const GIFRegTEX0& TEX0, list<GSVector2i>* page2tile); // count = 512
list<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
// address

View File

@ -339,7 +339,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec
{
if(s->m_repeating)
{
list<GSVector2i>& l = s->m_page2tile[page];
list<GSVector2i>& l = s->m_p2t[page];
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
{
@ -852,6 +852,7 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR
, m_fmt(0)
, m_target(false)
, m_complete(false)
, m_p2t(NULL)
{
m_TEX0 = TEX0;
m_TEXA = TEXA;
@ -869,7 +870,7 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR
if(m_repeating)
{
r->m_mem.GetPage2TileMap(m_TEX0, m_page2tile);
m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
}
}

View File

@ -75,7 +75,7 @@ public:
bool m_target;
bool m_complete;
bool m_repeating;
list<GSVector2i> m_page2tile[MAX_PAGES];
list<GSVector2i>* m_p2t;
public:
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp);

View File

@ -165,11 +165,11 @@ void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& r
{
if(t->m_repeating)
{
list<GSVector2i>& l = t->m_page2tile[page];
list<GSVector2i>& l = t->m_p2t[page];
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
{
t->m_valid[k->x] &= ~k->y;
t->m_valid[j->x] &= ~j->y;
}
}
else
@ -240,6 +240,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint3
, m_tw(tw0)
, m_age(0)
, m_complete(false)
, m_p2t(NULL)
{
m_TEX0 = TEX0;
m_TEXA = TEXA;
@ -247,6 +248,11 @@ GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint3
memset(m_valid, 0, sizeof(m_valid));
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
if(m_repeating)
{
m_p2t = m_state->m_mem.GetPage2TileMap(m_TEX0);
}
}
GSTextureCacheSW::Texture::~Texture()
@ -284,7 +290,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
if(m_buff == NULL)
{
uint32 tw0 = std::max<int>(m_TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
uint32 tw0 = std::max<int>(m_TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff
if(m_tw == 0)
{
@ -303,13 +309,6 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
{
return false;
}
const GSOffset* RESTRICT o = m_offset;
if(m_repeating)
{
m_state->m_mem.GetPage2TileMap(m_TEX0, m_page2tile);
}
}
GSLocalMemory& mem = m_state->m_mem;

View File

@ -38,7 +38,7 @@ public:
uint32 m_age;
bool m_complete;
bool m_repeating;
list<GSVector2i> m_page2tile[MAX_PAGES];
list<GSVector2i>* m_p2t;
uint32 m_valid[MAX_PAGES];
// m_valid