GSdx: trying another approach for r4589

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4596 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-04-25 18:18:21 +00:00
parent eaaa8eef59
commit e59670ff97
6 changed files with 186 additions and 137 deletions

View File

@ -542,6 +542,72 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
return o;
}
void GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0, list<GSVector2i>* page2tile)
{
// TODO: cache this, hash = hash of o + tw + th (th not even needed, it can be 1024 always)
const GSOffset* o = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
const GSLocalMemory::psm_t& psm = m_psm[TEX0.PSM];
GSVector2i bs = psm.bs;
int tw = std::max<int>(1 << TEX0.TW, bs.x);
int th = std::max<int>(1 << TEX0.TH, bs.y);
map<uint32, hash_set<uint32> > tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
for(int y = 0; y < th; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
for(int x = 0, i = y << 7; x < tw; x += bs.x, i += bs.x)
{
uint32 page = (base + o->block.col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
tmp[page].insert(i >> 3); // ((y << 7) | x) >> 3
}
}
}
// combine the lower 5 bits of the address into a 9:5 pointer:mask form, so the "valid bits" can be tested against an uint32 array
for(map<uint32, hash_set<uint32> >::iterator i = tmp.begin(); i != tmp.end(); i++)
{
uint32 page = i->first;
const hash_set<uint32>& tiles = i->second;
hash_map<uint32, uint32> m;
for(hash_set<uint32>::iterator j = tiles.begin(); j != tiles.end(); j++)
{
uint32 addr = *j;
uint32 row = addr >> 5;
uint32 col = 1 << (addr & 31);
hash_map<uint32, uint32>::iterator k = m.find(row);
if(k != m.end())
{
k->second |= col;
}
else
{
m[row] = col;
}
}
for(hash_map<uint32, uint32>::iterator j = m.begin(); j != m.end(); j++)
{
page2tile[page].push_back(GSVector2i(j->first, j->second));
}
}
}
////////////////////
template<int psm, int bsx, int bsy, bool aligned>

View File

@ -154,6 +154,8 @@ public:
GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm);
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
void GetPage2TileMap(const GIFRegTEX0& TEX0, list<GSVector2i>* page2tile); // count = 512
// address
static uint32 BlockNumber32(int x, int y, uint32 bp, uint32 bw)

View File

@ -133,7 +133,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
}
}
src->Update(TEX0, TEXA, r);
src->Update(r);
m_src.m_used = true;
@ -336,8 +336,21 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec
bool b = bp == s->m_TEX0.TBP0;
if(!s->m_target)
{
if(s->m_repeating)
{
list<GSVector2i>& l = s->m_page2tile[page];
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
{
s->m_valid[k->x] &= ~k->y;
}
}
else
{
s->m_valid[page] = 0;
}
s->m_complete = false;
found = b;
@ -528,10 +541,7 @@ void GSTextureCache::IncAge()
// FIXME: several issues in here. Depth stencil is not handled, and pitch conversion doesn't work.
GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst)
{
Source* src = new Source(m_renderer, m_temp);
src->m_TEX0 = TEX0;
src->m_TEXA = TEXA;
Source* src = new Source(m_renderer, TEX0, TEXA, m_temp);
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
@ -783,9 +793,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type)
{
Target* t = new Target(m_renderer, m_temp);
t->m_TEX0 = TEX0;
Target* t = new Target(m_renderer, TEX0, m_temp);
// FIXME: initial data should be unswizzled from local mem in Update() if dirty
@ -837,7 +845,7 @@ void GSTextureCache::Surface::Update()
// GSTextureCache::Source
GSTextureCache::Source::Source(GSRenderer* r, uint8* temp)
GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp)
: Surface(r, temp)
, m_palette(NULL)
, m_initpalette(true)
@ -845,6 +853,9 @@ GSTextureCache::Source::Source(GSRenderer* r, uint8* temp)
, m_target(false)
, m_complete(false)
{
m_TEX0 = TEX0;
m_TEXA = TEXA;
memset(m_valid, 0, sizeof(m_valid));
m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 32);
@ -853,6 +864,13 @@ GSTextureCache::Source::Source(GSRenderer* r, uint8* temp)
m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32);
m_write.count = 0;
m_repeating = m_TEX0.IsRepeating();
if(m_repeating)
{
r->m_mem.GetPage2TileMap(m_TEX0, m_page2tile);
}
}
GSTextureCache::Source::~Source()
@ -864,7 +882,7 @@ GSTextureCache::Source::~Source()
_aligned_free(m_write.rect);
}
void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect)
void GSTextureCache::Source::Update(const GSVector4i& rect)
{
Surface::Update();
@ -873,9 +891,6 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
return;
}
m_TEX0 = TEX0;
m_TEXA = TEXA;
GSVector2i bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs;
int tw = std::max<int>(1 << m_TEX0.TW, bs.x);
@ -890,42 +905,24 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
const GSOffset* o = m_renderer->m_context->offset.tex;
bool repeating = m_TEX0.IsRepeating();
if(repeating && m_tiles.empty())
{
for(int y = 0; y < th; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
for(int x = 0; x < tw; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
m_tiles[block].push_back(GSVector2i(x, y));
}
}
}
}
uint32 blocks = 0;
if(!repeating)
if(m_repeating)
{
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
uint32 addr = i >> 3;
uint32 row = addr >> 5;
uint32 col = 1 << (addr & 31);
if((m_valid[row] & col) == 0)
{
@ -958,19 +955,7 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
{
m_valid[row] |= col;
hash_map<uint32, list<GSVector2i> >::iterator i = m_tiles.find(block);
if(i != m_tiles.end())
{
list<GSVector2i>& l = i->second;
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
{
Write(GSVector4i(j->x, j->y, j->x + bs.x, j->y + bs.y));
blocks++;
}
}
Write(GSVector4i(x, y, x + bs.x, y + bs.y));
blocks++;
}
@ -1084,11 +1069,13 @@ void GSTextureCache::Source::Flush(uint32 count)
// GSTextureCache::Target
GSTextureCache::Target::Target(GSRenderer* r, uint8* temp)
GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp)
: Surface(r, temp)
, m_type(-1)
, m_used(false)
{
m_TEX0 = TEX0;
m_valid = GSVector4i::zero();
}

View File

@ -74,13 +74,14 @@ public:
int m_fmt;
bool m_target;
bool m_complete;
hash_map<uint32, list<GSVector2i> > m_tiles;
bool m_repeating;
list<GSVector2i> m_page2tile[MAX_PAGES];
public:
Source(GSRenderer* r, uint8* temp);
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp);
virtual ~Source();
virtual void Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect);
virtual void Update(const GSVector4i& rect);
};
class Target : public Surface
@ -92,7 +93,7 @@ public:
GSVector4i m_valid;
public:
Target(GSRenderer* r, uint8* temp);
Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp);
virtual void Update();
};

View File

@ -72,7 +72,7 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0
{
const GSOffset* o = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
t = new Texture(m_state, o, tw0);
t = new Texture(m_state, o, tw0, TEX0, TEXA);
m_textures.insert(t);
@ -123,7 +123,7 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0
}
}
if(!t->Update(TEX0, TEXA, r))
if(!t->Update(r))
{
printf("!@#$\n"); // memory allocation may fail if the game is too hungry (tales of legendia fight transition/scene)
@ -162,8 +162,21 @@ void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& r
Texture* t = *i;
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
if(t->m_repeating)
{
list<GSVector2i>& l = t->m_page2tile[page];
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
{
t->m_valid[k->x] &= ~k->y;
}
}
else
{
t->m_valid[page] = 0;
}
t->m_complete = false;
}
}
@ -220,7 +233,7 @@ void GSTextureCacheSW::IncAge()
//
GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint32 tw0)
GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
: m_state(state)
, m_offset(offset)
, m_buff(NULL)
@ -228,7 +241,12 @@ GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint3
, m_age(0)
, m_complete(false)
{
m_TEX0 = TEX0;
m_TEXA = TEXA;
memset(m_valid, 0, sizeof(m_valid));
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
}
GSTextureCacheSW::Texture::~Texture()
@ -239,29 +257,24 @@ GSTextureCacheSW::Texture::~Texture()
}
}
bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect)
bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
{
if(m_complete)
{
return true;
}
m_TEX0 = TEX0;
m_TEXA = TEXA;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
GSVector2i bs = psm.bs;
int shift = psm.pal == 0 ? 2 : 0;
int tw = std::max<int>(1 << TEX0.TW, bs.x);
int th = std::max<int>(1 << TEX0.TH, bs.y);
int tw = std::max<int>(1 << m_TEX0.TW, bs.x);
int th = std::max<int>(1 << m_TEX0.TH, bs.y);
GSVector4i r = rect;
bool repeating = m_TEX0.IsRepeating();
r = r.ralign<Align_Outside>(bs);
if(r.eq(GSVector4i(0, 0, tw, th)))
@ -271,7 +284,7 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
if(m_buff == NULL)
{
uint32 tw0 = std::max<int>(TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
uint32 tw0 = std::max<int>(m_TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
if(m_tw == 0)
{
@ -291,28 +304,11 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
return false;
}
if(repeating)
{
// TODO: pull this from cache (hash = o->... + m_tw), need to use m_buff relative pointers then
const GSOffset* RESTRICT o = m_offset;
uint8* dst = (uint8*)m_buff;
for(int y = 0, block_pitch = pitch * bs.y; y < th; y += bs.y, dst += block_pitch)
if(m_repeating)
{
uint32 base = o->block.row[y >> 3];
for(int x = 0; x < tw; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
m_tiles[block].push_back(&dst[x << shift]);
}
}
}
m_state->m_mem.GetPage2TileMap(m_TEX0, m_page2tile);
}
}
@ -326,10 +322,39 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
uint32 pitch = (1 << m_tw) << shift;
if(!repeating)
{
uint8* dst = (uint8*)m_buff + pitch * r.top;
if(m_repeating)
{
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
{
uint32 base = o->block.row[y >> 3];
for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
uint32 addr = i >> 3;
uint32 row = addr >> 5;
uint32 col = 1 << (addr & 31);
if((m_valid[row] & col) == 0)
{
m_valid[row] |= col;
(mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA);
blocks++;
}
}
}
}
}
else
{
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
{
uint32 base = o->block.row[y >> 3];
@ -347,7 +372,7 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
{
m_valid[row] |= col;
(mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA);
(mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA);
blocks++;
}
@ -355,43 +380,6 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
}
}
}
else
{
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
if((m_valid[row] & col) == 0)
{
m_valid[row] |= col;
hash_map<uint32, list<uint8*> >::iterator i = m_tiles.find(block);
if(i != m_tiles.end())
{
list<uint8*>& l = i->second;
for(list<uint8*>::iterator j = l.begin(); j != l.end(); j++)
{
(mem.*rtxbP)(block, *j, pitch, TEXA);
blocks++;
}
}
}
}
}
}
}
if(blocks > 0)
{

View File

@ -35,15 +35,20 @@ public:
GIFRegTEXA m_TEXA;
void* m_buff;
uint32 m_tw;
uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page
uint32 m_age;
bool m_complete;
hash_map<uint32, list<uint8*> > m_tiles;
bool m_repeating;
list<GSVector2i> m_page2tile[MAX_PAGES];
uint32 m_valid[MAX_PAGES];
explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0);
// m_valid
// fast mode: each uint32 bits map to the 32 blocks of that page
// repeating mode: 1 bpp image of the texture tiles (8x8), also having 512 elements is just a coincidence (worst case: (1024*1024)/(8*8)/(sizeof(uint32)*8))
explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
virtual ~Texture();
bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
bool Update(const GSVector4i& r);
bool Save(const string& fn, bool dds = false) const;
};