From e59670ff97cb3fd22cb58035c8b4cdf9f53095b7 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Mon, 25 Apr 2011 18:18:21 +0000 Subject: [PATCH] GSdx: trying another approach for r4589 git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4596 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSLocalMemory.cpp | 66 ++++++++++++++ plugins/GSdx/GSLocalMemory.h | 2 + plugins/GSdx/GSTextureCache.cpp | 91 ++++++++----------- plugins/GSdx/GSTextureCache.h | 9 +- plugins/GSdx/GSTextureCacheSW.cpp | 142 ++++++++++++++---------------- plugins/GSdx/GSTextureCacheSW.h | 13 ++- 6 files changed, 186 insertions(+), 137 deletions(-) diff --git a/plugins/GSdx/GSLocalMemory.cpp b/plugins/GSdx/GSLocalMemory.cpp index 8df9b14164..4f663509f3 100644 --- a/plugins/GSdx/GSLocalMemory.cpp +++ b/plugins/GSdx/GSLocalMemory.cpp @@ -542,6 +542,72 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G return o; } +void GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0, list* page2tile) +{ + // TODO: cache this, hash = hash of o + tw + th (th not even needed, it can be 1024 always) + + const GSOffset* o = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); + + const GSLocalMemory::psm_t& psm = m_psm[TEX0.PSM]; + + GSVector2i bs = psm.bs; + + int tw = std::max(1 << TEX0.TW, bs.x); + int th = std::max(1 << TEX0.TH, bs.y); + + map > tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks) + + for(int y = 0; y < th; y += bs.y) + { + uint32 base = o->block.row[y >> 3]; + + for(int x = 0, i = y << 7; x < tw; x += bs.x, i += bs.x) + { + uint32 page = (base + o->block.col[x >> 3]) >> 5; + + if(page < MAX_PAGES) + { + tmp[page].insert(i >> 3); // ((y << 7) | x) >> 3 + } + } + } + + // combine the lower 5 bits of the address into a 9:5 pointer:mask form, so the "valid bits" can be tested against an uint32 array + + for(map >::iterator i = tmp.begin(); i != tmp.end(); i++) + { + uint32 page = i->first; + + const hash_set& tiles = i->second; + + hash_map m; + + for(hash_set::iterator j = tiles.begin(); j != tiles.end(); j++) + { + uint32 addr = *j; + + uint32 row = addr >> 5; + uint32 col = 1 << (addr & 31); + + hash_map::iterator k = m.find(row); + + if(k != m.end()) + { + k->second |= col; + } + else + { + m[row] = col; + } + } + + for(hash_map::iterator j = m.begin(); j != m.end(); j++) + { + page2tile[page].push_back(GSVector2i(j->first, j->second)); + } + } +} + //////////////////// template diff --git a/plugins/GSdx/GSLocalMemory.h b/plugins/GSdx/GSLocalMemory.h index 1bd4943e71..ec953258d5 100644 --- a/plugins/GSdx/GSLocalMemory.h +++ b/plugins/GSdx/GSLocalMemory.h @@ -154,6 +154,8 @@ public: GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm); GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); + void GetPage2TileMap(const GIFRegTEX0& TEX0, list* page2tile); // count = 512 + // address static uint32 BlockNumber32(int x, int y, uint32 bp, uint32 bw) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 249b756c61..5f56e80fcd 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -133,7 +133,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con } } - src->Update(TEX0, TEXA, r); + src->Update(r); m_src.m_used = true; @@ -337,7 +337,20 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec if(!s->m_target) { - s->m_valid[page] = 0; + if(s->m_repeating) + { + list& l = s->m_page2tile[page]; + + for(list::iterator k = l.begin(); k != l.end(); k++) + { + s->m_valid[k->x] &= ~k->y; + } + } + else + { + s->m_valid[page] = 0; + } + s->m_complete = false; found = b; @@ -528,10 +541,7 @@ void GSTextureCache::IncAge() //Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work. GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst) { - Source* src = new Source(m_renderer, m_temp); - - src->m_TEX0 = TEX0; - src->m_TEXA = TEXA; + Source* src = new Source(m_renderer, TEX0, TEXA, m_temp); int tw = 1 << TEX0.TW; int th = 1 << TEX0.TH; @@ -783,9 +793,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type) { - Target* t = new Target(m_renderer, m_temp); - - t->m_TEX0 = TEX0; + Target* t = new Target(m_renderer, TEX0, m_temp); // FIXME: initial data should be unswizzled from local mem in Update() if dirty @@ -837,7 +845,7 @@ void GSTextureCache::Surface::Update() // GSTextureCache::Source -GSTextureCache::Source::Source(GSRenderer* r, uint8* temp) +GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp) : Surface(r, temp) , m_palette(NULL) , m_initpalette(true) @@ -845,6 +853,9 @@ GSTextureCache::Source::Source(GSRenderer* r, uint8* temp) , m_target(false) , m_complete(false) { + m_TEX0 = TEX0; + m_TEXA = TEXA; + memset(m_valid, 0, sizeof(m_valid)); m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 32); @@ -853,6 +864,13 @@ GSTextureCache::Source::Source(GSRenderer* r, uint8* temp) m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32); m_write.count = 0; + + m_repeating = m_TEX0.IsRepeating(); + + if(m_repeating) + { + r->m_mem.GetPage2TileMap(m_TEX0, m_page2tile); + } } GSTextureCache::Source::~Source() @@ -864,7 +882,7 @@ GSTextureCache::Source::~Source() _aligned_free(m_write.rect); } -void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect) +void GSTextureCache::Source::Update(const GSVector4i& rect) { Surface::Update(); @@ -873,9 +891,6 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE return; } - m_TEX0 = TEX0; - m_TEXA = TEXA; - GSVector2i bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs; int tw = std::max(1 << m_TEX0.TW, bs.x); @@ -890,42 +905,24 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE const GSOffset* o = m_renderer->m_context->offset.tex; - bool repeating = m_TEX0.IsRepeating(); - - if(repeating && m_tiles.empty()) - { - for(int y = 0; y < th; y += bs.y) - { - uint32 base = o->block.row[y >> 3]; - - for(int x = 0; x < tw; x += bs.x) - { - uint32 block = base + o->block.col[x >> 3]; - - if(block < MAX_BLOCKS) - { - m_tiles[block].push_back(GSVector2i(x, y)); - } - } - } - } - uint32 blocks = 0; - if(!repeating) + if(m_repeating) { for(int y = r.top; y < r.bottom; y += bs.y) { uint32 base = o->block.row[y >> 3]; - for(int x = r.left; x < r.right; x += bs.x) + for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x) { uint32 block = base + o->block.col[x >> 3]; if(block < MAX_BLOCKS) { - uint32 row = block >> 5; - uint32 col = 1 << (block & 31); + uint32 addr = i >> 3; + + uint32 row = addr >> 5; + uint32 col = 1 << (addr & 31); if((m_valid[row] & col) == 0) { @@ -958,19 +955,7 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE { m_valid[row] |= col; - hash_map >::iterator i = m_tiles.find(block); - - if(i != m_tiles.end()) - { - list& l = i->second; - - for(list::iterator j = l.begin(); j != l.end(); j++) - { - Write(GSVector4i(j->x, j->y, j->x + bs.x, j->y + bs.y)); - - blocks++; - } - } + Write(GSVector4i(x, y, x + bs.x, y + bs.y)); blocks++; } @@ -1084,11 +1069,13 @@ void GSTextureCache::Source::Flush(uint32 count) // GSTextureCache::Target -GSTextureCache::Target::Target(GSRenderer* r, uint8* temp) +GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp) : Surface(r, temp) , m_type(-1) , m_used(false) { + m_TEX0 = TEX0; + m_valid = GSVector4i::zero(); } diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 48290b0eac..840db2321f 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -74,13 +74,14 @@ public: int m_fmt; bool m_target; bool m_complete; - hash_map > m_tiles; + bool m_repeating; + list m_page2tile[MAX_PAGES]; public: - Source(GSRenderer* r, uint8* temp); + Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp); virtual ~Source(); - virtual void Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect); + virtual void Update(const GSVector4i& rect); }; class Target : public Surface @@ -92,7 +93,7 @@ public: GSVector4i m_valid; public: - Target(GSRenderer* r, uint8* temp); + Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp); virtual void Update(); }; diff --git a/plugins/GSdx/GSTextureCacheSW.cpp b/plugins/GSdx/GSTextureCacheSW.cpp index dfcec54690..cffdf8c334 100644 --- a/plugins/GSdx/GSTextureCacheSW.cpp +++ b/plugins/GSdx/GSTextureCacheSW.cpp @@ -72,7 +72,7 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0 { const GSOffset* o = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - t = new Texture(m_state, o, tw0); + t = new Texture(m_state, o, tw0, TEX0, TEXA); m_textures.insert(t); @@ -123,7 +123,7 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0 } } - if(!t->Update(TEX0, TEXA, r)) + if(!t->Update(r)) { printf("!@#$\n"); // memory allocation may fail if the game is too hungry (tales of legendia fight transition/scene) @@ -163,7 +163,20 @@ void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& r if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM)) { - t->m_valid[page] = 0; + if(t->m_repeating) + { + list& l = t->m_page2tile[page]; + + for(list::iterator k = l.begin(); k != l.end(); k++) + { + t->m_valid[k->x] &= ~k->y; + } + } + else + { + t->m_valid[page] = 0; + } + t->m_complete = false; } } @@ -220,7 +233,7 @@ void GSTextureCacheSW::IncAge() // -GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint32 tw0) +GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) : m_state(state) , m_offset(offset) , m_buff(NULL) @@ -228,7 +241,12 @@ GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint3 , m_age(0) , m_complete(false) { + m_TEX0 = TEX0; + m_TEXA = TEXA; + memset(m_valid, 0, sizeof(m_valid)); + + m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower } GSTextureCacheSW::Texture::~Texture() @@ -239,29 +257,24 @@ GSTextureCacheSW::Texture::~Texture() } } -bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect) +bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) { if(m_complete) { return true; } - m_TEX0 = TEX0; - m_TEXA = TEXA; - - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM]; GSVector2i bs = psm.bs; int shift = psm.pal == 0 ? 2 : 0; - int tw = std::max(1 << TEX0.TW, bs.x); - int th = std::max(1 << TEX0.TH, bs.y); + int tw = std::max(1 << m_TEX0.TW, bs.x); + int th = std::max(1 << m_TEX0.TH, bs.y); GSVector4i r = rect; - bool repeating = m_TEX0.IsRepeating(); - r = r.ralign(bs); if(r.eq(GSVector4i(0, 0, tw, th))) @@ -271,7 +284,7 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& if(m_buff == NULL) { - uint32 tw0 = std::max(TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff + uint32 tw0 = std::max(m_TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff if(m_tw == 0) { @@ -291,29 +304,12 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& return false; } - if(repeating) + const GSOffset* RESTRICT o = m_offset; + + if(m_repeating) { - // TODO: pull this from cache (hash = o->... + m_tw), need to use m_buff relative pointers then - - const GSOffset* RESTRICT o = m_offset; - - uint8* dst = (uint8*)m_buff; - - for(int y = 0, block_pitch = pitch * bs.y; y < th; y += bs.y, dst += block_pitch) - { - uint32 base = o->block.row[y >> 3]; - - for(int x = 0; x < tw; x += bs.x) - { - uint32 block = base + o->block.col[x >> 3]; - - if(block < MAX_BLOCKS) - { - m_tiles[block].push_back(&dst[x << shift]); - } - } - } - } + m_state->m_mem.GetPage2TileMap(m_TEX0, m_page2tile); + } } GSLocalMemory& mem = m_state->m_mem; @@ -326,10 +322,39 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& uint32 pitch = (1 << m_tw) << shift; - if(!repeating) - { - uint8* dst = (uint8*)m_buff + pitch * r.top; + uint8* dst = (uint8*)m_buff + pitch * r.top; + if(m_repeating) + { + for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch) + { + uint32 base = o->block.row[y >> 3]; + + for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x) + { + uint32 block = base + o->block.col[x >> 3]; + + if(block < MAX_BLOCKS) + { + uint32 addr = i >> 3; + + uint32 row = addr >> 5; + uint32 col = 1 << (addr & 31); + + if((m_valid[row] & col) == 0) + { + m_valid[row] |= col; + + (mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA); + + blocks++; + } + } + } + } + } + else + { for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch) { uint32 base = o->block.row[y >> 3]; @@ -347,7 +372,7 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& { m_valid[row] |= col; - (mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA); + (mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA); blocks++; } @@ -355,43 +380,6 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& } } } - else - { - for(int y = r.top; y < r.bottom; y += bs.y) - { - uint32 base = o->block.row[y >> 3]; - - for(int x = r.left; x < r.right; x += bs.x) - { - uint32 block = base + o->block.col[x >> 3]; - - if(block < MAX_BLOCKS) - { - uint32 row = block >> 5; - uint32 col = 1 << (block & 31); - - if((m_valid[row] & col) == 0) - { - m_valid[row] |= col; - - hash_map >::iterator i = m_tiles.find(block); - - if(i != m_tiles.end()) - { - list& l = i->second; - - for(list::iterator j = l.begin(); j != l.end(); j++) - { - (mem.*rtxbP)(block, *j, pitch, TEXA); - - blocks++; - } - } - } - } - } - } - } if(blocks > 0) { diff --git a/plugins/GSdx/GSTextureCacheSW.h b/plugins/GSdx/GSTextureCacheSW.h index 7efb0153cc..8191c7c7ab 100644 --- a/plugins/GSdx/GSTextureCacheSW.h +++ b/plugins/GSdx/GSTextureCacheSW.h @@ -35,15 +35,20 @@ public: GIFRegTEXA m_TEXA; void* m_buff; uint32 m_tw; - uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page uint32 m_age; bool m_complete; - hash_map > m_tiles; + bool m_repeating; + list m_page2tile[MAX_PAGES]; + uint32 m_valid[MAX_PAGES]; - explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0); + // m_valid + // fast mode: each uint32 bits map to the 32 blocks of that page + // repeating mode: 1 bpp image of the texture tiles (8x8), also having 512 elements is just a coincidence (worst case: (1024*1024)/(8*8)/(sizeof(uint32)*8)) + + explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); virtual ~Texture(); - bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r); + bool Update(const GSVector4i& r); bool Save(const string& fn, bool dds = false) const; };