From cc8d14511b3e152642083160a3695fdfc04942b3 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Mon, 25 Apr 2011 01:44:00 +0000 Subject: [PATCH] GSdx: the texture cache fix discussed under r4589. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4592 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSRasterizer.cpp | 12 +-- plugins/GSdx/GSRenderer.cpp | 3 - plugins/GSdx/GSRendererSW.cpp | 2 +- plugins/GSdx/GSTextureCache.cpp | 102 +++++++++++++++------- plugins/GSdx/GSTextureCache.h | 1 + plugins/GSdx/GSTextureCacheSW.cpp | 137 +++++++++++++++++++----------- plugins/GSdx/GSTextureCacheSW.h | 2 +- plugins/GSdx/GSVector.h | 34 ++++---- 8 files changed, 182 insertions(+), 111 deletions(-) diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index 280b18d09c..fcd66893af 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -493,15 +493,15 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices) return; } - GSVertexSW dedge = GSVertexSW::zero(); - GSVertexSW dscan = GSVertexSW::zero(); - GSVertexSW dv = v[1] - v[0]; - GSVector4 zero = GSVector4::zero(); + GSVector4 dt = dv.t / dv.p.xyxy(); - dedge.t = (dv.t / dv.p.yyyy()).xyxy(zero).wyww(); - dscan.t = (dv.t / dv.p.xxxx()).xyxy(zero).xwww(); + GSVertexSW dedge; + GSVertexSW dscan; + + dedge.t = GSVector4::zero().insert<1, 1>(dt); + dscan.t = GSVector4::zero().insert<0, 0>(dt); GSVector4 prestep = GSVector4(r.left, r.top) - scan.p; diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index ac9374cb6d..d1cda1a7ad 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ b/plugins/GSdx/GSRenderer.cpp @@ -619,9 +619,6 @@ void GSRenderer::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const G switch(wms) { case CLAMP_REPEAT: - // FixMe: The last + 1 here breaks character portraits in Ar Tonelico 2. - // The problem is the same in HW and in SW rendering, and I also ruled out the - // usual scaling problems. (rama) if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;} break; case CLAMP_CLAMP: diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index bae4da6616..cd60f70cfb 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -729,7 +729,7 @@ void GSRendererSW::VertexKick(bool skip) GSVertexSW& dst = m_vl.AddTail(); GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET; - GSVector4i zf = GSVector4i((int)std::min(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); + GSVector4i zf = GSVector4i((int)std::min(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 4b04969e80..249b756c61 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -892,31 +892,88 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE bool repeating = m_TEX0.IsRepeating(); + if(repeating && m_tiles.empty()) + { + for(int y = 0; y < th; y += bs.y) + { + uint32 base = o->block.row[y >> 3]; + + for(int x = 0; x < tw; x += bs.x) + { + uint32 block = base + o->block.col[x >> 3]; + + if(block < MAX_BLOCKS) + { + m_tiles[block].push_back(GSVector2i(x, y)); + } + } + } + } + uint32 blocks = 0; - for(int y = r.top; y < r.bottom; y += bs.y) + if(!repeating) { - uint32 base = o->block.row[y >> 3]; - - for(int x = r.left; x < r.right; x += bs.x) + for(int y = r.top; y < r.bottom; y += bs.y) { - uint32 block = base + o->block.col[x >> 3]; + uint32 base = o->block.row[y >> 3]; - if(block < MAX_BLOCKS) + for(int x = r.left; x < r.right; x += bs.x) { - uint32 row = block >> 5; - uint32 col = 1 << (block & 31); + uint32 block = base + o->block.col[x >> 3]; - if((m_valid[row] & col) == 0) + if(block < MAX_BLOCKS) { - if(!repeating) + uint32 row = block >> 5; + uint32 col = 1 << (block & 31); + + if((m_valid[row] & col) == 0) { m_valid[row] |= col; + + Write(GSVector4i(x, y, x + bs.x, y + bs.y)); + + blocks++; } + } + } + } + } + else + { + for(int y = r.top; y < r.bottom; y += bs.y) + { + uint32 base = o->block.row[y >> 3]; - Write(GSVector4i(x, y, x + bs.x, y + bs.y)); + for(int x = r.left; x < r.right; x += bs.x) + { + uint32 block = base + o->block.col[x >> 3]; - blocks++; + if(block < MAX_BLOCKS) + { + uint32 row = block >> 5; + uint32 col = 1 << (block & 31); + + if((m_valid[row] & col) == 0) + { + m_valid[row] |= col; + + hash_map >::iterator i = m_tiles.find(block); + + if(i != m_tiles.end()) + { + list& l = i->second; + + for(list::iterator j = l.begin(); j != l.end(); j++) + { + Write(GSVector4i(j->x, j->y, j->x + bs.x, j->y + bs.y)); + + blocks++; + } + } + + blocks++; + } } } } @@ -924,27 +981,6 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE if(blocks > 0) { - if(repeating) - { - for(int y = r.top; y < r.bottom; y += bs.y) - { - uint32 base = o->block.row[y >> 3]; - - for(int x = r.left; x < r.right; x += bs.x) - { - uint32 block = base + o->block.col[x >> 3]; - - if(block < MAX_BLOCKS) - { - uint32 row = block >> 5; - uint32 col = 1 << (block & 31); - - m_valid[row] |= col; - } - } - } - } - m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_fmt == FMT_32 ? 2 : 0)); Flush(m_write.count); diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 6abb856e08..48290b0eac 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -74,6 +74,7 @@ public: int m_fmt; bool m_target; bool m_complete; + hash_map > m_tiles; public: Source(GSRenderer* r, uint8* temp); diff --git a/plugins/GSdx/GSTextureCacheSW.cpp b/plugins/GSdx/GSTextureCacheSW.cpp index 0b0d3a01a4..dfcec54690 100644 --- a/plugins/GSdx/GSTextureCacheSW.cpp +++ b/plugins/GSdx/GSTextureCacheSW.cpp @@ -25,7 +25,6 @@ GSTextureCacheSW::GSTextureCacheSW(GSState* state) : m_state(state) { - memset(m_pages, 0, sizeof(m_pages)); } GSTextureCacheSW::~GSTextureCacheSW() @@ -77,6 +76,13 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0 m_textures.insert(t); + __aligned(uint32, 16) pages[16]; + + ((GSVector4i*)pages)[0] = GSVector4i::zero(); + ((GSVector4i*)pages)[1] = GSVector4i::zero(); + ((GSVector4i*)pages)[2] = GSVector4i::zero(); + ((GSVector4i*)pages)[3] = GSVector4i::zero(); + GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs; int tw = 1 << TEX0.TW; @@ -92,17 +98,17 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0 if(page < MAX_PAGES) { - m_pages[page >> 5] |= 1 << (page & 31); + pages[page >> 5] |= 1 << (page & 31); } } } - for(int i = 0; i < countof(m_pages); i++) + for(int i = 0; i < countof(pages); i++) { - if(uint32 p = m_pages[i]) - { - m_pages[i] = 0; + uint32 p = pages[i]; + if(p != 0) + { list* m = &m_map[i << 5]; unsigned long j; @@ -256,17 +262,6 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& bool repeating = m_TEX0.IsRepeating(); - if(m_TEX0.TBW == 1 && m_tw != 0) // repeating) - { - // FIXME: - // - marking a block prevents fetching it again to a different part of the texture - // - only a real issue for TBW = 1 mipmap levels, where the repeating part is below and often exploited (onimusha 3 intro / sidewalk) - - // r = GSVector4i(0, 0, tw, th); - r.top = 0; - r.bottom = th; - } - r = r.ralign(bs); if(r.eq(GSVector4i(0, 0, tw, th))) @@ -295,6 +290,30 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& { return false; } + + if(repeating) + { + // TODO: pull this from cache (hash = o->... + m_tw), need to use m_buff relative pointers then + + const GSOffset* RESTRICT o = m_offset; + + uint8* dst = (uint8*)m_buff; + + for(int y = 0, block_pitch = pitch * bs.y; y < th; y += bs.y, dst += block_pitch) + { + uint32 base = o->block.row[y >> 3]; + + for(int x = 0; x < tw; x += bs.x) + { + uint32 block = base + o->block.col[x >> 3]; + + if(block < MAX_BLOCKS) + { + m_tiles[block].push_back(&dst[x << shift]); + } + } + } + } } GSLocalMemory& mem = m_state->m_mem; @@ -307,31 +326,68 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& uint32 pitch = (1 << m_tw) << shift; - uint8* dst = (uint8*)m_buff + pitch * r.top; - - for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch) + if(!repeating) { - uint32 base = o->block.row[y >> 3]; + uint8* dst = (uint8*)m_buff + pitch * r.top; - for(int x = r.left; x < r.right; x += bs.x) + for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch) { - uint32 block = base + o->block.col[x >> 3]; + uint32 base = o->block.row[y >> 3]; - if(block < MAX_BLOCKS) + for(int x = r.left; x < r.right; x += bs.x) { - uint32 row = block >> 5; - uint32 col = 1 << (block & 31); + uint32 block = base + o->block.col[x >> 3]; - if((m_valid[row] & col) == 0) + if(block < MAX_BLOCKS) { - if(!repeating) + uint32 row = block >> 5; + uint32 col = 1 << (block & 31); + + if((m_valid[row] & col) == 0) { m_valid[row] |= col; + + (mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA); + + blocks++; } + } + } + } + } + else + { + for(int y = r.top; y < r.bottom; y += bs.y) + { + uint32 base = o->block.row[y >> 3]; - (mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA); + for(int x = r.left; x < r.right; x += bs.x) + { + uint32 block = base + o->block.col[x >> 3]; - blocks++; + if(block < MAX_BLOCKS) + { + uint32 row = block >> 5; + uint32 col = 1 << (block & 31); + + if((m_valid[row] & col) == 0) + { + m_valid[row] |= col; + + hash_map >::iterator i = m_tiles.find(block); + + if(i != m_tiles.end()) + { + list& l = i->second; + + for(list::iterator j = l.begin(); j != l.end(); j++) + { + (mem.*rtxbP)(block, *j, pitch, TEXA); + + blocks++; + } + } + } } } } @@ -339,27 +395,6 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& if(blocks > 0) { - if(repeating) - { - for(int y = r.top; y < r.bottom; y += bs.y) - { - uint32 base = o->block.row[y >> 3]; - - for(int x = r.left; x < r.right; x += bs.x) - { - uint32 block = base + o->block.col[x >> 3]; - - if(block < MAX_BLOCKS) - { - uint32 row = block >> 5; - uint32 col = 1 << (block & 31); - - m_valid[row] |= col; - } - } - } - } - m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift); } diff --git a/plugins/GSdx/GSTextureCacheSW.h b/plugins/GSdx/GSTextureCacheSW.h index 636e8b94c0..7efb0153cc 100644 --- a/plugins/GSdx/GSTextureCacheSW.h +++ b/plugins/GSdx/GSTextureCacheSW.h @@ -38,6 +38,7 @@ public: uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page uint32 m_age; bool m_complete; + hash_map > m_tiles; explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0); virtual ~Texture(); @@ -50,7 +51,6 @@ protected: GSState* m_state; hash_set m_textures; list m_map[MAX_PAGES]; - uint32 m_pages[16]; public: GSTextureCacheSW(GSState* state); diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index 8d14d091e8..691bee02f9 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -2753,6 +2753,8 @@ public: template __forceinline GSVector4 insert(const GSVector4& v) const { + // TODO: use blendps when src == dst + #if 0 // _M_SSE >= 0x401 // NOTE: it's faster with shuffles... @@ -2766,40 +2768,40 @@ public: case 0: switch(src) { - case 0: return v.xxyy(*this).xzzw(*this); - case 1: return v.yyyy(*this).xzzw(*this); - case 2: return v.zzyy(*this).xzzw(*this); - case 3: return v.wwyy(*this).xzzw(*this); + case 0: return yyxx(v).zxzw(*this); + case 1: return yyyy(v).zxzw(*this); + case 2: return yyzz(v).zxzw(*this); + case 3: return yyww(v).zxzw(*this); default: __assume(0); } break; case 1: switch(src) { - case 0: return v.xxxx(*this).zxzw(*this); - case 1: return v.yyxx(*this).zxzw(*this); - case 2: return v.zzxx(*this).zxzw(*this); - case 3: return v.wwxx(*this).zxzw(*this); + case 0: return xxxx(v).xzzw(*this); + case 1: return xxyy(v).xzzw(*this); + case 2: return xxzz(v).xzzw(*this); + case 3: return xxww(v).xzzw(*this); default: __assume(0); } break; case 2: switch(src) { - case 0: return xyxz(v.xxww(*this)); - case 1: return xyxz(v.yyww(*this)); - case 2: return xyxz(v.zzww(*this)); - case 3: return xyxz(v.wwww(*this)); + case 0: return xyzx(wwxx(v)); + case 1: return xyzx(wwyy(v)); + case 2: return xyzx(wwzz(v)); + case 3: return xyzx(wwww(v)); default: __assume(0); } break; case 3: switch(src) { - case 0: return xyzx(v.xxzz(*this)); - case 1: return xyzx(v.yyzz(*this)); - case 2: return xyzx(v.zzzz(*this)); - case 3: return xyzx(v.wwzz(*this)); + case 0: return xyxz(zzxx(v)); + case 1: return xyxz(zzyy(v)); + case 2: return xyxz(zzzz(v)); + case 3: return xyxz(zzww(v)); default: __assume(0); } break;