GSdx: the texture cache fix discussed under r4589.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4592 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-04-25 01:44:00 +00:00
parent 7029f7aa98
commit cc8d14511b
8 changed files with 182 additions and 111 deletions

View File

@ -493,15 +493,15 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
return;
}
GSVertexSW dedge = GSVertexSW::zero();
GSVertexSW dscan = GSVertexSW::zero();
GSVertexSW dv = v[1] - v[0];
GSVector4 zero = GSVector4::zero();
GSVector4 dt = dv.t / dv.p.xyxy();
dedge.t = (dv.t / dv.p.yyyy()).xyxy(zero).wyww();
dscan.t = (dv.t / dv.p.xxxx()).xyxy(zero).xwww();
GSVertexSW dedge;
GSVertexSW dscan;
dedge.t = GSVector4::zero().insert<1, 1>(dt);
dscan.t = GSVector4::zero().insert<0, 0>(dt);
GSVector4 prestep = GSVector4(r.left, r.top) - scan.p;

View File

@ -619,9 +619,6 @@ void GSRenderer::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const G
switch(wms)
{
case CLAMP_REPEAT:
// FixMe: The last + 1 here breaks character portraits in Ar Tonelico 2.
// The problem is the same in HW and in SW rendering, and I also ruled out the
// usual scaling problems. (rama)
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
break;
case CLAMP_CLAMP:

View File

@ -729,7 +729,7 @@ void GSRendererSW::VertexKick(bool skip)
GSVertexSW& dst = m_vl.AddTail();
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F);
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;

View File

@ -892,31 +892,88 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
bool repeating = m_TEX0.IsRepeating();
if(repeating && m_tiles.empty())
{
for(int y = 0; y < th; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
for(int x = 0; x < tw; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
m_tiles[block].push_back(GSVector2i(x, y));
}
}
}
}
uint32 blocks = 0;
for(int y = r.top; y < r.bottom; y += bs.y)
if(!repeating)
{
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 block = base + o->block.col[x >> 3];
uint32 base = o->block.row[y >> 3];
if(block < MAX_BLOCKS)
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
uint32 block = base + o->block.col[x >> 3];
if((m_valid[row] & col) == 0)
if(block < MAX_BLOCKS)
{
if(!repeating)
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
if((m_valid[row] & col) == 0)
{
m_valid[row] |= col;
Write(GSVector4i(x, y, x + bs.x, y + bs.y));
blocks++;
}
}
}
}
}
else
{
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
Write(GSVector4i(x, y, x + bs.x, y + bs.y));
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
blocks++;
if(block < MAX_BLOCKS)
{
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
if((m_valid[row] & col) == 0)
{
m_valid[row] |= col;
hash_map<uint32, list<GSVector2i> >::iterator i = m_tiles.find(block);
if(i != m_tiles.end())
{
list<GSVector2i>& l = i->second;
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
{
Write(GSVector4i(j->x, j->y, j->x + bs.x, j->y + bs.y));
blocks++;
}
}
blocks++;
}
}
}
}
@ -924,27 +981,6 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
if(blocks > 0)
{
if(repeating)
{
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
m_valid[row] |= col;
}
}
}
}
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_fmt == FMT_32 ? 2 : 0));
Flush(m_write.count);

View File

@ -74,6 +74,7 @@ public:
int m_fmt;
bool m_target;
bool m_complete;
hash_map<uint32, list<GSVector2i> > m_tiles;
public:
Source(GSRenderer* r, uint8* temp);

View File

@ -25,7 +25,6 @@
// Constructs the software texture cache: stores the given GSState pointer in
// m_state and zero-fills the m_pages array.
// NOTE(review): this listing is a diff view with the +/- markers stripped; the
// enclosing hunk shrinks by one line, so the memset below is presumably the
// line this change removes (the page bitmap appears to become a local `pages`
// array in Lookup) - confirm against the actual file before relying on it.
GSTextureCacheSW::GSTextureCacheSW(GSState* state)
: m_state(state)
{
memset(m_pages, 0, sizeof(m_pages));
}
GSTextureCacheSW::~GSTextureCacheSW()
@ -77,6 +76,13 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0
m_textures.insert(t);
__aligned(uint32, 16) pages[16];
((GSVector4i*)pages)[0] = GSVector4i::zero();
((GSVector4i*)pages)[1] = GSVector4i::zero();
((GSVector4i*)pages)[2] = GSVector4i::zero();
((GSVector4i*)pages)[3] = GSVector4i::zero();
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
int tw = 1 << TEX0.TW;
@ -92,17 +98,17 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0
if(page < MAX_PAGES)
{
m_pages[page >> 5] |= 1 << (page & 31);
pages[page >> 5] |= 1 << (page & 31);
}
}
}
for(int i = 0; i < countof(m_pages); i++)
for(int i = 0; i < countof(pages); i++)
{
if(uint32 p = m_pages[i])
{
m_pages[i] = 0;
uint32 p = pages[i];
if(p != 0)
{
list<Texture*>* m = &m_map[i << 5];
unsigned long j;
@ -256,17 +262,6 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
bool repeating = m_TEX0.IsRepeating();
if(m_TEX0.TBW == 1 && m_tw != 0) // repeating)
{
// FIXME:
// - marking a block prevents fetching it again to a different part of the texture
// - only a real issue for TBW = 1 mipmap levels, where the repeating part is below and often exploited (onimusha 3 intro / sidewalk)
// r = GSVector4i(0, 0, tw, th);
r.top = 0;
r.bottom = th;
}
r = r.ralign<Align_Outside>(bs);
if(r.eq(GSVector4i(0, 0, tw, th)))
@ -295,6 +290,30 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
{
return false;
}
if(repeating)
{
// TODO: pull this from cache (hash = o->... + m_tw), need to use m_buff relative pointers then
const GSOffset* RESTRICT o = m_offset;
uint8* dst = (uint8*)m_buff;
for(int y = 0, block_pitch = pitch * bs.y; y < th; y += bs.y, dst += block_pitch)
{
uint32 base = o->block.row[y >> 3];
for(int x = 0; x < tw; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
m_tiles[block].push_back(&dst[x << shift]);
}
}
}
}
}
GSLocalMemory& mem = m_state->m_mem;
@ -307,31 +326,68 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
uint32 pitch = (1 << m_tw) << shift;
uint8* dst = (uint8*)m_buff + pitch * r.top;
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
if(!repeating)
{
uint32 base = o->block.row[y >> 3];
uint8* dst = (uint8*)m_buff + pitch * r.top;
for(int x = r.left; x < r.right; x += bs.x)
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
{
uint32 block = base + o->block.col[x >> 3];
uint32 base = o->block.row[y >> 3];
if(block < MAX_BLOCKS)
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
uint32 block = base + o->block.col[x >> 3];
if((m_valid[row] & col) == 0)
if(block < MAX_BLOCKS)
{
if(!repeating)
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
if((m_valid[row] & col) == 0)
{
m_valid[row] |= col;
(mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA);
blocks++;
}
}
}
}
}
else
{
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
(mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA);
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
blocks++;
if(block < MAX_BLOCKS)
{
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
if((m_valid[row] & col) == 0)
{
m_valid[row] |= col;
hash_map<uint32, list<uint8*> >::iterator i = m_tiles.find(block);
if(i != m_tiles.end())
{
list<uint8*>& l = i->second;
for(list<uint8*>::iterator j = l.begin(); j != l.end(); j++)
{
(mem.*rtxbP)(block, *j, pitch, TEXA);
blocks++;
}
}
}
}
}
}
@ -339,27 +395,6 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
if(blocks > 0)
{
if(repeating)
{
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
m_valid[row] |= col;
}
}
}
}
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift);
}

View File

@ -38,6 +38,7 @@ public:
uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page
uint32 m_age;
bool m_complete;
hash_map<uint32, list<uint8*> > m_tiles;
explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0);
virtual ~Texture();
@ -50,7 +51,6 @@ protected:
GSState* m_state;
hash_set<Texture*> m_textures;
list<Texture*> m_map[MAX_PAGES];
uint32 m_pages[16];
public:
GSTextureCacheSW(GSState* state);

View File

@ -2753,6 +2753,8 @@ public:
template<int src, int dst> __forceinline GSVector4 insert(const GSVector4& v) const
{
// TODO: use blendps when src == dst
#if 0 // _M_SSE >= 0x401
// NOTE: it's faster with shuffles...
@ -2766,40 +2768,40 @@ public:
case 0:
switch(src)
{
case 0: return v.xxyy(*this).xzzw(*this);
case 1: return v.yyyy(*this).xzzw(*this);
case 2: return v.zzyy(*this).xzzw(*this);
case 3: return v.wwyy(*this).xzzw(*this);
case 0: return yyxx(v).zxzw(*this);
case 1: return yyyy(v).zxzw(*this);
case 2: return yyzz(v).zxzw(*this);
case 3: return yyww(v).zxzw(*this);
default: __assume(0);
}
break;
case 1:
switch(src)
{
case 0: return v.xxxx(*this).zxzw(*this);
case 1: return v.yyxx(*this).zxzw(*this);
case 2: return v.zzxx(*this).zxzw(*this);
case 3: return v.wwxx(*this).zxzw(*this);
case 0: return xxxx(v).xzzw(*this);
case 1: return xxyy(v).xzzw(*this);
case 2: return xxzz(v).xzzw(*this);
case 3: return xxww(v).xzzw(*this);
default: __assume(0);
}
break;
case 2:
switch(src)
{
case 0: return xyxz(v.xxww(*this));
case 1: return xyxz(v.yyww(*this));
case 2: return xyxz(v.zzww(*this));
case 3: return xyxz(v.wwww(*this));
case 0: return xyzx(wwxx(v));
case 1: return xyzx(wwyy(v));
case 2: return xyzx(wwzz(v));
case 3: return xyzx(wwww(v));
default: __assume(0);
}
break;
case 3:
switch(src)
{
case 0: return xyzx(v.xxzz(*this));
case 1: return xyzx(v.yyzz(*this));
case 2: return xyzx(v.zzzz(*this));
case 3: return xyzx(v.wwzz(*this));
case 0: return xyxz(zzxx(v));
case 1: return xyxz(zzyy(v));
case 2: return xyxz(zzzz(v));
case 3: return xyxz(zzww(v));
default: __assume(0);
}
break;