mirror of https://github.com/PCSX2/pcsx2.git
GSdx: the texture cache fix discussed under r4589.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4592 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
7029f7aa98
commit
cc8d14511b
|
@ -493,15 +493,15 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
|
|||
return;
|
||||
}
|
||||
|
||||
GSVertexSW dedge = GSVertexSW::zero();
|
||||
GSVertexSW dscan = GSVertexSW::zero();
|
||||
|
||||
GSVertexSW dv = v[1] - v[0];
|
||||
|
||||
GSVector4 zero = GSVector4::zero();
|
||||
GSVector4 dt = dv.t / dv.p.xyxy();
|
||||
|
||||
dedge.t = (dv.t / dv.p.yyyy()).xyxy(zero).wyww();
|
||||
dscan.t = (dv.t / dv.p.xxxx()).xyxy(zero).xwww();
|
||||
GSVertexSW dedge;
|
||||
GSVertexSW dscan;
|
||||
|
||||
dedge.t = GSVector4::zero().insert<1, 1>(dt);
|
||||
dscan.t = GSVector4::zero().insert<0, 0>(dt);
|
||||
|
||||
GSVector4 prestep = GSVector4(r.left, r.top) - scan.p;
|
||||
|
||||
|
|
|
@ -619,9 +619,6 @@ void GSRenderer::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const G
|
|||
switch(wms)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
// FixMe: The last + 1 here breaks character portraits in Ar Tonelico 2.
|
||||
// The problem is the same in HW and in SW rendering, and I also ruled out the
|
||||
// usual scaling problems. (rama)
|
||||
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
|
|
|
@ -729,7 +729,7 @@ void GSRendererSW::VertexKick(bool skip)
|
|||
GSVertexSW& dst = m_vl.AddTail();
|
||||
|
||||
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET;
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F);
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
|
||||
|
||||
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||
|
||||
|
|
|
@ -892,31 +892,88 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
|||
|
||||
bool repeating = m_TEX0.IsRepeating();
|
||||
|
||||
if(repeating && m_tiles.empty())
|
||||
{
|
||||
for(int y = 0; y < th; y += bs.y)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
for(int x = 0; x < tw; x += bs.x)
|
||||
{
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
m_tiles[block].push_back(GSVector2i(x, y));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32 blocks = 0;
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
if(!repeating)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
{
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
|
||||
if((m_valid[row] & col) == 0)
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
if(!repeating)
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
if((m_valid[row] & col) == 0)
|
||||
{
|
||||
m_valid[row] |= col;
|
||||
|
||||
Write(GSVector4i(x, y, x + bs.x, y + bs.y));
|
||||
|
||||
blocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
Write(GSVector4i(x, y, x + bs.x, y + bs.y));
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
|
||||
blocks++;
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
if((m_valid[row] & col) == 0)
|
||||
{
|
||||
m_valid[row] |= col;
|
||||
|
||||
hash_map<uint32, list<GSVector2i> >::iterator i = m_tiles.find(block);
|
||||
|
||||
if(i != m_tiles.end())
|
||||
{
|
||||
list<GSVector2i>& l = i->second;
|
||||
|
||||
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
|
||||
{
|
||||
Write(GSVector4i(j->x, j->y, j->x + bs.x, j->y + bs.y));
|
||||
|
||||
blocks++;
|
||||
}
|
||||
}
|
||||
|
||||
blocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -924,27 +981,6 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
|||
|
||||
if(blocks > 0)
|
||||
{
|
||||
if(repeating)
|
||||
{
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
m_valid[row] |= col;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_fmt == FMT_32 ? 2 : 0));
|
||||
|
||||
Flush(m_write.count);
|
||||
|
|
|
@ -74,6 +74,7 @@ public:
|
|||
int m_fmt;
|
||||
bool m_target;
|
||||
bool m_complete;
|
||||
hash_map<uint32, list<GSVector2i> > m_tiles;
|
||||
|
||||
public:
|
||||
Source(GSRenderer* r, uint8* temp);
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
GSTextureCacheSW::GSTextureCacheSW(GSState* state)
|
||||
: m_state(state)
|
||||
{
|
||||
memset(m_pages, 0, sizeof(m_pages));
|
||||
}
|
||||
|
||||
GSTextureCacheSW::~GSTextureCacheSW()
|
||||
|
@ -77,6 +76,13 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0
|
|||
|
||||
m_textures.insert(t);
|
||||
|
||||
__aligned(uint32, 16) pages[16];
|
||||
|
||||
((GSVector4i*)pages)[0] = GSVector4i::zero();
|
||||
((GSVector4i*)pages)[1] = GSVector4i::zero();
|
||||
((GSVector4i*)pages)[2] = GSVector4i::zero();
|
||||
((GSVector4i*)pages)[3] = GSVector4i::zero();
|
||||
|
||||
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
|
||||
|
||||
int tw = 1 << TEX0.TW;
|
||||
|
@ -92,17 +98,17 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0
|
|||
|
||||
if(page < MAX_PAGES)
|
||||
{
|
||||
m_pages[page >> 5] |= 1 << (page & 31);
|
||||
pages[page >> 5] |= 1 << (page & 31);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(int i = 0; i < countof(m_pages); i++)
|
||||
for(int i = 0; i < countof(pages); i++)
|
||||
{
|
||||
if(uint32 p = m_pages[i])
|
||||
{
|
||||
m_pages[i] = 0;
|
||||
uint32 p = pages[i];
|
||||
|
||||
if(p != 0)
|
||||
{
|
||||
list<Texture*>* m = &m_map[i << 5];
|
||||
|
||||
unsigned long j;
|
||||
|
@ -256,17 +262,6 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
|
|||
|
||||
bool repeating = m_TEX0.IsRepeating();
|
||||
|
||||
if(m_TEX0.TBW == 1 && m_tw != 0) // repeating)
|
||||
{
|
||||
// FIXME:
|
||||
// - marking a block prevents fetching it again to a different part of the texture
|
||||
// - only a real issue for TBW = 1 mipmap levels, where the repeating part is below and often exploited (onimusha 3 intro / sidewalk)
|
||||
|
||||
// r = GSVector4i(0, 0, tw, th);
|
||||
r.top = 0;
|
||||
r.bottom = th;
|
||||
}
|
||||
|
||||
r = r.ralign<Align_Outside>(bs);
|
||||
|
||||
if(r.eq(GSVector4i(0, 0, tw, th)))
|
||||
|
@ -295,6 +290,30 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
|
|||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if(repeating)
|
||||
{
|
||||
// TODO: pull this from cache (hash = o->... + m_tw), need to use m_buff relative pointers then
|
||||
|
||||
const GSOffset* RESTRICT o = m_offset;
|
||||
|
||||
uint8* dst = (uint8*)m_buff;
|
||||
|
||||
for(int y = 0, block_pitch = pitch * bs.y; y < th; y += bs.y, dst += block_pitch)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
for(int x = 0; x < tw; x += bs.x)
|
||||
{
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
m_tiles[block].push_back(&dst[x << shift]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GSLocalMemory& mem = m_state->m_mem;
|
||||
|
@ -307,31 +326,68 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
|
|||
|
||||
uint32 pitch = (1 << m_tw) << shift;
|
||||
|
||||
uint8* dst = (uint8*)m_buff + pitch * r.top;
|
||||
|
||||
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
|
||||
if(!repeating)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
uint8* dst = (uint8*)m_buff + pitch * r.top;
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
|
||||
{
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
|
||||
if((m_valid[row] & col) == 0)
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
if(!repeating)
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
if((m_valid[row] & col) == 0)
|
||||
{
|
||||
m_valid[row] |= col;
|
||||
|
||||
(mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA);
|
||||
|
||||
blocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
(mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA);
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
|
||||
blocks++;
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
if((m_valid[row] & col) == 0)
|
||||
{
|
||||
m_valid[row] |= col;
|
||||
|
||||
hash_map<uint32, list<uint8*> >::iterator i = m_tiles.find(block);
|
||||
|
||||
if(i != m_tiles.end())
|
||||
{
|
||||
list<uint8*>& l = i->second;
|
||||
|
||||
for(list<uint8*>::iterator j = l.begin(); j != l.end(); j++)
|
||||
{
|
||||
(mem.*rtxbP)(block, *j, pitch, TEXA);
|
||||
|
||||
blocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -339,27 +395,6 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
|
|||
|
||||
if(blocks > 0)
|
||||
{
|
||||
if(repeating)
|
||||
{
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
m_valid[row] |= col;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift);
|
||||
}
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@ public:
|
|||
uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page
|
||||
uint32 m_age;
|
||||
bool m_complete;
|
||||
hash_map<uint32, list<uint8*> > m_tiles;
|
||||
|
||||
explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0);
|
||||
virtual ~Texture();
|
||||
|
@ -50,7 +51,6 @@ protected:
|
|||
GSState* m_state;
|
||||
hash_set<Texture*> m_textures;
|
||||
list<Texture*> m_map[MAX_PAGES];
|
||||
uint32 m_pages[16];
|
||||
|
||||
public:
|
||||
GSTextureCacheSW(GSState* state);
|
||||
|
|
|
@ -2753,6 +2753,8 @@ public:
|
|||
|
||||
template<int src, int dst> __forceinline GSVector4 insert(const GSVector4& v) const
|
||||
{
|
||||
// TODO: use blendps when src == dst
|
||||
|
||||
#if 0 // _M_SSE >= 0x401
|
||||
|
||||
// NOTE: it's faster with shuffles...
|
||||
|
@ -2766,40 +2768,40 @@ public:
|
|||
case 0:
|
||||
switch(src)
|
||||
{
|
||||
case 0: return v.xxyy(*this).xzzw(*this);
|
||||
case 1: return v.yyyy(*this).xzzw(*this);
|
||||
case 2: return v.zzyy(*this).xzzw(*this);
|
||||
case 3: return v.wwyy(*this).xzzw(*this);
|
||||
case 0: return yyxx(v).zxzw(*this);
|
||||
case 1: return yyyy(v).zxzw(*this);
|
||||
case 2: return yyzz(v).zxzw(*this);
|
||||
case 3: return yyww(v).zxzw(*this);
|
||||
default: __assume(0);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
switch(src)
|
||||
{
|
||||
case 0: return v.xxxx(*this).zxzw(*this);
|
||||
case 1: return v.yyxx(*this).zxzw(*this);
|
||||
case 2: return v.zzxx(*this).zxzw(*this);
|
||||
case 3: return v.wwxx(*this).zxzw(*this);
|
||||
case 0: return xxxx(v).xzzw(*this);
|
||||
case 1: return xxyy(v).xzzw(*this);
|
||||
case 2: return xxzz(v).xzzw(*this);
|
||||
case 3: return xxww(v).xzzw(*this);
|
||||
default: __assume(0);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch(src)
|
||||
{
|
||||
case 0: return xyxz(v.xxww(*this));
|
||||
case 1: return xyxz(v.yyww(*this));
|
||||
case 2: return xyxz(v.zzww(*this));
|
||||
case 3: return xyxz(v.wwww(*this));
|
||||
case 0: return xyzx(wwxx(v));
|
||||
case 1: return xyzx(wwyy(v));
|
||||
case 2: return xyzx(wwzz(v));
|
||||
case 3: return xyzx(wwww(v));
|
||||
default: __assume(0);
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
switch(src)
|
||||
{
|
||||
case 0: return xyzx(v.xxzz(*this));
|
||||
case 1: return xyzx(v.yyzz(*this));
|
||||
case 2: return xyzx(v.zzzz(*this));
|
||||
case 3: return xyzx(v.wwzz(*this));
|
||||
case 0: return xyxz(zzxx(v));
|
||||
case 1: return xyxz(zzyy(v));
|
||||
case 2: return xyxz(zzzz(v));
|
||||
case 3: return xyxz(zzww(v));
|
||||
default: __assume(0);
|
||||
}
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue