mirror of https://github.com/PCSX2/pcsx2.git
GSdx: the texture cache fix discussed under r4589.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4592 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
7029f7aa98
commit
cc8d14511b
|
@ -493,15 +493,15 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSVertexSW dedge = GSVertexSW::zero();
|
|
||||||
GSVertexSW dscan = GSVertexSW::zero();
|
|
||||||
|
|
||||||
GSVertexSW dv = v[1] - v[0];
|
GSVertexSW dv = v[1] - v[0];
|
||||||
|
|
||||||
GSVector4 zero = GSVector4::zero();
|
GSVector4 dt = dv.t / dv.p.xyxy();
|
||||||
|
|
||||||
dedge.t = (dv.t / dv.p.yyyy()).xyxy(zero).wyww();
|
GSVertexSW dedge;
|
||||||
dscan.t = (dv.t / dv.p.xxxx()).xyxy(zero).xwww();
|
GSVertexSW dscan;
|
||||||
|
|
||||||
|
dedge.t = GSVector4::zero().insert<1, 1>(dt);
|
||||||
|
dscan.t = GSVector4::zero().insert<0, 0>(dt);
|
||||||
|
|
||||||
GSVector4 prestep = GSVector4(r.left, r.top) - scan.p;
|
GSVector4 prestep = GSVector4(r.left, r.top) - scan.p;
|
||||||
|
|
||||||
|
|
|
@ -619,9 +619,6 @@ void GSRenderer::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const G
|
||||||
switch(wms)
|
switch(wms)
|
||||||
{
|
{
|
||||||
case CLAMP_REPEAT:
|
case CLAMP_REPEAT:
|
||||||
// FixMe: The last + 1 here breaks character portraits in Ar Tonelico 2.
|
|
||||||
// The problem is the same in HW and in SW rendering, and I also ruled out the
|
|
||||||
// usual scaling problems. (rama)
|
|
||||||
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
|
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
|
||||||
break;
|
break;
|
||||||
case CLAMP_CLAMP:
|
case CLAMP_CLAMP:
|
||||||
|
|
|
@ -729,7 +729,7 @@ void GSRendererSW::VertexKick(bool skip)
|
||||||
GSVertexSW& dst = m_vl.AddTail();
|
GSVertexSW& dst = m_vl.AddTail();
|
||||||
|
|
||||||
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET;
|
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET;
|
||||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F);
|
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
|
||||||
|
|
||||||
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||||
|
|
||||||
|
|
|
@ -892,8 +892,28 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
||||||
|
|
||||||
bool repeating = m_TEX0.IsRepeating();
|
bool repeating = m_TEX0.IsRepeating();
|
||||||
|
|
||||||
|
if(repeating && m_tiles.empty())
|
||||||
|
{
|
||||||
|
for(int y = 0; y < th; y += bs.y)
|
||||||
|
{
|
||||||
|
uint32 base = o->block.row[y >> 3];
|
||||||
|
|
||||||
|
for(int x = 0; x < tw; x += bs.x)
|
||||||
|
{
|
||||||
|
uint32 block = base + o->block.col[x >> 3];
|
||||||
|
|
||||||
|
if(block < MAX_BLOCKS)
|
||||||
|
{
|
||||||
|
m_tiles[block].push_back(GSVector2i(x, y));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
uint32 blocks = 0;
|
uint32 blocks = 0;
|
||||||
|
|
||||||
|
if(!repeating)
|
||||||
|
{
|
||||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||||
{
|
{
|
||||||
uint32 base = o->block.row[y >> 3];
|
uint32 base = o->block.row[y >> 3];
|
||||||
|
@ -908,11 +928,8 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
||||||
uint32 col = 1 << (block & 31);
|
uint32 col = 1 << (block & 31);
|
||||||
|
|
||||||
if((m_valid[row] & col) == 0)
|
if((m_valid[row] & col) == 0)
|
||||||
{
|
|
||||||
if(!repeating)
|
|
||||||
{
|
{
|
||||||
m_valid[row] |= col;
|
m_valid[row] |= col;
|
||||||
}
|
|
||||||
|
|
||||||
Write(GSVector4i(x, y, x + bs.x, y + bs.y));
|
Write(GSVector4i(x, y, x + bs.x, y + bs.y));
|
||||||
|
|
||||||
|
@ -921,10 +938,8 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if(blocks > 0)
|
else
|
||||||
{
|
|
||||||
if(repeating)
|
|
||||||
{
|
{
|
||||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||||
{
|
{
|
||||||
|
@ -939,12 +954,33 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
||||||
uint32 row = block >> 5;
|
uint32 row = block >> 5;
|
||||||
uint32 col = 1 << (block & 31);
|
uint32 col = 1 << (block & 31);
|
||||||
|
|
||||||
|
if((m_valid[row] & col) == 0)
|
||||||
|
{
|
||||||
m_valid[row] |= col;
|
m_valid[row] |= col;
|
||||||
|
|
||||||
|
hash_map<uint32, list<GSVector2i> >::iterator i = m_tiles.find(block);
|
||||||
|
|
||||||
|
if(i != m_tiles.end())
|
||||||
|
{
|
||||||
|
list<GSVector2i>& l = i->second;
|
||||||
|
|
||||||
|
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
|
||||||
|
{
|
||||||
|
Write(GSVector4i(j->x, j->y, j->x + bs.x, j->y + bs.y));
|
||||||
|
|
||||||
|
blocks++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
blocks++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(blocks > 0)
|
||||||
|
{
|
||||||
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_fmt == FMT_32 ? 2 : 0));
|
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_fmt == FMT_32 ? 2 : 0));
|
||||||
|
|
||||||
Flush(m_write.count);
|
Flush(m_write.count);
|
||||||
|
|
|
@ -74,6 +74,7 @@ public:
|
||||||
int m_fmt;
|
int m_fmt;
|
||||||
bool m_target;
|
bool m_target;
|
||||||
bool m_complete;
|
bool m_complete;
|
||||||
|
hash_map<uint32, list<GSVector2i> > m_tiles;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Source(GSRenderer* r, uint8* temp);
|
Source(GSRenderer* r, uint8* temp);
|
||||||
|
|
|
@ -25,7 +25,6 @@
|
||||||
GSTextureCacheSW::GSTextureCacheSW(GSState* state)
|
GSTextureCacheSW::GSTextureCacheSW(GSState* state)
|
||||||
: m_state(state)
|
: m_state(state)
|
||||||
{
|
{
|
||||||
memset(m_pages, 0, sizeof(m_pages));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GSTextureCacheSW::~GSTextureCacheSW()
|
GSTextureCacheSW::~GSTextureCacheSW()
|
||||||
|
@ -77,6 +76,13 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0
|
||||||
|
|
||||||
m_textures.insert(t);
|
m_textures.insert(t);
|
||||||
|
|
||||||
|
__aligned(uint32, 16) pages[16];
|
||||||
|
|
||||||
|
((GSVector4i*)pages)[0] = GSVector4i::zero();
|
||||||
|
((GSVector4i*)pages)[1] = GSVector4i::zero();
|
||||||
|
((GSVector4i*)pages)[2] = GSVector4i::zero();
|
||||||
|
((GSVector4i*)pages)[3] = GSVector4i::zero();
|
||||||
|
|
||||||
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
|
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
|
||||||
|
|
||||||
int tw = 1 << TEX0.TW;
|
int tw = 1 << TEX0.TW;
|
||||||
|
@ -92,17 +98,17 @@ const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0
|
||||||
|
|
||||||
if(page < MAX_PAGES)
|
if(page < MAX_PAGES)
|
||||||
{
|
{
|
||||||
m_pages[page >> 5] |= 1 << (page & 31);
|
pages[page >> 5] |= 1 << (page & 31);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i = 0; i < countof(m_pages); i++)
|
for(int i = 0; i < countof(pages); i++)
|
||||||
{
|
{
|
||||||
if(uint32 p = m_pages[i])
|
uint32 p = pages[i];
|
||||||
{
|
|
||||||
m_pages[i] = 0;
|
|
||||||
|
|
||||||
|
if(p != 0)
|
||||||
|
{
|
||||||
list<Texture*>* m = &m_map[i << 5];
|
list<Texture*>* m = &m_map[i << 5];
|
||||||
|
|
||||||
unsigned long j;
|
unsigned long j;
|
||||||
|
@ -256,17 +262,6 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
|
||||||
|
|
||||||
bool repeating = m_TEX0.IsRepeating();
|
bool repeating = m_TEX0.IsRepeating();
|
||||||
|
|
||||||
if(m_TEX0.TBW == 1 && m_tw != 0) // repeating)
|
|
||||||
{
|
|
||||||
// FIXME:
|
|
||||||
// - marking a block prevents fetching it again to a different part of the texture
|
|
||||||
// - only a real issue for TBW = 1 mipmap levels, where the repeating part is below and often exploited (onimusha 3 intro / sidewalk)
|
|
||||||
|
|
||||||
// r = GSVector4i(0, 0, tw, th);
|
|
||||||
r.top = 0;
|
|
||||||
r.bottom = th;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = r.ralign<Align_Outside>(bs);
|
r = r.ralign<Align_Outside>(bs);
|
||||||
|
|
||||||
if(r.eq(GSVector4i(0, 0, tw, th)))
|
if(r.eq(GSVector4i(0, 0, tw, th)))
|
||||||
|
@ -295,6 +290,30 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(repeating)
|
||||||
|
{
|
||||||
|
// TODO: pull this from cache (hash = o->... + m_tw), need to use m_buff relative pointers then
|
||||||
|
|
||||||
|
const GSOffset* RESTRICT o = m_offset;
|
||||||
|
|
||||||
|
uint8* dst = (uint8*)m_buff;
|
||||||
|
|
||||||
|
for(int y = 0, block_pitch = pitch * bs.y; y < th; y += bs.y, dst += block_pitch)
|
||||||
|
{
|
||||||
|
uint32 base = o->block.row[y >> 3];
|
||||||
|
|
||||||
|
for(int x = 0; x < tw; x += bs.x)
|
||||||
|
{
|
||||||
|
uint32 block = base + o->block.col[x >> 3];
|
||||||
|
|
||||||
|
if(block < MAX_BLOCKS)
|
||||||
|
{
|
||||||
|
m_tiles[block].push_back(&dst[x << shift]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GSLocalMemory& mem = m_state->m_mem;
|
GSLocalMemory& mem = m_state->m_mem;
|
||||||
|
@ -307,6 +326,8 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
|
||||||
|
|
||||||
uint32 pitch = (1 << m_tw) << shift;
|
uint32 pitch = (1 << m_tw) << shift;
|
||||||
|
|
||||||
|
if(!repeating)
|
||||||
|
{
|
||||||
uint8* dst = (uint8*)m_buff + pitch * r.top;
|
uint8* dst = (uint8*)m_buff + pitch * r.top;
|
||||||
|
|
||||||
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
|
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
|
||||||
|
@ -323,11 +344,8 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
|
||||||
uint32 col = 1 << (block & 31);
|
uint32 col = 1 << (block & 31);
|
||||||
|
|
||||||
if((m_valid[row] & col) == 0)
|
if((m_valid[row] & col) == 0)
|
||||||
{
|
|
||||||
if(!repeating)
|
|
||||||
{
|
{
|
||||||
m_valid[row] |= col;
|
m_valid[row] |= col;
|
||||||
}
|
|
||||||
|
|
||||||
(mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA);
|
(mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA);
|
||||||
|
|
||||||
|
@ -336,10 +354,8 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if(blocks > 0)
|
else
|
||||||
{
|
|
||||||
if(repeating)
|
|
||||||
{
|
{
|
||||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||||
{
|
{
|
||||||
|
@ -354,12 +370,31 @@ bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA&
|
||||||
uint32 row = block >> 5;
|
uint32 row = block >> 5;
|
||||||
uint32 col = 1 << (block & 31);
|
uint32 col = 1 << (block & 31);
|
||||||
|
|
||||||
|
if((m_valid[row] & col) == 0)
|
||||||
|
{
|
||||||
m_valid[row] |= col;
|
m_valid[row] |= col;
|
||||||
|
|
||||||
|
hash_map<uint32, list<uint8*> >::iterator i = m_tiles.find(block);
|
||||||
|
|
||||||
|
if(i != m_tiles.end())
|
||||||
|
{
|
||||||
|
list<uint8*>& l = i->second;
|
||||||
|
|
||||||
|
for(list<uint8*>::iterator j = l.begin(); j != l.end(); j++)
|
||||||
|
{
|
||||||
|
(mem.*rtxbP)(block, *j, pitch, TEXA);
|
||||||
|
|
||||||
|
blocks++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(blocks > 0)
|
||||||
|
{
|
||||||
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift);
|
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,6 +38,7 @@ public:
|
||||||
uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page
|
uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page
|
||||||
uint32 m_age;
|
uint32 m_age;
|
||||||
bool m_complete;
|
bool m_complete;
|
||||||
|
hash_map<uint32, list<uint8*> > m_tiles;
|
||||||
|
|
||||||
explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0);
|
explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0);
|
||||||
virtual ~Texture();
|
virtual ~Texture();
|
||||||
|
@ -50,7 +51,6 @@ protected:
|
||||||
GSState* m_state;
|
GSState* m_state;
|
||||||
hash_set<Texture*> m_textures;
|
hash_set<Texture*> m_textures;
|
||||||
list<Texture*> m_map[MAX_PAGES];
|
list<Texture*> m_map[MAX_PAGES];
|
||||||
uint32 m_pages[16];
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSTextureCacheSW(GSState* state);
|
GSTextureCacheSW(GSState* state);
|
||||||
|
|
|
@ -2753,6 +2753,8 @@ public:
|
||||||
|
|
||||||
template<int src, int dst> __forceinline GSVector4 insert(const GSVector4& v) const
|
template<int src, int dst> __forceinline GSVector4 insert(const GSVector4& v) const
|
||||||
{
|
{
|
||||||
|
// TODO: use blendps when src == dst
|
||||||
|
|
||||||
#if 0 // _M_SSE >= 0x401
|
#if 0 // _M_SSE >= 0x401
|
||||||
|
|
||||||
// NOTE: it's faster with shuffles...
|
// NOTE: it's faster with shuffles...
|
||||||
|
@ -2766,40 +2768,40 @@ public:
|
||||||
case 0:
|
case 0:
|
||||||
switch(src)
|
switch(src)
|
||||||
{
|
{
|
||||||
case 0: return v.xxyy(*this).xzzw(*this);
|
case 0: return yyxx(v).zxzw(*this);
|
||||||
case 1: return v.yyyy(*this).xzzw(*this);
|
case 1: return yyyy(v).zxzw(*this);
|
||||||
case 2: return v.zzyy(*this).xzzw(*this);
|
case 2: return yyzz(v).zxzw(*this);
|
||||||
case 3: return v.wwyy(*this).xzzw(*this);
|
case 3: return yyww(v).zxzw(*this);
|
||||||
default: __assume(0);
|
default: __assume(0);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
switch(src)
|
switch(src)
|
||||||
{
|
{
|
||||||
case 0: return v.xxxx(*this).zxzw(*this);
|
case 0: return xxxx(v).xzzw(*this);
|
||||||
case 1: return v.yyxx(*this).zxzw(*this);
|
case 1: return xxyy(v).xzzw(*this);
|
||||||
case 2: return v.zzxx(*this).zxzw(*this);
|
case 2: return xxzz(v).xzzw(*this);
|
||||||
case 3: return v.wwxx(*this).zxzw(*this);
|
case 3: return xxww(v).xzzw(*this);
|
||||||
default: __assume(0);
|
default: __assume(0);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
switch(src)
|
switch(src)
|
||||||
{
|
{
|
||||||
case 0: return xyxz(v.xxww(*this));
|
case 0: return xyzx(wwxx(v));
|
||||||
case 1: return xyxz(v.yyww(*this));
|
case 1: return xyzx(wwyy(v));
|
||||||
case 2: return xyxz(v.zzww(*this));
|
case 2: return xyzx(wwzz(v));
|
||||||
case 3: return xyxz(v.wwww(*this));
|
case 3: return xyzx(wwww(v));
|
||||||
default: __assume(0);
|
default: __assume(0);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
switch(src)
|
switch(src)
|
||||||
{
|
{
|
||||||
case 0: return xyzx(v.xxzz(*this));
|
case 0: return xyxz(zzxx(v));
|
||||||
case 1: return xyzx(v.yyzz(*this));
|
case 1: return xyxz(zzyy(v));
|
||||||
case 2: return xyzx(v.zzzz(*this));
|
case 2: return xyxz(zzzz(v));
|
||||||
case 3: return xyzx(v.wwzz(*this));
|
case 3: return xyxz(zzww(v));
|
||||||
default: __assume(0);
|
default: __assume(0);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue