GSdx: the changes of r5007 did not help as much as I thought, disabled them for the time being, plus other minor optimizations

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5010 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-12-23 15:53:53 +00:00
parent da41bcd756
commit 963a6a653a
10 changed files with 359 additions and 273 deletions

View File

@ -466,30 +466,7 @@ GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
return i->second;
}
GSOffset* o = (GSOffset*)_aligned_malloc(sizeof(GSOffset), 32);
o->hash = hash;
pixelAddress bn = m_psm[psm].bn;
for(int i = 0; i < 256; i++)
{
o->block.row[i] = (short)bn(0, i << 3, bp, bw);
}
o->block.col = m_psm[psm].blockOffset;
pixelAddress pa = m_psm[psm].pa;
for(int i = 0; i < 4096; i++)
{
o->pixel.row[i] = (int)pa(0, i & 0x7ff, bp, bw);
}
for(int i = 0; i < 8; i++)
{
o->pixel.col[i] = m_psm[psm].rowOffset[i];
}
GSOffset* o = new GSOffset(bp, bw, psm);
m_omap[hash] = o;
@ -1986,3 +1963,102 @@ void GSLocalMemory::SaveBMP(const string& fn, uint32 bp, uint32 bw, uint32 psm,
_aligned_free(bits);
}
// GSOffset
// Builds the lookup tables for the buffer identified by (_bp, _bw, _psm):
//  - block.row: 256 entries, the block address function evaluated at x = 0 for y = 0, 8, 16, ...
//  - block.col: the format's shared blockOffset table
//  - pixel.row: 4096 entries, the pixel address function evaluated at x = 0; y wraps every 2048 rows
//  - pixel.col: the format's eight rowOffset tables
GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm)
{
	// Same packing as the 14/6/6-bit bp/bw/psm fields that share storage with hash.
	hash = _bp | (_bw << 14) | (_psm << 20);

	// Column tables are not copied, only pointed at.
	block.col = GSLocalMemory::m_psm[_psm].blockOffset;

	for(int k = 0; k < 8; ++k)
	{
		pixel.col[k] = GSLocalMemory::m_psm[_psm].rowOffset[k];
	}

	// One entry per 8-pixel-high row of blocks.
	GSLocalMemory::pixelAddress blockAddr = GSLocalMemory::m_psm[_psm].bn;

	for(int row = 0; row < 256; ++row)
	{
		const int y = row << 3;

		block.row[row] = (short)blockAddr(0, y, _bp, _bw);
	}

	// One entry per scanline; the upper half of the table repeats the lower half.
	GSLocalMemory::pixelAddress pixelAddr = GSLocalMemory::m_psm[_psm].pa;

	for(int row = 0; row < 4096; ++row)
	{
		const int y = row & 0x7ff;

		pixel.row[row] = (int)pixelAddr(0, y, _bp, _bw);
	}
}
// Frees every page list that GetPages allocated and stored in m_cache.
GSOffset::~GSOffset()
{
	hash_map<uint64, list<uint32>*>::iterator entry = m_cache.begin();

	while(entry != m_cache.end())
	{
		delete entry->second;

		++entry;
	}
}
// Returns the page numbers covered by rect in this buffer, in ascending order without duplicates.
//
// rect: area to query; it is first grown outwards to a multiple of the step size.
// bbox: optional output, receives the aligned rectangle that was actually scanned.
//
// The result is cached per aligned rectangle and stays owned by this object (it is
// deleted in the destructor), so callers must not free or modify the returned list.
// The lookup and insertion run under m_lock.
list<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
{
	GSAutoLock lock(&m_lock);

	// When the low 5 bits of bp are clear step by pgs, otherwise by bs
	// (presumably page size vs. block size of the format - confirm against GSLocalMemory).
	GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;

	GSVector4i r = rect.ralign<Align_Outside>(bs);

	if(bbox != NULL) *bbox = r;

	uint64 r_hash;

	GSVector4i::storel(&r_hash, r.sra32(3).ps32()); // max 19-bit coordinates, should not be a problem (can shift right by 3 because it is mod8, smallest block size)

	hash_map<uint64, list<uint32>*>::iterator cached = m_cache.find(r_hash);

	if(cached != m_cache.end())
	{
		return cached->second;
	}

	// One bit per page: 16 * 32 = 512 bits. Collecting into a bitmap first removes
	// duplicates and yields the pages sorted when the bits are read back.
	uint32 tmp[16];

	memset(tmp, 0, sizeof(tmp));

	for(int y = r.top; y < r.bottom; y += bs.y)
	{
		uint32 base = block.row[y >> 3];

		for(int x = r.left; x < r.right; x += bs.x)
		{
			// block number / 32 = page number
			uint32 n = (base + block.col[x >> 3]) >> 5;

			if(n < MAX_PAGES)
			{
				// unsigned literal: shifting a signed 1 into bit 31 is not well defined
				tmp[n >> 5] |= 1u << (n & 31);
			}
		}
	}

	list<uint32>* l = new list<uint32>();

	for(size_t i = 0; i < countof(tmp); i++)
	{
		uint32 p = tmp[i];

		if(p == 0) continue;

		unsigned long j;

		// pop the set bits from lowest to highest
		while(_BitScanForward(&j, p))
		{
			p ^= 1u << j;

			l->push_back((uint32)((i << 5) + j));
		}
	}

	m_cache[r_hash] = l;

	return l;
}

View File

@ -26,22 +26,36 @@
#include "GSVector.h"
#include "GSBlock.h"
#include "GSClut.h"
#include "GSThread.h"
struct GSOffset
class GSOffset : public GSAlignedClass<32>
{
struct
GSCritSec m_lock; // GetPages could be called from multiple threads
hash_map<uint64, list<uint32>*> m_cache;
public:
__aligned(struct, 32) Block
{
short row[256]; // yn (n = 0 8 16 ...)
short* col; // blockOffset*
} block;
struct
};
__aligned(struct, 32) Pixel
{
int row[4096]; // yn (n = 0 1 2 ...) NOTE: this wraps around above 2048, only transfers should address the upper half (dark cloud 2 inventing)
int* col[8]; // rowOffset*
} pixel;
};
union {uint32 hash; struct {uint32 bp:14, bw:6, psm:6;};};
Block block;
Pixel pixel;
GSOffset(uint32 bp, uint32 bw, uint32 psm);
virtual ~GSOffset();
list<uint32>* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL);
};
struct GSPixelOffset4

View File

@ -61,9 +61,9 @@ GSRasterizer::~GSRasterizer()
delete m_ds;
}
bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
bool GSRasterizer::IsOneOfMyScanlines(int top) const
{
return m_myscanline[scanline >> THREAD_HEIGHT] != 0;
return m_myscanline[top >> THREAD_HEIGHT] != 0;
}
bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
@ -82,6 +82,20 @@ bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
return false;
}
// Returns top unchanged when its row group (top >> THREAD_HEIGHT) is marked in
// m_myscanline; otherwise returns the first scanline of the next marked row group.
//
// NOTE(review): the forward scan has no upper bound check. It presumably relies on
// m_myscanline always holding a non-zero entry at or after any index reached here
// - confirm against the allocation/initialisation of m_myscanline.
int GSRasterizer::FindMyNextScanline(int top) const
{
	int group = top >> THREAD_HEIGHT;

	if(m_myscanline[group] != 0)
	{
		return top;
	}

	do
	{
		++group;
	}
	while(m_myscanline[group] == 0);

	return group << THREAD_HEIGHT;
}
void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data)
{
Draw(data);
@ -91,10 +105,12 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
{
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
if(data->count == 0) return;
m_ds->BeginDraw(data->param);
const GSVertexSW* vertices = data->vertices;
const int count = data->count;
const GSVertexSW* vertices_end = data->vertices + data->count;
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
@ -108,21 +124,39 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
switch(data->primclass)
{
case GS_POINT_CLASS:
if(scissor_test) DrawPoint<true>(vertices, count);
else DrawPoint<false>(vertices, count);
if(scissor_test)
{
DrawPoint<true>(vertices, data->count);
}
else
{
DrawPoint<false>(vertices, data->count);
}
break;
case GS_LINE_CLASS:
ASSERT(!(count & 1));
for(int i = 0; i < count; i += 2) DrawLine(&vertices[i]);
do {DrawLine(vertices); vertices += 2;}
while(vertices < vertices_end);
break;
case GS_TRIANGLE_CLASS:
ASSERT(!(count % 3));
for(int i = 0; i < count; i += 3) DrawTriangle(&vertices[i]);
do {DrawTriangle(vertices); vertices += 3;}
while(vertices < vertices_end);
break;
case GS_SPRITE_CLASS:
ASSERT(!(count & 1));
for(int i = 0; i < count; i += 2) DrawSprite(&vertices[i], data->solidrect);
do {DrawSprite(vertices, data->solidrect); vertices += 2;}
while(vertices < vertices_end);
break;
default:
__assume(0);
}
@ -417,36 +451,41 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
GSVector4 scissor = m_fscissor.xzxz();
do
top = FindMyNextScanline(top);
while(top < bottom)
{
if(IsOneOfMyScanlines(top))
GSVector4 dy = GSVector4(top) - p0.yyyy();
GSVertexSW scan;
scan.p = edge.p + dedge.p * dy;
GSVector4 lrf = scan.p.ceil();
GSVector4 l = lrf.max(scissor);
GSVector4 r = lrf.min(scissor);
GSVector4i lr = GSVector4i(l.xxyy(r));
int left = lr.extract32<0>();
int right = lr.extract32<2>();
int pixels = right - left;
if(pixels > 0)
{
GSVector4 dy = GSVector4(top) - p0.yyyy();
scan.t = edge.t + dedge.t * dy;
scan.c = edge.c + dedge.c * dy;
GSVertexSW scan;
AddScanline(e++, pixels, left, top, scan + dscan * (l - p0).xxxx());
}
scan.p = edge.p + dedge.p * dy;
top++;
GSVector4 lrf = scan.p.ceil();
GSVector4 l = lrf.max(scissor);
GSVector4 r = lrf.min(scissor);
GSVector4i lr = GSVector4i(l.xxyy(r));
int left = lr.extract32<0>();
int right = lr.extract32<2>();
int pixels = right - left;
if(pixels > 0)
{
scan.t = edge.t + dedge.t * dy;
scan.c = edge.c + dedge.c * dy;
AddScanline(e++, pixels, left, top, scan + dscan * (l - p0).xxxx());
}
if(!IsOneOfMyScanlines(top))
{
top += (m_threads - 1) << THREAD_HEIGHT;
}
}
while(++top < bottom);
m_edge.count += e - &m_edge.buff[m_edge.count];
}
@ -782,7 +821,13 @@ GSRasterizerList::~GSRasterizerList()
void GSRasterizerList::Queue(shared_ptr<GSRasterizerData> data)
{
Push(data);
// disable dispatcher thread for now and pass-through directly,
// would only be relevant if data->syncpoint was utilized more,
// it would hide the syncing latency from the main gs thread
// Push(data);
Process(data); m_count++;
}
void GSRasterizerList::Sync()

View File

@ -138,8 +138,9 @@ public:
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
virtual ~GSRasterizer();
__forceinline bool IsOneOfMyScanlines(int scanline) const;
__forceinline bool IsOneOfMyScanlines(int top) const;
__forceinline bool IsOneOfMyScanlines(int top, int bottom) const;
__forceinline int FindMyNextScanline(int top) const;
void Draw(shared_ptr<GSRasterizerData> data);

View File

@ -37,8 +37,8 @@ GSRendererSW::GSRendererSW(int threads)
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
memset(m_tex_pages, 0, sizeof(m_tex_pages));
memset(m_fzb_pages, 0, sizeof(m_fzb_pages));
memset(m_tex_pages, 0, sizeof(m_tex_pages));
}
GSRendererSW::~GSRendererSW()
@ -166,7 +166,7 @@ void GSRendererSW::Draw()
{
if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
shared_ptr<GSRasterizerData> data(new GSRasterizerData2());
shared_ptr<GSRasterizerData> data(new GSRasterizerData2(this));
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
@ -197,11 +197,15 @@ void GSRendererSW::Draw()
if(gd->sel.fwrite)
{
m_tc->InvalidateVideoMem(m_context->offset.fb, r);
UseTargetPages(m_context->offset.fb, r);
}
if(gd->sel.zwrite)
{
m_tc->InvalidateVideoMem(m_context->offset.zb, r);
UseTargetPages(m_context->offset.zb, r);
}
if(s_dump)
@ -260,16 +264,6 @@ void GSRendererSW::Draw()
else
{
m_rl->Queue(data);
if(gd->sel.fwrite)
{
InvalidatePages(m_context->offset.fb, r);
}
if(gd->sel.zwrite)
{
InvalidatePages(m_context->offset.zb, r);
}
}
// TODO: m_perfmon.Put(GSPerfMon::Prim, stats.prims);
@ -295,43 +289,77 @@ void GSRendererSW::Sync(int reason)
m_rl->Sync();
memset(m_tex_pages, 0, sizeof(m_tex_pages));
memset(m_fzb_pages, 0, sizeof(m_fzb_pages));
memset(m_tex_pages, 0, sizeof(m_tex_pages));
}
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
//printf("ivm %05x %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
m_tc->InvalidateVideoMem(o, r);
if(CheckPages(o, r)) // check if the changing pages either used as a texture or a target
// check if the changing pages either used as a texture or a target
list<uint32>* pages = o->GetPages(r);
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
{
Sync(5);
uint32 page = *i;
if(m_fzb_pages[page] | (m_tex_pages[page >> 5] & (1 << (page & 31))))
{
Sync(5);
break;
}
}
}
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
//printf("ilm %05x %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
if(CheckPages(o, r)) // TODO: only checking m_fzb_pages would be enough (read-backs are rare anyway)
list<uint32>* pages = o->GetPages(r);
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
{
Sync(6);
uint32 page = *i;
if(m_fzb_pages[page])
{
Sync(6);
break;
}
}
}
void GSRendererSW::InvalidatePages(const GSTextureCacheSW::Texture* t)
void GSRendererSW::UseTargetPages(GSOffset* o, const GSVector4i& rect)
{
//printf("tex %05x %d %d\n", t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM);
list<uint32>* pages = o->GetPages(rect);
for(size_t i = 0; i < countof(t->m_pages); i++)
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
{
if(m_fzb_pages[i] & t->m_pages[i]) // currently being drawn to? => sync
_InterlockedIncrement(&m_fzb_pages[*i]);
}
}
// Atomically decrements the m_fzb_pages counter of every page that rect covers in
// the buffer described by o.
void GSRendererSW::ReleaseTargetPages(GSOffset* o, const GSVector4i& rect)
{
	list<uint32>* pages = o->GetPages(rect);

	list<uint32>::iterator page = pages->begin();

	while(page != pages->end())
	{
		_InterlockedDecrement(&m_fzb_pages[*page]);

		++page;
	}
}
void GSRendererSW::UseSourcePages(const GSTextureCacheSW::Texture* t)
{
for(list<uint32>::const_iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
{
if(m_fzb_pages[*i]) // currently being drawn to? => sync
{
//
Sync(7);
@ -340,63 +368,14 @@ void GSRendererSW::InvalidatePages(const GSTextureCacheSW::Texture* t)
return;
}
m_tex_pages[i] |= t->m_pages[i]; // remember which texture pages are used
}
}
void GSRendererSW::InvalidatePages(const GSOffset* o, const GSVector4i& rect)
{
//printf("fzb %05x %d %d\n", o->bp, o->bw, o->psm);
GSVector2i bs = (o->bp & 31) == 0 ? GSLocalMemory::m_psm[o->psm].pgs : GSLocalMemory::m_psm[o->psm].bs;
GSVector4i r = rect.ralign<Align_Outside>(bs);
for(int y = r.top; y < r.bottom; y += bs.y)
for(size_t i = 0; i < countof(t->m_pages.bm); i++)
{
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 page = (base + o->block.col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
m_fzb_pages[page >> 5] |= 1 << (page & 31);
}
}
m_tex_pages[i] |= t->m_pages.bm[i]; // remember which texture pages are used
}
}
bool GSRendererSW::CheckPages(const GSOffset* o, const GSVector4i& rect)
{
GSVector2i bs = (o->bp & 31) == 0 ? GSLocalMemory::m_psm[o->psm].pgs : GSLocalMemory::m_psm[o->psm].bs;
GSVector4i r = rect.ralign<Align_Outside>(bs);
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 page = (base + o->block.col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
uint32 mask = 1 << (page & 31);
if((m_tex_pages[page >> 5] | m_fzb_pages[page >> 5]) & mask)
{
return true;
}
}
}
}
return false;
}
#include "GSTextureSW.h"
bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
@ -510,7 +489,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(t == NULL) {ASSERT(0); return false;}
InvalidatePages(t);
UseSourcePages(t);
GSVector4i r;
@ -663,7 +642,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(t == NULL) {ASSERT(0); return false;}
InvalidatePages(t);
UseSourcePages(t);
GSVector4i r;

View File

@ -29,8 +29,15 @@ class GSRendererSW : public GSRendererT<GSVertexSW>
{
class GSRasterizerData2 : public GSRasterizerData
{
GSRendererSW* m_parent;
GSOffset* m_fb;
GSOffset* m_zb;
public:
GSRasterizerData2()
GSRasterizerData2(GSRendererSW* parent)
: m_parent(parent)
, m_fb(parent->m_context->offset.fb)
, m_zb(parent->m_context->offset.zb)
{
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
@ -43,6 +50,18 @@ class GSRendererSW : public GSRendererT<GSVertexSW>
virtual ~GSRasterizerData2()
{
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
GSVector4i r = bbox.rintersect(scissor);
if(gd->sel.fwrite)
{
m_parent->ReleaseTargetPages(m_fb, r);
}
if(gd->sel.zwrite)
{
m_parent->ReleaseTargetPages(m_zb, r);
}
if(gd->clut) _aligned_free(gd->clut);
if(gd->dimx) _aligned_free(gd->dimx);
@ -58,7 +77,7 @@ protected:
uint8* m_output;
bool m_reset;
GSPixelOffset4* m_fzb;
uint32 m_fzb_pages[16];
long m_fzb_pages[512];
uint32 m_tex_pages[16];
void Reset();
@ -71,9 +90,9 @@ protected:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void InvalidatePages(const GSOffset* o, const GSVector4i& rect);
void InvalidatePages(const GSTextureCacheSW::Texture* t);
bool CheckPages(const GSOffset* o, const GSVector4i& rect);
void UseTargetPages(GSOffset* o, const GSVector4i& rect);
void ReleaseTargetPages(GSOffset* o, const GSVector4i& rect);
void UseSourcePages(const GSTextureCacheSW::Texture* t);
bool GetScanlineGlobalData(GSScanlineGlobalData& gd);

View File

@ -289,19 +289,14 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
return dst;
}
void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect, bool target)
void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, bool target)
{
// Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
if (!o) return;
if(!o) return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
uint32 bp = o->bp;
uint32 bw = o->bw;
uint32 psm = o->psm;
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
GSVector4i r = rect.ralign<Align_Outside>(bs);
if(!target)
{
const list<Source*>& m = m_src.m_map[bp >> 5];
@ -319,60 +314,56 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec
}
}
GSVector4i r;
list<uint32>* pages = o->GetPages(rect, &r);
bool found = false;
for(int y = r.top; y < r.bottom; y += bs.y)
for(list<uint32>::iterator p = pages->begin(); p != pages->end(); p++)
{
uint32 base = o->block.row[y >> 3];
uint32 page = *p;
for(int x = r.left; x < r.right; x += bs.x)
const list<Source*>& m = m_src.m_map[page];
for(list<Source*>::const_iterator i = m.begin(); i != m.end(); )
{
uint32 page = (base + o->block.col[x >> 3]) >> 5;
list<Source*>::const_iterator j = i++;
if(page < MAX_PAGES)
Source* s = *j;
if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
{
const list<Source*>& m = m_src.m_map[page];
bool b = bp == s->m_TEX0.TBP0;
for(list<Source*>::const_iterator i = m.begin(); i != m.end(); )
if(!s->m_target)
{
list<Source*>::const_iterator j = i++;
Source* s = *j;
if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
if(s->m_repeating)
{
bool b = bp == s->m_TEX0.TBP0;
if(!s->m_target)
{
if(s->m_repeating)
{
list<GSVector2i>& l = s->m_p2t[page];
list<GSVector2i>& l = s->m_p2t[page];
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
{
s->m_valid[k->x] &= k->y;
}
}
else
{
s->m_valid[page] = 0;
}
s->m_complete = false;
found = b;
}
else
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
{
// TODO
if(b)
{
m_src.RemoveAt(s);
}
s->m_valid[k->x] &= k->y;
}
}
else
{
s->m_valid[page] = 0;
}
s->m_complete = false;
found = b;
}
else
{
// TODO
if(b)
{
m_src.RemoveAt(s);
}
}
}
}
@ -425,7 +416,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec
}
}
void GSTextureCache::InvalidateLocalMem(const GSOffset* o, const GSVector4i& r)
void GSTextureCache::InvalidateLocalMem(GSOffset* o, const GSVector4i& r)
{
uint32 bp = o->bp;
uint32 psm = o->psm;

View File

@ -143,8 +143,8 @@ public:
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used);
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h);
void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(const GSOffset* o, const GSVector4i& r);
void InvalidateVideoMem(GSOffset* o, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(GSOffset* o, const GSVector4i& r);
void IncAge();
bool UserHacks_HalfPixelOffset;

View File

@ -70,78 +70,50 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
if(t == NULL)
{
const GSOffset* o = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
t = new Texture(m_state, o, tw0, TEX0, TEXA);
t = new Texture(m_state, tw0, TEX0, TEXA);
m_textures.insert(t);
for(int i = 0; i < countof(t->m_pages); i++)
for(list<uint32>::iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
{
uint32 p = t->m_pages[i];
if(p != 0)
{
list<Texture*>* m = &m_map[i << 5];
unsigned long j;
while(_BitScanForward(&j, p))
{
p ^= 1 << j;
m[j].push_front(t);
}
}
m_map[*i].push_front(t);
}
}
return t;
}
void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect)
void GSTextureCacheSW::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect)
{
uint32 psm = o->psm;
list<uint32>* pages = o->GetPages(rect);
GSVector2i bs = (o->bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
GSVector4i r = rect.ralign<Align_Outside>(bs);
for(int y = r.top; y < r.bottom; y += bs.y)
for(list<uint32>::iterator p = pages->begin(); p != pages->end(); p++)
{
uint32 base = o->block.row[y >> 3];
uint32 page = *p;
for(int x = r.left; x < r.right; x += bs.x)
const list<Texture*>& map = m_map[page];
for(list<Texture*>::const_iterator i = map.begin(); i != map.end(); i++)
{
uint32 page = (base + o->block.col[x >> 3]) >> 5;
Texture* t = *i;
if(page < MAX_PAGES)
if(GSUtil::HasSharedBits(o->psm, t->m_TEX0.PSM))
{
const list<Texture*>& map = m_map[page];
for(list<Texture*>::const_iterator i = map.begin(); i != map.end(); i++)
if(t->m_repeating)
{
Texture* t = *i;
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
if(t->m_repeating)
{
list<GSVector2i>& l = t->m_p2t[page];
list<GSVector2i>& l = t->m_p2t[page];
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
{
t->m_valid[j->x] &= j->y;
}
}
else
{
t->m_valid[page] = 0;
}
t->m_complete = false;
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
{
t->m_valid[j->x] &= j->y;
}
}
else
{
t->m_valid[page] = 0;
}
t->m_complete = false;
}
}
}
@ -195,9 +167,8 @@ void GSTextureCacheSW::IncAge()
//
GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
: m_state(state)
, m_offset(offset)
, m_buff(NULL)
, m_tw(tw0)
, m_age(0)
@ -208,28 +179,18 @@ GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint3
m_TEXA = TEXA;
memset(m_valid, 0, sizeof(m_valid));
memset(m_pages, 0, sizeof(m_pages));
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
memset(m_pages.bm, 0, sizeof(m_pages.bm));
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
list<uint32>* pages = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
for(int y = 0; y < th; y += bs.y)
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
{
uint32 base = offset->block.row[y >> 3];
uint32 page = *i;
for(int x = 0; x < tw; x += bs.x)
{
uint32 page = (base + offset->block.col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
m_pages[page >> 5] |= 1 << (page & 31);
}
}
m_pages.bm[page >> 5] |= 1 << (page & 31);
m_pages.n.push_back(page);
}
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower

View File

@ -30,7 +30,7 @@ public:
{
public:
GSState* m_state;
const GSOffset* m_offset;
GSOffset* m_offset;
GIFRegTEX0 m_TEX0;
GIFRegTEXA m_TEXA;
void* m_buff;
@ -40,13 +40,13 @@ public:
bool m_repeating;
list<GSVector2i>* m_p2t;
uint32 m_valid[MAX_PAGES];
uint32 m_pages[16];
struct {uint32 bm[16]; list<uint32> n;} m_pages;
// m_valid
// fast mode: each uint32 bits map to the 32 blocks of that page
// repeating mode: 1 bpp image of the texture tiles (8x8), also having 512 elements is just a coincidence (worst case: (1024*1024)/(8*8)/(sizeof(uint32)*8))
Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
virtual ~Texture();
bool Update(const GSVector4i& r);
@ -64,7 +64,7 @@ public:
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r);
void InvalidateVideoMem(GSOffset* o, const GSVector4i& r);
void RemoveAll();
void RemoveAt(Texture* t);