mirror of https://github.com/PCSX2/pcsx2.git
GSdx: the changes made in r5007 did not help as much as I thought, so they are disabled for the time being; plus other minor optimizations
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5010 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
da41bcd756
commit
963a6a653a
|
@ -466,30 +466,7 @@ GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
|||
return i->second;
|
||||
}
|
||||
|
||||
GSOffset* o = (GSOffset*)_aligned_malloc(sizeof(GSOffset), 32);
|
||||
|
||||
o->hash = hash;
|
||||
|
||||
pixelAddress bn = m_psm[psm].bn;
|
||||
|
||||
for(int i = 0; i < 256; i++)
|
||||
{
|
||||
o->block.row[i] = (short)bn(0, i << 3, bp, bw);
|
||||
}
|
||||
|
||||
o->block.col = m_psm[psm].blockOffset;
|
||||
|
||||
pixelAddress pa = m_psm[psm].pa;
|
||||
|
||||
for(int i = 0; i < 4096; i++)
|
||||
{
|
||||
o->pixel.row[i] = (int)pa(0, i & 0x7ff, bp, bw);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 8; i++)
|
||||
{
|
||||
o->pixel.col[i] = m_psm[psm].rowOffset[i];
|
||||
}
|
||||
GSOffset* o = new GSOffset(bp, bw, psm);
|
||||
|
||||
m_omap[hash] = o;
|
||||
|
||||
|
@ -1986,3 +1963,102 @@ void GSLocalMemory::SaveBMP(const string& fn, uint32 bp, uint32 bw, uint32 psm,
|
|||
|
||||
_aligned_free(bits);
|
||||
}
|
||||
|
||||
// GSOffset
|
||||
|
||||
// Builds the block and pixel address lookup tables for the buffer described
// by base pointer _bp, buffer width _bw and pixel storage format _psm.
GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm)
{
	// Packed key: _bp in the low bits, _bw starting at bit 14, _psm at bit 20.
	hash = _bp | (_bw << 14) | (_psm << 20);

	// Column tables are shared per-format tables; only pointers are stored here.
	block.col = GSLocalMemory::m_psm[_psm].blockOffset;

	for(int k = 0; k < 8; ++k)
	{
		pixel.col[k] = GSLocalMemory::m_psm[_psm].rowOffset[k];
	}

	// One block-number entry per 8-line step in y (y = 0, 8, 16, ...).
	GSLocalMemory::pixelAddress blockNumber = GSLocalMemory::m_psm[_psm].bn;

	for(int row = 0; row < 256; ++row)
	{
		block.row[row] = (short)blockNumber(0, row << 3, _bp, _bw);
	}

	// One pixel-address entry per line. Only the low 11 bits of the line index
	// are passed on, so entries 2048..4095 repeat entries 0..2047.
	GSLocalMemory::pixelAddress pixelAddr = GSLocalMemory::m_psm[_psm].pa;

	for(int y = 0; y < 4096; ++y)
	{
		pixel.row[y] = (int)pixelAddr(0, y & 0x7ff, _bp, _bw);
	}
}
|
||||
|
||||
// Frees every page list stored in m_cache (the lists are allocated with new
// in GetPages and owned by this object).
GSOffset::~GSOffset()
{
	hash_map<uint64, list<uint32>*>::iterator entry = m_cache.begin();

	while(entry != m_cache.end())
	{
		delete entry->second;

		++entry;
	}
}
|
||||
|
||||
// Returns the page numbers covered by rect in this buffer, in ascending order
// and without duplicates.
//
// rect is first aligned outward to m_psm[psm].pgs when bp is a multiple of 32,
// otherwise to m_psm[psm].bs; if bbox is not NULL it receives the aligned
// rectangle. Pages >= MAX_PAGES are ignored.
//
// The result is cached per aligned rectangle. The returned list is owned by
// this object (it is deleted in ~GSOffset), so callers must not free or modify
// it. m_lock is taken for the duration of the call because GetPages can be
// called from multiple threads.
list<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
{
	GSAutoLock lock(&m_lock);

	GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;

	GSVector4i r = rect.ralign<Align_Outside>(bs);

	if(bbox != NULL) *bbox = r;

	// Cache key: the four aligned coordinates, divided by 8 and packed to 16 bits each.

	uint64 r_hash;

	GSVector4i::storel(&r_hash, r.sra32(3).ps32()); // max 19-bit coordinates, should not be a problem (can shift right by 3 because it is mod8, smallest block size)

	hash_map<uint64, list<uint32>*>::iterator cached = m_cache.find(r_hash);

	if(cached != m_cache.end())
	{
		return cached->second;
	}

	// Collect the touched pages in a bitmap first (one bit per page), so that
	// each page ends up in the list once and in ascending order.

	uint32 tmp[16];

	memset(tmp, 0, sizeof(tmp));

	for(int y = r.top; y < r.bottom; y += bs.y)
	{
		uint32 base = block.row[y >> 3];

		for(int x = r.left; x < r.right; x += bs.x)
		{
			uint32 n = (base + block.col[x >> 3]) >> 5;

			if(n < MAX_PAGES)
			{
				// unsigned literal: bit 31 must not be produced by shifting a signed int
				tmp[n >> 5] |= 1u << (n & 31);
			}
		}
	}

	list<uint32>* l = new list<uint32>();

	for(uint32 word = 0; word < countof(tmp); word++)
	{
		uint32 p = tmp[word];

		if(p == 0) continue;

		unsigned long j;

		// Pop the set bits from lowest to highest.
		while(_BitScanForward(&j, p))
		{
			p ^= 1u << j;

			l->push_back((word << 5) + j);
		}
	}

	m_cache[r_hash] = l;

	return l;
}
|
||||
|
|
|
@ -26,22 +26,36 @@
|
|||
#include "GSVector.h"
|
||||
#include "GSBlock.h"
|
||||
#include "GSClut.h"
|
||||
#include "GSThread.h"
|
||||
|
||||
struct GSOffset
|
||||
class GSOffset : public GSAlignedClass<32>
|
||||
{
|
||||
struct
|
||||
GSCritSec m_lock; // GetPages could be called from multiple threads
|
||||
|
||||
hash_map<uint64, list<uint32>*> m_cache;
|
||||
|
||||
public:
|
||||
__aligned(struct, 32) Block
|
||||
{
|
||||
short row[256]; // yn (n = 0 8 16 ...)
|
||||
short* col; // blockOffset*
|
||||
} block;
|
||||
|
||||
struct
|
||||
};
|
||||
|
||||
__aligned(struct, 32) Pixel
|
||||
{
|
||||
int row[4096]; // yn (n = 0 1 2 ...) NOTE: this wraps around above 2048, only transfers should address the upper half (dark cloud 2 inventing)
|
||||
int* col[8]; // rowOffset*
|
||||
} pixel;
|
||||
};
|
||||
|
||||
union {uint32 hash; struct {uint32 bp:14, bw:6, psm:6;};};
|
||||
|
||||
Block block;
|
||||
Pixel pixel;
|
||||
|
||||
GSOffset(uint32 bp, uint32 bw, uint32 psm);
|
||||
virtual ~GSOffset();
|
||||
|
||||
list<uint32>* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL);
|
||||
};
|
||||
|
||||
struct GSPixelOffset4
|
||||
|
|
|
@ -61,9 +61,9 @@ GSRasterizer::~GSRasterizer()
|
|||
delete m_ds;
|
||||
}
|
||||
|
||||
bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
|
||||
bool GSRasterizer::IsOneOfMyScanlines(int top) const
|
||||
{
|
||||
return m_myscanline[scanline >> THREAD_HEIGHT] != 0;
|
||||
return m_myscanline[top >> THREAD_HEIGHT] != 0;
|
||||
}
|
||||
|
||||
bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
|
||||
|
@ -82,6 +82,20 @@ bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
|
|||
return false;
|
||||
}
|
||||
|
||||
int GSRasterizer::FindMyNextScanline(int top) const
|
||||
{
|
||||
int i = top >> THREAD_HEIGHT;
|
||||
|
||||
if(m_myscanline[i] == 0)
|
||||
{
|
||||
while(m_myscanline[++i] == 0);
|
||||
|
||||
top = i << THREAD_HEIGHT;
|
||||
}
|
||||
|
||||
return top;
|
||||
}
|
||||
|
||||
void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data)
|
||||
{
|
||||
Draw(data);
|
||||
|
@ -91,10 +105,12 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
|
|||
{
|
||||
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
|
||||
|
||||
if(data->count == 0) return;
|
||||
|
||||
m_ds->BeginDraw(data->param);
|
||||
|
||||
const GSVertexSW* vertices = data->vertices;
|
||||
const int count = data->count;
|
||||
const GSVertexSW* vertices_end = data->vertices + data->count;
|
||||
|
||||
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
|
||||
|
||||
|
@ -108,21 +124,39 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
|
|||
switch(data->primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
if(scissor_test) DrawPoint<true>(vertices, count);
|
||||
else DrawPoint<false>(vertices, count);
|
||||
|
||||
if(scissor_test)
|
||||
{
|
||||
DrawPoint<true>(vertices, data->count);
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawPoint<false>(vertices, data->count);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case GS_LINE_CLASS:
|
||||
ASSERT(!(count & 1));
|
||||
for(int i = 0; i < count; i += 2) DrawLine(&vertices[i]);
|
||||
|
||||
do {DrawLine(vertices); vertices += 2;}
|
||||
while(vertices < vertices_end);
|
||||
|
||||
break;
|
||||
|
||||
case GS_TRIANGLE_CLASS:
|
||||
ASSERT(!(count % 3));
|
||||
for(int i = 0; i < count; i += 3) DrawTriangle(&vertices[i]);
|
||||
|
||||
do {DrawTriangle(vertices); vertices += 3;}
|
||||
while(vertices < vertices_end);
|
||||
|
||||
break;
|
||||
|
||||
case GS_SPRITE_CLASS:
|
||||
ASSERT(!(count & 1));
|
||||
for(int i = 0; i < count; i += 2) DrawSprite(&vertices[i], data->solidrect);
|
||||
|
||||
do {DrawSprite(vertices, data->solidrect); vertices += 2;}
|
||||
while(vertices < vertices_end);
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
@ -417,36 +451,41 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
|
|||
|
||||
GSVector4 scissor = m_fscissor.xzxz();
|
||||
|
||||
do
|
||||
top = FindMyNextScanline(top);
|
||||
|
||||
while(top < bottom)
|
||||
{
|
||||
if(IsOneOfMyScanlines(top))
|
||||
GSVector4 dy = GSVector4(top) - p0.yyyy();
|
||||
|
||||
GSVertexSW scan;
|
||||
|
||||
scan.p = edge.p + dedge.p * dy;
|
||||
|
||||
GSVector4 lrf = scan.p.ceil();
|
||||
GSVector4 l = lrf.max(scissor);
|
||||
GSVector4 r = lrf.min(scissor);
|
||||
GSVector4i lr = GSVector4i(l.xxyy(r));
|
||||
|
||||
int left = lr.extract32<0>();
|
||||
int right = lr.extract32<2>();
|
||||
|
||||
int pixels = right - left;
|
||||
|
||||
if(pixels > 0)
|
||||
{
|
||||
GSVector4 dy = GSVector4(top) - p0.yyyy();
|
||||
scan.t = edge.t + dedge.t * dy;
|
||||
scan.c = edge.c + dedge.c * dy;
|
||||
|
||||
GSVertexSW scan;
|
||||
AddScanline(e++, pixels, left, top, scan + dscan * (l - p0).xxxx());
|
||||
}
|
||||
|
||||
scan.p = edge.p + dedge.p * dy;
|
||||
top++;
|
||||
|
||||
GSVector4 lrf = scan.p.ceil();
|
||||
GSVector4 l = lrf.max(scissor);
|
||||
GSVector4 r = lrf.min(scissor);
|
||||
GSVector4i lr = GSVector4i(l.xxyy(r));
|
||||
|
||||
int left = lr.extract32<0>();
|
||||
int right = lr.extract32<2>();
|
||||
|
||||
int pixels = right - left;
|
||||
|
||||
if(pixels > 0)
|
||||
{
|
||||
scan.t = edge.t + dedge.t * dy;
|
||||
scan.c = edge.c + dedge.c * dy;
|
||||
|
||||
AddScanline(e++, pixels, left, top, scan + dscan * (l - p0).xxxx());
|
||||
}
|
||||
if(!IsOneOfMyScanlines(top))
|
||||
{
|
||||
top += (m_threads - 1) << THREAD_HEIGHT;
|
||||
}
|
||||
}
|
||||
while(++top < bottom);
|
||||
|
||||
m_edge.count += e - &m_edge.buff[m_edge.count];
|
||||
}
|
||||
|
@ -782,7 +821,13 @@ GSRasterizerList::~GSRasterizerList()
|
|||
|
||||
void GSRasterizerList::Queue(shared_ptr<GSRasterizerData> data)
|
||||
{
|
||||
Push(data);
|
||||
// disable dispatcher thread for now and pass-through directly,
|
||||
// would only be relevant if data->syncpoint was utilized more,
|
||||
// it would hide the syncing latency from the main gs thread
|
||||
|
||||
// Push(data);
|
||||
|
||||
Process(data); m_count++;
|
||||
}
|
||||
|
||||
void GSRasterizerList::Sync()
|
||||
|
|
|
@ -138,8 +138,9 @@ public:
|
|||
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
|
||||
virtual ~GSRasterizer();
|
||||
|
||||
__forceinline bool IsOneOfMyScanlines(int scanline) const;
|
||||
__forceinline bool IsOneOfMyScanlines(int top) const;
|
||||
__forceinline bool IsOneOfMyScanlines(int top, int bottom) const;
|
||||
__forceinline int FindMyNextScanline(int top) const;
|
||||
|
||||
void Draw(shared_ptr<GSRasterizerData> data);
|
||||
|
||||
|
|
|
@ -37,8 +37,8 @@ GSRendererSW::GSRendererSW(int threads)
|
|||
|
||||
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
|
||||
|
||||
memset(m_tex_pages, 0, sizeof(m_tex_pages));
|
||||
memset(m_fzb_pages, 0, sizeof(m_fzb_pages));
|
||||
memset(m_tex_pages, 0, sizeof(m_tex_pages));
|
||||
}
|
||||
|
||||
GSRendererSW::~GSRendererSW()
|
||||
|
@ -166,7 +166,7 @@ void GSRendererSW::Draw()
|
|||
{
|
||||
if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
|
||||
|
||||
shared_ptr<GSRasterizerData> data(new GSRasterizerData2());
|
||||
shared_ptr<GSRasterizerData> data(new GSRasterizerData2(this));
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
|
||||
|
||||
|
@ -197,11 +197,15 @@ void GSRendererSW::Draw()
|
|||
if(gd->sel.fwrite)
|
||||
{
|
||||
m_tc->InvalidateVideoMem(m_context->offset.fb, r);
|
||||
|
||||
UseTargetPages(m_context->offset.fb, r);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
m_tc->InvalidateVideoMem(m_context->offset.zb, r);
|
||||
|
||||
UseTargetPages(m_context->offset.zb, r);
|
||||
}
|
||||
|
||||
if(s_dump)
|
||||
|
@ -260,16 +264,6 @@ void GSRendererSW::Draw()
|
|||
else
|
||||
{
|
||||
m_rl->Queue(data);
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
InvalidatePages(m_context->offset.fb, r);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
InvalidatePages(m_context->offset.zb, r);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: m_perfmon.Put(GSPerfMon::Prim, stats.prims);
|
||||
|
@ -295,43 +289,77 @@ void GSRendererSW::Sync(int reason)
|
|||
|
||||
m_rl->Sync();
|
||||
|
||||
memset(m_tex_pages, 0, sizeof(m_tex_pages));
|
||||
memset(m_fzb_pages, 0, sizeof(m_fzb_pages));
|
||||
memset(m_tex_pages, 0, sizeof(m_tex_pages));
|
||||
}
|
||||
|
||||
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
|
||||
{
|
||||
//printf("ivm %05x %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||
|
||||
m_tc->InvalidateVideoMem(o, r);
|
||||
|
||||
if(CheckPages(o, r)) // check if the changing pages either used as a texture or a target
|
||||
// check if the changing pages either used as a texture or a target
|
||||
|
||||
list<uint32>* pages = o->GetPages(r);
|
||||
|
||||
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
Sync(5);
|
||||
uint32 page = *i;
|
||||
|
||||
if(m_fzb_pages[page] | (m_tex_pages[page >> 5] & (1 << (page & 31))))
|
||||
{
|
||||
Sync(5);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||
{
|
||||
//printf("ilm %05x %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
|
||||
|
||||
if(CheckPages(o, r)) // TODO: only checking m_fzb_pages would be enough (read-backs are rare anyway)
|
||||
list<uint32>* pages = o->GetPages(r);
|
||||
|
||||
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
Sync(6);
|
||||
uint32 page = *i;
|
||||
|
||||
if(m_fzb_pages[page])
|
||||
{
|
||||
Sync(6);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::InvalidatePages(const GSTextureCacheSW::Texture* t)
|
||||
void GSRendererSW::UseTargetPages(GSOffset* o, const GSVector4i& rect)
|
||||
{
|
||||
//printf("tex %05x %d %d\n", t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM);
|
||||
list<uint32>* pages = o->GetPages(rect);
|
||||
|
||||
for(size_t i = 0; i < countof(t->m_pages); i++)
|
||||
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
if(m_fzb_pages[i] & t->m_pages[i]) // currently being drawn to? => sync
|
||||
_InterlockedIncrement(&m_fzb_pages[*i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Atomically decrements the m_fzb_pages counter of every page that rect
// covers in o (the counterpart of the increments done by UseTargetPages).
void GSRendererSW::ReleaseTargetPages(GSOffset* o, const GSVector4i& rect)
{
	list<uint32>* touched = o->GetPages(rect);

	list<uint32>::iterator page = touched->begin();

	while(page != touched->end())
	{
		_InterlockedDecrement(&m_fzb_pages[*page]);

		++page;
	}
}
|
||||
|
||||
void GSRendererSW::UseSourcePages(const GSTextureCacheSW::Texture* t)
|
||||
{
|
||||
for(list<uint32>::const_iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
|
||||
{
|
||||
if(m_fzb_pages[*i]) // currently being drawn to? => sync
|
||||
{
|
||||
//
|
||||
Sync(7);
|
||||
|
@ -340,63 +368,14 @@ void GSRendererSW::InvalidatePages(const GSTextureCacheSW::Texture* t)
|
|||
return;
|
||||
}
|
||||
|
||||
m_tex_pages[i] |= t->m_pages[i]; // remember which texture pages are used
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::InvalidatePages(const GSOffset* o, const GSVector4i& rect)
|
||||
{
|
||||
//printf("fzb %05x %d %d\n", o->bp, o->bw, o->psm);
|
||||
|
||||
GSVector2i bs = (o->bp & 31) == 0 ? GSLocalMemory::m_psm[o->psm].pgs : GSLocalMemory::m_psm[o->psm].bs;
|
||||
|
||||
GSVector4i r = rect.ralign<Align_Outside>(bs);
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
for(size_t i = 0; i < countof(t->m_pages.bm); i++)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 page = (base + o->block.col[x >> 3]) >> 5;
|
||||
|
||||
if(page < MAX_PAGES)
|
||||
{
|
||||
m_fzb_pages[page >> 5] |= 1 << (page & 31);
|
||||
}
|
||||
}
|
||||
m_tex_pages[i] |= t->m_pages.bm[i]; // remember which texture pages are used
|
||||
}
|
||||
}
|
||||
|
||||
bool GSRendererSW::CheckPages(const GSOffset* o, const GSVector4i& rect)
|
||||
{
|
||||
GSVector2i bs = (o->bp & 31) == 0 ? GSLocalMemory::m_psm[o->psm].pgs : GSLocalMemory::m_psm[o->psm].bs;
|
||||
|
||||
GSVector4i r = rect.ralign<Align_Outside>(bs);
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 page = (base + o->block.col[x >> 3]) >> 5;
|
||||
|
||||
if(page < MAX_PAGES)
|
||||
{
|
||||
uint32 mask = 1 << (page & 31);
|
||||
|
||||
if((m_tex_pages[page >> 5] | m_fzb_pages[page >> 5]) & mask)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#include "GSTextureSW.h"
|
||||
|
||||
bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
||||
|
@ -510,7 +489,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
if(t == NULL) {ASSERT(0); return false;}
|
||||
|
||||
InvalidatePages(t);
|
||||
UseSourcePages(t);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
|
@ -663,7 +642,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
if(t == NULL) {ASSERT(0); return false;}
|
||||
|
||||
InvalidatePages(t);
|
||||
UseSourcePages(t);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
|
|
|
@ -29,8 +29,15 @@ class GSRendererSW : public GSRendererT<GSVertexSW>
|
|||
{
|
||||
class GSRasterizerData2 : public GSRasterizerData
|
||||
{
|
||||
GSRendererSW* m_parent;
|
||||
GSOffset* m_fb;
|
||||
GSOffset* m_zb;
|
||||
|
||||
public:
|
||||
GSRasterizerData2()
|
||||
GSRasterizerData2(GSRendererSW* parent)
|
||||
: m_parent(parent)
|
||||
, m_fb(parent->m_context->offset.fb)
|
||||
, m_zb(parent->m_context->offset.zb)
|
||||
{
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
|
||||
|
||||
|
@ -43,6 +50,18 @@ class GSRendererSW : public GSRendererT<GSVertexSW>
|
|||
virtual ~GSRasterizerData2()
|
||||
{
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
|
||||
|
||||
GSVector4i r = bbox.rintersect(scissor);
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
m_parent->ReleaseTargetPages(m_fb, r);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
m_parent->ReleaseTargetPages(m_zb, r);
|
||||
}
|
||||
|
||||
if(gd->clut) _aligned_free(gd->clut);
|
||||
if(gd->dimx) _aligned_free(gd->dimx);
|
||||
|
@ -58,7 +77,7 @@ protected:
|
|||
uint8* m_output;
|
||||
bool m_reset;
|
||||
GSPixelOffset4* m_fzb;
|
||||
uint32 m_fzb_pages[16];
|
||||
long m_fzb_pages[512];
|
||||
uint32 m_tex_pages[16];
|
||||
|
||||
void Reset();
|
||||
|
@ -71,9 +90,9 @@ protected:
|
|||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
|
||||
|
||||
void InvalidatePages(const GSOffset* o, const GSVector4i& rect);
|
||||
void InvalidatePages(const GSTextureCacheSW::Texture* t);
|
||||
bool CheckPages(const GSOffset* o, const GSVector4i& rect);
|
||||
void UseTargetPages(GSOffset* o, const GSVector4i& rect);
|
||||
void ReleaseTargetPages(GSOffset* o, const GSVector4i& rect);
|
||||
void UseSourcePages(const GSTextureCacheSW::Texture* t);
|
||||
|
||||
bool GetScanlineGlobalData(GSScanlineGlobalData& gd);
|
||||
|
||||
|
|
|
@ -289,19 +289,14 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
|
|||
return dst;
|
||||
}
|
||||
|
||||
void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect, bool target)
|
||||
void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, bool target)
|
||||
{
|
||||
// Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
|
||||
if (!o) return;
|
||||
if(!o) return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
|
||||
|
||||
uint32 bp = o->bp;
|
||||
uint32 bw = o->bw;
|
||||
uint32 psm = o->psm;
|
||||
|
||||
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
|
||||
|
||||
GSVector4i r = rect.ralign<Align_Outside>(bs);
|
||||
|
||||
if(!target)
|
||||
{
|
||||
const list<Source*>& m = m_src.m_map[bp >> 5];
|
||||
|
@ -319,60 +314,56 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec
|
|||
}
|
||||
}
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
list<uint32>* pages = o->GetPages(rect, &r);
|
||||
|
||||
bool found = false;
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
for(list<uint32>::iterator p = pages->begin(); p != pages->end(); p++)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
uint32 page = *p;
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
const list<Source*>& m = m_src.m_map[page];
|
||||
|
||||
for(list<Source*>::const_iterator i = m.begin(); i != m.end(); )
|
||||
{
|
||||
uint32 page = (base + o->block.col[x >> 3]) >> 5;
|
||||
list<Source*>::const_iterator j = i++;
|
||||
|
||||
if(page < MAX_PAGES)
|
||||
Source* s = *j;
|
||||
|
||||
if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
|
||||
{
|
||||
const list<Source*>& m = m_src.m_map[page];
|
||||
bool b = bp == s->m_TEX0.TBP0;
|
||||
|
||||
for(list<Source*>::const_iterator i = m.begin(); i != m.end(); )
|
||||
if(!s->m_target)
|
||||
{
|
||||
list<Source*>::const_iterator j = i++;
|
||||
|
||||
Source* s = *j;
|
||||
|
||||
if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
|
||||
if(s->m_repeating)
|
||||
{
|
||||
bool b = bp == s->m_TEX0.TBP0;
|
||||
|
||||
if(!s->m_target)
|
||||
{
|
||||
if(s->m_repeating)
|
||||
{
|
||||
list<GSVector2i>& l = s->m_p2t[page];
|
||||
list<GSVector2i>& l = s->m_p2t[page];
|
||||
|
||||
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
|
||||
{
|
||||
s->m_valid[k->x] &= k->y;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
s->m_valid[page] = 0;
|
||||
}
|
||||
|
||||
s->m_complete = false;
|
||||
|
||||
found = b;
|
||||
}
|
||||
else
|
||||
for(list<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
|
||||
{
|
||||
// TODO
|
||||
|
||||
if(b)
|
||||
{
|
||||
m_src.RemoveAt(s);
|
||||
}
|
||||
s->m_valid[k->x] &= k->y;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
s->m_valid[page] = 0;
|
||||
}
|
||||
|
||||
s->m_complete = false;
|
||||
|
||||
found = b;
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO
|
||||
|
||||
if(b)
|
||||
{
|
||||
m_src.RemoveAt(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -425,7 +416,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec
|
|||
}
|
||||
}
|
||||
|
||||
void GSTextureCache::InvalidateLocalMem(const GSOffset* o, const GSVector4i& r)
|
||||
void GSTextureCache::InvalidateLocalMem(GSOffset* o, const GSVector4i& r)
|
||||
{
|
||||
uint32 bp = o->bp;
|
||||
uint32 psm = o->psm;
|
||||
|
|
|
@ -143,8 +143,8 @@ public:
|
|||
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used);
|
||||
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h);
|
||||
|
||||
void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r, bool target = true);
|
||||
void InvalidateLocalMem(const GSOffset* o, const GSVector4i& r);
|
||||
void InvalidateVideoMem(GSOffset* o, const GSVector4i& r, bool target = true);
|
||||
void InvalidateLocalMem(GSOffset* o, const GSVector4i& r);
|
||||
|
||||
void IncAge();
|
||||
bool UserHacks_HalfPixelOffset;
|
||||
|
|
|
@ -70,78 +70,50 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
|
|||
|
||||
if(t == NULL)
|
||||
{
|
||||
const GSOffset* o = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||
|
||||
t = new Texture(m_state, o, tw0, TEX0, TEXA);
|
||||
t = new Texture(m_state, tw0, TEX0, TEXA);
|
||||
|
||||
m_textures.insert(t);
|
||||
|
||||
for(int i = 0; i < countof(t->m_pages); i++)
|
||||
for(list<uint32>::iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
|
||||
{
|
||||
uint32 p = t->m_pages[i];
|
||||
|
||||
if(p != 0)
|
||||
{
|
||||
list<Texture*>* m = &m_map[i << 5];
|
||||
|
||||
unsigned long j;
|
||||
|
||||
while(_BitScanForward(&j, p))
|
||||
{
|
||||
p ^= 1 << j;
|
||||
|
||||
m[j].push_front(t);
|
||||
}
|
||||
}
|
||||
m_map[*i].push_front(t);
|
||||
}
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect)
|
||||
void GSTextureCacheSW::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect)
|
||||
{
|
||||
uint32 psm = o->psm;
|
||||
list<uint32>* pages = o->GetPages(rect);
|
||||
|
||||
GSVector2i bs = (o->bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
|
||||
|
||||
GSVector4i r = rect.ralign<Align_Outside>(bs);
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
for(list<uint32>::iterator p = pages->begin(); p != pages->end(); p++)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
uint32 page = *p;
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
const list<Texture*>& map = m_map[page];
|
||||
|
||||
for(list<Texture*>::const_iterator i = map.begin(); i != map.end(); i++)
|
||||
{
|
||||
uint32 page = (base + o->block.col[x >> 3]) >> 5;
|
||||
Texture* t = *i;
|
||||
|
||||
if(page < MAX_PAGES)
|
||||
if(GSUtil::HasSharedBits(o->psm, t->m_TEX0.PSM))
|
||||
{
|
||||
const list<Texture*>& map = m_map[page];
|
||||
|
||||
for(list<Texture*>::const_iterator i = map.begin(); i != map.end(); i++)
|
||||
if(t->m_repeating)
|
||||
{
|
||||
Texture* t = *i;
|
||||
|
||||
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
|
||||
{
|
||||
if(t->m_repeating)
|
||||
{
|
||||
list<GSVector2i>& l = t->m_p2t[page];
|
||||
list<GSVector2i>& l = t->m_p2t[page];
|
||||
|
||||
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
|
||||
{
|
||||
t->m_valid[j->x] &= j->y;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
t->m_valid[page] = 0;
|
||||
}
|
||||
|
||||
t->m_complete = false;
|
||||
for(list<GSVector2i>::iterator j = l.begin(); j != l.end(); j++)
|
||||
{
|
||||
t->m_valid[j->x] &= j->y;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
t->m_valid[page] = 0;
|
||||
}
|
||||
|
||||
t->m_complete = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -195,9 +167,8 @@ void GSTextureCacheSW::IncAge()
|
|||
|
||||
//
|
||||
|
||||
GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
|
||||
GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
|
||||
: m_state(state)
|
||||
, m_offset(offset)
|
||||
, m_buff(NULL)
|
||||
, m_tw(tw0)
|
||||
, m_age(0)
|
||||
|
@ -208,28 +179,18 @@ GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint3
|
|||
m_TEXA = TEXA;
|
||||
|
||||
memset(m_valid, 0, sizeof(m_valid));
|
||||
memset(m_pages, 0, sizeof(m_pages));
|
||||
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
memset(m_pages.bm, 0, sizeof(m_pages.bm));
|
||||
|
||||
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
|
||||
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||
|
||||
int tw = 1 << TEX0.TW;
|
||||
int th = 1 << TEX0.TH;
|
||||
list<uint32>* pages = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
||||
|
||||
for(int y = 0; y < th; y += bs.y)
|
||||
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
uint32 base = offset->block.row[y >> 3];
|
||||
uint32 page = *i;
|
||||
|
||||
for(int x = 0; x < tw; x += bs.x)
|
||||
{
|
||||
uint32 page = (base + offset->block.col[x >> 3]) >> 5;
|
||||
|
||||
if(page < MAX_PAGES)
|
||||
{
|
||||
m_pages[page >> 5] |= 1 << (page & 31);
|
||||
}
|
||||
}
|
||||
m_pages.bm[page >> 5] |= 1 << (page & 31);
|
||||
m_pages.n.push_back(page);
|
||||
}
|
||||
|
||||
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
|
||||
|
|
|
@ -30,7 +30,7 @@ public:
|
|||
{
|
||||
public:
|
||||
GSState* m_state;
|
||||
const GSOffset* m_offset;
|
||||
GSOffset* m_offset;
|
||||
GIFRegTEX0 m_TEX0;
|
||||
GIFRegTEXA m_TEXA;
|
||||
void* m_buff;
|
||||
|
@ -40,13 +40,13 @@ public:
|
|||
bool m_repeating;
|
||||
list<GSVector2i>* m_p2t;
|
||||
uint32 m_valid[MAX_PAGES];
|
||||
uint32 m_pages[16];
|
||||
struct {uint32 bm[16]; list<uint32> n;} m_pages;
|
||||
|
||||
// m_valid
|
||||
// fast mode: each uint32 bits map to the 32 blocks of that page
|
||||
// repeating mode: 1 bpp image of the texture tiles (8x8), also having 512 elements is just a coincidence (worst case: (1024*1024)/(8*8)/(sizeof(uint32)*8))
|
||||
|
||||
Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
|
||||
Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
|
||||
virtual ~Texture();
|
||||
|
||||
bool Update(const GSVector4i& r);
|
||||
|
@ -64,7 +64,7 @@ public:
|
|||
|
||||
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
|
||||
|
||||
void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r);
|
||||
void InvalidateVideoMem(GSOffset* o, const GSVector4i& r);
|
||||
|
||||
void RemoveAll();
|
||||
void RemoveAt(Texture* t);
|
||||
|
|
Loading…
Reference in New Issue