mirror of https://github.com/PCSX2/pcsx2.git
GSdx: a bit less idle time by refcounting used texture pages.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5026 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
95b1c260af
commit
eaf06e8b1d
|
@ -28,9 +28,8 @@ public:
|
|||
{
|
||||
Main,
|
||||
Sync,
|
||||
WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7, WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15,
|
||||
WorkerSync0, WorkerSync1, WorkerSync2, WorkerSync3, WorkerSync4, WorkerSync5, WorkerSync6, WorkerSync7, WorkerSync8, WorkerSync9, WorkerSync10, WorkerSync11, WorkerSync12, WorkerSync13, WorkerSync14, WorkerSync15,
|
||||
WorkerSleep0, WorkerSleep1, WorkerSleep2, WorkerSleep3, WorkerSleep4, WorkerSleep5, WorkerSleep6, WorkerSleep7, WorkerSleep8, WorkerSleep9, WorkerSleep10, WorkerSleep11, WorkerSleep12, WorkerSleep13, WorkerSleep14, WorkerSleep15,
|
||||
WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7,
|
||||
WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15,
|
||||
TimerLast,
|
||||
};
|
||||
|
||||
|
|
|
@ -68,34 +68,20 @@ void GSRendererSW::Reset()
|
|||
void GSRendererSW::VSync(int field)
|
||||
{
|
||||
Sync(0); // IncAge might delete a cached texture in use
|
||||
|
||||
/*
|
||||
printf("CPU %d Sync %d W %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d\n",
|
||||
int draw[8], sum = 0;
|
||||
|
||||
for(int i = 0; i < countof(draw); i++)
|
||||
{
|
||||
draw[i] = m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i);
|
||||
sum += draw[i];
|
||||
}
|
||||
|
||||
printf("CPU %d Sync %d W %d %d %d %d %d %d %d %d (%d)\n",
|
||||
m_perfmon.CPU(GSPerfMon::Main),
|
||||
m_perfmon.CPU(GSPerfMon::Sync),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSync0),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSleep0),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerDraw0),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSync1),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSleep1),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerDraw1),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSync2),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSleep2),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerDraw2),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSync3),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSleep3),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerDraw3),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSync4),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSleep4),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerDraw4),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSync5),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSleep5),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerDraw5),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSync6),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSleep6),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerDraw6),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSync7),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerSleep7),
|
||||
m_perfmon.CPU(GSPerfMon::WorkerDraw7));
|
||||
draw[0], draw[1], draw[2], draw[3], draw[4], draw[5], draw[6], draw[7], sum);
|
||||
|
||||
//
|
||||
printf("m_sync_count = %d\n", ((GSRasterizerList*)m_rl)->m_sync_count); ((GSRasterizerList*)m_rl)->m_sync_count = 0;
|
||||
|
@ -176,17 +162,17 @@ void GSRendererSW::Draw()
|
|||
list<uint32>* fb_pages = m_context->offset.fb->GetPages(r);
|
||||
list<uint32>* zb_pages = m_context->offset.zb->GetPages(r);
|
||||
|
||||
GSRasterizerData2* data2 = new GSRasterizerData2(this, fb_pages, zb_pages);
|
||||
shared_ptr<GSRasterizerData> data(new GSRasterizerData2(this));
|
||||
|
||||
shared_ptr<GSRasterizerData> data(data2);
|
||||
GSRasterizerData2* data2 = (GSRasterizerData2*)data.get();
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
|
||||
|
||||
if(!GetScanlineGlobalData(*gd))
|
||||
if(!GetScanlineGlobalData(data2))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
|
||||
|
||||
data->scissor = scissor;
|
||||
data->bbox = bbox;
|
||||
data->primclass = m_vt.m_primclass;
|
||||
|
@ -254,7 +240,7 @@ void GSRendererSW::Draw()
|
|||
|
||||
//
|
||||
|
||||
data2->UseTargetPages();
|
||||
data2->UseTargetPages(fb_pages, zb_pages);
|
||||
|
||||
//
|
||||
|
||||
|
@ -347,10 +333,6 @@ void GSRendererSW::Sync(int reason)
|
|||
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
|
||||
|
||||
m_rl->Sync();
|
||||
|
||||
// NOTE: m_fzb_pages is refcounted, zeroing is done automatically
|
||||
|
||||
memset(m_tex_pages, 0, sizeof(m_tex_pages));
|
||||
}
|
||||
|
||||
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
|
||||
|
@ -366,8 +348,10 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
uint32 page = *i;
|
||||
|
||||
//while(m_fzb_pages[page] | m_tex_pages[page]) _mm_pause();
|
||||
|
||||
if(m_fzb_pages[page] | (m_tex_pages[page >> 5] & (1 << (page & 31))))
|
||||
if(m_fzb_pages[page] | m_tex_pages[page])
|
||||
{
|
||||
Sync(5);
|
||||
|
||||
|
@ -384,6 +368,8 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
//while(m_fzb_pages[*i]) _mm_pause();
|
||||
|
||||
if(m_fzb_pages[*i])
|
||||
{
|
||||
Sync(6);
|
||||
|
@ -393,49 +379,68 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::UseTargetPages(const list<uint32>* pages, int offset)
|
||||
void GSRendererSW::UsePages(const list<uint32>* pages, int type)
|
||||
{
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
if(type < 2)
|
||||
{
|
||||
ASSERT(((short*)&m_fzb_pages[*i])[offset] < SHRT_MAX);
|
||||
|
||||
_InterlockedIncrement16((short*)&m_fzb_pages[*i] + offset);
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::ReleaseTargetPages(const list<uint32>* pages, int offset)
|
||||
{
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
ASSERT(((short*)&m_fzb_pages[*i])[offset] > 0);
|
||||
|
||||
_InterlockedDecrement16((short*)&m_fzb_pages[*i] + offset);
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::UseSourcePages(const GSTextureCacheSW::Texture* t)
|
||||
{
|
||||
for(list<uint32>::const_iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
|
||||
{
|
||||
if(m_fzb_pages[*i]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D)
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
Sync(7);
|
||||
ASSERT(((short*)&m_fzb_pages[*i])[type] < SHRT_MAX);
|
||||
|
||||
return;
|
||||
_InterlockedIncrement16((short*)&m_fzb_pages[*i] + type);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < countof(t->m_pages.bm); i++)
|
||||
else
|
||||
{
|
||||
m_tex_pages[i] |= t->m_pages.bm[i]; // remember which texture pages are used
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
//while(m_fzb_pages[*i]) _mm_pause();
|
||||
|
||||
if(m_fzb_pages[*i]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D)
|
||||
{
|
||||
Sync(7);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
ASSERT(m_tex_pages[*i] < SHRT_MAX);
|
||||
|
||||
_InterlockedIncrement16((short*)&m_tex_pages[*i]); // remember which texture pages are used
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Drops one reference on every page in `pages`.
//
// pages: list of page indices to release.
// type:  selects which counter is decremented:
//        - type < 2: the 16-bit counter at short-index `type` inside the
//          32-bit m_fzb_pages[page] entry (callers in this file pass 0 for
//          frame buffer pages and 1 for z buffer pages),
//        - otherwise: the 16-bit m_tex_pages[page] counter (callers in this
//          file pass 2 for texture pages).
//
// Each counter is asserted to be positive before it is decremented, so a
// release without a matching use trips the ASSERT.
void GSRendererSW::ReleasePages(const list<uint32>* pages, int type)
|
||||
{
|
||||
if(type < 2)
|
||||
{
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
ASSERT(((short*)&m_fzb_pages[*i])[type] > 0);
|
||||
|
||||
// `+ type` steps in units of short, i.e. it picks one 16-bit half of the 32-bit entry.
_InterlockedDecrement16((short*)&m_fzb_pages[*i] + type);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
ASSERT(m_tex_pages[*i] > 0);
|
||||
|
||||
_InterlockedDecrement16((short*)&m_tex_pages[*i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#include "GSTextureSW.h"
|
||||
|
||||
bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
||||
bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
||||
{
|
||||
GSScanlineGlobalData& gd = *(GSScanlineGlobalData*)data2->param;
|
||||
|
||||
const GSDrawingEnvironment& env = m_env;
|
||||
const GSDrawingContext* context = m_context;
|
||||
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
|
||||
|
@ -545,7 +550,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
if(t == NULL) {ASSERT(0); return false;}
|
||||
|
||||
UseSourcePages(t);
|
||||
data2->UseSourcePages(t, 0);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
|
@ -698,7 +703,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
if(t == NULL) {ASSERT(0); return false;}
|
||||
|
||||
UseSourcePages(t);
|
||||
data2->UseSourcePages(t, i);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
|
@ -1096,3 +1101,88 @@ if(!m_dump)
|
|||
// Flush();
|
||||
}
|
||||
}
|
||||
|
||||
// GSRendererSW::GSRasterizerData2
|
||||
|
||||
// Constructs a draw-data object with no pages referenced yet.
//
// parent: renderer whose page counters are updated later through
//         UsePages / ReleasePages.
//
// The frame/z page pointers start as NULL, every texture page slot is zeroed,
// and a GSScanlineGlobalData is allocated and stored in `param`; the
// destructor frees it.
GSRendererSW::GSRasterizerData2::GSRasterizerData2(GSRendererSW* parent)
|
||||
: m_parent(parent)
|
||||
, m_fb_pages(NULL)
|
||||
, m_zb_pages(NULL)
|
||||
, m_using_pages(false)
|
||||
{
|
||||
memset(m_tex_pages, 0, sizeof(m_tex_pages));
|
||||
|
||||
// NOTE(review): the allocation result is dereferenced below without a NULL check.
// The 32-byte alignment is presumably required by SIMD accesses to the struct - TODO confirm.
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
|
||||
|
||||
// Only these fields are initialized here; the rest of *gd is left as allocated.
gd->sel.key = 0;
|
||||
|
||||
gd->clut = NULL;
|
||||
gd->dimx = NULL;
|
||||
|
||||
param = gd;
|
||||
}
|
||||
|
||||
// Releases every page reference this object took, frees the scanline global
// data stored in `param`, and reports `pixels` to the parent's perf monitor
// under GSPerfMon::Fillrate.
GSRendererSW::GSRasterizerData2::~GSRasterizerData2()
|
||||
{
|
||||
// Target pages are only released if UseTargetPages() actually took them.
if(m_using_pages)
|
||||
{
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
|
||||
|
||||
// NOTE(review): the same sel.fwrite / sel.zwrite flags gate the matching UsePages calls in
// UseTargetPages; this assumes they are not modified in between - TODO confirm.
if(gd->sel.fwrite)
|
||||
{
|
||||
m_parent->ReleasePages(m_fb_pages, 0);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
m_parent->ReleasePages(m_zb_pages, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Texture slots are released in order and the loop stops at the first NULL slot.
// NOTE(review): a used slot that follows an unused one would not be released; this
// presumably relies on levels being registered contiguously from 0 - verify against callers.
for(size_t i = 0; i < countof(m_tex_pages) && m_tex_pages[i] != NULL; i++)
|
||||
{
|
||||
m_parent->ReleasePages(m_tex_pages[i], 2);
|
||||
}
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
|
||||
|
||||
// clut and dimx start as NULL in the constructor, so they are freed only if set elsewhere.
if(gd->clut) _aligned_free(gd->clut);
|
||||
if(gd->dimx) _aligned_free(gd->dimx);
|
||||
|
||||
_aligned_free(gd);
|
||||
|
||||
m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels);
|
||||
}
|
||||
|
||||
// Records the frame buffer and z buffer page lists for this draw and takes a
// reference on them through the parent renderer.
//
// fb_pages: pages covered by the frame buffer; referenced (type 0) only when
//           sel.fwrite is set.
// zb_pages: pages covered by the z buffer; referenced (type 1) only when
//           sel.zwrite is set.
//
// Calling this again after it has already run is a no-op. The destructor
// releases the references taken here.
void GSRendererSW::GSRasterizerData2::UseTargetPages(const list<uint32>* fb_pages, const list<uint32>* zb_pages)
|
||||
{
|
||||
if(m_using_pages) return;
|
||||
|
||||
// Only the pointers are stored, not copies of the lists.
// NOTE(review): the lists must outlive this object - confirm ownership with the caller.
m_fb_pages = fb_pages;
|
||||
m_zb_pages = zb_pages;
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
m_parent->UsePages(fb_pages, 0);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
m_parent->UsePages(zb_pages, 1);
|
||||
}
|
||||
|
||||
m_using_pages = true;
|
||||
}
|
||||
|
||||
// Registers the pages of texture `t` as a source for this draw and takes a
// reference on them through the parent renderer (type 2).
//
// t:     cached texture whose page list (t->m_pages.n) is referenced.
// level: slot in m_tex_pages that remembers the list so the destructor can
//        release it; the slot is asserted to be unused.
//
// NOTE(review): `level` is not range-checked here; m_tex_pages is declared
// with 7 entries in the class definition, so callers must pass 0..6.
void GSRendererSW::GSRasterizerData2::UseSourcePages(GSTextureCacheSW::Texture* t, int level)
|
||||
{
|
||||
ASSERT(m_tex_pages[level] == NULL);
|
||||
|
||||
const list<uint32>* pages = t->m_pages.n;
|
||||
|
||||
// Only the pointer is stored.
// NOTE(review): this assumes the texture's page list stays valid until this object is destroyed - confirm.
m_tex_pages[level] = pages;
|
||||
|
||||
m_parent->UsePages(pages, 2);
|
||||
}
|
||||
|
|
|
@ -32,76 +32,15 @@ class GSRendererSW : public GSRendererT<GSVertexSW>
|
|||
GSRendererSW* m_parent;
|
||||
const list<uint32>* m_fb_pages;
|
||||
const list<uint32>* m_zb_pages;
|
||||
const list<uint32>* m_tex_pages[7];
|
||||
bool m_using_pages;
|
||||
|
||||
public:
|
||||
GSRasterizerData2(GSRendererSW* parent, const list<uint32>* fb_pages, const list<uint32>* zb_pages)
|
||||
: m_parent(parent)
|
||||
, m_fb_pages(fb_pages)
|
||||
, m_zb_pages(zb_pages)
|
||||
, m_using_pages(false)
|
||||
{
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
|
||||
GSRasterizerData2(GSRendererSW* parent);
|
||||
virtual ~GSRasterizerData2();
|
||||
|
||||
gd->sel.key = 0;
|
||||
|
||||
gd->clut = NULL;
|
||||
gd->dimx = NULL;
|
||||
|
||||
param = gd;
|
||||
}
|
||||
|
||||
virtual ~GSRasterizerData2()
|
||||
{
|
||||
ReleaseTargetPages();
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
|
||||
|
||||
if(gd->clut) _aligned_free(gd->clut);
|
||||
if(gd->dimx) _aligned_free(gd->dimx);
|
||||
|
||||
_aligned_free(gd);
|
||||
|
||||
m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels);
|
||||
}
|
||||
|
||||
void UseTargetPages()
|
||||
{
|
||||
if(m_using_pages) {ASSERT(0); return;}
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
m_parent->UseTargetPages(m_fb_pages, 0);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
m_parent->UseTargetPages(m_zb_pages, 1);
|
||||
}
|
||||
|
||||
m_using_pages = true;
|
||||
}
|
||||
|
||||
void ReleaseTargetPages()
|
||||
{
|
||||
if(!m_using_pages) {ASSERT(0); return;}
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
m_parent->ReleaseTargetPages(m_fb_pages, 0);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
m_parent->ReleaseTargetPages(m_zb_pages, 1);
|
||||
}
|
||||
|
||||
m_using_pages = false;
|
||||
}
|
||||
void UseTargetPages(const list<uint32>* fb_pages, const list<uint32>* zb_pages);
|
||||
void UseSourcePages(GSTextureCacheSW::Texture* t, int level);
|
||||
};
|
||||
|
||||
protected:
|
||||
|
@ -112,7 +51,7 @@ protected:
|
|||
bool m_reset;
|
||||
GSPixelOffset4* m_fzb;
|
||||
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
|
||||
uint32 m_tex_pages[16];
|
||||
uint16 m_tex_pages[512];
|
||||
|
||||
void Reset();
|
||||
void VSync(int field);
|
||||
|
@ -124,11 +63,10 @@ protected:
|
|||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
|
||||
|
||||
void UseTargetPages(const list<uint32>* pages, int offset);
|
||||
void ReleaseTargetPages(const list<uint32>* pages, int offset);
|
||||
void UseSourcePages(const GSTextureCacheSW::Texture* t);
|
||||
void UsePages(const list<uint32>* pages, int type);
|
||||
void ReleasePages(const list<uint32>* pages, int type);
|
||||
|
||||
bool GetScanlineGlobalData(GSScanlineGlobalData& gd);
|
||||
bool GetScanlineGlobalData(GSRasterizerData2* data2);
|
||||
|
||||
public:
|
||||
GSRendererSW(int threads);
|
||||
|
|
|
@ -74,7 +74,7 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
|
|||
|
||||
m_textures.insert(t);
|
||||
|
||||
for(list<uint32>::iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++)
|
||||
for(list<uint32>::const_iterator i = t->m_pages.n->begin(); i != t->m_pages.n->end(); i++)
|
||||
{
|
||||
m_map[*i].push_front(t);
|
||||
}
|
||||
|
@ -181,14 +181,13 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
|
|||
|
||||
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||
|
||||
list<uint32>* pages = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
||||
m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
||||
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(list<uint32>::const_iterator i = m_pages.n->begin(); i != m_pages.n->end(); i++)
|
||||
{
|
||||
uint32 page = *i;
|
||||
|
||||
m_pages.bm[page >> 5] |= 1 << (page & 31);
|
||||
m_pages.n.push_back(page);
|
||||
}
|
||||
|
||||
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
|
||||
|
|
|
@ -40,7 +40,7 @@ public:
|
|||
bool m_repeating;
|
||||
list<GSVector2i>* m_p2t;
|
||||
uint32 m_valid[MAX_PAGES];
|
||||
struct {uint32 bm[16]; list<uint32> n;} m_pages;
|
||||
struct {uint32 bm[16]; const list<uint32>* n;} m_pages;
|
||||
|
||||
// m_valid
|
||||
// fast mode: each uint32 bits map to the 32 blocks of that page
|
||||
|
|
Loading…
Reference in New Issue