From eaf06e8b1d5f02e716e4efdd6d3439e326de85a5 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Wed, 28 Dec 2011 14:10:20 +0000 Subject: [PATCH] GSdx: bit less idle time by refcounting used texture pages. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5026 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSPerfMon.h | 5 +- plugins/GSdx/GSRendererSW.cpp | 226 +++++++++++++++++++++--------- plugins/GSdx/GSRendererSW.h | 80 ++--------- plugins/GSdx/GSTextureCacheSW.cpp | 7 +- plugins/GSdx/GSTextureCacheSW.h | 2 +- 5 files changed, 173 insertions(+), 147 deletions(-) diff --git a/plugins/GSdx/GSPerfMon.h b/plugins/GSdx/GSPerfMon.h index 53483ccc82..907af994bf 100644 --- a/plugins/GSdx/GSPerfMon.h +++ b/plugins/GSdx/GSPerfMon.h @@ -28,9 +28,8 @@ public: { Main, Sync, - WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7, WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15, - WorkerSync0, WorkerSync1, WorkerSync2, WorkerSync3, WorkerSync4, WorkerSync5, WorkerSync6, WorkerSync7, WorkerSync8, WorkerSync9, WorkerSync10, WorkerSync11, WorkerSync12, WorkerSync13, WorkerSync14, WorkerSync15, - WorkerSleep0, WorkerSleep1, WorkerSleep2, WorkerSleep3, WorkerSleep4, WorkerSleep5, WorkerSleep6, WorkerSleep7, WorkerSleep8, WorkerSleep9, WorkerSleep10, WorkerSleep11, WorkerSleep12, WorkerSleep13, WorkerSleep14, WorkerSleep15, + WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7, + WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15, TimerLast, }; diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 86c72c5452..1a50ae0c90 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -68,34 +68,20 @@ void GSRendererSW::Reset() void GSRendererSW::VSync(int field) { Sync(0); // IncAge might delete a cached texture in use + 
/* - printf("CPU %d Sync %d W %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d\n", + int draw[8], sum = 0; + + for(int i = 0; i < countof(draw); i++) + { + draw[i] = m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i); + sum += draw[i]; + } + + printf("CPU %d Sync %d W %d %d %d %d %d %d %d %d (%d)\n", m_perfmon.CPU(GSPerfMon::Main), m_perfmon.CPU(GSPerfMon::Sync), - m_perfmon.CPU(GSPerfMon::WorkerSync0), - m_perfmon.CPU(GSPerfMon::WorkerSleep0), - m_perfmon.CPU(GSPerfMon::WorkerDraw0), - m_perfmon.CPU(GSPerfMon::WorkerSync1), - m_perfmon.CPU(GSPerfMon::WorkerSleep1), - m_perfmon.CPU(GSPerfMon::WorkerDraw1), - m_perfmon.CPU(GSPerfMon::WorkerSync2), - m_perfmon.CPU(GSPerfMon::WorkerSleep2), - m_perfmon.CPU(GSPerfMon::WorkerDraw2), - m_perfmon.CPU(GSPerfMon::WorkerSync3), - m_perfmon.CPU(GSPerfMon::WorkerSleep3), - m_perfmon.CPU(GSPerfMon::WorkerDraw3), - m_perfmon.CPU(GSPerfMon::WorkerSync4), - m_perfmon.CPU(GSPerfMon::WorkerSleep4), - m_perfmon.CPU(GSPerfMon::WorkerDraw4), - m_perfmon.CPU(GSPerfMon::WorkerSync5), - m_perfmon.CPU(GSPerfMon::WorkerSleep5), - m_perfmon.CPU(GSPerfMon::WorkerDraw5), - m_perfmon.CPU(GSPerfMon::WorkerSync6), - m_perfmon.CPU(GSPerfMon::WorkerSleep6), - m_perfmon.CPU(GSPerfMon::WorkerDraw6), - m_perfmon.CPU(GSPerfMon::WorkerSync7), - m_perfmon.CPU(GSPerfMon::WorkerSleep7), - m_perfmon.CPU(GSPerfMon::WorkerDraw7)); + draw[0], draw[1], draw[2], draw[3], draw[4], draw[5], draw[6], draw[7], sum); // printf("m_sync_count = %d\n", ((GSRasterizerList*)m_rl)->m_sync_count); ((GSRasterizerList*)m_rl)->m_sync_count = 0; @@ -176,17 +162,17 @@ void GSRendererSW::Draw() list* fb_pages = m_context->offset.fb->GetPages(r); list* zb_pages = m_context->offset.zb->GetPages(r); - GSRasterizerData2* data2 = new GSRasterizerData2(this, fb_pages, zb_pages); + shared_ptr data(new GSRasterizerData2(this)); - shared_ptr data(data2); + GSRasterizerData2* data2 = (GSRasterizerData2*)data.get(); - GSScanlineGlobalData* gd = 
(GSScanlineGlobalData*)data->param; - - if(!GetScanlineGlobalData(*gd)) + if(!GetScanlineGlobalData(data2)) { return; } + GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param; + data->scissor = scissor; data->bbox = bbox; data->primclass = m_vt.m_primclass; @@ -254,7 +240,7 @@ void GSRendererSW::Draw() // - data2->UseTargetPages(); + data2->UseTargetPages(fb_pages, zb_pages); // @@ -347,10 +333,6 @@ void GSRendererSW::Sync(int reason) GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync); m_rl->Sync(); - - // NOTE: m_fzb_pages is refcounted, zeroing is done automatically - - memset(m_tex_pages, 0, sizeof(m_tex_pages)); } void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) @@ -366,8 +348,10 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS for(list::const_iterator i = pages->begin(); i != pages->end(); i++) { uint32 page = *i; + + //while(m_fzb_pages[page] | m_tex_pages[page]) _mm_pause(); - if(m_fzb_pages[page] | (m_tex_pages[page >> 5] & (1 << (page & 31)))) + if(m_fzb_pages[page] | m_tex_pages[page]) { Sync(5); @@ -384,6 +368,8 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS for(list::const_iterator i = pages->begin(); i != pages->end(); i++) { + //while(m_fzb_pages[*i]) _mm_pause(); + if(m_fzb_pages[*i]) { Sync(6); @@ -393,49 +379,68 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS } } -void GSRendererSW::UseTargetPages(const list* pages, int offset) +void GSRendererSW::UsePages(const list* pages, int type) { - for(list::const_iterator i = pages->begin(); i != pages->end(); i++) + if(type < 2) { - ASSERT(((short*)&m_fzb_pages[*i])[offset] < SHRT_MAX); - - _InterlockedIncrement16((short*)&m_fzb_pages[*i] + offset); - } -} - -void GSRendererSW::ReleaseTargetPages(const list* pages, int offset) -{ - for(list::const_iterator i = pages->begin(); i != pages->end(); i++) - { - ASSERT(((short*)&m_fzb_pages[*i])[offset] 
> 0); - - _InterlockedDecrement16((short*)&m_fzb_pages[*i] + offset); - } -} - -void GSRendererSW::UseSourcePages(const GSTextureCacheSW::Texture* t) -{ - for(list::const_iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++) - { - if(m_fzb_pages[*i]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D) + for(list::const_iterator i = pages->begin(); i != pages->end(); i++) { - Sync(7); + ASSERT(((short*)&m_fzb_pages[*i])[type] < SHRT_MAX); - return; + _InterlockedIncrement16((short*)&m_fzb_pages[*i] + type); } - } - - for(size_t i = 0; i < countof(t->m_pages.bm); i++) + else { - m_tex_pages[i] |= t->m_pages.bm[i]; // remember which texture pages are used + for(list::const_iterator i = pages->begin(); i != pages->end(); i++) + { + //while(m_fzb_pages[*i]) _mm_pause(); + + if(m_fzb_pages[*i]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? 
:D) + { + Sync(7); + + break; + } + } + + for(list::const_iterator i = pages->begin(); i != pages->end(); i++) + { + ASSERT(m_tex_pages[*i] < SHRT_MAX); + + _InterlockedIncrement16((short*)&m_tex_pages[*i]); // remember which texture pages are used + } + } +} + +void GSRendererSW::ReleasePages(const list* pages, int type) +{ + if(type < 2) + { + for(list::const_iterator i = pages->begin(); i != pages->end(); i++) + { + ASSERT(((short*)&m_fzb_pages[*i])[type] > 0); + + _InterlockedDecrement16((short*)&m_fzb_pages[*i] + type); + } + } + else + { + for(list::const_iterator i = pages->begin(); i != pages->end(); i++) + { + ASSERT(m_tex_pages[*i] > 0); + + _InterlockedDecrement16((short*)&m_tex_pages[*i]); + } } } #include "GSTextureSW.h" -bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd) +bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) { + GSScanlineGlobalData& gd = *(GSScanlineGlobalData*)data2->param; + const GSDrawingEnvironment& env = m_env; const GSDrawingContext* context = m_context; const GS_PRIM_CLASS primclass = m_vt.m_primclass; @@ -545,7 +550,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd) if(t == NULL) {ASSERT(0); return false;} - UseSourcePages(t); + data2->UseSourcePages(t, 0); GSVector4i r; @@ -698,7 +703,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd) if(t == NULL) {ASSERT(0); return false;} - UseSourcePages(t); + data2->UseSourcePages(t, i); GSVector4i r; @@ -1096,3 +1101,88 @@ if(!m_dump) // Flush(); } } + +// GSRendererSW::GSRasterizerData2 + +GSRendererSW::GSRasterizerData2::GSRasterizerData2(GSRendererSW* parent) + : m_parent(parent) + , m_fb_pages(NULL) + , m_zb_pages(NULL) + , m_using_pages(false) +{ + memset(m_tex_pages, 0, sizeof(m_tex_pages)); + + GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32); + + gd->sel.key = 0; + + gd->clut = NULL; + gd->dimx = NULL; + + param = gd; +} + 
+GSRendererSW::GSRasterizerData2::~GSRasterizerData2() +{ + if(m_using_pages) + { + GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param; + + if(gd->sel.fwrite) + { + m_parent->ReleasePages(m_fb_pages, 0); + } + + if(gd->sel.zwrite) + { + m_parent->ReleasePages(m_zb_pages, 1); + } + } + + for(size_t i = 0; i < countof(m_tex_pages) && m_tex_pages[i] != NULL; i++) + { + m_parent->ReleasePages(m_tex_pages[i], 2); + } + + GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param; + + if(gd->clut) _aligned_free(gd->clut); + if(gd->dimx) _aligned_free(gd->dimx); + + _aligned_free(gd); + + m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels); +} + +void GSRendererSW::GSRasterizerData2::UseTargetPages(const list* fb_pages, const list* zb_pages) +{ + if(m_using_pages) return; + + m_fb_pages = fb_pages; + m_zb_pages = zb_pages; + + GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param; + + if(gd->sel.fwrite) + { + m_parent->UsePages(fb_pages, 0); + } + + if(gd->sel.zwrite) + { + m_parent->UsePages(zb_pages, 1); + } + + m_using_pages = true; +} + +void GSRendererSW::GSRasterizerData2::UseSourcePages(GSTextureCacheSW::Texture* t, int level) +{ + ASSERT(m_tex_pages[level] == NULL); + + const list* pages = t->m_pages.n; + + m_tex_pages[level] = pages; + + m_parent->UsePages(pages, 2); +} diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index abb2267b21..54d1236941 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -32,76 +32,15 @@ class GSRendererSW : public GSRendererT GSRendererSW* m_parent; const list* m_fb_pages; const list* m_zb_pages; + const list* m_tex_pages[7]; bool m_using_pages; public: - GSRasterizerData2(GSRendererSW* parent, const list* fb_pages, const list* zb_pages) - : m_parent(parent) - , m_fb_pages(fb_pages) - , m_zb_pages(zb_pages) - , m_using_pages(false) - { - GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32); + GSRasterizerData2(GSRendererSW* parent); + 
virtual ~GSRasterizerData2(); - gd->sel.key = 0; - - gd->clut = NULL; - gd->dimx = NULL; - - param = gd; - } - - virtual ~GSRasterizerData2() - { - ReleaseTargetPages(); - - GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param; - - if(gd->clut) _aligned_free(gd->clut); - if(gd->dimx) _aligned_free(gd->dimx); - - _aligned_free(gd); - - m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels); - } - - void UseTargetPages() - { - if(m_using_pages) {ASSERT(0); return;} - - GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param; - - if(gd->sel.fwrite) - { - m_parent->UseTargetPages(m_fb_pages, 0); - } - - if(gd->sel.zwrite) - { - m_parent->UseTargetPages(m_zb_pages, 1); - } - - m_using_pages = true; - } - - void ReleaseTargetPages() - { - if(!m_using_pages) {ASSERT(0); return;} - - GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param; - - if(gd->sel.fwrite) - { - m_parent->ReleaseTargetPages(m_fb_pages, 0); - } - - if(gd->sel.zwrite) - { - m_parent->ReleaseTargetPages(m_zb_pages, 1); - } - - m_using_pages = false; - } + void UseTargetPages(const list* fb_pages, const list* zb_pages); + void UseSourcePages(GSTextureCacheSW::Texture* t, int level); }; protected: @@ -112,7 +51,7 @@ protected: bool m_reset; GSPixelOffset4* m_fzb; uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved - uint32 m_tex_pages[16]; + uint16 m_tex_pages[512]; void Reset(); void VSync(int field); @@ -124,11 +63,10 @@ protected: void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); - void UseTargetPages(const list* pages, int offset); - void ReleaseTargetPages(const list* pages, int offset); - void UseSourcePages(const GSTextureCacheSW::Texture* t); + void UsePages(const list* pages, int type); + void ReleasePages(const list* pages, int type); - bool GetScanlineGlobalData(GSScanlineGlobalData& gd); + bool GetScanlineGlobalData(GSRasterizerData2* data2); public: 
GSRendererSW(int threads); diff --git a/plugins/GSdx/GSTextureCacheSW.cpp b/plugins/GSdx/GSTextureCacheSW.cpp index 2f45a9bcde..93d026fadf 100644 --- a/plugins/GSdx/GSTextureCacheSW.cpp +++ b/plugins/GSdx/GSTextureCacheSW.cpp @@ -74,7 +74,7 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons m_textures.insert(t); - for(list::iterator i = t->m_pages.n.begin(); i != t->m_pages.n.end(); i++) + for(list::const_iterator i = t->m_pages.n->begin(); i != t->m_pages.n->end(); i++) { m_map[*i].push_front(t); } @@ -181,14 +181,13 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - list* pages = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH)); + m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH)); - for(list::const_iterator i = pages->begin(); i != pages->end(); i++) + for(list::const_iterator i = m_pages.n->begin(); i != m_pages.n->end(); i++) { uint32 page = *i; m_pages.bm[page >> 5] |= 1 << (page & 31); - m_pages.n.push_back(page); } m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower diff --git a/plugins/GSdx/GSTextureCacheSW.h b/plugins/GSdx/GSTextureCacheSW.h index f65bcf42ba..0ac12dcbec 100644 --- a/plugins/GSdx/GSTextureCacheSW.h +++ b/plugins/GSdx/GSTextureCacheSW.h @@ -40,7 +40,7 @@ public: bool m_repeating; list* m_p2t; uint32 m_valid[MAX_PAGES]; - struct {uint32 bm[16]; list n;} m_pages; + struct {uint32 bm[16]; const list* n;} m_pages; // m_valid // fast mode: each uint32 bits map to the 32 blocks of that page