From d5dbe7e7e9e09d8f0df2822cb4d9f3842bd6ed0b Mon Sep 17 00:00:00 2001 From: gabest11 Date: Thu, 22 Dec 2011 14:36:54 +0000 Subject: [PATCH] GSdx: Moved filling up rendering threads on a new thread, to not block the main, it looks like now I can replace one of the spin loops with an event. Using events results in about -5% fps, but still pretty fast. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5007 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSPerfMon.cpp | 2 +- plugins/GSdx/GSRasterizer.cpp | 267 +++++++---------------------- plugins/GSdx/GSRasterizer.h | 94 +++------- plugins/GSdx/GSRendererSW.cpp | 38 ++--- plugins/GSdx/GSRendererSW.h | 2 +- plugins/GSdx/GSThread.h | 311 ++++++++++++++++++++++------------ 6 files changed, 317 insertions(+), 397 deletions(-) diff --git a/plugins/GSdx/GSPerfMon.cpp b/plugins/GSdx/GSPerfMon.cpp index 6c5ec05238..e3fb71e42f 100644 --- a/plugins/GSdx/GSPerfMon.cpp +++ b/plugins/GSdx/GSPerfMon.cpp @@ -90,7 +90,7 @@ void GSPerfMon::Stop(int timer) int GSPerfMon::CPU(int timer, bool reset) { - int percent = m_total[timer] / 1000; // (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer])); + int percent = (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer])); if(reset) { diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index 605c32812e..17db90c649 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -766,183 +766,15 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo // -GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon) - : GSRasterizer(ds, id, threads, perfmon) - , m_exit(false) - , m_break(true) -{ - CreateThread(); -} - -GSRasterizerMT::~GSRasterizerMT() -{ - m_break = true; - - m_exit = true; - - m_draw.Set(); - - CloseThread(); -} - -void GSRasterizerMT::Queue(shared_ptr data) -{ - GSAutoLock l(&m_lock); - - m_queue.push(data); - - if(m_break) - { - m_break = false; - - m_draw.Set(); - } -} - -void GSRasterizerMT::Sync() -{ - while(!m_queue.empty()) _mm_pause(); - - m_break = true; -} - -void GSRasterizerMT::ThreadProc() -{ - while(m_draw.Wait() && !m_exit) - { - // once we are running it is better to spin, jobs can be smaller than the cost of waking up every time - - while(!m_break) - { - if(!m_queue.empty()) - { - while(!m_queue.empty()) - { - shared_ptr data; - - { - GSAutoLock l(&m_lock); - - data = m_queue.front(); - } - - Draw(data); - - { - GSAutoLock l(&m_lock); - - m_queue.pop(); - } - } - } - else - { - _mm_pause(); - } - } - } -} - -#ifdef _WINDOWS - -GSRasterizerMT2::GSRasterizerMT2(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon) - : GSRasterizer(ds, id, threads, perfmon) -{ - InitializeSRWLock(&m_lock); - InitializeConditionVariable(&m_notempty); - InitializeConditionVariable(&m_empty); - - CreateThread(); -} - -GSRasterizerMT2::~GSRasterizerMT2() -{ - m_queue.push(shared_ptr()); - - WakeConditionVariable(&m_notempty); - - CloseThread(); -} - -void GSRasterizerMT2::Queue(shared_ptr data) -{ - AcquireSRWLockExclusive(&m_lock); - - m_queue.push(data); - - ReleaseSRWLockExclusive(&m_lock); - - WakeConditionVariable(&m_notempty); -} - -void GSRasterizerMT2::Sync() -{ - AcquireSRWLockExclusive(&m_lock); - - while(!m_queue.empty()) - { - // TODO: instead of just waiting for the workers, help finishing their queues! - // TODO: to do that, queues needs to be merged and id'ed, and threads must switch m_myscanline on the fly - - GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerSync0 + m_id); - - SleepConditionVariableSRW(&m_empty, &m_lock, INFINITE, 0); - } - - ReleaseSRWLockExclusive(&m_lock); -} - -void GSRasterizerMT2::ThreadProc() -{ - AcquireSRWLockExclusive(&m_lock); - - while(true) - { - while(m_queue.empty()) - { - GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerSleep0 + m_id); - - SleepConditionVariableSRW(&m_notempty, &m_lock, INFINITE, 0); - } - - shared_ptr data; - - data = m_queue.front(); - - ReleaseSRWLockExclusive(&m_lock); - - if(data == NULL) - { - break; - } - - Draw(data); - - AcquireSRWLockExclusive(&m_lock); - - m_queue.pop(); - - if(m_queue.empty()) - { - WakeConditionVariable(&m_empty); - } - } -} - -#endif - -// - GSRasterizerList::GSRasterizerList() - : m_sync_count(0) - , m_count(0) - , m_dispatched(0) + : GSJobQueue >() + , m_sync_count(0) { } GSRasterizerList::~GSRasterizerList() { - for(vector::iterator i = begin(); i != end(); i++) + for(vector::iterator i = m_workers.begin(); i != m_workers.end(); i++) { delete *i; } @@ -950,46 +782,77 @@ GSRasterizerList::~GSRasterizerList() void GSRasterizerList::Queue(shared_ptr data) { - if(size() > 1 && data->solidrect) // TODO: clip to thread area and dispatch? - { - Sync(); // complete previous drawings - - front()->Draw(data); - - return; - } - - GSVector4i bbox = data->bbox.rintersect(data->scissor); - - for(int i = 0; i < size(); i++) - { - GSRasterizer* r = (*this)[i]; - - if(r->IsOneOfMyScanlines(bbox.top, bbox.bottom)) - { - r->Queue(data); - - m_dispatched++; - } - } - - m_count++; + Push(data); } void GSRasterizerList::Sync() { - if(m_count > 0) + if(GetCount() == 0) return; + + Wait(); // first dispatch all items to workers + + for(size_t i = 0; i < m_workers.size(); i++) { - for(int i = 0; i < size(); i++) + m_workers[i]->Wait(); // then wait all workers to finish their jobs + } + + m_sync_count++; +} + +void GSRasterizerList::Process(shared_ptr& item) +{ + if(m_workers.size() > 1 && item->solidrect) // TODO: clip to thread area and dispatch? + { + for(size_t i = 0; i < m_workers.size(); i++) { - (*this)[i]->Sync(); + m_workers[i]->Wait(); } - m_sync_count++; + m_workers.front()->Process(item); - //printf("%d %d%%\n", m_count, 100 * m_dispatched / (m_count * size())); + return; + } - m_count = 0; - m_dispatched = 0; + if(item->syncpoint) + { + for(size_t i = 0; i < m_workers.size(); i++) + { + m_workers[i]->Wait(); + } + } + + for(size_t i = 0; i < m_workers.size(); i++) + { + m_workers[i]->Push(item); } } + +// GSRasterizerList::GSWorker + +GSRasterizerList::GSWorker::GSWorker(GSRasterizer* r) + : GSJobQueue >() + , m_r(r) +{ +} + +GSRasterizerList::GSWorker::~GSWorker() +{ + Wait(); + + delete m_r; +} + +void GSRasterizerList::GSWorker::Push(const shared_ptr& item) +{ + GSVector4i r = item->bbox.rintersect(item->scissor); + + if(m_r->IsOneOfMyScanlines(r.top, r.bottom)) + { + GSJobQueue >::Push(item); + } +} + +void GSRasterizerList::GSWorker::Process(shared_ptr& item) +{ + m_r->Draw(item); +} diff --git a/plugins/GSdx/GSRasterizer.h b/plugins/GSdx/GSRasterizer.h index ebbbac20bc..0b17faddb8 100644 --- a/plugins/GSdx/GSRasterizer.h +++ b/plugins/GSdx/GSRasterizer.h @@ -26,6 +26,7 @@ #include "GSFunctionMap.h" #include "GSThread.h" #include "GSAlignedClass.h" +#include "GSPerfMon.h" __aligned(class, 32) GSRasterizerData : public GSAlignedClass<32> { @@ -36,6 +37,7 @@ public: GSVertexSW* vertices; int count; bool solidrect; + bool syncpoint; uint64 frame; void* param; @@ -43,6 +45,7 @@ public: : vertices(NULL) , count(0) , solidrect(false) + , syncpoint(false) , param(NULL) { } @@ -52,7 +55,7 @@ public: if(vertices != NULL) _aligned_free(vertices); // derived class should free param and its members - } + } }; class IDrawScanline : public GSAlignedClass<32> @@ -103,8 +106,6 @@ public: virtual void Sync() = 0; }; -#include "GSPerfMon.h" - __aligned(class, 32) GSRasterizer : public IRasterizer { protected: @@ -148,59 +149,33 @@ public: void Sync() {} }; -class GSRasterizerMT : public GSRasterizer, private GSThread +class GSRasterizerList + : public IRasterizer + , private GSJobQueue > { protected: - volatile bool m_exit; - volatile bool m_break; - GSCritSec m_lock; - GSEvent m_draw; - queue > m_queue; + class GSWorker : public GSJobQueue > + { + GSRasterizer* m_r; - void ThreadProc(); + public: + GSWorker(GSRasterizer* r); + virtual ~GSWorker(); -public: - GSRasterizerMT(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon); - virtual ~GSRasterizerMT(); + // GSJobQueue - // IRasterizer + void Push(const shared_ptr& item); + void Process(shared_ptr& item); + }; - void Queue(shared_ptr data); - void Sync(); -}; - -#ifdef _WINDOWS - -class GSRasterizerMT2 : public GSRasterizer, private GSThread -{ -protected: - SRWLOCK m_lock; - CONDITION_VARIABLE m_notempty; - CONDITION_VARIABLE m_empty; - queue > m_queue; - - void ThreadProc(); - -public: - GSRasterizerMT2(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon); - virtual ~GSRasterizerMT2(); - - // IRasterizer - - void Queue(shared_ptr data); - void Sync(); -}; - -#endif - -class GSRasterizerList : public IRasterizer, protected vector -{ -protected: - int m_count; - int m_dispatched; + vector m_workers; GSRasterizerList(); + // GSJobQueue + + void Process(shared_ptr& item); + public: virtual ~GSRasterizerList(); @@ -216,38 +191,19 @@ public: { GSRasterizerList* rl = new GSRasterizerList(); - #ifdef _WINDOWS - - OSVERSIONINFOEX version; - memset(&version, 0, sizeof(version)); - version.dwOSVersionInfoSize = sizeof(version); - GetVersionEx((OSVERSIONINFO*)&version); - - if(version.dwMajorVersion >= 6) - { - for(int i = 0; i < threads; i++) - { - rl->push_back(new GSRasterizerMT2(new DS(), i, threads, perfmon)); - } - - return rl; - } - - #endif - for(int i = 0; i < threads; i++) { - rl->push_back(new GSRasterizerMT(new DS(), i, threads, perfmon)); + rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon))); } return rl; } } + int m_sync_count; + // IRasterizer void Queue(shared_ptr data); void Sync(); - - int m_sync_count; }; diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 24f85bb1ff..3e97671b77 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -67,7 +67,7 @@ void GSRendererSW::Reset() void GSRendererSW::VSync(int field) { - Sync(); // IncAge might delete a cached texture in use + Sync(0); // IncAge might delete a cached texture in use /* printf("CPU %d Sync %d W %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d\n", m_perfmon.CPU(GSPerfMon::Main), @@ -127,7 +127,7 @@ void GSRendererSW::ResetDevice() GSTexture* GSRendererSW::GetOutput(int i) { - Sync(); + Sync(1); const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB; @@ -166,17 +166,6 @@ void GSRendererSW::Draw() { if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass); - if(m_fzb != m_context->offset.fzb) - { - // rasterizers must write the same outputs at the same time, this makes sure each thread has its own private surface area - - // TODO: detect if frame/zbuf overlap eachother (?) - - m_fzb = m_context->offset.fzb; - - Sync(); - } - shared_ptr data(new GSRasterizerData2()); GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param; @@ -196,6 +185,13 @@ void GSRendererSW::Draw() data->solidrect = gd->sel.IsSolidRect(); data->frame = m_perfmon.GetFrame(); + if(m_fzb != m_context->offset.fzb) + { + m_fzb = m_context->offset.fzb; + + data->syncpoint = true; + } + GSVector4i r = data->bbox.rintersect(data->scissor); if(gd->sel.fwrite) @@ -210,7 +206,7 @@ void GSRendererSW::Draw() if(s_dump) { - Sync(); + Sync(3); uint64 frame = m_perfmon.GetFrame(); @@ -243,7 +239,7 @@ void GSRendererSW::Draw() m_rl->Queue(data); - Sync(); + Sync(4); if(s_save && s_n >= s_saven) { @@ -291,9 +287,9 @@ void GSRendererSW::Draw() */ } -void GSRendererSW::Sync() +void GSRendererSW::Sync(int reason) { - //printf("sync\n"); + //printf("sync %d\n", reason); GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync); @@ -313,7 +309,7 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS if(CheckPages(o, r)) // check if the changing pages either used as a texture or a target { - Sync(); + Sync(5); } } @@ -325,7 +321,7 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS if(CheckPages(o, r)) // TODO: only checking m_fzb_pages would be enough (read-backs are rare anyway) { - Sync(); + Sync(6); } } @@ -337,8 +333,10 @@ void GSRendererSW::InvalidatePages(const GSTextureCacheSW::Texture* t) { if(m_fzb_pages[i] & t->m_pages[i]) // currently being drawn to? => sync { - Sync(); + // + Sync(7); + // return; } diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index 4e0986be34..54876a44c2 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -67,7 +67,7 @@ protected: GSTexture* GetOutput(int i); void Draw(); - void Sync(); + void Sync(int reason); void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); diff --git a/plugins/GSdx/GSThread.h b/plugins/GSdx/GSThread.h index 23b5ef156e..8aba366969 100644 --- a/plugins/GSdx/GSThread.h +++ b/plugins/GSdx/GSThread.h @@ -68,110 +68,6 @@ public: bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;} }; -// TODO: pthreads version (needs manual-reset event) - -template< - class T, - class ENQUEUE_EVENT = GSEvent, - class DEQUEUE_EVENT = GSEvent> -class GSQueue : public GSCritSec -{ - std::list m_queue; - HANDLE m_put; - HANDLE m_get; - ENQUEUE_EVENT m_enqueue; - DEQUEUE_EVENT m_dequeue; - long m_count; - -public: - GSQueue(long count) - : m_enqueue(true) - , m_dequeue(true) - , m_count(count) - { - m_put = CreateSemaphore(NULL, count, count, NULL); - m_get = CreateSemaphore(NULL, 0, count, NULL); - - m_dequeue.Set(); - } - - virtual ~GSQueue() - { - CloseHandle(m_put); - CloseHandle(m_get); - } - - size_t GetCount() const - { - // GSAutoLock cAutoLock(this); - - return m_queue.size(); - } - - size_t GetMaxCount() const - { - // GSAutoLock cAutoLock(this); - - return (size_t)m_count; - } - - ENQUEUE_EVENT& GetEnqueueEvent() - { - return m_enqueue; - } - - DEQUEUE_EVENT& GetDequeueEvent() - { - return m_dequeue; - } - - void Enqueue(T item) - { - WaitForSingleObject(m_put, INFINITE); - - { - GSAutoLock cAutoLock(this); - - m_queue.push_back(item); - - m_enqueue.Set(); - m_dequeue.Reset(); - } - - ReleaseSemaphore(m_get, 1, NULL); - } - - T Dequeue() - { - T item; - - WaitForSingleObject(m_get, INFINITE); - - { - GSAutoLock cAutoLock(this); - - item = m_queue.front(); - - m_queue.pop_front(); - - if(m_queue.empty()) - { - m_enqueue.Reset(); - m_dequeue.Set(); - } - } - - ReleaseSemaphore(m_put, 1, NULL); - - return item; - } - - T Peek() // lock on "this" - { - return m_queue.front(); - } -}; - #else #include @@ -263,3 +159,210 @@ public: return true; } }; + +template class GSJobQueue : private GSThread +{ +protected: + int m_count; + queue m_queue; + volatile bool m_exit; + struct {GSCritSec lock; GSEvent notempty, empty;} m_ev; + #ifdef _WINDOWS + struct {SRWLOCK lock; CONDITION_VARIABLE notempty, empty; bool available;} m_cv; + #endif + + void ThreadProc() + { + #ifdef _WINDOWS + + if(m_cv.available) + { + AcquireSRWLockExclusive(&m_cv.lock); + + while(true) + { + while(m_queue.empty()) + { + SleepConditionVariableSRW(&m_cv.notempty, &m_cv.lock, INFINITE, 0); + + if(m_exit) {ReleaseSRWLockExclusive(&m_cv.lock); return;} + } + + { + T item = m_queue.front(); + + ReleaseSRWLockExclusive(&m_cv.lock); + + Process(item); + + AcquireSRWLockExclusive(&m_cv.lock); + } + + m_queue.pop(); + + if(m_queue.empty()) + { + WakeConditionVariable(&m_cv.empty); + } + } + } + else + { + + #endif + + while(m_ev.notempty.Wait()) + { + if(m_exit) break; + + while(!m_queue.empty()) + { + T item; + + { + GSAutoLock l(&m_ev.lock); + + item = m_queue.front(); + } + + Process(item); + + { + GSAutoLock l(&m_ev.lock); + + m_queue.pop(); + } + } + } + + #ifdef _WINDOWS + + } + + #endif + } + +public: + GSJobQueue() + : m_count(0) + , m_exit(false) + { + m_cv.available = false; + + #ifdef _WINDOWS + + OSVERSIONINFOEX version; + memset(&version, 0, sizeof(version)); + version.dwOSVersionInfoSize = sizeof(version); + GetVersionEx((OSVERSIONINFO*)&version); + + if(version.dwMajorVersion >= 6) + { + InitializeSRWLock(&m_cv.lock); + InitializeConditionVariable(&m_cv.notempty); + InitializeConditionVariable(&m_cv.empty); + + m_cv.available = true; + } + + #endif + + CreateThread(); + } + + virtual ~GSJobQueue() + { + m_exit = true; + + #ifdef _WINDOWS + + if(m_cv.available) + { + WakeConditionVariable(&m_cv.notempty); + } + else + { + + #endif + + m_ev.notempty.Set(); + + #ifdef _WINDOWS + + } + + #endif + } + + int GetCount() const + { + return m_count; + } + + virtual void Push(const T& item) + { + #ifdef _WINDOWS + + if(m_cv.available) + { + AcquireSRWLockExclusive(&m_cv.lock); + + m_queue.push(item); + + ReleaseSRWLockExclusive(&m_cv.lock); + + WakeConditionVariable(&m_cv.notempty); + } + else + { + + #endif + + GSAutoLock l(&m_ev.lock); + + m_queue.push(item); + + m_ev.notempty.Set(); + + #ifdef _WINDOWS + + } + + #endif + + m_count++; + } + + virtual void Wait() + { + #ifdef _WINDOWS + + if(m_cv.available) + { + AcquireSRWLockExclusive(&m_cv.lock); + + while(!m_queue.empty()) + { + SleepConditionVariableSRW(&m_cv.empty, &m_cv.lock, INFINITE, 0); + } + + ReleaseSRWLockExclusive(&m_cv.lock); + } + else + { + + #endif + + while(!m_queue.empty()) _mm_pause(); + + #ifdef _WINDOWS + + } + + #endif + + m_count++; + } + + virtual void Process(T& item) = 0; +};