GSdx: Moved filling up the rendering threads' queues onto a new thread, so that it does not block the main thread. It looks like one of the spin loops can now be replaced with an event. Using events costs about 5% fps, but it is still pretty fast.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5007 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-12-22 14:36:54 +00:00
parent 2421c68bee
commit d5dbe7e7e9
6 changed files with 317 additions and 397 deletions

View File

@ -90,7 +90,7 @@ void GSPerfMon::Stop(int timer)
int GSPerfMon::CPU(int timer, bool reset)
{
int percent = m_total[timer] / 1000; // (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer]));
int percent = (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer]));
if(reset)
{

View File

@ -766,183 +766,15 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
//
// Constructs the base GSRasterizer with the given arguments and starts the worker thread.
// m_exit starts cleared and m_break starts set, so ThreadProc() does not enter its
// spin loop until Queue() clears m_break and signals m_draw.
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon)
: GSRasterizer(ds, id, threads, perfmon)
, m_exit(false)
, m_break(true)
{
CreateThread();
}
// Asks the worker thread to stop and then closes it.
GSRasterizerMT::~GSRasterizerMT()
{
// m_break ends the inner spin loop of ThreadProc(), m_exit ends its outer wait loop
m_break = true;
m_exit = true;
// wake the thread in case it is blocked in m_draw.Wait()
m_draw.Set();
// NOTE(review): CloseThread() is defined in GSThread (not visible here); presumably it waits for the thread to end - confirm
CloseThread();
}
// Appends a job to this rasterizer's queue while holding m_lock.
// If m_break is set, it is cleared and m_draw is signalled, which lets
// ThreadProc() get past m_draw.Wait() and run its spin loop.
void GSRasterizerMT::Queue(shared_ptr<GSRasterizerData> data)
{
GSAutoLock l(&m_lock);
m_queue.push(data);
if(m_break)
{
m_break = false;
m_draw.Set();
}
}
// Spins (with _mm_pause) until the queue is empty, then sets m_break so that the
// spin loop in ThreadProc() ends and the thread goes back to m_draw.Wait().
// ThreadProc() pops a job only after Draw() returned, so an empty queue means
// every queued job has been drawn.
// NOTE(review): m_queue.empty() is read here without taking m_lock.
void GSRasterizerMT::Sync()
{
while(!m_queue.empty()) _mm_pause();
m_break = true;
}
// Worker thread body.
// Outer loop: blocks on m_draw until signalled and stops when m_exit is set
// (or when m_draw.Wait() returns false).
// Inner loop: spins until m_break is set, drawing queued jobs in FIFO order.
void GSRasterizerMT::ThreadProc()
{
while(m_draw.Wait() && !m_exit)
{
// once we are running it is better to spin, jobs can be smaller than the cost of waking up every time
while(!m_break)
{
// NOTE(review): both empty() checks below read m_queue without holding m_lock
if(!m_queue.empty())
{
while(!m_queue.empty())
{
shared_ptr<GSRasterizerData> data;
{
// m_lock is held only while reading the front item, not during Draw()
GSAutoLock l(&m_lock);
data = m_queue.front();
}
Draw(data);
{
// the job is removed only after it has been drawn; Sync() relies on this
GSAutoLock l(&m_lock);
m_queue.pop();
}
}
}
else
{
_mm_pause();
}
}
}
}
#ifdef _WINDOWS
// Constructs the base GSRasterizer with the given arguments, initializes the SRW lock
// and the two condition variables (m_notempty, m_empty), then starts the worker thread.
GSRasterizerMT2::GSRasterizerMT2(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon)
: GSRasterizer(ds, id, threads, perfmon)
{
InitializeSRWLock(&m_lock);
InitializeConditionVariable(&m_notempty);
InitializeConditionVariable(&m_empty);
// the synchronization objects are initialized before the thread that uses them is created
CreateThread();
}
// Stops the worker thread and closes it.
// An empty shared_ptr is queued as the termination marker: ThreadProc() leaves its
// loop when the item at the front of the queue compares equal to NULL.
GSRasterizerMT2::~GSRasterizerMT2()
{
	// m_queue is shared with the worker thread, so the marker is pushed under
	// m_lock, the same way Queue() pushes regular jobs.
	AcquireSRWLockExclusive(&m_lock);
	m_queue.push(shared_ptr<GSRasterizerData>());
	ReleaseSRWLockExclusive(&m_lock);

	// wake the worker in case it is sleeping on an empty queue
	WakeConditionVariable(&m_notempty);

	// NOTE(review): CloseThread() is defined in GSThread (not visible here); presumably it waits for the thread to end - confirm
	CloseThread();
}
// Appends a job to the queue under the exclusive SRW lock and then wakes one thread
// waiting on m_notempty (the worker sleeping in ThreadProc()).
void GSRasterizerMT2::Queue(shared_ptr<GSRasterizerData> data)
{
AcquireSRWLockExclusive(&m_lock);
m_queue.push(data);
ReleaseSRWLockExclusive(&m_lock);
// the wake-up is issued after the lock has been released
WakeConditionVariable(&m_notempty);
}
// Blocks the caller until the queue is empty.
// ThreadProc() pops a job only after Draw() returned and signals m_empty when the
// queue becomes empty, so returning from here means every queued job has been drawn.
void GSRasterizerMT2::Sync()
{
AcquireSRWLockExclusive(&m_lock);
// the condition is re-checked in a loop after every wake-up
while(!m_queue.empty())
{
// TODO: instead of just waiting for the workers, help finishing their queues!
// TODO: to do that, queues needs to be merged and id'ed, and threads must switch m_myscanline on the fly
// times each wait under the WorkerSync0 + m_id perfmon timer
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerSync0 + m_id);
// releases m_lock while sleeping and holds it again when the call returns
SleepConditionVariableSRW(&m_empty, &m_lock, INFINITE, 0);
}
ReleaseSRWLockExclusive(&m_lock);
}
// Worker thread body: sleeps on m_notempty while the queue is empty, draws queued
// jobs in FIFO order and signals m_empty whenever the queue has been drained.
// The loop ends when the front item is an empty shared_ptr (pushed by the destructor).
void GSRasterizerMT2::ThreadProc()
{
AcquireSRWLockExclusive(&m_lock);
while(true)
{
// m_lock is held here; the sleep releases it and re-acquires it before returning
while(m_queue.empty())
{
// times each sleep under the WorkerSleep0 + m_id perfmon timer
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerSleep0 + m_id);
SleepConditionVariableSRW(&m_notempty, &m_lock, INFINITE, 0);
}
shared_ptr<GSRasterizerData> data;
data = m_queue.front();
// the lock is not held while drawing, so Queue() can add jobs in the meantime
ReleaseSRWLockExclusive(&m_lock);
if(data == NULL)
{
// termination marker: leave with the lock released and the marker still queued
break;
}
Draw(data);
AcquireSRWLockExclusive(&m_lock);
// the job is removed only after it has been drawn; Sync() relies on this
m_queue.pop();
if(m_queue.empty())
{
WakeConditionVariable(&m_empty);
}
}
}
#endif
//
GSRasterizerList::GSRasterizerList()
: m_sync_count(0)
, m_count(0)
, m_dispatched(0)
: GSJobQueue<shared_ptr<GSRasterizerData> >()
, m_sync_count(0)
{
}
GSRasterizerList::~GSRasterizerList()
{
for(vector<GSRasterizer*>::iterator i = begin(); i != end(); i++)
for(vector<GSWorker*>::iterator i = m_workers.begin(); i != m_workers.end(); i++)
{
delete *i;
}
@ -950,46 +782,77 @@ GSRasterizerList::~GSRasterizerList()
void GSRasterizerList::Queue(shared_ptr<GSRasterizerData> data)
{
if(size() > 1 && data->solidrect) // TODO: clip to thread area and dispatch?
{
Sync(); // complete previous drawings
front()->Draw(data);
return;
}
GSVector4i bbox = data->bbox.rintersect(data->scissor);
for(int i = 0; i < size(); i++)
{
GSRasterizer* r = (*this)[i];
if(r->IsOneOfMyScanlines(bbox.top, bbox.bottom))
{
r->Queue(data);
m_dispatched++;
}
}
m_count++;
Push(data);
}
void GSRasterizerList::Sync()
{
if(m_count > 0)
if(GetCount() == 0) return;
Wait(); // first dispatch all items to workers
for(size_t i = 0; i < m_workers.size(); i++)
{
for(int i = 0; i < size(); i++)
m_workers[i]->Wait(); // then wait all workers to finish their jobs
}
m_sync_count++;
}
void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
{
if(m_workers.size() > 1 && item->solidrect) // TODO: clip to thread area and dispatch?
{
for(size_t i = 0; i < m_workers.size(); i++)
{
(*this)[i]->Sync();
m_workers[i]->Wait();
}
m_sync_count++;
m_workers.front()->Process(item);
//printf("%d %d%%\n", m_count, 100 * m_dispatched / (m_count * size()));
return;
}
m_count = 0;
m_dispatched = 0;
if(item->syncpoint)
{
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Wait();
}
}
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Push(item);
}
}
// GSRasterizerList::GSWorker
// Creates a worker job queue around the rasterizer r.
// The worker takes ownership of r: the destructor deletes it.
GSRasterizerList::GSWorker::GSWorker(GSRasterizer* r)
: GSJobQueue<shared_ptr<GSRasterizerData> >()
, m_r(r)
{
}
// Waits for the queue to drain and then deletes the owned rasterizer.
GSRasterizerList::GSWorker::~GSWorker()
{
// Wait() comes first so that m_r is not deleted while queued jobs still have to be drawn with it
Wait();
delete m_r;
}
void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
{
GSVector4i r = item->bbox.rintersect(item->scissor);
if(m_r->IsOneOfMyScanlines(r.top, r.bottom))
{
GSJobQueue<shared_ptr<GSRasterizerData> >::Push(item);
}
}
// GSJobQueue callback for one queued item: draws it with the rasterizer owned by this worker.
void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
{
m_r->Draw(item);
}

View File

@ -26,6 +26,7 @@
#include "GSFunctionMap.h"
#include "GSThread.h"
#include "GSAlignedClass.h"
#include "GSPerfMon.h"
__aligned(class, 32) GSRasterizerData : public GSAlignedClass<32>
{
@ -36,6 +37,7 @@ public:
GSVertexSW* vertices;
int count;
bool solidrect;
bool syncpoint;
uint64 frame;
void* param;
@ -43,6 +45,7 @@ public:
: vertices(NULL)
, count(0)
, solidrect(false)
, syncpoint(false)
, param(NULL)
{
}
@ -52,7 +55,7 @@ public:
if(vertices != NULL) _aligned_free(vertices);
// derived class should free param and its members
}
}
};
class IDrawScanline : public GSAlignedClass<32>
@ -103,8 +106,6 @@ public:
virtual void Sync() = 0;
};
#include "GSPerfMon.h"
__aligned(class, 32) GSRasterizer : public IRasterizer
{
protected:
@ -148,59 +149,33 @@ public:
void Sync() {}
};
class GSRasterizerMT : public GSRasterizer, private GSThread
class GSRasterizerList
: public IRasterizer
, private GSJobQueue<shared_ptr<GSRasterizerData> >
{
protected:
volatile bool m_exit;
volatile bool m_break;
GSCritSec m_lock;
GSEvent m_draw;
queue<shared_ptr<GSRasterizerData> > m_queue;
class GSWorker : public GSJobQueue<shared_ptr<GSRasterizerData> >
{
GSRasterizer* m_r;
void ThreadProc();
public:
GSWorker(GSRasterizer* r);
virtual ~GSWorker();
public:
GSRasterizerMT(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
virtual ~GSRasterizerMT();
// GSJobQueue
// IRasterizer
void Push(const shared_ptr<GSRasterizerData>& item);
void Process(shared_ptr<GSRasterizerData>& item);
};
void Queue(shared_ptr<GSRasterizerData> data);
void Sync();
};
#ifdef _WINDOWS
class GSRasterizerMT2 : public GSRasterizer, private GSThread
{
protected:
SRWLOCK m_lock;
CONDITION_VARIABLE m_notempty;
CONDITION_VARIABLE m_empty;
queue<shared_ptr<GSRasterizerData> > m_queue;
void ThreadProc();
public:
GSRasterizerMT2(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
virtual ~GSRasterizerMT2();
// IRasterizer
void Queue(shared_ptr<GSRasterizerData> data);
void Sync();
};
#endif
class GSRasterizerList : public IRasterizer, protected vector<GSRasterizer*>
{
protected:
int m_count;
int m_dispatched;
vector<GSWorker*> m_workers;
GSRasterizerList();
// GSJobQueue
void Process(shared_ptr<GSRasterizerData>& item);
public:
virtual ~GSRasterizerList();
@ -216,38 +191,19 @@ public:
{
GSRasterizerList* rl = new GSRasterizerList();
#ifdef _WINDOWS
OSVERSIONINFOEX version;
memset(&version, 0, sizeof(version));
version.dwOSVersionInfoSize = sizeof(version);
GetVersionEx((OSVERSIONINFO*)&version);
if(version.dwMajorVersion >= 6)
{
for(int i = 0; i < threads; i++)
{
rl->push_back(new GSRasterizerMT2(new DS(), i, threads, perfmon));
}
return rl;
}
#endif
for(int i = 0; i < threads; i++)
{
rl->push_back(new GSRasterizerMT(new DS(), i, threads, perfmon));
rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
}
return rl;
}
}
int m_sync_count;
// IRasterizer
void Queue(shared_ptr<GSRasterizerData> data);
void Sync();
int m_sync_count;
};

View File

@ -67,7 +67,7 @@ void GSRendererSW::Reset()
void GSRendererSW::VSync(int field)
{
Sync(); // IncAge might delete a cached texture in use
Sync(0); // IncAge might delete a cached texture in use
/*
printf("CPU %d Sync %d W %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d\n",
m_perfmon.CPU(GSPerfMon::Main),
@ -127,7 +127,7 @@ void GSRendererSW::ResetDevice()
GSTexture* GSRendererSW::GetOutput(int i)
{
Sync();
Sync(1);
const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;
@ -166,17 +166,6 @@ void GSRendererSW::Draw()
{
if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
if(m_fzb != m_context->offset.fzb)
{
// rasterizers must write the same outputs at the same time, this makes sure each thread has its own private surface area
// TODO: detect if frame/zbuf overlap eachother (?)
m_fzb = m_context->offset.fzb;
Sync();
}
shared_ptr<GSRasterizerData> data(new GSRasterizerData2());
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
@ -196,6 +185,13 @@ void GSRendererSW::Draw()
data->solidrect = gd->sel.IsSolidRect();
data->frame = m_perfmon.GetFrame();
if(m_fzb != m_context->offset.fzb)
{
m_fzb = m_context->offset.fzb;
data->syncpoint = true;
}
GSVector4i r = data->bbox.rintersect(data->scissor);
if(gd->sel.fwrite)
@ -210,7 +206,7 @@ void GSRendererSW::Draw()
if(s_dump)
{
Sync();
Sync(3);
uint64 frame = m_perfmon.GetFrame();
@ -243,7 +239,7 @@ void GSRendererSW::Draw()
m_rl->Queue(data);
Sync();
Sync(4);
if(s_save && s_n >= s_saven)
{
@ -291,9 +287,9 @@ void GSRendererSW::Draw()
*/
}
void GSRendererSW::Sync()
void GSRendererSW::Sync(int reason)
{
//printf("sync\n");
//printf("sync %d\n", reason);
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
@ -313,7 +309,7 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
if(CheckPages(o, r)) // check if the changing pages either used as a texture or a target
{
Sync();
Sync(5);
}
}
@ -325,7 +321,7 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
if(CheckPages(o, r)) // TODO: only checking m_fzb_pages would be enough (read-backs are rare anyway)
{
Sync();
Sync(6);
}
}
@ -337,8 +333,10 @@ void GSRendererSW::InvalidatePages(const GSTextureCacheSW::Texture* t)
{
if(m_fzb_pages[i] & t->m_pages[i]) // currently being drawn to? => sync
{
Sync();
//
Sync(7);
//
return;
}

View File

@ -67,7 +67,7 @@ protected:
GSTexture* GetOutput(int i);
void Draw();
void Sync();
void Sync(int reason);
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);

View File

@ -68,110 +68,6 @@ public:
bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;}
};
// TODO: pthreads version (needs manual-reset event)
// Bounded FIFO queue with blocking Enqueue()/Dequeue(), built on two Win32 semaphores.
// The class derives from GSCritSec and locks itself (GSAutoLock on this) around every
// modification of the list.
// Two event objects mirror the fill state: m_enqueue is set while the queue holds at
// least one item, m_dequeue is set while the queue is empty.
template<
class T,
class ENQUEUE_EVENT = GSEvent,
class DEQUEUE_EVENT = GSEvent>
class GSQueue : public GSCritSec
{
std::list<T> m_queue;
HANDLE m_put; // semaphore counting free slots: starts at count, taken by Enqueue(), released by Dequeue()
HANDLE m_get; // semaphore counting stored items: starts at 0, released by Enqueue(), taken by Dequeue()
ENQUEUE_EVENT m_enqueue; // set by Enqueue(), reset when Dequeue() empties the queue
DEQUEUE_EVENT m_dequeue; // set initially and when Dequeue() empties the queue, reset by Enqueue()
long m_count; // capacity passed to the constructor
public:
// Creates a queue that holds at most count items.
// NOTE(review): both events are constructed with 'true'; presumably this selects manual-reset - confirm against GSEvent
GSQueue(long count)
: m_enqueue(true)
, m_dequeue(true)
, m_count(count)
{
m_put = CreateSemaphore(NULL, count, count, NULL);
m_get = CreateSemaphore(NULL, 0, count, NULL);
// the queue starts out empty
m_dequeue.Set();
}
// Closes the two semaphore handles.
virtual ~GSQueue()
{
CloseHandle(m_put);
CloseHandle(m_get);
}
// Returns the current number of items; the list is read without taking the lock.
size_t GetCount() const
{
// GSAutoLock cAutoLock(this);
return m_queue.size();
}
// Returns the capacity given to the constructor.
size_t GetMaxCount() const
{
// GSAutoLock cAutoLock(this);
return (size_t)m_count;
}
// Returns the event that is set while the queue holds at least one item.
ENQUEUE_EVENT& GetEnqueueEvent()
{
return m_enqueue;
}
// Returns the event that is set while the queue is empty.
DEQUEUE_EVENT& GetDequeueEvent()
{
return m_dequeue;
}
// Appends item to the back of the queue; blocks while no free slot is available.
void Enqueue(T item)
{
// take a free slot
WaitForSingleObject(m_put, INFINITE);
{
GSAutoLock cAutoLock(this);
m_queue.push_back(item);
m_enqueue.Set();
m_dequeue.Reset();
}
// publish the new item to Dequeue()
ReleaseSemaphore(m_get, 1, NULL);
}
// Removes and returns the front item; blocks while the queue is empty.
T Dequeue()
{
T item;
// wait for a stored item
WaitForSingleObject(m_get, INFINITE);
{
GSAutoLock cAutoLock(this);
item = m_queue.front();
m_queue.pop_front();
if(m_queue.empty())
{
m_enqueue.Reset();
m_dequeue.Set();
}
}
// hand the freed slot back to Enqueue()
ReleaseSemaphore(m_put, 1, NULL);
return item;
}
// Returns a copy of the front item without removing it.
// Takes no lock and does not wait: the caller has to hold the lock of this object.
T Peek() // lock on "this"
{
return m_queue.front();
}
};
#else
#include <pthread.h>
@ -263,3 +159,210 @@ public:
return true;
}
};
template<class T> class GSJobQueue : private GSThread
{
protected:
int m_count;
queue<T> m_queue;
volatile bool m_exit;
struct {GSCritSec lock; GSEvent notempty, empty;} m_ev;
#ifdef _WINDOWS
struct {SRWLOCK lock; CONDITION_VARIABLE notempty, empty; bool available;} m_cv;
#endif
void ThreadProc()
{
#ifdef _WINDOWS
if(m_cv.available)
{
AcquireSRWLockExclusive(&m_cv.lock);
while(true)
{
while(m_queue.empty())
{
SleepConditionVariableSRW(&m_cv.notempty, &m_cv.lock, INFINITE, 0);
if(m_exit) {ReleaseSRWLockExclusive(&m_cv.lock); return;}
}
{
T item = m_queue.front();
ReleaseSRWLockExclusive(&m_cv.lock);
Process(item);
AcquireSRWLockExclusive(&m_cv.lock);
}
m_queue.pop();
if(m_queue.empty())
{
WakeConditionVariable(&m_cv.empty);
}
}
}
else
{
#endif
while(m_ev.notempty.Wait())
{
if(m_exit) break;
while(!m_queue.empty())
{
T item;
{
GSAutoLock l(&m_ev.lock);
item = m_queue.front();
}
Process(item);
{
GSAutoLock l(&m_ev.lock);
m_queue.pop();
}
}
}
#ifdef _WINDOWS
}
#endif
}
public:
GSJobQueue()
: m_count(0)
, m_exit(false)
{
m_cv.available = false;
#ifdef _WINDOWS
OSVERSIONINFOEX version;
memset(&version, 0, sizeof(version));
version.dwOSVersionInfoSize = sizeof(version);
GetVersionEx((OSVERSIONINFO*)&version);
if(version.dwMajorVersion >= 6)
{
InitializeSRWLock(&m_cv.lock);
InitializeConditionVariable(&m_cv.notempty);
InitializeConditionVariable(&m_cv.empty);
m_cv.available = true;
}
#endif
CreateThread();
}
virtual ~GSJobQueue()
{
m_exit = true;
#ifdef _WINDOWS
if(m_cv.available)
{
WakeConditionVariable(&m_cv.notempty);
}
else
{
#endif
m_ev.notempty.Set();
#ifdef _WINDOWS
}
#endif
}
int GetCount() const
{
return m_count;
}
virtual void Push(const T& item)
{
#ifdef _WINDOWS
if(m_cv.available)
{
AcquireSRWLockExclusive(&m_cv.lock);
m_queue.push(item);
ReleaseSRWLockExclusive(&m_cv.lock);
WakeConditionVariable(&m_cv.notempty);
}
else
{
#endif
GSAutoLock l(&m_ev.lock);
m_queue.push(item);
m_ev.notempty.Set();
#ifdef _WINDOWS
}
#endif
m_count++;
}
virtual void Wait()
{
#ifdef _WINDOWS
if(m_cv.available)
{
AcquireSRWLockExclusive(&m_cv.lock);
while(!m_queue.empty())
{
SleepConditionVariableSRW(&m_cv.empty, &m_cv.lock, INFINITE, 0);
}
ReleaseSRWLockExclusive(&m_cv.lock);
}
else
{
#endif
while(!m_queue.empty()) _mm_pause();
#ifdef _WINDOWS
}
#endif
m_count++;
}
virtual void Process(T& item) = 0;
};