GSdx: Moved the filling of the rendering threads' queues onto a new thread so that it does not block the main thread. It looks like one of the spin loops can now be replaced with an event. Using events costs about 5% fps, but it is still pretty fast.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5007 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-12-22 14:36:54 +00:00
parent 2421c68bee
commit d5dbe7e7e9
6 changed files with 317 additions and 397 deletions

View File

@ -90,7 +90,7 @@ void GSPerfMon::Stop(int timer)
int GSPerfMon::CPU(int timer, bool reset) int GSPerfMon::CPU(int timer, bool reset)
{ {
int percent = m_total[timer] / 1000; // (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer])); int percent = (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer]));
if(reset) if(reset)
{ {

View File

@ -766,183 +766,15 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
// //
// Forwards all arguments to the GSRasterizer base, initializes the control flags and calls CreateThread().
// m_break starts out true, so the first Queue() call is the one that signals m_draw.
// NOTE(review): CreateThread() is not defined in this view; presumably it starts the thread that runs ThreadProc() - confirm.
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon)
: GSRasterizer(ds, id, threads, perfmon)
, m_exit(false)
, m_break(true)
{
CreateThread();
}
// Asks ThreadProc() to stop and then calls CloseThread().
GSRasterizerMT::~GSRasterizerMT()
{
m_break = true; // ends ThreadProc's inner spin loop
m_exit = true; // makes ThreadProc's outer loop condition fail after the next wake-up
m_draw.Set(); // wakes ThreadProc if it is blocked in m_draw.Wait()
CloseThread();
}
// Appends a job to m_queue under m_lock.
// If m_break is set (initially, or after Sync()), it is cleared and m_draw is signalled
// so that ThreadProc() enters its spin loop again.
void GSRasterizerMT::Queue(shared_ptr<GSRasterizerData> data)
{
GSAutoLock l(&m_lock);
m_queue.push(data);
if(m_break)
{
m_break = false;
m_draw.Set();
}
}
// Spins until m_queue is empty, then sets m_break so that ThreadProc() leaves its spin loop
// and goes back to waiting on m_draw.
// NOTE(review): m_queue.empty() is read here without taking m_lock - confirm this is intentional.
void GSRasterizerMT::Sync()
{
while(!m_queue.empty()) _mm_pause();
m_break = true;
}
// Worker loop: waits on m_draw, then spins over m_queue until m_break is set.
// The outer loop ends when m_draw.Wait() returns false or m_exit is set.
void GSRasterizerMT::ThreadProc()
{
while(m_draw.Wait() && !m_exit)
{
// once we are running it is better to spin, jobs can be smaller than the cost of waking up every time
while(!m_break)
{
if(!m_queue.empty())
{
while(!m_queue.empty())
{
shared_ptr<GSRasterizerData> data;
{
// copy the front job under the lock, but leave it in the queue
GSAutoLock l(&m_lock);
data = m_queue.front();
}
// drawn without holding the lock
Draw(data);
{
// the job is removed only after it has been drawn, so Sync() (which spins on m_queue.empty())
// does not return while a job is still being drawn
GSAutoLock l(&m_lock);
m_queue.pop();
}
}
}
else
{
// nothing queued: spin
_mm_pause();
}
}
}
}
#ifdef _WINDOWS
// Forwards all arguments to the GSRasterizer base, initializes the SRW lock and both
// condition variables, then calls CreateThread().
// NOTE(review): CreateThread() is not defined in this view; presumably it starts the thread that runs ThreadProc() - confirm.
GSRasterizerMT2::GSRasterizerMT2(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon)
: GSRasterizer(ds, id, threads, perfmon)
{
InitializeSRWLock(&m_lock);
InitializeConditionVariable(&m_notempty);
InitializeConditionVariable(&m_empty);
CreateThread();
}
// Stops the worker thread by queueing an empty shared_ptr, which ThreadProc() treats as
// the exit signal, and then calls CloseThread().
GSRasterizerMT2::~GSRasterizerMT2()
{
	// m_queue is accessed under m_lock everywhere else (Queue, Sync, ThreadProc); the sentinel
	// must be pushed under the same lock, otherwise it races with the worker's front()/pop()
	AcquireSRWLockExclusive(&m_lock);
	m_queue.push(shared_ptr<GSRasterizerData>());
	ReleaseSRWLockExclusive(&m_lock);

	WakeConditionVariable(&m_notempty);

	CloseThread();
}
// Appends a job to m_queue under m_lock, then wakes one waiter on m_notempty
// (ThreadProc() sleeps on it while the queue is empty).
void GSRasterizerMT2::Queue(shared_ptr<GSRasterizerData> data)
{
AcquireSRWLockExclusive(&m_lock);
m_queue.push(data);
ReleaseSRWLockExclusive(&m_lock);
// the wake is issued after the lock has been released
WakeConditionVariable(&m_notempty);
}
// Blocks until m_queue is empty. ThreadProc() wakes m_empty after popping the last job.
// Each sleep is timed under GSPerfMon::WorkerSync0 + m_id.
void GSRasterizerMT2::Sync()
{
AcquireSRWLockExclusive(&m_lock);
// the condition is re-checked in a loop after every wake-up
while(!m_queue.empty())
{
// TODO: instead of just waiting for the workers, help finishing their queues!
// TODO: to do that, queues needs to be merged and id'ed, and threads must switch m_myscanline on the fly
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerSync0 + m_id);
SleepConditionVariableSRW(&m_empty, &m_lock, INFINITE, 0);
}
ReleaseSRWLockExclusive(&m_lock);
}
// Worker loop: sleeps on m_notempty while the queue is empty, draws the front job with the
// lock released, then pops it and wakes m_empty when the queue has become empty.
// An empty shared_ptr at the front of the queue ends the loop.
void GSRasterizerMT2::ThreadProc()
{
AcquireSRWLockExclusive(&m_lock);
while(true)
{
while(m_queue.empty())
{
// each sleep is timed under GSPerfMon::WorkerSleep0 + m_id
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerSleep0 + m_id);
SleepConditionVariableSRW(&m_notempty, &m_lock, INFINITE, 0);
}
shared_ptr<GSRasterizerData> data;
// copy the front job but leave it queued while it is drawn
data = m_queue.front();
ReleaseSRWLockExclusive(&m_lock);
if(data == NULL)
{
// exit sentinel; the lock has already been released at this point
break;
}
Draw(data);
AcquireSRWLockExclusive(&m_lock);
// the job is removed only after it has been drawn, so Sync() does not return early
m_queue.pop();
if(m_queue.empty())
{
WakeConditionVariable(&m_empty);
}
}
}
#endif
//
GSRasterizerList::GSRasterizerList() GSRasterizerList::GSRasterizerList()
: m_sync_count(0) : GSJobQueue<shared_ptr<GSRasterizerData> >()
, m_count(0) , m_sync_count(0)
, m_dispatched(0)
{ {
} }
GSRasterizerList::~GSRasterizerList() GSRasterizerList::~GSRasterizerList()
{ {
for(vector<GSRasterizer*>::iterator i = begin(); i != end(); i++) for(vector<GSWorker*>::iterator i = m_workers.begin(); i != m_workers.end(); i++)
{ {
delete *i; delete *i;
} }
@ -950,46 +782,77 @@ GSRasterizerList::~GSRasterizerList()
void GSRasterizerList::Queue(shared_ptr<GSRasterizerData> data) void GSRasterizerList::Queue(shared_ptr<GSRasterizerData> data)
{ {
if(size() > 1 && data->solidrect) // TODO: clip to thread area and dispatch? Push(data);
{
Sync(); // complete previous drawings
front()->Draw(data);
return;
}
GSVector4i bbox = data->bbox.rintersect(data->scissor);
for(int i = 0; i < size(); i++)
{
GSRasterizer* r = (*this)[i];
if(r->IsOneOfMyScanlines(bbox.top, bbox.bottom))
{
r->Queue(data);
m_dispatched++;
}
}
m_count++;
} }
void GSRasterizerList::Sync() void GSRasterizerList::Sync()
{ {
if(m_count > 0) if(GetCount() == 0) return;
Wait(); // first dispatch all items to workers
for(size_t i = 0; i < m_workers.size(); i++)
{ {
for(int i = 0; i < size(); i++) m_workers[i]->Wait(); // then wait all workers to finish their jobs
{
(*this)[i]->Sync();
} }
m_sync_count++; m_sync_count++;
}
//printf("%d %d%%\n", m_count, 100 * m_dispatched / (m_count * size())); void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
{
if(m_workers.size() > 1 && item->solidrect) // TODO: clip to thread area and dispatch?
{
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Wait();
}
m_count = 0; m_workers.front()->Process(item);
m_dispatched = 0;
return;
}
if(item->syncpoint)
{
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Wait();
} }
} }
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Push(item);
}
}
// GSRasterizerList::GSWorker
// Stores the rasterizer this worker draws with. The worker takes ownership: the destructor deletes it.
GSRasterizerList::GSWorker::GSWorker(GSRasterizer* r)
: GSJobQueue<shared_ptr<GSRasterizerData> >()
, m_r(r)
{
}
// Waits for the job queue to drain before deleting the rasterizer that Process() draws with.
GSRasterizerList::GSWorker::~GSWorker()
{
Wait();
delete m_r;
}
void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
{
GSVector4i r = item->bbox.rintersect(item->scissor);
if(m_r->IsOneOfMyScanlines(r.top, r.bottom))
{
GSJobQueue<shared_ptr<GSRasterizerData> >::Push(item);
}
}
// GSJobQueue callback: draws the item with this worker's rasterizer.
void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
{
m_r->Draw(item);
}

View File

@ -26,6 +26,7 @@
#include "GSFunctionMap.h" #include "GSFunctionMap.h"
#include "GSThread.h" #include "GSThread.h"
#include "GSAlignedClass.h" #include "GSAlignedClass.h"
#include "GSPerfMon.h"
__aligned(class, 32) GSRasterizerData : public GSAlignedClass<32> __aligned(class, 32) GSRasterizerData : public GSAlignedClass<32>
{ {
@ -36,6 +37,7 @@ public:
GSVertexSW* vertices; GSVertexSW* vertices;
int count; int count;
bool solidrect; bool solidrect;
bool syncpoint;
uint64 frame; uint64 frame;
void* param; void* param;
@ -43,6 +45,7 @@ public:
: vertices(NULL) : vertices(NULL)
, count(0) , count(0)
, solidrect(false) , solidrect(false)
, syncpoint(false)
, param(NULL) , param(NULL)
{ {
} }
@ -103,8 +106,6 @@ public:
virtual void Sync() = 0; virtual void Sync() = 0;
}; };
#include "GSPerfMon.h"
__aligned(class, 32) GSRasterizer : public IRasterizer __aligned(class, 32) GSRasterizer : public IRasterizer
{ {
protected: protected:
@ -148,59 +149,33 @@ public:
void Sync() {} void Sync() {}
}; };
class GSRasterizerMT : public GSRasterizer, private GSThread class GSRasterizerList
: public IRasterizer
, private GSJobQueue<shared_ptr<GSRasterizerData> >
{ {
protected: protected:
volatile bool m_exit; class GSWorker : public GSJobQueue<shared_ptr<GSRasterizerData> >
volatile bool m_break; {
GSCritSec m_lock; GSRasterizer* m_r;
GSEvent m_draw;
queue<shared_ptr<GSRasterizerData> > m_queue;
void ThreadProc();
public: public:
GSRasterizerMT(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon); GSWorker(GSRasterizer* r);
virtual ~GSRasterizerMT(); virtual ~GSWorker();
// IRasterizer // GSJobQueue
void Queue(shared_ptr<GSRasterizerData> data); void Push(const shared_ptr<GSRasterizerData>& item);
void Sync(); void Process(shared_ptr<GSRasterizerData>& item);
}; };
#ifdef _WINDOWS vector<GSWorker*> m_workers;
class GSRasterizerMT2 : public GSRasterizer, private GSThread
{
protected:
SRWLOCK m_lock;
CONDITION_VARIABLE m_notempty;
CONDITION_VARIABLE m_empty;
queue<shared_ptr<GSRasterizerData> > m_queue;
void ThreadProc();
public:
GSRasterizerMT2(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
virtual ~GSRasterizerMT2();
// IRasterizer
void Queue(shared_ptr<GSRasterizerData> data);
void Sync();
};
#endif
class GSRasterizerList : public IRasterizer, protected vector<GSRasterizer*>
{
protected:
int m_count;
int m_dispatched;
GSRasterizerList(); GSRasterizerList();
// GSJobQueue
void Process(shared_ptr<GSRasterizerData>& item);
public: public:
virtual ~GSRasterizerList(); virtual ~GSRasterizerList();
@ -216,38 +191,19 @@ public:
{ {
GSRasterizerList* rl = new GSRasterizerList(); GSRasterizerList* rl = new GSRasterizerList();
#ifdef _WINDOWS
OSVERSIONINFOEX version;
memset(&version, 0, sizeof(version));
version.dwOSVersionInfoSize = sizeof(version);
GetVersionEx((OSVERSIONINFO*)&version);
if(version.dwMajorVersion >= 6)
{
for(int i = 0; i < threads; i++) for(int i = 0; i < threads; i++)
{ {
rl->push_back(new GSRasterizerMT2(new DS(), i, threads, perfmon)); rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
}
return rl;
}
#endif
for(int i = 0; i < threads; i++)
{
rl->push_back(new GSRasterizerMT(new DS(), i, threads, perfmon));
} }
return rl; return rl;
} }
} }
int m_sync_count;
// IRasterizer // IRasterizer
void Queue(shared_ptr<GSRasterizerData> data); void Queue(shared_ptr<GSRasterizerData> data);
void Sync(); void Sync();
int m_sync_count;
}; };

View File

@ -67,7 +67,7 @@ void GSRendererSW::Reset()
void GSRendererSW::VSync(int field) void GSRendererSW::VSync(int field)
{ {
Sync(); // IncAge might delete a cached texture in use Sync(0); // IncAge might delete a cached texture in use
/* /*
printf("CPU %d Sync %d W %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d\n", printf("CPU %d Sync %d W %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d | %d %d %d\n",
m_perfmon.CPU(GSPerfMon::Main), m_perfmon.CPU(GSPerfMon::Main),
@ -127,7 +127,7 @@ void GSRendererSW::ResetDevice()
GSTexture* GSRendererSW::GetOutput(int i) GSTexture* GSRendererSW::GetOutput(int i)
{ {
Sync(); Sync(1);
const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB; const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;
@ -166,17 +166,6 @@ void GSRendererSW::Draw()
{ {
if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass); if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
if(m_fzb != m_context->offset.fzb)
{
// rasterizers must write the same outputs at the same time, this makes sure each thread has its own private surface area
// TODO: detect if frame/zbuf overlap eachother (?)
m_fzb = m_context->offset.fzb;
Sync();
}
shared_ptr<GSRasterizerData> data(new GSRasterizerData2()); shared_ptr<GSRasterizerData> data(new GSRasterizerData2());
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param; GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
@ -196,6 +185,13 @@ void GSRendererSW::Draw()
data->solidrect = gd->sel.IsSolidRect(); data->solidrect = gd->sel.IsSolidRect();
data->frame = m_perfmon.GetFrame(); data->frame = m_perfmon.GetFrame();
if(m_fzb != m_context->offset.fzb)
{
m_fzb = m_context->offset.fzb;
data->syncpoint = true;
}
GSVector4i r = data->bbox.rintersect(data->scissor); GSVector4i r = data->bbox.rintersect(data->scissor);
if(gd->sel.fwrite) if(gd->sel.fwrite)
@ -210,7 +206,7 @@ void GSRendererSW::Draw()
if(s_dump) if(s_dump)
{ {
Sync(); Sync(3);
uint64 frame = m_perfmon.GetFrame(); uint64 frame = m_perfmon.GetFrame();
@ -243,7 +239,7 @@ void GSRendererSW::Draw()
m_rl->Queue(data); m_rl->Queue(data);
Sync(); Sync(4);
if(s_save && s_n >= s_saven) if(s_save && s_n >= s_saven)
{ {
@ -291,9 +287,9 @@ void GSRendererSW::Draw()
*/ */
} }
void GSRendererSW::Sync() void GSRendererSW::Sync(int reason)
{ {
//printf("sync\n"); //printf("sync %d\n", reason);
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync); GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
@ -313,7 +309,7 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
if(CheckPages(o, r)) // check if the changing pages either used as a texture or a target if(CheckPages(o, r)) // check if the changing pages either used as a texture or a target
{ {
Sync(); Sync(5);
} }
} }
@ -325,7 +321,7 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
if(CheckPages(o, r)) // TODO: only checking m_fzb_pages would be enough (read-backs are rare anyway) if(CheckPages(o, r)) // TODO: only checking m_fzb_pages would be enough (read-backs are rare anyway)
{ {
Sync(); Sync(6);
} }
} }
@ -337,8 +333,10 @@ void GSRendererSW::InvalidatePages(const GSTextureCacheSW::Texture* t)
{ {
if(m_fzb_pages[i] & t->m_pages[i]) // currently being drawn to? => sync if(m_fzb_pages[i] & t->m_pages[i]) // currently being drawn to? => sync
{ {
Sync(); //
Sync(7);
//
return; return;
} }

View File

@ -67,7 +67,7 @@ protected:
GSTexture* GetOutput(int i); GSTexture* GetOutput(int i);
void Draw(); void Draw();
void Sync(); void Sync(int reason);
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);

View File

@ -68,110 +68,6 @@ public:
bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;} bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;}
}; };
// TODO: pthreads version (needs manual-reset event)
// Bounded FIFO queue guarded by two Win32 semaphores and the inherited GSCritSec.
// m_put is created with "count" permits and is waited on by Enqueue(); m_get is created with
// zero permits and is waited on by Dequeue(). Each operation releases one permit on the other
// semaphore, so Enqueue() blocks once "count" items are queued and Dequeue() blocks while the
// queue is empty.
// m_enqueue is set when an item is added and reset when the queue becomes empty;
// m_dequeue is the opposite (it is also set by the constructor, when the queue starts out empty).
template<
class T,
class ENQUEUE_EVENT = GSEvent,
class DEQUEUE_EVENT = GSEvent>
class GSQueue : public GSCritSec
{
std::list<T> m_queue;
HANDLE m_put; // semaphore, waited on by Enqueue(), released by Dequeue()
HANDLE m_get; // semaphore, waited on by Dequeue(), released by Enqueue()
ENQUEUE_EVENT m_enqueue;
DEQUEUE_EVENT m_dequeue;
long m_count; // capacity passed to the constructor, returned by GetMaxCount()
public:
// NOTE(review): both events are constructed with "true"; presumably this selects manual-reset
// (see the TODO about a pthreads version) - confirm against the event class.
GSQueue(long count)
: m_enqueue(true)
, m_dequeue(true)
, m_count(count)
{
m_put = CreateSemaphore(NULL, count, count, NULL);
m_get = CreateSemaphore(NULL, 0, count, NULL);
m_dequeue.Set();
}
virtual ~GSQueue()
{
CloseHandle(m_put);
CloseHandle(m_get);
}
// Returns the current number of queued items; the lock is not taken (see the commented-out line).
size_t GetCount() const
{
// GSAutoLock cAutoLock(this);
return m_queue.size();
}
// Returns the capacity given to the constructor.
size_t GetMaxCount() const
{
// GSAutoLock cAutoLock(this);
return (size_t)m_count;
}
ENQUEUE_EVENT& GetEnqueueEvent()
{
return m_enqueue;
}
DEQUEUE_EVENT& GetDequeueEvent()
{
return m_dequeue;
}
// Waits for a m_put permit, appends the item under the lock, then releases one m_get permit.
void Enqueue(T item)
{
WaitForSingleObject(m_put, INFINITE);
{
GSAutoLock cAutoLock(this);
m_queue.push_back(item);
m_enqueue.Set();
m_dequeue.Reset();
}
ReleaseSemaphore(m_get, 1, NULL);
}
// Waits for a m_get permit, removes and returns the front item, then releases one m_put permit.
T Dequeue()
{
T item;
WaitForSingleObject(m_get, INFINITE);
{
GSAutoLock cAutoLock(this);
item = m_queue.front();
m_queue.pop_front();
if(m_queue.empty())
{
m_enqueue.Reset();
m_dequeue.Set();
}
}
ReleaseSemaphore(m_put, 1, NULL);
return item;
}
// Returns a copy of the front item without removing it. Takes no lock itself;
// the original comment asks the caller to lock on "this".
T Peek() // lock on "this"
{
return m_queue.front();
}
};
#else #else
#include <pthread.h> #include <pthread.h>
@ -263,3 +159,210 @@ public:
return true; return true;
} }
}; };
template<class T> class GSJobQueue : private GSThread
{
protected:
int m_count;
queue<T> m_queue;
volatile bool m_exit;
struct {GSCritSec lock; GSEvent notempty, empty;} m_ev;
#ifdef _WINDOWS
struct {SRWLOCK lock; CONDITION_VARIABLE notempty, empty; bool available;} m_cv;
#endif
void ThreadProc()
{
#ifdef _WINDOWS
if(m_cv.available)
{
AcquireSRWLockExclusive(&m_cv.lock);
while(true)
{
while(m_queue.empty())
{
SleepConditionVariableSRW(&m_cv.notempty, &m_cv.lock, INFINITE, 0);
if(m_exit) {ReleaseSRWLockExclusive(&m_cv.lock); return;}
}
{
T item = m_queue.front();
ReleaseSRWLockExclusive(&m_cv.lock);
Process(item);
AcquireSRWLockExclusive(&m_cv.lock);
}
m_queue.pop();
if(m_queue.empty())
{
WakeConditionVariable(&m_cv.empty);
}
}
}
else
{
#endif
while(m_ev.notempty.Wait())
{
if(m_exit) break;
while(!m_queue.empty())
{
T item;
{
GSAutoLock l(&m_ev.lock);
item = m_queue.front();
}
Process(item);
{
GSAutoLock l(&m_ev.lock);
m_queue.pop();
}
}
}
#ifdef _WINDOWS
}
#endif
}
public:
GSJobQueue()
: m_count(0)
, m_exit(false)
{
m_cv.available = false;
#ifdef _WINDOWS
OSVERSIONINFOEX version;
memset(&version, 0, sizeof(version));
version.dwOSVersionInfoSize = sizeof(version);
GetVersionEx((OSVERSIONINFO*)&version);
if(version.dwMajorVersion >= 6)
{
InitializeSRWLock(&m_cv.lock);
InitializeConditionVariable(&m_cv.notempty);
InitializeConditionVariable(&m_cv.empty);
m_cv.available = true;
}
#endif
CreateThread();
}
virtual ~GSJobQueue()
{
m_exit = true;
#ifdef _WINDOWS
if(m_cv.available)
{
WakeConditionVariable(&m_cv.notempty);
}
else
{
#endif
m_ev.notempty.Set();
#ifdef _WINDOWS
}
#endif
}
int GetCount() const
{
return m_count;
}
virtual void Push(const T& item)
{
#ifdef _WINDOWS
if(m_cv.available)
{
AcquireSRWLockExclusive(&m_cv.lock);
m_queue.push(item);
ReleaseSRWLockExclusive(&m_cv.lock);
WakeConditionVariable(&m_cv.notempty);
}
else
{
#endif
GSAutoLock l(&m_ev.lock);
m_queue.push(item);
m_ev.notempty.Set();
#ifdef _WINDOWS
}
#endif
m_count++;
}
virtual void Wait()
{
#ifdef _WINDOWS
if(m_cv.available)
{
AcquireSRWLockExclusive(&m_cv.lock);
while(!m_queue.empty())
{
SleepConditionVariableSRW(&m_cv.empty, &m_cv.lock, INFINITE, 0);
}
ReleaseSRWLockExclusive(&m_cv.lock);
}
else
{
#endif
while(!m_queue.empty()) _mm_pause();
#ifdef _WINDOWS
}
#endif
m_count++;
}
virtual void Process(T& item) = 0;
};