gsdx-queue: add a new option "spin_thread" to select the queue behavior at runtime

If someone has a more elegant solution, feel free to share it

spin_thread = 0
spin_thread = 1 // faster, but the GS thread will never stop; very bad for laptops
This commit is contained in:
Gregory Hainaut 2015-03-13 19:52:04 +01:00
parent 9682061472
commit 0aac47ca59
4 changed files with 96 additions and 41 deletions

View File

@ -1147,7 +1147,7 @@ GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
GSRasterizerList::~GSRasterizerList()
{
for(vector<GSWorker*>::iterator i = m_workers.begin(); i != m_workers.end(); i++)
for(auto i = m_workers.begin(); i != m_workers.end(); i++)
{
delete *i;
}
@ -1210,13 +1210,13 @@ int GSRasterizerList::GetPixels(bool reset)
// GSRasterizerList::GSWorker
GSRasterizerList::GSWorker::GSWorker(GSRasterizer* r)
GSRasterizerList::GSWorker::GSWorker(GSRasterizer* r)
: GSJobQueue<shared_ptr<GSRasterizerData> >()
, m_r(r)
{
}
GSRasterizerList::GSWorker::~GSWorker()
GSRasterizerList::GSWorker::~GSWorker()
{
Wait();
@ -1228,7 +1228,33 @@ int GSRasterizerList::GSWorker::GetPixels(bool reset)
return m_r->GetPixels(reset);
}
void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
{
m_r->Draw(item.get());
}
// GSRasterizerList::GSWorkerSpin
#ifdef ENABLE_BOOST
// Builds a worker on top of the GSJobQueueSpin queue and remembers the
// rasterizer `r` it will forward to. The pointer is stored as-is; this class's
// destructor is what deletes it.
GSRasterizerList::GSWorkerSpin::GSWorkerSpin(GSRasterizer* r)
	: GSJobQueueSpin<shared_ptr<GSRasterizerData> >(), m_r(r)
{
}
// Tears down the worker: calls Wait() first, then deletes the rasterizer held in m_r.
// NOTE(review): Wait() presumably blocks until every queued item has been processed,
// so that m_r is not deleted while Process() may still use it — confirm against
// GSJobQueueSpin::Wait.
GSRasterizerList::GSWorkerSpin::~GSWorkerSpin()
{
Wait();
delete m_r;
}
// Forwards the GetPixels query to the wrapped rasterizer, passing `reset`
// through unchanged, and returns whatever the rasterizer reports.
int GSRasterizerList::GSWorkerSpin::GetPixels(bool reset)
{
	const int pixels = m_r->GetPixels(reset);

	return pixels;
}
// Queue callback for a single item: hands the raw GSRasterizerData pointer
// owned by `item` to the wrapped rasterizer's Draw().
void GSRasterizerList::GSWorkerSpin::Process(shared_ptr<GSRasterizerData>& item)
{
	GSRasterizerData* data = item.get();

	m_r->Draw(data);
}
#endif

View File

@ -199,8 +199,29 @@ protected:
void Process(shared_ptr<GSRasterizerData>& item);
};
#ifdef ENABLE_BOOST
// Worker that pairs one GSRasterizer with the GSJobQueueSpin queue variant.
class GSWorkerSpin : public GSJobQueueSpin<shared_ptr<GSRasterizerData> >
{
private:
	// Rasterizer that GetPixels() and Process() forward to.
	GSRasterizer* m_r;

public:
	GSWorkerSpin(GSRasterizer* r);
	virtual ~GSWorkerSpin();

	int GetPixels(bool reset);

	// GSJobQueue
	void Process(shared_ptr<GSRasterizerData>& item);
};
#endif
GSPerfMon* m_perfmon;
#ifdef ENABLE_BOOST
vector<IGSJobQueue<shared_ptr<GSRasterizerData> > *> m_workers;
#else
vector<GSWorker*> m_workers;
#endif
uint8* m_scanline;
GSRasterizerList(int threads, GSPerfMon* perfmon);
@ -208,7 +229,7 @@ protected:
public:
virtual ~GSRasterizerList();
template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon)
template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon, bool spin_thread = false)
{
threads = std::max<int>(threads, 0);
@ -222,7 +243,14 @@ public:
for(int i = 0; i < threads; i++)
{
#ifdef ENABLE_BOOST
if (spin_thread)
rl->m_workers.push_back(new GSWorkerSpin(new GSRasterizer(new DS(), i, threads, perfmon)));
else
rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
#else
rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
#endif
}
return rl;

View File

@ -41,7 +41,8 @@ GSRendererSW::GSRendererSW(int threads)
memset(m_texture, 0, sizeof(m_texture));
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon);
bool spin_thread = !!theApp.GetConfig("spin_thread", 0);
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon, spin_thread);
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);

View File

@ -82,26 +82,25 @@ public:
#endif
// Activate only a single define (From the lowest latency to better CPU usage)
// To allow switching between queue dynamically
// Abstract base shared by the job queue implementations: code holding an
// IGSJobQueue<T>* can work with whichever concrete queue was created.
template<class T> class IGSJobQueue : public GSThread
{
public:
IGSJobQueue() {}
// Virtual so that a queue can be deleted through an IGSJobQueue<T> pointer.
virtual ~IGSJobQueue() {}
// This queue locks RENDERING threads + GS threads onto dedicated CPU
// pros: best fps by thread
// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 8 cores CPU.
//#define NO_WAIT_BUT_CPU_INTENSIVE
// Queue operations; each concrete queue class provides its own implementation.
virtual bool IsEmpty() const = 0;
virtual void Push(const T& item) = 0;
virtual void Wait() = 0;
// This queue locks 'only' RENDERING threads mostly the same performance as above if the CPU is fast enough
// pros: nearly best fps by thread
// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 6/8 cores CPU.
//#define WAIT_ON_GS_STILL_CPU_INTENSIVE
// Per-item work and pixel query; supplied by the worker class built on top of a queue.
virtual void Process(T& item) = 0;
virtual int GetPixels(bool reset) = 0;
};
// This queue doesn't lock any thread. It would be nicer for 2c/4c CPU.
// pros: no hard limit on thread numbers
// cons: less performance by thread
#define FULL_WAIT_LESS_CPU_INTENSIVE
#if defined(FULL_WAIT_LESS_CPU_INTENSIVE)
template<class T> class GSJobQueue : private GSThread
template<class T> class GSJobQueue : public IGSJobQueue<T>
{
protected:
std::atomic<int16_t> m_count;
@ -145,13 +144,13 @@ public:
m_count(0),
m_exit(false)
{
CreateThread();
};
this->CreateThread();
}
virtual ~GSJobQueue() {
m_exit = true;
m_notempty.notify_one();
CloseThread();
this->CloseThread();
}
bool IsEmpty() const {
@ -184,16 +183,16 @@ public:
ASSERT(m_count == 0);
}
virtual void Process(T& item) = 0;
void operator()(T& item) {
Process(item);
void operator() (T& item) {
this->Process(item);
}
};
#elif defined(WAIT_ON_GS_STILL_CPU_INTENSIVE)
template<class T> class GSJobQueue : private GSThread
// This queue locks 'only' RENDERING threads mostly the same performance as above if the CPU is fast enough
// pros: nearly best fps by thread
// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 6/8 cores CPU.
template<class T> class GSJobQueueSpin : public IGSJobQueue<T>
{
protected:
std::atomic<int16_t> m_count;
@ -232,16 +231,16 @@ protected:
}
public:
GSJobQueue() :
GSJobQueueSpin() :
m_count(0),
m_exit(false)
{
CreateThread();
this->CreateThread();
};
virtual ~GSJobQueue() {
virtual ~GSJobQueueSpin() {
m_exit = true;
CloseThread();
this->CloseThread();
}
bool IsEmpty() const {
@ -270,14 +269,17 @@ public:
virtual void Process(T& item) = 0;
void operator()(T& item) {
Process(item);
void operator() (T& item) {
this->Process(item);
}
};
#elif defined(NO_WAIT_BUT_CPU_INTENSIVE)
// This queue locks RENDERING threads + GS threads onto dedicated CPU
// pros: best fps by thread
// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 8 cores CPU.
#if 0
template<class T> class GSJobQueue : private GSThread
template<class T> class GSJobQueue : public IGSJobQueue<T>
{
protected:
std::atomic<int16_t> m_count;
@ -329,11 +331,9 @@ public:
virtual void Process(T& item) = 0;
void operator()(T& item) {
Process(item);
void operator() (T& item) {
this->Process(item);
}
};
#else
#very bad
#endif