gsdx-queue:add a new job dispatcher queue based on boost and C++11

It is faster on linux, it requires less code, and it is "portable"

It requires boost (only hpp files) + MSVC 2013 (for atomic) (seem doable by 2012 too)

Actually there are several queues that either use spinlock or full sleep
This commit is contained in:
Gregory Hainaut 2015-03-03 10:01:22 +01:00
parent a75d78bd7e
commit 9682061472
3 changed files with 350 additions and 1 deletions

View File

@ -24,9 +24,13 @@
#include "GS.h" #include "GS.h"
#include "GSVertexSW.h" #include "GSVertexSW.h"
#include "GSFunctionMap.h" #include "GSFunctionMap.h"
#include "GSThread.h"
#include "GSAlignedClass.h" #include "GSAlignedClass.h"
#include "GSPerfMon.h" #include "GSPerfMon.h"
#ifdef ENABLE_BOOST
#include "GSThread_CXX11.h"
#else
#include "GSThread.h"
#endif
__aligned(class, 32) GSRasterizerData : public GSAlignedClass<32> __aligned(class, 32) GSRasterizerData : public GSAlignedClass<32>
{ {

View File

@ -20,10 +20,15 @@
*/ */
#include "stdafx.h" #include "stdafx.h"
#ifdef ENABLE_BOOST
#include "GSThread_CXX11.h"
#else
#include "GSThread.h" #include "GSThread.h"
#endif
#ifdef _WINDOWS #ifdef _WINDOWS
#ifndef ENABLE_BOOST
InitializeConditionVariablePtr pInitializeConditionVariable; InitializeConditionVariablePtr pInitializeConditionVariable;
WakeConditionVariablePtr pWakeConditionVariable; WakeConditionVariablePtr pWakeConditionVariable;
WakeAllConditionVariablePtr pWakeAllConditionVariable; WakeAllConditionVariablePtr pWakeAllConditionVariable;
@ -65,6 +70,7 @@ public:
}; };
static InitCondVar s_icv; static InitCondVar s_icv;
#endif
#endif #endif

View File

@ -0,0 +1,339 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSdx.h"
#include <boost/lockfree/spsc_queue.hpp>
class IGSThread
{
protected:
virtual void ThreadProc() = 0;
};
// let us use std::thread for now, comment out the definition to go back to pthread
// There are currently some bugs/limitations to std::thread (see various comment)
// For the moment let's keep pthread but uses new std object (mutex, cond_var)
//#define _STD_THREAD_
#ifdef _WINDOWS
class GSThread : public IGSThread
{
DWORD m_ThreadId;
HANDLE m_hThread;
static DWORD WINAPI StaticThreadProc(void* lpParam);
protected:
void CreateThread();
void CloseThread();
public:
GSThread();
virtual ~GSThread();
};
#else
#ifdef _STD_THREAD_
#include <thread>
#else
#include <pthread.h>
#endif
class GSThread : public IGSThread
{
#ifdef _STD_THREAD_
std::thread *t;
#else
pthread_attr_t m_thread_attr;
pthread_t m_thread;
#endif
static void* StaticThreadProc(void* param);
protected:
void CreateThread();
void CloseThread();
public:
GSThread();
virtual ~GSThread();
};
#endif
// Activate only a single define (From the lowest latency to better CPU usage)
// This queue locks RENDERING threads + GS threads onto dedicated CPU
// pros: best fps by thread
// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 8 cores CPU.
//#define NO_WAIT_BUT_CPU_INTENSIVE
// This queue locks 'only' RENDERING threads mostly the same performance as above it the CPU is fast enough
// pros: nearly best fps by thread
// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 6/8 cores CPU.
//#define WAIT_ON_GS_STILL_CPU_INTENSIVE
// This queue doesn't lock any thread. It would be nicer for 2c/4c CPU.
// pros: no hard limit on thread numbers
// cons: less performance by thread
#define FULL_WAIT_LESS_CPU_INTENSIVE
#if defined(FULL_WAIT_LESS_CPU_INTENSIVE)
template<class T> class GSJobQueue : private GSThread
{
protected:
std::atomic<int16_t> m_count;
std::atomic<bool> m_exit;
boost::lockfree::spsc_queue<T, boost::lockfree::capacity<256> > m_queue;
std::mutex m_lock;
std::condition_variable m_empty;
std::condition_variable m_notempty;
void ThreadProc() {
std::unique_lock<std::mutex> l(m_lock);
while (true) {
while (m_count == 0) {
if (m_exit.load(memory_order_acquire)) return;
m_notempty.wait(l);
}
l.unlock();
int16_t consumed = 0;
for (int16_t nb = m_count; nb >= 0; nb--) {
if (m_queue.consume_one(*this))
consumed++;
}
l.lock();
m_count -= consumed;
if (m_count <= 0)
m_empty.notify_one();
}
}
public:
GSJobQueue() :
m_count(0),
m_exit(false)
{
CreateThread();
};
virtual ~GSJobQueue() {
m_exit = true;
m_notempty.notify_one();
CloseThread();
}
bool IsEmpty() const {
ASSERT(m_count >= 0);
return m_count == 0;
}
void Push(const T& item) {
while(!m_queue.push(item))
std::this_thread::yield();
std::unique_lock<std::mutex> l(m_lock);
m_count++;
l.unlock();
m_notempty.notify_one();
}
void Wait() {
if (m_count > 0) {
std::unique_lock<std::mutex> l(m_lock);
while (m_count > 0) {
m_empty.wait(l);
}
}
ASSERT(m_count == 0);
}
virtual void Process(T& item) = 0;
void operator()(T& item) {
Process(item);
}
};
#elif defined(WAIT_ON_GS_STILL_CPU_INTENSIVE)
template<class T> class GSJobQueue : private GSThread
{
protected:
std::atomic<int16_t> m_count;
std::atomic<bool> m_exit;
boost::lockfree::spsc_queue<T, boost::lockfree::capacity<256> > m_queue;
std::mutex m_lock;
std::condition_variable m_empty;
void ThreadProc() {
std::unique_lock<std::mutex> l(m_lock, defer_lock);
while (true) {
while (m_count == 0) {
if (m_exit.load(memory_order_acquire)) return;
std::this_thread::yield();
}
int16_t consumed = 0;
for (int16_t nb = m_count; nb >= 0; nb--) {
if (m_queue.consume_one(*this))
consumed++;
}
l.lock();
m_count -= consumed;
l.unlock();
if (m_count <= 0)
m_empty.notify_one();
}
}
public:
GSJobQueue() :
m_count(0),
m_exit(false)
{
CreateThread();
};
virtual ~GSJobQueue() {
m_exit = true;
CloseThread();
}
bool IsEmpty() const {
ASSERT(m_count >= 0);
return m_count == 0;
}
void Push(const T& item) {
while(!m_queue.push(item))
std::this_thread::yield();
m_count++;
}
void Wait() {
if (m_count > 0) {
std::unique_lock<std::mutex> l(m_lock);
while (m_count > 0) {
m_empty.wait(l);
}
}
ASSERT(m_count == 0);
}
virtual void Process(T& item) = 0;
void operator()(T& item) {
Process(item);
}
};
#elif defined(NO_WAIT_BUT_CPU_INTENSIVE)
template<class T> class GSJobQueue : private GSThread
{
protected:
std::atomic<int16_t> m_count;
std::atomic<bool> m_exit;
boost::lockfree::spsc_queue<T, boost::lockfree::capacity<256> > m_queue;
void ThreadProc() {
while (true) {
while (m_count == 0) {
if (m_exit.load(memory_order_acquire)) return;
std::this_thread::yield();
}
m_count -= m_queue.consume_all(*this);
}
}
public:
GSJobQueue() :
m_count(0),
m_exit(false)
{
CreateThread();
};
virtual ~GSJobQueue() {
m_exit = true;
CloseThread();
}
bool IsEmpty() const {
ASSERT(m_count >= 0);
return m_count == 0;
}
void Push(const T& item) {
m_count++;
while(!m_queue.push(item))
std::this_thread::yield();
}
void Wait() {
while (m_count > 0)
std::this_thread::yield();
ASSERT(m_count == 0);
}
virtual void Process(T& item) = 0;
void operator()(T& item) {
Process(item);
}
};
#else
#very bad
#endif