gsdx-queue: add a new option "spin_thread" to select the queue behavior at runtime

If someone has a more elegant solution, feel free to share it.

spin_thread = 0 // the default queue, which does not lock any thread
spin_thread = 1 // faster, but the GS thread will never stop; very bad for laptops
2015-03-13 19:52:04 +01:00 · 2015-03-13 19:52:04 +01:00 · 0aac47ca59
parent 9682061472
commit 0aac47ca59
4 changed files with 96 additions and 41 deletions
--- a/plugins/GSdx/GSRasterizer.cpp
+++ b/plugins/GSdx/GSRasterizer.cpp
@ -1147,7 +1147,7 @@ GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)

 GSRasterizerList::~GSRasterizerList()
 {
-	for(vector<GSWorker*>::iterator i = m_workers.begin(); i != m_workers.end(); i++)
+	for(auto i = m_workers.begin(); i != m_workers.end(); i++)
 	{
 		delete *i;
 	}
@ -1210,13 +1210,13 @@ int GSRasterizerList::GetPixels(bool reset)

 // GSRasterizerList::GSWorker

-GSRasterizerList::GSWorker::GSWorker(GSRasterizer* r) 
+GSRasterizerList::GSWorker::GSWorker(GSRasterizer* r)
 	: GSJobQueue<shared_ptr<GSRasterizerData> >()
 	, m_r(r)
 {
 }

-GSRasterizerList::GSWorker::~GSWorker() 
+GSRasterizerList::GSWorker::~GSWorker()
 {
 	Wait();

@ -1228,7 +1228,33 @@ int GSRasterizerList::GSWorker::GetPixels(bool reset)
 	return m_r->GetPixels(reset);
 }

-void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item) 
+void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
 {
 	m_r->Draw(item.get());
 }
+
+// GSRasterizerList::GSWorkerSpin
+#ifdef ENABLE_BOOST
+GSRasterizerList::GSWorkerSpin::GSWorkerSpin(GSRasterizer* r)
+	: GSJobQueueSpin<shared_ptr<GSRasterizerData> >()
+	, m_r(r)
+{
+}
+
+GSRasterizerList::GSWorkerSpin::~GSWorkerSpin()
+{
+	Wait();
+
+	delete m_r;
+}
+
+int GSRasterizerList::GSWorkerSpin::GetPixels(bool reset)
+{
+	return m_r->GetPixels(reset);
+}
+
+void GSRasterizerList::GSWorkerSpin::Process(shared_ptr<GSRasterizerData>& item)
+{
+	m_r->Draw(item.get());
+}
+#endif
--- a/plugins/GSdx/GSRasterizer.h
+++ b/plugins/GSdx/GSRasterizer.h
@ -199,8 +199,29 @@ protected:
 		void Process(shared_ptr<GSRasterizerData>& item);
 	};

+#ifdef ENABLE_BOOST
+	class GSWorkerSpin : public GSJobQueueSpin<shared_ptr<GSRasterizerData> >
+	{
+		GSRasterizer* m_r;
+
+	public:
+		GSWorkerSpin(GSRasterizer* r);
+		virtual ~GSWorkerSpin();
+
+		int GetPixels(bool reset);
+
+		// GSJobQueue
+
+		void Process(shared_ptr<GSRasterizerData>& item);
+	};
+#endif
+
 	GSPerfMon* m_perfmon;
+#ifdef ENABLE_BOOST
+	vector<IGSJobQueue<shared_ptr<GSRasterizerData> > *> m_workers;
+#else
 	vector<GSWorker*> m_workers;
+#endif
 	uint8* m_scanline;

 	GSRasterizerList(int threads, GSPerfMon* perfmon);
@ -208,7 +229,7 @@ protected:
 public:
 	virtual ~GSRasterizerList();

-	template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon)
+	template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon, bool spin_thread = false)
 	{
 		threads = std::max<int>(threads, 0);

@ -222,7 +243,14 @@ public:

 			for(int i = 0; i < threads; i++)
 			{
+#ifdef ENABLE_BOOST
+				if (spin_thread)
+					rl->m_workers.push_back(new GSWorkerSpin(new GSRasterizer(new DS(), i, threads, perfmon)));
+				else
+					rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
+#else
 				rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
+#endif
 			}

 			return rl;
--- a/plugins/GSdx/GSRendererSW.cpp
+++ b/plugins/GSdx/GSRendererSW.cpp
@ -41,7 +41,8 @@ GSRendererSW::GSRendererSW(int threads)

 	memset(m_texture, 0, sizeof(m_texture));

-	m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon);
+	bool spin_thread = !!theApp.GetConfig("spin_thread", 0);
+	m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon, spin_thread);

 	m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);

--- a/plugins/GSdx/GSThread_CXX11.h
+++ b/plugins/GSdx/GSThread_CXX11.h
@ -82,26 +82,25 @@ public:

 #endif

-// Activate only a single define (From the lowest latency to better CPU usage)
+// Common interface that allows switching between queue implementations at runtime
+template<class T> class IGSJobQueue : public GSThread
+{
+public:
+	IGSJobQueue() {}
+	virtual ~IGSJobQueue() {}

-// This queue locks RENDERING threads + GS threads onto dedicated CPU
-// pros: best fps by thread
-// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 8 cores CPU.
-//#define NO_WAIT_BUT_CPU_INTENSIVE
+	virtual bool IsEmpty() const = 0;
+	virtual void Push(const T& item) = 0;
+	virtual void Wait() = 0;

-// This queue locks 'only' RENDERING threads mostly the same performance as above it the CPU is fast enough
-// pros: nearly best fps by thread
-// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 6/8 cores CPU.
-//#define WAIT_ON_GS_STILL_CPU_INTENSIVE
+	virtual void Process(T& item) = 0;
+	virtual int GetPixels(bool reset) = 0;
+};

 // This queue doesn't lock any thread. It would be nicer for 2c/4c CPU.
 // pros: no hard limit on thread numbers
 // cons: less performance by thread
-#define FULL_WAIT_LESS_CPU_INTENSIVE
-
-#if defined(FULL_WAIT_LESS_CPU_INTENSIVE)
-
-template<class T> class GSJobQueue : private GSThread
+template<class T> class GSJobQueue : public IGSJobQueue<T>
 {
 protected:
 	std::atomic<int16_t> m_count;
@ -145,13 +144,13 @@ public:
 		m_count(0),
 		m_exit(false)
 	{
-		CreateThread();
-	};
+		this->CreateThread();
+	}

 	virtual ~GSJobQueue() {
 		m_exit = true;
 		m_notempty.notify_one();
-		CloseThread();
+		this->CloseThread();
 	}

 	bool IsEmpty() const {
@ -184,16 +183,16 @@ public:
 		ASSERT(m_count == 0);
 	}

-	virtual void Process(T& item) = 0;
-
-	void operator()(T& item) {
-		Process(item);
+	void operator() (T& item) {
+		this->Process(item);
 	}
 };

-#elif defined(WAIT_ON_GS_STILL_CPU_INTENSIVE)

-template<class T> class GSJobQueue : private GSThread
+// This queue locks 'only' the RENDERING threads; performance is mostly the same as the
+// queue that also locks the GS thread (disabled below) if the CPU is fast enough
+// pros: nearly the best fps per thread
+// cons: requires (1 + eThreads) cores for GS emulation only! Reserved for 6/8-core CPUs.
+template<class T> class GSJobQueueSpin : public IGSJobQueue<T>
 {
 protected:
 	std::atomic<int16_t> m_count;
@ -232,16 +231,16 @@ protected:
 	}

 public:
-	GSJobQueue() :
+	GSJobQueueSpin() :
 		m_count(0),
 		m_exit(false)
 	{
-		CreateThread();
+		this->CreateThread();
 	};

-	virtual ~GSJobQueue() {
+	virtual ~GSJobQueueSpin() {
 		m_exit = true;
-		CloseThread();
+		this->CloseThread();
 	}

 	bool IsEmpty() const {
@ -270,14 +269,17 @@ public:

 	virtual void Process(T& item) = 0;

-	void operator()(T& item) {
-		Process(item);
+	void operator() (T& item) {
+		this->Process(item);
 	}
 };

-#elif defined(NO_WAIT_BUT_CPU_INTENSIVE)
+// This queue locks RENDERING threads + GS threads onto dedicated CPU
+// pros: best fps by thread
+// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 8 cores CPU.
+#if 0

-template<class T> class GSJobQueue : private GSThread
+template<class T> class GSJobQueue : public IGSJobQueue<T>
 {
 protected:
 	std::atomic<int16_t> m_count;
@ -329,11 +331,9 @@ public:

 	virtual void Process(T& item) = 0;

-	void operator()(T& item) {
-		Process(item);
+	void operator() (T& item) {
+		this->Process(item);
 	}
 };

-#else
-	#very bad
 #endif