gsdx: remove aggressive threading

http://wiki.pcsx2.net/index.php/PCSX2_Documentation/Threading_Basics
2015-11-20 14:56:29 +01:00 · 2015-11-20 14:56:29 +01:00 · 19c9a0b441
parent a46204ef9e
commit 19c9a0b441
5 changed files with 4 additions and 206 deletions
--- a/plugins/GSdx/GSLinuxDialog.cpp
+++ b/plugins/GSdx/GSLinuxDialog.cpp
@@ -341,7 +341,6 @@ void populate_sw_table(GtkWidget* sw_table)

 	GtkWidget* aa_check         = CreateCheckBox("Edge anti-aliasing (AA1)", "aa1");
 	GtkWidget* mipmap_check     = CreateCheckBox("Mipmap", "mipmap", true);
-	GtkWidget* spin_thread_check= CreateCheckBox("Disable thread sleeping (6+ cores CPU)", "spin_thread");

 	AddTooltip(aa_check, IDC_AA1);
 	AddTooltip(mipmap_check, IDC_MIPMAP);
@@ -350,7 +349,6 @@ void populate_sw_table(GtkWidget* sw_table)
 	s_table_line = 0;
 	InsertWidgetInTable(sw_table , threads_label     , threads_spin);
 	InsertWidgetInTable(sw_table , aa_check, mipmap_check);
-	InsertWidgetInTable(sw_table , spin_thread_check , spin_thread_check);
 }

 void populate_shader_table(GtkWidget* shader_table)
--- a/plugins/GSdx/GSRasterizer.cpp
+++ b/plugins/GSdx/GSRasterizer.cpp
@@ -1232,27 +1232,3 @@ void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
 {
 	m_r->Draw(item.get());
 }
-
-// GSRasterizerList::GSWorkerSpin
-GSRasterizerList::GSWorkerSpin::GSWorkerSpin(GSRasterizer* r)
-	: GSJobQueueSpin<shared_ptr<GSRasterizerData>, 256>()
-	, m_r(r)
-{
-}
-
-GSRasterizerList::GSWorkerSpin::~GSWorkerSpin()
-{
-	Wait();
-
-	delete m_r;
-}
-
-int GSRasterizerList::GSWorkerSpin::GetPixels(bool reset)
-{
-	return m_r->GetPixels(reset);
-}
-
-void GSRasterizerList::GSWorkerSpin::Process(shared_ptr<GSRasterizerData>& item)
-{
-	m_r->Draw(item.get());
-}
--- a/plugins/GSdx/GSRasterizer.h
+++ b/plugins/GSdx/GSRasterizer.h
@@ -195,23 +195,8 @@ protected:
 		void Process(shared_ptr<GSRasterizerData>& item);
 	};

-	class GSWorkerSpin : public GSJobQueueSpin<shared_ptr<GSRasterizerData>, 256>
-	{
-		GSRasterizer* m_r;
-
-	public:
-		GSWorkerSpin(GSRasterizer* r);
-		virtual ~GSWorkerSpin();
-
-		int GetPixels(bool reset);
-
-		// GSJobQueue
-
-		void Process(shared_ptr<GSRasterizerData>& item);
-	};
-
 	GSPerfMon* m_perfmon;
-	vector<IGSJobQueue<shared_ptr<GSRasterizerData> > *> m_workers;
+	vector<GSWorker*> m_workers;
 	uint8* m_scanline;

 	GSRasterizerList(int threads, GSPerfMon* perfmon);
@@ -219,7 +204,7 @@ protected:
 public:
 	virtual ~GSRasterizerList();

-	template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon, bool spin_thread = false)
+	template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon)
 	{
 		threads = std::max<int>(threads, 0);

@@ -233,10 +218,7 @@ public:

 			for(int i = 0; i < threads; i++)
 			{
-				if (spin_thread)
-					rl->m_workers.push_back(new GSWorkerSpin(new GSRasterizer(new DS(), i, threads, perfmon)));
-				else
-					rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
+				rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
 			}

 			return rl;
--- a/plugins/GSdx/GSRendererSW.cpp
+++ b/plugins/GSdx/GSRendererSW.cpp
@@ -41,8 +41,7 @@ GSRendererSW::GSRendererSW(int threads)

 	memset(m_texture, 0, sizeof(m_texture));

-	bool spin_thread = !!theApp.GetConfig("spin_thread", 0);
-	m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon, spin_thread);
+	m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon);

 	m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);

--- a/plugins/GSdx/GSThread_CXX11.h
+++ b/plugins/GSdx/GSThread_CXX11.h
@@ -82,7 +82,6 @@ public:

 #endif

-// To allow switching between queue dynamically
 template<class T> class IGSJobQueue : public GSThread
 {
 public:
@@ -97,9 +96,6 @@ public:
 	virtual int GetPixels(bool reset) = 0;
 };

-// This queue doesn't reserve any thread. It would be nicer for 2c/4c CPU.
-// pros: no hard limit on thread numbers
-// cons: less performance by thread
 template<class T, int CAPACITY> class GSJobQueue : public IGSJobQueue<T>
 {
 protected:
@@ -187,156 +183,3 @@ public:
 		this->Process(item);
 	}
 };
-
-
-// This queue reserves 'only' RENDERING threads mostly the same performance as a no reservation queue if the CPU is fast enough
-// pros: nearly best fps by thread
-// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 6/8 cores CPU.
-// Note: I'm not sure of the source of the speedup
-//		1/ It could be related to less MT logic (lock, cond var)
-//		2/ But I highly suspect that waking up thread is rather slow.  My guess
-//		is that low power feature (like C state) increases latency. In this case
-//		gain will be smaller if PCSX2 is running or in limited core CPU (<=4)
-template<class T, int CAPACITY> class GSJobQueueSpin : public IGSJobQueue<T>
-{
-protected:
-	std::atomic<int16_t> m_count;
-	std::atomic<bool> m_exit;
-	ringbuffer_base<T, CAPACITY> m_queue;
-
-	std::mutex m_lock;
-	std::condition_variable m_empty;
-
-	void ThreadProc() {
-		std::unique_lock<std::mutex> l(m_lock, defer_lock);
-
-		while (true) {
-
-			while (m_count == 0) {
-				if (m_exit.load(memory_order_acquire)) return;
-				std::this_thread::yield();
-			}
-
-			int16_t consumed = 0;
-			for (int16_t nb = m_count; nb >= 0; nb--) {
-				if (m_queue.consume_one(*this))
-					consumed++;
-			}
-
-			l.lock();
-
-			m_count -= consumed;
-
-			l.unlock();
-
-			if (m_count <= 0)
-				m_empty.notify_one();
-
-		}
-	}
-
-public:
-	GSJobQueueSpin() :
-		m_count(0),
-		m_exit(false)
-	{
-		this->CreateThread();
-	};
-
-	virtual ~GSJobQueueSpin() {
-		m_exit.store(true, memory_order_release);
-		this->CloseThread();
-	}
-
-	bool IsEmpty() const {
-		ASSERT(m_count >= 0);
-
-		return m_count == 0;
-	}
-
-	void Push(const T& item) {
-		while(!m_queue.push(item))
-			std::this_thread::yield();
-
-		m_count++;
-	}
-
-	void Wait() {
-		if (m_count > 0) {
-			std::unique_lock<std::mutex> l(m_lock);
-			while (m_count > 0) {
-				m_empty.wait(l);
-			}
-		}
-
-		ASSERT(m_count == 0);
-	}
-
-	void operator() (T& item) {
-		this->Process(item);
-	}
-};
-
-// This queue reserves RENDERING threads + GS threads onto dedicated CPU
-// pros: best fps by thread
-// cons: requires (1 + eThreads) cores for GS emulation only ! Reserved to 8 cores CPU.
-#if 0
-
-template<class T> class GSJobQueue : public IGSJobQueue<T>
-{
-protected:
-	std::atomic<int16_t> m_count;
-	std::atomic<bool> m_exit;
-	boost::lockfree::spsc_queue<T, boost::lockfree::capacity<255> > m_queue;
-
-	void ThreadProc() {
-		while (true) {
-			while (m_count == 0) {
-				if (m_exit.load(memory_order_acquire)) return;
-				std::this_thread::yield();
-			}
-
-			m_count -= m_queue.consume_all(*this);
-		}
-	}
-
-public:
-	GSJobQueue() :
-		m_count(0),
-		m_exit(false)
-	{
-		CreateThread();
-	};
-
-	virtual ~GSJobQueue() {
-		m_exit = true;
-		CloseThread();
-	}
-
-	bool IsEmpty() const {
-		ASSERT(m_count >= 0);
-
-		return m_count == 0;
-	}
-
-	void Push(const T& item) {
-		m_count++;
-		while(!m_queue.push(item))
-			std::this_thread::yield();
-	}
-
-	void Wait() {
-		while (m_count > 0)
-			std::this_thread::yield();
-
-		ASSERT(m_count == 0);
-	}
-
-	virtual void Process(T& item) = 0;
-
-	void operator() (T& item) {
-		this->Process(item);
-	}
-};
-
-#endif