mirror of https://github.com/PCSX2/pcsx2.git
gsdx: remove aggressive threading
http://wiki.pcsx2.net/index.php/PCSX2_Documentation/Threading_Basics
This commit is contained in:
parent
a46204ef9e
commit
19c9a0b441
|
@ -341,7 +341,6 @@ void populate_sw_table(GtkWidget* sw_table)
|
|||
|
||||
GtkWidget* aa_check = CreateCheckBox("Edge anti-aliasing (AA1)", "aa1");
|
||||
GtkWidget* mipmap_check = CreateCheckBox("Mipmap", "mipmap", true);
|
||||
GtkWidget* spin_thread_check= CreateCheckBox("Disable thread sleeping (6+ cores CPU)", "spin_thread");
|
||||
|
||||
AddTooltip(aa_check, IDC_AA1);
|
||||
AddTooltip(mipmap_check, IDC_MIPMAP);
|
||||
|
@ -350,7 +349,6 @@ void populate_sw_table(GtkWidget* sw_table)
|
|||
s_table_line = 0;
|
||||
InsertWidgetInTable(sw_table , threads_label , threads_spin);
|
||||
InsertWidgetInTable(sw_table , aa_check, mipmap_check);
|
||||
InsertWidgetInTable(sw_table , spin_thread_check , spin_thread_check);
|
||||
}
|
||||
|
||||
void populate_shader_table(GtkWidget* shader_table)
|
||||
|
|
|
@ -1232,27 +1232,3 @@ void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
|
|||
{
|
||||
m_r->Draw(item.get());
|
||||
}
|
||||
|
||||
// GSRasterizerList::GSWorkerSpin
|
||||
// Creates a spinning worker around rasterizer `r`; the pointer is stored in
// m_r and later deleted by ~GSWorkerSpin().
GSRasterizerList::GSWorkerSpin::GSWorkerSpin(GSRasterizer* r)
	: GSJobQueueSpin<shared_ptr<GSRasterizerData>, 256>(), m_r(r)
{
}
|
||||
|
||||
// Lets the queue drain, then releases the rasterizer held in m_r.
GSRasterizerList::GSWorkerSpin::~GSWorkerSpin()
{
	// Must come first: queued items are drawn through m_r by Process().
	this->Wait();

	delete m_r;
}
|
||||
|
||||
int GSRasterizerList::GSWorkerSpin::GetPixels(bool reset)
|
||||
{
|
||||
return m_r->GetPixels(reset);
|
||||
}
|
||||
|
||||
// Queue callback: draws one queued item with the wrapped rasterizer.
void GSRasterizerList::GSWorkerSpin::Process(shared_ptr<GSRasterizerData>& item)
{
	GSRasterizerData* data = item.get();

	m_r->Draw(data);
}
|
||||
|
|
|
@ -195,23 +195,8 @@ protected:
|
|||
void Process(shared_ptr<GSRasterizerData>& item);
|
||||
};
|
||||
|
||||
// Rasterizer worker built on the spinning job queue (256-entry ring buffer).
// Each queued GSRasterizerData is handed to Process().
class GSWorkerSpin : public GSJobQueueSpin<shared_ptr<GSRasterizerData>, 256>
{
	GSRasterizer* m_r; // rasterizer passed to the constructor

public:
	// explicit: a bare GSRasterizer* must never silently convert to a worker.
	explicit GSWorkerSpin(GSRasterizer* r);
	virtual ~GSWorkerSpin();

	int GetPixels(bool reset);

	// GSJobQueue

	void Process(shared_ptr<GSRasterizerData>& item);
};
|
||||
|
||||
GSPerfMon* m_perfmon;
|
||||
vector<IGSJobQueue<shared_ptr<GSRasterizerData> > *> m_workers;
|
||||
vector<GSWorker*> m_workers;
|
||||
uint8* m_scanline;
|
||||
|
||||
GSRasterizerList(int threads, GSPerfMon* perfmon);
|
||||
|
@ -219,7 +204,7 @@ protected:
|
|||
public:
|
||||
virtual ~GSRasterizerList();
|
||||
|
||||
template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon, bool spin_thread = false)
|
||||
template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon)
|
||||
{
|
||||
threads = std::max<int>(threads, 0);
|
||||
|
||||
|
@ -233,9 +218,6 @@ public:
|
|||
|
||||
for(int i = 0; i < threads; i++)
|
||||
{
|
||||
if (spin_thread)
|
||||
rl->m_workers.push_back(new GSWorkerSpin(new GSRasterizer(new DS(), i, threads, perfmon)));
|
||||
else
|
||||
rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
|
||||
}
|
||||
|
||||
|
|
|
@ -41,8 +41,7 @@ GSRendererSW::GSRendererSW(int threads)
|
|||
|
||||
memset(m_texture, 0, sizeof(m_texture));
|
||||
|
||||
bool spin_thread = !!theApp.GetConfig("spin_thread", 0);
|
||||
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon, spin_thread);
|
||||
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon);
|
||||
|
||||
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
|
||||
|
||||
|
|
|
@ -82,7 +82,6 @@ public:
|
|||
|
||||
#endif
|
||||
|
||||
// To allow switching between queues dynamically
|
||||
template<class T> class IGSJobQueue : public GSThread
|
||||
{
|
||||
public:
|
||||
|
@ -97,9 +96,6 @@ public:
|
|||
virtual int GetPixels(bool reset) = 0;
|
||||
};
|
||||
|
||||
// This queue doesn't reserve any thread. It is the nicer choice for 2-core/4-core CPUs.
|
||||
// pros: no hard limit on thread numbers
|
||||
// cons: lower performance per thread
|
||||
template<class T, int CAPACITY> class GSJobQueue : public IGSJobQueue<T>
|
||||
{
|
||||
protected:
|
||||
|
@ -187,156 +183,3 @@ public:
|
|||
this->Process(item);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// This queue reserves 'only' the RENDERING threads; mostly the same performance as a no-reservation queue if the CPU is fast enough
|
||||
// pros: nearly best fps by thread
|
||||
// cons: requires (1 + eThreads) cores for GS emulation only! Reserved for 6/8-core CPUs.
|
||||
// Note: I'm not sure of the source of the speedup
|
||||
// 1/ It could be related to less MT logic (lock, cond var)
|
||||
// 2/ But I highly suspect that waking up a thread is rather slow. My guess
|
||||
// is that low-power features (like C-states) increase latency. In this case
|
||||
// the gain will be smaller if PCSX2 is running or on a CPU with few cores (<=4)
|
||||
// Job queue whose worker thread never sleeps: ThreadProc() polls m_count and
// yields while idle instead of blocking. Only Wait() blocks, on m_empty.
//
// T        - item type stored in m_queue and handed to Process().
// CAPACITY - size parameter of the ring buffer; Push() spins while
//            m_queue.push() fails.
//
// NOTE(review): Push()/Wait() look like they are meant to be called from a
// single producer thread - confirm against callers.
template<class T, int CAPACITY> class GSJobQueueSpin : public IGSJobQueue<T>
{
protected:
	std::atomic<int16_t> m_count; // items counted by Push() and not yet subtracted by the worker
	std::atomic<bool> m_exit;     // set by the destructor to stop ThreadProc()
	ringbuffer_base<T, CAPACITY> m_queue;

	std::mutex m_lock;               // serialises the worker's decrement with Wait()
	std::condition_variable m_empty; // signalled once m_count has dropped to zero

	// Worker loop: spin until work is counted, drain it, then publish the
	// new count and wake a thread blocked in Wait().
	void ThreadProc() {
		std::unique_lock<std::mutex> l(m_lock, std::defer_lock);

		while (true) {

			// Idle: yield instead of sleeping; leave only when the queue
			// is empty and exit was requested.
			while (m_count == 0) {
				if (m_exit.load(std::memory_order_acquire)) return;
				std::this_thread::yield();
			}

			// Drain exactly the number of items counted so far. Push()
			// queues an item before counting it, so attempting one more
			// could consume an uncounted item and drive m_count negative.
			int16_t consumed = 0;
			for (int16_t nb = m_count; nb > 0; nb--) {
				if (m_queue.consume_one(*this))
					consumed++;
			}

			// Decrement under the lock so Wait() cannot check m_count and
			// then miss the notification below.
			l.lock();

			m_count -= consumed;

			l.unlock();

			if (m_count <= 0)
				m_empty.notify_one();

		}
	}

public:
	// Starts the worker thread immediately.
	GSJobQueueSpin() :
		m_count(0),
		m_exit(false)
	{
		this->CreateThread();
	}

	// Requests exit and closes the thread; ThreadProc() only returns once
	// m_count is zero.
	virtual ~GSJobQueueSpin() {
		m_exit.store(true, std::memory_order_release);
		this->CloseThread();
	}

	// True when no counted item is pending.
	bool IsEmpty() const {
		ASSERT(m_count >= 0);

		return m_count == 0;
	}

	// Queues `item`, yielding while the ring buffer rejects it, then
	// counts it so the worker picks it up.
	void Push(const T& item) {
		while(!m_queue.push(item))
			std::this_thread::yield();

		m_count++;
	}

	// Blocks until every counted item has been processed.
	void Wait() {
		if (m_count > 0) {
			std::unique_lock<std::mutex> l(m_lock);
			while (m_count > 0) {
				m_empty.wait(l);
			}
		}

		ASSERT(m_count == 0);
	}

	// Functor entry point used by m_queue.consume_one(*this).
	void operator() (T& item) {
		this->Process(item);
	}
};
|
||||
|
||||
// This queue reserves RENDERING threads + GS threads onto dedicated CPU
|
||||
// pros: best fps by thread
|
||||
// cons: requires (1 + eThreads) cores for GS emulation only! Reserved for 8-core CPUs.
|
||||
#if 0
|
||||
|
||||
// Fully spinning variant of the job queue: the worker polls m_count and
// Wait() polls as well, so no mutex or condition variable is involved.
template<class T> class GSJobQueue : public IGSJobQueue<T>
{
protected:
	std::atomic<int16_t> m_count; // incremented by Push(), decremented by the worker
	std::atomic<bool> m_exit;     // set by the destructor to stop ThreadProc()
	boost::lockfree::spsc_queue<T, boost::lockfree::capacity<255> > m_queue;

	// Worker loop: spin (yielding) until work is counted, then drain the queue.
	void ThreadProc() {
		while (true) {
			while (m_count == 0) {
				if (m_exit.load(std::memory_order_acquire)) return;
				std::this_thread::yield();
			}

			// The value returned by consume_all() is subtracted from the
			// pending count; items reach Process() through operator().
			m_count -= m_queue.consume_all(*this);
		}
	}

public:
	// Starts the worker thread immediately.
	GSJobQueue() :
		m_count(0),
		m_exit(false)
	{
		// this-> is required: CreateThread lives in a dependent base class.
		this->CreateThread();
	}

	// Requests exit and closes the thread; ThreadProc() only returns once
	// m_count is zero.
	virtual ~GSJobQueue() {
		m_exit = true;
		this->CloseThread();
	}

	// True when no counted item is pending.
	bool IsEmpty() const {
		ASSERT(m_count >= 0);

		return m_count == 0;
	}

	// Counts the item first, then queues it, yielding while the queue is full.
	void Push(const T& item) {
		m_count++;
		while(!m_queue.push(item))
			std::this_thread::yield();
	}

	// Spins (yielding) until every counted item has been processed.
	void Wait() {
		while (m_count > 0)
			std::this_thread::yield();

		ASSERT(m_count == 0);
	}

	virtual void Process(T& item) = 0;

	// Functor entry point used by m_queue.consume_all(*this).
	void operator() (T& item) {
		this->Process(item);
	}
};
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue