Cocoa Port: Improve SoftRasterizer multithreading and stabilize multithreaded performance.
- Automatic setting of the SoftRasterizer thread count (the most common use case) now takes into account systems with many CPU cores/hyperthreads. When using Automatic mode, SoftRasterizer will take advantage of more threads on machines like the Mac Pro and iMac Pro.
- Manually assign the thread priorities of the SoftRasterizer threads and other related high-priority threads to better ensure stable performance. Most importantly, the main emulation thread will no longer preempt any SoftRasterizer thread, since the main emulation thread has to wait on the results of SoftRasterizer anyway.
- These changes aren't targeted at improving overall performance -- they help stabilize performance so that CPU cycles are used more consistently, which might translate into slightly improved performance, depending on hardware, as a byproduct of these changes.
This commit is contained in:
parent
fe851aa10c
commit
b6072287c5
|
@ -449,7 +449,23 @@ public:
|
|||
{
|
||||
isCPUCoreCountAuto = YES;
|
||||
|
||||
if (numberCores >= 8)
|
||||
if (numberCores >= 96)
|
||||
{
|
||||
numberCores = 128;
|
||||
}
|
||||
else if (numberCores >= 48)
|
||||
{
|
||||
numberCores = 64;
|
||||
}
|
||||
else if (numberCores >= 24)
|
||||
{
|
||||
numberCores = 32;
|
||||
}
|
||||
else if (numberCores >= 16)
|
||||
{
|
||||
numberCores = 16;
|
||||
}
|
||||
else if (numberCores >= 8)
|
||||
{
|
||||
numberCores = 8;
|
||||
}
|
||||
|
@ -955,9 +971,20 @@ public:
|
|||
_threadMessageID = MESSAGE_NONE;
|
||||
_fetchIndex = 0;
|
||||
pthread_cond_init(&_condSignalFetch, NULL);
|
||||
pthread_create(&_threadFetch, NULL, &RunFetchThread, self);
|
||||
pthread_mutex_init(&_mutexFetchExecute, NULL);
|
||||
|
||||
pthread_attr_t threadAttr;
|
||||
pthread_attr_init(&threadAttr);
|
||||
pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
|
||||
|
||||
struct sched_param sp;
|
||||
memset(&sp, 0, sizeof(struct sched_param));
|
||||
sp.sched_priority = 44;
|
||||
pthread_attr_setschedparam(&threadAttr, &sp);
|
||||
|
||||
pthread_create(&_threadFetch, &threadAttr, &RunFetchThread, self);
|
||||
pthread_attr_destroy(&threadAttr);
|
||||
|
||||
_taskEmulationLoop = 0;
|
||||
|
||||
for (size_t i = 0; i < MAX_FRAMEBUFFER_PAGES; i++)
|
||||
|
|
|
@ -132,7 +132,6 @@ volatile bool execute = true;
|
|||
pthread_mutex_init(&threadParam.mutexThreadExecute, NULL);
|
||||
pthread_cond_init(&threadParam.condThreadExecute, NULL);
|
||||
pthread_rwlock_init(&threadParam.rwlockCoreExecute, NULL);
|
||||
pthread_create(&coreThread, NULL, &RunCoreThread, &threadParam);
|
||||
|
||||
// The core emulation thread needs max priority since it is the sole
|
||||
// producer thread for all output threads. Note that this is not being
|
||||
|
@ -146,12 +145,18 @@ volatile bool execute = true;
|
|||
// lot of CPU time under certain conditions, which may interfere with
|
||||
// other threads. (Example: Video tearing on display windows, even with
|
||||
// V-sync enabled.)
|
||||
|
||||
pthread_attr_t threadAttr;
|
||||
pthread_attr_init(&threadAttr);
|
||||
pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
|
||||
|
||||
struct sched_param sp;
|
||||
int thePolicy = 0;
|
||||
memset(&sp, 0, sizeof(struct sched_param));
|
||||
pthread_getschedparam(coreThread, &thePolicy, &sp);
|
||||
sp.sched_priority = sched_get_priority_max(thePolicy);
|
||||
pthread_setschedparam(coreThread, thePolicy, &sp);
|
||||
sp.sched_priority = 42;
|
||||
pthread_attr_setschedparam(&threadAttr, &sp);
|
||||
|
||||
pthread_create(&coreThread, &threadAttr, &RunCoreThread, &threadParam);
|
||||
pthread_attr_destroy(&threadAttr);
|
||||
|
||||
[cdsGPU setOutputList:cdsOutputList rwlock:&threadParam.rwlockOutputList];
|
||||
|
||||
|
|
|
@ -93,7 +93,18 @@
|
|||
{
|
||||
pthread_mutex_init(&_mutexMessageLoop, NULL);
|
||||
pthread_cond_init(&_condSignalMessage, NULL);
|
||||
pthread_create(&_pthread, NULL, &RunOutputThread, self);
|
||||
|
||||
pthread_attr_t threadAttr;
|
||||
pthread_attr_init(&threadAttr);
|
||||
pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
|
||||
|
||||
struct sched_param sp;
|
||||
memset(&sp, 0, sizeof(struct sched_param));
|
||||
sp.sched_priority = 45;
|
||||
pthread_attr_setschedparam(&threadAttr, &sp);
|
||||
|
||||
pthread_create(&_pthread, &threadAttr, &RunOutputThread, self);
|
||||
pthread_attr_destroy(&threadAttr);
|
||||
}
|
||||
|
||||
- (void) exitThread
|
||||
|
|
|
@ -42,12 +42,30 @@ typedef struct scond scond_t;
|
|||
* @userdata : pointer to userdata that will be made
|
||||
* available in thread entry callback function
|
||||
*
|
||||
* Create a new thread.
|
||||
* Create a new thread using the operating system's default thread
|
||||
* priority.
|
||||
*
|
||||
* Returns: pointer to new thread if successful, otherwise NULL.
|
||||
*/
|
||||
sthread_t *sthread_create(void (*thread_func)(void*), void *userdata);
|
||||
|
||||
/**
|
||||
* sthread_create_with_priority:
|
||||
* @thread_func : thread entry callback function
|
||||
* @userdata : pointer to userdata that will be made
|
||||
* available in thread entry callback function
|
||||
* @thread_priority : thread priority hint value from [1-100]
|
||||
*
|
||||
* Create a new thread. It is possible for the caller to give a hint
|
||||
* for the thread's priority from [1-100]. Any passed in @thread_priority
|
||||
* values that are outside of this range will cause this function to
|
||||
* create a new thread using the operating system's default thread
|
||||
* priority.
|
||||
*
|
||||
* Returns: pointer to new thread if successful, otherwise NULL.
|
||||
*/
|
||||
sthread_t *sthread_create_with_priority(void (*thread_func)(void*), void *userdata, int thread_priority);
|
||||
|
||||
/**
|
||||
* sthread_detach:
|
||||
* @thread : pointer to thread object
|
||||
|
|
|
@ -151,13 +151,35 @@ static void *thread_wrap(void *data_)
|
|||
* @userdata : pointer to userdata that will be made
|
||||
* available in thread entry callback function
|
||||
*
|
||||
* Create a new thread.
|
||||
* Create a new thread using the operating system's default thread
|
||||
* priority.
|
||||
*
|
||||
* Returns: pointer to new thread if successful, otherwise NULL.
|
||||
*/
|
||||
sthread_t *sthread_create(void (*thread_func)(void*), void *userdata)
|
||||
{
|
||||
return sthread_create_with_priority(thread_func, userdata, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* sthread_create_with_priority:
|
||||
* @thread_func : thread entry callback function
|
||||
* @userdata : pointer to userdata that will be made
|
||||
* available in thread entry callback function
|
||||
* @thread_priority : thread priority hint value from [1-100]
|
||||
*
|
||||
* Create a new thread. It is possible for the caller to give a hint
|
||||
* for the thread's priority from [1-100]. Any passed in @thread_priority
|
||||
* values that are outside of this range will cause this function to
|
||||
* create a new thread using the operating system's default thread
|
||||
* priority.
|
||||
*
|
||||
* Returns: pointer to new thread if successful, otherwise NULL.
|
||||
*/
|
||||
sthread_t *sthread_create_with_priority(void (*thread_func)(void*), void *userdata, int thread_priority)
|
||||
{
|
||||
bool thread_created = false;
|
||||
bool thread_attr_needed = false;
|
||||
struct thread_data *data = NULL;
|
||||
sthread_t *thread = (sthread_t*)calloc(1, sizeof(*thread));
|
||||
if (!thread)
|
||||
|
@ -174,14 +196,35 @@ sthread_t *sthread_create(void (*thread_func)(void*), void *userdata)
|
|||
thread->thread = CreateThread(NULL, 0, thread_wrap, data, 0, &thread->thread_id);
|
||||
thread_created = !!thread->thread;
|
||||
#else
|
||||
#if defined(VITA)
|
||||
pthread_attr_t thread_attr;
|
||||
pthread_attr_init(&thread_attr);
|
||||
|
||||
if ( (thread_priority >= 1) && (thread_priority <= 100) )
|
||||
{
|
||||
struct sched_param sp;
|
||||
memset(&sp, 0, sizeof(struct sched_param));
|
||||
sp.sched_priority = thread_priority;
|
||||
pthread_attr_setschedpolicy(&thread_attr, SCHED_RR);
|
||||
pthread_attr_setschedparam(&thread_attr, &sp);
|
||||
|
||||
thread_attr_needed = true;
|
||||
}
|
||||
|
||||
#if defined(VITA)
|
||||
pthread_attr_setstacksize(&thread_attr , 0x10000 );
|
||||
thread_created = pthread_create(&thread->id, &thread_attr, thread_wrap, data) == 0;
|
||||
#else
|
||||
thread_created = pthread_create(&thread->id, NULL, thread_wrap, data) == 0;
|
||||
thread_attr_needed = true;
|
||||
#endif
|
||||
|
||||
if (thread_attr_needed)
|
||||
{
|
||||
thread_created = pthread_create(&thread->id, &thread_attr, thread_wrap, data) == 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
thread_created = pthread_create(&thread->id, NULL, thread_wrap, data) == 0;
|
||||
}
|
||||
|
||||
pthread_attr_destroy(&thread_attr);
|
||||
#endif
|
||||
|
||||
if (!thread_created)
|
||||
|
|
|
@ -1501,7 +1501,13 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
|
|||
_threadClearParam[i].startPixel = i * _customPixelsPerThread;
|
||||
_threadClearParam[i].endPixel = (i < _threadCount - 1) ? (i + 1) * _customPixelsPerThread : _framebufferPixCount;
|
||||
|
||||
#ifdef DESMUME_COCOA
|
||||
// The Cocoa port takes advantage of hand-optimized thread priorities
|
||||
// to help stabilize performance when running SoftRasterizer.
|
||||
_task[i].start(false, 43);
|
||||
#else
|
||||
_task[i].start(false);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ public:
|
|||
Impl();
|
||||
~Impl();
|
||||
|
||||
void start(bool spinlock);
|
||||
void start(bool spinlock, int threadPriority);
|
||||
void execute(const TWork &work, void *param);
|
||||
void* finish();
|
||||
void shutdown();
|
||||
|
@ -85,25 +85,25 @@ Task::Impl::~Impl()
|
|||
shutdown();
|
||||
slock_free(mutex);
|
||||
scond_free(condWork);
|
||||
}
|
||||
|
||||
void Task::Impl::start(bool spinlock)
|
||||
{
|
||||
slock_lock(this->mutex);
|
||||
|
||||
if (this->_isThreadRunning) {
|
||||
slock_unlock(this->mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
this->workFunc = NULL;
|
||||
this->workFuncParam = NULL;
|
||||
this->ret = NULL;
|
||||
this->exitThread = false;
|
||||
this->_thread = sthread_create(&taskProc,this);
|
||||
this->_isThreadRunning = true;
|
||||
|
||||
slock_unlock(this->mutex);
|
||||
}
|
||||
|
||||
void Task::Impl::start(bool spinlock, int threadPriority)
|
||||
{
|
||||
slock_lock(this->mutex);
|
||||
|
||||
if (this->_isThreadRunning) {
|
||||
slock_unlock(this->mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
this->workFunc = NULL;
|
||||
this->workFuncParam = NULL;
|
||||
this->ret = NULL;
|
||||
this->exitThread = false;
|
||||
this->_thread = sthread_create_with_priority(&taskProc, this, threadPriority);
|
||||
this->_isThreadRunning = true;
|
||||
|
||||
slock_unlock(this->mutex);
|
||||
}
|
||||
|
||||
void Task::Impl::execute(const TWork &work, void *param)
|
||||
|
@ -168,7 +168,8 @@ void Task::Impl::shutdown()
|
|||
slock_unlock(this->mutex);
|
||||
}
|
||||
|
||||
void Task::start(bool spinlock) { impl->start(spinlock); }
|
||||
void Task::start(bool spinlock) { impl->start(spinlock, 0); }
|
||||
void Task::start(bool spinlock, int threadPriority) { impl->start(spinlock, threadPriority); }
|
||||
void Task::shutdown() { impl->shutdown(); }
|
||||
Task::Task() : impl(new Task::Impl()) {}
|
||||
Task::~Task() { delete impl; }
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Copyright (C) 2009 DeSmuME team
|
||||
Copyright (C) 2009-2018 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -29,7 +29,8 @@ public:
|
|||
typedef void * (*TWork)(void *);
|
||||
|
||||
// initialize task runner
|
||||
void start(bool spinlock);
|
||||
void start(bool spinlock);
|
||||
void start(bool spinlock, int threadPriority);
|
||||
|
||||
//execute some work
|
||||
void execute(const TWork &work, void* param);
|
||||
|
|
Loading…
Reference in New Issue