Cocoa Port: Improve SoftRasterizer multithreading and stabilize multithreaded performance.

- Automatic setting of the SoftRasterizer thread count (the most common use case) now takes into account systems with many CPU cores/hyperthreads. When using Automatic mode, SoftRasterizer will take advantage of more threads on machines like the Mac Pro and iMac Pro.
- Manually assign the thread priorities of the SoftRasterizer threads and other related high-priority threads to better ensure stable performance. Most importantly, the main emulation thread will no longer preempt any SoftRasterizer thread, since the main emulation thread has to wait on the results of SoftRasterizer anyway.
- These changes are not aimed at improving overall performance -- they stabilize performance so that CPU cycles are used more consistently. Depending on hardware, that may translate into slightly improved performance as a byproduct.
This commit is contained in:
rogerman 2018-07-25 14:19:25 -07:00
parent fe851aa10c
commit b6072287c5
8 changed files with 149 additions and 37 deletions

View File

@ -449,7 +449,23 @@ public:
{
isCPUCoreCountAuto = YES;
if (numberCores >= 8)
if (numberCores >= 96)
{
numberCores = 128;
}
else if (numberCores >= 48)
{
numberCores = 64;
}
else if (numberCores >= 24)
{
numberCores = 32;
}
else if (numberCores >= 16)
{
numberCores = 16;
}
else if (numberCores >= 8)
{
numberCores = 8;
}
@ -955,9 +971,20 @@ public:
_threadMessageID = MESSAGE_NONE;
_fetchIndex = 0;
pthread_cond_init(&_condSignalFetch, NULL);
pthread_create(&_threadFetch, NULL, &RunFetchThread, self);
pthread_mutex_init(&_mutexFetchExecute, NULL);
pthread_attr_t threadAttr;
pthread_attr_init(&threadAttr);
pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
struct sched_param sp;
memset(&sp, 0, sizeof(struct sched_param));
sp.sched_priority = 44;
pthread_attr_setschedparam(&threadAttr, &sp);
pthread_create(&_threadFetch, &threadAttr, &RunFetchThread, self);
pthread_attr_destroy(&threadAttr);
_taskEmulationLoop = 0;
for (size_t i = 0; i < MAX_FRAMEBUFFER_PAGES; i++)

View File

@ -132,7 +132,6 @@ volatile bool execute = true;
pthread_mutex_init(&threadParam.mutexThreadExecute, NULL);
pthread_cond_init(&threadParam.condThreadExecute, NULL);
pthread_rwlock_init(&threadParam.rwlockCoreExecute, NULL);
pthread_create(&coreThread, NULL, &RunCoreThread, &threadParam);
// The core emulation thread needs max priority since it is the sole
// producer thread for all output threads. Note that this is not being
@ -146,12 +145,18 @@ volatile bool execute = true;
// lot of CPU time under certain conditions, which may interfere with
// other threads. (Example: Video tearing on display windows, even with
// V-sync enabled.)
pthread_attr_t threadAttr;
pthread_attr_init(&threadAttr);
pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
struct sched_param sp;
int thePolicy = 0;
memset(&sp, 0, sizeof(struct sched_param));
pthread_getschedparam(coreThread, &thePolicy, &sp);
sp.sched_priority = sched_get_priority_max(thePolicy);
pthread_setschedparam(coreThread, thePolicy, &sp);
sp.sched_priority = 42;
pthread_attr_setschedparam(&threadAttr, &sp);
pthread_create(&coreThread, &threadAttr, &RunCoreThread, &threadParam);
pthread_attr_destroy(&threadAttr);
[cdsGPU setOutputList:cdsOutputList rwlock:&threadParam.rwlockOutputList];

View File

@ -93,7 +93,18 @@
{
pthread_mutex_init(&_mutexMessageLoop, NULL);
pthread_cond_init(&_condSignalMessage, NULL);
pthread_create(&_pthread, NULL, &RunOutputThread, self);
pthread_attr_t threadAttr;
pthread_attr_init(&threadAttr);
pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
struct sched_param sp;
memset(&sp, 0, sizeof(struct sched_param));
sp.sched_priority = 45;
pthread_attr_setschedparam(&threadAttr, &sp);
pthread_create(&_pthread, &threadAttr, &RunOutputThread, self);
pthread_attr_destroy(&threadAttr);
}
- (void) exitThread

View File

@ -42,12 +42,30 @@ typedef struct scond scond_t;
* @userdata : pointer to userdata that will be made
* available in thread entry callback function
*
* Create a new thread.
* Create a new thread using the operating system's default thread
* priority.
*
* Returns: pointer to new thread if successful, otherwise NULL.
*/
sthread_t *sthread_create(void (*thread_func)(void*), void *userdata);
/**
* sthread_create_with_priority:
* @thread_func : thread entry callback function
* @userdata : pointer to userdata that will be made
* available in thread entry callback function
* @thread_priority : thread priority hint value from [1-100]
*
* Create a new thread. The caller may give a hint for the thread's
* priority in the range [1-100]. Any @thread_priority value outside
* of this range causes sthread_create_with_priority() to create the
* new thread using the operating system's default thread
* priority.
*
* Returns: pointer to new thread if successful, otherwise NULL.
*/
sthread_t *sthread_create_with_priority(void (*thread_func)(void*), void *userdata, int thread_priority);
/**
* sthread_detach:
* @thread : pointer to thread object

View File

@ -151,13 +151,35 @@ static void *thread_wrap(void *data_)
* @userdata : pointer to userdata that will be made
* available in thread entry callback function
*
* Create a new thread.
* Create a new thread using the operating system's default thread
* priority.
*
* Returns: pointer to new thread if successful, otherwise NULL.
*/
sthread_t *sthread_create(void (*thread_func)(void*), void *userdata)
{
   /* 0 lies outside the [1-100] hint range that
    * sthread_create_with_priority() checks, so no explicit
    * thread priority is requested for the new thread. */
   const int no_priority_hint = 0;

   return sthread_create_with_priority(thread_func, userdata, no_priority_hint);
}
/**
* sthread_create_with_priority:
* @thread_func : thread entry callback function
* @userdata : pointer to userdata that will be made
* available in thread entry callback function
* @thread_priority : thread priority hint value from [1-100]
*
* Create a new thread. The caller may give a hint for the thread's
* priority in the range [1-100]. Any @thread_priority value outside
* of this range causes sthread_create_with_priority() to create the
* new thread using the operating system's default thread
* priority.
*
* Returns: pointer to new thread if successful, otherwise NULL.
*/
sthread_t *sthread_create_with_priority(void (*thread_func)(void*), void *userdata, int thread_priority)
{
bool thread_created = false;
bool thread_attr_needed = false;
struct thread_data *data = NULL;
sthread_t *thread = (sthread_t*)calloc(1, sizeof(*thread));
if (!thread)
@ -174,14 +196,35 @@ sthread_t *sthread_create(void (*thread_func)(void*), void *userdata)
thread->thread = CreateThread(NULL, 0, thread_wrap, data, 0, &thread->thread_id);
thread_created = !!thread->thread;
#else
#if defined(VITA)
pthread_attr_t thread_attr;
pthread_attr_init(&thread_attr);
if ( (thread_priority >= 1) && (thread_priority <= 100) )
{
struct sched_param sp;
memset(&sp, 0, sizeof(struct sched_param));
sp.sched_priority = thread_priority;
pthread_attr_setschedpolicy(&thread_attr, SCHED_RR);
pthread_attr_setschedparam(&thread_attr, &sp);
thread_attr_needed = true;
}
#if defined(VITA)
pthread_attr_setstacksize(&thread_attr , 0x10000 );
thread_created = pthread_create(&thread->id, &thread_attr, thread_wrap, data) == 0;
#else
thread_created = pthread_create(&thread->id, NULL, thread_wrap, data) == 0;
thread_attr_needed = true;
#endif
if (thread_attr_needed)
{
thread_created = pthread_create(&thread->id, &thread_attr, thread_wrap, data) == 0;
}
else
{
thread_created = pthread_create(&thread->id, NULL, thread_wrap, data) == 0;
}
pthread_attr_destroy(&thread_attr);
#endif
if (!thread_created)

View File

@ -1501,7 +1501,13 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
_threadClearParam[i].startPixel = i * _customPixelsPerThread;
_threadClearParam[i].endPixel = (i < _threadCount - 1) ? (i + 1) * _customPixelsPerThread : _framebufferPixCount;
#ifdef DESMUME_COCOA
// The Cocoa port takes advantage of hand-optimized thread priorities
// to help stabilize performance when running SoftRasterizer.
_task[i].start(false, 43);
#else
_task[i].start(false);
#endif
}
}

View File

@ -30,7 +30,7 @@ public:
Impl();
~Impl();
void start(bool spinlock);
void start(bool spinlock, int threadPriority);
void execute(const TWork &work, void *param);
void* finish();
void shutdown();
@ -85,25 +85,25 @@ Task::Impl::~Impl()
shutdown();
slock_free(mutex);
scond_free(condWork);
}
/**
 * Starts the worker thread for this task using the default thread priority.
 *
 * Does nothing if the worker thread is already marked as running. Otherwise
 * the pending work state is reset and a new thread running taskProc is
 * created through sthread_create().
 *
 * @param spinlock  Not used by this method.
 */
void Task::Impl::start(bool spinlock)
{
	slock_lock(this->mutex);

	if (!this->_isThreadRunning)
	{
		// Reset the work slot before the new thread can observe it.
		this->workFunc = NULL;
		this->workFuncParam = NULL;
		this->ret = NULL;
		this->exitThread = false;

		this->_thread = sthread_create(&taskProc, this);

		// sthread_create() returns NULL when the thread could not be created.
		// Only mark the task as running when a thread really exists, so that a
		// failed start is not mistaken for a live worker.
		this->_isThreadRunning = (this->_thread != NULL);
	}

	slock_unlock(this->mutex);
}
/**
 * Starts the worker thread for this task with a caller-supplied priority hint.
 *
 * Does nothing if the worker thread is already marked as running. Otherwise
 * the pending work state is reset and a new thread running taskProc is
 * created through sthread_create_with_priority().
 *
 * @param spinlock        Not used by this method.
 * @param threadPriority  Priority hint passed unchanged to
 *                        sthread_create_with_priority().
 */
void Task::Impl::start(bool spinlock, int threadPriority)
{
	slock_lock(this->mutex);

	if (!this->_isThreadRunning)
	{
		// Reset the work slot before the new thread can observe it.
		this->workFunc = NULL;
		this->workFuncParam = NULL;
		this->ret = NULL;
		this->exitThread = false;

		this->_thread = sthread_create_with_priority(&taskProc, this, threadPriority);

		// sthread_create_with_priority() returns NULL when the thread could not
		// be created. Only mark the task as running when a thread really exists,
		// so that a failed start is not mistaken for a live worker.
		this->_isThreadRunning = (this->_thread != NULL);
	}

	slock_unlock(this->mutex);
}
void Task::Impl::execute(const TWork &work, void *param)
@ -168,7 +168,8 @@ void Task::Impl::shutdown()
slock_unlock(this->mutex);
}
void Task::start(bool spinlock) { impl->start(spinlock); }
void Task::start(bool spinlock) { impl->start(spinlock, 0); }
// Forwards the start request, including the priority hint, to the implementation object.
void Task::start(bool spinlock, int threadPriority)
{
	impl->start(spinlock, threadPriority);
}

// Forwards the shutdown request to the implementation object.
void Task::shutdown()
{
	impl->shutdown();
}

// Allocates the implementation object owned by this Task.
Task::Task()
	: impl(new Task::Impl())
{
}

// Releases the implementation object allocated by the constructor.
Task::~Task()
{
	delete impl;
}

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2009 DeSmuME team
Copyright (C) 2009-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -29,7 +29,8 @@ public:
typedef void * (*TWork)(void *);
// initialize task runner
void start(bool spinlock);
void start(bool spinlock);
void start(bool spinlock, int threadPriority);
//execute some work
void execute(const TWork &work, void* param);