macOS: faster semaphores

Mach semaphores are kernel-only and don't have a userspace fast path
TellowKrinkle 2021-02-14 05:32:01 -06:00 committed by Kojin
parent 5011b9ead5
commit cf02893d22
1 changed file with 20 additions and 13 deletions

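The change pairs the Mach kernel semaphore with an atomic counter so that uncontended Post()/Wait() calls never enter the kernel; only threads that actually have to block (or wake a blocked thread) pay for a syscall. A minimal sketch of that fast-path pattern, using std::atomic in place of the __atomic builtins; the FastSemaphore name and the omission of error checking are illustrative, not part of the commit:

#include <atomic>
#include <mach/mach.h>

// Sketch only: counter >= 0 means free counts are available,
// counter < 0 means |counter| threads are blocked (or about to block)
// on the kernel semaphore.
class FastSemaphore
{
    std::atomic<int> m_counter{0};
    semaphore_t m_sema;

public:
    FastSemaphore()  { semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, 0); }
    ~FastSemaphore() { semaphore_destroy(mach_task_self(), m_sema); }

    void Post()
    {
        // Old value < 0: at least one waiter is parked in (or headed into)
        // the kernel, so pay for the semaphore_signal() syscall. Otherwise
        // the increment alone hands the count to a future Wait().
        if (m_counter.fetch_add(1, std::memory_order_release) < 0)
            semaphore_signal(m_sema);
    }

    void Wait()
    {
        // Old value <= 0 means the new value is negative (same test as the
        // commit's sub_fetch < 0): no count was available, so block in the
        // kernel until a Post() signals us.
        if (m_counter.fetch_sub(1, std::memory_order_acquire) <= 0)
            semaphore_wait(m_sema);
    }
};

With this split, the userspace counter carries the logical semaphore value (which is what Count() reads), while the kernel semaphore only handles parking and waking threads.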

@@ -62,13 +62,12 @@ Threading::Semaphore::Semaphore()
// other platforms explicitly make a thread-private (unshared) semaphore
// here. But it seems Mach doesn't support that.
MACH_CHECK(semaphore_create(mach_task_self(), (semaphore_t*)&m_sema, SYNC_POLICY_FIFO, 0));
- __atomic_store_n(&m_counter, 0, __ATOMIC_SEQ_CST);
+ __atomic_store_n(&m_counter, 0, __ATOMIC_RELEASE);
}
Threading::Semaphore::~Semaphore()
{
MACH_CHECK(semaphore_destroy(mach_task_self(), (semaphore_t)m_sema));
- __atomic_store_n(&m_counter, 0, __ATOMIC_SEQ_CST);
}
void Threading::Semaphore::Reset()
@@ -80,24 +79,23 @@ void Threading::Semaphore::Reset()
void Threading::Semaphore::Post()
{
- MACH_CHECK(semaphore_signal(m_sema));
- __atomic_add_fetch(&m_counter, 1, __ATOMIC_SEQ_CST);
+ if (__atomic_fetch_add(&m_counter, 1, __ATOMIC_RELEASE) < 0)
+     MACH_CHECK(semaphore_signal(m_sema));
}
void Threading::Semaphore::Post(int multiple)
{
for (int i = 0; i < multiple; ++i)
{
- MACH_CHECK(semaphore_signal(m_sema));
+ Post();
}
- __atomic_add_fetch(&m_counter, multiple, __ATOMIC_SEQ_CST);
}
void Threading::Semaphore::WaitWithoutYield()
{
pxAssertMsg(!wxThread::IsMain(), "Unyielding semaphore wait issued from the main/gui thread. Please use Wait() instead.");
- MACH_CHECK(semaphore_wait(m_sema));
- __atomic_sub_fetch(&m_counter, 1, __ATOMIC_SEQ_CST);
+ if (__atomic_sub_fetch(&m_counter, 1, __ATOMIC_ACQUIRE) < 0)
+     MACH_CHECK(semaphore_wait(m_sema));
}
bool Threading::Semaphore::WaitWithoutYield(const wxTimeSpan& timeout)
@@ -109,6 +107,9 @@ bool Threading::Semaphore::WaitWithoutYield(const wxTimeSpan& timeout)
// semaphore_timedwait() is the way it's used in Grand Central Dispatch,
// which is open-source.
+ if (__atomic_sub_fetch(&m_counter, 1, __ATOMIC_ACQUIRE) >= 0)
+     return true;
// on x86 platforms, mach_absolute_time() returns nanoseconds
// TODO(aktau): on iOS a scale value from mach_timebase_info will be necessary
u64 const kOneThousand = 1000;
@@ -122,7 +123,8 @@ bool Threading::Semaphore::WaitWithoutYield(const wxTimeSpan& timeout)
if (now > deadline)
{
// timed out by definition
- return false;
+ kr = KERN_OPERATION_TIMED_OUT;
+ break;
}
u64 timeleft = deadline - now;
@@ -141,15 +143,20 @@ bool Threading::Semaphore::WaitWithoutYield(const wxTimeSpan& timeout)
if (kr == KERN_OPERATION_TIMED_OUT)
{
- return false;
+ int orig = __atomic_load_n(&m_counter, __ATOMIC_RELAXED);
+ while (orig < 0)
+ {
+     if (__atomic_compare_exchange_n(&m_counter, &orig, orig + 1, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+         return false;
+ }
+ // Semaphore was signalled between our wait expiring and now, keep kernel sema in sync
+ kr = semaphore_wait(m_sema);
}
// while it's entirely possible to have KERN_FAILURE here, we should
// probably assert so we can study and correct the actual error here
// (the thread dying while someone is waiting for it).
MACH_CHECK(kr);
- __atomic_sub_fetch(&m_counter, 1, __ATOMIC_SEQ_CST);
return true;
}
@@ -250,6 +257,6 @@ void Threading::Semaphore::WaitNoCancel(const wxTimeSpan& timeout)
int Threading::Semaphore::Count()
{
- return __atomic_load_n(&m_counter, __ATOMIC_SEQ_CST);
+ return __atomic_load_n(&m_counter, __ATOMIC_RELAXED);
}
#endif
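The subtlest part of the change is the timed-wait path in the last hunks above: the waiter decrements the counter before calling into the kernel, so on timeout it has to back that decrement out, and it may discover that a Post() raced ahead of it. A hedged sketch of that recovery step, with the same caveats and headers as the earlier sketch (the helper name is made up, and error handling is omitted):

// Returns false if the wait genuinely timed out, true if a racing Post()
// turned the timeout into a successful acquisition.
bool RecoverFromTimeout(std::atomic<int>& counter, semaphore_t sema)
{
    int old = counter.load(std::memory_order_relaxed);
    while (old < 0)
    {
        // We are still counted as a waiter; un-count ourselves and report
        // the timeout. compare_exchange_weak reloads 'old' on failure.
        if (counter.compare_exchange_weak(old, old + 1,
                std::memory_order_acquire, std::memory_order_relaxed))
            return false;
    }
    // The counter went non-negative while our kernel wait was expiring, so a
    // Post() saw our decrement and issued a semaphore_signal() on our behalf.
    // Absorb that signal with a plain wait to keep the kernel count in sync,
    // then report success exactly like the non-timed-out path.
    semaphore_wait(sema);
    return true;
}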