Darwin/OSX semaphore & thread & os backend.

2015-11-17 19:30:20 +02:00 · 2015-11-17 19:30:20 +02:00 · f813b9e25b
parent c15958f3c2
commit f813b9e25b
4 changed files with 543 additions and 5 deletions
--- a/common/src/Utilities/CMakeLists.txt
+++ b/common/src/Utilities/CMakeLists.txt
@ -52,6 +52,7 @@ endif(CMAKE_BUILD_TYPE STREQUAL Release)

 # variable with all sources of this library
 set(UtilitiesSources
+	VirtualMemory.cpp
 	AlignedMalloc.cpp
 	../../include/Utilities/FixedPointTypes.inl
 	../../include/Utilities/EventSource.inl
@ -65,8 +66,6 @@ set(UtilitiesSources
 	HashTools.cpp
 	IniInterface.cpp
 	Linux/LnxHostSys.cpp
-	Linux/LnxMisc.cpp
-	Linux/LnxThreads.cpp
 	Mutex.cpp
 	PathUtils.cpp
 	PrecompiledHeader.cpp
@ -76,12 +75,10 @@ set(UtilitiesSources
 	pxStreams.cpp
 	pxTranslate.cpp
 	pxWindowTextWriter.cpp
-	Semaphore.cpp
 	StringHelpers.cpp
 	ThreadingDialogs.cpp
 	ThreadTools.cpp
 	vssprintf.cpp
-	VirtualMemory.cpp
 	wxAppWithHelpers.cpp
 	wxGuiTools.cpp
 	wxHelpers.cpp
@ -120,7 +117,22 @@ set(UtilitiesHeaders
 	../../include/Utilities/wxAppWithHelpers.h
 	../../include/Utilities/wxBaseTools.h
 	../../include/Utilities/wxGuiTools.h
-	PrecompiledHeader.h)
+	PrecompiledHeader.h
+)
+
+if(APPLE)
+	# Darwin/OSX: use the Darwin-specific thread, misc and semaphore sources.
+	LIST(APPEND UtilitiesSources
+		Darwin/DarwinThreads.cpp
+		Darwin/DarwinMisc.cpp
+		Darwin/DarwinSemaphore.cpp
+	)
+else()
+	# Everything else: the Linux thread/misc sources plus the generic Semaphore.cpp.
+	LIST(APPEND UtilitiesSources
+		Linux/LnxThreads.cpp
+		Linux/LnxMisc.cpp
+		Semaphore.cpp
+	)
+endif()

 set(UtilitiesFinalSources
 	${UtilitiesSources}
--- a/common/src/Utilities/Darwin/DarwinMisc.cpp
+++ b/common/src/Utilities/Darwin/DarwinMisc.cpp
@ -0,0 +1,150 @@
+/*  PCSX2 - PS2 Emulator for PCs
+ *  Copyright (C) 2002-2014  PCSX2 Dev Team
+ *
+ *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU Lesser General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with PCSX2.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "../PrecompiledHeader.h"
+
+#include <cstring>
+#include <cstdlib>
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+#include <mach/mach_time.h>
+
+// NELEM(x): number of elements in the array x. The second divisor evaluates
+// to 1 when sizeof(x) is an exact multiple of the element size and to 0
+// (i.e. a division by zero) otherwise.
+#define NELEM(x) \
+	((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
+
+// Darwin (OSX) is a bit different from Linux when requesting properties of
+// the OS because of its BSD/Mach heritage. Helpfully, most of this code
+// should translate pretty well to other *BSD systems. (e.g.: the sysctl(3)
+// interface).
+//
+// For an overview of all of Darwin's sysctls, check:
+// https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man3/sysctl.3.html
+
+// Return the total physical memory on the machine, in bytes. Returns 0 on
+// failure (not supported by the operating system).
+//
+// The value is read with the CTL_HW/HW_MEMSIZE sysctl and cached in a
+// function-local static once a query has succeeded. A failed query is not
+// cached, so the next call simply tries again.
+u64 GetPhysicalMemory()
+{
+	static u64 mem = 0;
+
+	// Fast path: a non-zero cached value was published by an earlier call.
+	// The atomically loaded value itself is returned, so the static is never
+	// read without an atomic operation.
+	const u64 cached = __atomic_load_n(&mem, __ATOMIC_SEQ_CST);
+	if (cached != 0) {
+		return cached;
+	}
+
+	u64 getmem = 0;
+	size_t len = sizeof(getmem);
+	int mib[] = { CTL_HW, HW_MEMSIZE };
+	if (sysctl(mib, NELEM(mib), &getmem, &len, NULL, 0) < 0) {
+		// perror() appends ": <error text>" on its own, so the prefix must
+		// not end in a colon.
+		perror("sysctl");
+
+		// Do not store anything: writing 0 here could overwrite a value
+		// another thread has successfully cached in the meantime.
+		return 0;
+	}
+
+	__atomic_store_n(&mem, getmem, __ATOMIC_SEQ_CST);
+	return getmem;
+}
+
+// No-op in this backend: the body is empty. GetTickFrequency() computes and
+// caches its value on first use instead.
+void InitCPUTicks()
+{
+}
+
+// Reports how many GetCPUTicks() ticks elapse per second (Hz).
+//
+// Example:
+//   u64 seconds_passed = GetCPUTicks() / GetTickFrequency();
+//   u64 millis_passed = (GetCPUTicks() * 1000) / GetTickFrequency();
+//
+// NOTE: do all multiplications/subtractions on the tick values first and
+// divide by GetTickFrequency() last, otherwise precision is lost.
+u64 GetTickFrequency()
+{
+	static u64 freq = 0;
+
+	// A non-zero value means an earlier call already computed and stored
+	// the frequency, so there is nothing left to do.
+	if (__atomic_load_n(&freq, __ATOMIC_SEQ_CST) != 0) {
+		return freq;
+	}
+
+	// mach_timebase_info() is queried only on this slow path (the original
+	// author notes it is a slow syscall, and that on x86(-64) it reports
+	// denom == numer == 1, i.e. a frequency of 1e9 / nanosecond ticks).
+	mach_timebase_info_data_t timebase;
+	if (mach_timebase_info(&timebase) != KERN_SUCCESS) {
+		abort();
+	}
+
+	// publish the computed value atomically
+	const u64 hz = (u64) 1e9 * (u64) timebase.denom / (u64) timebase.numer;
+	__atomic_store_n(&freq, hz, __ATOMIC_SEQ_CST);
+
+	return freq;
+}
+
+// Returns the current mach_absolute_time() reading: a count of "ticks" since
+// some arbitrary, fixed point in the past. Per the original author's note,
+// on OSX x86(-64) mach_timebase_info reports numer == denom == 1, so one
+// tick is one nanosecond there.
+u64 GetCPUTicks()
+{
+	const u64 ticks = mach_absolute_time();
+	return ticks;
+}
+
+// Builds a description of the running OS in the form
+// "<type> <release> <arch>", where the three fields come from the
+// CTL_KERN/KERN_OSTYPE, CTL_KERN/KERN_OSRELEASE and CTL_HW/HW_MACHINE
+// sysctls respectively. A field whose query fails is left empty.
+//
+// The string is rebuilt on every call. An earlier revision tried to compute
+// it only once, but kept the result in a non-static local, so every call
+// after the first returned an empty string.
+wxString GetOSVersionString()
+{
+	char type[32] = {0};
+	char release[32] = {0};
+	char arch[32] = {0};
+
+	// Reads a string sysctl into the char array `var`. The length handed to
+	// sysctl() is one less than the array size, so the kernel can never
+	// write past the array and the zero-initialised last byte always
+	// terminates the string. On failure the field is cleared.
+#define SYSCTL_GET(var, base, name) \
+	do { \
+		int mib[] = { base, name }; \
+		size_t len = sizeof(var) - 1; \
+		if (sysctl(mib, NELEM(mib), var, &len, NULL, 0) < 0) { \
+			memset(var, 0, sizeof(var)); \
+		} \
+	} while (0)
+
+	SYSCTL_GET(release, CTL_KERN, KERN_OSRELEASE);
+	SYSCTL_GET(type, CTL_KERN, KERN_OSTYPE);
+	SYSCTL_GET(arch, CTL_HW, HW_MACHINE);
+
+#undef SYSCTL_GET
+
+	// Each field holds at most 31 characters, so three fields plus the two
+	// separating spaces always fit in buf and strcat cannot overflow it.
+	char buf[128] = {0};
+	strcat(buf, type);
+	strcat(buf, " ");
+	strcat(buf, release);
+	strcat(buf, " ");
+	strcat(buf, arch);
+
+	wxString version;
+	version = buf;
+	return version;
+}
+
+// Screensaver control is not implemented in this backend: `allow` is
+// ignored and the function does nothing.
+void ScreensaverAllow(bool allow)
+{
+	// no-op
+}
--- a/common/src/Utilities/Darwin/DarwinSemaphore.cpp
+++ b/common/src/Utilities/Darwin/DarwinSemaphore.cpp
@ -0,0 +1,243 @@
+/*  PCSX2 - PS2 Emulator for PCs
+ *  Copyright (C) 2002-2014  PCSX2 Dev Team
+ *
+ *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU Lesser General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with PCSX2.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <cstdio>
+#include <cassert>             // assert
+
+#include <pthread.h>           // pthread_setcancelstate()
+
+#include <sys/time.h>          // gettimeofday()
+
+#include <mach/mach.h>
+#include <mach/task.h>         // semaphore_create() and semaphore_destroy()
+#include <mach/semaphore.h>    // semaphore_*()
+#include <mach/mach_error.h>   // mach_error_string()
+#include <mach/mach_time.h>    // mach_absolute_time()
+
+#include "PrecompiledHeader.h"
+
+#include "Threading.h"
+#include "ThreadingInternal.h"
+
+#include "wxBaseTools.h"
+#include "wxGuiTools.h"
+
+// --------------------------------------------------------------------------------------
+//  Semaphore Implementation for Darwin/OSX
+//
+//  Sadly, Darwin/OSX needs its own implementation of Semaphores instead of
+//  relying on pthreads, because OSX support for unnamed semaphores (the
+//  best kind) is very poor.
+//
+//  This implementation makes use of Mach primitives instead. These are also
+//  what Grand Central Dispatch (GCD) is based on, as far as I understand:
+//  http://newosxbook.com/articles/GCD.html.
+//
+// --------------------------------------------------------------------------------------
+
+// Evaluates the Mach call `mach_retval` exactly once. If it did not return
+// KERN_SUCCESS, the textual error is printed to stderr (newline-terminated,
+// so it does not run into the assert output that follows) and the assert
+// fires. With assertions compiled out only the message is printed.
+#define MACH_CHECK(mach_retval) \
+	do { \
+		kern_return_t _kr = (mach_retval); \
+		if (_kr != KERN_SUCCESS) { \
+			fprintf(stderr, "mach error: %s\n", mach_error_string(_kr)); \
+			assert(_kr == KERN_SUCCESS); \
+		} \
+	} while (0)
+
+// Creates the underlying Mach semaphore in the current task (FIFO policy,
+// initial value 0) and zeroes the user-space counter.
+Threading::Semaphore::Semaphore()
+{
+	// Other platforms explicitly create a thread-private (unshared)
+	// semaphore at this point; Mach does not seem to offer that.
+	semaphore_t *const handle = (semaphore_t *)&m_sema;
+	const kern_return_t created = semaphore_create(mach_task_self(), handle, SYNC_POLICY_FIFO, 0);
+	MACH_CHECK(created);
+
+	__atomic_store_n(&m_counter, 0, __ATOMIC_SEQ_CST);
+}
+
+// Destroys the underlying Mach semaphore and zeroes the counter.
+Threading::Semaphore::~Semaphore() throw()
+{
+	const kern_return_t destroyed = semaphore_destroy(mach_task_self(), (semaphore_t) m_sema);
+	MACH_CHECK(destroyed);
+
+	__atomic_store_n(&m_counter, 0, __ATOMIC_SEQ_CST);
+}
+
+// Returns the semaphore to its initial state by destroying the Mach
+// semaphore, creating a fresh one (FIFO policy, value 0) and zeroing the
+// counter.
+void Threading::Semaphore::Reset()
+{
+	const kern_return_t destroyed = semaphore_destroy(mach_task_self(), (semaphore_t) m_sema);
+	MACH_CHECK(destroyed);
+
+	semaphore_t *const handle = (semaphore_t *) &m_sema;
+	const kern_return_t created = semaphore_create(mach_task_self(), handle, SYNC_POLICY_FIFO, 0);
+	MACH_CHECK(created);
+
+	__atomic_store_n(&m_counter, 0, __ATOMIC_SEQ_CST);
+}
+
+// Signals the semaphore once, then bumps the user-space counter by one.
+void Threading::Semaphore::Post()
+{
+	const kern_return_t signalled = semaphore_signal(m_sema);
+	MACH_CHECK(signalled);
+
+	__atomic_add_fetch(&m_counter, 1, __ATOMIC_SEQ_CST);
+}
+
+// Signals the semaphore `multiple` times and then adds `multiple` to the
+// user-space counter. A count of zero or less is a no-op: no signal is sent,
+// so the counter must not be adjusted either (a negative value would
+// otherwise subtract from the counter without any matching wait).
+void Threading::Semaphore::Post(int multiple)
+{
+	if (multiple <= 0) {
+		return;
+	}
+
+	for (int i = 0; i < multiple; ++i) {
+		MACH_CHECK(semaphore_signal(m_sema));
+	}
+	__atomic_add_fetch(&m_counter, multiple, __ATOMIC_SEQ_CST);
+}
+
+// Blocks on the Mach semaphore and then decrements the user-space counter.
+// Asserts when called from the main/gui thread, which should use Wait().
+void Threading::Semaphore::WaitWithoutYield()
+{
+	pxAssertMsg(!wxThread::IsMain(), "Unyielding semaphore wait issued from the main/gui thread.  Please use Wait() instead.");
+
+	const kern_return_t waited = semaphore_wait(m_sema);
+	MACH_CHECK(waited);
+
+	__atomic_sub_fetch(&m_counter, 1, __ATOMIC_SEQ_CST);
+}
+
+// Waits on the semaphore for at most `timeout`.
+//
+// Returns:
+//   true if the semaphore was acquired (the counter is decremented), false
+//   if the deadline passed first.
+bool Threading::Semaphore::WaitWithoutYield(const wxTimeSpan& timeout)
+{
+	// This method is the reason a dedicated Darwin Semaphore implementation
+	// exists. semaphore_timedwait() is prone to returning KERN_ABORTED,
+	// which basically means some signal woke it up, so the wait is retried
+	// with whatever time is left. The best official "documentation" for
+	// semaphore_timedwait() is the way it is used in Grand Central Dispatch,
+	// which is open-source.
+
+	// on x86 platforms, mach_absolute_time() returns nanoseconds
+	// TODO(aktau): on iOS a scale value from mach_timebase_info will be necessary
+	u64 const kOneThousand = 1000;
+	u64 const kOneBillion = kOneThousand * kOneThousand * kOneThousand;
+	u64 const delta = timeout.GetMilliseconds().GetValue() * (kOneThousand * kOneThousand);
+
+	u64 now = mach_absolute_time();
+	u64 const deadline = now + delta;
+
+	mach_timespec_t remaining;
+	kern_return_t kr;
+	do {
+		if (now > deadline) {
+			// timed out by definition
+			return false;
+		}
+
+		u64 const timeleft = deadline - now;
+		remaining.tv_sec = timeleft / kOneBillion;
+		remaining.tv_nsec = timeleft % kOneBillion;
+
+		// possible return values of semaphore_timedwait() (from XNU sources):
+		// internal kernel val -> return value
+		// THREAD_INTERRUPTED  -> KERN_ABORTED
+		// THREAD_TIMED_OUT    -> KERN_OPERATION_TIMED_OUT
+		// THREAD_AWAKENED     -> KERN_SUCCESS
+		// THREAD_RESTART      -> KERN_TERMINATED
+		// default             -> KERN_FAILURE
+		kr = semaphore_timedwait(m_sema, remaining);
+
+		// refresh the clock for a possible retry
+		now = mach_absolute_time();
+	} while (kr == KERN_ABORTED);
+
+	if (kr == KERN_OPERATION_TIMED_OUT) {
+		return false;
+	}
+
+	// KERN_FAILURE is entirely possible here, but asserting lets the actual
+	// error be studied and corrected (e.g. the thread dying while someone is
+	// waiting for it).
+	MACH_CHECK(kr);
+
+	__atomic_sub_fetch(&m_counter, 1, __ATOMIC_SEQ_CST);
+	return true;
+}
+
+// wxApp-safe Wait. When called on the Main/GUI thread (and a wxApp exists)
+// the wait is chopped into def_yieldgui_interval slices with a YieldToMain()
+// between them, so pending messages keep being processed, input keeps being
+// handled and windows keep repainting. From any other thread this is a plain
+// blocking wait without message pumping.
+void Threading::Semaphore::Wait()
+{
+#if wxUSE_GUI
+	const bool onGuiThread = wxThread::IsMain() && (wxTheApp != NULL);
+	if (!onGuiThread) {
+		WaitWithoutYield();
+		return;
+	}
+
+	if (_WaitGui_RecursionGuard( L"Semaphore::Wait" )) {
+		ScopedBusyCursor hourglass( Cursor_ReallyBusy );
+		WaitWithoutYield();
+		return;
+	}
+
+	for (;;) {
+		if (WaitWithoutYield(def_yieldgui_interval)) {
+			break;
+		}
+		YieldToMain();
+	}
+#else
+	WaitWithoutYield();
+#endif
+}
+
+// wxApp-safe timed wait. When called on the Main/GUI thread (and a wxApp
+// exists) the wait is chopped into def_yieldgui_interval slices with a
+// YieldToMain() between them, so pending messages keep being processed. From
+// any other thread, no message pumping is performed.
+//
+// Returns:
+//   false if the wait timed out before the semaphore was signaled, or true if the signal was
+//   reached prior to timeout.
+//
+bool Threading::Semaphore::Wait(const wxTimeSpan& timeout)
+{
+#if wxUSE_GUI
+	if(!wxThread::IsMain() || (wxTheApp == NULL)) {
+		return WaitWithoutYield(timeout);
+	}
+	else if (_WaitGui_RecursionGuard( L"Semaphore::TimedWait")) {
+		ScopedBusyCursor hourglass(Cursor_ReallyBusy);
+		return WaitWithoutYield(timeout);
+	}
+	else {
+		//ScopedBusyCursor hourglass( Cursor_KindaBusy );
+		wxTimeSpan countdown((timeout));
+
+		do {
+			// Report success directly. Deriving the result from the
+			// remaining countdown would wrongly return false when the
+			// caller passed a timeout of zero or less and the semaphore was
+			// nevertheless acquired (and its counter decremented).
+			if (WaitWithoutYield(def_yieldgui_interval)) return true;
+			YieldToMain();
+			countdown -= def_yieldgui_interval;
+		} while (countdown.GetMilliseconds() > 0);
+
+		// the countdown ran out without the semaphore being signaled
+		return false;
+	}
+#else
+	return WaitWithoutYield(timeout);
+#endif
+}
+
+// Waits on the semaphore with thread cancellation disabled, then restores
+// whatever cancel state the thread had before. Useful when the semaphore
+// lives on the stack and is handed to another thread (via GUI message or
+// similar): a cancellation during the wait would leave that other thread
+// with an invalid stack value.
+//
+// Performance note: this has quite a bit more overhead than
+// Semaphore::WaitWithoutYield(). For many no-cancel waits in a tight worker
+// loop, consider marking the thread uncancellable yourself and calling
+// WaitWithoutYield() instead.
+//
+// I'm unsure how to do this with pure Mach primitives, the docs in
+// osfmk/man seem a bit out of date so perhaps there's a possibility, but
+// since as far as I know Mach threads are 1-to-1 on BSD uthreads (and thus
+// POSIX threads), this should work. -- aktau
+void Threading::Semaphore::WaitNoCancel()
+{
+	int previousState;
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &previousState);
+
+	Wait();
+
+	pthread_setcancelstate(previousState, NULL);
+}
+
+// Timed variant of WaitNoCancel(): runs Wait(timeout) with thread
+// cancellation disabled and restores the previous cancel state afterwards.
+// The result of Wait(timeout) is discarded, so the caller cannot tell a
+// timeout from a successful wait.
+void Threading::Semaphore::WaitNoCancel(const wxTimeSpan& timeout)
+{
+	int previousState;
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &previousState);
+
+	Wait(timeout);
+
+	pthread_setcancelstate(previousState, NULL);
+}
+
+// Returns an atomic snapshot of the user-space counter maintained by the
+// Post and Wait methods.
+int Threading::Semaphore::Count()
+{
+	const int snapshot = __atomic_load_n(&m_counter, __ATOMIC_SEQ_CST);
+	return snapshot;
+}
--- a/common/src/Utilities/Darwin/DarwinThreads.cpp
+++ b/common/src/Utilities/Darwin/DarwinThreads.cpp
@ -0,0 +1,133 @@
+/*  PCSX2 - PS2 Emulator for PCs
+ *  Copyright (C) 2002-2014  PCSX2 Dev Team
+ *
+ *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU Lesser General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with PCSX2.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "../PrecompiledHeader.h"
+#include "PersistentThread.h"
+
+#include <unistd.h>
+
+#if !defined(__APPLE__)
+#	error "DarwinThreads.cpp should only be compiled by projects or makefiles targeted at OSX."
+#else
+
+#include <mach/mach_init.h>
+#include <mach/thread_act.h>
+#include <mach/mach_port.h>
+
+// Note: assuming multicore is safer because it forces the interlocked routines to use
+// the LOCK prefix.  The prefix works on single core CPUs fine (but is slow), but not
+// having the LOCK prefix is very bad indeed.
+
+// Suspends the calling thread for `ms` milliseconds via usleep().
+__forceinline void Threading::Sleep( int ms )
+{
+    const int microseconds = 1000 * ms;
+    usleep(microseconds);
+}
+
+// For use in spin/wait loops: issues a single x86 "pause" instruction, which
+// acts as a hint to Intel CPUs and should, in theory, improve performance
+// and reduce cpu power consumption.
+__forceinline void Threading::SpinWait()
+{
+    // If this doesn't compile you can just comment it out (it only serves as a
+    // performance hint and isn't required).
+    __asm__ ( "pause" );
+}
+
+// Not implemented in this backend: the body is empty.
+__forceinline void Threading::EnableHiresScheduler()
+{
+    // Darwin has customizable schedulers, see xnu/osfmk/man. Not
+    // implemented yet though (and not sure if useful for pcsx2).
+}
+
+// Not implemented in this backend: the body is empty.
+__forceinline void Threading::DisableHiresScheduler()
+{
+    // see EnableHiresScheduler()
+}
+
+// Just like on Windows, this is not really the number of ticks per second,
+// but just a factor by which one has to divide GetThreadCpuTime() or
+// pxThread::GetCpuTime() if one wants to receive a value in seconds. NOTE:
+// doing this will of course yield precision loss.
+//
+// Both of those functions scale the microsecond value they measure by 10,
+// i.e. they report units of 100 nanoseconds, so there are 10,000,000 such
+// units in one second.
+u64 Threading::GetThreadTicksPerSecond()
+{
+    return 10000000; // the *CpuTime() functions return values in 100ns units
+}
+
+// Returns the CPU time (user plus system) consumed by `thread`, in
+// microseconds, as reported by thread_info(THREAD_BASIC_INFO). Returns 0
+// when the query fails.
+static u64 getthreadtime(thread_port_t thread) {
+    thread_basic_info_data_t basic;
+    mach_msg_type_number_t basic_count = THREAD_BASIC_INFO_COUNT;
+
+    const kern_return_t status = thread_info(thread, THREAD_BASIC_INFO,
+            (thread_info_t) &basic, &basic_count);
+    if (status != KERN_SUCCESS) {
+        return 0;
+    }
+
+    // convert both time values to microseconds and add them up
+    const u64 micros_per_second = (u64) 1e6;
+    const u64 user_us = (u64) basic.user_time.seconds * micros_per_second +
+        (u64) basic.user_time.microseconds;
+    const u64 system_us = (u64) basic.system_time.seconds * micros_per_second +
+        (u64) basic.system_time.microseconds;
+
+    return user_us + system_us;
+}
+
+// Returns the CPU time used so far by the calling thread, in units of 100
+// nanoseconds (the microsecond value from getthreadtime() times 10). The
+// odd unit mirrors the Windows counterpart in WinThreads.cpp, which uses
+// the GetThreadTimes() API. Per the original author's note, on OSX/Darwin
+// this is only accurate to about 1ms (and possibly less), so not very good.
+u64 Threading::GetThreadCpuTime()
+{
+    // mach_thread_self() plus mach_port_deallocate() would work as well, but
+    // those go through mach traps (kinda like system calls), unless
+    // something in the COMMPAGE (like the Linux vDSO) overrides them to run
+    // in user-space. In contrast, pthread_mach_thread_np(pthread_self()) is
+    // entirely in user-space.
+    const thread_port_t self = pthread_mach_thread_np(pthread_self());
+    const u64 micros = getthreadtime(self);
+    return micros * 10ULL;
+}
+
+// Returns the total CPU time used by the thread that belongs to this
+// object, in the same 100 nanosecond units as GetThreadCpuTime(). The
+// thread is identified through m_native_id; 0 is returned while that id is
+// not set.
+u64 Threading::pxThread::GetCpuTime() const
+{
+    if (m_native_id) {
+        const thread_port_t port = (thread_port_t) m_native_id;
+        return getthreadtime(port) * 10ULL;
+    }
+
+    return 0;
+}
+
+// Stores the port returned by mach_thread_self() in m_native_id. The port
+// is released again in _platform_specific_OnCleanupInThread().
+void Threading::pxThread::_platform_specific_OnStartInThread()
+{
+    const uptr self_port = (uptr) mach_thread_self();
+    m_native_id = self_port;
+}
+
+// Releases the thread port that _platform_specific_OnStartInThread()
+// obtained and stored in m_native_id.
+void Threading::pxThread::_platform_specific_OnCleanupInThread()
+{
+    // cleanup of handles that were opened in
+    // _platform_specific_OnStartInThread
+    const thread_port_t port = (thread_port_t) m_native_id;
+    mach_port_deallocate(mach_task_self(), port);
+}
+
+// Passes `name` to pthread_setname_np(), which is called here with the name
+// as its only argument (no thread handle).
+// name can be up to 16 bytes
+// NOTE(review): that limit is not enforced by this code; confirm it against
+// the platform's pthread_setname_np() documentation.
+void Threading::pxThread::_DoSetThreadName(const char *name)
+{
+    pthread_setname_np(name);
+}
+
+#endif