From 9473e69b7f1bd0f923bafbe4cb11656b3c7e7458 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 24 Dec 2009 22:22:34 +0000 Subject: [PATCH] Thread Local Storage Fixes: * Implemented TlsVariable, a nifty alternative to __threadlocal, suitable for archaic operating systems that don't have native TLS support (namely Mac OS/X). * Added a forced reference to TLS in AppInit so that TLS is sure to be available to DLLs (otherwise windows doesn't init TLS by default). * Disabled TLS support in the x86emitter by default, since it's looking increasingly like we won't find a use for multithreading PS2 sub-components (can be re-enabled later if spontaneous brilliance at a later date proves me wrong). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2396 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/build/Utilities/Utilities.cbp | 411 +++++++++++---------- common/build/Utilities/utilities.vcproj | 52 ++- common/include/Utilities/Threading.h | 19 +- common/include/Utilities/TlsVariable.inl | 125 +++++++ common/include/x86emitter/x86types.h | 27 +- common/src/Utilities/Console.cpp | 14 +- common/src/Utilities/Exceptions.cpp | 3 +- common/src/Utilities/ThreadTools.cpp | 2 - common/src/x86emitter/LnxCpuDetect.cpp | 7 + common/src/x86emitter/WinCpuDetect.cpp | 22 +- common/src/x86emitter/cpudetect.cpp | 2 + common/src/x86emitter/cpudetect_internal.h | 1 + common/src/x86emitter/x86emitter.cpp | 5 +- pcsx2/MTGS.cpp | 11 - pcsx2/PrecompiledHeader.h | 2 - pcsx2/System/SysCoreThread.cpp | 3 +- pcsx2/gui/AppAssert.cpp | 3 +- pcsx2/gui/AppInit.cpp | 10 + pcsx2/x86/iCore.cpp | 4 +- pcsx2/x86/iCore.h | 4 +- 20 files changed, 464 insertions(+), 263 deletions(-) create mode 100644 common/include/Utilities/TlsVariable.inl diff --git a/common/build/Utilities/Utilities.cbp b/common/build/Utilities/Utilities.cbp index cc8df1e175..69ff9524d5 100644 --- a/common/build/Utilities/Utilities.cbp +++ b/common/build/Utilities/Utilities.cbp @@ -1,205 +1,206 @@ - - - - - - + + + + + + diff --git 
a/common/build/Utilities/utilities.vcproj b/common/build/Utilities/utilities.vcproj index 0c70dc5300..b71d59714d 100644 --- a/common/build/Utilities/utilities.vcproj +++ b/common/build/Utilities/utilities.vcproj @@ -223,10 +223,6 @@ RelativePath="..\..\src\Utilities\x86\MemcpyFast.cpp" > - - @@ -271,22 +267,10 @@ RelativePath="..\..\src\Utilities\pxStaticText.cpp" > - - - - - - @@ -431,6 +415,26 @@ > + + + + + + + + + + - - @@ -541,6 +541,18 @@ RelativePath="..\..\include\Utilities\wxGuiTools.h" > + + + + + + diff --git a/common/include/Utilities/Threading.h b/common/include/Utilities/Threading.h index edcf289d09..579fbc73c7 100644 --- a/common/include/Utilities/Threading.h +++ b/common/include/Utilities/Threading.h @@ -22,11 +22,27 @@ #include "Pcsx2Defs.h" #include "ScopedPtr.h" -#undef Yield // release th burden of windows.h global namespace spam. +#undef Yield // release the burden of windows.h global namespace spam. #define AffinityAssert_AllowFromMain() \ pxAssertMsg( wxThread::IsMain(), "Thread affinity violation: Call allowed from main thread only." ) +// -------------------------------------------------------------------------------------- +// PCSX2_THREAD_LOCAL - Defines platform/operating system support for Thread Local Storage +// -------------------------------------------------------------------------------------- +// For complementary support for TLS, include Utilities/TlsVariable.inl, and use the +// DeclareTls macro in the place of __threadlocal. 
+// +//#define PCSX2_THREAD_LOCAL 0 // uncomment this line to force-disable native TLS (useful for testing TlsVariable on windows/linux) + +#ifndef PCSX2_THREAD_LOCAL +# ifdef __WXMAC__ +# define PCSX2_THREAD_LOCAL 0 +# else +# define PCSX2_THREAD_LOCAL 1 +# endif +#endif + class wxTimeSpan; namespace Threading @@ -131,6 +147,7 @@ namespace Exception #endif } + namespace Threading { // -------------------------------------------------------------------------------------- diff --git a/common/include/Utilities/TlsVariable.inl b/common/include/Utilities/TlsVariable.inl new file mode 100644 index 0000000000..d781bfbdb3 --- /dev/null +++ b/common/include/Utilities/TlsVariable.inl @@ -0,0 +1,125 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2009 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "Threading.h" + +#if PCSX2_THREAD_LOCAL +# define DeclareTls(x) __threadlocal x +#else +# define DeclareTls(x) Threading::TlsVariable +#endif + +namespace Threading +{ +// -------------------------------------------------------------------------------------- +// TlsVariable - Thread local storage +// -------------------------------------------------------------------------------------- +// Wrapper class for pthread_getspecific, which is pthreads language for "thread local +// storage." 
This class enables code to act as a drop-in replacement for compiler-native +// thread local storage (typically specified via __threadlocal). Mac OS/X (Darwin) does +// not have TLS, which is the main reason for this class existing. +// +// Performance considerations: While certainly convenient, performance of this class can +// be sub-optimal when the operator overloads are used, since each one will most likely +// result in repeated calls to pthread_getspecific. (if the function inlines then it +// should actually optimize well enough, but I doubt it does). +// + template< typename T > + class TlsVariable + { + DeclareNoncopyableObject(TlsVariable); + + protected: + pthread_key_t m_thread_key; + T m_initval; + + public: + TlsVariable(); + TlsVariable( T initval ); + + virtual ~TlsVariable() throw(); + T* GetPtr() const; + T& GetRef() const { return *GetPtr(); } + + TlsVariable& operator=( const T& src ) + { + GetRef() = src; + return *this; + } + + bool operator==( const T& src ) const { return GetRef() == src; } + bool operator!=( const T& src ) const { return GetRef() != src; } + bool operator>( const T& src ) const { return GetRef() > src; } + bool operator<( const T& src ) const { return GetRef() < src; } + bool operator>=( const T& src ) const { return GetRef() >= src; } + bool operator<=( const T& src ) const { return GetRef() <= src; } + + T operator+( const T& src ) const { return GetRef() + src; } + T operator-( const T& src ) const { return GetRef() - src; } + + void operator+=( const T& src ) { GetRef() += src; } + void operator-=( const T& src ) { GetRef() -= src; } + + operator T&() const { return GetRef(); } + + protected: + void CreateKey(); + }; +}; + +template< typename T > +Threading::TlsVariable::TlsVariable() +{ + CreateKey(); +} + +template< typename T > +Threading::TlsVariable::TlsVariable( T initval ) +{ + CreateKey(); + m_initval = initval; +} + +template< typename T > +Threading::TlsVariable::~TlsVariable() throw() +{ + if( m_thread_key 
!= NULL ) + pthread_key_delete( m_thread_key ); +} + +template< typename T > +T* Threading::TlsVariable::GetPtr() const +{ + T* result = (T*)pthread_getspecific( m_thread_key ); + if( result == NULL ) + { + pthread_setspecific( m_thread_key, result = (T*)_aligned_malloc( sizeof(T), 16 ) ); + if( result == NULL ) + throw Exception::OutOfMemory( "Out of memory allocating thread local storage variable." ); + *result = m_initval; + } + return result; +} + +template< typename T > +void Threading::TlsVariable::CreateKey() +{ + if( 0 != pthread_key_create(&m_thread_key, _aligned_free) ) + { + pxFailRel( "Thread Local Storage Error: key creation failed." ); + } +} diff --git a/common/include/x86emitter/x86types.h b/common/include/x86emitter/x86types.h index aa997bffd9..8b4f289653 100644 --- a/common/include/x86emitter/x86types.h +++ b/common/include/x86emitter/x86types.h @@ -27,8 +27,31 @@ enum XMMSSEType //XMMT_FPD = 3, // double }; -extern __threadlocal u8 *x86Ptr; -extern __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM]; +// -------------------------------------------------------------------------------------- +// __tls_emit / x86EMIT_MULTITHREADED +// -------------------------------------------------------------------------------------- +// Multithreaded support for the x86 emitter. (defaults to 0) +// To enable the multithreaded emitter, either set the below define to 1, or set the define +// as a project option. The multithreaded emitter relies on native compiler support for +// TLS -- Macs are crap out of luck there (for now). + +#ifndef x86EMIT_MULTITHREADED +# define x86EMIT_MULTITHREADED 0 +#endif + +#ifndef __tls_emit +# if x86EMIT_MULTITHREADED && PCSX2_THREAD_LOCAL +# define __tls_emit __threadlocal +# else + // Using TlsVariable is sub-optimal and could result in huge executables, so we + // force-disable TLS entirely, and disallow running multithreaded recompilation + // components within PCSX2 manually. 
+# define __tls_emit +# endif +#endif + +extern __tls_emit u8* x86Ptr; +extern __tls_emit XMMSSEType g_xmmtypes[iREGCNT_XMM]; namespace x86Emitter { diff --git a/common/src/Utilities/Console.cpp b/common/src/Utilities/Console.cpp index 0cea1191e0..e87c508c26 100644 --- a/common/src/Utilities/Console.cpp +++ b/common/src/Utilities/Console.cpp @@ -15,9 +15,17 @@ #include "PrecompiledHeader.h" #include "Threading.h" +#include "TlsVariable.inl" using namespace Threading; +// thread-local console indentation setting. +static DeclareTls(int) conlog_Indent( 0 ); + +// thread-local console color storage. +static DeclareTls(ConsoleColors) conlog_Color( DefaultConsoleColor ); + + static wxString m_buffer; // used by ConsoleBuffer static Mutex m_bufferlock; // used by ConsoleBuffer @@ -364,12 +372,6 @@ static void format_that_unicode_mess( SafeArray& buffer, const wxChar* f // though it'd be kinda nice if we did. } -// thread-local console indentation setting. -static __threadlocal int conlog_Indent = 0; - -// thread-local console color storage. -static __threadlocal ConsoleColors conlog_Color = DefaultConsoleColor; - static wxString ascii_format_string(const char* fmt, va_list argptr) { if( ascii_buffer_is_deleted ) diff --git a/common/src/Utilities/Exceptions.cpp b/common/src/Utilities/Exceptions.cpp index e72ecfee62..b6dbe8092f 100644 --- a/common/src/Utilities/Exceptions.cpp +++ b/common/src/Utilities/Exceptions.cpp @@ -17,6 +17,7 @@ #include #include "Threading.h" +#include "TlsVariable.inl" wxString GetEnglish( const char* msg ) { @@ -41,7 +42,7 @@ wxString GetTranslation( const char* msg ) // Using a threadlocal assertion guard. Separate threads can assert at the same time. // That's ok. What we don't want is the *same* thread recurse-asserting. 
-static __threadlocal int s_assert_guard = 0; +static DeclareTls(int) s_assert_guard( 0 ); pxDoAssertFnType* pxDoAssert = pxAssertImpl_LogIt; diff --git a/common/src/Utilities/ThreadTools.cpp b/common/src/Utilities/ThreadTools.cpp index b7fa076bf2..c989363c69 100644 --- a/common/src/Utilities/ThreadTools.cpp +++ b/common/src/Utilities/ThreadTools.cpp @@ -37,8 +37,6 @@ const wxTimeSpan Threading::def_yieldgui_interval( 0, 0, 0, 100 ); // three second interval for deadlock protection on waitgui. const wxTimeSpan Threading::def_deadlock_timeout( 0, 0, 3, 0 ); -//static __threadlocal PersistentThread* tls_current_thread = NULL; - static pthread_key_t curthread_key = NULL; static s32 total_key_count = 0; static Mutex total_key_lock; diff --git a/common/src/x86emitter/LnxCpuDetect.cpp b/common/src/x86emitter/LnxCpuDetect.cpp index 7356e9fef3..cb43114430 100644 --- a/common/src/x86emitter/LnxCpuDetect.cpp +++ b/common/src/x86emitter/LnxCpuDetect.cpp @@ -36,6 +36,13 @@ void CountLogicalCores( int LogicalCoresPerPhysicalCPU, int PhysicalCoresPerPhys } } +bool CanEmitShit() +{ + // In Linux I'm pretty sure TLS always works, none of the funny business that Windows + // has involving DLLs. >_< + return true; +} + bool CanTestInstructionSets() { // Not implemented yet for linux. 
(see cpudetect_internal.h for details) diff --git a/common/src/x86emitter/WinCpuDetect.cpp b/common/src/x86emitter/WinCpuDetect.cpp index 06888dd982..ba8c6ab26c 100644 --- a/common/src/x86emitter/WinCpuDetect.cpp +++ b/common/src/x86emitter/WinCpuDetect.cpp @@ -49,15 +49,29 @@ bool _test_instruction( void* pfnCall ) u128 regsave; ((void (__fastcall *)(void*))pfnCall)( &regsave ); } - __except(EXCEPTION_EXECUTE_HANDLER) { - return false; - } + __except(EXCEPTION_EXECUTE_HANDLER) { return false; } + + return true; +} + +bool CanEmitShit() +{ + // Under Windows, pre 0.9.6 versions of PCSX2 may not initialize the TLS + // register (FS register), so plugins (DLLs) using our x86emitter in multithreaded + // mode will just crash/fail if they try to run the instruction set tests. + +#if x86EMIT_MULTITHREADED + static __threadlocal int tls_failcheck; + __try { tls_failcheck = 1; } + __except(EXCEPTION_EXECUTE_HANDLER) { return false; } +#endif + return true; } bool CanTestInstructionSets() { - return true; + return CanEmitShit(); } SingleCoreAffinity::SingleCoreAffinity() diff --git a/common/src/x86emitter/cpudetect.cpp b/common/src/x86emitter/cpudetect.cpp index efbf17f424..4d487538c3 100644 --- a/common/src/x86emitter/cpudetect.cpp +++ b/common/src/x86emitter/cpudetect.cpp @@ -96,6 +96,8 @@ void EstablishMXCSRmask() MXCSR_Mask.bitmask = 0xFFFF; // SSE2 features added } + if( !CanEmitShit() ) return; + // the fxsave buffer must be 16-byte aligned to avoid GPF. I just save it to an // unused portion of recSSE, since it has plenty of room to spare. diff --git a/common/src/x86emitter/cpudetect_internal.h b/common/src/x86emitter/cpudetect_internal.h index e22e2e443d..400f78972e 100644 --- a/common/src/x86emitter/cpudetect_internal.h +++ b/common/src/x86emitter/cpudetect_internal.h @@ -51,5 +51,6 @@ public: // This secondary test fixes such cases (although apparently a CMOS reset does as well). 
// +extern bool CanEmitShit(); extern bool CanTestInstructionSets(); extern bool _test_instruction( void* pfnCall ); diff --git a/common/src/x86emitter/x86emitter.cpp b/common/src/x86emitter/x86emitter.cpp index 6de0c6a7e2..d56afa124c 100644 --- a/common/src/x86emitter/x86emitter.cpp +++ b/common/src/x86emitter/x86emitter.cpp @@ -64,9 +64,8 @@ // -__threadlocal u8 *x86Ptr; - -__threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT }; +__tls_emit u8* x86Ptr; +__tls_emit XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT }; namespace x86Emitter { diff --git a/pcsx2/MTGS.cpp b/pcsx2/MTGS.cpp index ff35e9b715..e989d7b6d2 100644 --- a/pcsx2/MTGS.cpp +++ b/pcsx2/MTGS.cpp @@ -82,14 +82,6 @@ extern bool renderswitch; std::list ringposStack; #endif -static __threadlocal SysMtgsThread* tls_mtgsThread = NULL; - -SysMtgsThread& SysMtgsThread::Get() -{ - pxAssertMsg( tls_mtgsThread != NULL, L"This function must be called from the context of a running SysMtgsThread." ); - return *tls_mtgsThread; -} - SysMtgsThread::SysMtgsThread() : SysThreadBase() #ifdef RINGBUF_DEBUG_STACK @@ -268,8 +260,6 @@ public: void SysMtgsThread::ExecuteTaskInThread() { - tls_mtgsThread = this; - #ifdef RINGBUF_DEBUG_STACK PacketTagType prevCmd; #endif @@ -513,7 +503,6 @@ void SysMtgsThread::OnResumeInThread( bool isSuspended ) void SysMtgsThread::OnCleanupInThread() { ClosePlugin(); - tls_mtgsThread = NULL; _parent::OnCleanupInThread(); } diff --git a/pcsx2/PrecompiledHeader.h b/pcsx2/PrecompiledHeader.h index bdbade65fa..efd8321dd9 100644 --- a/pcsx2/PrecompiledHeader.h +++ b/pcsx2/PrecompiledHeader.h @@ -53,8 +53,6 @@ // might as well add them here) #include -#include -#include #include #include diff --git a/pcsx2/System/SysCoreThread.cpp b/pcsx2/System/SysCoreThread.cpp index 6c5857640f..25b79eb130 100644 --- a/pcsx2/System/SysCoreThread.cpp +++ b/pcsx2/System/SysCoreThread.cpp @@ -23,6 +23,7 @@ #include "PageFaultSource.h" #include "SysThreads.h" +#include "Utilities/TlsVariable.inl" #ifdef 
__WXMSW__ # include @@ -30,7 +31,7 @@ #include -static __threadlocal SysCoreThread* tls_coreThread = NULL; +static DeclareTls(SysCoreThread*) tls_coreThread( NULL ); // -------------------------------------------------------------------------------------- // SysCoreThread *External Thread* Implementations diff --git a/pcsx2/gui/AppAssert.cpp b/pcsx2/gui/AppAssert.cpp index dcfb867390..b96ef8ce13 100644 --- a/pcsx2/gui/AppAssert.cpp +++ b/pcsx2/gui/AppAssert.cpp @@ -15,6 +15,7 @@ #include "PrecompiledHeader.h" #include "App.h" +#include "Utilities/TlsVariable.inl" #include @@ -105,7 +106,7 @@ static wxString pxGetStackTrace( const FnChar_t* calledFrom ) #ifdef __WXDEBUG__ -static __threadlocal int _reentrant_lock = 0; +static TlsVariable< int > _reentrant_lock( 0 ); // This override of wx's implementation provides thread safe assertion message reporting. If we aren't // on the main gui thread then the assertion message box needs to be passed off to the main gui thread diff --git a/pcsx2/gui/AppInit.cpp b/pcsx2/gui/AppInit.cpp index 3a7009b9a2..56a14744f8 100644 --- a/pcsx2/gui/AppInit.cpp +++ b/pcsx2/gui/AppInit.cpp @@ -518,6 +518,16 @@ Pcsx2App::Pcsx2App() SetAppName( L"pcsx2" ); BuildCommandHash(); + +#ifdef __WXMSW__ + // This variable assignment ensures that MSVC links in the TLS setup stubs even in + // full optimization builds. 
Without it, DLLs that use TLS won't work because the + // FS segment register won't have been initialized by the main exe, due to tls_insurance + // being optimized away >_< --air + + static __threadlocal int tls_insurance = 0; + tls_insurance = 1; +#endif } Pcsx2App::~Pcsx2App() diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 9525c18f4b..ed756fda52 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -22,8 +22,8 @@ #include "VU.h" #include "R3000A.h" -__threadlocal u8 *j8Ptr[32]; -__threadlocal u32 *j32Ptr[32]; +__tls_emit u8 *j8Ptr[32]; +__tls_emit u32 *j32Ptr[32]; u16 g_x86AllocCounter = 0; u16 g_xmmAllocCounter = 0; diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index c21a05c961..6b0c28f8b2 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -265,8 +265,8 @@ extern u32 g_cpuRegHasSignExt, g_cpuPrevRegHasSignExt; extern _xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM]; -extern __threadlocal u8 *j8Ptr[32]; // depreciated item. use local u8* vars instead. -extern __threadlocal u32 *j32Ptr[32]; // depreciated item. use local u32* vars instead. +extern __tls_emit u8 *j8Ptr[32]; // depreciated item. use local u8* vars instead. +extern __tls_emit u32 *j32Ptr[32]; // depreciated item. use local u32* vars instead. extern u16 g_x86AllocCounter; extern u16 g_xmmAllocCounter;