Merge pull request #467 from PCSX2/atomic

Replace volatile with real atomic
Gregory Hainaut 2016-02-22 21:20:03 +01:00
commit c0e58e48ae
6 changed files with 60 additions and 145 deletions

View File

@@ -45,8 +45,12 @@ GSRendererSW::GSRendererSW(int threads)
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
memset(m_fzb_pages, 0, sizeof(m_fzb_pages));
memset(m_tex_pages, 0, sizeof(m_tex_pages));
for (uint32 i = 0; i < countof(m_fzb_pages); i++) {
m_fzb_pages[i] = 0;
}
for (uint32 i = 0; i < countof(m_tex_pages); i++) {
m_tex_pages[i] = 0;
}
#define InitCVB(P) \
m_cvb[P][0][0] = &GSRendererSW::ConvertVertexBuffer<P, 0, 0>; \
@@ -749,60 +753,44 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
}
}
__forceinline void Increment16(volatile short* lpAddend)
void GSRendererSW::UsePages(const uint32* pages, const int type)
{
// (*lpAddend)++;
_InterlockedIncrement16(lpAddend);
}
__forceinline void Decrement16(volatile short* lpAddend)
{
// (*lpAddend)--;
_InterlockedDecrement16(lpAddend);
}
void GSRendererSW::UsePages(const uint32* pages, int type)
{
if(type < 2)
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*p])[type] < SHRT_MAX);
Increment16((short*)&m_fzb_pages[*p] + type);
}
}
else
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(m_tex_pages[*p] < SHRT_MAX);
Increment16((short*)&m_tex_pages[*p]);
for(const uint32* p = pages; *p != GSOffset::EOP; p++) {
switch (type) {
case 0:
ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX);
m_fzb_pages[*p] += 1;
break;
case 1:
ASSERT((m_fzb_pages[*p] >> 16) < USHRT_MAX);
m_fzb_pages[*p] += 0x10000;
break;
case 2:
ASSERT(m_tex_pages[*p] < USHRT_MAX);
m_tex_pages[*p] += 1;
break;
default:break;
}
}
}
void GSRendererSW::ReleasePages(const uint32* pages, int type)
void GSRendererSW::ReleasePages(const uint32* pages, const int type)
{
if(type < 2)
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*p])[type] > 0);
Decrement16((short*)&m_fzb_pages[*p] + type);
}
}
else
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(m_tex_pages[*p] > 0);
Decrement16((short*)&m_tex_pages[*p]);
for(const uint32* p = pages; *p != GSOffset::EOP; p++) {
switch (type) {
case 0:
ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0);
m_fzb_pages[*p] -= 1;
break;
case 1:
ASSERT((m_fzb_pages[*p] >> 16) > 0);
m_fzb_pages[*p] -= 0x10000;
break;
case 2:
ASSERT(m_tex_pages[*p] > 0);
m_tex_pages[*p] -= 1;
break;
default:break;
}
}
}
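With the page tables declared as std::atomic (see the header change below), the memset calls above have to become plain element-wise stores, and the separate Increment16/Decrement16 helpers disappear: the frame and z-buffer reference counts stay packed in the low and high halves of one 32-bit word, so bumping either half is a single atomic add of 1 or 0x10000. The following is a minimal standalone sketch of that scheme with illustrative names only (use_page, release_page and the fixed 512-entry tables are not the renderer's code):

#include <atomic>
#include <cassert>
#include <cstdint>

// Low 16 bits: frame-buffer refcount, high 16 bits: z-buffer refcount.
std::atomic<uint32_t> fzb_pages[512];
std::atomic<uint16_t> tex_pages[512];

void reset_pages()
{
    // std::atomic is not trivially copyable, so zero each element instead of memset.
    for (size_t i = 0; i < 512; i++) {
        fzb_pages[i].store(0, std::memory_order_relaxed);
        tex_pages[i].store(0, std::memory_order_relaxed);
    }
}

void use_page(uint32_t page, int type)        // type 0 = frame, 1 = zbuf, 2 = texture
{
    if (type == 2) { tex_pages[page] += 1; return; }
    fzb_pages[page] += ((type == 0) ? 1u : 0x10000u);   // one atomic read-modify-write
}

void release_page(uint32_t page, int type)
{
    if (type == 2) { tex_pages[page] -= 1; return; }
    const uint32_t step = (type == 0) ? 1u : 0x10000u;
    const uint32_t prev = fzb_pages[page].fetch_sub(step);
    assert((type == 0 ? (prev & 0xFFFF) : (prev >> 16)) > 0);   // catch refcount underflow
}

On x86 each of these operations is still a single lock-prefixed instruction, so the cost stays comparable to the old volatile-plus-Interlocked counters while dropping the casts into the packed word.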

View File

@@ -71,8 +71,8 @@ protected:
GSPixelOffset4* m_fzb;
GSVector4i m_fzb_bbox;
uint32 m_fzb_cur_pages[16];
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
uint16 m_tex_pages[512];
std::atomic<uint32> m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
std::atomic<uint16> m_tex_pages[512];
uint32 m_tmp_pages[512 + 1];
void Reset();
@@ -86,8 +86,8 @@ protected:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void UsePages(const uint32* pages, int type);
void ReleasePages(const uint32* pages, int type);
void UsePages(const uint32* pages, const int type);
void ReleasePages(const uint32* pages, const int type);
bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r);
bool CheckSourcePages(SharedData* sd);
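Since these counters are touched for every rendered page, it is worth confirming that the chosen atomic widths are lock-free on the target. A quick check along these lines (purely illustrative, not part of the commit) is enough:

#include <atomic>
#include <cstdint>
#include <cstdio>

int main()
{
    std::atomic<uint32_t> a32;
    std::atomic<uint16_t> a16;
    // Mainstream x86/x64 toolchains report true for both, so the new arrays
    // are no heavier than the volatile counters they replace.
    std::printf("32-bit lock-free: %d, 16-bit lock-free: %d\n",
                (int)a32.is_lock_free(), (int)a16.is_lock_free());
    return 0;
}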

View File

@@ -24,8 +24,8 @@
#include "GSPng.h"
GSTextureSW::GSTextureSW(int type, int width, int height)
: m_mapped(0)
{
m_mapped.clear();
m_size = GSVector2i(width, height);
m_type = type;
m_format = 0;
@@ -68,7 +68,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r)
if(m_data != NULL && r2.left >= 0 && r2.right <= m_size.x && r2.top >= 0 && r2.bottom <= m_size.y)
{
if(!_interlockedbittestandset(&m_mapped, 0))
while(m_mapped.test_and_set()) {}
{
m.bits = (uint8*)m_data + ((m_pitch * r2.top + r2.left) << 2);
m.pitch = m_pitch;
@@ -82,7 +82,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r)
void GSTextureSW::Unmap()
{
m_mapped = 0;
m_mapped.clear();
}
#ifndef _WIN32
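In GSTextureSW the mapped flag becomes a std::atomic_flag, the one atomic type the standard guarantees to be lock-free. It has no constructor taking 0, which is why the old m_mapped(0) member initializer turns into an explicit clear() in the constructor body, and the bit-test intrinsic becomes test_and_set()/clear(). A small sketch of the same acquire/release pattern (illustrative class, not the plugin's):

#include <atomic>

class MappableTexture
{
    std::atomic_flag m_mapped;                 // cleared = "not mapped"

public:
    MappableTexture() { m_mapped.clear(); }    // no portable initializer before C++20, so clear in the body

    void map()
    {
        // test_and_set() returns the previous state: spin until we observe
        // "clear" and set it in one atomic step, i.e. until we own the mapping.
        while (m_mapped.test_and_set(std::memory_order_acquire)) { /* spin */ }
    }

    void unmap()
    {
        m_mapped.clear(std::memory_order_release);   // hand the mapping back
    }
};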

View File

@@ -29,7 +29,7 @@ class GSTextureSW : public GSTexture
int m_pitch;
void* m_data;
long m_mapped;
std::atomic_flag m_mapped;
public:
GSTextureSW(int type, int width, int height);

View File

@@ -30,30 +30,31 @@ class TransactionScope
public:
class Lock
{
volatile long state;
std::atomic<bool> state;
public:
Lock()
: state(0)
Lock()
: state(false)
{
}
void lock()
{
while(_InterlockedCompareExchange(&state, 1, 0) != 0)
bool expected_value = false;
while(state.compare_exchange_strong(expected_value, true))
{
do {_mm_pause();} while(state == 1);
do {_mm_pause();} while(state);
}
}
void unlock()
void unlock()
{
_InterlockedExchange(&state, 0);
state = false;
}
bool isLocked() const
bool isLocked() const
{
return state == 1;
return state.load();
}
};
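The Lock class keeps the test-and-test-and-set shape of the old Interlocked version, just over std::atomic<bool>. One detail worth keeping in mind with this pattern: compare_exchange_strong returns true when it performed the swap and overwrites its expected argument with the observed value when it did not, so the conventional loop spins on the negated result and resets expected on every retry. A sketch of that idiom (illustrative, not the committed code verbatim):

#include <atomic>
#include <immintrin.h>   // _mm_pause

class SpinLock
{
    std::atomic<bool> locked{false};

public:
    void lock()
    {
        bool expected = false;
        // compare_exchange_strong returns true when it swaps false -> true,
        // i.e. when we acquired the lock; keep trying while it fails.
        while (!locked.compare_exchange_strong(expected, true, std::memory_order_acquire)) {
            // Wait on cheap relaxed loads until the holder releases,
            // instead of hammering the cache line with failed CAS attempts.
            do { _mm_pause(); } while (locked.load(std::memory_order_relaxed));
            expected = false;   // the failed CAS overwrote it with the observed value
        }
    }

    void unlock() { locked.store(false, std::memory_order_release); }

    bool isLocked() const { return locked.load(); }
};

The inner _mm_pause loop mirrors the original code's behaviour on contention; only the acquisition test is expressed through the standard CAS interface.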
@@ -63,7 +64,7 @@ private:
TransactionScope();
public:
TransactionScope(Lock& fallBackLock_, int max_retries = 3)
TransactionScope(Lock& fallBackLock_, int max_retries = 3)
: fallBackLock(fallBackLock_)
{
// The TSX (RTM/HLE) instructions on Intel AVX2 CPUs may either be
@@ -74,7 +75,7 @@ public:
#if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__)
int nretries = 0;
while(1)
{
++nretries;
@@ -85,7 +86,7 @@
{
if(!fallBackLock.isLocked()) return;
_xabort(0xff);
_xabort(0xff);
}
if((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff && !(status & _XABORT_NESTED))
@@ -97,7 +98,7 @@
break;
}
if(nretries >= max_retries)
if(nretries >= max_retries)
{
break;
}
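TransactionScope itself only changes its lock type; the strategy is unchanged: run the critical section as an RTM transaction, abort explicitly if the fallback lock is already held, and take that lock for real after too many retries. A condensed sketch of that strategy follows (function and variable names are invented, and the real class's _XABORT_* status handling is simplified away):

#include <atomic>
#include <immintrin.h>   // _xbegin / _xend / _xabort, requires RTM support (e.g. -mrtm)

std::atomic<bool> fallback_lock{false};   // stand-in for TransactionScope::Lock

template <class F>
void run_transactional(F critical_section, int max_retries = 3)
{
    for (int attempt = 0; attempt < max_retries; ++attempt) {
        if (_xbegin() == _XBEGIN_STARTED) {
            // Mirror the class: abort the transaction if the fallback lock is held,
            // otherwise the transactional and locked paths could overlap.
            if (fallback_lock.load(std::memory_order_relaxed)) _xabort(0xff);
            critical_section();
            _xend();
            return;
        }
        // Transaction aborted; loop and try again.
    }
    // Too many aborts: take the software lock instead.
    bool expected = false;
    while (!fallback_lock.compare_exchange_strong(expected, true)) expected = false;
    critical_section();
    fallback_lock.store(false, std::memory_order_release);
}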

View File

@@ -387,80 +387,6 @@ struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr
return Mask ? 1 : 0;
}
__forceinline unsigned char _interlockedbittestandreset(volatile long* a, const long b)
{
unsigned char retval;
__asm__("lock; btrl %k[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
return retval;
}
__forceinline unsigned char _interlockedbittestandset(volatile long* a, const long b)
{
unsigned char retval;
__asm__("lock; btsl %k[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
return retval;
}
__forceinline long _InterlockedCompareExchange(volatile long* const Destination, const long Exchange, const long Comperand)
{
long retval = Comperand;
__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
return retval;
}
__forceinline long _InterlockedExchange(volatile long* const Target, const long Value)
{
long retval = Value;
__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
return retval;
}
__forceinline long _InterlockedExchangeAdd(volatile long* const Addend, const long Value)
{
long retval = Value;
__asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
return retval;
}
__forceinline short _InterlockedExchangeAdd16(volatile short* const Addend, const short Value)
{
short retval = Value;
__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
return retval;
}
__forceinline long _InterlockedDecrement(volatile long* const lpAddend)
{
return _InterlockedExchangeAdd(lpAddend, -1) - 1;
}
__forceinline long _InterlockedIncrement(volatile long* const lpAddend)
{
return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}
__forceinline short _InterlockedDecrement16(volatile short* const lpAddend)
{
return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
}
__forceinline short _InterlockedIncrement16(volatile short* const lpAddend)
{
return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
}
#ifdef __GNUC__
// gcc 4.8 define __rdtsc but unfortunately the compiler crash...
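The GCC inline-assembly shims for the MSVC Interlocked intrinsics can be deleted because every caller now goes through <atomic>. For orientation, the removed operations map onto standard calls roughly as follows (a sketch, not code from the commit; note that the Interlocked increment/decrement variants return the new value, whereas fetch_add/fetch_sub return the previous one):

#include <atomic>

void equivalents(std::atomic<long>& a, std::atomic<short>& s)
{
    long old;
    old = a.exchange(5);                     // _InterlockedExchange
    old = a.fetch_add(5);                    // _InterlockedExchangeAdd
    old = s.fetch_add(5);                    // _InterlockedExchangeAdd16
    a.fetch_add(1);                          // _InterlockedIncrement
    a.fetch_sub(1);                          // _InterlockedDecrement
    s.fetch_add(1);                          // _InterlockedIncrement16
    s.fetch_sub(1);                          // _InterlockedDecrement16

    long expected = 0;
    a.compare_exchange_strong(expected, 1);  // _InterlockedCompareExchange; on failure
                                             // `expected` now holds the value that was seen

    const int bit = 3;                       // _interlockedbittestandset / ...reset:
    bool old_set   = (a.fetch_or(1L << bit)     >> bit) & 1;   // previous state of the bit
    bool old_reset = (a.fetch_and(~(1L << bit)) >> bit) & 1;
    (void)old; (void)old_set; (void)old_reset;
}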