From 9bbb0fe1f6171316023c7df075edd5825a169aae Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 28 Feb 2015 10:57:00 +0100 Subject: [PATCH 1/4] gsdx: add atomic for texture upload V2: fix init of atomic flag object --- plugins/GSdx/GSTextureSW.cpp | 6 +++--- plugins/GSdx/GSTextureSW.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/GSTextureSW.cpp b/plugins/GSdx/GSTextureSW.cpp index 8f9f712412..99e1a9aa31 100644 --- a/plugins/GSdx/GSTextureSW.cpp +++ b/plugins/GSdx/GSTextureSW.cpp @@ -24,8 +24,8 @@ #include "GSPng.h" GSTextureSW::GSTextureSW(int type, int width, int height) - : m_mapped(0) { + m_mapped.clear(); m_size = GSVector2i(width, height); m_type = type; m_format = 0; @@ -68,7 +68,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r) if(m_data != NULL && r2.left >= 0 && r2.right <= m_size.x && r2.top >= 0 && r2.bottom <= m_size.y) { - if(!_interlockedbittestandset(&m_mapped, 0)) + while(m_mapped.test_and_set()) {} { m.bits = (uint8*)m_data + ((m_pitch * r2.top + r2.left) << 2); m.pitch = m_pitch; @@ -82,7 +82,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r) void GSTextureSW::Unmap() { - m_mapped = 0; + m_mapped.clear(); } #ifndef _WIN32 diff --git a/plugins/GSdx/GSTextureSW.h b/plugins/GSdx/GSTextureSW.h index b6f8a7f041..8cba020ff5 100644 --- a/plugins/GSdx/GSTextureSW.h +++ b/plugins/GSdx/GSTextureSW.h @@ -29,7 +29,7 @@ class GSTextureSW : public GSTexture int m_pitch; void* m_data; - long m_mapped; + std::atomic_flag m_mapped; public: GSTextureSW(int type, int width, int height); From f904cd6c4a671030e3620c34bb993a8c21b8c764 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 28 Feb 2015 10:59:48 +0100 Subject: [PATCH 2/4] gsdx: add atomic for SW Renderer V2: fix assertion --- plugins/GSdx/GSRendererSW.cpp | 88 +++++++++++++++-------------------- plugins/GSdx/GSRendererSW.h | 8 ++-- 2 files changed, 42 insertions(+), 54 deletions(-) diff --git a/plugins/GSdx/GSRendererSW.cpp 
b/plugins/GSdx/GSRendererSW.cpp index 86dc453212..6e7f6bb9e3 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -45,8 +45,12 @@ GSRendererSW::GSRendererSW(int threads) m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); - memset(m_fzb_pages, 0, sizeof(m_fzb_pages)); - memset(m_tex_pages, 0, sizeof(m_tex_pages)); + for (uint32 i = 0; i < countof(m_fzb_pages); i++) { + m_fzb_pages[i] = 0; + } + for (uint32 i = 0; i < countof(m_tex_pages); i++) { + m_tex_pages[i] = 0; + } #define InitCVB(P) \ m_cvb[P][0][0] = &GSRendererSW::ConvertVertexBuffer; \ @@ -749,60 +753,44 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS } } -__forceinline void Increment16(volatile short* lpAddend) +void GSRendererSW::UsePages(const uint32* pages, const int type) { - // (*lpAddend)++; - - _InterlockedIncrement16(lpAddend); -} - -__forceinline void Decrement16(volatile short* lpAddend) -{ - // (*lpAddend)--; - - _InterlockedDecrement16(lpAddend); -} - -void GSRendererSW::UsePages(const uint32* pages, int type) -{ - if(type < 2) - { - for(const uint32* p = pages; *p != GSOffset::EOP; p++) - { - ASSERT(((short*)&m_fzb_pages[*p])[type] < SHRT_MAX); - - Increment16((short*)&m_fzb_pages[*p] + type); - } - } - else - { - for(const uint32* p = pages; *p != GSOffset::EOP; p++) - { - ASSERT(m_tex_pages[*p] < SHRT_MAX); - - Increment16((short*)&m_tex_pages[*p]); + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { + switch (type) { + case 0: + ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX); + m_fzb_pages[*p] += 1; + break; + case 1: + ASSERT((m_fzb_pages[*p] >> 16) < USHRT_MAX); + m_fzb_pages[*p] += 0x10000; + break; + case 2: + ASSERT(m_tex_pages[*p] < USHRT_MAX); + m_tex_pages[*p] += 1; + break; + default:break; } } } -void GSRendererSW::ReleasePages(const uint32* pages, int type) +void GSRendererSW::ReleasePages(const uint32* pages, const int type) { - if(type < 2) - { - for(const uint32* p = pages; *p != 
GSOffset::EOP; p++) - { - ASSERT(((short*)&m_fzb_pages[*p])[type] > 0); - - Decrement16((short*)&m_fzb_pages[*p] + type); - } - } - else - { - for(const uint32* p = pages; *p != GSOffset::EOP; p++) - { - ASSERT(m_tex_pages[*p] > 0); - - Decrement16((short*)&m_tex_pages[*p]); + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { + switch (type) { + case 0: + ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0); + m_fzb_pages[*p] -= 1; + break; + case 1: + ASSERT((m_fzb_pages[*p] >> 16) > 0); + m_fzb_pages[*p] -= 0x10000; + break; + case 2: + ASSERT(m_tex_pages[*p] > 0); + m_tex_pages[*p] -= 1; + break; + default:break; } } } diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index 37c62e3462..b7b66c145c 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -71,8 +71,8 @@ protected: GSPixelOffset4* m_fzb; GSVector4i m_fzb_bbox; uint32 m_fzb_cur_pages[16]; - uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved - uint16 m_tex_pages[512]; + std::atomic<uint32> m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved + std::atomic<uint16> m_tex_pages[512]; uint32 m_tmp_pages[512 + 1]; void Reset(); @@ -86,8 +86,8 @@ protected: void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); - void UsePages(const uint32* pages, int type); - void ReleasePages(const uint32* pages, int type); + void UsePages(const uint32* pages, const int type); + void ReleasePages(const uint32* pages, const int type); bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r); bool CheckSourcePages(SharedData* sd); From a601991f91de91b13f3437c5e8eee77fd77a4bc3 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 28 Feb 2015 11:01:38 +0100 Subject: [PATCH 3/4] gsdx: add atomic for transaction scope object --- plugins/GSdx/GSThread.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) 
diff --git a/plugins/GSdx/GSThread.h b/plugins/GSdx/GSThread.h index f916219d8b..9594a793f3 100644 --- a/plugins/GSdx/GSThread.h +++ b/plugins/GSdx/GSThread.h @@ -30,30 +30,31 @@ class TransactionScope public: class Lock { - volatile long state; + std::atomic<bool> state; public: - Lock() - : state(0) + Lock() + : state(false) { } void lock() { - while(_InterlockedCompareExchange(&state, 1, 0) != 0) + bool expected_value = false; + while(state.compare_exchange_strong(expected_value, true)) { - do {_mm_pause();} while(state == 1); + do {_mm_pause();} while(state); } } - void unlock() + void unlock() { - _InterlockedExchange(&state, 0); + state = false; } - bool isLocked() const + bool isLocked() const { - return state == 1; + return state.load(); } }; @@ -63,7 +64,7 @@ private: TransactionScope(); public: - TransactionScope(Lock& fallBackLock_, int max_retries = 3) + TransactionScope(Lock& fallBackLock_, int max_retries = 3) : fallBackLock(fallBackLock_) { // The TSX (RTM/HLE) instructions on Intel AVX2 CPUs may either be @@ -74,7 +75,7 @@ public: #if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__) int nretries = 0; - + while(1) { ++nretries; @@ -85,7 +86,7 @@ public: { if(!fallBackLock.isLocked()) return; - _xabort(0xff); + _xabort(0xff); } if((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff && !(status & _XABORT_NESTED)) @@ -97,7 +98,7 @@ public: break; } - if(nretries >= max_retries) + if(nretries >= max_retries) { break; } From f9b4ff17a5324b7db4166d80114f1c0443f81d66 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 28 Feb 2015 11:02:38 +0100 Subject: [PATCH 4/4] gsdx: drop interlocked* function on linux --- plugins/GSdx/stdafx.h | 74 ------------------------------------------- 1 file changed, 74 deletions(-) diff --git a/plugins/GSdx/stdafx.h b/plugins/GSdx/stdafx.h index 082aa4440a..99d6869919 100644 --- a/plugins/GSdx/stdafx.h +++ b/plugins/GSdx/stdafx.h @@ -387,80 +387,6 @@ struct aligned_free_second {template<class T> void operator()(T& p) 
{_aligned_fr return Mask ? 1 : 0; } - __forceinline unsigned char _interlockedbittestandreset(volatile long* a, const long b) - { - unsigned char retval; - - __asm__("lock; btrl %k[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory"); - - return retval; - } - - __forceinline unsigned char _interlockedbittestandset(volatile long* a, const long b) - { - unsigned char retval; - - __asm__("lock; btsl %k[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory"); - - return retval; - } - - __forceinline long _InterlockedCompareExchange(volatile long* const Destination, const long Exchange, const long Comperand) - { - long retval = Comperand; - - __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory"); - - return retval; - } - - __forceinline long _InterlockedExchange(volatile long* const Target, const long Value) - { - long retval = Value; - - __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory"); - - return retval; - } - - __forceinline long _InterlockedExchangeAdd(volatile long* const Addend, const long Value) - { - long retval = Value; - - __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory"); - - return retval; - } - - __forceinline short _InterlockedExchangeAdd16(volatile short* const Addend, const short Value) - { - short retval = Value; - - __asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory"); - - return retval; - } - - __forceinline long _InterlockedDecrement(volatile long* const lpAddend) - { - return _InterlockedExchangeAdd(lpAddend, -1) - 1; - } - - __forceinline long _InterlockedIncrement(volatile long* const lpAddend) - { - return _InterlockedExchangeAdd(lpAddend, 1) + 1; - } - - __forceinline short _InterlockedDecrement16(volatile short* 
const lpAddend) - { - return _InterlockedExchangeAdd16(lpAddend, -1) - 1; - } - - __forceinline short _InterlockedIncrement16(volatile short* const lpAddend) - { - return _InterlockedExchangeAdd16(lpAddend, 1) + 1; - } - #ifdef __GNUC__ // gcc 4.8 define __rdtsc but unfortunately the compiler crash...