Merge pull request #467 from PCSX2/atomic

Replace volatile with real atomic
This commit is contained in:
Gregory Hainaut 2016-02-22 21:20:03 +01:00
commit c0e58e48ae
6 changed files with 60 additions and 145 deletions

View File

@ -45,8 +45,12 @@ GSRendererSW::GSRendererSW(int threads)
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
memset(m_fzb_pages, 0, sizeof(m_fzb_pages)); for (uint32 i = 0; i < countof(m_fzb_pages); i++) {
memset(m_tex_pages, 0, sizeof(m_tex_pages)); m_fzb_pages[i] = 0;
}
for (uint32 i = 0; i < countof(m_tex_pages); i++) {
m_tex_pages[i] = 0;
}
#define InitCVB(P) \ #define InitCVB(P) \
m_cvb[P][0][0] = &GSRendererSW::ConvertVertexBuffer<P, 0, 0>; \ m_cvb[P][0][0] = &GSRendererSW::ConvertVertexBuffer<P, 0, 0>; \
@ -749,60 +753,44 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
} }
} }
__forceinline void Increment16(volatile short* lpAddend) void GSRendererSW::UsePages(const uint32* pages, const int type)
{ {
// (*lpAddend)++; for(const uint32* p = pages; *p != GSOffset::EOP; p++) {
switch (type) {
_InterlockedIncrement16(lpAddend); case 0:
} ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX);
m_fzb_pages[*p] += 1;
__forceinline void Decrement16(volatile short* lpAddend) break;
{ case 1:
// (*lpAddend)--; ASSERT((m_fzb_pages[*p] >> 16) < USHRT_MAX);
m_fzb_pages[*p] += 0x10000;
_InterlockedDecrement16(lpAddend); break;
} case 2:
ASSERT(m_tex_pages[*p] < USHRT_MAX);
void GSRendererSW::UsePages(const uint32* pages, int type) m_tex_pages[*p] += 1;
{ break;
if(type < 2) default:break;
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*p])[type] < SHRT_MAX);
Increment16((short*)&m_fzb_pages[*p] + type);
}
}
else
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(m_tex_pages[*p] < SHRT_MAX);
Increment16((short*)&m_tex_pages[*p]);
} }
} }
} }
void GSRendererSW::ReleasePages(const uint32* pages, int type) void GSRendererSW::ReleasePages(const uint32* pages, const int type)
{ {
if(type < 2) for(const uint32* p = pages; *p != GSOffset::EOP; p++) {
{ switch (type) {
for(const uint32* p = pages; *p != GSOffset::EOP; p++) case 0:
{ ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0);
ASSERT(((short*)&m_fzb_pages[*p])[type] > 0); m_fzb_pages[*p] -= 1;
break;
Decrement16((short*)&m_fzb_pages[*p] + type); case 1:
} ASSERT((m_fzb_pages[*p] >> 16) > 0);
} m_fzb_pages[*p] -= 0x10000;
else break;
{ case 2:
for(const uint32* p = pages; *p != GSOffset::EOP; p++) ASSERT(m_tex_pages[*p] > 0);
{ m_tex_pages[*p] -= 1;
ASSERT(m_tex_pages[*p] > 0); break;
default:break;
Decrement16((short*)&m_tex_pages[*p]);
} }
} }
} }

View File

@ -71,8 +71,8 @@ protected:
GSPixelOffset4* m_fzb; GSPixelOffset4* m_fzb;
GSVector4i m_fzb_bbox; GSVector4i m_fzb_bbox;
uint32 m_fzb_cur_pages[16]; uint32 m_fzb_cur_pages[16];
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved std::atomic<uint32> m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
uint16 m_tex_pages[512]; std::atomic<uint16> m_tex_pages[512];
uint32 m_tmp_pages[512 + 1]; uint32 m_tmp_pages[512 + 1];
void Reset(); void Reset();
@ -86,8 +86,8 @@ protected:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void UsePages(const uint32* pages, int type); void UsePages(const uint32* pages, const int type);
void ReleasePages(const uint32* pages, int type); void ReleasePages(const uint32* pages, const int type);
bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r); bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r);
bool CheckSourcePages(SharedData* sd); bool CheckSourcePages(SharedData* sd);

View File

@ -24,8 +24,8 @@
#include "GSPng.h" #include "GSPng.h"
GSTextureSW::GSTextureSW(int type, int width, int height) GSTextureSW::GSTextureSW(int type, int width, int height)
: m_mapped(0)
{ {
m_mapped.clear();
m_size = GSVector2i(width, height); m_size = GSVector2i(width, height);
m_type = type; m_type = type;
m_format = 0; m_format = 0;
@ -68,7 +68,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r)
if(m_data != NULL && r2.left >= 0 && r2.right <= m_size.x && r2.top >= 0 && r2.bottom <= m_size.y) if(m_data != NULL && r2.left >= 0 && r2.right <= m_size.x && r2.top >= 0 && r2.bottom <= m_size.y)
{ {
if(!_interlockedbittestandset(&m_mapped, 0)) while(m_mapped.test_and_set()) {}
{ {
m.bits = (uint8*)m_data + ((m_pitch * r2.top + r2.left) << 2); m.bits = (uint8*)m_data + ((m_pitch * r2.top + r2.left) << 2);
m.pitch = m_pitch; m.pitch = m_pitch;
@ -82,7 +82,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r)
void GSTextureSW::Unmap() void GSTextureSW::Unmap()
{ {
m_mapped = 0; m_mapped.clear();
} }
#ifndef _WIN32 #ifndef _WIN32

View File

@ -29,7 +29,7 @@ class GSTextureSW : public GSTexture
int m_pitch; int m_pitch;
void* m_data; void* m_data;
long m_mapped; std::atomic_flag m_mapped;
public: public:
GSTextureSW(int type, int width, int height); GSTextureSW(int type, int width, int height);

View File

@ -30,30 +30,31 @@ class TransactionScope
public: public:
// Simple test-and-test-and-set spinlock used as the fallback path when a
// TSX transaction aborts. Built on std::atomic<bool> (true = held).
class Lock
{
	std::atomic<bool> state;

public:
	Lock()
		: state(false)
	{
	}

	// Acquire the lock, spinning until it becomes free.
	void lock()
	{
		bool expected_value = false;

		// BUGFIX: compare_exchange_strong returns true when the exchange
		// SUCCEEDED (i.e. we acquired the lock), so we must loop while it
		// FAILS. The previous condition was inverted: a successful acquire
		// entered the wait loop (self-deadlock) and a failed one fell
		// through without holding the lock.
		while(!state.compare_exchange_strong(expected_value, true))
		{
			// Spin read-only on the flag before retrying the CAS, to avoid
			// hammering the cache line with writes.
			do { _mm_pause(); } while(state.load());

			// A failed CAS stores the observed value (true) into
			// expected_value; reset it before the next attempt.
			expected_value = false;
		}
	}

	// Release the lock.
	void unlock()
	{
		state.store(false);
	}

	// True while some thread holds the lock.
	bool isLocked() const
	{
		return state.load();
	}
};
@ -63,7 +64,7 @@ private:
TransactionScope(); TransactionScope();
public: public:
TransactionScope(Lock& fallBackLock_, int max_retries = 3) TransactionScope(Lock& fallBackLock_, int max_retries = 3)
: fallBackLock(fallBackLock_) : fallBackLock(fallBackLock_)
{ {
// The TSX (RTM/HLE) instructions on Intel AVX2 CPUs may either be // The TSX (RTM/HLE) instructions on Intel AVX2 CPUs may either be
@ -74,7 +75,7 @@ public:
#if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__) #if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__)
int nretries = 0; int nretries = 0;
while(1) while(1)
{ {
++nretries; ++nretries;
@ -85,7 +86,7 @@ public:
{ {
if(!fallBackLock.isLocked()) return; if(!fallBackLock.isLocked()) return;
_xabort(0xff); _xabort(0xff);
} }
if((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff && !(status & _XABORT_NESTED)) if((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff && !(status & _XABORT_NESTED))
@ -97,7 +98,7 @@ public:
break; break;
} }
if(nretries >= max_retries) if(nretries >= max_retries)
{ {
break; break;
} }

View File

@ -387,80 +387,6 @@ struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr
return Mask ? 1 : 0; return Mask ? 1 : 0;
} }
// GCC emulation of the MSVC intrinsic: atomically clears bit 'b' of *a
// (lock btr) and returns the bit's previous value (setb = set-if-carry;
// btr leaves the old bit in CF).
__forceinline unsigned char _interlockedbittestandreset(volatile long* a, const long b)
{
unsigned char retval;
__asm__("lock; btrl %k[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
return retval;
}
// GCC emulation of the MSVC intrinsic: atomically sets bit 'b' of *a
// (lock bts) and returns the bit's previous value (setc copies CF, which
// bts loaded with the old bit).
__forceinline unsigned char _interlockedbittestandset(volatile long* a, const long b)
{
unsigned char retval;
__asm__("lock; btsl %k[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
return retval;
}
// GCC emulation of the MSVC intrinsic: atomic compare-and-swap. If
// *Destination == Comperand it stores Exchange; either way cmpxchg leaves
// the previous value of *Destination in EAX, which is what is returned.
// NOTE(review): *Destination is declared as an input-only "m" operand even
// though cmpxchg may write it; the "memory" clobber is what prevents the
// compiler from caching it -- "+m" would express the intent more precisely.
__forceinline long _InterlockedCompareExchange(volatile long* const Destination, const long Exchange, const long Comperand)
{
long retval = Comperand;
__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
return retval;
}
// GCC emulation of the MSVC intrinsic: atomically stores Value into *Target
// and returns the previous value. xchg with a memory operand is implicitly
// locked, so no "lock" prefix is needed.
// NOTE(review): *Target is an input-only "m" operand although xchg writes
// it; only the "memory" clobber covers that -- "+m" would be more precise.
__forceinline long _InterlockedExchange(volatile long* const Target, const long Value)
{
long retval = Value;
__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
return retval;
}
// GCC emulation of the MSVC intrinsic: atomically adds Value to *Addend and
// returns the previous value (lock xadd swaps the old *Addend into the
// source register while storing the sum).
// NOTE(review): *Addend is an input-only "m" operand although xadd writes
// it; the "memory" clobber is doing the heavy lifting here.
__forceinline long _InterlockedExchangeAdd(volatile long* const Addend, const long Value)
{
long retval = Value;
__asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
return retval;
}
// 16-bit variant of _InterlockedExchangeAdd: atomically adds Value to
// *Addend (lock xaddw) and returns the previous 16-bit value.
__forceinline short _InterlockedExchangeAdd16(volatile short* const Addend, const short Value)
{
short retval = Value;
__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
return retval;
}
// Atomically decrements *lpAddend and, matching Win32 semantics, returns
// the NEW (decremented) value: xadd yields the old value, so subtract 1.
__forceinline long _InterlockedDecrement(volatile long* const lpAddend)
{
return _InterlockedExchangeAdd(lpAddend, -1) - 1;
}
// Atomically increments *lpAddend and, matching Win32 semantics, returns
// the NEW (incremented) value: xadd yields the old value, so add 1.
__forceinline long _InterlockedIncrement(volatile long* const lpAddend)
{
return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}
// 16-bit variant: atomically decrements *lpAddend and returns the NEW
// (decremented) value, matching Win32 semantics.
__forceinline short _InterlockedDecrement16(volatile short* const lpAddend)
{
return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
}
// 16-bit variant: atomically increments *lpAddend and returns the NEW
// (incremented) value, matching Win32 semantics.
__forceinline short _InterlockedIncrement16(volatile short* const lpAddend)
{
return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
}
#ifdef __GNUC__ #ifdef __GNUC__
// gcc 4.8 define __rdtsc but unfortunately the compiler crash... // gcc 4.8 define __rdtsc but unfortunately the compiler crash...