Merge pull request #467 from PCSX2/atomic

Replace volatile with real atomic
Gregory Hainaut 2016-02-22 21:20:03 +01:00
commit c0e58e48ae
6 changed files with 60 additions and 145 deletions

View File

@@ -45,8 +45,12 @@ GSRendererSW::GSRendererSW(int threads)
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
memset(m_fzb_pages, 0, sizeof(m_fzb_pages));
memset(m_tex_pages, 0, sizeof(m_tex_pages));
for (uint32 i = 0; i < countof(m_fzb_pages); i++) {
m_fzb_pages[i] = 0;
}
for (uint32 i = 0; i < countof(m_tex_pages); i++) {
m_tex_pages[i] = 0;
}
#define InitCVB(P) \
m_cvb[P][0][0] = &GSRendererSW::ConvertVertexBuffer<P, 0, 0>; \
@@ -749,60 +753,44 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
}
}
__forceinline void Increment16(volatile short* lpAddend)
void GSRendererSW::UsePages(const uint32* pages, const int type)
{
// (*lpAddend)++;
_InterlockedIncrement16(lpAddend);
}
__forceinline void Decrement16(volatile short* lpAddend)
{
// (*lpAddend)--;
_InterlockedDecrement16(lpAddend);
}
void GSRendererSW::UsePages(const uint32* pages, int type)
{
if(type < 2)
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*p])[type] < SHRT_MAX);
Increment16((short*)&m_fzb_pages[*p] + type);
}
}
else
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(m_tex_pages[*p] < SHRT_MAX);
Increment16((short*)&m_tex_pages[*p]);
for(const uint32* p = pages; *p != GSOffset::EOP; p++) {
switch (type) {
case 0:
ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX);
m_fzb_pages[*p] += 1;
break;
case 1:
ASSERT((m_fzb_pages[*p] >> 16) < USHRT_MAX);
m_fzb_pages[*p] += 0x10000;
break;
case 2:
ASSERT(m_tex_pages[*p] < USHRT_MAX);
m_tex_pages[*p] += 1;
break;
default:break;
}
}
}
void GSRendererSW::ReleasePages(const uint32* pages, int type)
void GSRendererSW::ReleasePages(const uint32* pages, const int type)
{
if(type < 2)
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*p])[type] > 0);
Decrement16((short*)&m_fzb_pages[*p] + type);
}
}
else
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(m_tex_pages[*p] > 0);
Decrement16((short*)&m_tex_pages[*p]);
for(const uint32* p = pages; *p != GSOffset::EOP; p++) {
switch (type) {
case 0:
ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0);
m_fzb_pages[*p] -= 1;
break;
case 1:
ASSERT((m_fzb_pages[*p] >> 16) > 0);
m_fzb_pages[*p] -= 0x10000;
break;
case 2:
ASSERT(m_tex_pages[*p] > 0);
m_tex_pages[*p] -= 1;
break;
default:break;
}
}
}
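With the page tables declared as std::atomic (see the header change below), the memset calls above have to become plain element-wise stores, and the separate Increment16/Decrement16 helpers disappear: the frame and z-buffer reference counts stay packed in the low and high halves of one 32-bit word, so bumping either half is a single atomic add of 1 or 0x10000. The following is a minimal standalone sketch of that scheme with illustrative names only (use_page, release_page and the fixed 512-entry tables are not the renderer's code):

#include <atomic>
#include <cassert>
#include <cstdint>

// Low 16 bits: frame-buffer refcount, high 16 bits: z-buffer refcount.
std::atomic<uint32_t> fzb_pages[512];
std::atomic<uint16_t> tex_pages[512];

void reset_pages()
{
    // std::atomic is not trivially copyable, so zero each element instead of memset.
    for (size_t i = 0; i < 512; i++) {
        fzb_pages[i].store(0, std::memory_order_relaxed);
        tex_pages[i].store(0, std::memory_order_relaxed);
    }
}

void use_page(uint32_t page, int type)        // type 0 = frame, 1 = zbuf, 2 = texture
{
    if (type == 2) { tex_pages[page] += 1; return; }
    fzb_pages[page] += ((type == 0) ? 1u : 0x10000u);   // one atomic read-modify-write
}

void release_page(uint32_t page, int type)
{
    if (type == 2) { tex_pages[page] -= 1; return; }
    const uint32_t step = (type == 0) ? 1u : 0x10000u;
    const uint32_t prev = fzb_pages[page].fetch_sub(step);
    assert((type == 0 ? (prev & 0xFFFF) : (prev >> 16)) > 0);   // catch refcount underflow
}

On x86 each of these operations is still a single lock-prefixed instruction, so the cost stays comparable to the old volatile-plus-Interlocked counters while dropping the casts into the packed word.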

View File

@@ -71,8 +71,8 @@ protected:
GSPixelOffset4* m_fzb;
GSVector4i m_fzb_bbox;
uint32 m_fzb_cur_pages[16];
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
uint16 m_tex_pages[512];
std::atomic<uint32> m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
std::atomic<uint16> m_tex_pages[512];
uint32 m_tmp_pages[512 + 1];
void Reset();
@@ -86,8 +86,8 @@ protected:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void UsePages(const uint32* pages, int type);
void ReleasePages(const uint32* pages, int type);
void UsePages(const uint32* pages, const int type);
void ReleasePages(const uint32* pages, const int type);
bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r);
bool CheckSourcePages(SharedData* sd);
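Since these counters are touched for every rendered page, it is worth confirming that the chosen atomic widths are lock-free on the target. A quick check along these lines (purely illustrative, not part of the commit) is enough:

#include <atomic>
#include <cstdint>
#include <cstdio>

int main()
{
    std::atomic<uint32_t> a32;
    std::atomic<uint16_t> a16;
    // Mainstream x86/x64 toolchains report true for both, so the new arrays
    // are no heavier than the volatile counters they replace.
    std::printf("32-bit lock-free: %d, 16-bit lock-free: %d\n",
                (int)a32.is_lock_free(), (int)a16.is_lock_free());
    return 0;
}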

View File

@@ -24,8 +24,8 @@
#include "GSPng.h"
GSTextureSW::GSTextureSW(int type, int width, int height)
: m_mapped(0)
{
m_mapped.clear();
m_size = GSVector2i(width, height);
m_type = type;
m_format = 0;
@@ -68,7 +68,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r)
if(m_data != NULL && r2.left >= 0 && r2.right <= m_size.x && r2.top >= 0 && r2.bottom <= m_size.y)
{
if(!_interlockedbittestandset(&m_mapped, 0))
while(m_mapped.test_and_set()) {}
{
m.bits = (uint8*)m_data + ((m_pitch * r2.top + r2.left) << 2);
m.pitch = m_pitch;
@@ -82,7 +82,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r)
void GSTextureSW::Unmap()
{
m_mapped = 0;
m_mapped.clear();
}
#ifndef _WIN32
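In GSTextureSW the mapped flag becomes a std::atomic_flag, the one atomic type the standard guarantees to be lock-free. It has no constructor taking 0, which is why the old m_mapped(0) member initializer turns into an explicit clear() in the constructor body, and the bit-test intrinsic becomes test_and_set()/clear(). A small sketch of the same acquire/release pattern (illustrative class, not the plugin's):

#include <atomic>

class MappableTexture
{
    std::atomic_flag m_mapped;                 // cleared = "not mapped"

public:
    MappableTexture() { m_mapped.clear(); }    // no portable initializer before C++20, so clear in the body

    void map()
    {
        // test_and_set() returns the previous state: spin until we observe
        // "clear" and set it in one atomic step, i.e. until we own the mapping.
        while (m_mapped.test_and_set(std::memory_order_acquire)) { /* spin */ }
    }

    void unmap()
    {
        m_mapped.clear(std::memory_order_release);   // hand the mapping back
    }
};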

View File

@@ -29,7 +29,7 @@ class GSTextureSW : public GSTexture
int m_pitch;
void* m_data;
long m_mapped;
std::atomic_flag m_mapped;
public:
GSTextureSW(int type, int width, int height);

View File

@@ -30,30 +30,31 @@ class TransactionScope
public:
class Lock
{
volatile long state;
std::atomic<bool> state;
public:
Lock()
: state(0)
Lock()
: state(false)
{
}
void lock()
{
while(_InterlockedCompareExchange(&state, 1, 0) != 0)
bool expected_value = false;
while(state.compare_exchange_strong(expected_value, true))
{
do {_mm_pause();} while(state == 1);
do {_mm_pause();} while(state);
}
}
void unlock()
void unlock()
{
_InterlockedExchange(&state, 0);
state = false;
}
bool isLocked() const
bool isLocked() const
{
return state == 1;
return state.load();
}
};
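The Lock class keeps the test-and-test-and-set shape of the old Interlocked version, just over std::atomic<bool>. One detail worth keeping in mind with this pattern: compare_exchange_strong returns true when it performed the swap and overwrites its expected argument with the observed value when it did not, so the conventional loop spins on the negated result and resets expected on every retry. A sketch of that idiom (illustrative, not the committed code verbatim):

#include <atomic>
#include <immintrin.h>   // _mm_pause

class SpinLock
{
    std::atomic<bool> locked{false};

public:
    void lock()
    {
        bool expected = false;
        // compare_exchange_strong returns true when it swaps false -> true,
        // i.e. when we acquired the lock; keep trying while it fails.
        while (!locked.compare_exchange_strong(expected, true, std::memory_order_acquire)) {
            // Wait on cheap relaxed loads until the holder releases,
            // instead of hammering the cache line with failed CAS attempts.
            do { _mm_pause(); } while (locked.load(std::memory_order_relaxed));
            expected = false;   // the failed CAS overwrote it with the observed value
        }
    }

    void unlock() { locked.store(false, std::memory_order_release); }

    bool isLocked() const { return locked.load(); }
};

The inner _mm_pause loop mirrors the original code's behaviour on contention; only the acquisition test is expressed through the standard CAS interface.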
@@ -63,7 +64,7 @@ private:
TransactionScope();
public:
TransactionScope(Lock& fallBackLock_, int max_retries = 3)
TransactionScope(Lock& fallBackLock_, int max_retries = 3)
: fallBackLock(fallBackLock_)
{
// The TSX (RTM/HLE) instructions on Intel AVX2 CPUs may either be
@@ -74,7 +75,7 @@ public:
#if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__)
int nretries = 0;
while(1)
{
++nretries;
@@ -85,7 +86,7 @@
{
if(!fallBackLock.isLocked()) return;
_xabort(0xff);
_xabort(0xff);
}
if((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff && !(status & _XABORT_NESTED))
@@ -97,7 +98,7 @@
break;
}
if(nretries >= max_retries)
if(nretries >= max_retries)
{
break;
}
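TransactionScope itself only changes its lock type; the strategy is unchanged: run the critical section as an RTM transaction, abort explicitly if the fallback lock is already held, and take that lock for real after too many retries. A condensed sketch of that strategy follows (function and variable names are invented, and the real class's _XABORT_* status handling is simplified away):

#include <atomic>
#include <immintrin.h>   // _xbegin / _xend / _xabort, requires RTM support (e.g. -mrtm)

std::atomic<bool> fallback_lock{false};   // stand-in for TransactionScope::Lock

template <class F>
void run_transactional(F critical_section, int max_retries = 3)
{
    for (int attempt = 0; attempt < max_retries; ++attempt) {
        if (_xbegin() == _XBEGIN_STARTED) {
            // Mirror the class: abort the transaction if the fallback lock is held,
            // otherwise the transactional and locked paths could overlap.
            if (fallback_lock.load(std::memory_order_relaxed)) _xabort(0xff);
            critical_section();
            _xend();
            return;
        }
        // Transaction aborted; loop and try again.
    }
    // Too many aborts: take the software lock instead.
    bool expected = false;
    while (!fallback_lock.compare_exchange_strong(expected, true)) expected = false;
    critical_section();
    fallback_lock.store(false, std::memory_order_release);
}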

View File

@@ -387,80 +387,6 @@ struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr
return Mask ? 1 : 0;
}
__forceinline unsigned char _interlockedbittestandreset(volatile long* a, const long b)
{
unsigned char retval;
__asm__("lock; btrl %k[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
return retval;
}
__forceinline unsigned char _interlockedbittestandset(volatile long* a, const long b)
{
unsigned char retval;
__asm__("lock; btsl %k[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
return retval;
}
__forceinline long _InterlockedCompareExchange(volatile long* const Destination, const long Exchange, const long Comperand)
{
long retval = Comperand;
__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
return retval;
}
__forceinline long _InterlockedExchange(volatile long* const Target, const long Value)
{
long retval = Value;
__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
return retval;
}
__forceinline long _InterlockedExchangeAdd(volatile long* const Addend, const long Value)
{
long retval = Value;
__asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
return retval;
}
__forceinline short _InterlockedExchangeAdd16(volatile short* const Addend, const short Value)
{
short retval = Value;
__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
return retval;
}
__forceinline long _InterlockedDecrement(volatile long* const lpAddend)
{
return _InterlockedExchangeAdd(lpAddend, -1) - 1;
}
__forceinline long _InterlockedIncrement(volatile long* const lpAddend)
{
return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}
__forceinline short _InterlockedDecrement16(volatile short* const lpAddend)
{
return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
}
__forceinline short _InterlockedIncrement16(volatile short* const lpAddend)
{
return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
}
#ifdef __GNUC__
// gcc 4.8 define __rdtsc but unfortunately the compiler crash...
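The GCC inline-assembly shims for the MSVC Interlocked intrinsics can be deleted because every caller now goes through <atomic>. For orientation, the removed operations map onto standard calls roughly as follows (a sketch, not code from the commit; note that the Interlocked increment/decrement variants return the new value, whereas fetch_add/fetch_sub return the previous one):

#include <atomic>

void equivalents(std::atomic<long>& a, std::atomic<short>& s)
{
    long old;
    old = a.exchange(5);                     // _InterlockedExchange
    old = a.fetch_add(5);                    // _InterlockedExchangeAdd
    old = s.fetch_add(5);                    // _InterlockedExchangeAdd16
    a.fetch_add(1);                          // _InterlockedIncrement
    a.fetch_sub(1);                          // _InterlockedDecrement
    s.fetch_add(1);                          // _InterlockedIncrement16
    s.fetch_sub(1);                          // _InterlockedDecrement16

    long expected = 0;
    a.compare_exchange_strong(expected, 1);  // _InterlockedCompareExchange; on failure
                                             // `expected` now holds the value that was seen

    const int bit = 3;                       // _interlockedbittestandset / ...reset:
    bool old_set   = (a.fetch_or(1L << bit)     >> bit) & 1;   // previous state of the bit
    bool old_reset = (a.fetch_and(~(1L << bit)) >> bit) & 1;
    (void)old; (void)old_set; (void)old_reset;
}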