Atomic operations library.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3775 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Nolan Check 2009-07-13 05:38:34 +00:00
parent 3070e159c6
commit d779554ff3
10 changed files with 258 additions and 115 deletions

View File

@ -556,6 +556,18 @@
RelativePath=".\Src\ABI.h"
>
</File>
<File
RelativePath=".\Src\Atomic.h"
>
</File>
<File
RelativePath=".\Src\Atomic_GCC.h"
>
</File>
<File
RelativePath=".\Src\Atomic_Win32.h"
>
</File>
<File
RelativePath=".\Src\BreakPoints.cpp"
>

View File

@ -0,0 +1,32 @@
// Copyright (C) 2003-2009 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _ATOMIC_H_
#define _ATOMIC_H_
#ifdef _WIN32
#include "Atomic_Win32.h"
#else
// GCC-compatible compiler assumed!
#include "Atomic_GCC.h"
#endif
#endif

View File

@ -0,0 +1,110 @@
// Copyright (C) 2003-2009 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _ATOMIC_GCC_H_
#define _ATOMIC_GCC_H_
#include "Common.h"
// Atomic operations are performed in a single step by the CPU. It is
// impossible for other threads to see the operation "half-done."
//
// Some atomic operations can be combined with different types of memory
// barriers called "Acquire semantics" and "Release semantics", defined below.
//
// Acquire semantics: Future memory accesses cannot be relocated to before the
// operation.
//
// Release semantics: Past memory accesses cannot be relocated to after the
// operation.
//
// These barriers affect not only the compiler, but also the CPU.
namespace Common
{

// Atomically adds `value` to `target`.
// The GCC __sync builtins operate on a POINTER to the operand, so the
// address of the referenced word is passed. The result is discarded.
inline void AtomicAdd(volatile u32& target, u32 value) {
    __sync_add_and_fetch(&target, value);
}

// Atomically adds 1 to `target`. The result is discarded.
inline void AtomicIncrement(volatile u32& target) {
    __sync_add_and_fetch(&target, 1);
}

// Plain load of `src` with no memory barrier.
inline u32 AtomicLoad(volatile u32& src) {
    return src; // 32-bit reads are always atomic.
}

// Load of `src` with acquire semantics: the value is read FIRST and the
// barrier is issued afterwards, so that later memory accesses cannot be
// relocated to before the load.
inline u32 AtomicLoadAcquire(volatile u32& src) {
    u32 result = src; // 32-bit reads are always atomic.
    __sync_synchronize();
    return result;
}

// Plain store of `value` into `dest` with no memory barrier.
inline void AtomicStore(volatile u32& dest, u32 value) {
    dest = value; // 32-bit writes are always atomic.
}

// Store of `value` into `dest` with release semantics: the barrier is
// issued BEFORE the store, so that earlier memory accesses cannot be
// relocated to after it. (__sync_lock_test_and_set is documented as an
// acquire barrier only, so it is not suitable here.)
inline void AtomicStoreRelease(volatile u32& dest, u32 value) {
    __sync_synchronize();
    dest = value; // 32-bit writes are always atomic.
}

}
// Old code kept here for reference in case we need the parts with __asm__ __volatile__.
#if 0
// Atomically increments *Dest and returns the INCREMENTED value on both
// code paths.
LONG SyncInterlockedIncrement(LONG *Dest)
{
#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__))
    // GCC 4.1 or newer: use the __sync builtin.
    return __sync_add_and_fetch(Dest, 1);
#else
    int result;
    __asm__ __volatile__("lock; xadd %0,%1"
        : "=r" (result), "=m" (*Dest)
        : "0" (1), "m" (*Dest)
        : "memory");
    // xadd leaves the value *Dest held BEFORE the addition in the register,
    // so add 1 to agree with the builtin branch above.
    return result + 1;
#endif
}
// Atomically adds Val to *Dest and returns the value *Dest held BEFORE the
// addition on both code paths.
LONG SyncInterlockedExchangeAdd(LONG *Dest, LONG Val)
{
#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__))
    // GCC 4.1 or newer: fetch-then-add returns the old value, which is what
    // the xadd fallback below returns as well.
    return __sync_fetch_and_add(Dest, Val);
#else
    int result;
    __asm__ __volatile__("lock; xadd %0,%1"
        : "=r" (result), "=m" (*Dest)
        : "0" (Val), "m" (*Dest)
        : "memory");
    return result; // xadd leaves the pre-addition value in the register.
#endif
}
// Atomically stores Val into *Dest and returns the value *Dest held before
// the exchange.
LONG SyncInterlockedExchange(LONG *Dest, LONG Val)
{
#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__))
    // __sync_lock_test_and_set is documented as an acquire barrier only.
    // Issue a full barrier first so earlier accesses cannot move past the
    // exchange, giving the same ordering as the locked xchg fallback.
    __sync_synchronize();
    return __sync_lock_test_and_set(Dest, Val);
#else
    int result;
    __asm__ __volatile__("lock; xchg %0,%1"
        : "=r" (result), "=m" (*Dest)
        : "0" (Val), "m" (*Dest)
        : "memory");
    return result;
#endif
}
#endif
#endif

View File

@ -0,0 +1,71 @@
// Copyright (C) 2003-2009 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _ATOMIC_WIN32_H_
#define _ATOMIC_WIN32_H_
#include "Common.h"
#include <Windows.h>
// Atomic operations are performed in a single step by the CPU. It is
// impossible for other threads to see the operation "half-done."
//
// Some atomic operations can be combined with different types of memory
// barriers called "Acquire semantics" and "Release semantics", defined below.
//
// Acquire semantics: Future memory accesses cannot be relocated to before the
// operation.
//
// Release semantics: Past memory accesses cannot be relocated to after the
// operation.
//
// These barriers affect not only the compiler, but also the CPU.
//
// NOTE: Acquire and Release are not differentiated right now. They perform a
// full memory barrier instead of a "one-way" memory barrier. The newest
// Windows SDK has Acquire and Release versions of some Interlocked* functions.
namespace Common
{
inline void AtomicAdd(volatile u32& target, u32 value) {
InterlockedAdd((volatile LONG*)&target, (LONG)value);
}
inline void AtomicIncrement(volatile u32& target) {
InterlockedIncrement((volatile LONG*)&target);
}
inline u32 AtomicLoad(volatile u32& src) {
return src; // 32-bit reads are always atomic.
}
inline u32 AtomicLoadAcquire(volatile u32& src) {
MemoryBarrier();
return src;
}
inline void AtomicStore(volatile u32& dest, u32 value) {
dest = value; // 32-bit writes are always atomic.
}
inline void AtomicStoreRelease(volatile u32& dest, u32 value) {
// InterlockedExchange includes a memory barrier as a bonus.
InterlockedExchange((volatile LONG*)&dest, (LONG)value);
}
}
#endif

View File

@ -31,10 +31,6 @@
namespace Common
{
#ifndef _WIN32
// Full memory fence for non-Win32 builds: issues a hardware and compiler
// barrier through the GCC __sync_synchronize builtin instead of doing
// nothing. (A GCC-compatible compiler is assumed on this path.)
void MemFence()
{
    __sync_synchronize();
}
#endif
#ifdef _WIN32
@ -325,21 +321,6 @@ void SetCurrentThreadName(const TCHAR* szThreadName)
__except(EXCEPTION_CONTINUE_EXECUTION)
{}
}
// TODO: check if ever inline
// Win32 build: forwards to InterlockedIncrement and hands back its result.
LONG SyncInterlockedIncrement(LONG *Dest)
{
    const LONG updated = InterlockedIncrement(Dest);
    return updated;
}
// Win32 build: forwards to InterlockedExchangeAdd and hands back its result.
LONG SyncInterlockedExchangeAdd(LONG *Dest, LONG Val)
{
    const LONG previous = InterlockedExchangeAdd(Dest, Val);
    return previous;
}
// Win32 build: forwards to InterlockedExchange and hands back its result.
LONG SyncInterlockedExchange(LONG *Dest, LONG Val)
{
    const LONG previous = InterlockedExchange(Dest, Val);
    return previous;
}
#else // !WIN32, so must be POSIX threads
@ -516,48 +497,6 @@ void Event::Wait()
pthread_mutex_unlock(&mutex_);
}
// Atomically increments *Dest and returns the INCREMENTED value on both
// code paths.
LONG SyncInterlockedIncrement(LONG *Dest)
{
#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__))
    // GCC 4.1 or newer: use the __sync builtin.
    return __sync_add_and_fetch(Dest, 1);
#else
    int result;
    __asm__ __volatile__("lock; xadd %0,%1"
        : "=r" (result), "=m" (*Dest)
        : "0" (1), "m" (*Dest)
        : "memory");
    // xadd leaves the value *Dest held BEFORE the addition in the register,
    // so add 1 to agree with the builtin branch above.
    return result + 1;
#endif
}
// Atomically adds Val to *Dest and returns the value *Dest held BEFORE the
// addition on both code paths.
LONG SyncInterlockedExchangeAdd(LONG *Dest, LONG Val)
{
#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__))
    // GCC 4.1 or newer: fetch-then-add returns the old value, which is what
    // the xadd fallback below returns as well.
    return __sync_fetch_and_add(Dest, Val);
#else
    int result;
    __asm__ __volatile__("lock; xadd %0,%1"
        : "=r" (result), "=m" (*Dest)
        : "0" (Val), "m" (*Dest)
        : "memory");
    return result; // xadd leaves the pre-addition value in the register.
#endif
}
// Atomically stores Val into *Dest and returns the value *Dest held before
// the exchange.
LONG SyncInterlockedExchange(LONG *Dest, LONG Val)
{
#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__))
    // __sync_lock_test_and_set is documented as an acquire barrier only.
    // Issue a full barrier first so earlier accesses cannot move past the
    // exchange, giving the same ordering as the locked xchg fallback.
    __sync_synchronize();
    return __sync_lock_test_and_set(Dest, Val);
#else
    int result;
    __asm__ __volatile__("lock; xchg %0,%1"
        : "=r" (result), "=m" (*Dest)
        : "0" (Val), "m" (*Dest)
        : "memory");
    return result;
#endif
}
#endif
} // namespace Common

View File

@ -72,18 +72,6 @@
namespace Common
{
// MemFence: Neither the compiler nor the CPU can reorder memory accesses
// beyond this barrier.
//
// On Win32 the fence is defined inline here and forwards to MemoryBarrier().
// On every other platform only a prototype is declared; the definition is
// expected to live in a source file.
#ifdef _WIN32
__forceinline void MemFence()
{
MemoryBarrier();
}
#else
// TODO: UNIX experts, please implement the memory fence.
void MemFence();
#endif
class CriticalSection
{
#ifdef _WIN32
@ -206,10 +194,6 @@ void SleepCurrentThread(int ms);
void SetCurrentThreadName(const char *name);
LONG SyncInterlockedExchangeAdd(LONG *Dest, LONG Val);
LONG SyncInterlockedExchange(LONG *Dest, LONG Val);
LONG SyncInterlockedIncrement(LONG *Dest);
} // namespace Common
#endif // _THREAD_H_

View File

@ -74,6 +74,7 @@
#include "../ConfigManager.h"
#include "MathUtil.h"
#include "Thread.h"
#include "Atomic.h"
#include "Memmap.h"
#include "PeripheralInterface.h"
@ -178,8 +179,8 @@ void UpdateInterrupts();
//inline void WriteLow (u32& _reg, u16 lowbits) {_reg = (_reg & 0xFFFF0000) | lowbits;}
//inline void WriteHigh(u32& _reg, u16 highbits) {_reg = (_reg & 0x0000FFFF) | ((u32)highbits << 16);}
// WriteLow / WriteHigh replace the low or high 16 bits of _reg and keep the
// other half. Both the SyncInterlockedExchange form and the AtomicStore form
// appear below.
// NOTE(review): in every variant the current value of _reg is read to build
// the new word and the store happens as a separate step, so the
// read-modify-write as a whole is not a single atomic operation -- only the
// final store is. Confirm that no other thread writes the opposite half
// concurrently.
inline void WriteLow (volatile u32& _reg, u16 lowbits) {Common::SyncInterlockedExchange((LONG*)&_reg,(_reg & 0xFFFF0000) | lowbits);}
inline void WriteHigh(volatile u32& _reg, u16 highbits) {Common::SyncInterlockedExchange((LONG*)&_reg,(_reg & 0x0000FFFF) | ((u32)highbits << 16));}
inline void WriteLow (volatile u32& _reg, u16 lowbits) {Common::AtomicStore(_reg,(_reg & 0xFFFF0000) | lowbits);}
inline void WriteHigh(volatile u32& _reg, u16 highbits) {Common::AtomicStore(_reg,(_reg & 0x0000FFFF) | ((u32)highbits << 16));}
// Returns the low 16 bits of _reg.
inline u16 ReadLow (u32 _reg)
{
    const u32 lowHalf = _reg & 0xFFFF;
    return (u16)lowHalf;
}
// Returns the high 16 bits of _reg.
inline u16 ReadHigh (u32 _reg)
{
    const u32 highHalf = _reg >> 16;
    return (u16)highHalf;
}
@ -189,7 +190,7 @@ int et_UpdateInterrupts;
// for GP watchdog hack
// Bumps fifo.Fake_GPWDToken through the Common atomic increment helpers.
// Takes no arguments and returns nothing.
void IncrementGPWDToken()
{
Common::SyncInterlockedIncrement((LONG*)&fifo.Fake_GPWDToken);
Common::AtomicIncrement(fifo.Fake_GPWDToken);
}
// Check every FAKE_GP_WATCHDOG_PERIOD if a PE-frame-finish occurred
@ -426,9 +427,9 @@ void Write16(const u16 _Value, const u32 _Address)
fake_CommandProcessorNotUsed = false;
UCPCtrlReg tmpCtrl(_Value);
Common::SyncInterlockedExchange((LONG*)&fifo.bFF_GPReadEnable, tmpCtrl.GPReadEnable);
Common::SyncInterlockedExchange((LONG*)&fifo.bFF_GPLinkEnable, tmpCtrl.GPLinkEnable);
Common::SyncInterlockedExchange((LONG*)&fifo.bFF_BPEnable, tmpCtrl.BPEnable);
Common::AtomicStore(fifo.bFF_GPReadEnable, tmpCtrl.GPReadEnable);
Common::AtomicStore(fifo.bFF_GPLinkEnable, tmpCtrl.GPLinkEnable);
Common::AtomicStore(fifo.bFF_BPEnable, tmpCtrl.BPEnable);
// TOCHECK (mb2): could BP irq be cleared with w16 to STATUS_REGISTER?
// funny hack: eg in MP1 if we disable the clear breakpoint ability by commenting this block
@ -600,7 +601,7 @@ void STACKALIGN GatherPipeBursted()
fifo.CPWritePointer += GPFifo::GATHER_PIPE_SIZE;
if (fifo.CPWritePointer >= fifo.CPEnd)
fifo.CPWritePointer = fifo.CPBase;
Common::SyncInterlockedExchangeAdd((LONG*)&fifo.CPReadWriteDistance, GPFifo::GATHER_PIPE_SIZE);
Common::AtomicAdd(fifo.CPReadWriteDistance, GPFifo::GATHER_PIPE_SIZE);
// High watermark overflow handling (hacked way)
u32 ct = 0;
@ -732,8 +733,8 @@ void UpdateFifoRegister()
dist = wp - rp;
else
dist = (wp - fifo.CPBase) + (fifo.CPEnd - rp);
//fifo.CPReadWriteDistance = dist;
Common::SyncInterlockedExchange((LONG*)&fifo.CPReadWriteDistance, dist);
Common::AtomicStore(fifo.CPReadWriteDistance, dist);
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore)
CatchUpGPU();

View File

@ -22,7 +22,7 @@
#include "Common.h"
#include "ChunkFile.h"
#include "Thread.h"
#include "Atomic.h"
#include "PixelEngine.h"
@ -360,7 +360,10 @@ void SetToken(const u16 _token, const int _bSetTokenAcknowledge)
{
// we do it directly from videoThread because of
// Super Monkey Ball
Common::SyncInterlockedExchange((LONG*)&CommandProcessor::fifo.PEToken, _token);
// XXX: No 16-bit atomic store available, so cheat and use 32-bit.
// That's what we've always done. We're counting on fifo.PEToken to be
// 4-byte padded.
Common::AtomicStore(*(volatile u32*)&CommandProcessor::fifo.PEToken, _token);
}
}

View File

@ -23,6 +23,7 @@
#endif
#include "MemoryUtil.h"
#include "Thread.h"
#include "Atomic.h"
#include "OpcodeDecoding.h"
#include "Fifo.h"
@ -151,7 +152,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
// check if we are able to run this buffer
if ((_fifo.bFF_GPReadEnable) && _fifo.CPReadWriteDistance && !(_fifo.bFF_BPEnable && _fifo.bFF_Breakpoint))
{
Common::SyncInterlockedExchange((LONG*)&_fifo.CPReadIdle, 0);
Common::AtomicStore(_fifo.CPReadIdle, 0);
//video_initialize.pLog("RUN...........................",FALSE);
int peek_counter = 0;
while (_fifo.bFF_GPReadEnable && _fifo.CPReadWriteDistance)
@ -175,7 +176,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
if (readPtr == _fifo.CPBreakpoint)
{
video_initialize.pLog("!!! BP irq raised",FALSE);
Common::SyncInterlockedExchange((LONG*)&_fifo.bFF_Breakpoint, 1);
Common::AtomicStore(_fifo.bFF_Breakpoint, 1);
video_initialize.pUpdateInterrupts();
break;
@ -210,11 +211,11 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
}
// Execute new instructions found in uData
Video_SendFifoData(uData, distToSend);
Common::SyncInterlockedExchange((LONG*)&_fifo.CPReadPointer, readPtr);
Common::SyncInterlockedExchangeAdd((LONG*)&_fifo.CPReadWriteDistance, -distToSend);
Common::AtomicStore(_fifo.CPReadPointer, readPtr);
Common::AtomicAdd(_fifo.CPReadWriteDistance, -distToSend);
}
//video_initialize.pLog("..........................IDLE",FALSE);
Common::SyncInterlockedExchange((LONG*)&_fifo.CPReadIdle, 1);
Common::AtomicStore(_fifo.CPReadIdle, 1);
}
s_criticalFifo.Leave();
}

View File

@ -54,6 +54,7 @@ Make AA apply instantly during gameplay if possible
#include "Globals.h"
#include "LogManager.h"
#include "Thread.h"
#include "Atomic.h"
#include <cstdarg>
@ -101,10 +102,10 @@ int GLScissorX, GLScissorY, GLScissorW, GLScissorH;
static bool s_PluginInitialized = false;
static volatile bool s_swapRequested = false;
static volatile u32 s_swapRequested = false;
static Common::Event s_swapResponseEvent;
static volatile bool s_efbAccessRequested = false;
static volatile u32 s_efbAccessRequested = false;
static Common::Event s_efbResponseEvent;
@ -390,10 +391,10 @@ void Shutdown(void)
{
s_PluginInitialized = false;
s_efbAccessRequested = false;
s_efbAccessRequested = FALSE;
s_efbResponseEvent.Shutdown();
s_swapRequested = false;
s_swapRequested = FALSE;
s_swapResponseEvent.Shutdown();
Fifo_Shutdown();
@ -450,11 +451,9 @@ static volatile struct
// Run from the graphics thread (from Fifo.cpp)
void VideoFifo_CheckSwapRequest()
{
if (s_swapRequested)
if (Common::AtomicLoadAcquire(s_swapRequested))
{
s_swapRequested = false;
Common::MemFence();
s_swapRequested = FALSE;
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight);
@ -478,13 +477,10 @@ inline bool addrRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper)
// Run from the graphics thread (from Fifo.cpp)
void VideoFifo_CheckSwapRequestAt(u32 xfbAddr, u32 fbWidth, u32 fbHeight)
{
if (s_swapRequested)
if (Common::AtomicLoadAcquire(s_swapRequested))
{
u32 aLower = xfbAddr;
u32 aUpper = xfbAddr + 2 * fbWidth * fbHeight;
Common::MemFence();
u32 bLower = s_beginFieldArgs.xfbAddr;
u32 bUpper = s_beginFieldArgs.xfbAddr + 2 * s_beginFieldArgs.fbWidth * s_beginFieldArgs.fbHeight;
@ -508,9 +504,7 @@ void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
s_beginFieldArgs.fbWidth = fbWidth;
s_beginFieldArgs.fbHeight = fbHeight;
Common::MemFence();
s_swapRequested = true;
Common::AtomicStoreRelease(s_swapRequested, TRUE);
}
}
@ -525,11 +519,9 @@ static volatile u32 s_AccessEFBResult = 0;
void VideoFifo_CheckEFBAccess()
{
if (s_efbAccessRequested)
if (Common::AtomicLoadAcquire(s_efbAccessRequested))
{
s_efbAccessRequested = false;
Common::MemFence();
s_efbAccessRequested = FALSE;
switch (s_accessEFBArgs.type)
{
@ -593,9 +585,7 @@ u32 Video_AccessEFB(EFBAccessType type, u32 x, u32 y)
s_accessEFBArgs.x = x;
s_accessEFBArgs.y = y;
Common::MemFence();
s_efbAccessRequested = true;
Common::AtomicStoreRelease(s_efbAccessRequested, TRUE);
if (g_VideoInitialize.bUseDualCore)
s_efbResponseEvent.MsgWait();