diff --git a/Source/Core/Common/Common.vcproj b/Source/Core/Common/Common.vcproj index ccb0a04cd3..5bc4f10c23 100644 --- a/Source/Core/Common/Common.vcproj +++ b/Source/Core/Common/Common.vcproj @@ -556,6 +556,18 @@ RelativePath=".\Src\ABI.h" > + + + + + + diff --git a/Source/Core/Common/Src/Atomic.h b/Source/Core/Common/Src/Atomic.h new file mode 100644 index 0000000000..63a883b046 --- /dev/null +++ b/Source/Core/Common/Src/Atomic.h @@ -0,0 +1,32 @@ +// Copyright (C) 2003-2009 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _ATOMIC_H_ +#define _ATOMIC_H_ + +#ifdef _WIN32 + +#include "Atomic_Win32.h" + +#else + +// GCC-compatible compiler assumed! +#include "Atomic_GCC.h" + +#endif + +#endif diff --git a/Source/Core/Common/Src/Atomic_GCC.h b/Source/Core/Common/Src/Atomic_GCC.h new file mode 100644 index 0000000000..5d1057e931 --- /dev/null +++ b/Source/Core/Common/Src/Atomic_GCC.h @@ -0,0 +1,110 @@ +// Copyright (C) 2003-2009 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. 
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#ifndef _ATOMIC_GCC_H_
+#define _ATOMIC_GCC_H_
+
+#include "Common.h"
+
+// Atomic operations are performed in a single step by the CPU. It is
+// impossible for other threads to see the operation "half-done."
+//
+// Some atomic operations can be combined with different types of memory
+// barriers called "Acquire semantics" and "Release semantics", defined below.
+//
+// Acquire semantics: Future memory accesses cannot be relocated to before the
+//                    operation.
+//
+// Release semantics: Past memory accesses cannot be relocated to after the
+//                    operation.
+//
+// These barriers affect not only the compiler, but also the CPU.
+
+namespace Common
+{
+inline void AtomicAdd(volatile u32& target, u32 value) {
+	__sync_add_and_fetch(&target, value); // The builtin takes a pointer to the object; result unused.
+}
+
+inline void AtomicIncrement(volatile u32& target) {
+	__sync_add_and_fetch(&target, 1); // Atomic target += 1; result unused.
+}
+
+inline u32 AtomicLoad(volatile u32& src) {
+	return src; // 32-bit reads are always atomic.
+}
+inline u32 AtomicLoadAcquire(volatile u32& src) {
+	u32 result = src; // Perform the load first...
+	__sync_synchronize(); // ...then fence, so later accesses cannot be moved before the load.
+	return result;
+}
+
+inline void AtomicStore(volatile u32& dest, u32 value) {
+	dest = value; // 32-bit writes are always atomic.
+}
+inline void AtomicStoreRelease(volatile u32& dest, u32 value) {
+	__sync_synchronize(); // Fence first, so earlier accesses cannot be moved after the store.
+	dest = value; // Plain store; __sync_lock_test_and_set would only be an acquire barrier.
+}
+} // namespace Common
+
+// Old code kept here for reference in case we need the parts with __asm__ __volatile__.
+#if 0 +LONG SyncInterlockedIncrement(LONG *Dest) +{ +#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__)) + return __sync_add_and_fetch(Dest, 1); +#else + register int result; + __asm__ __volatile__("lock; xadd %0,%1" + : "=r" (result), "=m" (*Dest) + : "0" (1), "m" (*Dest) + : "memory"); + return result; +#endif +} + +LONG SyncInterlockedExchangeAdd(LONG *Dest, LONG Val) +{ +#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__)) + return __sync_add_and_fetch(Dest, Val); +#else + register int result; + __asm__ __volatile__("lock; xadd %0,%1" + : "=r" (result), "=m" (*Dest) + : "0" (Val), "m" (*Dest) + : "memory"); + return result; +#endif +} + +LONG SyncInterlockedExchange(LONG *Dest, LONG Val) +{ +#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__)) + return __sync_lock_test_and_set(Dest, Val); +#else + register int result; + __asm__ __volatile__("lock; xchg %0,%1" + : "=r" (result), "=m" (*Dest) + : "0" (Val), "m" (*Dest) + : "memory"); + return result; +#endif +} +#endif + +#endif diff --git a/Source/Core/Common/Src/Atomic_Win32.h b/Source/Core/Common/Src/Atomic_Win32.h new file mode 100644 index 0000000000..c79853db36 --- /dev/null +++ b/Source/Core/Common/Src/Atomic_Win32.h @@ -0,0 +1,71 @@ +// Copyright (C) 2003-2009 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#ifndef _ATOMIC_WIN32_H_
+#define _ATOMIC_WIN32_H_
+
+#include "Common.h"
+#include <windows.h>
+
+// Atomic operations are performed in a single step by the CPU. It is
+// impossible for other threads to see the operation "half-done."
+//
+// Some atomic operations can be combined with different types of memory
+// barriers called "Acquire semantics" and "Release semantics", defined below.
+//
+// Acquire semantics: Future memory accesses cannot be relocated to before the
+//                    operation.
+//
+// Release semantics: Past memory accesses cannot be relocated to after the
+//                    operation.
+//
+// These barriers affect not only the compiler, but also the CPU.
+//
+// NOTE: Acquire and Release are not differentiated right now. They perform a
+// full memory barrier instead of a "one-way" memory barrier. The newest
+// Windows SDK has Acquire and Release versions of some Interlocked* functions.
+
+namespace Common
+{
+inline void AtomicAdd(volatile u32& target, u32 value) {
+	// InterlockedAdd is not declared for x86 by older SDK headers; the returned old value is unused.
+	InterlockedExchangeAdd((volatile LONG*)&target, (LONG)value);
+}
+
+inline void AtomicIncrement(volatile u32& target) {
+	InterlockedIncrement((volatile LONG*)&target); // Atomic target += 1; result unused.
+}
+
+inline u32 AtomicLoad(volatile u32& src) {
+	return src; // 32-bit reads are always atomic.
+}
+inline u32 AtomicLoadAcquire(volatile u32& src) {
+	u32 result = src; // Perform the load first...
+	MemoryBarrier(); // ...then fence, so later accesses cannot be moved before the load.
+	return result;
+}
+
+inline void AtomicStore(volatile u32& dest, u32 value) {
+	dest = value; // 32-bit writes are always atomic.
+}
+inline void AtomicStoreRelease(volatile u32& dest, u32 value) {
+	// InterlockedExchange includes a memory barrier as a bonus.
+ InterlockedExchange((volatile LONG*)&dest, (LONG)value); +} + +} + +#endif diff --git a/Source/Core/Common/Src/Thread.cpp b/Source/Core/Common/Src/Thread.cpp index 61029aa2d6..53ea908b77 100644 --- a/Source/Core/Common/Src/Thread.cpp +++ b/Source/Core/Common/Src/Thread.cpp @@ -31,10 +31,6 @@ namespace Common { -#ifndef _WIN32 -// TODO see thread.h -void MemFence(){;} -#endif #ifdef _WIN32 @@ -325,21 +321,6 @@ void SetCurrentThreadName(const TCHAR* szThreadName) __except(EXCEPTION_CONTINUE_EXECUTION) {} } -// TODO: check if ever inline -LONG SyncInterlockedIncrement(LONG *Dest) -{ - return InterlockedIncrement(Dest); -} - -LONG SyncInterlockedExchangeAdd(LONG *Dest, LONG Val) -{ - return InterlockedExchangeAdd(Dest, Val); -} - -LONG SyncInterlockedExchange(LONG *Dest, LONG Val) -{ - return InterlockedExchange(Dest, Val); -} #else // !WIN32, so must be POSIX threads @@ -516,48 +497,6 @@ void Event::Wait() pthread_mutex_unlock(&mutex_); } -LONG SyncInterlockedIncrement(LONG *Dest) -{ -#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__)) - return __sync_add_and_fetch(Dest, 1); -#else - register int result; - __asm__ __volatile__("lock; xadd %0,%1" - : "=r" (result), "=m" (*Dest) - : "0" (1), "m" (*Dest) - : "memory"); - return result; -#endif -} - -LONG SyncInterlockedExchangeAdd(LONG *Dest, LONG Val) -{ -#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__)) - return __sync_add_and_fetch(Dest, Val); -#else - register int result; - __asm__ __volatile__("lock; xadd %0,%1" - : "=r" (result), "=m" (*Dest) - : "0" (Val), "m" (*Dest) - : "memory"); - return result; -#endif -} - -LONG SyncInterlockedExchange(LONG *Dest, LONG Val) -{ -#if defined(__GNUC__) && defined (__GNUC_MINOR__) && ((4 < __GNUC__) || (4 == __GNUC__ && 1 <= __GNUC_MINOR__)) - return __sync_lock_test_and_set(Dest, Val); -#else - register int result; - __asm__ __volatile__("lock; xchg %0,%1" 
- : "=r" (result), "=m" (*Dest) - : "0" (Val), "m" (*Dest) - : "memory"); - return result; -#endif -} - #endif } // namespace Common diff --git a/Source/Core/Common/Src/Thread.h b/Source/Core/Common/Src/Thread.h index c5c07daf95..e06e02b101 100644 --- a/Source/Core/Common/Src/Thread.h +++ b/Source/Core/Common/Src/Thread.h @@ -72,18 +72,6 @@ namespace Common { -// MemFence: Neither the compiler nor the CPU can reorder memory accesses -// beyond this barrier. -#ifdef _WIN32 -__forceinline void MemFence() -{ - MemoryBarrier(); -} -#else -// TODO: UNIX experts, please implement the memory fence. -void MemFence(); -#endif - class CriticalSection { #ifdef _WIN32 @@ -206,10 +194,6 @@ void SleepCurrentThread(int ms); void SetCurrentThreadName(const char *name); -LONG SyncInterlockedExchangeAdd(LONG *Dest, LONG Val); -LONG SyncInterlockedExchange(LONG *Dest, LONG Val); -LONG SyncInterlockedIncrement(LONG *Dest); - } // namespace Common #endif // _THREAD_H_ diff --git a/Source/Core/Core/Src/HW/CommandProcessor.cpp b/Source/Core/Core/Src/HW/CommandProcessor.cpp index 7916999f78..856ab8450f 100644 --- a/Source/Core/Core/Src/HW/CommandProcessor.cpp +++ b/Source/Core/Core/Src/HW/CommandProcessor.cpp @@ -74,6 +74,7 @@ #include "../ConfigManager.h" #include "MathUtil.h" #include "Thread.h" +#include "Atomic.h" #include "Memmap.h" #include "PeripheralInterface.h" @@ -178,8 +179,8 @@ void UpdateInterrupts(); //inline void WriteLow (u32& _reg, u16 lowbits) {_reg = (_reg & 0xFFFF0000) | lowbits;} //inline void WriteHigh(u32& _reg, u16 highbits) {_reg = (_reg & 0x0000FFFF) | ((u32)highbits << 16);} -inline void WriteLow (volatile u32& _reg, u16 lowbits) {Common::SyncInterlockedExchange((LONG*)&_reg,(_reg & 0xFFFF0000) | lowbits);} -inline void WriteHigh(volatile u32& _reg, u16 highbits) {Common::SyncInterlockedExchange((LONG*)&_reg,(_reg & 0x0000FFFF) | ((u32)highbits << 16));} +inline void WriteLow (volatile u32& _reg, u16 lowbits) {Common::AtomicStore(_reg,(_reg & 0xFFFF0000) | 
lowbits);} +inline void WriteHigh(volatile u32& _reg, u16 highbits) {Common::AtomicStore(_reg,(_reg & 0x0000FFFF) | ((u32)highbits << 16));} inline u16 ReadLow (u32 _reg) {return (u16)(_reg & 0xFFFF);} inline u16 ReadHigh (u32 _reg) {return (u16)(_reg >> 16);} @@ -189,7 +190,7 @@ int et_UpdateInterrupts; // for GP watchdog hack void IncrementGPWDToken() { - Common::SyncInterlockedIncrement((LONG*)&fifo.Fake_GPWDToken); + Common::AtomicIncrement(fifo.Fake_GPWDToken); } // Check every FAKE_GP_WATCHDOG_PERIOD if a PE-frame-finish occured @@ -426,9 +427,9 @@ void Write16(const u16 _Value, const u32 _Address) fake_CommandProcessorNotUsed = false; UCPCtrlReg tmpCtrl(_Value); - Common::SyncInterlockedExchange((LONG*)&fifo.bFF_GPReadEnable, tmpCtrl.GPReadEnable); - Common::SyncInterlockedExchange((LONG*)&fifo.bFF_GPLinkEnable, tmpCtrl.GPLinkEnable); - Common::SyncInterlockedExchange((LONG*)&fifo.bFF_BPEnable, tmpCtrl.BPEnable); + Common::AtomicStore(fifo.bFF_GPReadEnable, tmpCtrl.GPReadEnable); + Common::AtomicStore(fifo.bFF_GPLinkEnable, tmpCtrl.GPLinkEnable); + Common::AtomicStore(fifo.bFF_BPEnable, tmpCtrl.BPEnable); // TOCHECK (mb2): could BP irq be cleared with w16 to STATUS_REGISTER? 
// funny hack: eg in MP1 if we disable the clear breakpoint ability by commenting this block @@ -600,7 +601,7 @@ void STACKALIGN GatherPipeBursted() fifo.CPWritePointer += GPFifo::GATHER_PIPE_SIZE; if (fifo.CPWritePointer >= fifo.CPEnd) fifo.CPWritePointer = fifo.CPBase; - Common::SyncInterlockedExchangeAdd((LONG*)&fifo.CPReadWriteDistance, GPFifo::GATHER_PIPE_SIZE); + Common::AtomicAdd(fifo.CPReadWriteDistance, GPFifo::GATHER_PIPE_SIZE); // High watermark overflow handling (hacked way) u32 ct = 0; @@ -732,8 +733,8 @@ void UpdateFifoRegister() dist = wp - rp; else dist = (wp - fifo.CPBase) + (fifo.CPEnd - rp); - //fifo.CPReadWriteDistance = dist; - Common::SyncInterlockedExchange((LONG*)&fifo.CPReadWriteDistance, dist); + + Common::AtomicStore(fifo.CPReadWriteDistance, dist); if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore) CatchUpGPU(); diff --git a/Source/Core/Core/Src/HW/PixelEngine.cpp b/Source/Core/Core/Src/HW/PixelEngine.cpp index 8e7d437ebb..969c19bc66 100644 --- a/Source/Core/Core/Src/HW/PixelEngine.cpp +++ b/Source/Core/Core/Src/HW/PixelEngine.cpp @@ -22,7 +22,7 @@ #include "Common.h" #include "ChunkFile.h" -#include "Thread.h" +#include "Atomic.h" #include "PixelEngine.h" @@ -360,7 +360,10 @@ void SetToken(const u16 _token, const int _bSetTokenAcknowledge) { // we do it directly from videoThread because of // Super Monkey Ball - Common::SyncInterlockedExchange((LONG*)&CommandProcessor::fifo.PEToken, _token); + // XXX: No 16-bit atomic store available, so cheat and use 32-bit. + // That's what we've always done. We're counting on fifo.PEToken to be + // 4-byte padded. 
+ Common::AtomicStore(*(volatile u32*)&CommandProcessor::fifo.PEToken, _token); } } diff --git a/Source/Core/VideoCommon/Src/Fifo.cpp b/Source/Core/VideoCommon/Src/Fifo.cpp index 43e5d399f7..dcd593550a 100644 --- a/Source/Core/VideoCommon/Src/Fifo.cpp +++ b/Source/Core/VideoCommon/Src/Fifo.cpp @@ -23,6 +23,7 @@ #endif #include "MemoryUtil.h" #include "Thread.h" +#include "Atomic.h" #include "OpcodeDecoding.h" #include "Fifo.h" @@ -151,7 +152,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize) // check if we are able to run this buffer if ((_fifo.bFF_GPReadEnable) && _fifo.CPReadWriteDistance && !(_fifo.bFF_BPEnable && _fifo.bFF_Breakpoint)) { - Common::SyncInterlockedExchange((LONG*)&_fifo.CPReadIdle, 0); + Common::AtomicStore(_fifo.CPReadIdle, 0); //video_initialize.pLog("RUN...........................",FALSE); int peek_counter = 0; while (_fifo.bFF_GPReadEnable && _fifo.CPReadWriteDistance) @@ -175,7 +176,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize) if (readPtr == _fifo.CPBreakpoint) { video_initialize.pLog("!!! 
BP irq raised",FALSE); - Common::SyncInterlockedExchange((LONG*)&_fifo.bFF_Breakpoint, 1); + Common::AtomicStore(_fifo.bFF_Breakpoint, 1); video_initialize.pUpdateInterrupts(); break; @@ -210,11 +211,11 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize) } // Execute new instructions found in uData Video_SendFifoData(uData, distToSend); - Common::SyncInterlockedExchange((LONG*)&_fifo.CPReadPointer, readPtr); - Common::SyncInterlockedExchangeAdd((LONG*)&_fifo.CPReadWriteDistance, -distToSend); + Common::AtomicStore(_fifo.CPReadPointer, readPtr); + Common::AtomicAdd(_fifo.CPReadWriteDistance, -distToSend); } //video_initialize.pLog("..........................IDLE",FALSE); - Common::SyncInterlockedExchange((LONG*)&_fifo.CPReadIdle, 1); + Common::AtomicStore(_fifo.CPReadIdle, 1); } s_criticalFifo.Leave(); } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp index 1392f8c2e3..a284bf3b21 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp @@ -54,6 +54,7 @@ Make AA apply instantly during gameplay if possible #include "Globals.h" #include "LogManager.h" #include "Thread.h" +#include "Atomic.h" #include @@ -101,10 +102,10 @@ int GLScissorX, GLScissorY, GLScissorW, GLScissorH; static bool s_PluginInitialized = false; -static volatile bool s_swapRequested = false; +static volatile u32 s_swapRequested = false; static Common::Event s_swapResponseEvent; -static volatile bool s_efbAccessRequested = false; +static volatile u32 s_efbAccessRequested = false; static Common::Event s_efbResponseEvent; @@ -390,10 +391,10 @@ void Shutdown(void) { s_PluginInitialized = false; - s_efbAccessRequested = false; + s_efbAccessRequested = FALSE; s_efbResponseEvent.Shutdown(); - s_swapRequested = false; + s_swapRequested = FALSE; s_swapResponseEvent.Shutdown(); Fifo_Shutdown(); @@ -450,11 +451,9 @@ static volatile struct // Run from the graphics thread (from Fifo.cpp) void 
VideoFifo_CheckSwapRequest() { - if (s_swapRequested) + if (Common::AtomicLoadAcquire(s_swapRequested)) { - s_swapRequested = false; - - Common::MemFence(); + s_swapRequested = FALSE; Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight); @@ -478,13 +477,10 @@ inline bool addrRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper) // Run from the graphics thread (from Fifo.cpp) void VideoFifo_CheckSwapRequestAt(u32 xfbAddr, u32 fbWidth, u32 fbHeight) { - if (s_swapRequested) + if (Common::AtomicLoadAcquire(s_swapRequested)) { u32 aLower = xfbAddr; u32 aUpper = xfbAddr + 2 * fbWidth * fbHeight; - - Common::MemFence(); - u32 bLower = s_beginFieldArgs.xfbAddr; u32 bUpper = s_beginFieldArgs.xfbAddr + 2 * s_beginFieldArgs.fbWidth * s_beginFieldArgs.fbHeight; @@ -508,9 +504,7 @@ void Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) s_beginFieldArgs.fbWidth = fbWidth; s_beginFieldArgs.fbHeight = fbHeight; - Common::MemFence(); - - s_swapRequested = true; + Common::AtomicStoreRelease(s_swapRequested, TRUE); } } @@ -525,11 +519,9 @@ static volatile u32 s_AccessEFBResult = 0; void VideoFifo_CheckEFBAccess() { - if (s_efbAccessRequested) + if (Common::AtomicLoadAcquire(s_efbAccessRequested)) { - s_efbAccessRequested = false; - - Common::MemFence(); + s_efbAccessRequested = FALSE; switch (s_accessEFBArgs.type) { @@ -593,9 +585,7 @@ u32 Video_AccessEFB(EFBAccessType type, u32 x, u32 y) s_accessEFBArgs.x = x; s_accessEFBArgs.y = y; - Common::MemFence(); - - s_efbAccessRequested = true; + Common::AtomicStoreRelease(s_efbAccessRequested, TRUE); if (g_VideoInitialize.bUseDualCore) s_efbResponseEvent.MsgWait();