Remove Atomic.h

The STL has everything we need nowadays.

I have tried to not alter any behavior or semantics with this
change wherever possible. In particular, WriteLow and WriteHigh
in CommandProcessor retain the ability to accidentally undo
another thread's write to the upper half or lower half
respectively. If that should be fixed, it should be done in a
separate commit for clarity. One thing did change: The places
where we were using += or -= on a volatile variable (not an atomic
operation) are now using fetch_add or fetch_sub (actually atomic
operations).

Tested with single core and dual core on x86-64 and AArch64.
This commit is contained in:
JosJuice 2021-05-13 18:44:59 +02:00
parent 80ac36a712
commit b93983b50a
13 changed files with 178 additions and 319 deletions

View File

@ -1,16 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#ifdef _WIN32
#include "Common/Atomic_Win32.h" // IWYU pragma: export
#else
// GCC-compatible compiler assumed!
#include "Common/Atomic_GCC.h" // IWYU pragma: export
#endif

View File

@ -1,86 +0,0 @@
// Copyright 2009 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
// IWYU pragma: private, include "Common/Atomic.h"
#pragma once
#include "Common/Common.h"
#include "Common/CommonTypes.h"
// Atomic operations are performed in a single step by the CPU. It is
// impossible for other threads to see the operation "half-done."
//
// Some atomic operations can be combined with different types of memory
// barriers called "Acquire semantics" and "Release semantics", defined below.
//
// Acquire semantics: Future memory accesses cannot be relocated to before the
// operation.
//
// Release semantics: Past memory accesses cannot be relocated to after the
// operation.
//
// These barriers affect not only the compiler, but also the CPU.
// GCC/Clang implementation of the Common atomic helpers. Every function is a
// thin wrapper around a single compiler builtin operating on a volatile
// location passed by reference.
namespace Common
{
// Atomically adds `value` to `target` using __sync_add_and_fetch.
// The builtin's result is discarded; no memory-order argument is passed.
inline void AtomicAdd(volatile u32& target, u32 value)
{
__sync_add_and_fetch(&target, value);
}
// Atomically performs `target &= value` using __sync_and_and_fetch.
// The builtin's result is discarded.
inline void AtomicAnd(volatile u32& target, u32 value)
{
__sync_and_and_fetch(&target, value);
}
// Atomically decrements `target` by one. Implemented as an atomic add of -1
// rather than through a dedicated subtract builtin.
inline void AtomicDecrement(volatile u32& target)
{
__sync_add_and_fetch(&target, -1);
}
// Atomically increments `target` by one using __sync_add_and_fetch.
inline void AtomicIncrement(volatile u32& target)
{
__sync_add_and_fetch(&target, 1);
}
// Atomically performs `target |= value` using __sync_or_and_fetch.
// The builtin's result is discarded.
inline void AtomicOr(volatile u32& target, u32 value)
{
__sync_or_and_fetch(&target, value);
}
// The load/store/exchange helpers below rely on the __atomic_* builtins and
// their __ATOMIC_* memory-order macros; fail the build early if the compiler
// does not define them.
#ifndef __ATOMIC_RELAXED
#error __ATOMIC_RELAXED not defined; your compiler version is too old.
#endif
// Atomically loads and returns the value of `src` with relaxed ordering
// (__ATOMIC_RELAXED).
template <typename T>
inline T AtomicLoad(volatile T& src)
{
return __atomic_load_n(&src, __ATOMIC_RELAXED);
}
// Atomically loads and returns the value of `src` with acquire ordering
// (__ATOMIC_ACQUIRE).
template <typename T>
inline T AtomicLoadAcquire(volatile T& src)
{
return __atomic_load_n(&src, __ATOMIC_ACQUIRE);
}
// Atomically stores `value` into `dest` with relaxed ordering
// (__ATOMIC_RELAXED). `value` is passed to the builtin as type U without an
// explicit cast to T.
template <typename T, typename U>
inline void AtomicStore(volatile T& dest, U value)
{
__atomic_store_n(&dest, value, __ATOMIC_RELAXED);
}
// Atomically stores `value` into `dest` with release ordering
// (__ATOMIC_RELEASE).
template <typename T, typename U>
inline void AtomicStoreRelease(volatile T& dest, U value)
{
__atomic_store_n(&dest, value, __ATOMIC_RELEASE);
}
// Atomically replaces the pointer stored in `loc` with `newval` and returns
// the result of __atomic_exchange_n (the previous contents of `loc`, per the
// GCC builtin's documentation).
// Note: despite the "Acquire" in the name, the exchange is issued with
// __ATOMIC_ACQ_REL ordering, i.e. both acquire and release.
template <typename T, typename U>
inline T* AtomicExchangeAcquire(T* volatile& loc, U newval)
{
return __atomic_exchange_n(&loc, newval, __ATOMIC_ACQ_REL);
}
} // namespace Common

View File

@ -1,94 +0,0 @@
// Copyright 2009 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
// IWYU pragma: private, include "Common/Atomic.h"
#pragma once
#include <Windows.h>
#include <atomic>
#include "Common/CommonTypes.h"
// Atomic operations are performed in a single step by the CPU. It is
// impossible for other threads to see the operation "half-done."
//
// Some atomic operations can be combined with different types of memory
// barriers called "Acquire semantics" and "Release semantics", defined below.
//
// Acquire semantics: Future memory accesses cannot be relocated to before the
// operation.
//
// Release semantics: Past memory accesses cannot be relocated to after the
// operation.
//
// These barriers affect not only the compiler, but also the CPU.
//
// NOTE: Acquire and Release are not differentiated right now. They perform a
// full memory barrier instead of a "one-way" memory barrier. The newest
// Windows SDK has Acquire and Release versions of some Interlocked* functions.
// Windows implementation of the Common atomic helpers. The read-modify-write
// helpers forward to _Interlocked* intrinsics after reinterpreting the u32
// target as a volatile LONG; the load/store helpers are plain volatile
// accesses, optionally paired with a std::atomic_thread_fence.
namespace Common
{
// Atomically adds `value` to `target` via _InterlockedExchangeAdd.
// The intrinsic's return value is discarded.
inline void AtomicAdd(volatile u32& target, u32 value)
{
_InterlockedExchangeAdd((volatile LONG*)&target, (LONG)value);
}
// Atomically performs `target &= value` via _InterlockedAnd.
// The intrinsic's return value is discarded.
inline void AtomicAnd(volatile u32& target, u32 value)
{
_InterlockedAnd((volatile LONG*)&target, (LONG)value);
}
// Atomically increments `target` by one via _InterlockedIncrement.
inline void AtomicIncrement(volatile u32& target)
{
_InterlockedIncrement((volatile LONG*)&target);
}
// Atomically decrements `target` by one via _InterlockedDecrement.
inline void AtomicDecrement(volatile u32& target)
{
_InterlockedDecrement((volatile LONG*)&target);
}
// Atomically performs `target |= value` via _InterlockedOr.
// The intrinsic's return value is discarded.
inline void AtomicOr(volatile u32& target, u32 value)
{
_InterlockedOr((volatile LONG*)&target, (LONG)value);
}
// Returns the current value of `src` using an ordinary volatile read; no
// fence or intrinsic is involved.
// NOTE(review): the template accepts any T, but the atomicity remark below
// only speaks about 32-bit reads — confirm callers only pass 32-bit types.
template <typename T>
inline T AtomicLoad(volatile T& src)
{
return src; // 32-bit reads are always atomic.
}
// Reads `src` with an ordinary volatile read, then issues an acquire fence
// before returning the value that was read.
template <typename T>
inline T AtomicLoadAcquire(volatile T& src)
{
// 32-bit reads are always atomic.
T result = src;
// Compiler instruction only. x86 loads always have acquire semantics.
std::atomic_thread_fence(std::memory_order_acquire);
return result;
}
// Converts `value` to T with a C-style cast and writes it to `dest` using an
// ordinary volatile store; no fence or intrinsic is involved.
// NOTE(review): as with AtomicLoad, the atomicity remark only covers 32-bit
// writes while T is unconstrained — confirm against callers.
template <typename T, typename U>
inline void AtomicStore(volatile T& dest, U value)
{
dest = (T)value; // 32-bit writes are always atomic.
}
// Issues a release fence and then writes `value` (converted to T) to `dest`
// with an ordinary volatile store. The fence must stay before the store.
template <typename T, typename U>
inline void AtomicStoreRelease(volatile T& dest, U value)
{
// Compiler instruction only. x86 stores always have release semantics.
std::atomic_thread_fence(std::memory_order_release);
dest = (T)value; // 32-bit writes are always atomic.
}
// Atomically replaces the pointer stored in `loc` with `newval` via
// _InterlockedExchangePointer_acq and returns the intrinsic's result cast
// back to T* (presumably the previous pointer value — see the intrinsic's
// documentation).
template <typename T, typename U>
inline T* AtomicExchangeAcquire(T* volatile& loc, U newval)
{
return (T*)_InterlockedExchangePointer_acq((void* volatile*)&loc, (void*)newval);
}
} // namespace Common

View File

@ -2,7 +2,6 @@ add_library(common
Analytics.cpp Analytics.cpp
Analytics.h Analytics.h
Assert.h Assert.h
Atomic.h
BitField.h BitField.h
BitSet.h BitSet.h
BitUtils.h BitUtils.h

View File

@ -221,10 +221,10 @@ public:
template <typename T> template <typename T>
void Do(std::atomic<T>& atomic) void Do(std::atomic<T>& atomic)
{ {
T temp = atomic.load(); T temp = atomic.load(std::memory_order_relaxed);
Do(temp); Do(temp);
if (mode == MODE_READ) if (mode == MODE_READ)
atomic.store(temp); atomic.store(temp, std::memory_order_relaxed);
} }
template <typename T> template <typename T>

View File

@ -5,11 +5,13 @@
#pragma once #pragma once
#include <array> #include <array>
#include <atomic>
#include <string> #include <string>
#include <tuple> #include <tuple>
#include <type_traits> #include <type_traits>
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/BitUtils.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Core/ConfigManager.h" #include "Core/ConfigManager.h"
#include "Core/HW/MMIOHandlers.h" #include "Core/HW/MMIOHandlers.h"
@ -79,17 +81,19 @@ inline u16* LowPart(u32* ptr)
{ {
return (u16*)ptr; return (u16*)ptr;
} }
inline u16* LowPart(volatile u32* ptr) inline u16* LowPart(std::atomic<u32>* ptr)
{ {
return (u16*)ptr; static_assert(std::atomic<u32>::is_always_lock_free && sizeof(std::atomic<u32>) == sizeof(u32));
return LowPart(Common::BitCast<u32*>(ptr));
} }
inline u16* HighPart(u32* ptr) inline u16* HighPart(u32* ptr)
{ {
return LowPart(ptr) + 1; return LowPart(ptr) + 1;
} }
inline u16* HighPart(volatile u32* ptr) inline u16* HighPart(std::atomic<u32>* ptr)
{ {
return LowPart(ptr) + 1; static_assert(std::atomic<u32>::is_always_lock_free && sizeof(std::atomic<u32>) == sizeof(u32));
return HighPart(Common::BitCast<u32*>(ptr));
} }
} // namespace Utils } // namespace Utils

View File

@ -49,7 +49,6 @@ IPC_HLE_PERIOD: For the Wii Remote this is the call schedule:
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include "Common/Atomic.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
#include "Common/Thread.h" #include "Common/Thread.h"

View File

@ -16,9 +16,6 @@
<ClInclude Include="Common\Align.h" /> <ClInclude Include="Common\Align.h" />
<ClInclude Include="Common\Analytics.h" /> <ClInclude Include="Common\Analytics.h" />
<ClInclude Include="Common\Assert.h" /> <ClInclude Include="Common\Assert.h" />
<ClInclude Include="Common\Atomic_GCC.h" />
<ClInclude Include="Common\Atomic_Win32.h" />
<ClInclude Include="Common\Atomic.h" />
<ClInclude Include="Common\BitField.h" /> <ClInclude Include="Common\BitField.h" />
<ClInclude Include="Common\BitSet.h" /> <ClInclude Include="Common\BitSet.h" />
<ClInclude Include="Common\BitUtils.h" /> <ClInclude Include="Common\BitUtils.h" />

View File

@ -11,7 +11,6 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include "Common/Atomic.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/GL/GLContext.h" #include "Common/GL/GLContext.h"
#include "Common/GL/GLUtil.h" #include "Common/GL/GLUtil.h"

View File

@ -6,7 +6,6 @@
#include <cstring> #include <cstring>
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/Atomic.h"
#include "Common/ChunkFile.h" #include "Common/ChunkFile.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Flag.h" #include "Common/Flag.h"
@ -91,21 +90,15 @@ void DoState(PointerWrap& p)
p.Do(s_interrupt_waiting); p.Do(s_interrupt_waiting);
} }
static inline void WriteLow(volatile u32& _reg, u16 lowbits) static inline void WriteLow(std::atomic<u32>& reg, u16 lowbits)
{ {
Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits); reg.store((reg.load(std::memory_order_relaxed) & 0xFFFF0000) | lowbits,
std::memory_order_relaxed);
} }
static inline void WriteHigh(volatile u32& _reg, u16 highbits) static inline void WriteHigh(std::atomic<u32>& reg, u16 highbits)
{ {
Common::AtomicStore(_reg, (_reg & 0x0000FFFF) | ((u32)highbits << 16)); reg.store((reg.load(std::memory_order_relaxed) & 0x0000FFFF) | (static_cast<u32>(highbits) << 16),
} std::memory_order_relaxed);
static inline u16 ReadLow(u32 _reg)
{
return (u16)(_reg & 0xFFFF);
}
static inline u16 ReadHigh(u32 _reg)
{
return (u16)(_reg >> 16);
} }
void Init() void Init()
@ -259,30 +252,49 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
mmio->Register(base | PERF_SELECT, MMIO::InvalidRead<u16>(), MMIO::Nop<u16>()); mmio->Register(base | PERF_SELECT, MMIO::InvalidRead<u16>(), MMIO::Nop<u16>());
// Some MMIOs have different handlers for single core vs. dual core mode. // Some MMIOs have different handlers for single core vs. dual core mode.
mmio->Register(base | FIFO_RW_DISTANCE_LO, mmio->Register(
IsOnThread() ? base | FIFO_RW_DISTANCE_LO,
MMIO::ComplexRead<u16>([](u32) { IsOnThread() ? MMIO::ComplexRead<u16>([](u32) {
if (fifo.CPWritePointer >= fifo.SafeCPReadPointer) if (fifo.CPWritePointer.load(std::memory_order_relaxed) >=
return ReadLow(fifo.CPWritePointer - fifo.SafeCPReadPointer); fifo.SafeCPReadPointer.load(std::memory_order_relaxed))
{
return static_cast<u16>(fifo.CPWritePointer.load(std::memory_order_relaxed) -
fifo.SafeCPReadPointer.load(std::memory_order_relaxed));
}
else else
return ReadLow(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer - {
fifo.CPBase + 32); return static_cast<u16>(fifo.CPEnd.load(std::memory_order_relaxed) -
fifo.SafeCPReadPointer.load(std::memory_order_relaxed) +
fifo.CPWritePointer.load(std::memory_order_relaxed) -
fifo.CPBase.load(std::memory_order_relaxed) + 32);
}
}) : }) :
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance)), MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance)),
MMIO::DirectWrite<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance), MMIO::DirectWrite<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance),
WMASK_LO_ALIGN_32BIT)); WMASK_LO_ALIGN_32BIT));
mmio->Register(base | FIFO_RW_DISTANCE_HI, mmio->Register(base | FIFO_RW_DISTANCE_HI,
IsOnThread() ? MMIO::ComplexRead<u16>([](u32) { IsOnThread() ?
MMIO::ComplexRead<u16>([](u32) {
Fifo::SyncGPUForRegisterAccess(); Fifo::SyncGPUForRegisterAccess();
if (fifo.CPWritePointer >= fifo.SafeCPReadPointer) if (fifo.CPWritePointer.load(std::memory_order_relaxed) >=
return ReadHigh(fifo.CPWritePointer - fifo.SafeCPReadPointer); fifo.SafeCPReadPointer.load(std::memory_order_relaxed))
{
return (fifo.CPWritePointer.load(std::memory_order_relaxed) -
fifo.SafeCPReadPointer.load(std::memory_order_relaxed)) >>
16;
}
else else
return ReadHigh(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer - {
fifo.CPBase + 32); return (fifo.CPEnd.load(std::memory_order_relaxed) -
fifo.SafeCPReadPointer.load(std::memory_order_relaxed) +
fifo.CPWritePointer.load(std::memory_order_relaxed) -
fifo.CPBase.load(std::memory_order_relaxed) + 32) >>
16;
}
}) : }) :
MMIO::ComplexRead<u16>([](u32) { MMIO::ComplexRead<u16>([](u32) {
Fifo::SyncGPUForRegisterAccess(); Fifo::SyncGPUForRegisterAccess();
return ReadHigh(fifo.CPReadWriteDistance); return fifo.CPReadWriteDistance.load(std::memory_order_relaxed) >> 16;
}), }),
MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](u32, u16 val) { MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](u32, u16 val) {
Fifo::SyncGPUForRegisterAccess(); Fifo::SyncGPUForRegisterAccess();
@ -297,16 +309,17 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
mmio->Register(base | FIFO_READ_POINTER_HI, mmio->Register(base | FIFO_READ_POINTER_HI,
IsOnThread() ? MMIO::ComplexRead<u16>([](u32) { IsOnThread() ? MMIO::ComplexRead<u16>([](u32) {
Fifo::SyncGPUForRegisterAccess(); Fifo::SyncGPUForRegisterAccess();
return ReadHigh(fifo.SafeCPReadPointer); return fifo.SafeCPReadPointer.load(std::memory_order_relaxed) >> 16;
}) : }) :
MMIO::ComplexRead<u16>([](u32) { MMIO::ComplexRead<u16>([](u32) {
Fifo::SyncGPUForRegisterAccess(); Fifo::SyncGPUForRegisterAccess();
return ReadHigh(fifo.CPReadPointer); return fifo.CPReadPointer.load(std::memory_order_relaxed) >> 16;
}), }),
IsOnThread() ? MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](u32, u16 val) { IsOnThread() ? MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](u32, u16 val) {
Fifo::SyncGPUForRegisterAccess(); Fifo::SyncGPUForRegisterAccess();
WriteHigh(fifo.CPReadPointer, val & WMASK_HI_RESTRICT); WriteHigh(fifo.CPReadPointer, val & WMASK_HI_RESTRICT);
fifo.SafeCPReadPointer = fifo.CPReadPointer; fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed),
std::memory_order_relaxed);
}) : }) :
MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](u32, u16 val) { MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](u32, u16 val) {
Fifo::SyncGPUForRegisterAccess(); Fifo::SyncGPUForRegisterAccess();
@ -325,8 +338,9 @@ void GatherPipeBursted()
{ {
// In multibuffer mode is not allowed write in the same FIFO attached to the GPU. // In multibuffer mode is not allowed write in the same FIFO attached to the GPU.
// Fix Pokemon XD in DC mode. // Fix Pokemon XD in DC mode.
if ((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd) && if ((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd.load(std::memory_order_relaxed)) &&
(ProcessorInterface::Fifo_CPUBase == fifo.CPBase) && fifo.CPReadWriteDistance > 0) (ProcessorInterface::Fifo_CPUBase == fifo.CPBase.load(std::memory_order_relaxed)) &&
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > 0)
{ {
Fifo::FlushGpu(); Fifo::FlushGpu();
} }
@ -336,35 +350,47 @@ void GatherPipeBursted()
} }
// update the fifo pointer // update the fifo pointer
if (fifo.CPWritePointer == fifo.CPEnd) if (fifo.CPWritePointer.load(std::memory_order_relaxed) ==
fifo.CPWritePointer = fifo.CPBase; fifo.CPEnd.load(std::memory_order_relaxed))
{
fifo.CPWritePointer.store(fifo.CPBase, std::memory_order_relaxed);
}
else else
fifo.CPWritePointer += GATHER_PIPE_SIZE; {
fifo.CPWritePointer.fetch_add(GATHER_PIPE_SIZE, std::memory_order_relaxed);
}
if (m_CPCtrlReg.GPReadEnable && m_CPCtrlReg.GPLinkEnable) if (m_CPCtrlReg.GPReadEnable && m_CPCtrlReg.GPLinkEnable)
{ {
ProcessorInterface::Fifo_CPUWritePointer = fifo.CPWritePointer; ProcessorInterface::Fifo_CPUWritePointer = fifo.CPWritePointer.load(std::memory_order_relaxed);
ProcessorInterface::Fifo_CPUBase = fifo.CPBase; ProcessorInterface::Fifo_CPUBase = fifo.CPBase.load(std::memory_order_relaxed);
ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd; ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd.load(std::memory_order_relaxed);
} }
// If the game is running close to overflowing, make the exception checking more frequent. // If the game is running close to overflowing, make the exception checking more frequent.
if (fifo.bFF_HiWatermark) if (fifo.bFF_HiWatermark)
CoreTiming::ForceExceptionCheck(0); CoreTiming::ForceExceptionCheck(0);
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE); fifo.CPReadWriteDistance.fetch_add(GATHER_PIPE_SIZE, std::memory_order_seq_cst);
Fifo::RunGpu(); Fifo::RunGpu();
ASSERT_MSG(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase, ASSERT_MSG(COMMANDPROCESSOR,
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) <=
fifo.CPEnd.load(std::memory_order_relaxed) -
fifo.CPBase.load(std::memory_order_relaxed),
"FIFO is overflowed by GatherPipe !\nCPU thread is too fast!"); "FIFO is overflowed by GatherPipe !\nCPU thread is too fast!");
// check if we are in sync // check if we are in sync
ASSERT_MSG(COMMANDPROCESSOR, fifo.CPWritePointer == ProcessorInterface::Fifo_CPUWritePointer, ASSERT_MSG(COMMANDPROCESSOR,
fifo.CPWritePointer.load(std::memory_order_relaxed) ==
ProcessorInterface::Fifo_CPUWritePointer,
"FIFOs linked but out of sync"); "FIFOs linked but out of sync");
ASSERT_MSG(COMMANDPROCESSOR, fifo.CPBase == ProcessorInterface::Fifo_CPUBase, ASSERT_MSG(COMMANDPROCESSOR,
fifo.CPBase.load(std::memory_order_relaxed) == ProcessorInterface::Fifo_CPUBase,
"FIFOs linked but out of sync"); "FIFOs linked but out of sync");
ASSERT_MSG(COMMANDPROCESSOR, fifo.CPEnd == ProcessorInterface::Fifo_CPUEnd, ASSERT_MSG(COMMANDPROCESSOR,
fifo.CPEnd.load(std::memory_order_relaxed) == ProcessorInterface::Fifo_CPUEnd,
"FIFOs linked but out of sync"); "FIFOs linked but out of sync");
} }
@ -403,31 +429,41 @@ void SetCPStatusFromGPU()
// breakpoint // breakpoint
if (fifo.bFF_BPEnable) if (fifo.bFF_BPEnable)
{ {
if (fifo.CPBreakpoint == fifo.CPReadPointer) if (fifo.CPBreakpoint.load(std::memory_order_relaxed) ==
fifo.CPReadPointer.load(std::memory_order_relaxed))
{ {
if (!fifo.bFF_Breakpoint) if (!fifo.bFF_Breakpoint)
{ {
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}", fifo.CPReadPointer); DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}",
fifo.CPReadPointer.load(std::memory_order_relaxed));
fifo.bFF_Breakpoint = true; fifo.bFF_Breakpoint = true;
} }
} }
else else
{ {
if (fifo.bFF_Breakpoint) if (fifo.bFF_Breakpoint)
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", fifo.CPReadPointer); {
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}",
fifo.CPReadPointer.load(std::memory_order_relaxed));
}
fifo.bFF_Breakpoint = false; fifo.bFF_Breakpoint = false;
} }
} }
else else
{ {
if (fifo.bFF_Breakpoint) if (fifo.bFF_Breakpoint)
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", fifo.CPReadPointer); {
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}",
fifo.CPReadPointer.load(std::memory_order_relaxed));
}
fifo.bFF_Breakpoint = false; fifo.bFF_Breakpoint = false;
} }
// overflow & underflow check // overflow & underflow check
fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark); fifo.bFF_HiWatermark =
fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark); (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark);
fifo.bFF_LoWatermark =
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark);
bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt; bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt; bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
@ -457,8 +493,10 @@ void SetCPStatusFromGPU()
void SetCPStatusFromCPU() void SetCPStatusFromCPU()
{ {
// overflow & underflow check // overflow & underflow check
fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark); fifo.bFF_HiWatermark =
fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark); (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark);
fifo.bFF_LoWatermark =
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark);
bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt; bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt; bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
@ -489,9 +527,11 @@ void SetCpStatusRegister()
{ {
// Here always there is one fifo attached to the GPU // Here always there is one fifo attached to the GPU
m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint; m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint;
m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance || (fifo.CPReadPointer == fifo.CPWritePointer); m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
m_CPStatusReg.CommandIdle = (fifo.CPReadPointer.load(std::memory_order_relaxed) ==
!fifo.CPReadWriteDistance || Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable; fifo.CPWritePointer.load(std::memory_order_relaxed));
m_CPStatusReg.CommandIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable;
m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark; m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark;
m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark; m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark;
@ -548,7 +588,8 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false"); cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false");
{ {
PanicAlertFmt("Illegal command {:02x}\n" PanicAlertFmt(
"Illegal command {:02x}\n"
"CPBase: {:#010x}\n" "CPBase: {:#010x}\n"
"CPEnd: {:#010x}\n" "CPEnd: {:#010x}\n"
"CPHiWatermark: {:#010x}\n" "CPHiWatermark: {:#010x}\n"
@ -564,13 +605,15 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
"bFF_GPLinkEnable: {}\n" "bFF_GPLinkEnable: {}\n"
"bFF_HiWatermarkInt: {}\n" "bFF_HiWatermarkInt: {}\n"
"bFF_LoWatermarkInt: {}\n", "bFF_LoWatermarkInt: {}\n",
cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, cmd_byte, fifo.CPBase.load(std::memory_order_relaxed),
fifo.CPReadWriteDistance, fifo.CPWritePointer, fifo.CPReadPointer, fifo.CPEnd.load(std::memory_order_relaxed), fifo.CPHiWatermark, fifo.CPLoWatermark,
fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false", fifo.CPReadWriteDistance.load(std::memory_order_relaxed),
fifo.CPWritePointer.load(std::memory_order_relaxed),
fifo.CPReadPointer.load(std::memory_order_relaxed),
fifo.CPBreakpoint.load(std::memory_order_relaxed), fifo.bFF_GPReadEnable ? "true" : "false",
fifo.bFF_BPEnable ? "true" : "false", fifo.bFF_BPInt ? "true" : "false", fifo.bFF_BPEnable ? "true" : "false", fifo.bFF_BPInt ? "true" : "false",
fifo.bFF_Breakpoint ? "true" : "false", fifo.bFF_GPLinkEnable ? "true" : "false", fifo.bFF_Breakpoint ? "true" : "false", fifo.bFF_GPLinkEnable ? "true" : "false",
fifo.bFF_HiWatermarkInt ? "true" : "false", fifo.bFF_HiWatermarkInt ? "true" : "false", fifo.bFF_LoWatermarkInt ? "true" : "false");
fifo.bFF_LoWatermarkInt ? "true" : "false");
} }
} }

View File

@ -4,6 +4,8 @@
#pragma once #pragma once
#include <atomic>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
class PointerWrap; class PointerWrap;
@ -17,15 +19,15 @@ namespace CommandProcessor
struct SCPFifoStruct struct SCPFifoStruct
{ {
// fifo registers // fifo registers
volatile u32 CPBase; std::atomic<u32> CPBase;
volatile u32 CPEnd; std::atomic<u32> CPEnd;
u32 CPHiWatermark; u32 CPHiWatermark;
u32 CPLoWatermark; u32 CPLoWatermark;
volatile u32 CPReadWriteDistance; std::atomic<u32> CPReadWriteDistance;
volatile u32 CPWritePointer; std::atomic<u32> CPWritePointer;
volatile u32 CPReadPointer; std::atomic<u32> CPReadPointer;
volatile u32 CPBreakpoint; std::atomic<u32> CPBreakpoint;
volatile u32 SafeCPReadPointer; std::atomic<u32> SafeCPReadPointer;
volatile u32 bFF_GPLinkEnable; volatile u32 bFF_GPLinkEnable;
volatile u32 bFF_GPReadEnable; volatile u32 bFF_GPReadEnable;

View File

@ -8,7 +8,6 @@
#include <cstring> #include <cstring>
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/Atomic.h"
#include "Common/BlockingLoop.h" #include "Common/BlockingLoop.h"
#include "Common/ChunkFile.h" #include "Common/ChunkFile.h"
#include "Common/Event.h" #include "Common/Event.h"
@ -329,33 +328,37 @@ void RunGpuLoop()
// check if we are able to run this buffer // check if we are able to run this buffer
while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable &&
fifo.CPReadWriteDistance && !AtBreakpoint()) fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint())
{ {
if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance) if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
break; break;
u32 cyclesExecuted = 0; u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer; u32 readPtr = fifo.CPReadPointer.load(std::memory_order_relaxed);
ReadDataFromFifo(readPtr); ReadDataFromFifo(readPtr);
if (readPtr == fifo.CPEnd) if (readPtr == fifo.CPEnd.load(std::memory_order_relaxed))
readPtr = fifo.CPBase; readPtr = fifo.CPBase.load(std::memory_order_relaxed);
else else
readPtr += 32; readPtr += 32;
ASSERT_MSG(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0, ASSERT_MSG(COMMANDPROCESSOR,
(s32)fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32 >= 0,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce " "Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce "
"instability in the game. Please report it.", "instability in the game. Please report it.",
fifo.CPReadWriteDistance - 32); fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32);
u8* write_ptr = s_video_buffer_write_ptr; u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder::Run( s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
Common::AtomicStore(fifo.CPReadPointer, readPtr); fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed);
Common::AtomicAdd(fifo.CPReadWriteDistance, static_cast<u32>(-32)); fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst);
if ((write_ptr - s_video_buffer_read_ptr) == 0) if ((write_ptr - s_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer); {
fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed),
std::memory_order_relaxed);
}
CommandProcessor::SetCPStatusFromGPU(); CommandProcessor::SetCPStatusFromGPU();
@ -412,7 +415,8 @@ void GpuMaySleep()
bool AtBreakpoint() bool AtBreakpoint()
{ {
CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo; CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
return fifo.bFF_BPEnable && (fifo.CPReadPointer == fifo.CPBreakpoint); return fifo.bFF_BPEnable && (fifo.CPReadPointer.load(std::memory_order_relaxed) ==
fifo.CPBreakpoint.load(std::memory_order_relaxed));
} }
void RunGpu() void RunGpu()
@ -442,12 +446,12 @@ static int RunGpuOnCpu(int ticks)
CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo; CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
bool reset_simd_state = false; bool reset_simd_state = false;
int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load(); int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load();
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() && while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance.load(std::memory_order_relaxed) &&
available_ticks >= 0) !AtBreakpoint() && available_ticks >= 0)
{ {
if (s_use_deterministic_gpu_thread) if (s_use_deterministic_gpu_thread)
{ {
ReadDataFromFifoOnCPU(fifo.CPReadPointer); ReadDataFromFifoOnCPU(fifo.CPReadPointer.load(std::memory_order_relaxed));
s_gpu_mainloop.Wakeup(); s_gpu_mainloop.Wakeup();
} }
else else
@ -458,19 +462,25 @@ static int RunGpuOnCpu(int ticks)
FPURoundMode::LoadDefaultSIMDState(); FPURoundMode::LoadDefaultSIMDState();
reset_simd_state = true; reset_simd_state = true;
} }
ReadDataFromFifo(fifo.CPReadPointer); ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed));
u32 cycles = 0; u32 cycles = 0;
s_video_buffer_read_ptr = OpcodeDecoder::Run( s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false); DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false);
available_ticks -= cycles; available_ticks -= cycles;
} }
if (fifo.CPReadPointer == fifo.CPEnd) if (fifo.CPReadPointer.load(std::memory_order_relaxed) ==
fifo.CPReadPointer = fifo.CPBase; fifo.CPEnd.load(std::memory_order_relaxed))
{
fifo.CPReadPointer.store(fifo.CPBase.load(std::memory_order_relaxed),
std::memory_order_relaxed);
}
else else
fifo.CPReadPointer += 32; {
fifo.CPReadPointer.fetch_add(32, std::memory_order_relaxed);
}
fifo.CPReadWriteDistance -= 32; fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_relaxed);
} }
CommandProcessor::SetCPStatusFromGPU(); CommandProcessor::SetCPStatusFromGPU();

View File

@ -889,7 +889,9 @@ void Renderer::CheckFifoRecording()
RecordVideoMemory(); RecordVideoMemory();
} }
FifoRecorder::GetInstance().EndFrame(CommandProcessor::fifo.CPBase, CommandProcessor::fifo.CPEnd); FifoRecorder::GetInstance().EndFrame(
CommandProcessor::fifo.CPBase.load(std::memory_order_relaxed),
CommandProcessor::fifo.CPEnd.load(std::memory_order_relaxed));
} }
void Renderer::RecordVideoMemory() void Renderer::RecordVideoMemory()