Remove Atomic.h
The STL has everything we need nowadays. I have tried to not alter any behavior or semantics with this change wherever possible. In particular, WriteLow and WriteHigh in CommandProcessor retain the ability to accidentally undo another thread's write to the upper half or lower half respectively. If that should be fixed, it should be done in a separate commit for clarity. One thing did change: The places where we were using += on a volatile variable (not an atomic operation) are now using fetch_add (actually an atomic operation). Tested with single core and dual core on x86-64 and AArch64.
This commit is contained in:
parent
80ac36a712
commit
b93983b50a
|
@ -1,16 +0,0 @@
|
|||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include "Common/Atomic_Win32.h" // IWYU pragma: export
|
||||
|
||||
#else
|
||||
|
||||
// GCC-compatible compiler assumed!
|
||||
#include "Common/Atomic_GCC.h" // IWYU pragma: export
|
||||
|
||||
#endif
|
|
@ -1,86 +0,0 @@
|
|||
// Copyright 2009 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// IWYU pragma: private, include "Common/Atomic.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common/Common.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
// Atomic operations are performed in a single step by the CPU. It is
|
||||
// impossible for other threads to see the operation "half-done."
|
||||
//
|
||||
// Some atomic operations can be combined with different types of memory
|
||||
// barriers called "Acquire semantics" and "Release semantics", defined below.
|
||||
//
|
||||
// Acquire semantics: Future memory accesses cannot be relocated to before the
|
||||
// operation.
|
||||
//
|
||||
// Release semantics: Past memory accesses cannot be relocated to after the
|
||||
// operation.
|
||||
//
|
||||
// These barriers affect not only the compiler, but also the CPU.
|
||||
|
||||
namespace Common
|
||||
{
|
||||
inline void AtomicAdd(volatile u32& target, u32 value)
|
||||
{
|
||||
__sync_add_and_fetch(&target, value);
|
||||
}
|
||||
|
||||
inline void AtomicAnd(volatile u32& target, u32 value)
|
||||
{
|
||||
__sync_and_and_fetch(&target, value);
|
||||
}
|
||||
|
||||
inline void AtomicDecrement(volatile u32& target)
|
||||
{
|
||||
__sync_add_and_fetch(&target, -1);
|
||||
}
|
||||
|
||||
inline void AtomicIncrement(volatile u32& target)
|
||||
{
|
||||
__sync_add_and_fetch(&target, 1);
|
||||
}
|
||||
|
||||
inline void AtomicOr(volatile u32& target, u32 value)
|
||||
{
|
||||
__sync_or_and_fetch(&target, value);
|
||||
}
|
||||
|
||||
#ifndef __ATOMIC_RELAXED
|
||||
#error __ATOMIC_RELAXED not defined; your compiler version is too old.
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
inline T AtomicLoad(volatile T& src)
|
||||
{
|
||||
return __atomic_load_n(&src, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T AtomicLoadAcquire(volatile T& src)
|
||||
{
|
||||
return __atomic_load_n(&src, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
inline void AtomicStore(volatile T& dest, U value)
|
||||
{
|
||||
__atomic_store_n(&dest, value, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
inline void AtomicStoreRelease(volatile T& dest, U value)
|
||||
{
|
||||
__atomic_store_n(&dest, value, __ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
inline T* AtomicExchangeAcquire(T* volatile& loc, U newval)
|
||||
{
|
||||
return __atomic_exchange_n(&loc, newval, __ATOMIC_ACQ_REL);
|
||||
}
|
||||
} // namespace Common
|
|
@ -1,94 +0,0 @@
|
|||
// Copyright 2009 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// IWYU pragma: private, include "Common/Atomic.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <Windows.h>
|
||||
|
||||
#include <atomic>
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
// Atomic operations are performed in a single step by the CPU. It is
|
||||
// impossible for other threads to see the operation "half-done."
|
||||
//
|
||||
// Some atomic operations can be combined with different types of memory
|
||||
// barriers called "Acquire semantics" and "Release semantics", defined below.
|
||||
//
|
||||
// Acquire semantics: Future memory accesses cannot be relocated to before the
|
||||
// operation.
|
||||
//
|
||||
// Release semantics: Past memory accesses cannot be relocated to after the
|
||||
// operation.
|
||||
//
|
||||
// These barriers affect not only the compiler, but also the CPU.
|
||||
//
|
||||
// NOTE: Acquire and Release are not differentiated right now. They perform a
|
||||
// full memory barrier instead of a "one-way" memory barrier. The newest
|
||||
// Windows SDK has Acquire and Release versions of some Interlocked* functions.
|
||||
|
||||
namespace Common
|
||||
{
|
||||
inline void AtomicAdd(volatile u32& target, u32 value)
|
||||
{
|
||||
_InterlockedExchangeAdd((volatile LONG*)&target, (LONG)value);
|
||||
}
|
||||
|
||||
inline void AtomicAnd(volatile u32& target, u32 value)
|
||||
{
|
||||
_InterlockedAnd((volatile LONG*)&target, (LONG)value);
|
||||
}
|
||||
|
||||
inline void AtomicIncrement(volatile u32& target)
|
||||
{
|
||||
_InterlockedIncrement((volatile LONG*)&target);
|
||||
}
|
||||
|
||||
inline void AtomicDecrement(volatile u32& target)
|
||||
{
|
||||
_InterlockedDecrement((volatile LONG*)&target);
|
||||
}
|
||||
|
||||
inline void AtomicOr(volatile u32& target, u32 value)
|
||||
{
|
||||
_InterlockedOr((volatile LONG*)&target, (LONG)value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T AtomicLoad(volatile T& src)
|
||||
{
|
||||
return src; // 32-bit reads are always atomic.
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T AtomicLoadAcquire(volatile T& src)
|
||||
{
|
||||
// 32-bit reads are always atomic.
|
||||
T result = src;
|
||||
// Compiler instruction only. x86 loads always have acquire semantics.
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
inline void AtomicStore(volatile T& dest, U value)
|
||||
{
|
||||
dest = (T)value; // 32-bit writes are always atomic.
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
inline void AtomicStoreRelease(volatile T& dest, U value)
|
||||
{
|
||||
// Compiler instruction only. x86 stores always have release semantics.
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
dest = (T)value; // 32-bit writes are always atomic.
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
inline T* AtomicExchangeAcquire(T* volatile& loc, U newval)
|
||||
{
|
||||
return (T*)_InterlockedExchangePointer_acq((void* volatile*)&loc, (void*)newval);
|
||||
}
|
||||
} // namespace Common
|
|
@ -2,7 +2,6 @@ add_library(common
|
|||
Analytics.cpp
|
||||
Analytics.h
|
||||
Assert.h
|
||||
Atomic.h
|
||||
BitField.h
|
||||
BitSet.h
|
||||
BitUtils.h
|
||||
|
|
|
@ -221,10 +221,10 @@ public:
|
|||
template <typename T>
|
||||
void Do(std::atomic<T>& atomic)
|
||||
{
|
||||
T temp = atomic.load();
|
||||
T temp = atomic.load(std::memory_order_relaxed);
|
||||
Do(temp);
|
||||
if (mode == MODE_READ)
|
||||
atomic.store(temp);
|
||||
atomic.store(temp, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -5,11 +5,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/BitUtils.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Core/ConfigManager.h"
|
||||
#include "Core/HW/MMIOHandlers.h"
|
||||
|
@ -79,17 +81,19 @@ inline u16* LowPart(u32* ptr)
|
|||
{
|
||||
return (u16*)ptr;
|
||||
}
|
||||
inline u16* LowPart(volatile u32* ptr)
|
||||
inline u16* LowPart(std::atomic<u32>* ptr)
|
||||
{
|
||||
return (u16*)ptr;
|
||||
static_assert(std::atomic<u32>::is_always_lock_free && sizeof(std::atomic<u32>) == sizeof(u32));
|
||||
return LowPart(Common::BitCast<u32*>(ptr));
|
||||
}
|
||||
inline u16* HighPart(u32* ptr)
|
||||
{
|
||||
return LowPart(ptr) + 1;
|
||||
}
|
||||
inline u16* HighPart(volatile u32* ptr)
|
||||
inline u16* HighPart(std::atomic<u32>* ptr)
|
||||
{
|
||||
return LowPart(ptr) + 1;
|
||||
static_assert(std::atomic<u32>::is_always_lock_free && sizeof(std::atomic<u32>) == sizeof(u32));
|
||||
return HighPart(Common::BitCast<u32*>(ptr));
|
||||
}
|
||||
} // namespace Utils
|
||||
|
||||
|
|
|
@ -49,7 +49,6 @@ IPC_HLE_PERIOD: For the Wii Remote this is the call schedule:
|
|||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "Common/Atomic.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Logging/Log.h"
|
||||
#include "Common/Thread.h"
|
||||
|
|
|
@ -16,9 +16,6 @@
|
|||
<ClInclude Include="Common\Align.h" />
|
||||
<ClInclude Include="Common\Analytics.h" />
|
||||
<ClInclude Include="Common\Assert.h" />
|
||||
<ClInclude Include="Common\Atomic_GCC.h" />
|
||||
<ClInclude Include="Common\Atomic_Win32.h" />
|
||||
<ClInclude Include="Common\Atomic.h" />
|
||||
<ClInclude Include="Common\BitField.h" />
|
||||
<ClInclude Include="Common\BitSet.h" />
|
||||
<ClInclude Include="Common\BitUtils.h" />
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "Common/Atomic.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/GL/GLContext.h"
|
||||
#include "Common/GL/GLUtil.h"
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
#include <cstring>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/Atomic.h"
|
||||
#include "Common/ChunkFile.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Flag.h"
|
||||
|
@ -91,21 +90,15 @@ void DoState(PointerWrap& p)
|
|||
p.Do(s_interrupt_waiting);
|
||||
}
|
||||
|
||||
static inline void WriteLow(volatile u32& _reg, u16 lowbits)
|
||||
static inline void WriteLow(std::atomic<u32>& reg, u16 lowbits)
|
||||
{
|
||||
Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits);
|
||||
reg.store((reg.load(std::memory_order_relaxed) & 0xFFFF0000) | lowbits,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
static inline void WriteHigh(volatile u32& _reg, u16 highbits)
|
||||
static inline void WriteHigh(std::atomic<u32>& reg, u16 highbits)
|
||||
{
|
||||
Common::AtomicStore(_reg, (_reg & 0x0000FFFF) | ((u32)highbits << 16));
|
||||
}
|
||||
static inline u16 ReadLow(u32 _reg)
|
||||
{
|
||||
return (u16)(_reg & 0xFFFF);
|
||||
}
|
||||
static inline u16 ReadHigh(u32 _reg)
|
||||
{
|
||||
return (u16)(_reg >> 16);
|
||||
reg.store((reg.load(std::memory_order_relaxed) & 0x0000FFFF) | (static_cast<u32>(highbits) << 16),
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void Init()
|
||||
|
@ -259,31 +252,50 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
mmio->Register(base | PERF_SELECT, MMIO::InvalidRead<u16>(), MMIO::Nop<u16>());
|
||||
|
||||
// Some MMIOs have different handlers for single core vs. dual core mode.
|
||||
mmio->Register(base | FIFO_RW_DISTANCE_LO,
|
||||
mmio->Register(
|
||||
base | FIFO_RW_DISTANCE_LO,
|
||||
IsOnThread() ? MMIO::ComplexRead<u16>([](u32) {
|
||||
if (fifo.CPWritePointer.load(std::memory_order_relaxed) >=
|
||||
fifo.SafeCPReadPointer.load(std::memory_order_relaxed))
|
||||
{
|
||||
return static_cast<u16>(fifo.CPWritePointer.load(std::memory_order_relaxed) -
|
||||
fifo.SafeCPReadPointer.load(std::memory_order_relaxed));
|
||||
}
|
||||
else
|
||||
{
|
||||
return static_cast<u16>(fifo.CPEnd.load(std::memory_order_relaxed) -
|
||||
fifo.SafeCPReadPointer.load(std::memory_order_relaxed) +
|
||||
fifo.CPWritePointer.load(std::memory_order_relaxed) -
|
||||
fifo.CPBase.load(std::memory_order_relaxed) + 32);
|
||||
}
|
||||
}) :
|
||||
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance)),
|
||||
MMIO::DirectWrite<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance),
|
||||
WMASK_LO_ALIGN_32BIT));
|
||||
mmio->Register(base | FIFO_RW_DISTANCE_HI,
|
||||
IsOnThread() ?
|
||||
MMIO::ComplexRead<u16>([](u32) {
|
||||
if (fifo.CPWritePointer >= fifo.SafeCPReadPointer)
|
||||
return ReadLow(fifo.CPWritePointer - fifo.SafeCPReadPointer);
|
||||
Fifo::SyncGPUForRegisterAccess();
|
||||
if (fifo.CPWritePointer.load(std::memory_order_relaxed) >=
|
||||
fifo.SafeCPReadPointer.load(std::memory_order_relaxed))
|
||||
{
|
||||
return (fifo.CPWritePointer.load(std::memory_order_relaxed) -
|
||||
fifo.SafeCPReadPointer.load(std::memory_order_relaxed)) >>
|
||||
16;
|
||||
}
|
||||
else
|
||||
return ReadLow(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer -
|
||||
fifo.CPBase + 32);
|
||||
{
|
||||
return (fifo.CPEnd.load(std::memory_order_relaxed) -
|
||||
fifo.SafeCPReadPointer.load(std::memory_order_relaxed) +
|
||||
fifo.CPWritePointer.load(std::memory_order_relaxed) -
|
||||
fifo.CPBase.load(std::memory_order_relaxed) + 32) >>
|
||||
16;
|
||||
}
|
||||
}) :
|
||||
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance)),
|
||||
MMIO::DirectWrite<u16>(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance),
|
||||
WMASK_LO_ALIGN_32BIT));
|
||||
mmio->Register(base | FIFO_RW_DISTANCE_HI,
|
||||
IsOnThread() ? MMIO::ComplexRead<u16>([](u32) {
|
||||
Fifo::SyncGPUForRegisterAccess();
|
||||
if (fifo.CPWritePointer >= fifo.SafeCPReadPointer)
|
||||
return ReadHigh(fifo.CPWritePointer - fifo.SafeCPReadPointer);
|
||||
else
|
||||
return ReadHigh(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer -
|
||||
fifo.CPBase + 32);
|
||||
}) :
|
||||
MMIO::ComplexRead<u16>([](u32) {
|
||||
Fifo::SyncGPUForRegisterAccess();
|
||||
return ReadHigh(fifo.CPReadWriteDistance);
|
||||
}),
|
||||
MMIO::ComplexRead<u16>([](u32) {
|
||||
Fifo::SyncGPUForRegisterAccess();
|
||||
return fifo.CPReadWriteDistance.load(std::memory_order_relaxed) >> 16;
|
||||
}),
|
||||
MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](u32, u16 val) {
|
||||
Fifo::SyncGPUForRegisterAccess();
|
||||
WriteHigh(fifo.CPReadWriteDistance, val & WMASK_HI_RESTRICT);
|
||||
|
@ -297,16 +309,17 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
mmio->Register(base | FIFO_READ_POINTER_HI,
|
||||
IsOnThread() ? MMIO::ComplexRead<u16>([](u32) {
|
||||
Fifo::SyncGPUForRegisterAccess();
|
||||
return ReadHigh(fifo.SafeCPReadPointer);
|
||||
return fifo.SafeCPReadPointer.load(std::memory_order_relaxed) >> 16;
|
||||
}) :
|
||||
MMIO::ComplexRead<u16>([](u32) {
|
||||
Fifo::SyncGPUForRegisterAccess();
|
||||
return ReadHigh(fifo.CPReadPointer);
|
||||
return fifo.CPReadPointer.load(std::memory_order_relaxed) >> 16;
|
||||
}),
|
||||
IsOnThread() ? MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](u32, u16 val) {
|
||||
Fifo::SyncGPUForRegisterAccess();
|
||||
WriteHigh(fifo.CPReadPointer, val & WMASK_HI_RESTRICT);
|
||||
fifo.SafeCPReadPointer = fifo.CPReadPointer;
|
||||
fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed),
|
||||
std::memory_order_relaxed);
|
||||
}) :
|
||||
MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](u32, u16 val) {
|
||||
Fifo::SyncGPUForRegisterAccess();
|
||||
|
@ -325,8 +338,9 @@ void GatherPipeBursted()
|
|||
{
|
||||
// In multibuffer mode is not allowed write in the same FIFO attached to the GPU.
|
||||
// Fix Pokemon XD in DC mode.
|
||||
if ((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd) &&
|
||||
(ProcessorInterface::Fifo_CPUBase == fifo.CPBase) && fifo.CPReadWriteDistance > 0)
|
||||
if ((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd.load(std::memory_order_relaxed)) &&
|
||||
(ProcessorInterface::Fifo_CPUBase == fifo.CPBase.load(std::memory_order_relaxed)) &&
|
||||
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > 0)
|
||||
{
|
||||
Fifo::FlushGpu();
|
||||
}
|
||||
|
@ -336,35 +350,47 @@ void GatherPipeBursted()
|
|||
}
|
||||
|
||||
// update the fifo pointer
|
||||
if (fifo.CPWritePointer == fifo.CPEnd)
|
||||
fifo.CPWritePointer = fifo.CPBase;
|
||||
if (fifo.CPWritePointer.load(std::memory_order_relaxed) ==
|
||||
fifo.CPEnd.load(std::memory_order_relaxed))
|
||||
{
|
||||
fifo.CPWritePointer.store(fifo.CPBase, std::memory_order_relaxed);
|
||||
}
|
||||
else
|
||||
fifo.CPWritePointer += GATHER_PIPE_SIZE;
|
||||
{
|
||||
fifo.CPWritePointer.fetch_add(GATHER_PIPE_SIZE, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
if (m_CPCtrlReg.GPReadEnable && m_CPCtrlReg.GPLinkEnable)
|
||||
{
|
||||
ProcessorInterface::Fifo_CPUWritePointer = fifo.CPWritePointer;
|
||||
ProcessorInterface::Fifo_CPUBase = fifo.CPBase;
|
||||
ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd;
|
||||
ProcessorInterface::Fifo_CPUWritePointer = fifo.CPWritePointer.load(std::memory_order_relaxed);
|
||||
ProcessorInterface::Fifo_CPUBase = fifo.CPBase.load(std::memory_order_relaxed);
|
||||
ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// If the game is running close to overflowing, make the exception checking more frequent.
|
||||
if (fifo.bFF_HiWatermark)
|
||||
CoreTiming::ForceExceptionCheck(0);
|
||||
|
||||
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
|
||||
fifo.CPReadWriteDistance.fetch_add(GATHER_PIPE_SIZE, std::memory_order_seq_cst);
|
||||
|
||||
Fifo::RunGpu();
|
||||
|
||||
ASSERT_MSG(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
|
||||
ASSERT_MSG(COMMANDPROCESSOR,
|
||||
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) <=
|
||||
fifo.CPEnd.load(std::memory_order_relaxed) -
|
||||
fifo.CPBase.load(std::memory_order_relaxed),
|
||||
"FIFO is overflowed by GatherPipe !\nCPU thread is too fast!");
|
||||
|
||||
// check if we are in sync
|
||||
ASSERT_MSG(COMMANDPROCESSOR, fifo.CPWritePointer == ProcessorInterface::Fifo_CPUWritePointer,
|
||||
ASSERT_MSG(COMMANDPROCESSOR,
|
||||
fifo.CPWritePointer.load(std::memory_order_relaxed) ==
|
||||
ProcessorInterface::Fifo_CPUWritePointer,
|
||||
"FIFOs linked but out of sync");
|
||||
ASSERT_MSG(COMMANDPROCESSOR, fifo.CPBase == ProcessorInterface::Fifo_CPUBase,
|
||||
ASSERT_MSG(COMMANDPROCESSOR,
|
||||
fifo.CPBase.load(std::memory_order_relaxed) == ProcessorInterface::Fifo_CPUBase,
|
||||
"FIFOs linked but out of sync");
|
||||
ASSERT_MSG(COMMANDPROCESSOR, fifo.CPEnd == ProcessorInterface::Fifo_CPUEnd,
|
||||
ASSERT_MSG(COMMANDPROCESSOR,
|
||||
fifo.CPEnd.load(std::memory_order_relaxed) == ProcessorInterface::Fifo_CPUEnd,
|
||||
"FIFOs linked but out of sync");
|
||||
}
|
||||
|
||||
|
@ -403,31 +429,41 @@ void SetCPStatusFromGPU()
|
|||
// breakpoint
|
||||
if (fifo.bFF_BPEnable)
|
||||
{
|
||||
if (fifo.CPBreakpoint == fifo.CPReadPointer)
|
||||
if (fifo.CPBreakpoint.load(std::memory_order_relaxed) ==
|
||||
fifo.CPReadPointer.load(std::memory_order_relaxed))
|
||||
{
|
||||
if (!fifo.bFF_Breakpoint)
|
||||
{
|
||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}", fifo.CPReadPointer);
|
||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}",
|
||||
fifo.CPReadPointer.load(std::memory_order_relaxed));
|
||||
fifo.bFF_Breakpoint = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fifo.bFF_Breakpoint)
|
||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", fifo.CPReadPointer);
|
||||
{
|
||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}",
|
||||
fifo.CPReadPointer.load(std::memory_order_relaxed));
|
||||
}
|
||||
fifo.bFF_Breakpoint = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fifo.bFF_Breakpoint)
|
||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", fifo.CPReadPointer);
|
||||
{
|
||||
DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}",
|
||||
fifo.CPReadPointer.load(std::memory_order_relaxed));
|
||||
}
|
||||
fifo.bFF_Breakpoint = false;
|
||||
}
|
||||
|
||||
// overflow & underflow check
|
||||
fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark);
|
||||
fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark);
|
||||
fifo.bFF_HiWatermark =
|
||||
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark);
|
||||
fifo.bFF_LoWatermark =
|
||||
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark);
|
||||
|
||||
bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
|
||||
bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
|
||||
|
@ -457,8 +493,10 @@ void SetCPStatusFromGPU()
|
|||
void SetCPStatusFromCPU()
|
||||
{
|
||||
// overflow & underflow check
|
||||
fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark);
|
||||
fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark);
|
||||
fifo.bFF_HiWatermark =
|
||||
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark);
|
||||
fifo.bFF_LoWatermark =
|
||||
(fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark);
|
||||
|
||||
bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
|
||||
bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
|
||||
|
@ -489,9 +527,11 @@ void SetCpStatusRegister()
|
|||
{
|
||||
// Here always there is one fifo attached to the GPU
|
||||
m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint;
|
||||
m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance || (fifo.CPReadPointer == fifo.CPWritePointer);
|
||||
m_CPStatusReg.CommandIdle =
|
||||
!fifo.CPReadWriteDistance || Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable;
|
||||
m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
|
||||
(fifo.CPReadPointer.load(std::memory_order_relaxed) ==
|
||||
fifo.CPWritePointer.load(std::memory_order_relaxed));
|
||||
m_CPStatusReg.CommandIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
|
||||
Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable;
|
||||
m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark;
|
||||
m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark;
|
||||
|
||||
|
@ -548,29 +588,32 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
|
|||
cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false");
|
||||
|
||||
{
|
||||
PanicAlertFmt("Illegal command {:02x}\n"
|
||||
"CPBase: {:#010x}\n"
|
||||
"CPEnd: {:#010x}\n"
|
||||
"CPHiWatermark: {:#010x}\n"
|
||||
"CPLoWatermark: {:#010x}\n"
|
||||
"CPReadWriteDistance: {:#010x}\n"
|
||||
"CPWritePointer: {:#010x}\n"
|
||||
"CPReadPointer: {:#010x}\n"
|
||||
"CPBreakpoint: {:#010x}\n"
|
||||
"bFF_GPReadEnable: {}\n"
|
||||
"bFF_BPEnable: {}\n"
|
||||
"bFF_BPInt: {}\n"
|
||||
"bFF_Breakpoint: {}\n"
|
||||
"bFF_GPLinkEnable: {}\n"
|
||||
"bFF_HiWatermarkInt: {}\n"
|
||||
"bFF_LoWatermarkInt: {}\n",
|
||||
cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark,
|
||||
fifo.CPReadWriteDistance, fifo.CPWritePointer, fifo.CPReadPointer,
|
||||
fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false",
|
||||
fifo.bFF_BPEnable ? "true" : "false", fifo.bFF_BPInt ? "true" : "false",
|
||||
fifo.bFF_Breakpoint ? "true" : "false", fifo.bFF_GPLinkEnable ? "true" : "false",
|
||||
fifo.bFF_HiWatermarkInt ? "true" : "false",
|
||||
fifo.bFF_LoWatermarkInt ? "true" : "false");
|
||||
PanicAlertFmt(
|
||||
"Illegal command {:02x}\n"
|
||||
"CPBase: {:#010x}\n"
|
||||
"CPEnd: {:#010x}\n"
|
||||
"CPHiWatermark: {:#010x}\n"
|
||||
"CPLoWatermark: {:#010x}\n"
|
||||
"CPReadWriteDistance: {:#010x}\n"
|
||||
"CPWritePointer: {:#010x}\n"
|
||||
"CPReadPointer: {:#010x}\n"
|
||||
"CPBreakpoint: {:#010x}\n"
|
||||
"bFF_GPReadEnable: {}\n"
|
||||
"bFF_BPEnable: {}\n"
|
||||
"bFF_BPInt: {}\n"
|
||||
"bFF_Breakpoint: {}\n"
|
||||
"bFF_GPLinkEnable: {}\n"
|
||||
"bFF_HiWatermarkInt: {}\n"
|
||||
"bFF_LoWatermarkInt: {}\n",
|
||||
cmd_byte, fifo.CPBase.load(std::memory_order_relaxed),
|
||||
fifo.CPEnd.load(std::memory_order_relaxed), fifo.CPHiWatermark, fifo.CPLoWatermark,
|
||||
fifo.CPReadWriteDistance.load(std::memory_order_relaxed),
|
||||
fifo.CPWritePointer.load(std::memory_order_relaxed),
|
||||
fifo.CPReadPointer.load(std::memory_order_relaxed),
|
||||
fifo.CPBreakpoint.load(std::memory_order_relaxed), fifo.bFF_GPReadEnable ? "true" : "false",
|
||||
fifo.bFF_BPEnable ? "true" : "false", fifo.bFF_BPInt ? "true" : "false",
|
||||
fifo.bFF_Breakpoint ? "true" : "false", fifo.bFF_GPLinkEnable ? "true" : "false",
|
||||
fifo.bFF_HiWatermarkInt ? "true" : "false", fifo.bFF_LoWatermarkInt ? "true" : "false");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
class PointerWrap;
|
||||
|
@ -17,15 +19,15 @@ namespace CommandProcessor
|
|||
struct SCPFifoStruct
|
||||
{
|
||||
// fifo registers
|
||||
volatile u32 CPBase;
|
||||
volatile u32 CPEnd;
|
||||
std::atomic<u32> CPBase;
|
||||
std::atomic<u32> CPEnd;
|
||||
u32 CPHiWatermark;
|
||||
u32 CPLoWatermark;
|
||||
volatile u32 CPReadWriteDistance;
|
||||
volatile u32 CPWritePointer;
|
||||
volatile u32 CPReadPointer;
|
||||
volatile u32 CPBreakpoint;
|
||||
volatile u32 SafeCPReadPointer;
|
||||
std::atomic<u32> CPReadWriteDistance;
|
||||
std::atomic<u32> CPWritePointer;
|
||||
std::atomic<u32> CPReadPointer;
|
||||
std::atomic<u32> CPBreakpoint;
|
||||
std::atomic<u32> SafeCPReadPointer;
|
||||
|
||||
volatile u32 bFF_GPLinkEnable;
|
||||
volatile u32 bFF_GPReadEnable;
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#include <cstring>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/Atomic.h"
|
||||
#include "Common/BlockingLoop.h"
|
||||
#include "Common/ChunkFile.h"
|
||||
#include "Common/Event.h"
|
||||
|
@ -329,33 +328,37 @@ void RunGpuLoop()
|
|||
|
||||
// check if we are able to run this buffer
|
||||
while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable &&
|
||||
fifo.CPReadWriteDistance && !AtBreakpoint())
|
||||
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint())
|
||||
{
|
||||
if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
|
||||
break;
|
||||
|
||||
u32 cyclesExecuted = 0;
|
||||
u32 readPtr = fifo.CPReadPointer;
|
||||
u32 readPtr = fifo.CPReadPointer.load(std::memory_order_relaxed);
|
||||
ReadDataFromFifo(readPtr);
|
||||
|
||||
if (readPtr == fifo.CPEnd)
|
||||
readPtr = fifo.CPBase;
|
||||
if (readPtr == fifo.CPEnd.load(std::memory_order_relaxed))
|
||||
readPtr = fifo.CPBase.load(std::memory_order_relaxed);
|
||||
else
|
||||
readPtr += 32;
|
||||
|
||||
ASSERT_MSG(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0,
|
||||
ASSERT_MSG(COMMANDPROCESSOR,
|
||||
(s32)fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32 >= 0,
|
||||
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce "
|
||||
"instability in the game. Please report it.",
|
||||
fifo.CPReadWriteDistance - 32);
|
||||
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32);
|
||||
|
||||
u8* write_ptr = s_video_buffer_write_ptr;
|
||||
s_video_buffer_read_ptr = OpcodeDecoder::Run(
|
||||
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
|
||||
|
||||
Common::AtomicStore(fifo.CPReadPointer, readPtr);
|
||||
Common::AtomicAdd(fifo.CPReadWriteDistance, static_cast<u32>(-32));
|
||||
fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed);
|
||||
fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst);
|
||||
if ((write_ptr - s_video_buffer_read_ptr) == 0)
|
||||
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
|
||||
{
|
||||
fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed),
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
||||
|
@ -412,7 +415,8 @@ void GpuMaySleep()
|
|||
bool AtBreakpoint()
|
||||
{
|
||||
CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
|
||||
return fifo.bFF_BPEnable && (fifo.CPReadPointer == fifo.CPBreakpoint);
|
||||
return fifo.bFF_BPEnable && (fifo.CPReadPointer.load(std::memory_order_relaxed) ==
|
||||
fifo.CPBreakpoint.load(std::memory_order_relaxed));
|
||||
}
|
||||
|
||||
void RunGpu()
|
||||
|
@ -442,12 +446,12 @@ static int RunGpuOnCpu(int ticks)
|
|||
CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
|
||||
bool reset_simd_state = false;
|
||||
int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load();
|
||||
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() &&
|
||||
available_ticks >= 0)
|
||||
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance.load(std::memory_order_relaxed) &&
|
||||
!AtBreakpoint() && available_ticks >= 0)
|
||||
{
|
||||
if (s_use_deterministic_gpu_thread)
|
||||
{
|
||||
ReadDataFromFifoOnCPU(fifo.CPReadPointer);
|
||||
ReadDataFromFifoOnCPU(fifo.CPReadPointer.load(std::memory_order_relaxed));
|
||||
s_gpu_mainloop.Wakeup();
|
||||
}
|
||||
else
|
||||
|
@ -458,19 +462,25 @@ static int RunGpuOnCpu(int ticks)
|
|||
FPURoundMode::LoadDefaultSIMDState();
|
||||
reset_simd_state = true;
|
||||
}
|
||||
ReadDataFromFifo(fifo.CPReadPointer);
|
||||
ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed));
|
||||
u32 cycles = 0;
|
||||
s_video_buffer_read_ptr = OpcodeDecoder::Run(
|
||||
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false);
|
||||
available_ticks -= cycles;
|
||||
}
|
||||
|
||||
if (fifo.CPReadPointer == fifo.CPEnd)
|
||||
fifo.CPReadPointer = fifo.CPBase;
|
||||
if (fifo.CPReadPointer.load(std::memory_order_relaxed) ==
|
||||
fifo.CPEnd.load(std::memory_order_relaxed))
|
||||
{
|
||||
fifo.CPReadPointer.store(fifo.CPBase.load(std::memory_order_relaxed),
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
else
|
||||
fifo.CPReadPointer += 32;
|
||||
{
|
||||
fifo.CPReadPointer.fetch_add(32, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
fifo.CPReadWriteDistance -= 32;
|
||||
fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
|
|
@ -889,7 +889,9 @@ void Renderer::CheckFifoRecording()
|
|||
RecordVideoMemory();
|
||||
}
|
||||
|
||||
FifoRecorder::GetInstance().EndFrame(CommandProcessor::fifo.CPBase, CommandProcessor::fifo.CPEnd);
|
||||
FifoRecorder::GetInstance().EndFrame(
|
||||
CommandProcessor::fifo.CPBase.load(std::memory_order_relaxed),
|
||||
CommandProcessor::fifo.CPEnd.load(std::memory_order_relaxed));
|
||||
}
|
||||
|
||||
void Renderer::RecordVideoMemory()
|
||||
|
|
Loading…
Reference in New Issue