mirror of https://github.com/PCSX2/pcsx2.git

commit 21612cafc1
Merge branch 'greg/mtvu-mtgs-lockless'
@@ -225,6 +225,7 @@ public:
 #include <algorithm>
 #include <memory>
 #include <atomic>
+#include <thread>
 
 #include "Pcsx2Defs.h"
 
@@ -17,6 +17,11 @@
 #include <deque>
 #include "System/SysThreads.h"
 #include "Gif.h"
+#include "GS.h"
+
+// FIXME common path ?
+#include "../plugins/GSdx/boost_spsc_queue.hpp"
+
 struct GS_Packet;
 extern void Gif_MTGS_Wait(bool isMTVU);
 extern void Gif_FinishIRQ();
@@ -118,11 +123,13 @@ struct Gif_Tag {
 };
 
 struct GS_Packet {
+	// PERF note: this struct is copied various time in hot path. Don't add
+	// new field
+
 	u32 offset;     // Path buffer offset for start of packet
 	u32 size;       // Full size of GS-Packet
 	s32 cycles;     // EE Cycles taken to process this GS packet
 	s32 readAmount; // Dummy read-amount data needed for proper buffer calculations
-	bool done;      // 0 = Incomplete, 1 = Complete
 	GS_Packet() { Reset(); }
 	void Reset() { memzero(*this); }
 };
@@ -147,12 +154,14 @@ static __fi void incTag(u32& offset, u32& size, u32 incAmount) {
 struct Gif_Path_MTVU {
 	u32 fakePackets; // Fake packets pending to be sent to MTGS
 	GS_Packet fakePacket;
-	Mutex gsPackMutex; // Used for atomic access to gsPackQueue
-	std::deque<GS_Packet> gsPackQueue; // VU1 programs' XGkick(s)
+	// Set a size based on MTGS but keep a factor 2 to avoid too waste to much
+	// memory overhead. Note the struct is instantied 3 times (for each gif
+	// path)
+	ringbuffer_base<GS_Packet, RingBufferSize / 2> gsPackQueue;
 	Gif_Path_MTVU() { Reset(); }
-	void Reset() { fakePackets = 0; gsPackQueue.clear();
+	void Reset() { fakePackets = 0;
+	               gsPackQueue.reset();
 	               fakePacket.Reset();
-	               fakePacket.done = 1; // Fake packets don't get processed by pcsx2
 	               fakePacket.size =~0u; // Used to indicate that its a fake packet
 	}
 };
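The change above replaces the Mutex plus std::deque pair with a bounded single-producer/single-consumer ring buffer (the boost_spsc_queue.hpp port included earlier in this header), so the MTVU thread can queue XGkick packets and the MTGS thread can drain them without taking a lock. A minimal sketch of the SPSC idea, with illustrative names rather than the real ringbuffer_base interface:

    #include <atomic>
    #include <cstddef>

    // Bounded SPSC queue: exactly one thread calls push(), exactly one calls pop().
    // Each index is written by only one thread, so a release store paired with an
    // acquire load on the other side is enough to hand an element over safely.
    template <typename T, size_t N>
    class SpscQueue {
        T data[N];
        std::atomic<size_t> write_index{0}; // advanced by the producer only
        std::atomic<size_t> read_index{0};  // advanced by the consumer only

    public:
        bool push(const T& v) { // producer thread
            size_t w = write_index.load(std::memory_order_relaxed);
            size_t next = (w + 1) % N;
            if (next == read_index.load(std::memory_order_acquire))
                return false; // full
            data[w] = v;
            write_index.store(next, std::memory_order_release);
            return true;
        }

        bool pop(T& out) { // consumer thread
            size_t r = read_index.load(std::memory_order_relaxed);
            if (r == write_index.load(std::memory_order_acquire))
                return false; // empty
            out = data[r];
            read_index.store((r + 1) % N, std::memory_order_release);
            return true;
        }
    };

FinishGSPacketMTVU() below spins on push() until a slot is free, which matches this bounded-queue behaviour when the MTGS thread falls behind.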
@@ -200,7 +209,7 @@ struct Gif_Path {
 	}
 
 	bool isMTVU() const { return !idx && THREAD_VU1; }
-	s32 getReadAmount() { return readAmount.load() + gsPack.readAmount; }
+	s32 getReadAmount() { return readAmount.load(std::memory_order_acquire) + gsPack.readAmount; }
 	bool hasDataRemaining() const { return curOffset < curSize; }
 	bool isDone() const { return isMTVU() ? !mtvu.fakePackets : (!hasDataRemaining() && (state == GIF_PATH_IDLE || state == GIF_PATH_WAIT)); }
@@ -256,11 +265,12 @@ struct Gif_Path {
 		curSize += size;
 	}
 
-	// If completed a GS packet (with EOP) then returned GS_Packet.done = 1
+	// If completed a GS packet (with EOP) then set done to true
 	// MTVU: This function only should be called called on EE thread
-	GS_Packet ExecuteGSPacket() {
+	GS_Packet ExecuteGSPacket(bool &done) {
 		if (mtvu.fakePackets) { // For MTVU mode...
 			mtvu.fakePackets--;
+			done = true;
 			return mtvu.fakePacket;
 		}
 		pxAssert(!isMTVU());
@@ -309,8 +319,7 @@ struct Gif_Path {
 
 			if (gifTag.tag.EOP) {
 				GS_Packet t = gsPack;
-				t.done = 1;
-
+				done = true;
 
 				dmaRewind = 0;
 
@@ -380,22 +389,23 @@ struct Gif_Path {
 
 	// MTVU: Gets called after VU1 execution on MTVU thread
 	void FinishGSPacketMTVU() {
-		if (1) {
-			ScopedLock lock(mtvu.gsPackMutex);
-			readAmount.fetch_add(gsPack.size + gsPack.readAmount);
-			mtvu.gsPackQueue.push_back(gsPack);
-		}
+		// Performance note: fetch_add atomic operation might create some stall for atomic
+		// operation in gsPack.push
+		readAmount.fetch_add(gsPack.size + gsPack.readAmount, std::memory_order_acq_rel);
+		while (!mtvu.gsPackQueue.push(gsPack))
+			;
 
 		gsPack.Reset();
 		gsPack.offset = curOffset;
 	}
 
 	// MTVU: Gets called by MTGS thread
 	GS_Packet GetGSPacketMTVU() {
-		ScopedLock lock(mtvu.gsPackMutex);
-		if (mtvu.gsPackQueue.size()) {
-			GS_Packet t = mtvu.gsPackQueue[0];
-			return t; // XGkick GS packet(s)
+		// FIXME is the error path useful ?
+		if (!mtvu.gsPackQueue.empty()) {
+			return mtvu.gsPackQueue.front();
 		}
 
 		Console.Error("MTVU: Expected gsPackQueue to have elements!");
 		pxAssert(0);
 		return GS_Packet(); // gsPack.size will be 0
@@ -403,18 +413,13 @@ struct Gif_Path {
 
 	// MTVU: Gets called by MTGS thread
 	void PopGSPacketMTVU() {
-		ScopedLock lock(mtvu.gsPackMutex);
-		if (mtvu.gsPackQueue.size()) {
-			mtvu.gsPackQueue.pop_front();
-		}
+		mtvu.gsPackQueue.pop();
 	}
 
 	// MTVU: Returns the amount of pending
 	// GS Packets that MTGS hasn't yet processed
 	u32 GetPendingGSPackets() {
-		ScopedLock lock(mtvu.gsPackMutex);
-		u32 t = mtvu.gsPackQueue.size();
-		return t;
+		return mtvu.gsPackQueue.size();
 	}
 };
 
@@ -566,8 +571,9 @@ struct Gif_Unit {
 		for(;;) {
 			if (stat.APATH) { // Some Transfer is happening
 				Gif_Path& path = gifPath[stat.APATH-1];
-				GS_Packet gsPack = path.ExecuteGSPacket();
-				if(!gsPack.done) {
+				bool done = false;
+				GS_Packet gsPack = path.ExecuteGSPacket(done);
+				if(!done) {
 					if (stat.APATH == 3 && CanDoP3Slice() && !gsSIGNAL.queued) {
 						if(!didPath3 && /*!Path3Masked() &&*/ checkPaths(1,1,0)) { // Path3 slicing
 							didPath3 = true;
@@ -407,7 +407,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 					u32 offset = tag.data[0];
 					u32 size = tag.data[1];
 					if (offset != ~0u) GSgifTransfer((u32*)&path.buffer[offset], size/16);
-					path.readAmount.fetch_sub(size);
+					path.readAmount.fetch_sub(size, std::memory_order_acq_rel);
 					break;
 				}
 
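The fetch_sub here (and on the MTVU path in the next hunk) now states std::memory_order_acq_rel explicitly. readAmount works as a credit counter on the shared path buffer: the producer adds a packet's size before publishing it, the consumer subtracts only after GSgifTransfer has actually read the bytes, and getReadAmount() (now an acquire load) is polled to decide when the space may be reused. A small sketch of that counter pattern, with illustrative names:

    #include <atomic>

    std::atomic<int> in_flight{0}; // plays the role of readAmount: bytes queued, not yet consumed

    void producer_publish(int size) {   // e.g. FinishGSPacketMTVU()
        in_flight.fetch_add(size, std::memory_order_acq_rel);
        // ... then push the packet descriptor to the SPSC queue ...
    }

    void consumer_finish(int size) {    // e.g. after GSgifTransfer()
        in_flight.fetch_sub(size, std::memory_order_acq_rel);
    }

    bool buffer_reusable() {            // e.g. the getReadAmount() check
        return in_flight.load(std::memory_order_acquire) == 0;
    }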
@@ -421,7 +421,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 					Gif_Path& path = gifUnit.gifPath[GIF_PATH_1];
 					GS_Packet gsPack = path.GetGSPacketMTVU(); // Get vu1 program's xgkick packet(s)
 					if (gsPack.size) GSgifTransfer((u32*)&path.buffer[gsPack.offset], gsPack.size/16);
-					path.readAmount.fetch_sub(gsPack.size + gsPack.readAmount);
+					path.readAmount.fetch_sub(gsPack.size + gsPack.readAmount, std::memory_order_acq_rel);
 					path.PopGSPacketMTVU(); // Should be done last, for proper Gif_MTGS_Wait()
 					break;
 				}

pcsx2/MTVU.cpp (121 changed lines)
@@ -79,11 +79,12 @@ void VU_Thread::Reset()
 {
 	ScopedLock lock(mtxBusy);
 
-	write_offset = 0;
 	vuCycleIdx = 0;
-	read_pos = 0;
 	isBusy = false;
-	write_pos = 0;
+	m_ato_write_pos = 0;
+	m_write_pos = 0;
+	m_ato_read_pos = 0;
+	m_read_pos = 0;
 	memzero(vif);
 	memzero(vifRegs);
 	for (size_t i = 0; i < 4; ++i)
@@ -102,7 +103,7 @@ void VU_Thread::ExecuteRingBuffer()
 	for(;;) {
 		semaEvent.WaitWithoutYield();
 		ScopedLockBool lock(mtxBusy, isBusy);
-		while (read_pos.load(std::memory_order_relaxed) != GetWritePos()) {
+		while (m_ato_read_pos.load(std::memory_order_relaxed) != GetWritePos()) {
 			u32 tag = Read();
 			switch (tag) {
 				case MTVU_VU_EXECUTE: {
@@ -110,11 +111,12 @@ void VU_Thread::ExecuteRingBuffer()
 					s32 addr = Read();
 					vifRegs.top = Read();
 					vifRegs.itop = Read();
 
 					if (addr != -1) vuRegs.VI[REG_TPC].UL = addr;
 					vuCPU->Execute(vu1RunCycles);
 					gifUnit.gifPath[GIF_PATH_1].FinishGSPacketMTVU();
 					semaXGkick.Post(); // Tell MTGS a path1 packet is complete
-					vuCycles[vuCycleIdx].store(vuRegs.cycle, std::memory_order_relaxed);
+					vuCycles[vuCycleIdx].store(vuRegs.cycle, std::memory_order_release);
 					vuCycleIdx = (vuCycleIdx + 1) & 3;
 					break;
 				}
@@ -142,15 +144,17 @@ void VU_Thread::ExecuteRingBuffer()
 					Read(&vif.tag, vif_copy_size);
 					ReadRegs(&vifRegs);
 					u32 size = Read();
-					MTVU_Unpack(&buffer[read_pos.load(std::memory_order_relaxed)], vifRegs);
-					incReadPos(size_u32(size));
+					MTVU_Unpack(&buffer[m_read_pos], vifRegs);
+					m_read_pos += size_u32(size);
 					break;
 				}
 				case MTVU_NULL_PACKET:
-					read_pos.store(0, std::memory_order_release);
+					m_read_pos = 0;
 					break;
 				jNO_DEFAULT;
 			}
 
+			CommitReadPos();
 		}
 	}
 }
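The CommitReadPos() call added at the bottom of the dispatch loop captures the core of this rework: instead of publishing read_pos with an atomic store on every Read()/incReadPos(), the VU thread now advances a plain m_read_pos while it decodes a packet and publishes the result once, with a release store, when the packet is done. The EE thread pairs that with the acquire load in GetReadPos(); the write side mirrors it with m_write_pos and CommitWritePos(). A condensed, self-contained model of one such cursor (illustrative names, not the real class):

    #include <atomic>

    struct RingCursor {
        int local = 0;                  // advanced freely by the owning thread, no atomics
        std::atomic<int> published{0};  // only updated at packet boundaries

        void advance(int words)   { local += words; }
        void commit()             { published.store(local, std::memory_order_release); }
        int  observe() const      { return published.load(std::memory_order_acquire); }
    };

Fewer atomic operations per command word, and the other thread can never observe a position that points into the middle of a half-written or half-read packet.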
@@ -161,12 +165,16 @@ __ri void VU_Thread::WaitOnSize(s32 size)
 {
 	for(;;) {
 		s32 readPos = GetReadPos();
-		if (readPos <= write_pos.load(std::memory_order_relaxed)) break; // MTVU is reading in back of write_pos
-		if (readPos > write_pos.load(std::memory_order_relaxed) + size) break; // Enough free front space
-		if (1) { // Let MTVU run to free up buffer space
+		if (readPos <= m_write_pos) break; // MTVU is reading in back of write_pos
+		if (readPos > m_write_pos + size) break; // Enough free front space
+		{ // Let MTVU run to free up buffer space
 			KickStart();
 			if (IsDevBuild) DevCon.WriteLn("WaitOnSize()");
-			ScopedLock lock(mtxBusy);
+			// Locking might trigger a full flush of the ring buffer. Yield
+			// will be more aggressive, and only flush the minimal size.
+			// Performance will be smoother but it will consume extra CPU cycle
+			// on the EE thread (not an issue on 4 cores).
+			std::this_thread::yield();
 		}
 	}
 }
@@ -175,83 +183,87 @@ __ri void VU_Thread::WaitOnSize(s32 size)
 // to write a continuous 'size * sizeof(u32)' bytes
 void VU_Thread::ReserveSpace(s32 size)
 {
-	pxAssert(write_pos < buffer_size);
+	pxAssert(m_write_pos < buffer_size);
 	pxAssert(size < buffer_size);
 	pxAssert(size > 0);
-	pxAssert(write_offset == 0);
-	if (write_pos.load(std::memory_order_relaxed) + size > buffer_size) {
-		pxAssert(write_pos > 0);
+	if (m_write_pos + size > (buffer_size - 1)) {
 		WaitOnSize(1); // Size of MTVU_NULL_PACKET
 		Write(MTVU_NULL_PACKET);
-		write_offset = 0;
-		write_pos.store(0, std::memory_order_release);
+		// Reset local write pointer/position
+		m_write_pos = 0;
+		CommitWritePos();
 	}
 
 	WaitOnSize(size);
 }
 
 // Use this when reading read_pos from ee thread
 __fi s32 VU_Thread::GetReadPos()
 {
-	return read_pos.load(std::memory_order_acquire);
+	return m_ato_read_pos.load(std::memory_order_acquire);
 }
 
 // Use this when reading write_pos from vu thread
 __fi s32 VU_Thread::GetWritePos()
 {
-	return write_pos.load(std::memory_order_acquire);
+	return m_ato_write_pos.load(std::memory_order_acquire);
 }
 
-// Gets the effective write pointer after adding write_offset
+// Gets the effective write pointer after
 __fi u32* VU_Thread::GetWritePtr()
 {
-	return &buffer[(write_pos.load(std::memory_order_relaxed) + write_offset) & buffer_mask];
+	pxAssert(m_write_pos < buffer_size);
+	return &buffer[m_write_pos];
 }
 
-__fi void VU_Thread::incReadPos(s32 offset)
-{ // Offset in u32 sizes
-	read_pos.store((read_pos.load(std::memory_order_relaxed) + offset) & buffer_mask, std::memory_order_release);
-}
-__fi void VU_Thread::incWritePos()
-{ // Adds write_offset
-	s32 temp = (write_pos.load(std::memory_order_relaxed) + write_offset) & buffer_mask;
-	write_offset = 0;
-	write_pos.store(temp, std::memory_order_release);
+__fi void VU_Thread::CommitWritePos()
+{
+	m_ato_write_pos.store(m_write_pos, std::memory_order_release);
+
 	if (MTVU_ALWAYS_KICK) KickStart();
 	if (MTVU_SYNC_MODE) WaitVU();
 }
 
+__fi void VU_Thread::CommitReadPos()
+{
+	m_ato_read_pos.store(m_read_pos, std::memory_order_release);
+}
+
 __fi u32 VU_Thread::Read()
 {
-	u32 ret = buffer[read_pos.load(std::memory_order_relaxed)];
-	incReadPos(1);
+	u32 ret = buffer[m_read_pos];
+	m_read_pos++;
 	return ret;
 }
 
 __fi void VU_Thread::Read(void* dest, u32 size)
 {
-	memcpy(dest, &buffer[read_pos.load(std::memory_order_relaxed)], size);
-	incReadPos(size_u32(size));
+	memcpy(dest, &buffer[m_read_pos], size);
+	m_read_pos += size_u32(size);
 }
 
 __fi void VU_Thread::ReadRegs(VIFregisters* dest)
 {
-	VIFregistersMTVU* src = (VIFregistersMTVU*)&buffer[read_pos.load(std::memory_order_relaxed)];
+	VIFregistersMTVU* src = (VIFregistersMTVU*)&buffer[m_read_pos];
 	dest->cycle = src->cycle;
 	dest->mode = src->mode;
 	dest->num = src->num;
 	dest->mask = src->mask;
 	dest->itop = src->itop;
 	dest->top = src->top;
-	incReadPos(size_u32(sizeof(VIFregistersMTVU)));
+	m_read_pos += size_u32(sizeof(VIFregistersMTVU));
 }
 
 __fi void VU_Thread::Write(u32 val)
 {
 	GetWritePtr()[0] = val;
-	write_offset += 1;
+	m_write_pos += 1;
 }
 __fi void VU_Thread::Write(void* src, u32 size)
 {
 	memcpy(GetWritePtr(), src, size);
-	write_offset += size_u32(size);
+	m_write_pos += size_u32(size);
 }
 
 __fi void VU_Thread::WriteRegs(VIFregisters* src)
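Worth noting in this hunk: GetWritePtr() no longer masks with buffer_mask, and write_offset disappears. ReserveSpace() now guarantees that a packet never crosses the end of the ring, by emitting an MTVU_NULL_PACKET (which makes the reader jump back to index 0, see the ExecuteRingBuffer hunk above) and resetting m_write_pos whenever the next packet would not fit contiguously. A toy model of that wrap rule, with illustrative sizes and names:

    #include <atomic>
    using u32 = unsigned int;

    const int buffer_size  = 1024;        // ring capacity in u32 words
    const u32 NULL_PACKET  = 0xFFFFFFFFu; // stands in for MTVU_NULL_PACKET

    struct ToyWriter {
        u32 buffer[buffer_size];
        int m_write_pos = 0;
        std::atomic<int> m_ato_write_pos{0};

        void Write(u32 v)     { buffer[m_write_pos++] = v; }
        void CommitWritePos() { m_ato_write_pos.store(m_write_pos, std::memory_order_release); }

        void ReserveSpace(int size) {
            if (m_write_pos + size > buffer_size - 1) { // packet would cross the end
                Write(NULL_PACKET); // reader restarts at index 0 when it sees this tag
                m_write_pos = 0;
                CommitWritePos();
            }
            // the real code additionally waits (WaitOnSize) until the reader has
            // freed enough space in front of m_write_pos
        }
    };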
@@ -263,28 +275,28 @@ __fi void VU_Thread::WriteRegs(VIFregisters* src)
 	dest->mask = src->mask;
 	dest->top = src->top;
 	dest->itop = src->itop;
-	write_offset += size_u32(sizeof(VIFregistersMTVU));
+	m_write_pos += size_u32(sizeof(VIFregistersMTVU));
 }
 
 // Returns Average number of vu Cycles from last 4 runs
 // Used for vu cycle stealing hack
 u32 VU_Thread::Get_vuCycles()
 {
-	return (vuCycles[0].load(std::memory_order_relaxed) +
-			vuCycles[1].load(std::memory_order_relaxed) +
-			vuCycles[2].load(std::memory_order_relaxed) +
-			vuCycles[3].load(std::memory_order_relaxed)) >> 2;
+	return (vuCycles[0].load(std::memory_order_acquire) +
+			vuCycles[1].load(std::memory_order_acquire) +
+			vuCycles[2].load(std::memory_order_acquire) +
+			vuCycles[3].load(std::memory_order_acquire)) >> 2;
 }
 
 void VU_Thread::KickStart(bool forceKick)
 {
 	if ((forceKick && !semaEvent.Count())
-		|| (!isBusy.load(std::memory_order_relaxed) && GetReadPos() != write_pos.load(std::memory_order_relaxed))) semaEvent.Post();
+		|| (!isBusy.load(std::memory_order_acquire) && GetReadPos() != m_ato_write_pos.load(std::memory_order_relaxed))) semaEvent.Post();
 }
 
 bool VU_Thread::IsDone()
 {
-	return !isBusy.load(std::memory_order_relaxed) && GetReadPos() == GetWritePos();
+	return GetReadPos() == GetWritePos();
 }
 
 void VU_Thread::WaitVU()
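vuCycles is stored on the VU thread with memory_order_release (see the ExecuteRingBuffer hunk earlier) and summed here on the EE thread with memory_order_acquire. A release store paired with an acquire load of the same atomic also makes every ordinary write the storing thread did before the store visible to the loading thread; relaxed ordering would only guarantee atomicity of the counter value itself. A tiny, generic example of the pairing, not taken from the pcsx2 code:

    #include <atomic>

    std::atomic<unsigned> counter{0};
    unsigned details = 0; // plain data "covered" by the counter

    void producer() {
        details = 42;                                 // plain write
        counter.store(1, std::memory_order_release);  // publish
    }

    void consumer() {
        if (counter.load(std::memory_order_acquire) == 1) {
            unsigned seen = details; // guaranteed to be 42 by the release/acquire pair
            (void)seen;
        }
    }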
@@ -295,6 +307,7 @@ void VU_Thread::WaitVU()
 		//DevCon.WriteLn("WaitVU()");
 		pxAssert(THREAD_VU1);
 		KickStart();
+		std::this_thread::yield(); // Give a chance to the MTVU thread to actually start
 		ScopedLock lock(mtxBusy);
 	}
 }
@@ -307,7 +320,7 @@ void VU_Thread::ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop)
 	Write(vu_addr);
 	Write(vif_top);
 	Write(vif_itop);
-	incWritePos();
+	CommitWritePos();
 	gifUnit.TransferGSPacketData(GIF_TRANS_MTVU, NULL, 0);
 	KickStart();
 	u32 cycles = std::min(Get_vuCycles(), 3000u);
@@ -324,7 +337,7 @@ void VU_Thread::VifUnpack(vifStruct& _vif, VIFregisters& _vifRegs, u8* data, u32
 	WriteRegs(&_vifRegs);
 	Write(size);
 	Write(data, size);
-	incWritePos();
+	CommitWritePos();
 	KickStart();
 }
 
@@ -336,7 +349,7 @@ void VU_Thread::WriteMicroMem(u32 vu_micro_addr, void* data, u32 size)
 	Write(vu_micro_addr);
 	Write(size);
 	Write(data, size);
-	incWritePos();
+	CommitWritePos();
 }
 
 void VU_Thread::WriteDataMem(u32 vu_data_addr, void* data, u32 size)
@@ -347,7 +360,7 @@ void VU_Thread::WriteDataMem(u32 vu_data_addr, void* data, u32 size)
 	Write(vu_data_addr);
 	Write(size);
 	Write(data, size);
-	incWritePos();
+	CommitWritePos();
 }
 
 void VU_Thread::WriteCol(vifStruct& _vif)
@@ -356,7 +369,7 @@ void VU_Thread::WriteCol(vifStruct& _vif)
 	ReserveSpace(1 + size_u32(sizeof(_vif.MaskCol)));
 	Write(MTVU_VIF_WRITE_COL);
 	Write(&_vif.MaskCol, sizeof(_vif.MaskCol));
-	incWritePos();
+	CommitWritePos();
 }
 
 void VU_Thread::WriteRow(vifStruct& _vif)
@@ -365,5 +378,5 @@ void VU_Thread::WriteRow(vifStruct& _vif)
 	ReserveSpace(1 + size_u32(sizeof(_vif.MaskRow)));
 	Write(MTVU_VIF_WRITE_ROW);
 	Write(&_vif.MaskRow, sizeof(_vif.MaskRow));
-	incWritePos();
+	CommitWritePos();
 }

pcsx2/MTVU.h (27 changed lines)
@@ -28,16 +28,18 @@
 // - ring-buffer has no complete pending packets when read_pos==write_pos
 class VU_Thread : public pxThread {
 	static const s32 buffer_size = (_1mb * 16) / sizeof(s32);
-	static const u32 buffer_mask = buffer_size - 1;
-	__aligned(4) u32 buffer[buffer_size];
-	std::atomic<int> read_pos; // Only modified by VU thread
-	std::atomic<bool> isBusy; // Is thread processing data?
-	std::atomic<int> write_pos; // Only modified by EE thread
-	__aligned(4) s32 write_offset; // Only modified by EE thread
-	__aligned(4) Mutex mtxBusy;
-	__aligned(4) Semaphore semaEvent;
-	__aligned(4) BaseVUmicroCPU*& vuCPU;
-	__aligned(4) VURegs& vuRegs;
+	u32 buffer[buffer_size];
+	// Note: keep atomic on separate cache line to avoid CPU conflict
+	__aligned(64) std::atomic<bool> isBusy; // Is thread processing data?
+	__aligned(64) std::atomic<int> m_ato_read_pos; // Only modified by VU thread
+	__aligned(64) std::atomic<int> m_ato_write_pos; // Only modified by EE thread
+	__aligned(64) int m_read_pos; // temporary read pos (local to the VU thread)
+	int m_write_pos; // temporary write pos (local to the EE thread)
+	Mutex mtxBusy;
+	Semaphore semaEvent;
+	BaseVUmicroCPU*& vuCPU;
+	VURegs& vuRegs;
 
 public:
 	__aligned16 vifStruct vif;
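The __aligned(64) attributes place each hot atomic on its own cache line, so the EE thread bouncing m_ato_write_pos and the VU thread bouncing m_ato_read_pos do not keep invalidating the same line (false sharing); padding1/padding2 in boost_spsc_queue.hpp below serve the same purpose. In portable C++ the equivalent would look roughly like this:

    #include <atomic>

    // alignas(64) is the standard spelling of the __aligned(64) used in the diff:
    // each member starts on its own 64-byte cache line.
    struct Positions {
        alignas(64) std::atomic<int> ato_read_pos{0};   // written by the reader thread
        alignas(64) std::atomic<int> ato_write_pos{0};  // written by the writer thread
    };
    static_assert(alignof(Positions) >= 64, "members sit on separate cache lines");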
@@ -85,10 +87,11 @@ private:
 
 	s32 GetReadPos();
 	s32 GetWritePos();
 
 	u32* GetWritePtr();
 
-	void incReadPos(s32 offset);
-	void incWritePos();
+	void CommitWritePos();
+	void CommitReadPos();
 
 	u32 Read();
 	void Read(void* dest, u32 size);
@@ -24,7 +24,7 @@
 // the lower 16 bit value. IF the change is breaking of all compatibility with old
 // states, increment the upper 16 bit value, and clear the lower 16 bits to 0.
 
-static const u32 g_SaveVersion = (0x9A0C << 16) | 0x0000;
+static const u32 g_SaveVersion = (0x9A0D << 16) | 0x0000;
 
 // this function is meant to be used in the place of GSfreeze, and provides a safe layer
 // between the GS saving function and the MTGS's needs. :)
@@ -44,6 +44,10 @@
 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 // DEALINGS IN THE SOFTWARE.
 
+#include <atomic>
+
+// I don't like it
+using namespace std;
 
 template <typename T, size_t max_size>
 class ringbuffer_base
@@ -53,6 +57,9 @@ class ringbuffer_base
 	atomic<size_t> write_index_;
 	char padding1[padding_size]; /* force read_index and write_index to different cache lines */
 	atomic<size_t> read_index_;
+	char padding2[padding_size]; /* force read_index and pending_pop_read_index to different cache lines */
+
+	size_t pending_pop_read_index;
 
 	T *buffer;
 
@@ -85,11 +92,12 @@ public:
 {
 	size_t ret = arg + 1;
 #if 0
+	// Initial boost code
 	while (unlikely(ret >= max_size))
-#else
-	while (ret >= max_size)
-#endif
 		ret -= max_size;
+#else
+	ret %= max_size;
+#endif
 	return ret;
 }
 
@@ -123,6 +131,21 @@ public:
 		return true;
 	}
 
+	T& front()
+	{
+		pending_pop_read_index = read_index_.load(memory_order_relaxed); // only written from pop thread
+
+		return buffer[pending_pop_read_index];
+	}
+
+	void pop()
+	{
+		buffer[pending_pop_read_index].~T();
+
+		size_t next = next_index(pending_pop_read_index);
+		read_index_.store(next, memory_order_release);
+	}
+
 	template <typename Functor>
 	bool consume_one(Functor & f)
 	{
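front()/pop() split consumption into a peek step and a publish step, which is exactly what the MTGS side needs: GetGSPacketMTVU() peeks the packet, the data is transferred from the path buffer, and only PopGSPacketMTVU() advances read_index_ with a release store, so the producer cannot reuse the slot while the transfer is still running. pending_pop_read_index merely carries the peeked index from front() to pop(); both are only ever called from the single consumer thread. A hedged usage sketch (queue size chosen arbitrarily):

    // Consumer-thread side only; exactly one thread may call front()/pop().
    void drain_one(ringbuffer_base<GS_Packet, 64>& queue) {
        if (!queue.empty()) {
            GS_Packet& pkt = queue.front(); // peek: the slot still belongs to the queue
            // ... read pkt.offset / pkt.size and transfer the data ...
            queue.pop();                    // publish: the producer may now reuse the slot
        }
    }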
@@ -169,6 +192,17 @@
 		return write_index_.is_lock_free() && read_index_.is_lock_free();
 	}
 
+	size_t size() const
+	{
+		const size_t write_index = write_index_.load(memory_order_relaxed);
+		const size_t read_index = read_index_.load(memory_order_relaxed);
+		if (read_index > write_index) {
+			return (write_index + max_size) - read_index;
+		} else {
+			return write_index - read_index;
+		}
+	}
+
 private:
 	bool empty(size_t write_index, size_t read_index)
 	{
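For example, with max_size = 8, read_index = 5 and write_index = 2 (the writer has already wrapped), size() returns (2 + 8) - 5 = 5 pending elements. Both loads are relaxed, so the result is only a snapshot of the queue length, which is sufficient for the polling use GetPendingGSPackets() puts it to.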